TensorFlo committed on
Commit
37b9a66
·
0 Parent(s):

initial commit

Browse files
Files changed (8) hide show
  1. .gitignore +62 -0
  2. ReadMe.md +0 -0
  3. app.py +43 -0
  4. requirements.txt +5 -0
  5. src/__init__.py +0 -0
  6. src/assess_text.py +42 -0
  7. src/main.py +105 -0
  8. src/transcribe_image.py +40 -0
.gitignore ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Cython generated files
7
+ *.c
8
+ *.cpp
9
+ *.so
10
+
11
+ # Virtual environment directories
12
+ .env/
13
+ .venv/
14
+ venv/
15
+ ENV/
16
+ env/
17
+ venv.bak/
18
+
19
+ # Jupyter Notebook checkpoints
20
+ .ipynb_checkpoints
21
+
22
+ # Pytest cache
23
+ .pytest_cache/
24
+
25
+ # VS Code workspace settings
26
+ .vscode/
27
+
28
+ # macOS file
29
+ .DS_Store
30
+
31
+ # Local development output
32
+ output/
33
+
34
+ # Python egg and package distribution files
35
+ *.egg-info/
36
+ .eggs/
37
+ dist/
38
+ build/
39
+ __pypackages__/
40
+
41
+ # pip cache
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Logs
46
+ *.log
47
+
48
+ # Ignore data files for testing
49
+ data/*
50
+
51
+ # Ignore environment files
52
+ *.env
53
+ *.env.*
54
+
55
+ # Streamlit and Hugging Face cache
56
+ streamlit/.streamlit/
57
+ streamlit/cache/
58
+ hf_cache/
59
+
60
+ # IDE specific
61
+ .idea/
62
+ .vscode/
ReadMe.md ADDED
File without changes
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit front end for AutoAssess.

Lets a marker upload essay page images, an essay question, grading criteria
and a student-info spreadsheet, then transcribes and grades every image and
writes the results to a CSV file.
"""
import os
import tempfile

import pandas as pd
import streamlit as st

# NOTE: src.main is deliberately not imported. It defines no `process_image`
# function, and importing it runs its module-level batch job.
from src.assess_text import assess_essay_with_gpt
from src.transcribe_image import transcribe_image

OUTPUT_FILE = "output/results.csv"


def _transcribe_upload(uploaded_file):
    """Transcribe one uploaded image and return the transcription text.

    `transcribe_image` expects a filesystem path, whereas Streamlit provides
    an in-memory upload, so the bytes are spooled to a temporary file that is
    removed again once the transcription call returns or fails.
    """
    suffix = os.path.splitext(uploaded_file.name)[1]
    # delete=False + explicit close so the file can be reopened by path on
    # every platform; cleanup happens in the finally clause below.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
        tmp_path = tmp.name
    try:
        return transcribe_image(tmp_path)
    finally:
        os.remove(tmp_path)


st.title("AutoAssess: Student Essay Transcription and Assessment")

# Upload folder of images
uploaded_files = st.file_uploader(
    "Upload a folder of student essays (images)",
    type=['jpg', 'jpeg', 'png'],
    accept_multiple_files=True,
)

# Text inputs for question and criteria
essay_question = st.text_input("Enter the essay question:")
grading_criteria = st.text_area("Enter grading criteria or relevant marking information:")

# Upload Excel file with student IDs and page count
student_info_file = st.file_uploader(
    "Upload Excel file with student IDs and page count", type=["xlsx"]
)

if st.button("Process Essays"):
    if not (uploaded_files and essay_question and grading_criteria and student_info_file):
        st.warning("Please upload all required files and enter necessary information.")
    else:
        # The student sheet is only displayed here; it is not yet used to
        # group pages per student.
        student_df = pd.read_excel(student_info_file)
        st.write("Student Information:")
        st.write(student_df)

        results = []
        for uploaded_file in uploaded_files:
            transcription = _transcribe_upload(uploaded_file)
            assessment = assess_essay_with_gpt(
                transcription, essay_question, grading_criteria
            )
            results.append({
                "filename": uploaded_file.name,
                "transcription": transcription,
                "mark": assessment["mark"],
                "reason": assessment["reason"],
            })

        for result in results:
            st.write(f"**File:** {result['filename']}")
            st.write(result['transcription'])
            st.write(f"**Mark:** {result['mark']}")
            st.write(result['reason'])

        # Save results to the output folder, creating it on first run.
        os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
        pd.DataFrame(results).to_csv(OUTPUT_FILE)
        st.success(f"All essays processed. Results saved to {OUTPUT_FILE}")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ Pillow
4
+ openai
5
+ openpyxl
src/__init__.py ADDED
File without changes
src/assess_text.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from openai import OpenAI
3
+
4
def generate_chatgpt_prompt(question, guidelines, examples=None):
    """Build the grading prompt shared by every essay assessment.

    Args:
        question: The essay question text.
        guidelines: The assessment guidelines / marking criteria text.
        examples: Optional iterable of dicts with the keys ``"essay"``,
            ``"mark"`` and ``"reason"`` describing already graded essays.
            ``None`` or an empty collection omits the examples section.

    Returns:
        The prompt as a single string. The essay to assess is not included;
        the caller appends it.

    Raises:
        KeyError: If an example lacks one of the three required keys.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = [
        "Here is an essay question and assessment guidelines:\n\n"
        f"Question:\n{question}\n\nGuidelines:\n{guidelines}\n\n"
    ]

    if examples:
        parts.append("Here are examples of previously graded essays:\n")
        parts.extend(
            f"Essay: {example['essay']}\nMark: {example['mark']}\nReason: {example['reason']}\n\n"
            for example in examples
        )

    parts.append(
        "Please assess the following essays based on the provided guidelines "
        "and give a mark and reasoning in JSON format."
    )
    return "".join(parts)
15
+
16
def assess_essay_with_gpt(essay_text, question, guidelines, examples=None):
    """Ask the chat model to grade one essay and return its verdict.

    Args:
        essay_text: The transcribed essay to assess.
        question: The essay question text.
        guidelines: The assessment guidelines / marking criteria text.
        examples: Optional graded examples, passed through to
            ``generate_chatgpt_prompt``.

    Returns:
        The parsed JSON object from the model, a dict containing at least the
        keys ``"mark"`` and ``"reason"``. If the reply is empty, is not valid
        JSON, or lacks either key, the fallback
        ``{"mark": 0, "reason": "Error: Unable to parse response"}`` is
        returned so callers can always index both keys.
    """
    prompt = generate_chatgpt_prompt(question, guidelines, examples)
    prompt += (
        f"\n\nEssay to assess:\n{essay_text}\n\n"
        "Provide the response in JSON format as follows:\n"
        '{"mark": <mark>, "reason": "<reason>"}. '
        "Do not include ```json``` in the response."
    )

    client = OpenAI()

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an AI assistant that assesses essays."},
            {"role": "user", "content": prompt},
        ],
        # JSON mode; the prompt already mentions JSON, as this mode requires.
        response_format={"type": "json_object"},
        # 150 tokens could cut the reason off mid-string, leaving invalid JSON
        # that would then be reported as a mark of 0.
        max_tokens=500,
        temperature=0.2,
    )

    # content can be None (e.g. a refusal), so normalise before stripping.
    result = (response.choices[0].message.content or "").strip()

    # Tolerate a reply wrapped in a Markdown code fence despite the instruction.
    if result.startswith("```"):
        result = result.strip("`").strip()
        if result.lower().startswith("json"):
            result = result[4:].strip()

    try:
        assessment = json.loads(result)
    except json.JSONDecodeError:
        assessment = None

    # Callers index both keys, so only accept a dict that provides them.
    if isinstance(assessment, dict) and "mark" in assessment and "reason" in assessment:
        return assessment

    print('result:', result)
    return {"mark": 0, "reason": "Error: Unable to parse response"}
src/main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import os
3
+ from openpyxl import load_workbook, Workbook
4
+
5
+ from src.transcribe_image import transcribe_image
6
+ from src.assess_text import assess_essay_with_gpt
7
+
8
# OpenAI API key setup: the secret is read from the OPENAI_API_KEY environment
# variable rather than being hard-coded in source control. The key that was
# previously committed on this line is exposed in the repository history and
# must be treated as compromised: revoke it and issue a new one.
openai.api_key = os.environ.get("OPENAI_API_KEY")
10
+
11
def process_essays(folder_path, question_file, guidelines_file, excel_file):
    """Transcribe and grade the essays listed in an Excel sheet.

    The active sheet of ``excel_file`` is read with row 1 as the header row
    and, from row 2 onwards, column 1 = student ID, column 2 = number of
    pages, column 3 = transcription, column 4 = mark, column 5 = reason.
    Rows without a student ID are ignored. Missing transcriptions are
    produced from the images in ``folder_path`` (taken in modification-time
    order), and rows with neither mark nor reason are graded, using the
    already graded rows as examples.

    Two workbooks are written: ``data/transcribed_essays.xlsx`` after the
    transcription pass, and ``<excel_file stem>_assessed.xlsx`` next to the
    input at the end. The input workbook itself is not modified.

    Args:
        folder_path: Directory containing the essay page images.
        question_file: Path of a text file holding the essay question.
        guidelines_file: Path of a text file holding the marking guidelines.
        excel_file: Path of the input workbook.

    Raises:
        ValueError: If a page count is not a non-negative integer, if the
            folder holds fewer images than the sheet requires, or if a row
            has a mark without a reason (or the reverse).
    """
    # Load question and guidelines
    with open(question_file, 'r') as file:
        question = file.read().strip()

    with open(guidelines_file, 'r') as file:
        guidelines = file.read().strip()

    # Load the Excel sheet
    workbook = load_workbook(excel_file)
    sheet = workbook.active

    # Results go into a fresh workbook so the input stays untouched.
    new_workbook = Workbook()
    new_sheet = new_workbook.active

    # Copy headers
    for col in range(1, sheet.max_column + 1):
        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value

    # Pages are consumed in modification-time order. Hidden files such as
    # .DS_Store and sub-directories are skipped so they cannot be mistaken
    # for a page and shift every following student's images.
    images = sorted(
        (
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(folder_path, name))
        ),
        key=os.path.getmtime,
    )
    img_index = 0

    # max_row can include trailing rows that hold no data; only rows with a
    # student ID are processed.
    rows = [
        row for row in range(2, sheet.max_row + 1)
        if sheet.cell(row=row, column=1).value is not None
    ]

    # First Pass: Transcribe missing texts
    for row in rows:
        student_id = sheet.cell(row=row, column=1).value
        num_pages = sheet.cell(row=row, column=2).value
        transcribed_text = sheet.cell(row=row, column=3).value

        # Copy student ID and number of pages
        new_sheet.cell(row=row, column=1).value = student_id
        new_sheet.cell(row=row, column=2).value = num_pages

        if transcribed_text is not None:
            # Copy the existing transcription if available.
            # NOTE(review): img_index is not advanced for these rows, so the
            # folder is assumed to hold only pages of essays that still need
            # transcribing - confirm against how the folder is populated.
            new_sheet.cell(row=row, column=3).value = transcribed_text
            continue

        try:
            page_count = int(num_pages)
        except (TypeError, ValueError):
            page_count = -1
        if page_count < 0:
            raise ValueError(
                f"Invalid page count {num_pages!r} for student {student_id}."
            )

        last = img_index + page_count
        if last > len(images):
            raise ValueError(
                f"Not enough images in {folder_path!r} for student {student_id}: "
                f"need {page_count}, only {len(images) - img_index} left."
            )

        print(f"Transcribing essay for student {student_id}...")
        pages = [transcribe_image(path) for path in images[img_index:last]]
        img_index = last
        new_sheet.cell(row=row, column=3).value = "\n".join(pages).strip()

    # Save current state with transcriptions so they survive a grading failure.
    os.makedirs("data", exist_ok=True)
    new_workbook.save("data/transcribed_essays.xlsx")
    print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")

    # Collect graded examples for prompt generation
    examples = []
    for row in rows:
        student_id = sheet.cell(row=row, column=1).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            continue
        if mark is None or reason is None:
            raise ValueError(f"Mark or reason missing for student {student_id}.")
        # Read the essay from the new sheet: it also holds transcriptions
        # produced in the first pass, which the input sheet lacks.
        examples.append({
            "essay": new_sheet.cell(row=row, column=3).value,
            "mark": mark,
            "reason": reason,
        })

    # Second Pass: Grade missing grades/reasons
    for row in rows:
        student_id = sheet.cell(row=row, column=1).value
        transcribed_text = new_sheet.cell(row=row, column=3).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            print(f"Assessing essay for student {student_id}...")
            assessment = assess_essay_with_gpt(transcribed_text, question, guidelines, examples)
            mark, reason = assessment['mark'], assessment['reason']
            # Add the assessed essay as an example for subsequent assessments
            examples.append({"essay": transcribed_text, "mark": mark, "reason": reason})

        # Either the freshly assessed or the pre-existing mark and reason.
        new_sheet.cell(row=row, column=4).value = mark
        new_sheet.cell(row=row, column=5).value = reason

    # Build the output name from the stem so the input can never be
    # overwritten, whatever its extension is.
    stem, _ = os.path.splitext(excel_file)
    new_workbook.save(f"{stem}_assessed.xlsx")
    print("Assessment complete. Results saved in assessed version of the Excel file.")
96
+
97
+
98
+
99
# Replace with actual file paths. The call is guarded so that importing this
# module does not start a full transcription and grading run as a side effect.
if __name__ == "__main__":
    process_essays(
        folder_path="data/images",
        question_file="data/question.txt",
        guidelines_file="data/assessment_guidelines.txt",
        excel_file="data/essays.xlsx",
    )
src/transcribe_image.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import OpenAI
3
+ import os
4
+
5
def encode_image(image_path):
    """Return the contents of an image file as a base64 text string.

    Args:
        image_path: Path of the file to read.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str``.

    Raises:
        FileNotFoundError: If ``image_path`` does not name an existing file.
    """
    # Raise explicitly instead of asserting: assert statements are removed
    # when Python runs with -O, which would silently drop the check.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"The image file does not exist: {image_path}")
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
10
+
11
def transcribe_image(image_path):
    """Transcribe the handwritten text in an image using an OpenAI chat model.

    The image is sent inline as a base64 data URL together with a
    transcription instruction.

    Args:
        image_path: Path of the image file to transcribe.

    Returns:
        The text returned by the model, or an empty string if the reply
        carried no content.
    """
    # Function-scope import so this block does not depend on a change to the
    # module's import list.
    import mimetypes

    # Initialize the OpenAI client
    client = OpenAI()

    # Encoding the image
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real type (PNG uploads are accepted
    # too); fall back to JPEG when the extension is unknown.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    # Preparing the API call
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please transcribe the handwritten text in this image. Return only the text content."},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ],
            }
        ],
        # A full handwritten page can exceed 300 tokens, which would cut the
        # transcription short.
        max_tokens=1500,
    )

    # content may be None; callers concatenate the result, so return a string.
    return response.choices[0].message.content or ""