Spaces:
Sleeping
Sleeping
Commit
·
37b9a66
0
Parent(s):
initial commit
Browse files- .gitignore +62 -0
- ReadMe.md +0 -0
- app.py +43 -0
- requirements.txt +5 -0
- src/__init__.py +0 -0
- src/assess_text.py +42 -0
- src/main.py +105 -0
- src/transcribe_image.py +40 -0
.gitignore
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# Cython generated files
|
| 7 |
+
*.c
|
| 8 |
+
*.cpp
|
| 9 |
+
*.so
|
| 10 |
+
|
| 11 |
+
# Virtual environment directories
|
| 12 |
+
.env/
|
| 13 |
+
.venv/
|
| 14 |
+
venv/
|
| 15 |
+
ENV/
|
| 16 |
+
env/
|
| 17 |
+
venv.bak/
|
| 18 |
+
|
| 19 |
+
# Jupyter Notebook checkpoints
|
| 20 |
+
.ipynb_checkpoints
|
| 21 |
+
|
| 22 |
+
# Pytest cache
|
| 23 |
+
.pytest_cache/
|
| 24 |
+
|
| 25 |
+
# VS Code workspace settings
|
| 26 |
+
.vscode/
|
| 27 |
+
|
| 28 |
+
# macOS file
|
| 29 |
+
.DS_Store
|
| 30 |
+
|
| 31 |
+
# Local development output
|
| 32 |
+
output/
|
| 33 |
+
|
| 34 |
+
# Python egg and package distribution files
|
| 35 |
+
*.egg-info/
|
| 36 |
+
.eggs/
|
| 37 |
+
dist/
|
| 38 |
+
build/
|
| 39 |
+
__pypackages__/
|
| 40 |
+
|
| 41 |
+
# pip cache
|
| 42 |
+
pip-log.txt
|
| 43 |
+
pip-delete-this-directory.txt
|
| 44 |
+
|
| 45 |
+
# Logs
|
| 46 |
+
*.log
|
| 47 |
+
|
| 48 |
+
# Ignore data files for testing
|
| 49 |
+
data/*
|
| 50 |
+
|
| 51 |
+
# Ignore environment files
|
| 52 |
+
*.env
|
| 53 |
+
*.env.*
|
| 54 |
+
|
| 55 |
+
# Streamlit and Hugging Face cache
|
| 56 |
+
streamlit/.streamlit/
|
| 57 |
+
streamlit/cache/
|
| 58 |
+
hf_cache/
|
| 59 |
+
|
| 60 |
+
# IDE specific
|
| 61 |
+
.idea/
|
| 62 |
+
.vscode/
|
ReadMe.md
ADDED
|
File without changes
|
app.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Streamlit front end for AutoAssess: transcribe and grade uploaded essay images."""
import os
import tempfile

import streamlit as st
import pandas as pd
# src.main is not imported here: it offers no per-image entry point, so the
# page calls the transcription and assessment helpers directly.
from src.assess_text import assess_essay_with_gpt
from src.transcribe_image import transcribe_image
from PIL import Image

st.title("AutoAssess: Student Essay Transcription and Assessment")

# Upload folder of images
uploaded_files = st.file_uploader("Upload a folder of student essays (images)", type=['jpg', 'jpeg', 'png'], accept_multiple_files=True)

# Text inputs for question and criteria
essay_question = st.text_input("Enter the essay question:")
grading_criteria = st.text_area("Enter grading criteria or relevant marking information:")

# Upload Excel file with student IDs and page count
student_info_file = st.file_uploader("Upload Excel file with student IDs and page count", type=["xlsx"])

if st.button("Process Essays"):
    if not uploaded_files or not essay_question or not grading_criteria or not student_info_file:
        st.warning("Please upload all required files and enter necessary information.")
    else:
        # Process student info file
        # NOTE(review): the sheet is only displayed; page counts are not yet
        # used to group multi-page essays -- confirm intended behaviour.
        student_df = pd.read_excel(student_info_file)
        st.write("Student Information:")
        st.write(student_df)

        results = []
        for uploaded_file in uploaded_files:
            # Fail fast on uploads Pillow cannot identify as an image.
            Image.open(uploaded_file)

            # transcribe_image expects a filesystem path, so spill the upload
            # to a temporary file that keeps the original extension.
            suffix = os.path.splitext(uploaded_file.name)[1] or ".jpg"
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                tmp.write(uploaded_file.getvalue())
                tmp_path = tmp.name
            try:
                transcription = transcribe_image(tmp_path)
            finally:
                os.remove(tmp_path)

            assessment = assess_essay_with_gpt(transcription, essay_question, grading_criteria)
            results.append({
                "filename": uploaded_file.name,
                "transcription": transcription,
                "mark": assessment.get("mark"),
                "reason": assessment.get("reason"),
            })

        for result in results:
            st.write(f"**File:** {result['filename']}")
            st.write(result['transcription'])
            st.write(f"**Mark:** {result['mark']}")
            st.write(f"**Reason:** {result['reason']}")

        # Optional: Save results to the output folder
        output_file = "output/results.csv"
        # The output folder is git-ignored, so it will not exist in a fresh checkout.
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        pd.DataFrame(results).to_csv(output_file)
        st.success(f"All essays processed. Results saved to {output_file}")
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
Pillow
|
| 4 |
+
openai
|
| 5 |
+
openpyxl
|
src/__init__.py
ADDED
|
File without changes
|
src/assess_text.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
|
| 4 |
+
def generate_chatgpt_prompt(question, guidelines, examples):
    """Generate the shared grading prompt from a question, guidelines and examples.

    Args:
        question: Text of the essay question.
        guidelines: Text of the assessment guidelines.
        examples: Iterable of dicts with "essay", "mark" and "reason" keys
            describing previously graded essays; None or empty omits the
            examples section.

    Returns:
        The prompt as a single string. The essay to assess is not included;
        the caller appends it.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = [
        f"Here is an essay question and assessment guidelines:\n\nQuestion:\n{question}\n\nGuidelines:\n{guidelines}\n\n"
    ]

    if examples:
        parts.append("Here are examples of previously graded essays:\n")
        parts.extend(
            f"Essay: {example['essay']}\nMark: {example['mark']}\nReason: {example['reason']}\n\n"
            for example in examples
        )

    parts.append("Please assess the following essays based on the provided guidelines and give a mark and reasoning in JSON format.")
    return "".join(parts)
|
| 15 |
+
|
| 16 |
+
def _parse_assessment(raw):
    """Return the model reply as a dict with "mark" and "reason", or None if unusable."""
    if not raw:
        return None
    text = raw.strip()
    # Models sometimes wrap the JSON in a Markdown fence despite the instruction.
    if text.startswith("```"):
        text = text.strip("`").strip()
        if text.lower().startswith("json"):
            text = text[4:].strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return None
    # Callers index the result by "mark" and "reason", so reject any other shape.
    if not isinstance(parsed, dict) or "mark" not in parsed or "reason" not in parsed:
        return None
    return parsed


def assess_essay_with_gpt(essay_text, question, guidelines, examples=None):
    """Get ChatGPT to assess the essay based on provided question and guidelines.

    Args:
        essay_text: Transcribed essay to grade.
        question: Text of the essay question.
        guidelines: Text of the assessment guidelines.
        examples: Optional previously graded essays (dicts with "essay",
            "mark" and "reason") passed through to the prompt builder.

    Returns:
        A dict with "mark" and "reason" keys. When the reply is empty, is not
        valid JSON, or lacks those keys, the fallback
        {"mark": 0, "reason": "Error: Unable to parse response"} is returned.
    """
    prompt = generate_chatgpt_prompt(question, guidelines, examples)
    prompt += f"\n\nEssay to assess:\n{essay_text}\n\nProvide the response in JSON format as follows:\n{{\"mark\": <mark>, \"reason\": \"<reason>\"}}. Do not include ```json``` in the response."

    client = OpenAI()

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are an AI assistant that assesses essays."},
            {"role": "user", "content": prompt}
        ],
        # A reply cut off mid-JSON cannot be parsed and would be scored 0, so
        # leave room for a full reason.
        max_tokens=500,
        temperature=0.2
    )

    # Extract the content from the response (may be None for an empty reply)
    raw = response.choices[0].message.content

    assessment = _parse_assessment(raw)
    if assessment is None:
        print('result:', raw)
        # Handle cases where the response might not be valid JSON
        return {"mark": 0, "reason": "Error: Unable to parse response"}
    return assessment
|
src/main.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import openai
|
| 2 |
+
import os
|
| 3 |
+
from openpyxl import load_workbook, Workbook
|
| 4 |
+
|
| 5 |
+
from src.transcribe_image import transcribe_image
|
| 6 |
+
from src.assess_text import assess_essay_with_gpt
|
| 7 |
+
|
| 8 |
+
# OpenAI API key setup: read the key from the environment so that no secret
# is stored in the repository. Any key that was ever committed to source
# control must be treated as compromised and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
| 10 |
+
|
| 11 |
+
def process_essays(folder_path, question_file, guidelines_file, excel_file):
    """Transcribe and grade every essay listed in an Excel sheet.

    The active sheet is read with one student per row starting at row 2:
    column 1 student ID, column 2 number of pages, column 3 transcription,
    column 4 mark, column 5 reason. Missing transcriptions are produced from
    the images in ``folder_path`` (consumed oldest-modified first), missing
    grades are requested from the model, and everything is written to a new
    workbook.

    Args:
        folder_path: Directory containing the essay page images.
        question_file: Path to a text file holding the essay question.
        guidelines_file: Path to a text file holding the assessment guidelines.
        excel_file: Path to the input workbook.

    Raises:
        TypeError: If a row that needs transcription has a non-integer page count.
        IndexError: If the folder holds fewer images than the sheet requires.
        ValueError: If a row has a mark without a reason or vice versa.

    Side effects:
        Writes "data/transcribed_essays.xlsx" after transcription and
        "<excel_file stem>_assessed<ext>" after grading.
    """
    # Load question and guidelines
    with open(question_file, 'r') as file:
        question = file.read().strip()

    with open(guidelines_file, 'r') as file:
        guidelines = file.read().strip()

    # Load the Excel sheet
    workbook = load_workbook(excel_file)
    sheet = workbook.active

    # Create a new workbook to save results
    new_workbook = Workbook()
    new_sheet = new_workbook.active

    # Copy headers
    for col in range(1, sheet.max_column + 1):
        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value

    # Sort images in folder by modification time, skipping hidden entries
    # (e.g. .DS_Store) and sub-directories that are not essay pages.
    candidates = (
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if not name.startswith(".")
    )
    images = sorted((path for path in candidates if os.path.isfile(path)), key=os.path.getmtime)
    img_index = 0

    # First Pass: Transcribe missing texts
    for row in range(2, sheet.max_row + 1):
        student_id = sheet.cell(row=row, column=1).value
        num_pages = sheet.cell(row=row, column=2).value
        transcribed_text = sheet.cell(row=row, column=3).value

        # Copy student ID and number of pages
        new_sheet.cell(row=row, column=1).value = student_id
        new_sheet.cell(row=row, column=2).value = num_pages

        # Transcribe if text is missing
        if transcribed_text is None:
            if not isinstance(num_pages, int):
                raise TypeError(
                    f"Row {row} (student {student_id}): number of pages must be an integer, got {num_pages!r}."
                )
            if img_index + num_pages > len(images):
                raise IndexError(
                    f"Not enough images in {folder_path!r} for student {student_id}: "
                    f"need {num_pages} more, {len(images) - img_index} left."
                )
            print(f"Transcribing essay for student {student_id}...")
            pages = [
                transcribe_image(image) or ""
                for image in images[img_index:img_index + max(num_pages, 0)]
            ]
            img_index += max(num_pages, 0)
            new_sheet.cell(row=row, column=3).value = "\n".join(pages).strip()
        else:
            # Copy the existing transcription if available
            # NOTE(review): img_index is not advanced here, so the folder is
            # assumed to contain images only for untranscribed rows -- confirm.
            new_sheet.cell(row=row, column=3).value = transcribed_text

    # Save current state with transcriptions
    os.makedirs("data", exist_ok=True)
    new_workbook.save("data/transcribed_essays.xlsx")
    print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")

    # Collect graded examples and initialize list
    examples = []
    for row in range(2, sheet.max_row + 1):
        student_id = sheet.cell(row=row, column=1).value
        # Read from the new sheet so rows that were graded by hand but only
        # transcribed in the first pass contribute their text, not None.
        transcribed_text = new_sheet.cell(row=row, column=3).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        # Store graded examples for prompt generation
        if mark is not None or reason is not None:
            # Explicit raise: an assert would be stripped under `python -O`.
            if mark is None or reason is None:
                raise ValueError(f"Mark or reason missing for student {student_id}.")
            examples.append({"essay": transcribed_text, "mark": mark, "reason": reason})

    # Second Pass: Grade missing grades/reasons
    for row in range(2, sheet.max_row + 1):
        student_id = sheet.cell(row=row, column=1).value
        transcribed_text = new_sheet.cell(row=row, column=3).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            print(f"Assessing essay for student {student_id}...")
            assessment = assess_essay_with_gpt(transcribed_text, question, guidelines, examples)
            new_sheet.cell(row=row, column=4).value = assessment['mark']
            new_sheet.cell(row=row, column=5).value = assessment['reason']
            # Add the assessed essay as an example for subsequent assessments
            examples.append({"essay": transcribed_text, "mark": assessment['mark'], "reason": assessment['reason']})
        else:
            # Copy the existing mark and reason to the new sheet
            new_sheet.cell(row=row, column=4).value = mark
            new_sheet.cell(row=row, column=5).value = reason

    # Save the new Excel file with assessments filled in. Build the name from
    # the split extension so an input without a ".xlsx" suffix is never
    # overwritten and a ".xlsx" elsewhere in the path is left untouched.
    root, ext = os.path.splitext(excel_file)
    new_workbook.save(f"{root}_assessed{ext}")
    print("Assessment complete. Results saved in assessed version of the Excel file.")
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# Run the batch job only when this file is executed as a script, so that
# importing the module (e.g. from the Streamlit app) has no side effects.
if __name__ == "__main__":
    # Replace with actual file paths
    process_essays(
        folder_path="data/images",
        question_file="data/question.txt",
        guidelines_file="data/assessment_guidelines.txt",
        excel_file="data/essays.xlsx"
    )
|
src/transcribe_image.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str``.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    # Explicit raise instead of assert: asserts are stripped under `python -O`.
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"The image file does not exist: {image_path}")
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
|
| 10 |
+
|
| 11 |
+
def transcribe_image(image_path, max_tokens=2000):
    """Transcribe handwritten text from an image using a vision-capable chat model.

    Args:
        image_path: Path to the image file to transcribe.
        max_tokens: Upper bound on the length of the model's reply. The
            default leaves room for a full handwritten page; a small limit
            silently truncates the transcription.

    Returns:
        The transcribed text, or an empty string when the model returns no
        content.

    Raises:
        Whatever ``encode_image`` raises for an unreadable path, plus any
        error raised by the OpenAI client.
    """
    import mimetypes

    # Initialize the OpenAI client
    client = OpenAI()

    # Encoding the image
    base64_image = encode_image(image_path)

    # Label the data URL with the file's real type (PNG uploads are accepted
    # too); fall back to JPEG when the extension is unknown or not an image.
    mime_type = mimetypes.guess_type(image_path)[0]
    if not (mime_type and mime_type.startswith("image/")):
        mime_type = "image/jpeg"

    # Preparing the API call
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please transcribe the handwritten text in this image. Return only the text content."},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
                    }
                ]
            }
        ],
        max_tokens=max_tokens
    )

    # Callers concatenate the result, so never hand back None.
    transcribed_text = response.choices[0].message.content or ""

    return transcribed_text
|