Spaces:

TensorFlo
/

AutoAssess

Sleeping

App Files Files Community

TensorFlo committed on Nov 7, 2024

Commit

37b9a66

0 Parent(s):

initial commit

Browse files

Files changed (8) hide show

.gitignore +62 -0
ReadMe.md +0 -0
app.py +43 -0
requirements.txt +5 -0
src/__init__.py +0 -0
src/assess_text.py +42 -0
src/main.py +105 -0
src/transcribe_image.py +40 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,62 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# Cython generated files
+*.c
+*.cpp
+*.so
+# Virtual environment directories
+.env/
+.venv/
+venv/
+ENV/
+env/
+venv.bak/
+# Jupyter Notebook checkpoints
+.ipynb_checkpoints
+# Pytest cache
+.pytest_cache/
+# VS Code workspace settings
+.vscode/
+# macOS file
+.DS_Store
+# Local development output
+output/
+# Python egg and package distribution files
+*.egg-info/
+.eggs/
+dist/
+build/
+__pypackages__/
+# pip cache
+pip-log.txt
+pip-delete-this-directory.txt
+# Logs
+*.log
+# Ignore data files for testing
+data/*
+# Ignore environment files
+*.env
+*.env.*
+# Streamlit and Hugging Face cache
+streamlit/.streamlit/
+streamlit/cache/
+hf_cache/
+# IDE specific
+.idea/
+.vscode/

ReadMe.md ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+import tempfile
+
+import pandas as pd
+import streamlit as st
+from PIL import Image
+
+from src.assess_text import assess_essay_with_gpt
+from src.transcribe_image import transcribe_image
+
+# NOTE: `process_image` used to be imported from `src.main`, which does not
+# define it, so the app failed at import time. It is implemented below on top
+# of the two helpers that do exist.
+OUTPUT_FILE = "output/results.csv"
+
+
+def process_image(uploaded_file, essay_question, grading_criteria):
+    """Transcribe one uploaded essay image and grade the transcription.
+
+    Args:
+        uploaded_file: File-like object returned by ``st.file_uploader``.
+        essay_question: The essay question entered by the user.
+        grading_criteria: Marking guidelines entered by the user.
+
+    Returns:
+        A dict with the keys ``filename``, ``transcription``, ``mark`` and
+        ``reason``.
+    """
+    # transcribe_image() reads the image from a file path and sends it as
+    # JPEG data, so the upload is normalised to a temporary JPEG on disk.
+    tmp = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
+    tmp.close()
+    try:
+        with Image.open(uploaded_file) as image:
+            image.convert("RGB").save(tmp.name, format="JPEG")
+        transcription = transcribe_image(tmp.name)
+    finally:
+        os.remove(tmp.name)
+    assessment = assess_essay_with_gpt(transcription, essay_question, grading_criteria)
+    if not isinstance(assessment, dict):
+        assessment = {}
+    return {
+        "filename": uploaded_file.name,
+        "transcription": transcription,
+        "mark": assessment.get("mark"),
+        "reason": assessment.get("reason"),
+    }
+
+
+st.title("AutoAssess: Student Essay Transcription and Assessment")
+# Upload folder of images
+uploaded_files = st.file_uploader("Upload a folder of student essays (images)", type=['jpg', 'jpeg', 'png'], accept_multiple_files=True)
+# Text inputs for question and criteria
+essay_question = st.text_input("Enter the essay question:")
+grading_criteria = st.text_area("Enter grading criteria or relevant marking information:")
+# Upload Excel file with student IDs and page count
+student_info_file = st.file_uploader("Upload Excel file with student IDs and page count", type=["xlsx"])
+if st.button("Process Essays"):
+    if not uploaded_files or not essay_question or not grading_criteria or not student_info_file:
+        st.warning("Please upload all required files and enter necessary information.")
+    else:
+        # Process student info file
+        student_df = pd.read_excel(student_info_file)
+        st.write("Student Information:")
+        st.write(student_df)
+        results = [
+            process_image(uploaded_file, essay_question, grading_criteria)
+            for uploaded_file in uploaded_files
+        ]
+        for result in results:
+            st.write(f"**File:** {result['filename']}")
+            st.write(result['transcription'])
+            st.write(f"**Mark:** {result['mark']}")
+            st.write(result['reason'])
+        # Save results; the output folder is created on first use because it
+        # is git-ignored and therefore absent from a fresh checkout.
+        os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
+        pd.DataFrame(results).to_csv(OUTPUT_FILE, index=False)
+        st.success(f"All essays processed. Results saved to {OUTPUT_FILE}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+streamlit
+pandas
+Pillow
+openai>=1.0.0
+openpyxl

src/__init__.py ADDED Viewed

File without changes

src/assess_text.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import json
+from openai import OpenAI
+def generate_chatgpt_prompt(question, guidelines, examples):
+    """Build the shared grading prompt.
+
+    The prompt states the question and the guidelines, lists any previously
+    graded essays from ``examples`` (dicts with the keys ``essay``, ``mark``
+    and ``reason``), and ends with the instruction to answer in JSON.
+    ``examples`` may be ``None`` or empty, in which case no example section
+    is emitted.
+    """
+    sections = [
+        "Here is an essay question and assessment guidelines:\n\n"
+        f"Question:\n{question}\n\nGuidelines:\n{guidelines}\n\n"
+    ]
+    if examples:
+        sections.append("Here are examples of previously graded essays:\n")
+        sections.extend(
+            f"Essay: {graded['essay']}\nMark: {graded['mark']}\nReason: {graded['reason']}\n\n"
+            for graded in examples
+        )
+    sections.append("Please assess the following essays based on the provided guidelines and give a mark and reasoning in JSON format.")
+    return "".join(sections)
+def _parse_assessment(raw_reply):
+    """Return the reply as a dict with ``mark`` and ``reason``, or None if unusable."""
+    text = (raw_reply or "").strip()
+    # Models sometimes wrap the JSON in a Markdown code fence despite the
+    # instruction not to; peel it off before parsing.
+    if text.startswith("```"):
+        text = text.strip("`").strip()
+        if text.lower().startswith("json"):
+            text = text[4:].strip()
+    try:
+        parsed = json.loads(text)
+    except json.JSONDecodeError:
+        return None
+    if not isinstance(parsed, dict) or "mark" not in parsed or "reason" not in parsed:
+        return None
+    return parsed
+
+
+def assess_essay_with_gpt(essay_text, question, guidelines, examples=None):
+    """Get ChatGPT to assess the essay based on provided question and guidelines.
+
+    Args:
+        essay_text: Transcribed essay to grade.
+        question: The essay question.
+        guidelines: Marking guidelines.
+        examples: Optional list of graded essays (dicts with ``essay``,
+            ``mark`` and ``reason``) included in the prompt.
+
+    Returns:
+        A dict with the keys ``mark`` and ``reason``. When the reply is
+        missing, is not valid JSON, or lacks either key, the fallback
+        ``{"mark": 0, "reason": "Error: Unable to parse response"}`` is
+        returned and the raw reply is printed for debugging.
+    """
+    prompt = generate_chatgpt_prompt(question, guidelines, examples)
+    prompt += f"\n\nEssay to assess:\n{essay_text}\n\nProvide the response in JSON format as follows:\n{{\"mark\": <mark>, \"reason\": \"<reason>\"}}. Do not include ```json``` in the response."
+    client = OpenAI()
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {"role": "system", "content": "You are an AI assistant that assesses essays."},
+            {"role": "user", "content": prompt}
+        ],
+        # JSON mode: ask the API for a syntactically valid JSON object.
+        response_format={"type": "json_object"},
+        # The previous limit of 150 tokens left little room for the reason
+        # text; a truncated reply is invalid JSON and was graded as 0.
+        max_tokens=400,
+        temperature=0.2
+    )
+    # Extract the content from the response (it may be None)
+    raw_reply = response.choices[0].message.content
+    assessment = _parse_assessment(raw_reply)
+    if assessment is None:
+        print('result:', raw_reply)
+        # Handle cases where the response might not be valid JSON
+        return {"mark": 0, "reason": "Error: Unable to parse response"}
+    return assessment

src/main.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import openai
+import os
+from openpyxl import load_workbook, Workbook
+from src.transcribe_image import transcribe_image
+from src.assess_text import assess_essay_with_gpt
+# OpenAI API key setup.
+# SECURITY: a secret key used to be hard-coded on this line and was committed
+# to version control. Treat that key as compromised and revoke it. The key is
+# now taken from the OPENAI_API_KEY environment variable, which is also where
+# the OpenAI() client reads it from by default.
+openai.api_key = os.environ.get("OPENAI_API_KEY")
+def process_essays(folder_path, question_file, guidelines_file, excel_file):
+    """Transcribe and grade the essays listed in an Excel workbook.
+
+    The active worksheet of ``excel_file`` is read with row 1 as the header
+    and one student per following row: column 1 = student ID, column 2 =
+    number of pages, column 3 = transcription, column 4 = mark, column 5 =
+    reason. Existing transcriptions and existing mark/reason pairs are copied
+    unchanged; already graded rows are also passed to the grader as examples.
+
+    Args:
+        folder_path: Directory holding the page images. Files are consumed in
+            modification-time order, ``num_pages`` at a time.
+        question_file: Text file containing the essay question.
+        guidelines_file: Text file containing the assessment guidelines.
+        excel_file: Workbook to read. It is not modified.
+
+    Raises:
+        ValueError: If a page count is missing or not a number, if the folder
+            holds fewer images than the sheet asks for, or if a row has a
+            mark without a reason (or the reverse).
+
+    Side effects:
+        Writes ``data/transcribed_essays.xlsx`` once transcription is done and
+        ``<excel_file without extension>_assessed.xlsx`` once grading is done.
+    """
+    # Load question and guidelines
+    with open(question_file, 'r') as file:
+        question = file.read().strip()
+    with open(guidelines_file, 'r') as file:
+        guidelines = file.read().strip()
+    # Load the Excel sheet
+    workbook = load_workbook(excel_file)
+    sheet = workbook.active
+    # Create a new workbook to save results
+    new_workbook = Workbook()
+    new_sheet = new_workbook.active
+    # Copy headers
+    for col in range(1, sheet.max_column + 1):
+        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value
+    # max_row can include trailing rows that are completely empty; skip them
+    # instead of trying to transcribe and grade a non-existent student.
+    rows = [
+        row for row in range(2, sheet.max_row + 1)
+        if any(sheet.cell(row=row, column=col).value is not None for col in range(1, 6))
+    ]
+    # Sort images in folder. Hidden files (e.g. .DS_Store) and sub-directories
+    # are not essay pages and would shift every following page by one.
+    images = sorted(
+        (
+            path
+            for name in os.listdir(folder_path)
+            if not name.startswith('.')
+            for path in [os.path.join(folder_path, name)]
+            if os.path.isfile(path)
+        ),
+        key=os.path.getmtime,
+    )
+    img_index = 0
+    # First Pass: Transcribe missing texts
+    for row in rows:
+        student_id = sheet.cell(row=row, column=1).value
+        num_pages = sheet.cell(row=row, column=2).value
+        transcribed_text = sheet.cell(row=row, column=3).value
+        # Copy student ID and number of pages
+        new_sheet.cell(row=row, column=1).value = student_id
+        new_sheet.cell(row=row, column=2).value = num_pages
+        if transcribed_text is not None:
+            # Copy the existing transcription if available.
+            # NOTE(review): img_index is not advanced for these rows, so the
+            # folder is assumed to hold only pages of untranscribed essays -
+            # confirm against how the data folder is prepared.
+            new_sheet.cell(row=row, column=3).value = transcribed_text
+            continue
+        try:
+            page_count = int(num_pages)
+        except (TypeError, ValueError):
+            raise ValueError(f"Invalid page count {num_pages!r} for student {student_id}.") from None
+        pages = images[img_index:img_index + page_count]
+        if len(pages) < page_count:
+            raise ValueError(
+                f"Not enough images in {folder_path} for student {student_id}: "
+                f"needed {page_count}, found {len(pages)}."
+            )
+        print(f"Transcribing essay for student {student_id}...")
+        essay_text = "\n".join(transcribe_image(page) for page in pages)
+        img_index += page_count
+        new_sheet.cell(row=row, column=3).value = essay_text.strip()
+    # Save current state with transcriptions
+    new_workbook.save("data/transcribed_essays.xlsx")
+    print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")
+    # Collect graded examples and initialize list
+    examples = []
+    for row in rows:
+        student_id = sheet.cell(row=row, column=1).value
+        # Read from the new sheet: a graded row may have been transcribed
+        # only just now, in which case the source cell is still empty.
+        transcribed_text = new_sheet.cell(row=row, column=3).value
+        mark = sheet.cell(row=row, column=4).value
+        reason = sheet.cell(row=row, column=5).value
+        # Store graded examples for prompt generation
+        if mark is not None or reason is not None:
+            if mark is None or reason is None:
+                raise ValueError(f"Mark or reason missing for student {student_id}.")
+            examples.append({"essay": transcribed_text, "mark": mark, "reason": reason})
+    # Second Pass: Grade missing grades/reasons
+    for row in rows:
+        student_id = sheet.cell(row=row, column=1).value
+        transcribed_text = new_sheet.cell(row=row, column=3).value
+        mark = sheet.cell(row=row, column=4).value
+        reason = sheet.cell(row=row, column=5).value
+        if mark is None and reason is None:
+            print(f"Assessing essay for student {student_id}...")
+            assessment = assess_essay_with_gpt(transcribed_text, question, guidelines, examples)
+            new_sheet.cell(row=row, column=4).value = assessment['mark']
+            new_sheet.cell(row=row, column=5).value = assessment['reason']
+            # Add the assessed essay as an example for subsequent assessments,
+            # unless grading failed: the grader's fallback result (reason
+            # starting with "Error:") would mislead later assessments.
+            if not str(assessment['reason']).startswith("Error:"):
+                examples.append({"essay": transcribed_text, "mark": assessment['mark'], "reason": assessment['reason']})
+        else:
+            # Copy the existing mark and reason to the new sheet
+            new_sheet.cell(row=row, column=4).value = mark
+            new_sheet.cell(row=row, column=5).value = reason
+    # Save the new Excel file with assessments filled in. splitext keeps the
+    # output path distinct from the input even when the input does not end in
+    # ".xlsx" (str.replace would then have overwritten the input file).
+    output_root, _ = os.path.splitext(excel_file)
+    new_workbook.save(f"{output_root}_assessed.xlsx")
+    print("Assessment complete. Results saved in assessed version of the Excel file.")
+# Replace with actual file paths.
+# Guarded so that the batch job runs only when this module is executed as a
+# script; an unguarded call runs on every import of the module.
+if __name__ == "__main__":
+    process_essays(
+        folder_path="data/images",
+        question_file="data/question.txt",
+        guidelines_file="data/assessment_guidelines.txt",
+        excel_file="data/essays.xlsx"
+    )

src/transcribe_image.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import base64
+from openai import OpenAI
+import os
+# Function to encode the image
+def encode_image(image_path):
+    """Return the contents of the file at ``image_path`` as a base64 string.
+
+    Raises:
+        FileNotFoundError: If ``image_path`` does not exist. An explicit
+            exception is used instead of ``assert`` because assertions are
+            removed when Python runs with ``-O``.
+    """
+    if not os.path.exists(image_path):
+        raise FileNotFoundError(f"The image file does not exist: {image_path}")
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+def transcribe_image(image_path, max_tokens=1024):
+    """Transcribe handwritten text from an image with the gpt-4o-mini model.
+
+    Args:
+        image_path: Path of the image file to transcribe.
+        max_tokens: Upper bound on the length of the transcription. The
+            earlier fixed limit of 300 tokens is likely too small for a full
+            handwritten page, which would cut the transcription short.
+
+    Returns:
+        The transcribed text, or an empty string when the model returns no
+        content, so callers can always concatenate the result.
+
+    Raises:
+        Whatever ``encode_image`` raises for an unreadable path.
+    """
+    import mimetypes  # local import: keeps the module's import block untouched
+
+    # Initialize the OpenAI client
+    client = OpenAI()
+    # Encoding the image
+    base64_image = encode_image(image_path)
+    # Label the payload with the file's real type (PNG uploads are accepted
+    # by the app); fall back to JPEG when the type cannot be determined.
+    mime_type = mimetypes.guess_type(image_path)[0]
+    if not mime_type or not mime_type.startswith("image/"):
+        mime_type = "image/jpeg"
+    # Preparing the API call
+    response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Please transcribe the handwritten text in this image. Return only the text content."},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"}
+                    }
+                ]
+            }
+        ],
+        max_tokens=max_tokens
+    )
+    transcribed_text = response.choices[0].message.content
+    return transcribed_text or ""