Spaces:

MrSimple01
/

SimpleLearn_2

Sleeping

App Files Community

MrSimple01 committed on Apr 19, 2025

Commit

fa8d0c9

verified ·

1 Parent(s): bcf9fd7

Update src/documentProcessing.py

Browse files

Files changed (1) hide show

src/documentProcessing.py +26 -12

src/documentProcessing.py CHANGED Viewed

@@ -36,11 +36,27 @@ def extract_text_from_txt(txt_path):
 def process_document(document_path, gemini_api_key, language, content_type):
     try:
-        temp_file = tempfile.mktemp(suffix=os.path.splitext(document_path.name)[-1])
-        with open(temp_file, 'wb') as f:
-            f.write(document_path.read())
         file_extension = os.path.splitext(document_path.name)[-1].lower()
         if file_extension == '.pdf':
             text = extract_text_from_pdf(temp_file)
         elif file_extension == '.docx':
@@ -49,19 +65,17 @@ def process_document(document_path, gemini_api_key, language, content_type):
             text = extract_text_from_txt(temp_file)
         else:
             raise Exception(f"Unsupported file type: {file_extension}")
         text_file_path = tempfile.mktemp(suffix='.txt')
         with open(text_file_path, 'w', encoding='utf-8') as f:
             f.write(text)
         formatted_output, json_path, txt_path = analyze_document(
-            text,
-            gemini_api_key,
-            language,
-            content_type
         )
         return f"Document processed successfully", text_file_path, formatted_output, txt_path, json_path
     except Exception as e:
         error_message = f"Error processing document: {str(e)}"
-        return error_message, None, error_message, None, None

 def process_document(document_path, gemini_api_key, language, content_type):
     try:
+        # Create a temporary file
         file_extension = os.path.splitext(document_path.name)[-1].lower()
+        temp_file = tempfile.mktemp(suffix=file_extension)
+        # Handle different file-like objects
+        if hasattr(document_path, 'read'):
+            # If it's a file-like object with read method
+            with open(temp_file, 'wb') as f:
+                f.write(document_path.read())
+        elif hasattr(document_path, 'file'):
+            # If it's a Django or similar web framework file upload
+            with open(temp_file, 'wb') as f:
+                for chunk in document_path.file.chunks():
+                    f.write(chunk)
+        elif isinstance(document_path, str):
+            # If it's a file path string
+            temp_file = document_path
+        else:
+            raise Exception("Unsupported document_path type")
+        # Process based on file type
         if file_extension == '.pdf':
             text = extract_text_from_pdf(temp_file)
         elif file_extension == '.docx':
             text = extract_text_from_txt(temp_file)
         else:
             raise Exception(f"Unsupported file type: {file_extension}")
         text_file_path = tempfile.mktemp(suffix='.txt')
         with open(text_file_path, 'w', encoding='utf-8') as f:
             f.write(text)
+        # Assume this function is defined elsewhere
         formatted_output, json_path, txt_path = analyze_document(
+            text, gemini_api_key, language, content_type
         )
         return f"Document processed successfully", text_file_path, formatted_output, txt_path, json_path
     except Exception as e:
         error_message = f"Error processing document: {str(e)}"
+        return error_message, None, error_message, None, None