Spaces:
Running
Running
| import asyncio | |
| import logging | |
| from io import BytesIO | |
| from fastapi import Depends, HTTPException, UploadFile, status | |
| from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer | |
| from config import Config | |
| from .inferencer import analyze_text_with_sentences, classify_text | |
| from .preprocess import parse_docx, parse_pdf, parse_txt | |
| security = HTTPBearer() | |
| # def build_bias_summary(ai_likelihood: float) -> dict[str, object]: | |
| # """Convert an AI likelihood score into a human-readable bias summary.""" | |
| # if ai_likelihood > 50: | |
| # overall_bias = "AI" | |
| # bias_statement = f"The text is biased toward AI-generated writing ({ai_likelihood}% AI likelihood)." | |
| # elif ai_likelihood < 50: | |
| # overall_bias = "Human" | |
| # bias_statement = f"The text is biased toward human writing ({100 - ai_likelihood}% human likelihood)." | |
| # else: | |
| # overall_bias = "Balanced" | |
| # bias_statement = "The text is balanced between AI and human writing." | |
| # return { | |
| # "overall_bias": overall_bias, | |
| # "bias_statement": bias_statement, | |
| # } | |
| # Verify Bearer token from Authorization header | |
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token supplied in the Authorization header.

    Args:
        credentials: Bearer credentials resolved by the ``security`` dependency.

    Returns:
        The presented token string when it matches ``Config.SECRET_TOKEN``.

    Raises:
        HTTPException: 403 when the token does not match the configured
            secret, or when no string secret is configured.
    """
    # Function-local import so this block does not depend on the file's
    # import section being changed.
    import hmac

    token = credentials.credentials
    expected_token = Config.SECRET_TOKEN

    # Compare in constant time so response timing does not reveal how much of
    # the secret a guess got right. Both sides are encoded to bytes because
    # compare_digest rejects non-ASCII str arguments. A missing or non-string
    # secret can never match, which mirrors the previous `!=` behaviour.
    token_matches = isinstance(expected_token, str) and hmac.compare_digest(
        token.encode("utf-8"), expected_token.encode("utf-8")
    )
    if not token_matches:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN, detail="Invalid or expired token"
        )
    return token
| # Classify plain text input | |
async def handle_text_analysis(text: str):
    """Validate plain text and classify it off the event loop.

    Args:
        text: Raw text to analyse; surrounding whitespace is ignored.

    Returns:
        A dict with the keys ``result``, ``perplexity`` (rounded to two
        decimal places) and ``ai_likelihood``.

    Raises:
        HTTPException: 400 when the text has fewer than 10 whitespace-separated
            words, 413 when it is longer than 50,000 characters.
    """
    stripped = text.strip()

    # An empty string splits into zero words, so this single check also
    # rejects empty input.
    word_count = len(stripped.split())
    if word_count < 10:
        raise HTTPException(
            status_code=400, detail="Text must contain at least 10 words"
        )

    if len(stripped) > 50000:
        raise HTTPException(
            status_code=413, detail="Text must be less than 50,000 characters"
        )

    # classify_text is synchronous; run it in a worker thread.
    classification = await asyncio.to_thread(classify_text, stripped)
    label, perplexity, ai_likelihood = classification

    response = {
        "result": label,
        "perplexity": round(perplexity, 2),
        "ai_likelihood": ai_likelihood,
    }
    return response
| # Extract text from uploaded files (.docx, .pdf, .txt) | |
async def extract_file_contents(file: UploadFile) -> str:
    """Extract text from an uploaded .docx, .pdf or .txt file.

    The parser is chosen from the upload's declared content type. The type is
    validated before the body is read, so unsupported uploads are rejected
    without being loaded into memory.

    Args:
        file: The uploaded file; ``content_type`` selects the parser.

    Returns:
        Whatever the matching parser (``parse_docx``, ``parse_pdf`` or
        ``parse_txt``) returns for the file's bytes.

    Raises:
        HTTPException: 415 when the content type is missing or unsupported.
    """
    parsers = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }

    # A Content-Type header may carry parameters ("text/plain; charset=utf-8"),
    # differ in case, or be absent entirely; compare only the bare media type.
    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()

    parser = parsers.get(media_type)
    if parser is None:
        raise HTTPException(
            status_code=415,
            detail="Invalid file type. Only .docx, .pdf and .txt are allowed.",
        )

    content = await file.read()
    return parser(BytesIO(content))
| # Classify text from uploaded file | |
async def handle_file_upload(file: UploadFile):
    """Extract text from an uploaded file and classify it.

    Args:
        file: The uploaded file, passed to ``extract_file_contents``.

    Returns:
        On success, a dict with ``content`` (the extracted text as returned by
        the parser), ``result``, ``perplexity`` (rounded to two decimal places)
        and ``ai_likelihood``. When the extracted text exceeds 50,000
        characters, a dict with ``status_code`` 413 and a ``detail`` message is
        returned instead of raising.

    Raises:
        HTTPException: 400 when the file has no non-whitespace text; any
            HTTPException raised by ``extract_file_contents`` is propagated
            unchanged; 500 for any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        logging.info(f"Extracted text length: {len(file_contents)} characters")

        if len(file_contents) > 50000:
            # NOTE(review): reported as a response payload rather than raised,
            # so the HTTP status stays whatever the route returns by default.
            # Kept as-is because callers may rely on this shape.
            return {
                "status_code": 413,
                "detail": "Text must be less than 50,000 characters",
            }

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(
                status_code=400,
                detail="The uploaded file is empty or only contains whitespace.",
            )

        # classify_text is synchronous; run it in a worker thread.
        label, perplexity, ai_likelihood = await asyncio.to_thread(
            classify_text, cleaned_text
        )
        return {
            "content": file_contents,
            "result": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": ai_likelihood,
        }
    except HTTPException:
        # Deliberate client errors (the 400 above, or one raised while
        # extracting the file) must reach the client unchanged instead of
        # being masked as a 500 by the generic handler below.
        raise
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        raise HTTPException(
            status_code=500, detail="Error processing the file"
        ) from e
async def handle_sentence_level_analysis(text: str):
    """Validate plain text and run the sentence-level analysis on it.

    Args:
        text: Raw text to analyse; surrounding whitespace is ignored.

    Returns:
        The value returned by ``analyze_text_with_sentences`` for the
        stripped text.

    Raises:
        HTTPException: 400 when the text has fewer than 10 whitespace-separated
            words, 413 when it is longer than 50,000 characters.
    """
    stripped = text.strip()

    # An empty string splits into zero words, so this single check also
    # rejects empty input.
    too_few_words = len(stripped.split()) < 10
    if too_few_words:
        raise HTTPException(
            status_code=400, detail="Text must contain at least 10 words"
        )

    too_long = len(stripped) > 50000
    if too_long:
        raise HTTPException(
            status_code=413, detail="Text must be less than 50,000 characters"
        )

    # The analyzer is synchronous; run it in a worker thread.
    return await asyncio.to_thread(analyze_text_with_sentences, stripped)
| # Analyze each sentence from uploaded file | |
async def handle_file_sentence(file: UploadFile):
    """Extract text from an uploaded file and run sentence-level analysis.

    Args:
        file: The uploaded file, passed to ``extract_file_contents``.

    Returns:
        On success, a dict holding ``content`` (the extracted text) merged
        with the result of ``handle_sentence_level_analysis``. When the
        extracted text exceeds 50,000 characters, a dict with ``status_code``
        413 and a ``detail`` message is returned instead of raising.

    Raises:
        HTTPException: 400 when the file has no non-whitespace text; any
            HTTPException from extraction or analysis is propagated unchanged;
            500 for any other failure.
    """
    try:
        raw_text = await extract_file_contents(file)

        # Oversized input is reported as a payload, not raised as an HTTP error.
        if len(raw_text) > 50000:
            oversize_response = {
                "status_code": 413,
                "detail": "Text must be less than 50,000 characters",
            }
            return oversize_response

        # Flatten newlines and tabs to spaces, then trim the ends.
        flattened = raw_text.translate({ord("\n"): " ", ord("\t"): " "}).strip()
        if not flattened:
            raise HTTPException(
                status_code=400,
                detail="The uploaded file is empty or only contains whitespace.",
            )

        sentence_result = await handle_sentence_level_analysis(flattened)
        response = {"content": raw_text, **sentence_result}
        return response
    except HTTPException:
        # Client-facing errors pass through untouched.
        raise
    except Exception as e:
        logging.error(f"Error processing file: {e}")
        raise HTTPException(status_code=500, detail="Error processing the file")
def classify(text: str):
    """Synchronous wrapper that passes *text* straight to ``classify_text``.

    Args:
        text: The text to classify; forwarded unchanged.

    Returns:
        Whatever ``classify_text`` returns for *text*.
    """
    outcome = classify_text(text)
    return outcome