Spaces:

HamidOmarov
/

FastAPI-RAG-API

Sleeping

App Files Files Community

HamidOmarov commited on Aug 12, 2025

Commit

7715973

verified ·

1 Parent(s): 26ad320

Update app/api.py

Browse files

Files changed (1) hide show

app/api.py +200 -16

app/api.py CHANGED Viewed

@@ -1,14 +1,24 @@
 # app/api.py
-from typing import List
-import faiss, os
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, RedirectResponse
-from pydantic import BaseModel
 from .rag_system import SimpleRAG, UPLOAD_DIR, INDEX_DIR
 app = FastAPI(title="RAG API", version="1.3.0")
 app.add_middleware(
@@ -21,23 +31,148 @@ app.add_middleware(
 rag = SimpleRAG()
-# ---------- Schemas ----------
 class UploadResponse(BaseModel):
     filename: str
     chunks_added: int
 class AskRequest(BaseModel):
-    question: str
-    top_k: int = 5
 class AskResponse(BaseModel):
     answer: str
     contexts: List[str]
 class HistoryResponse(BaseModel):
     total_chunks: int
-# ---------- Utility ----------
 @app.get("/")
 def root():
     return RedirectResponse(url="/docs")
@@ -56,9 +191,11 @@ def debug_translate():
     except Exception as e:
         return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
-# ---------- Core ----------
 @app.post("/upload_pdf", response_model=UploadResponse)
 async def upload_pdf(file: UploadFile = File(...)):
     dest = UPLOAD_DIR / file.filename
     with open(dest, "wb") as f:
         while True:
@@ -66,30 +203,71 @@ async def upload_pdf(file: UploadFile = File(...)):
             if not chunk:
                 break
             f.write(chunk)
     added = rag.add_pdf(dest)
     if added == 0:
-        # Clear message for scanned/empty PDFs
         raise HTTPException(status_code=400, detail="No extractable text found (likely a scanned image PDF).")
     return UploadResponse(filename=file.filename, chunks_added=added)
 @app.post("/ask_question", response_model=AskResponse)
 def ask_question(payload: AskRequest):
-    hits = rag.search(payload.question, k=max(1, payload.top_k))
-    contexts = [c for c, _ in hits]
-    answer = rag.synthesize_answer(payload.question, contexts)
-    return AskResponse(answer=answer, contexts=contexts or rag.last_added[:5])
 @app.get("/get_history", response_model=HistoryResponse)
 def get_history():
-    return HistoryResponse(total_chunks=len(rag.chunks))
 @app.get("/stats")
-def stats():
     return {
         "total_chunks": len(rag.chunks),
         "faiss_ntotal": int(getattr(rag.index, "ntotal", 0)),
         "model_dim": int(getattr(rag.index, "d", rag.embed_dim)),
-        "last_added_chunks": len(rag.last_added),
         "version": app.version,
     }
@@ -104,6 +282,12 @@ def reset_index():
                 os.remove(p)
             except FileNotFoundError:
                 pass
         return {"ok": True}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

 # app/api.py
+from __future__ import annotations
+from typing import List, Optional
+from collections import deque
+from datetime import datetime
+from time import perf_counter
+import re
+import os
+import faiss
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, RedirectResponse
+from pydantic import BaseModel, Field
 from .rag_system import SimpleRAG, UPLOAD_DIR, INDEX_DIR
+# ------------------------------------------------------------------------------
+# App setup
+# ------------------------------------------------------------------------------
 app = FastAPI(title="RAG API", version="1.3.0")
 app.add_middleware(
 rag = SimpleRAG()
+# ------------------------------------------------------------------------------
+# Models
+# ------------------------------------------------------------------------------
 class UploadResponse(BaseModel):
     filename: str
     chunks_added: int
 class AskRequest(BaseModel):
+    question: str = Field(..., min_length=1)
+    top_k: int = Field(5, ge=1, le=20)
 class AskResponse(BaseModel):
     answer: str
     contexts: List[str]
+class HistoryItem(BaseModel):
+    question: str
+    timestamp: str
 class HistoryResponse(BaseModel):
     total_chunks: int
+    history: List[HistoryItem] = []
+# ------------------------------------------------------------------------------
+# Lightweight stats store (in-memory)
+# ------------------------------------------------------------------------------
+class StatsStore:
+    def __init__(self):
+        self.documents_indexed = 0
+        self.questions_answered = 0
+        self.latencies_ms = deque(maxlen=500)
+        # Mon..Sun simple counter (index 0 = today for simplicity)
+        self.last7_questions = deque([0] * 7, maxlen=7)
+        self.history = deque(maxlen=50)  # recent questions
+    def add_docs(self, n: int):
+        if n > 0:
+            self.documents_indexed += n
+    def add_question(self, latency_ms: Optional[int] = None, q: Optional[str] = None):
+        self.questions_answered += 1
+        if latency_ms is not None:
+            self.latencies_ms.append(int(latency_ms))
+        if len(self.last7_questions) < 7:
+            self.last7_questions.appendleft(1)
+        else:
+            # attribute to "today" bucket
+            self.last7_questions[0] += 1
+        if q:
+            self.history.appendleft(
+                {"question": q, "timestamp": datetime.utcnow().isoformat()}
+            )
+    @property
+    def avg_ms(self) -> int:
+        return int(sum(self.latencies_ms) / len(self.latencies_ms)) if self.latencies_ms else 0
+stats = StatsStore()
+# ------------------------------------------------------------------------------
+# Helpers
+# ------------------------------------------------------------------------------
+_GENERIC_PATTERNS = [
+    r"\bbased on document context\b",
+    r"\bappears to be\b",
+    r"\bgeneral (?:summary|overview)\b",
+]
+_STOPWORDS = {
+    "the","a","an","of","for","and","or","in","on","to","from","with","by","is","are",
+    "was","were","be","been","being","at","as","that","this","these","those","it",
+    "its","into","than","then","so","such","about","over","per","via","vs","within"
+}
+def is_generic_answer(text: str) -> bool:
+    if not text:
+        return True
+    low = text.strip().lower()
+    if len(low) < 15:
+        return True
+    for pat in _GENERIC_PATTERNS:
+        if re.search(pat, low):
+            return True
+    return False
+def tokenize(s: str) -> List[str]:
+    return [w for w in re.findall(r"[a-zA-Z0-9]+", s.lower()) if w and w not in _STOPWORDS and len(w) > 2]
+def extractive_answer(question: str, contexts: List[str], max_chars: int = 500) -> str:
+    """
+    Simple keyword-based extractive fallback:
+    pick sentences containing most question tokens.
+    """
+    if not contexts:
+        return "I couldn't find relevant information in the indexed documents for this question."
+    q_tokens = set(tokenize(question))
+    if not q_tokens:
+        # if question is e.g. numbers only
+        q_tokens = set(tokenize(" ".join(contexts[:1])))
+    # split into sentences
+    sentences: List[str] = []
+    for c in contexts:
+        c = c or ""
+        # rough sentence split
+        for s in re.split(r"(?<=[\.!\?])\s+|\n+", c.strip()):
+            s = s.strip()
+            if s:
+                sentences.append(s)
+    if not sentences:
+        # fallback to first context chunk
+        return (contexts[0] or "")[:max_chars]
+    # score sentences
+    scored: List[tuple[int, str]] = []
+    for s in sentences:
+        toks = set(tokenize(s))
+        score = len(q_tokens & toks)
+        scored.append((score, s))
+    # pick top sentences with score > 0, otherwise first few sentences
+    scored.sort(key=lambda x: (x[0], len(x[1])), reverse=True)
+    picked: List[str] = []
+    for score, sent in scored:
+        if score <= 0 and picked:
+            break
+        if len(" ".join(picked) + " " + sent) > max_chars:
+            break
+        picked.append(sent)
+    if not picked:
+        # no overlap, take first ~max_chars from contexts
+        return (contexts[0] or "")[:max_chars]
+    return " ".join(picked).strip()
+# ------------------------------------------------------------------------------
+# Routes
+# ------------------------------------------------------------------------------
 @app.get("/")
 def root():
     return RedirectResponse(url="/docs")
     except Exception as e:
         return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
 @app.post("/upload_pdf", response_model=UploadResponse)
 async def upload_pdf(file: UploadFile = File(...)):
+    if not file.filename.lower().endswith(".pdf"):
+        raise HTTPException(status_code=400, detail="Only PDF files are allowed.")
     dest = UPLOAD_DIR / file.filename
     with open(dest, "wb") as f:
         while True:
             if not chunk:
                 break
             f.write(chunk)
     added = rag.add_pdf(dest)
     if added == 0:
         raise HTTPException(status_code=400, detail="No extractable text found (likely a scanned image PDF).")
+    stats.add_docs(added)
     return UploadResponse(filename=file.filename, chunks_added=added)
 @app.post("/ask_question", response_model=AskResponse)
 def ask_question(payload: AskRequest):
+    q = (payload.question or "").strip()
+    if not q:
+        raise HTTPException(status_code=400, detail="Missing 'question'.")
+    k = max(1, int(payload.top_k))
+    t0 = perf_counter()
+    # retrieval
+    try:
+        hits = rag.search(q, k=k)  # expected: List[Tuple[str, float]]
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Search failed: {e}")
+    contexts = [c for c, _ in (hits or []) if c] or (rag.last_added[:k] if getattr(rag, "last_added", None) else [])
+    if not contexts:
+        stats.add_question(int((perf_counter() - t0) * 1000), q=q)
+        return AskResponse(
+            answer="I couldn't find relevant information in the indexed documents for this question.",
+            contexts=[]
+        )
+    # synthesis (LLM or rule-based inside rag)
+    try:
+        synthesized = rag.synthesize_answer(q, contexts) or ""
+    except Exception:
+        synthesized = ""
+    # guard against generic/unchanging answers
+    if is_generic_answer(synthesized):
+        synthesized = extractive_answer(q, contexts, max_chars=600)
+    latency_ms = int((perf_counter() - t0) * 1000)
+    stats.add_question(latency_ms, q=q)
+    return AskResponse(answer=synthesized.strip(), contexts=contexts)
 @app.get("/get_history", response_model=HistoryResponse)
 def get_history():
+    return HistoryResponse(
+        total_chunks=len(rag.chunks),
+        history=[HistoryItem(**h) for h in list(stats.history)]
+    )
 @app.get("/stats")
+def stats_endpoint():
+    # keep backward compat fields + add dashboard-friendly metrics
     return {
+        "documents_indexed": stats.documents_indexed,
+        "questions_answered": stats.questions_answered,
+        "avg_ms": stats.avg_ms,
+        "last7_questions": list(stats.last7_questions),
         "total_chunks": len(rag.chunks),
         "faiss_ntotal": int(getattr(rag.index, "ntotal", 0)),
         "model_dim": int(getattr(rag.index, "d", rag.embed_dim)),
+        "last_added_chunks": len(getattr(rag, "last_added", [])),
         "version": app.version,
     }
                 os.remove(p)
             except FileNotFoundError:
                 pass
+        # also reset stats counters to avoid stale analytics
+        stats.documents_indexed = 0
+        stats.questions_answered = 0
+        stats.latencies_ms.clear()
+        stats.last7_questions = deque([0] * 7, maxlen=7)
+        stats.history.clear()
         return {"ok": True}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))