Spaces:

LazyHuman
/

plexi-api

Running

LazyHuman10

Initial commit for HF Space

3b6130d about 1 month ago

6.63 kB

	"""
	main.py — Plexi API (FastAPI service for HuggingFace Spaces)
	============================================================
	Endpoints:
	POST /retrieve — embed query + vector search (scope-filtered)
	GET /manifest — proxy + cache the materials manifest.json
	GET /health — liveness probe (also used by keep-alive cron)

	The heavy resources (index + embedding model) are loaded ONCE at startup via
	FastAPI's lifespan context manager and shared across all requests.
	"""

	import os
	import time
	from contextlib import asynccontextmanager
	from functools import lru_cache

	import requests
	from fastapi import FastAPI, HTTPException, Request
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from pydantic import BaseModel, Field

	from rag import (
	DEFAULT_TOP_K,
	MATERIALS_REPO,
	MANIFEST_BRANCH,
	format_context,
	load_index,
	retrieve_chunks,
	)

	# ---------------------------------------------------------------------------
	# Config
	# ---------------------------------------------------------------------------
	def _parse_origins(raw: str) -> list[str]:
	    """Split a comma-separated origin list into clean entries.

	    Whitespace around each entry is removed and empty entries (from a
	    trailing comma or an empty string) are dropped, so a value such as
	    ``"https://a.example, http://b.example,"`` yields two usable origins.
	    An origin with stray padding would never equal a browser's ``Origin``
	    header and would silently be rejected by the CORS check.
	    """
	    return [origin.strip() for origin in raw.split(",") if origin.strip()]


	# Origins allowed by CORS; override with the ALLOWED_ORIGINS environment
	# variable (comma-separated).
	ALLOWED_ORIGINS = _parse_origins(
	    os.getenv(
	        "ALLOWED_ORIGINS",
	        # Default: allow the Cloudflare Pages domain + localhost for dev
	        "https://plexi.lazyhideout.tech,http://localhost:5173,http://localhost:4173",
	    )
	)

	# ---------------------------------------------------------------------------
	# Startup / Shutdown — load heavy resources once
	# ---------------------------------------------------------------------------
	# Process-wide holder for values created at startup and read by the routes
	# ("index", "index_error", "index_loaded", "startup_ts").
	_state: dict = {}


	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Load the RAG index at startup; release on shutdown.

	    Calls ``load_index()`` once and records the outcome in ``_state``:

	    * ``index``        - the loaded index, or ``None`` when loading failed
	    * ``index_error``  - the error reported by ``load_index()``, else ``None``
	    * ``index_loaded`` - ``True`` only when a usable index is stored
	    * ``startup_ts``   - wall-clock time at which startup finished

	    A load failure does not abort startup; the app still serves requests
	    with ``index`` set to ``None``.
	    """
	    print("Loading RAG index from GitHub…")
	    t0 = time.time()
	    index, error = load_index()
	    elapsed = round(time.time() - t0, 2)

	    if error:
	        print(f"⚠️ RAG index unavailable: {error}")
	        # Discard any partial index so every flag below describes the same state.
	        index = None
	    else:
	        print(f"✅ RAG index loaded in {elapsed}s")

	    _state["index"] = index
	    _state["index_error"] = error if error else None
	    # Derived from the stored index so it cannot disagree with _state["index"].
	    _state["index_loaded"] = index is not None
	    _state["startup_ts"] = time.time()
	    try:
	        yield
	    finally:
	        # Cleanup (nothing heavy to clean up here); runs even if an
	        # exception is thrown into the context manager at the yield.
	        _state.clear()


	# ---------------------------------------------------------------------------
	# App
	# ---------------------------------------------------------------------------
	# Application object; startup/shutdown work is delegated to `lifespan`.
	app = FastAPI(
	    lifespan=lifespan,
	    title="Plexi API",
	    version="1.0.0",
	    description="RAG retrieval backend for Plexi. Accepts student queries and returns relevant study material chunks.",
	)

	# CORS: only the configured origins may call the API from a browser, and
	# credentialed cross-origin requests are not permitted.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=ALLOWED_ORIGINS,
	    allow_methods=["GET", "POST", "OPTIONS"],
	    allow_headers=["Content-Type"],
	    allow_credentials=False,
	)


	# ---------------------------------------------------------------------------
	# Request / Response models
	# ---------------------------------------------------------------------------
	class RetrieveRequest(BaseModel):
	    """Body of ``POST /retrieve``: the question plus the scope to search in."""

	    # Required, non-empty question text, capped at 2000 characters.
	    query: str = Field(..., max_length=2000, min_length=1)
	    # Required scope selectors, each between 1 and 100 characters.
	    semester: str = Field(..., max_length=100, min_length=1)
	    subject: str = Field(..., max_length=100, min_length=1)
	    # Number of chunks requested; optional, bounded to the range 1..20.
	    top_k: int = Field(le=20, ge=1, default=DEFAULT_TOP_K)


	class ChunkResult(BaseModel):
	    """One retrieved chunk as returned inside ``RetrieveResponse.chunks``."""

	    # The chunk's text content.
	    text: str
	    # The three fields below are nullable. The union must be spelled with a
	    # bare `|`; an escaped `\|` is not valid Python syntax.
	    score: float | None
	    filename: str | None
	    subject: str | None


	class RetrieveResponse(BaseModel):
	    """Payload returned by ``POST /retrieve``."""

	    # Retrieved chunks for the request.
	    chunks: list[ChunkResult]

	    # Echo of the request's query and scope.
	    query: str
	    semester: str
	    subject: str

	    # Whether an index was available when the request was handled.
	    rag_active: bool

	    # The chunks rendered into a single string by `format_context`.
	    context_formatted: str


	# ---------------------------------------------------------------------------
	# Manifest caching (simple in-memory, 5-minute TTL)
	# ---------------------------------------------------------------------------
	# In-memory cache for the manifest: the parsed JSON plus the time.time()
	# value at which it was fetched. "data" is None until the first fetch.
	_manifest_cache: dict = {"data": None, "fetched_at": 0}
	MANIFEST_TTL = 300  # seconds


	def _get_manifest() -> dict:
	    """Return the materials manifest, fetching it from GitHub when stale.

	    A cached copy is served while it is younger than ``MANIFEST_TTL``
	    seconds. Otherwise ``manifest.json`` is downloaded from the
	    ``MATERIALS_REPO`` / ``MANIFEST_BRANCH`` raw URL, parsed, cached and
	    returned.

	    Raises:
	        requests.RequestException: the download failed or returned an
	            error status (via ``raise_for_status``).
	    """
	    now = time.time()
	    cached = _manifest_cache["data"]
	    # Compare against None rather than truthiness: an empty manifest ({}) is
	    # still a valid cached value and must not trigger a refetch on every call.
	    if cached is not None and (now - _manifest_cache["fetched_at"]) < MANIFEST_TTL:
	        return cached

	    url = f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/manifest.json"
	    resp = requests.get(url, timeout=15)
	    resp.raise_for_status()
	    data = resp.json()

	    _manifest_cache["data"] = data
	    _manifest_cache["fetched_at"] = now
	    return data


	# ---------------------------------------------------------------------------
	# Routes
	# ---------------------------------------------------------------------------
	@app.get("/health")
	def health():
	    """Liveness probe — also pinged by the GitHub Actions keep-alive cron.

	    Reports whether the index was loaded, the recorded load error (if
	    any), the embedding model name, and seconds elapsed since startup.
	    """
	    now = time.time()
	    # Falls back to "now" when startup has not recorded a timestamp yet.
	    started_at = _state.get("startup_ts", time.time())

	    report = {"status": "ok"}
	    report["index_loaded"] = _state.get("index_loaded", False)
	    report["index_error"] = _state.get("index_error")
	    report["embed_model"] = "sentence-transformers/all-MiniLM-L6-v2"
	    report["uptime_seconds"] = round(now - started_at, 1)
	    return report


	@app.get("/manifest")
	def get_manifest():
	    """
	    Proxy and cache the study materials manifest.json from GitHub.
	    The Cloudflare Worker also caches this in KV — this is a double layer.

	    Raises:
	        HTTPException(502): the upstream fetch failed — an HTTP error
	            status, a timeout, or a connection problem.
	        HTTPException(500): any other unexpected failure.
	    """
	    try:
	        data = _get_manifest()
	        return JSONResponse(content=data)
	    except requests.RequestException as err:
	        # RequestException is the base class of HTTPError, Timeout and
	        # ConnectionError, so every upstream failure maps to 502 rather
	        # than only bad HTTP statuses.
	        raise HTTPException(status_code=502, detail=f"GitHub fetch failed: {err}") from err
	    except Exception as err:
	        raise HTTPException(status_code=500, detail=str(err)) from err


	@app.post("/retrieve", response_model=RetrieveResponse)
	def retrieve(body: RetrieveRequest):
	    """
	    Core RAG endpoint.

	    Looks up the index stored at startup, hands the query and its
	    semester/subject scope to ``retrieve_chunks``, and returns the
	    resulting chunks together with the string that ``format_context``
	    builds from them. ``rag_active`` is False when no index is stored.
	    """
	    rag_index = _state.get("index")

	    matches = retrieve_chunks(
	        index=rag_index,
	        query=body.query,
	        semester=body.semester,
	        subject=body.subject,
	        top_k=body.top_k,
	    )

	    return RetrieveResponse(
	        chunks=matches,
	        query=body.query,
	        semester=body.semester,
	        subject=body.subject,
	        rag_active=rag_index is not None,
	        context_formatted=format_context(matches),
	    )


	# ---------------------------------------------------------------------------
	# Run (for local development only — HF uses Dockerfile CMD)
	# ---------------------------------------------------------------------------
	if __name__ == "__main__":
	    # Imported here so uvicorn is only required when running this file directly.
	    import uvicorn

	    # Binds on all interfaces at port 7860 with auto-reload enabled.
	    uvicorn.run(
	        "main:app",
	        port=7860,
	        host="0.0.0.0",
	        reload=True,
	    )