Spaces:

Deign86
/

mathpulse-api-v3test

Running

App Files Files Community

github-actions[bot] committed about 8 hours ago

Commit

b222bcc

1 Parent(s): 57fbb45

🚀 Auto-deploy backend from GitHub (1393543)

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.deploy-trigger +0 -1
.env.example +0 -33
.gitattributes +35 -0
analytics.py +23 -18
config/env.sample +11 -16
config/models.yaml +42 -72
datasets/sample_curriculum.json +0 -137
main.py +0 -0
middleware/__init__.py +0 -4
middleware/rate_limiter.py +0 -184
pre_deploy_check.py +2 -10
rag/__init__.py +1 -9
rag/curriculum_rag.py +48 -199
rag/firebase_storage_loader.py +0 -175
rag/pdf_ingestion.py +0 -368
rag/vectorstore_loader.py +2 -11
requirements.txt +0 -8
routes/admin_model_routes.py +0 -67
routes/admin_routes.py +0 -87
routes/curriculum_routes.py +0 -66
routes/diagnostic.py +0 -797
routes/quiz_battle.py +0 -205
routes/quiz_generation_routes.py +0 -356
routes/rag_routes.py +54 -298
routes/video_routes.py +0 -102
scripts/download_vectorstore_from_firebase.py +9 -74
scripts/ingest_curriculum.py +221 -136
scripts/ingest_from_storage.py +0 -285
scripts/migrate_grade12_to_grade11.py +0 -107
scripts/register_firestore_metadata.py +0 -183
scripts/seed_curriculum.py +0 -64
scripts/upload_curriculum_pdfs.py +0 -264
scripts/upload_lesson_modules.py +0 -142
scripts/upload_vectorstore_to_firebase.py +0 -71
services/__init__.py +0 -43
services/ai_client.py +0 -28
services/curriculum_service.py +0 -232
services/inference_client.py +551 -528
services/question_bank_service.py +0 -123
services/user_provisioning_service.py +1 -0
services/variance_engine.py +0 -115
services/youtube_service.py +0 -1017
startup.sh +5 -41
startup_validation.py +21 -94
test_full_rag.py +0 -75
test_retrieval.py +0 -39
tests/README.md +0 -46
tests/test_admin_model_routes.py +0 -213
tests/test_api.py +200 -118
tests/test_hf_monitoring_routes.py +0 -148

.deploy-trigger DELETED Viewed

	@@ -1 +0,0 @@
1	- 2026-04-29 21:37:27

.env.example DELETED Viewed

@@ -1,33 +0,0 @@
-# ── Vector Store ──────────────────────────────────────────────────
-# Path to ChromaDB vectorstore directory
-CURRICULUM_VECTORSTORE_DIR=datasets/vectorstore
-# Sentence transformer for embeddings
-# WARNING: changing this requires full re-ingestion of all curriculum data
-EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
-# ── DeepSeek AI Inference ─────────────────────────────────────────
-# DeepSeek API key (OpenAI-compatible), required for all AI features
-DEEPSEEK_API_KEY=your_deepseek_api_key_here
-DEEPSEEK_BASE_URL=https://api.deepseek.com
-DEEPSEEK_MODEL=deepseek-chat
-DEEPSEEK_REASONER_MODEL=deepseek-reasoner
-# ── HuggingFace (dataset push / HF Space deployment only) ─────────
-# HF API token — kept only for HF Space deployment and dataset push
-HF_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-# HF Model ID for AI monitoring proxy
-VITE_HF_MODEL_ID=Qwen/QwQ-32B
-# ── Model Selection ───────────────────────────────────────────────
-# LOCAL DEVELOPMENT — deepseek-chat (fast, $0.14/M input)
-HF_MODEL_ID=deepseek-chat
-# PRODUCTION — deepseek-reasoner for step-by-step solutions
-# HF_MODEL_ID=deepseek-reasoner
-# ── Quiz Battle Internal Auth ─────────────────────────────────────
-# Shared secret between Firebase Cloud Functions and FastAPI backend
-# Used to authenticate server-to-server requests for correct answers
-QUIZ_BATTLE_INTERNAL_SECRET=change_this_to_a_random_string

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

analytics.py CHANGED Viewed

@@ -232,6 +232,7 @@ class EnhancedRiskRequest(BaseModel):
     avgQuizScore: float = Field(..., ge=0, le=100)
     attendance: float = Field(..., ge=0, le=100)
     assignmentCompletion: float = Field(..., ge=0, le=100)
     xpGrowthRate: Optional[float] = 0.0
     timeOnPlatform: Optional[float] = 0.0  # hours
     # Optional trend data
@@ -809,7 +810,7 @@ def _build_risk_features(data: EnhancedRiskRequest) -> np.ndarray:
         data.avgQuizScore,
         data.attendance,
         data.assignmentCompletion,
-        0,  # streak removed
         data.xpGrowthRate or 0.0,
         data.timeOnPlatform or 0.0,
         data.engagementTrend7d or 0.0,
@@ -870,8 +871,12 @@ def _rule_based_risk(data: EnhancedRiskRequest) -> EnhancedRiskPrediction:
         score -= 10
     if (data.daysSinceLastActivity or 0) >= 7:
         score -= 10
     # Bonuses
     if (data.engagementTrend7d or 0) > 0:
         score += 5
@@ -1146,19 +1151,19 @@ async def train_risk_model(force_retrain: bool = False) -> RiskTrainResponse:
                 if not data:
                     continue
-                    features = [
-                        data.get("engagementScore", 50),
-                        data.get("avgQuizScore", 50),
-                        data.get("attendance", 80),
-                        data.get("assignmentCompletion", 60),
-                        0,  # streak removed
-                        data.get("xpGrowthRate", 0),
-                        data.get("timeOnPlatform", 0),
-                        0.0,  # engagementTrend7d
-                        0.0,  # quizScoreVariance
-                        data.get("consecutiveAbsences", 0),
-                        data.get("daysSinceLastActivity", 0),
-                    ]
                 X_data.append(features)
                 # Determine label from existing riskLevel or compute it
@@ -1255,7 +1260,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             quiz = np.random.normal(35, 12)
             attendance = np.random.normal(50, 15)
             completion = np.random.normal(35, 15)
-            # streak removed
             xp_growth = np.random.normal(-0.5, 0.3)
             time_platform = np.random.normal(2, 1)
             trend = np.random.normal(-10, 5)
@@ -1267,7 +1272,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             quiz = np.random.normal(60, 10)
             attendance = np.random.normal(72, 10)
             completion = np.random.normal(60, 12)
-            # streak removed
             xp_growth = np.random.normal(0.2, 0.3)
             time_platform = np.random.normal(5, 2)
             trend = np.random.normal(0, 8)
@@ -1279,7 +1284,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             quiz = np.random.normal(85, 8)
             attendance = np.random.normal(93, 5)
             completion = np.random.normal(88, 8)
-            # streak removed
             xp_growth = np.random.normal(1.0, 0.4)
             time_platform = np.random.normal(10, 3)
             trend = np.random.normal(5, 5)
@@ -1292,7 +1297,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             max(0, min(100, quiz)),
             max(0, min(100, attendance)),
             max(0, min(100, completion)),
-            0,  # streak removed
             xp_growth,
             max(0, time_platform),
             trend,

     avgQuizScore: float = Field(..., ge=0, le=100)
     attendance: float = Field(..., ge=0, le=100)
     assignmentCompletion: float = Field(..., ge=0, le=100)
+    streak: Optional[int] = 0
     xpGrowthRate: Optional[float] = 0.0
     timeOnPlatform: Optional[float] = 0.0  # hours
     # Optional trend data
         data.avgQuizScore,
         data.attendance,
         data.assignmentCompletion,
+        data.streak or 0,
         data.xpGrowthRate or 0.0,
         data.timeOnPlatform or 0.0,
         data.engagementTrend7d or 0.0,
         score -= 10
     if (data.daysSinceLastActivity or 0) >= 7:
         score -= 10
+    if (data.streak or 0) == 0:
+        score -= 5
     # Bonuses
+    if (data.streak or 0) >= 7:
+        score += 5
     if (data.engagementTrend7d or 0) > 0:
         score += 5
                 if not data:
                     continue
+                features = [
+                    data.get("engagementScore", 50),
+                    data.get("avgQuizScore", 50),
+                    data.get("attendance", 80),
+                    data.get("assignmentCompletion", 60),
+                    data.get("streak", 0),
+                    data.get("xpGrowthRate", 0),
+                    data.get("timeOnPlatform", 0),
+                    0.0,  # engagementTrend7d
+                    0.0,  # quizScoreVariance
+                    data.get("consecutiveAbsences", 0),
+                    data.get("daysSinceLastActivity", 0),
+                ]
                 X_data.append(features)
                 # Determine label from existing riskLevel or compute it
             quiz = np.random.normal(35, 12)
             attendance = np.random.normal(50, 15)
             completion = np.random.normal(35, 15)
+            streak = max(0, int(np.random.normal(1, 2)))
             xp_growth = np.random.normal(-0.5, 0.3)
             time_platform = np.random.normal(2, 1)
             trend = np.random.normal(-10, 5)
             quiz = np.random.normal(60, 10)
             attendance = np.random.normal(72, 10)
             completion = np.random.normal(60, 12)
+            streak = max(0, int(np.random.normal(3, 3)))
             xp_growth = np.random.normal(0.2, 0.3)
             time_platform = np.random.normal(5, 2)
             trend = np.random.normal(0, 8)
             quiz = np.random.normal(85, 8)
             attendance = np.random.normal(93, 5)
             completion = np.random.normal(88, 8)
+            streak = max(0, int(np.random.normal(10, 5)))
             xp_growth = np.random.normal(1.0, 0.4)
             time_platform = np.random.normal(10, 3)
             trend = np.random.normal(5, 5)
             max(0, min(100, quiz)),
             max(0, min(100, attendance)),
             max(0, min(100, completion)),
+            streak,
             xp_growth,
             max(0, time_platform),
             trend,

config/env.sample CHANGED Viewed

@@ -1,16 +1,10 @@
-# DeepSeek AI API (OpenAI-compatible)
-DEEPSEEK_API_KEY=your_deepseek_api_key_here
-DEEPSEEK_BASE_URL=https://api.deepseek.com
-DEEPSEEK_MODEL=deepseek-chat
-DEEPSEEK_REASONER_MODEL=deepseek-reasoner
 # Inference provider selection
 # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
-INFERENCE_PROVIDER=deepseek
 INFERENCE_PRO_ENABLED=true
-INFERENCE_PRO_PROVIDER=deepseek
-INFERENCE_GPU_PROVIDER=deepseek
-INFERENCE_CPU_PROVIDER=deepseek
 INFERENCE_ENABLE_PROVIDER_FALLBACK=true
 INFERENCE_PRO_PRIORITY_TASKS=chat,verify_solution
 INFERENCE_PRO_ROUTE_HEADER_NAME=
@@ -30,14 +24,15 @@ INFERENCE_LOCAL_SPACE_URL=http://127.0.0.1:7860
 INFERENCE_LOCAL_SPACE_GENERATE_PATH=/gradio_api/call/generate
 INFERENCE_LOCAL_SPACE_TIMEOUT_SEC=180
-# HF_TOKEN kept for Hugging Face Space deployment and dataset push only
 # Alternative env names accepted by runtime/startup checks: HUGGING_FACE_API_TOKEN, HUGGINGFACE_API_TOKEN
 HF_TOKEN=your_hf_token
 FIREBASE_AUTH_PROJECT_ID=mathpulse-ai-2026
 # Prefer one of the options below for backend Firestore/Admin access in deployment:
 # FIREBASE_SERVICE_ACCOUNT_JSON={"type":"service_account",...}
 # FIREBASE_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
-# DeepSeek timeout settings
 INFERENCE_HF_TIMEOUT_SEC=90
 INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
 INFERENCE_BACKGROUND_TIMEOUT_SEC=120
@@ -69,13 +64,13 @@ APP_BRAND_AVATAR_URL=
 # model defaults
 # Global default model for all tasks.
-INFERENCE_MODEL_ID=deepseek-chat
 INFERENCE_ENFORCE_QWEN_ONLY=true
-INFERENCE_QWEN_LOCK_MODEL=deepseek-chat
 INFERENCE_MAX_NEW_TOKENS=8192
 INFERENCE_TEMPERATURE=0.2
 INFERENCE_TOP_P=0.9
-INFERENCE_CHAT_MODEL_ID=deepseek-chat
 # Temporary chat-only override for experiments (clear to roll back instantly).
 # Example: Qwen/Qwen3-32B
 INFERENCE_CHAT_MODEL_TEMP_OVERRIDE=
@@ -95,7 +90,7 @@ CHAT_STREAM_CONTINUATION_TAIL_CHARS=900
 CHAT_STREAM_COMPLETION_MODE_DEFAULT=auto
 # Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
 HF_QUIZ_MODEL_ID=
-HF_QUIZ_JSON_REPAIR_MODEL_ID=deepseek-chat
 # retry behavior
 INFERENCE_MAX_RETRIES=3

 # Inference provider selection
 # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
+INFERENCE_PROVIDER=hf_inference
 INFERENCE_PRO_ENABLED=true
+INFERENCE_PRO_PROVIDER=hf_inference
+INFERENCE_GPU_PROVIDER=hf_inference
+INFERENCE_CPU_PROVIDER=hf_inference
 INFERENCE_ENABLE_PROVIDER_FALLBACK=true
 INFERENCE_PRO_PRIORITY_TASKS=chat,verify_solution
 INFERENCE_PRO_ROUTE_HEADER_NAME=
 INFERENCE_LOCAL_SPACE_GENERATE_PATH=/gradio_api/call/generate
 INFERENCE_LOCAL_SPACE_TIMEOUT_SEC=180
+# hf_inference provider settings
 # Alternative env names accepted by runtime/startup checks: HUGGING_FACE_API_TOKEN, HUGGINGFACE_API_TOKEN
 HF_TOKEN=your_hf_token
 FIREBASE_AUTH_PROJECT_ID=mathpulse-ai-2026
 # Prefer one of the options below for backend Firestore/Admin access in deployment:
 # FIREBASE_SERVICE_ACCOUNT_JSON={"type":"service_account",...}
 # FIREBASE_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
+INFERENCE_HF_BASE_URL=https://router.huggingface.co/hf-inference/models
+INFERENCE_HF_CHAT_URL=https://router.huggingface.co/v1/chat/completions
 INFERENCE_HF_TIMEOUT_SEC=90
 INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
 INFERENCE_BACKGROUND_TIMEOUT_SEC=120
 # model defaults
 # Global default model for all tasks.
+INFERENCE_MODEL_ID=Qwen/Qwen3-32B
 INFERENCE_ENFORCE_QWEN_ONLY=true
+INFERENCE_QWEN_LOCK_MODEL=Qwen/Qwen3-32B
 INFERENCE_MAX_NEW_TOKENS=8192
 INFERENCE_TEMPERATURE=0.2
 INFERENCE_TOP_P=0.9
+INFERENCE_CHAT_MODEL_ID=Qwen/Qwen3-32B
 # Temporary chat-only override for experiments (clear to roll back instantly).
 # Example: Qwen/Qwen3-32B
 INFERENCE_CHAT_MODEL_TEMP_OVERRIDE=
 CHAT_STREAM_COMPLETION_MODE_DEFAULT=auto
 # Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
 HF_QUIZ_MODEL_ID=
+HF_QUIZ_JSON_REPAIR_MODEL_ID=Qwen/Qwen3-32B
 # retry behavior
 INFERENCE_MAX_RETRIES=3

config/models.yaml CHANGED Viewed

@@ -1,85 +1,55 @@
 models:
   primary:
-    id: deepseek-chat
-    description: Default DeepSeek chat model — all chat tasks, quizzes, lessons, reasoning
-    max_new_tokens: 800
-    temperature: 0.7
     top_p: 0.9
-  rag_primary:
-    id: deepseek-reasoner
-    description: DeepSeek reasoner — extended reasoning for complex RAG tasks
-    max_new_tokens: 1800
-    temperature: 0.2
-    top_p: 0.9
-    enable_thinking_tasks:
-      - rag_lesson
-      - verify_solution
-      - risk_narrative
-    no_thinking_tasks:
-      - chat
-      - quiz_generation
-      - learning_path
-      - daily_insight
-  embedding:
-    id: BAAI/bge-small-en-v1.5
-    description: Embedding model for RAG retrieval — curriculum vectorstore ingestion and semantic search
-    note: Not part of the generation pipeline. Read from EMBEDDING_MODEL env var only. Not swappable via admin panel.
-  model_capabilities:
-    sequential_only:
-      - deepseek-reasoner
-    supports_thinking:
-      - deepseek-reasoner
 routing:
   task_model_map:
-    chat:                  deepseek-chat
-    verify_solution:       deepseek-reasoner
-    lesson_generation:     deepseek-chat
-    quiz_generation:       deepseek-chat
-    learning_path:         deepseek-chat
-    daily_insight:         deepseek-chat
-    risk_classification:   deepseek-chat
-    risk_narrative:        deepseek-reasoner
-    rag_lesson:            deepseek-reasoner
-    rag_problem:           deepseek-chat
-    rag_analysis_context:  deepseek-chat
   task_fallback_model_map:
-    chat:
-      - deepseek-chat
     verify_solution:
-      - deepseek-chat
-    lesson_generation:
-      - deepseek-chat
-    quiz_generation:
-      - deepseek-chat
-    learning_path:
-      - deepseek-chat
-    daily_insight:
-      - deepseek-chat
-    risk_classification:
-      - deepseek-chat
-    risk_narrative:
-      - deepseek-chat
-    rag_lesson:
-      - deepseek-chat
-    rag_problem:
-      - deepseek-chat
-    rag_analysis_context:
-      - deepseek-chat
   task_provider_map:
-    chat:                  deepseek
-    verify_solution:       deepseek
-    lesson_generation:     deepseek
-    quiz_generation:       deepseek
-    learning_path:         deepseek
-    daily_insight:         deepseek
-    risk_classification:   deepseek
-    risk_narrative:        deepseek
-    rag_lesson:            deepseek
-    rag_problem:           deepseek
-    rag_analysis_context:  deepseek

 models:
   primary:
+    id: Qwen/Qwen3-32B
+    description: Global default instruction model for interactive Grade 11-12 math tutoring
+    max_new_tokens: 640
+    temperature: 0.25
     top_p: 0.9
+  backup:
+    - id: meta-llama/Meta-Llama-3-70B-Instruct
+      description: High-quality model used for harder multi-step prompts
+      max_new_tokens: 768
+      temperature: 0.3
+      top_p: 0.9
+    - id: google/gemma-2-2b-it
+      description: Secondary backup with broad instruction coverage
+      max_new_tokens: 384
+      temperature: 0.2
+      top_p: 0.9
+  experimental:
+    - id: mistralai/Mistral-7B-Instruct-v0.3
+      notes: Prompt/procedure experimentation
+    - id: meta-llama/Meta-Llama-3-8B-Instruct
+      notes: Baseline comparison against legacy deployment
 routing:
   task_model_map:
+    # Keep all task defaults aligned to Qwen3-32B.
+    # Hard prompts can still escalate via runtime policy in inference_client.
+    chat: Qwen/Qwen3-32B
+    verify_solution: Qwen/Qwen3-32B
+    lesson_generation: Qwen/Qwen3-32B
+    quiz_generation: Qwen/Qwen3-32B
+    learning_path: Qwen/Qwen3-32B
+    daily_insight: Qwen/Qwen3-32B
+    risk_classification: Qwen/Qwen3-32B
+    risk_narrative: Qwen/Qwen3-32B
   task_fallback_model_map:
+    chat: []                                       # Chat is strict-primary only (no fallback chain)
     verify_solution:
+      - meta-llama/Meta-Llama-3-70B-Instruct      # Higher-capacity fallback
+      - meta-llama/Llama-3.1-8B-Instruct          # Second fallback
   task_provider_map:
+    # All tasks use hf_inference router (Qwen/Qwen3-32B natively supported)
+    chat: hf_inference
+    verify_solution: hf_inference
+    lesson_generation: hf_inference
+    quiz_generation: hf_inference
+    learning_path: hf_inference
+    daily_insight: hf_inference
+    risk_narrative: hf_inference
+    risk_classification: hf_inference

datasets/sample_curriculum.json DELETED Viewed

@@ -1,137 +0,0 @@
-[
-  {
-    "content": "The learner demonstrates understanding of key concepts of functions. Functions can be represented as ordered pairs, tables of values, graphs, and equations. A function is a relation where each element in the domain corresponds to exactly one element in the range. Key types include linear functions (f(x)=mx+b), quadratic functions (f(x)=ax^2+bx+c), and polynomial functions of higher degrees.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Functions and Their Graphs",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 1
-  },
-  {
-    "content": "Learning Competency (M11GM-Ia-1): Represents real-life situations using functions, including piece-wise functions. Example: A taxi fare is computed as P40 for the first 500 meters plus P3.50 for every additional 300 meters or fraction thereof. This is a piecewise function where f(d)=40 for d<=500 and f(d)=40+3.5*ceil((d-500)/300) for d>500.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Functions and Their Graphs",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 1
-  },
-  {
-    "content": "Learning Competency (M11GM-Ia-2): Evaluates a function. To evaluate f(x) at x=a, substitute a for every occurrence of x in the expression and simplify. Example: Given f(x)=2x^2-3x+5, evaluate f(2): f(2)=2(4)-3(2)+5=8-6+5=7.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Functions and Their Graphs",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 2
-  },
-  {
-    "content": "Rational Functions have the form f(x)=P(x)/Q(x) where P(x) and Q(x) are polynomials and Q(x)!=0. Key features: vertical asymptotes occur where Q(x)=0 but P(x)!=0; horizontal asymptotes depend on the degrees of P and Q. The domain of f(x) excludes all x-values that make the denominator zero. Solving rational equations and inequalities requires careful handling of the denominator signs.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Rational Functions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 3
-  },
-  {
-    "content": "Learning Competency (M11GM-Ib-3): Solves problems involving rational functions, rational equations, and rational inequalities. Example: A jeepney operator's average revenue per trip is modeled by R(n)=(5000+300n)/n where n is the number of trips per day. Find how many trips are needed for average revenue to reach P450.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Rational Functions",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 3
-  },
-  {
-    "content": "Exponential Functions f(x)=a*b^x (a!=0, b>0, b!=1) model growth and decay. Key properties: domain is all real numbers; range is (0,infinity) for a>0; horizontal asymptote at y=0; y-intercept at (0,a). Solving exponential equations involves expressing both sides with the same base and equating exponents. Philippine applications include bacterial growth and radioactive decay in medical contexts.",
-    "subject": "General Mathematics",
-    "quarter": 2,
-    "content_domain": "Exponential Functions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 4
-  },
-  {
-    "content": "Compound Interest is calculated using A=P(1+r/n)^(nt) where A is the final amount, P is the principal, r is the annual interest rate (decimal), n is the number of compounding periods per year, and t is the time in years. Philippine banks offer savings and loan products with various compounding frequencies: annually (n=1), semi-annually (n=2), quarterly (n=4), monthly (n=12).",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 5
-  },
-  {
-    "content": "Learning Competency (M11GM-IIc-1): Illustrates simple and compound interests. Simple interest I=Prt where P is principal, r is rate, t is time. Compound interest uses compounding formula. Example: Juana deposits P50,000 in a bank offering 3.5% interest compounded quarterly. After 3 years, her balance will be A=50000(1+0.035/4)^(4*3)=55543.19 using the compound interest formula.",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 5
-  },
-  {
-    "content": "Annuities are sequences of equal payments made at equal time intervals. The future value of an ordinary annuity (payment at end of period) is FV=PMT*[(1+r)^n-1]/r and present value is PV=PMT*[1-(1+r)^(-n)]/r. Applications include Pag-IBIG housing loans, SSS contributions, and insurance premiums. Philippine context problems often involve 15-year and 25-year housing loans.",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 6
-  },
-  {
-    "content": "Stocks and Bonds represent two types of investments. Stocks represent ownership shares in a corporation with dividends as earnings — prices are quoted per share in the Philippine Stock Exchange (PSE). Bonds are debt instruments where the issuing entity borrows money and pays periodic interest then repays principal at maturity. Key computations: stock yield = annual dividend per share / market price; bond yield = annual interest payment / market price.",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 6
-  },
-  {
-    "content": "A Random Variable is a function that assigns a real number to each outcome in the sample space of a random experiment. A Discrete Random Variable has a countable number of possible values. The probability mass function (PMF) gives the probability P(X=x) for each value x. Key properties: sum of all P(X=x)=1 and P(X=x)>=0 for all x. Common discrete distributions include Binomial for success/failure and Poisson for rare events.",
-    "subject": "Statistics and Probability",
-    "quarter": 1,
-    "content_domain": "Random Variables and Probability Distributions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 7
-  },
-  {
-    "content": "Learning Competency (M11/12SP-IIIa-1): Illustrates a random variable (discrete and continuous). A discrete random variable takes countable values like the number of defective items in a batch of 50 bulbs. A continuous random variable takes infinite uncountable values in an interval, such as the height of Grade 11 students in centimeters. The learner distinguishes between discrete and continuous random variables for real Philippine data.",
-    "subject": "Statistics and Probability",
-    "quarter": 1,
-    "content_domain": "Random Variables and Probability Distributions",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 7
-  },
-  {
-    "content": "The Normal Distribution (Gaussian) is a continuous probability distribution with a bell-shaped curve symmetric about the mean mu. Standard normal distribution has mu=0 and sigma=1; converting to standard normal z=(x-mu)/sigma allows probability calculation using z-tables. Properties: 68% of data within 1 sigma of mu, 95% within 2 sigma, 99.7% within 3 sigma. Philippine applications include standardized test scores (NAT, college entrance exams) and quality control in manufacturing.",
-    "subject": "Statistics and Probability",
-    "quarter": 1,
-    "content_domain": "Random Variables and Probability Distributions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 8
-  },
-  {
-    "content": "Conic Sections are curves formed by the intersection of a plane and a double-napped cone. The four types are: Circle (all points equidistant from a center), Parabola (all points equidistant from a focus and directrix), Ellipse (sum of distances to two foci is constant), and Hyperbola (absolute difference of distances to two foci is constant). Standard forms: Circle (x-h)^2+(y-k)^2=r^2; Parabola (x-h)^2=4p(y-k) or (y-k)^2=4p(x-h).",
-    "subject": "Pre-Calculus",
-    "quarter": 1,
-    "content_domain": "Analytic Geometry",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 9
-  },
-  {
-    "content": "Learning Competency (STEM_PC11AG-Ia-1): Illustrates the different types of conic sections: circle, parabola, ellipse, and hyperbola. The learner identifies conic sections from their standard equations and determines their key properties including center, radius (for circles), vertex, focus, directrix (for parabolas), and asymptotes (for hyperbolas). Real-world applications include satellite dishes, telescope mirrors, and bridge arch designs.",
-    "subject": "Pre-Calculus",
-    "quarter": 1,
-    "content_domain": "Analytic Geometry",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 9
-  }
-]

main.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

middleware/__init__.py DELETED Viewed

@@ -1,4 +0,0 @@
-# Middleware package
-from .rate_limiter import rate_limiter, setup_rate_limiting, RateLimitExceeded
-__all__ = ["rate_limiter", "setup_rate_limiting", "RateLimitExceeded"]

middleware/rate_limiter.py DELETED Viewed

@@ -1,184 +0,0 @@
-"""
-Rate limiting middleware using slowapi.
-"""
-import os
-import logging
-from fastapi import Request
-from slowapi import Limiter
-from slowapi.errors import RateLimitExceeded as SlowAPIRateLimitExceeded
-logger = logging.getLogger("mathpulse.ratelimit")
-# Environment-based configuration with defaults
-RATE_LIMIT_AI_RPM = int(os.getenv("RATE_LIMIT_AI_RPM", "20"))
-RATE_LIMIT_QUIZ_GENERATE_RPM = int(os.getenv("RATE_LIMIT_QUIZ_GENERATE_RPM", "10"))
-RATE_LIMIT_QUIZ_SUBMIT_RPM = int(os.getenv("RATE_LIMIT_QUIZ_SUBMIT_RPM", "30"))
-RATE_LIMIT_AUTH_RPM = int(os.getenv("RATE_LIMIT_AUTH_RPM", "5"))
-RATE_LIMIT_LEADERBOARD_RPM = int(os.getenv("RATE_LIMIT_LEADERBOARD_RPM", "60"))
-RATE_LIMIT_DEFAULT_RPM = int(os.getenv("RATE_LIMIT_DEFAULT_RPM", "100"))
-RATE_LIMIT_ADMIN_MULTIPLIER = int(os.getenv("RATE_LIMIT_ADMIN_MULTIPLIER", "10"))
-RATE_LIMIT_TEACHER_MULTIPLIER = int(os.getenv("RATE_LIMIT_TEACHER_MULTIPLIER", "3"))
-# Role multipliers for rate limit adjustment
-ROLE_MULTIPLIERS = {
-    "admin": RATE_LIMIT_ADMIN_MULTIPLIER,
-    "teacher": RATE_LIMIT_TEACHER_MULTIPLIER,
-    "student": 1,
-}
-def _get_user_identifier(request: Request) -> str:
-    """
-    Extract user identifier for rate limiting.
-    Uses Firebase UID from request.state.user if authenticated, otherwise falls back to IP.
-    """
-    user = getattr(request.state, "user", None)
-    if user and hasattr(user, "uid") and user.uid:
-        return f"uid:{user.uid}"
-    if request.client:
-        return f"ip:{request.client.host}"
-    return "ip:unknown"
-def _get_user_role(request: Request) -> str:
-    """Get user role from request state for multiplier calculation."""
-    user = getattr(request.state, "user", None)
-    if user and hasattr(user, "role") and user.role:
-        return user.role
-    return "student"
-def _get_role_multiplier(request: Request) -> int:
-    """Get rate limit multiplier based on user role."""
-    role = _get_user_role(request)
-    return ROLE_MULTIPLIERS.get(role, 1)
-class MathPulseLimiter:
-    """
-    Rate limiter with role-aware multipliers for MathPulse AI.
-    """
-    def __init__(self) -> None:
-        self._limiter = Limiter(
-            key_func=_get_user_identifier,
-            storage_uri="memory://",
-            default_limits=[f"{RATE_LIMIT_DEFAULT_RPM}/minute"],
-        )
-    @property
-    def limiter(self) -> Limiter:
-        return self._limiter
-    def _get_adjusted_limit(self, base_rpm: int, request: Request) -> int:
-        """Apply role multiplier to base rate limit."""
-        multiplier = _get_role_multiplier(request)
-        return base_rpm * multiplier
-    def ai_limit(self, request: Request) -> str:
-        """Rate limit for AI endpoints with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_AI_RPM, request)
-        return f"{limit}/minute"
-    def quiz_generate_limit(self, request: Request) -> str:
-        """Rate limit for quiz generation with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_QUIZ_GENERATE_RPM, request)
-        return f"{limit}/minute"
-    def quiz_submit_limit(self, request: Request) -> str:
-        """Rate limit for quiz submission with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_QUIZ_SUBMIT_RPM, request)
-        return f"{limit}/minute"
-    def auth_limit(self, request: Request) -> str:
-        """Rate limit for auth endpoints with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_AUTH_RPM, request)
-        return f"{limit}/minute"
-    def leaderboard_limit(self, request: Request) -> str:
-        """Rate limit for leaderboard with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_LEADERBOARD_RPM, request)
-        return f"{limit}/minute"
-    def default_limit(self, request: Request) -> str:
-        """Default rate limit with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_DEFAULT_RPM, request)
-        return f"{limit}/minute"
-# Global rate limiter instance
-rate_limiter = MathPulseLimiter()
-def setup_rate_limiting(app) -> None:
-    """
-    Set up rate limiting for the FastAPI application.
-    """
-    # Add limiter to app state
-    app.state.limiter = rate_limiter.limiter
-    # Add slowapi exception handler
-    app.add_exception_handler(
-        SlowAPIRateLimitExceeded,
-        lambda request, exc: _rate_limit_exceeded_handler(request, exc)
-    )
-    logger.info(
-        f"Rate limiting configured: AI={RATE_LIMIT_AI_RPM}/min, "
-        f"QuizGen={RATE_LIMIT_QUIZ_GENERATE_RPM}/min, "
-        f"Auth={RATE_LIMIT_AUTH_RPM}/min, "
-        f"Admin={RATE_LIMIT_ADMIN_MULTIPLIER}x, Teacher={RATE_LIMIT_TEACHER_MULTIPLIER}x"
-    )
-def _rate_limit_exceeded_handler(request: Request, exc: SlowAPIRateLimitExceeded):
-    """Handle rate limit exceeded errors with proper JSON response."""
-    from fastapi.responses import JSONResponse
-    retry_after = getattr(exc, "retry_after", 60)
-    return JSONResponse(
-        status_code=429,
-        content={
-            "error": "rate_limit_exceeded",
-            "message": "Too many requests. Please try again later.",
-            "retry_after": retry_after,
-        },
-        headers={
-            "Retry-After": str(retry_after),
-            "Content-Type": "application/json",
-        }
-    )
-# Decorator helpers
-def ai_rate_limit():
-    """Decorator for AI endpoint rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.ai_limit)
-def quiz_generate_rate_limit():
-    """Decorator for quiz generation rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.quiz_generate_limit)
-def quiz_submit_rate_limit():
-    """Decorator for quiz submit rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.quiz_submit_limit)
-def auth_rate_limit():
-    """Decorator for auth endpoint rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.auth_limit)
-def leaderboard_rate_limit():
-    """Decorator for leaderboard rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.leaderboard_limit)
-def default_rate_limit():
-    """Decorator for default rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.default_limit)

pre_deploy_check.py CHANGED Viewed

@@ -16,16 +16,8 @@ Exit codes:
 import sys
 import os
-# Add repo root to path (for services/ delegation) AND backend to path
-_repo_root = os.path.dirname(os.path.abspath(__file__))
-_parent = os.path.dirname(_repo_root)
-_backend = _repo_root
-# Add in order: parent first (so services/ can delegate), then backend (for when services/__init__.py tries to import)
-if _parent not in sys.path:
-    sys.path.insert(0, _parent)
-if _backend not in sys.path:
-    sys.path.insert(0, _backend)
 def main() -> int:
     """Run pre-deployment checks."""

 import sys
 import os
+# Add backend to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 def main() -> int:
     """Run pre-deployment checks."""

rag/__init__.py CHANGED Viewed

@@ -5,19 +5,11 @@ from .curriculum_rag import (
     build_lesson_prompt,
     build_problem_generation_prompt,
     build_analysis_curriculum_context,
-    build_lesson_query,
-    format_retrieved_chunks,
-    summarize_retrieval_confidence,
 )
-from .vectorstore_loader import reset_vectorstore_singleton
 __all__ = [
     "retrieve_curriculum_context",
     "build_lesson_prompt",
     "build_problem_generation_prompt",
     "build_analysis_curriculum_context",
-    "build_lesson_query",
-    "format_retrieved_chunks",
-    "summarize_retrieval_confidence",
-    "reset_vectorstore_singleton",
-]

     build_lesson_prompt,
     build_problem_generation_prompt,
     build_analysis_curriculum_context,
 )
 __all__ = [
     "retrieve_curriculum_context",
     "build_lesson_prompt",
     "build_problem_generation_prompt",
     "build_analysis_curriculum_context",
+]

rag/curriculum_rag.py CHANGED Viewed

@@ -1,10 +1,8 @@
-"""
-Updated curriculum RAG with exact match retrieval and 7-section notebook output.
-"""
 from __future__ import annotations
-from typing import Dict, List, Optional, Tuple
 def _to_where(
@@ -12,10 +10,6 @@ def _to_where(
     quarter: Optional[int] = None,
     content_domain: Optional[str] = None,
     chunk_type: Optional[str] = None,
-    module_id: Optional[str] = None,
-    lesson_id: Optional[str] = None,
-    competency_code: Optional[str] = None,
-    storage_path: Optional[str] = None,
 ) -> Optional[Dict[str, object]]:
     clauses = []
     if subject:
@@ -26,14 +20,6 @@ def _to_where(
         clauses.append({"content_domain": {"$eq": content_domain}})
     if chunk_type:
         clauses.append({"chunk_type": {"$eq": chunk_type}})
-    if module_id:
-        clauses.append({"module_id": {"$eq": module_id}})
-    if lesson_id:
-        clauses.append({"lesson_id": {"$eq": lesson_id}})
-    if competency_code:
-        clauses.append({"competency_code": {"$eq": competency_code}})
-    if storage_path:
-        clauses.append({"storage_path": {"$eq": storage_path}})
     if not clauses:
         return None
     if len(clauses) == 1:
@@ -42,6 +28,7 @@ def _to_where(
 def _distance_to_score(distance: float) -> float:
     return round(1.0 / (1.0 + max(distance, 0.0)), 4)
@@ -51,23 +38,12 @@ def retrieve_curriculum_context(
     quarter: int | None = None,
     content_domain: str | None = None,
     chunk_type: str | None = None,
-    module_id: str | None = None,
-    lesson_id: str | None = None,
-    competency_code: str | None = None,
-    storage_path: str | None = None,
-    top_k: int = 8,
 ) -> list[dict]:
-    from rag.vectorstore_loader import get_vectorstore_components
     _, collection, embedder = get_vectorstore_components()
-    where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)
-    prefixed_query = f"Represent this sentence for searching relevant passages: {query}"
-    query_embedding = embedder.encode(
-        prefixed_query,
-        normalize_embeddings=True,
-    ).tolist()
     result = collection.query(
         query_embeddings=[query_embedding],
         n_results=max(1, top_k),
@@ -83,39 +59,20 @@ def retrieve_curriculum_context(
     for idx, content in enumerate(documents):
         md = metadatas[idx] if idx < len(metadatas) and isinstance(metadatas[idx], dict) else {}
         distance = float(distances[idx]) if idx < len(distances) else 1.0
-        rows.append({
-            "content": str(content or ""),
-            "subject": str(md.get("subject") or "unknown"),
-            "quarter": int(md.get("quarter") or 0),
-            "content_domain": str(md.get("content_domain") or "general"),
-            "chunk_type": str(md.get("chunk_type") or "concept"),
-            "source_file": str(md.get("source_file") or ""),
-            "storage_path": str(md.get("storage_path") or ""),
-            "module_id": str(md.get("module_id") or ""),
-            "lesson_id": str(md.get("lesson_id") or ""),
-            "competency_code": str(md.get("competency_code") or ""),
-            "page": int(md.get("page") or 0),
-            "score": _distance_to_score(distance),
-        })
-    return rows
-def build_exact_lesson_query(
-    topic: str,
-    subject: str,
-    quarter: int,
-    lesson_title: str | None = None,
-    competency: str | None = None,
-    module_unit: str | None = None,
-    learner_level: str | None = None,
-    competency_code: str | None = None,
-) -> str:
-    parts = [topic, subject, f"Quarter {quarter}"]
-    for value in (lesson_title, competency, module_unit, learner_level, competency_code):
-        clean = str(value or "").strip()
-        if clean:
-            parts.append(clean)
-    return " | ".join(parts)
 def build_lesson_query(
@@ -136,120 +93,30 @@ def build_lesson_query(
     return " | ".join(parts)
-def retrieve_lesson_pdf_context(
-    topic: str,
-    subject: str,
-    quarter: int,
-    lesson_title: str | None = None,
-    competency: str | None = None,
-    module_id: str | None = None,
-    lesson_id: str | None = None,
-    competency_code: str | None = None,
-    storage_path: str | None = None,
-    top_k: int = 8,
-) -> Tuple[list[dict], str]:
-    """Retrieve chunks by storage_path exact match + semantic ranking; fallback to general query.
-    NOTE: Curriculum PDF chunks are often tagged with quarter=1 even when they cover all quarters.
-    We first try the exact quarter, then fallback to quarter=1, then no quarter filter.
-    """
-    # Try 1: Exact match with storage_path + quarter
-    if storage_path:
-        exact_chunks = retrieve_curriculum_context(
-            query=topic,
-            subject=subject,
-            quarter=quarter,
-            storage_path=storage_path,
-            top_k=top_k,
-        )
-        if exact_chunks and any(c["score"] >= 0.65 for c in exact_chunks):
-            return exact_chunks, "exact"
-    # Try 2: General query with exact quarter
-    general_chunks = retrieve_curriculum_context(
-        query=topic,
-        subject=subject,
-        quarter=quarter,
-        top_k=top_k,
-    )
-    # Try 3: Fallback to quarter=1 (most curriculum PDFs are tagged Q1)
-    if not general_chunks and quarter != 1:
-        general_chunks = retrieve_curriculum_context(
-            query=topic,
-            subject=subject,
-            quarter=1,
-            top_k=top_k,
-        )
-    # Try 4: Final fallback - no quarter filter at all
-    if not general_chunks:
-        general_chunks = retrieve_curriculum_context(
-            query=topic,
-            subject=subject,
-            top_k=top_k,
-        )
-    if storage_path and exact_chunks:
-        all_chunks = exact_chunks + general_chunks
-        seen = set()
-        deduped = []
-        for c in all_chunks:
-            key = f"{c.get('source_file')}:{c.get('page')}:{c.get('content', '')[:60]}"
-            if key not in seen:
-                seen.add(key)
-                deduped.append(c)
-        deduped.sort(key=lambda x: x.get("score", 0), reverse=True)
-        return deduped[:top_k], "hybrid"
-    return general_chunks, "general"
 def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
-    refs = []
     for i, chunk in enumerate(curriculum_chunks, start=1):
-        refs.append(
             f"{i}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
             f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) score={chunk.get('score')}\n"
             f"   Excerpt: {chunk.get('content', '')}"
         )
-    return "\n".join(refs) if refs else "No curriculum context retrieved."
-def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, any]:
     if not curriculum_chunks:
-        return {"confidence": 0.0, "band": "low", "chunkCount": 0}
-    top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
     score = sum(top_scores) / max(1, len(top_scores))
-    band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
-    return {"confidence": round(score, 3), "band": band, "chunkCount": len(curriculum_chunks)}
-def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
-    """Organize retrieved chunks into lesson section categories."""
-    sections: Dict[str, List[dict]] = {
-        "introduction": [],
-        "key_concepts": [],
-        "worked_examples": [],
-        "important_notes": [],
-        "practice": [],
-        "summary": [],
-        "assessment": [],
-        "general": [],
-    }
-    domain_priority = {
-        "introduction": 1, "key_concepts": 2, "worked_examples": 3,
-        "important_notes": 4, "practice": 5, "summary": 6,
-        "assessment": 7, "general": 8,
-    }
-    for chunk in chunks:
-        domain = chunk.get("content_domain", "general")
-        if domain in sections:
-            sections[domain].append(chunk)
-        else:
-            sections["general"].append(chunk)
-    return sections
 def build_lesson_prompt(
@@ -262,57 +129,39 @@ def build_lesson_prompt(
     learner_level: Optional[str],
     module_unit: Optional[str],
     curriculum_chunks: list[dict],
-    competency_code: Optional[str] = None,
 ) -> str:
     refs_text = format_retrieved_chunks(curriculum_chunks)
-    organized = organize_chunks_by_section(curriculum_chunks)
     return (
-        "You are a DepEd-aligned Grade 11-12 mathematics instructional designer.\n"
-        "Generate a lesson in JSON format. Use ONLY the retrieved curriculum evidence below.\n"
-        "Do NOT invent content. Do NOT add generic motivational text. All content must be grounded in the retrieved excerpts.\n\n"
         f"Lesson title: {lesson_title}\n"
-        f"Competency code: {competency_code or 'n/a'}\n"
         f"Curriculum competency: {competency}\n"
         f"Grade level: {grade_level}\n"
         f"Subject: {subject}\n"
         f"Quarter: Q{quarter}\n"
-        f"Learner level: {learner_level or 'Grade 11-12'}\n"
         f"Module/unit: {module_unit or 'n/a'}\n\n"
         "[CURRICULUM CONTEXT]\n"
         f"{refs_text}\n\n"
-        "Return ONLY valid JSON with this exact structure. All 7 sections are required:\n"
-        "{\n"
-        '  "sections": [\n'
-        '    {"type": "introduction",    "title": "Introduction",       "content": "..."},\n'
-        '    {"type": "key_concepts",    "title": "Key Concepts",      "content": "...", "callouts": [{"type":"important|ti..."}]\n},'
-        '    {"type": "video",           "title": "Video Lesson",      "content": "...", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},\n'
-        '    {"type": "worked_examples",  "title": "Worked Examples",    "examples": [{"problem":"...","steps":["Step 1: ...","Step 2: ..."],"answer":"..."}]},\n'
-        '    {"type": "important_notes",  "title": "Important Notes",   "bulletPoints": ["...","..."]},\n'
-        '    {"type": "try_it_yourself", "title": "Try It Yourself",   "practiceProblems": [{"question":"...","solution":"..."}]},\n'
-        '    {"type": "summary",         "title": "Summary",           "content": "..."}\n'
-        "  ],\n"
-        '  "needsReview": false\n'
-        "}\n\n"
         "Rules:\n"
-        "- content in introduction, key_concepts, important_notes, summary: use paragraph/bullet text grounded in retrieved chunks\n"
-        "- examples must reflect actual content from the retrieved curriculum (real formulas, real contexts)\n"
-        "- practiceProblems should be derivable from worked examples\n"
-        "- callouts: type is 'important', 'tip', or 'warning'\n"
-        "- video section: content is a brief sentence, leave videoId empty (will be filled by backend)\n"
-        "- Do not use placeholder text like 'placeholder' or 'example text'\n"
-        "- Do not fabricate worked examples - use actual curriculum content\n"
     )
 def build_problem_generation_prompt(topic: str, difficulty: str, curriculum_chunks: list[dict]) -> str:
-    refs = []
     for i, chunk in enumerate(curriculum_chunks, start=1):
-        refs.append(
             f"{i}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
             f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) {chunk.get('content', '')}"
         )
-    refs_text = "\n".join(refs) if refs else "No curriculum context retrieved."
     return (
         "Generate one practice problem strictly aligned to the retrieved DepEd competency scope.\n"
@@ -335,7 +184,7 @@ def build_analysis_curriculum_context(weak_topics: list[str], subject: str) -> l
             top_k=2,
         )
         for row in rows:
-            key = f"{row.get('source_file')}::{row.get('page')}::{row.get('content', '')[:80]}"
             if key not in dedup:
                 dedup[key] = row
-    return list(dedup.values())

 from __future__ import annotations
+from typing import Dict, List, Optional
+from .vectorstore_loader import get_vectorstore_components
 def _to_where(
     quarter: Optional[int] = None,
     content_domain: Optional[str] = None,
     chunk_type: Optional[str] = None,
 ) -> Optional[Dict[str, object]]:
     clauses = []
     if subject:
         clauses.append({"content_domain": {"$eq": content_domain}})
     if chunk_type:
         clauses.append({"chunk_type": {"$eq": chunk_type}})
     if not clauses:
         return None
     if len(clauses) == 1:
 def _distance_to_score(distance: float) -> float:
+    # Chroma returns smaller distance for better matches. Map to (0,1].
     return round(1.0 / (1.0 + max(distance, 0.0)), 4)
     quarter: int | None = None,
     content_domain: str | None = None,
     chunk_type: str | None = None,
+    top_k: int = 5,
 ) -> list[dict]:
     _, collection, embedder = get_vectorstore_components()
+    where = _to_where(subject, quarter, content_domain, chunk_type)
+    query_embedding = embedder.encode(query).tolist()
     result = collection.query(
         query_embeddings=[query_embedding],
         n_results=max(1, top_k),
     for idx, content in enumerate(documents):
         md = metadatas[idx] if idx < len(metadatas) and isinstance(metadatas[idx], dict) else {}
         distance = float(distances[idx]) if idx < len(distances) else 1.0
+        rows.append(
+            {
+                "content": str(content or ""),
+                "subject": str(md.get("subject") or "unknown"),
+                "quarter": int(md.get("quarter") or 0),
+                "content_domain": str(md.get("content_domain") or "unknown"),
+                "chunk_type": str(md.get("chunk_type") or "unknown"),
+                "source_file": str(md.get("source_file") or ""),
+                "page": int(md.get("page") or 0),
+                "score": _distance_to_score(distance),
+            }
+        )
+    return rows
 def build_lesson_query(
     return " | ".join(parts)
 def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
+    references = []
     for i, chunk in enumerate(curriculum_chunks, start=1):
+        references.append(
             f"{i}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
             f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) score={chunk.get('score')}\n"
             f"   Excerpt: {chunk.get('content', '')}"
         )
+    return "\n".join(references) if references else "No curriculum context retrieved."
+def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, float | str]:
     if not curriculum_chunks:
+        return {"confidence": 0.0, "band": "low"}
+    top_scores = [float(chunk.get("score") or 0.0) for chunk in curriculum_chunks[:5]]
     score = sum(top_scores) / max(1, len(top_scores))
+    if score >= 0.72:
+        band = "high"
+    elif score >= 0.5:
+        band = "medium"
+    else:
+        band = "low"
+    return {"confidence": round(score, 3), "band": band}
 def build_lesson_prompt(
     learner_level: Optional[str],
     module_unit: Optional[str],
     curriculum_chunks: list[dict],
 ) -> str:
     refs_text = format_retrieved_chunks(curriculum_chunks)
     return (
+        "You are a Grade 11-12 DepEd SHS math instructional designer.\n"
+        "Generate JSON only. Use ONLY the retrieved curriculum evidence below. Do not invent competencies or content beyond the retrieved scope.\n\n"
         f"Lesson title: {lesson_title}\n"
         f"Curriculum competency: {competency}\n"
         f"Grade level: {grade_level}\n"
         f"Subject: {subject}\n"
         f"Quarter: Q{quarter}\n"
+        f"Learner level: {learner_level or 'mixed'}\n"
         f"Module/unit: {module_unit or 'n/a'}\n\n"
         "[CURRICULUM CONTEXT]\n"
         f"{refs_text}\n\n"
+        "Return JSON with these keys only:\n"
+        "lessonTitle, curriculumCompetency, lessonObjective, realWorldHook, explanation, workedExample, guidedPractice, independentPractice, quickAssessment, reflectionPrompt, sourceCitations, needsReview, reviewReason\n\n"
         "Rules:\n"
+        "- Keep the lesson age-appropriate for SHS learners.\n"
+        "- Use real Philippine contexts where possible, such as payroll, VAT, discounts, loans, logistics, travel, or school data.\n"
+        "- If evidence is thin, set needsReview=true and explain why in reviewReason.\n"
+        "- Do not mention unsupported curriculum facts.\n"
+        "- sourceCitations should be an array of short citations referencing the retrieved excerpts."
     )
 def build_problem_generation_prompt(topic: str, difficulty: str, curriculum_chunks: list[dict]) -> str:
+    references = []
     for i, chunk in enumerate(curriculum_chunks, start=1):
+        references.append(
             f"{i}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
             f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) {chunk.get('content', '')}"
         )
+    refs_text = "\n".join(references) if references else "No curriculum context retrieved."
     return (
         "Generate one practice problem strictly aligned to the retrieved DepEd competency scope.\n"
             top_k=2,
         )
         for row in rows:
+            key = f"{row.get('source_file')}::{row.get('page')}::{row.get('content')[:80]}"
             if key not in dedup:
                 dedup[key] = row
+    return list(dedup.values())

rag/firebase_storage_loader.py DELETED Viewed

@@ -1,175 +0,0 @@
-"""
-Firebase Storage PDF loader for curriculum ingestion.
-Downloads PDFs from Firebase Storage and extracts text for ChromaDB indexing.
-"""
-from __future__ import annotations
-import logging
-import os
-from pathlib import Path
-from typing import Dict, List, Optional, Tuple
-logger = logging.getLogger("mathpulse.fb_storage_loader")
-_FIREBASE_INITIALIZED = False
-def _init_firebase_storage() -> Tuple[any, any]:
-    global _FIREBASE_INITIALIZED
-    if _FIREBASE_INITIALIZED:
-        try:
-            from firebase_admin import storage as fb_storage
-            bucket = fb_storage.bucket()
-            return fb_storage, bucket
-        except Exception as e:
-            logger.warning("Firebase storage unavailable: %s", e)
-            _FIREBASE_INITIALIZED = False
-            return None, None
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage
-    except ImportError:
-        logger.warning("firebase_admin not installed")
-        return None, None
-    if firebase_admin._apps:
-        _FIREBASE_INITIALIZED = True
-        try:
-            bucket = storage.bucket()
-            return storage, bucket
-        except Exception as e:
-            logger.warning("Firebase storage bucket unavailable: %s", e)
-            return None, None
-    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
-    try:
-        if sa_json:
-            import json as _json
-            creds = credentials.Certificate(_json.loads(sa_json))
-        elif sa_file and Path(sa_file).exists():
-            creds = credentials.Certificate(sa_file)
-        else:
-            creds = credentials.ApplicationDefault()
-        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-        _FIREBASE_INITIALIZED = True
-        bucket = storage.bucket()
-        return storage, bucket
-    except Exception as e:
-        logger.warning("Firebase init failed: %s", e)
-        return None, None
-def download_pdf_from_storage(storage_path: str, dest_path: Optional[str] = None) -> Optional[bytes]:
-    """Download a PDF from Firebase Storage and return its bytes."""
-    _, bucket = _init_firebase_storage()
-    if bucket is None:
-        logger.warning("Firebase Storage not available, skipping download")
-        return None
-    try:
-        blob = bucket.blob(storage_path)
-        if not blob.exists():
-            logger.warning("Blob does not exist: %s", storage_path)
-            return None
-        bytes_data = blob.download_as_bytes()
-        logger.info("Downloaded %s (%d bytes)", storage_path, len(bytes_data))
-        if dest_path:
-            Path(dest_path).parent.mkdir(parents=True, exist_ok=True)
-            with open(dest_path, "wb") as f:
-                f.write(bytes_data)
-            logger.info("Saved to %s", dest_path)
-        return bytes_data
-    except Exception as e:
-        logger.error("Failed to download %s: %s", storage_path, e)
-        return None
-def list_curriculum_blobs(prefix: str = "curriculum/") -> List[Dict[str, str]]:
-    """List all blobs under a prefix in Firebase Storage."""
-    _, bucket = _init_firebase_storage()
-    if bucket is None:
-        return []
-    blobs = bucket.list_blobs(prefix=prefix)
-    result = []
-    for blob in blobs:
-        if blob.name.endswith(".pdf"):
-            result.append({
-                "name": blob.name,
-                "size": blob.size,
-                "updated": str(blob.updated) if blob.updated else None,
-                "download_url": f"https://storage.googleapis.com/{bucket.name}/{blob.name}",
-            })
-    return result
-# NOTE: Curriculum guide PDFs (shaping papers) are stored in Firebase Storage
-# for system reference but are NOT included in RAG ingestion because they
-# contain only learning objectives and course descriptions — insufficient
-# content for lesson generation (typically <10 chunks each).
-#
-# Only SDO teaching modules (full lesson content with examples and problems)
-# are included in the RAG pipeline.
-PDF_METADATA: Dict[str, dict] = {
-    # General Mathematics Q1 — SDO Navotas teaching module (100 pages, ~117k chars)
-    "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 1,
-        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
-    },
-    # General Mathematics Q2 — Interest & Annuities modules (~27-35 pages each)
-    "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf",
-    },
-    "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf",
-    },
-    "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf",
-    },
-    "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf",
-    },
-    # Statistics and Probability — Full textbook (331 pages, ~607k chars)
-    "curriculum/stat_prob/Full.pdf": {
-        "subject": "Statistics and Probability",
-        "subjectId": "stats-prob",
-        "type": "sdo_module",
-        "content_domain": "statistics",
-        "quarter": 1,
-        "storage_path": "curriculum/stat_prob/Full.pdf",
-    },
-}

rag/pdf_ingestion.py DELETED Viewed

@@ -1,368 +0,0 @@
-"""
-PDF Ingestion Module for Quiz Battle RAG Question Bank.
-Ingests PDFs from Firebase Storage, extracts text, chunks content,
-generates embeddings, calls DeepSeek to produce base questions,
-and stores results in Firestore.
-"""
-import asyncio
-import hashlib
-import io
-import json
-import logging
-import os
-import random
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from typing import Optional
-from google.cloud.firestore import Client
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from sentence_transformers import SentenceTransformer
-import pypdf
-from rag.firebase_storage_loader import _init_firebase_storage
-from services.ai_client import get_deepseek_client, CHAT_MODEL
-logger = logging.getLogger(__name__)
-EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
-DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
-@dataclass
-class IngestionResult:
-    """Result of a PDF ingestion operation."""
-    filename: str
-    processed: bool
-    question_count: int
-    grade_level: int
-    topic: str
-    storage_path: str
-    timestamp: datetime
-def _extract_filename(storage_path: str) -> str:
-    """Extract filename from a Firebase Storage path."""
-    return storage_path.split("/")[-1]
-def _generate_chunk_id(source_chunk_id: str, question_text: str) -> str:
-    """Generate a unique document ID for a question."""
-    return hashlib.md5(f"{source_chunk_id}:{question_text}".encode()).hexdigest()
-def _strip_json_fences(text: str) -> str:
-    """Strip markdown JSON fences from text."""
-    text = text.strip()
-    if text.startswith("```json"):
-        text = text[7:]
-    if text.startswith("```"):
-        text = text[3:]
-    if text.endswith("```"):
-        text = text[:-3]
-    return text.strip()
-async def _generate_questions_for_chunk(
-    chunk_text: str,
-    chunk_id: str,
-    topic: str,
-    grade_level: int,
-    deepseek_client,
-) -> list[dict]:
-    """Call DeepSeek to generate MCQs for a text chunk."""
-    system_prompt = (
-        "You are a DepEd-aligned math question generator for Filipino students. "
-        "Given a curriculum excerpt, generate 5 multiple-choice questions. "
-        "Return ONLY a JSON array. No markdown, no explanation."
-    )
-    user_prompt = f"""Given this curriculum excerpt:
-<chunk>
-{chunk_text}
-</chunk>
-Generate 5 multiple-choice questions. For each question output JSON:
-{{
-  "question": "...",
-  "choices": ["A) ...", "B) ...", "C) ...", "D) ..."],
-  "correct_answer": "A",
-  "explanation": "...",
-  "topic": "{topic}",
-  "difficulty": "easy|medium|hard",
-  "grade_level": {grade_level},
-  "source_chunk_id": "{chunk_id}"
-}}
-Return a JSON array only, no extra text."""
-    try:
-        response = deepseek_client.chat.completions.create(
-            model=CHAT_MODEL,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt},
-            ],
-            temperature=0.7,
-        )
-        raw_response = response.choices[0].message.content
-        clean_response = _strip_json_fences(raw_response)
-        questions = json.loads(clean_response)
-        return questions if isinstance(questions, list) else []
-    except json.JSONDecodeError as e:
-        logger.error(f"Failed to parse DeepSeek response as JSON for chunk {chunk_id}: {e}")
-        return []
-    except Exception as e:
-        logger.error(f"Error calling DeepSeek for chunk {chunk_id}: {e}")
-        return []
-def _chunk_text(text: str) -> list[str]:
-    """Split text into chunks using RecursiveCharacterTextSplitter."""
-    splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500,
-        chunk_overlap=50,
-        length_function=len,
-        separators=["\n\n", "\n", " ", ""],
-    )
-    return splitter.split_text(text)
-def _extract_pdf_text(pdf_bytes: bytes) -> str:
-    """Extract text from PDF bytes using pypdf."""
-    reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
-    text_parts = []
-    for page in reader.pages:
-        text_parts.append(page.extract_text())
-    return "\n".join(text_parts)
-async def _save_questions_batch(
-    firestore_client: Client,
-    questions: list[dict],
-    grade_level: int,
-    topic: str,
-) -> int:
-    """Save questions to Firestore using batch writes. Returns count saved."""
-    batch = firestore_client.batch()
-    question_count = 0
-    for question in questions:
-        doc_id = question.get("id") or _generate_chunk_id(
-            question.get("source_chunk_id", ""),
-            question.get("question", ""),
-        )
-        doc_ref = firestore_client.collection("question_bank").document(
-            str(grade_level)
-        ).collection(topic).document("questions").collection("questions").document(doc_id)
-        doc_data = {
-            "question": question.get("question", ""),
-            "choices": question.get("choices", []),
-            "correct_answer": question.get("correct_answer", ""),
-            "explanation": question.get("explanation", ""),
-            "topic": question.get("topic", topic),
-            "difficulty": question.get("difficulty", "medium"),
-            "grade_level": question.get("grade_level", grade_level),
-            "source_chunk_id": question.get("source_chunk_id", ""),
-            "random_seed": random.random(),
-            "created_at": datetime.now(timezone.utc),
-        }
-        batch.set(doc_ref, doc_data)
-        question_count += 1
-        if question_count % 500 == 0:
-            await batch.commit()
-            batch = firestore_client.batch()
-    await batch.commit()
-    return question_count
-async def _save_embeddings_batch(
-    firestore_client: Client,
-    chunks: list[dict],
-    filename: str,
-) -> int:
-    """Save chunk embeddings to Firestore. Returns count saved."""
-    batch = firestore_client.batch()
-    count = 0
-    for chunk in chunks:
-        chunk_id = chunk["id"]
-        doc_ref = firestore_client.collection("question_bank_embeddings").document(chunk_id)
-        doc_data = {
-            "chunk_id": chunk_id,
-            "text": chunk["text"],
-            "embedding": chunk["embedding"],
-            "filename": filename,
-            "created_at": datetime.now(timezone.utc),
-        }
-        batch.set(doc_ref, doc_data)
-        count += 1
-        if count % 500 == 0:
-            await batch.commit()
-            batch = firestore_client.batch()
-    await batch.commit()
-    return count
-async def _save_processing_manifest(
-    firestore_client: Client,
-    filename: str,
-    question_count: int,
-    chunk_count: int,
-    grade_level: int,
-    topic: str,
-    storage_path: str,
-) -> None:
-    """Save processing manifest to Firestore."""
-    doc_ref = firestore_client.collection("pdf_processing_status").document(filename)
-    doc_data = {
-        "filename": filename,
-        "question_count": question_count,
-        "chunk_count": chunk_count,
-        "grade_level": grade_level,
-        "topic": topic,
-        "storage_path": storage_path,
-        "processed_at": datetime.now(timezone.utc),
-        "status": "completed",
-    }
-    await doc_ref.set(doc_data)
-async def ingest_pdf(
-    storage_path: str,
-    grade_level: int,
-    topic: str,
-    force_reingest: bool = False,
-) -> IngestionResult:
-    """
-    Ingest a PDF from Firebase Storage, generate questions, and store in Firestore.
-    Args:
-        storage_path: Path to PDF in Firebase Storage (e.g., "rag-pdfs/filename.pdf")
-        grade_level: Grade level (11 or 12)
-        topic: Topic identifier for the questions
-        force_reingest: If True, reprocess even if already processed
-    Returns:
-        IngestionResult with processing summary
-    """
-    filename = _extract_filename(storage_path)
-    project_id = os.getenv("FIREBASE_AUTH_PROJECT_ID", DEFAULT_FIREBASE_PROJECT)
-    firestore_client = Client(project=project_id)
-    # Step 1: Check if already processed
-    if not force_reingest:
-        status_ref = firestore_client.collection("pdf_processing_status").document(filename)
-        status_doc = await status_ref.get()
-        if status_doc.exists:
-            logger.info(f"PDF {filename} already processed, skipping (use force_reingest=True to override)")
-            data = status_doc.to_dict() or {}
-            return IngestionResult(
-                filename=filename,
-                processed=True,
-                question_count=data.get("question_count", 0),
-                grade_level=data.get("grade_level", grade_level),
-                topic=data.get("topic", topic),
-                storage_path=data.get("storage_path", storage_path),
-                timestamp=data.get("timestamp", datetime.now(timezone.utc)),
-            )
-    # Step 2: Download PDF from Firebase Storage
-    try:
-        _, bucket = _init_firebase_storage()
-        blob = bucket.blob(storage_path)
-        pdf_bytes = blob.download_as_bytes()
-    except Exception as e:
-        logger.error(f"Failed to download PDF from Firebase Storage: {e}")
-        return IngestionResult(
-            filename=filename,
-            processed=False,
-            question_count=0,
-            grade_level=grade_level,
-            topic=topic,
-            storage_path=storage_path,
-            timestamp=datetime.now(timezone.utc),
-        )
-    # Step 3: Extract text from PDF
-    try:
-        text = _extract_pdf_text(pdf_bytes)
-    except Exception as e:
-        logger.error(f"Failed to extract text from PDF: {e}")
-        return IngestionResult(
-            filename=filename,
-            processed=False,
-            question_count=0,
-            grade_level=grade_level,
-            topic=topic,
-            storage_path=storage_path,
-            timestamp=datetime.now(timezone.utc),
-        )
-    # Step 4: Chunk text
-    chunks = _chunk_text(text)
-    # Step 5: Generate embeddings
-    embedding_model = SentenceTransformer(EMBEDDING_MODEL)
-    chunk_ids = []
-    chunk_data = []
-    for i, chunk_text in enumerate(chunks):
-        chunk_id = hashlib.md5(f"{filename}:{i}:{chunk_text[:100]}".encode()).hexdigest()
-        embedding = embedding_model.encode(chunk_text).tolist()
-        chunk_ids.append(chunk_id)
-        chunk_data.append({
-            "id": chunk_id,
-            "text": chunk_text,
-            "embedding": embedding,
-        })
-    # Step 6: Initialize DeepSeek client
-    deepseek_client = get_deepseek_client()
-    # Step 7: Generate questions for each chunk
-    all_questions = []
-    for i, chunk_text in enumerate(chunks):
-        chunk_id = chunk_ids[i]
-        questions = await _generate_questions_for_chunk(
-            chunk_text, chunk_id, topic, grade_level, deepseek_client
-        )
-        for q in questions:
-            q["id"] = _generate_chunk_id(chunk_id, q.get("question", ""))
-        all_questions.extend(questions)
-    # Step 8: Save questions to Firestore
-    question_count = await _save_questions_batch(
-        firestore_client, all_questions, grade_level, topic
-    )
-    # Step 9: Save embeddings to Firestore
-    await _save_embeddings_batch(firestore_client, chunk_data, filename)
-    # Step 10: Save manifest to Firestore
-    await _save_processing_manifest(
-        firestore_client, filename, question_count, len(chunks),
-        grade_level, topic, storage_path
-    )
-    logger.info(
-        f"Completed ingestion for {filename}: {question_count} questions, "
-        f"{len(chunks)} chunks"
-    )
-    return IngestionResult(
-        filename=filename,
-        processed=True,
-        question_count=question_count,
-        grade_level=grade_level,
-        topic=topic,
-        storage_path=storage_path,
-        timestamp=datetime.now(timezone.utc),
-    )

rag/vectorstore_loader.py CHANGED Viewed

@@ -12,12 +12,6 @@ _VECTORSTORE_LOCK = Lock()
 _VECTORSTORE_SINGLETON: Tuple[Any, Any, SentenceTransformer] | None = None
-def reset_vectorstore_singleton() -> None:
-    global _VECTORSTORE_SINGLETON
-    with _VECTORSTORE_LOCK:
-        _VECTORSTORE_SINGLETON = None
 def _resolve_vectorstore_dir() -> Path:
     raw = os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore")
     path = Path(raw)
@@ -34,7 +28,7 @@ def _resolve_vectorstore_dir() -> Path:
 def get_vectorstore_components(
     collection_name: str = "curriculum_chunks",
-    model_name: str = "BAAI/bge-base-en-v1.5",
 ):
     global _VECTORSTORE_SINGLETON
     if _VECTORSTORE_SINGLETON is None:
@@ -43,10 +37,7 @@ def get_vectorstore_components(
                 vectorstore_dir = _resolve_vectorstore_dir()
                 vectorstore_dir.mkdir(parents=True, exist_ok=True)
                 client = chromadb.PersistentClient(path=str(vectorstore_dir))
-                collection = client.get_or_create_collection(
-                    name=collection_name,
-                    metadata={"hnsw:space": "cosine"},
-                )
                 embedder = SentenceTransformer(model_name)
                 _VECTORSTORE_SINGLETON = (client, collection, embedder)
     return _VECTORSTORE_SINGLETON

 _VECTORSTORE_SINGLETON: Tuple[Any, Any, SentenceTransformer] | None = None
 def _resolve_vectorstore_dir() -> Path:
     raw = os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore")
     path = Path(raw)
 def get_vectorstore_components(
     collection_name: str = "curriculum_chunks",
+    model_name: str = "BAAI/bge-small-en-v1.5",
 ):
     global _VECTORSTORE_SINGLETON
     if _VECTORSTORE_SINGLETON is None:
                 vectorstore_dir = _resolve_vectorstore_dir()
                 vectorstore_dir.mkdir(parents=True, exist_ok=True)
                 client = chromadb.PersistentClient(path=str(vectorstore_dir))
+                collection = client.get_or_create_collection(name=collection_name)
                 embedder = SentenceTransformer(model_name)
                 _VECTORSTORE_SINGLETON = (client, collection, embedder)
     return _VECTORSTORE_SINGLETON

requirements.txt CHANGED Viewed

@@ -1,6 +1,5 @@
 fastapi>=0.104.0
 uvicorn[standard]>=0.24.0
-openai>=1.0.0
 huggingface-hub>=0.31.0
 requests>=2.31.0
 pandas==2.2.3
@@ -20,10 +19,3 @@ numpy==2.2.1
 firebase-admin>=6.2.0
 redis[hiredis]>=5.0.0
 PyYAML>=6.0.0
-mypy>=1.20.0
-pytest>=9.0.0
-pytest-asyncio>=0.23.0
-google-api-python-client>=2.0.0
-pypdf>=4.0.0
-slowapi>=0.1.0
-limits>=3.0.0

 fastapi>=0.104.0
 uvicorn[standard]>=0.24.0
 huggingface-hub>=0.31.0
 requests>=2.31.0
 pandas==2.2.3
 firebase-admin>=6.2.0
 redis[hiredis]>=5.0.0
 PyYAML>=6.0.0

routes/admin_model_routes.py DELETED Viewed

@@ -1,67 +0,0 @@
-from fastapi import APIRouter, Depends, HTTPException, Request
-from pydantic import BaseModel
-from services.inference_client import (
-    set_runtime_model_profile, set_runtime_model_override,
-    reset_runtime_overrides, get_current_runtime_config, _MODEL_PROFILES,
-)
-router = APIRouter(prefix="/api/admin/model-config", tags=["admin"])
-ALLOWED_OVERRIDE_KEYS = {
-    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
-    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
-}
-def require_admin(request: Request):
-    user = getattr(request.state, "user", None)
-    if user is None:
-        raise HTTPException(status_code=401, detail="Authentication required")
-    if user.role != "admin":
-        raise HTTPException(status_code=403, detail="Admin access required")
-    return user
-class ProfileSwitchRequest(BaseModel):
-    profile: str
-class OverrideRequest(BaseModel):
-    key: str
-    value: str
-@router.get("")
-def get_model_config(_admin=Depends(require_admin)):
-    return {
-        **get_current_runtime_config(),
-        "availableProfiles": list(_MODEL_PROFILES.keys()),
-        "profileDescriptions": {
-            "dev":    "deepseek-chat everywhere - fast, $0.14/M input",
-            "budget": "deepseek-chat for all tasks - minimal cost",
-            "prod":   "deepseek-reasoner for RAG, deepseek-chat for chat - best quality",
-        },
-    }
-@router.post("/profile")
-def switch_profile(req: ProfileSwitchRequest, _admin=Depends(require_admin)):
-    try:
-        set_runtime_model_profile(req.profile)
-        return {"success": True, "applied": get_current_runtime_config()}
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-@router.post("/override")
-def set_override(req: OverrideRequest, _admin=Depends(require_admin)):
-    if req.key not in ALLOWED_OVERRIDE_KEYS:
-        raise HTTPException(status_code=400, detail=f"Key '{req.key}' is not overridable.")
-    set_runtime_model_override(req.key, req.value)
-    return {"success": True, "applied": get_current_runtime_config()}
-@router.delete("/reset")
-def reset_to_env(_admin=Depends(require_admin)):
-    reset_runtime_overrides()
-    return {"success": True, "current": get_current_runtime_config()}

routes/admin_routes.py DELETED Viewed

@@ -1,87 +0,0 @@
-from typing import Optional
-from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File, Form, BackgroundTasks
-from pydantic import BaseModel
-import logging
-from rag.firebase_storage_loader import _init_firebase_storage, PDF_METADATA
-from scripts.ingest_from_storage import ingest_from_firebase_storage
-logger = logging.getLogger("mathpulse.admin")
-router = APIRouter(prefix="/api/admin", tags=["admin"])
-def require_admin(request: Request):
-    user = getattr(request.state, "user", None)
-    if user is None:
-        raise HTTPException(status_code=401, detail="Authentication required")
-    if user.role != "admin":
-        raise HTTPException(status_code=403, detail="Admin access required")
-    return user
-class ReingestRequest(BaseModel):
-    subjectId: Optional[str] = None
-    storagePath: Optional[str] = None
-@router.post("/upload-pdf")
-async def upload_pdf(
-    subjectId: str = Form(...),
-    subjectName: str = Form(...),
-    semester: int = Form(...),
-    quarter: int = Form(...),
-    file: UploadFile = File(...),
-    _admin=Depends(require_admin)
-):
-    if not file.filename.endswith('.pdf'):
-        raise HTTPException(status_code=400, detail="Only PDF files are allowed.")
-    file_content = await file.read()
-    if len(file_content) > 50 * 1024 * 1024:
-        raise HTTPException(status_code=400, detail="File size exceeds 50MB limit.")
-    _, bucket = _init_firebase_storage()
-    if not bucket:
-        raise HTTPException(status_code=500, detail="Firebase storage is not initialized.")
-    storage_path = f"curriculum/{subjectId}/{file.filename}"
-    try:
-        blob = bucket.blob(storage_path)
-        blob.upload_from_string(file_content, content_type="application/pdf")
-    except Exception as e:
-        logger.error(f"Failed to upload PDF: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to upload to Firebase Storage: {e}")
-    # Update metadata in memory before reingesting
-    PDF_METADATA[storage_path] = {
-        "subject": subjectName,
-        "subjectId": subjectId,
-        "type": "uploaded_module",
-        "semester": semester,
-        "quarter": quarter
-    }
-    # Reingest
-    try:
-        ingest_from_firebase_storage(force_reindex=True)
-    except Exception as e:
-        logger.error(f"Failed to trigger reingestion: {e}")
-    storage_url = f"gs://{bucket.name}/{storage_path}"
-    return {
-        "success": True,
-        "chunkCount": 0,
-        "subjectId": subjectId,
-        "storageUrl": storage_url
-    }
-@router.post("/reingest-pdf")
-async def reingest_pdf(
-    req: Optional[ReingestRequest] = None,
-    _admin=Depends(require_admin)
-):
-    try:
-        ingest_from_firebase_storage(force_reindex=True)
-        return {"success": True, "message": "Reingestion triggered successfully."}
-    except Exception as e:
-        logger.error(f"Failed to reingest: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to reingest: {e}")

routes/curriculum_routes.py DELETED Viewed

@@ -1,66 +0,0 @@
-from __future__ import annotations
-import logging
-from typing import Optional
-from fastapi import APIRouter, HTTPException, Query
-from pydantic import BaseModel
-from services.curriculum_service import (
-    get_subject,
-    get_subjects,
-    get_topic,
-    get_topics,
-)
-logger = logging.getLogger("mathpulse.curriculum")
-router = APIRouter(prefix="/api/curriculum", tags=["curriculum"])
-class SubjectResponse(BaseModel):
-    id: str
-    code: str
-    name: str
-    gradeLevel: str
-    semester: str
-    color: str
-    pdfAvailable: bool
-    topics: list
-class TopicResponse(BaseModel):
-    id: str
-    name: str
-    unit: str
-@router.get("/subjects", response_model=list[SubjectResponse])
-async def list_subjects(grade_level: Optional[str] = Query(None, description="Filter by grade level (e.g., 'Grade 11', 'Grade 12')")):
-    """List all curriculum subjects, optionally filtered by grade level."""
-    subjects = get_subjects(grade_level)
-    return subjects
-@router.get("/subjects/{subject_id}", response_model=SubjectResponse)
-async def get_subject_by_id(subject_id: str):
-    """Get a specific subject by ID."""
-    subject = get_subject(subject_id)
-    if not subject:
-        raise HTTPException(status_code=404, detail=f"Subject not found: {subject_id}")
-    return subject
-@router.get("/subjects/{subject_id}/topics", response_model=list[TopicResponse])
-async def list_subject_topics(subject_id: str):
-    """List all topics for a subject."""
-    topics = get_topics(subject_id)
-    return topics
-@router.get("/subjects/{subject_id}/topics/{topic_id}", response_model=TopicResponse)
-async def get_topic_by_id(subject_id: str, topic_id: str):
-    """Get a specific topic."""
-    topic = get_topic(subject_id, topic_id)
-    if not topic:
-        raise HTTPException(status_code=404, detail=f"Topic not found: {subject_id}/{topic_id}")
-    return topic

routes/diagnostic.py DELETED Viewed

@@ -1,797 +0,0 @@
-"""
-MathPulse AI - Diagnostic Assessment Router
-POST /api/diagnostic/generate - Generate 15-item diagnostic test grounded in RAG curriculum
-POST /api/diagnostic/submit  - Score responses, run risk analysis, save to Firestore
-"""
-from __future__ import annotations
-import asyncio
-import json
-import logging
-import time
-import traceback
-import uuid
-from collections import defaultdict
-from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
-from fastapi import APIRouter, HTTPException, Request
-from pydantic import BaseModel, Field
-from services.ai_client import CHAT_MODEL, get_deepseek_client
-from rag.curriculum_rag import retrieve_curriculum_context
-import firebase_admin
-from firebase_admin import firestore as fs
-logger = logging.getLogger("mathpulse.diagnostic")
-router = APIRouter(prefix="/api/diagnostic", tags=["diagnostic"])
-# In-memory fallback session store (used if Firestore is unavailable)
-# This ensures assessment works even without Firebase credentials
-_in_memory_sessions: Dict[str, Dict[str, Any]] = defaultdict(dict)
-# ─── Pydantic Models ───────────────────────────────────────────────
-class DiagnosticGenerateRequest(BaseModel):
-    strand: str = Field(..., description="Student strand: ABM, STEM, HUMSS, GAS, TVL")
-    grade_level: str = Field(..., description="Grade level: Grade 11 or Grade 12")
-class DiagnosticOption(BaseModel):
-    A: str
-    B: str
-    C: str
-    D: str
-class DiagnosticQuestionStripped(BaseModel):
-    question_id: str
-    competency_code: str
-    domain: str
-    topic: str
-    difficulty: str
-    bloom_level: str
-    question_text: str
-    options: DiagnosticOption
-    curriculum_reference: str
-class DiagnosticGenerateResponse(BaseModel):
-    test_id: str
-    questions: List[DiagnosticQuestionStripped]
-    total_items: int
-    estimated_minutes: float
-class DiagnosticResponseItem(BaseModel):
-    question_id: str
-    student_answer: str
-    time_spent_seconds: int
-class DiagnosticSubmitRequest(BaseModel):
-    test_id: str
-    responses: List[DiagnosticResponseItem]
-class MasterySummary(BaseModel):
-    mastered: List[str]
-    developing: List[str]
-    beginning: List[str]
-class DiagnosticSubmitResponse(BaseModel):
-    success: bool
-    overall_risk: str
-    overall_score_percent: float
-    mastery_summary: MasterySummary
-    recommended_intervention: str
-    xp_earned: int
-    badge_unlocked: str
-    redirect_to: str
-# ─── Competency Code Registry ───────────────────────────────────────
-COMPETENCY_REGISTRY = {
-    "NA-WAGE-01": {"subject": "General Mathematics", "title": "Wages, Salaries, Overtime, Commissions, VAT"},
-    "NA-SEQ-01": {"subject": "General Mathematics", "title": "Arithmetic Sequences and Series"},
-    "NA-SEQ-02": {"subject": "General Mathematics", "title": "Geometric Sequences and Series"},
-    "NA-FUNC-01": {"subject": "General Mathematics", "title": "Functions, Relations, Vertical Line Test"},
-    "NA-FUNC-02": {"subject": "General Mathematics", "title": "Evaluating Functions, Operations, Composition"},
-    "NA-FUNC-03": {"subject": "General Mathematics", "title": "One-to-One Functions, Inverse Functions"},
-    "NA-EXP-01": {"subject": "General Mathematics", "title": "Exponential Functions, Equations, Inequalities"},
-    "NA-LOG-01": {"subject": "General Mathematics", "title": "Logarithmic Functions"},
-    "MG-TRIG-01": {"subject": "General Mathematics", "title": "Trigonometric Ratios, Right Triangles"},
-    "NA-FIN-01": {"subject": "General Mathematics", "title": "Compound Interest, Maturity Value"},
-    "NA-FIN-02": {"subject": "General Mathematics", "title": "Simple and General Annuities"},
-    "NA-FIN-04": {"subject": "General Mathematics", "title": "Business and Consumer Loans, Amortization"},
-    "NA-LOGIC-01": {"subject": "General Mathematics", "title": "Logical Propositions, Connectives, Truth Tables"},
-    "BM-FDP-01": {"subject": "Business Mathematics", "title": "Fractions, Decimals, Percent Conversions"},
-    "BM-FDP-02": {"subject": "Business Mathematics", "title": "Proportion: Direct, Inverse, Partitive"},
-    "BM-BUS-01": {"subject": "Business Mathematics", "title": "Markup, Margin, Trade Discounts, VAT"},
-    "BM-BUS-02": {"subject": "Business Mathematics", "title": "Profit, Loss, Break-even Point"},
-    "BM-COMM-01": {"subject": "Business Mathematics", "title": "Straight Commission, Salary Plus Commission"},
-    "BM-COMM-02": {"subject": "Business Mathematics", "title": "Commission on Cash and Installment Basis"},
-    "BM-SW-01": {"subject": "Business Mathematics", "title": "Salary vs. Wage, Income"},
-    "BM-SW-03": {"subject": "Business Mathematics", "title": "Mandatory Deductions: SSS, PhilHealth, Pag-IBIG"},
-    "BM-SW-04": {"subject": "Business Mathematics", "title": "Overtime Pay Computation (Labor Code)"},
-    "SP-RV-01": {"subject": "Statistics & Probability", "title": "Random Variables, Discrete vs. Continuous"},
-    "SP-RV-02": {"subject": "Statistics & Probability", "title": "Probability Distribution, Mean, Variance, SD"},
-    "SP-NORM-01": {"subject": "Statistics & Probability", "title": "Normal Curve Properties"},
-    "SP-NORM-02": {"subject": "Statistics & Probability", "title": "Z-Scores, Standard Normal Table"},
-    "SP-SAMP-01": {"subject": "Statistics & Probability", "title": "Types of Random Sampling"},
-    "SP-SAMP-03": {"subject": "Statistics & Probability", "title": "Central Limit Theorem"},
-    "SP-HYP-01": {"subject": "Statistics & Probability", "title": "Hypothesis Testing: H0 and Ha"},
-    "FM1-MAT-01": {"subject": "Finite Mathematics", "title": "Matrices and Matrix Operations"},
-    "FM2-PROB-01": {"subject": "Finite Mathematics", "title": "Counting Principles and Permutations"},
-    "FM2-PROB-02": {"subject": "Finite Mathematics", "title": "Combinations and Probability"},
-}
-LEARNING_PATH_ORDER: Dict[str, List[str]] = {
-    "BM": ["BM-FDP-01", "BM-FDP-02", "BM-BUS-01", "BM-BUS-02", "BM-COMM-01",
-           "BM-COMM-02", "BM-SW-01", "BM-SW-03", "BM-SW-04"],
-    "NA": ["NA-WAGE-01", "NA-SEQ-01", "NA-SEQ-02", "NA-FUNC-01", "NA-FUNC-02",
-           "NA-FUNC-03", "NA-EXP-01", "NA-LOG-01", "NA-FIN-01", "NA-FIN-02",
-           "NA-FIN-04", "NA-LOGIC-01"],
-    "SP": ["SP-RV-01", "SP-RV-02", "SP-NORM-01", "SP-NORM-02", "SP-SAMP-01",
-           "SP-SAMP-03", "SP-HYP-01"],
-}
-STRAND_SUBJECTS: Dict[str, List[str]] = {
-    "ABM": ["General Mathematics", "Business Mathematics"],
-    "STEM": ["General Mathematics", "Statistics and Probability"],
-    "HUMSS": ["General Mathematics"],
-    "GAS": ["General Mathematics"],
-    "TVL": ["General Mathematics"],
-}
-FULL_QUESTION_SCHEMA: Dict[str, List[str]] = {
-    "ABM": [
-        "General Mathematics: 5 items",
-        "Business Mathematics: 5 items",
-        "Statistics & Probability: 5 items",
-    ],
-    "STEM": [
-        "General Mathematics: 7 items",
-        "Statistics & Probability: 5 items",
-        "Finite Mathematics: 3 items",
-    ],
-    "HUMSS": ["General Mathematics: 15 items"],
-    "GAS": ["General Mathematics: 15 items"],
-    "TVL": ["General Mathematics: 15 items"],
-}
-STRAND_COVERAGE_TEXT: Dict[str, str] = {
-    "ABM": """FOR ABM STRAND:
-  - 5 questions: General Mathematics (NA-WAGE, NA-SEQ, NA-FIN topics -- wages, sequences, interest)
-  - 5 questions: Business Mathematics (BM-FDP, BM-BUS, BM-COMM, BM-SW topics -- percent, markup, commission, salaries, deductions using SSS/PhilHealth/Pag-IBIG rates)
-  - 5 questions: Statistics & Probability (SP-RV, SP-NORM topics -- random variables, normal distribution, z-scores)""",
-    "STEM": """FOR STEM STRAND:
-  - 7 questions: General Mathematics (NA-FUNC, NA-EXP, NA-LOG, MG-TRIG, NA-FIN -- functions, exponentials, trigonometry, financial math)
-  - 5 questions: Statistics & Probability (SP-RV, SP-NORM, SP-SAMP, SP-HYP -- distributions, sampling, hypothesis)
-  - 3 questions: Finite Mathematics (FM1-MAT or FM2-PROB -- matrices or counting/probability)""",
-    "HUMSS": """FOR HUMSS STRAND:
-  - 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
-    "GAS": """FOR GAS STRAND:
-  - 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
-    "TVL": """FOR TVL STRAND:
-  - 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
-}
-def _get_strand_coverage(strand: str) -> str:
-    return STRAND_COVERAGE_TEXT.get(strand.upper(), STRAND_COVERAGE_TEXT["STEM"])
-def _build_rag_context(strand: str) -> str:
-    subjects = STRAND_SUBJECTS.get(strand.upper(), ["General Mathematics"])
-    rag_context_parts: List[str] = []
-    rag_query = f"SHS {strand} diagnostic assessment competency questions Grade 11"
-    for subject in subjects:
-        try:
-            chunks = retrieve_curriculum_context(
-                query=rag_query,
-                subject=subject,
-                top_k=3,
-            )
-        except Exception as e:
-            logger.warning(f"[WARN] RAG unavailable for {subject}: {e}")
-            continue
-        if not chunks:
-            continue
-        chunk_texts: List[str] = []
-        for chunk in chunks:
-            source = chunk.get("source_file", "unknown")
-            content = str(chunk.get("content", ""))[:600]
-            chunk_texts.append(f"[Source: {source}]\n{content}")
-        rag_context_parts.append(
-            f"\n=== {subject.upper()} CURRICULUM REFERENCE ===\n" + "\n---\n".join(chunk_texts)
-        )
-    if not rag_context_parts:
-        logger.warning("[WARN] RAG unavailable for diagnostic generation -- proceeding without curriculum context")
-        return ""
-    return "\n".join(rag_context_parts)
-async def _get_previous_questions(
-    user_id: str,
-    firestore_client,
-    max_attempts: int = 3,
-) -> list[str]:
-    """Fetch question texts from the user's last N assessment attempts to avoid duplicates."""
-    try:
-        attempts_ref = (
-            firestore_client.collection("assessmentResults")
-            .document(user_id)
-            .collection("attempts")
-            .order_by("completedAt", direction=fs.Query.DESCENDING)
-            .limit(max_attempts)
-        )
-        docs = attempts_ref.stream()
-        previous_texts: list[str] = []
-        for doc in docs:
-            data = doc.to_dict()
-            answers = data.get("answers", [])
-            for a in answers:
-                previous_texts.append(a.get("questionText", ""))
-        return previous_texts
-    except Exception:
-        return []
-def _build_system_prompt(strand: str, grade_level: str, rag_context: str, variance_seed: int = 0, previous_questions: list[str] | None = None) -> str:
-    strand_upper = strand.upper()
-    coverage_text = _get_strand_coverage(strand_upper)
-    rag_block = ""
-    if rag_context:
-        rag_block = f"""
-OFFICIAL CURRICULUM REFERENCE (from indexed DepEd modules via RAG):
-{rag_context}
-IMPORTANT: Base ALL questions strictly on the curriculum content above.
-Do not invent formulas, definitions, or problem types not found in the
-reference material. If the reference material is insufficient for a topic,
-use only standard DepEd SHS competencies for that strand.
-"""
-    previous_block = ""
-    if previous_questions:
-        previous_lines = [
-            "PREVIOUS QUESTIONS TO AVOID (DO NOT REPEAT):",
-            "The following questions were already asked to this student.",
-            "You MUST NOT reuse or rephrase any of these:",
-        ]
-        for i, q in enumerate(previous_questions[:20], 1):
-            previous_lines.append(f"{i}. {q}")
-        previous_block = "\n".join(previous_lines) + "\n\n"
-    variance_block = ""
-    if variance_seed > 0:
-        variance_block = (
-            f"VARIANCE SEED: {variance_seed}\n"
-            "To ensure unique questions, use this seed to generate DIFFERENT "
-            "numerical values, problem contexts, and variable names compared "
-            "to the standard template.\n\n"
-        )
-    return f"""SYSTEM ROLE:
-You are MathPulse AI's Diagnostic Test Generator. Your job is to create a
-15-item multiple-choice diagnostic assessment for a Filipino SHS student,
-strictly grounded in the DepEd Strengthened SHS Curriculum (SDO Navotas
-modules and DepEd K-12 Curriculum Guides).
-STUDENT CONTEXT:
-- Strand: {strand_upper}
-- Grade Level: {grade_level}
-- Test Purpose: DIAGNOSTIC (pre-learning, not summative -- assess current
-  knowledge to build a personalized learning path)
-{rag_block}
-STRAND-SUBJECT COVERAGE:
-Generate 15 questions distributed across these subjects and domains:
-{coverage_text}
-COMPETENCY CODE FORMAT:
-Assign each question exactly one competency_code from this registry:
-General Math:    NA-WAGE-01, NA-SEQ-01, NA-SEQ-02, NA-FUNC-01,
-                 NA-FUNC-02, NA-FUNC-03, NA-EXP-01, NA-LOG-01,
-                 MG-TRIG-01, NA-FIN-01, NA-FIN-02, NA-FIN-04,
-                 NA-LOGIC-01
-Business Math:   BM-FDP-01, BM-FDP-02, BM-BUS-01, BM-BUS-02,
-                 BM-COMM-01, BM-COMM-02, BM-SW-01, BM-SW-03, BM-SW-04
-Statistics:      SP-RV-01, SP-RV-02, SP-NORM-01, SP-NORM-02,
-                 SP-SAMP-01, SP-SAMP-03, SP-HYP-01
-Finite Math:     FM1-MAT-01, FM2-PROB-01, FM2-PROB-02
-{previous_block}{variance_block}DIFFICULTY DISTRIBUTION (across all 15 questions):
-  - Easy   (Bloom: remembering / understanding): 6 questions (40%)
-  - Medium (Bloom: applying / analyzing):         6 questions (40%)
-  - Hard   (Bloom: evaluating / creating):        3 questions (20%)
-QUESTION RULES:
-1. All questions are 4-option multiple choice (A, B, C, D).
-2. Use Filipino real-life context: peso amounts, Filipino names
-   (Juan, Maria, Jose), Philippine institutions (SSS, PhilHealth,
-   Pag-IBIG, BIR, BDO, local schools, SM malls).
-3. Never use trick questions. Wrong options must be plausible but clearly
-   incorrect to a student who knows the concept.
-4. Include a solution_hint (1-2 sentences) -- this is for the backend
-   scoring engine ONLY. NEVER include it in the client response.
-5. Cover as many different competency codes as possible across 15 items.
-   Do not repeat the same competency code more than twice.
-OUTPUT FORMAT (strict JSON array, no extra text, no markdown):
-[
-  {{
-    "question_id": "DX-<uuid>",
-    "competency_code": "BM-SW-03",
-    "domain": "Business Mathematics",
-    "topic": "Mandatory Deductions",
-    "difficulty": "medium",
-    "bloom_level": "applying",
-    "question_text": "...",
-    "options": {{"A": "...", "B": "...", "C": "...", "D": "..."}},
-    "correct_answer": "C",
-    "solution_hint": "Compute SSS contribution using the prescribed table...",
-    "curriculum_reference": "SDO Navotas Bus. Math SHS 1st Sem - Salaries and Wages"
-  }}
-]
-"""
-async def _call_deepseek(system_prompt: str, user_message: str, temperature: float = 0.7) -> str:
-    try:
-        client = get_deepseek_client()
-        response = client.chat.completions.create(
-            model=CHAT_MODEL,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_message},
-            ],
-            temperature=temperature,
-            response_format={"type": "json_object"},
-        )
-        return response.choices[0].message.content or ""
-    except Exception as e:
-        logger.error(f"DeepSeek API error: {e}")
-        raise HTTPException(status_code=500, detail="AI model unavailable. Please try again later.")
-def _parse_questions_response(raw_response: str) -> List[Dict[str, Any]]:
-    try:
-        data = json.loads(raw_response)
-        if isinstance(data, dict):
-            for key in ("questions", "items", "data", "results"):
-                if key in data and isinstance(data[key], list):
-                    return data[key]
-            for key, value in data.items():
-                if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
-                    if "question_text" in value[0]:
-                        return value
-        if isinstance(data, list):
-            return data
-    except json.JSONDecodeError:
-        pass
-    import re
-    match = re.search(r'\[.*\]', raw_response, re.DOTALL)
-    if match:
-        try:
-            return json.loads(match.group())
-        except json.JSONDecodeError:
-            pass
-    raise ValueError("Could not parse questions from AI response")
-async def _generate_questions(
-    strand: str,
-    grade_level: str,
-    user_id: str = "",
-    firestore_client=None,
-) -> tuple[str, List[Dict[str, Any]]]:
-    test_id = f"DX-{uuid.uuid4().hex[:12]}"
-    # Generate variance seed based on user's attempt count and fetch previous questions
-    variance_seed = 0
-    previous_questions: list[str] = []
-    if firestore_client and user_id:
-        try:
-            attempts_ref = (
-                firestore_client.collection("assessmentResults")
-                .document(user_id)
-                .collection("attempts")
-            )
-            attempts = attempts_ref.stream()
-            attempt_count = sum(1 for _ in attempts)
-            variance_seed = int(time.time()) % 10000 + attempt_count * 137
-            previous_questions = await _get_previous_questions(user_id, firestore_client)
-        except Exception:
-            pass
-    rag_context = _build_rag_context(strand)
-    system_prompt = _build_system_prompt(
-        strand,
-        grade_level,
-        rag_context,
-        variance_seed=variance_seed,
-        previous_questions=previous_questions,
-    )
-    user_message = f"Generate 15 diagnostic questions for a Grade 11 {strand} student."
-    for attempt in range(2):
-        temperature = 0.7 if attempt == 0 else 0.3
-        try:
-            raw_response = await _call_deepseek(system_prompt, user_message, temperature)
-            questions = _parse_questions_response(raw_response)
-            if questions:
-                return test_id, questions[:15]
-        except ValueError:
-            if attempt == 0:
-                logger.warning("Malformed JSON from DeepSeek, retrying with temperature=0.3")
-                continue
-            raise
-    raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")
-async def _store_diagnostic_session(
-    firestore_client: Any,
-    user_id: str,
-    test_id: str,
-    strand: str,
-    grade_level: str,
-    questions: List[Dict[str, Any]],
-) -> bool:
-    try:
-        doc_ref = (
-            firestore_client.collection("diagnosticSessions")
-            .document(test_id)
-        )
-        doc_ref.set({
-            "testId": test_id,
-            "userId": user_id,
-            "generatedAt": fs.SERVER_TIMESTAMP,
-            "strand": strand,
-            "gradeLevel": grade_level,
-            "questions": questions,
-            "status": "in_progress",
-        })
-        return True
-    except Exception as e:
-        logger.error(f"Failed to store diagnostic session: {e}")
-        return False
-def _strip_answers(questions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    stripped = []
-    for q in questions:
-        stripped.append({
-            "question_id": q.get("question_id", ""),
-            "competency_code": q.get("competency_code", ""),
-            "domain": q.get("domain", ""),
-            "topic": q.get("topic", ""),
-            "difficulty": q.get("difficulty", ""),
-            "bloom_level": q.get("bloom_level", ""),
-            "question_text": q.get("question_text", ""),
-            "options": q.get("options", {}),
-            "curriculum_reference": q.get("curriculum_reference", ""),
-        })
-    return stripped
-# ─── ENDPOINT 1: Generate Diagnostic ────��───────────────────────────
-@router.post("/generate", response_model=DiagnosticGenerateResponse)
-async def generate_diagnostic(request: DiagnosticGenerateRequest, req: Request):
-    user = getattr(req.state, "user", None)
-    if not user or not getattr(user, "uid", None):
-        raise HTTPException(status_code=401, detail="Authentication required")
-    try:
-        firestore_client = fs.client()
-        test_id, questions = await _generate_questions(
-            request.strand,
-            request.grade_level,
-            user_id=user.uid,
-            firestore_client=firestore_client,
-        )
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Generation error: {e}\n{traceback.format_exc()}")
-        raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")
-    try:
-        stored = await _store_diagnostic_session(
-            firestore_client,
-            user.uid,
-            test_id,
-            request.strand,
-            request.grade_level,
-            questions,
-        )
-        if not stored:
-            raise HTTPException(status_code=503, detail="Session storage failed. Please try again.")
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Could not store diagnostic session: {e}")
-        raise HTTPException(status_code=503, detail="Database unavailable. Please try again.")
-    client_questions = _strip_answers(questions)
-    return DiagnosticGenerateResponse(
-        test_id=test_id,
-        questions=client_questions,
-        total_items=len(client_questions),
-        estimated_minutes=11.6,
-    )
-# ─── ENDPOINT 2: Submit and Evaluate ─────────────────────────────────
-def _score_responses(stored_questions: List[Dict[str, Any]], responses: List[DiagnosticResponseItem]) -> tuple:
-    question_map: Dict[str, Dict[str, Any]] = {}
-    for q in stored_questions:
-        question_map[q.get("question_id", "")] = q
-    scored = []
-    total_correct = 0
-    domain_correct: Dict[str, int] = {}
-    domain_total: Dict[str, int] = {}
-    comp_attempts: Dict[str, List[bool]] = {}
-    for resp in responses:
-        question = question_map.get(resp.question_id, {})
-        correct_answer = question.get("correct_answer", "")
-        is_correct = (resp.student_answer.strip().upper() == correct_answer.strip().upper())
-        domain = question.get("domain", "Unknown")
-        competency_code = question.get("competency_code", "")
-        if domain not in domain_correct:
-            domain_correct[domain] = 0
-            domain_total[domain] = 0
-        domain_total[domain] += 1
-        if is_correct:
-            domain_correct[domain] += 1
-            total_correct += 1
-        if competency_code not in comp_attempts:
-            comp_attempts[competency_code] = []
-        comp_attempts[competency_code].append(is_correct)
-        scored.append({
-            "question_id": resp.question_id,
-            "competency_code": competency_code,
-            "domain": domain,
-            "topic": question.get("topic", ""),
-            "difficulty": question.get("difficulty", ""),
-            "bloom_level": question.get("bloom_level", ""),
-            "student_answer": resp.student_answer,
-            "correct_answer": correct_answer,
-            "is_correct": is_correct,
-            "time_spent_seconds": resp.time_spent_seconds,
-        })
-    return scored, total_correct, domain_correct, domain_total, comp_attempts
-def _compute_domain_scores(domain_correct: Dict[str, int], domain_total: Dict[str, int]) -> Dict[str, Dict[str, Any]]:
-    domain_scores = {}
-    for domain in domain_total:
-        correct = domain_correct.get(domain, 0)
-        total = domain_total[domain]
-        pct = (correct / total * 100) if total > 0 else 0
-        mastery = "mastered" if pct >= 80 else "developing" if pct >= 60 else "beginning"
-        domain_scores[domain] = {
-            "correct": correct,
-            "total": total,
-            "percentage": round(pct, 1),
-            "mastery_level": mastery,
-        }
-    return domain_scores
-def _compute_risk_profile(
-    total_correct: int,
-    total_items: int,
-    scored_responses: List[Dict[str, Any]],
-    domain_scores: Dict[str, Dict[str, Any]],
-) -> Dict[str, Any]:
-    overall_pct = (total_correct / total_items * 100) if total_items > 0 else 0
-    mastered = [d for d, s in domain_scores.items() if s["mastery_level"] == "mastered"]
-    developing = [d for d, s in domain_scores.items() if s["mastery_level"] == "developing"]
-    beginning = [d for d, s in domain_scores.items() if s["mastery_level"] == "beginning"]
-    critical_gaps = []
-    for resp in scored_responses:
-        code = resp.get("competency_code", "")
-        if not code:
-            continue
-        attempts = [r for r in scored_responses if r.get("competency_code") == code]
-        if len(attempts) >= 2 and not any(r.get("is_correct") for r in attempts):
-            if code not in critical_gaps:
-                critical_gaps.append(code)
-    if overall_pct >= 75 and len(beginning) == 0:
-        overall_risk = "low"
-    elif overall_pct >= 55 or len(beginning) <= 2:
-        overall_risk = "moderate"
-    elif overall_pct >= 40 or len(beginning) <= 4:
-        overall_risk = "high"
-    else:
-        overall_risk = "critical"
-    suggested_path = []
-    for code in critical_gaps:
-        if code not in suggested_path:
-            suggested_path.append(code)
-    for domain in beginning:
-        for prefix in ["NA", "BM", "SP", "FM"]:
-            if domain.upper().startswith(prefix) or any(
-                s.upper().startswith(prefix) for s in [domain]
-            ):
-                for comp_code in LEARNING_PATH_ORDER.get(prefix, []):
-                    if comp_code not in suggested_path:
-                        suggested_path.append(comp_code)
-                break
-    for domain in developing:
-        for prefix in ["NA", "BM", "SP", "FM"]:
-            if any(c.startswith(prefix) for c in COMPETENCY_REGISTRY):
-                for comp_code in LEARNING_PATH_ORDER.get(prefix, []):
-                    if comp_code not in suggested_path:
-                        suggested_path.append(comp_code)
-    interventions = {
-        "low": "Great job! You have a solid foundation. Keep practicing to maintain your skills!",
-        "moderate": "You're making good progress. Focus on the topics where you need more practice. Kaya mo yan!",
-        "high": "Don't worry! With focused practice on your weak areas, you'll improve quickly.",
-        "critical": "Let's work on this together. Start with the basics and build up your confidence step by step.",
-    }
-    return {
-        "overall_risk": overall_risk,
-        "overall_score_percent": round(overall_pct, 1),
-        "mastery_summary": {
-            "mastered": mastered,
-            "developing": developing,
-            "beginning": beginning,
-        },
-        "weak_domains": beginning,
-        "critical_gaps": critical_gaps,
-        "recommended_intervention": interventions.get(overall_risk, interventions["moderate"]),
-        "suggested_learning_path": suggested_path[:20],
-    }
-async def _save_results(
-    firestore_client: Any,
-    user_id: str,
-    test_id: str,
-    strand: str,
-    grade_level: str,
-    scored_responses: List[Dict[str, Any]],
-    domain_scores: Dict[str, Dict[str, Any]],
-    risk_profile: Dict[str, Any],
-    total_correct: int,
-    total_items: int,
-) -> None:
-    try:
-        overall_pct = round(total_correct / total_items * 100, 1) if total_items > 0 else 0
-        firestore_client.collection("diagnosticResults").document(user_id).set({
-            "userId": user_id,
-            "testId": test_id,
-            "takenAt": fs.SERVER_TIMESTAMP,
-            "strand": strand,
-            "gradeLevel": grade_level,
-            "status": "completed",
-            "totalItems": total_items,
-            "totalScore": total_correct,
-            "percentageScore": overall_pct,
-            "responses": scored_responses,
-            "domainScores": domain_scores,
-            "riskProfile": risk_profile,
-        })
-        mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))
-        firestore_client.collection("studentProgress").document(user_id).collection("stats").document("main").set({
-            "learning_path": risk_profile.get("suggested_learning_path", []),
-            "current_topic_index": 0,
-            "total_xp": fs.Increment(50 + mastered_count * 10),
-            "badges": fs.ArrayUnion(["first_assessment"]),
-            "topics_mastered": mastered_count,
-            "diagnostic_completed": True,
-            "overall_risk": risk_profile.get("overall_risk", "moderate"),
-        }, merge=True)
-        firestore_client.collection("diagnosticSessions").document(test_id).update({
-            "status": "completed",
-            "completedAt": fs.SERVER_TIMESTAMP,
-        })
-    except Exception as e:
-        logger.error(f"Firestore save error: {e}")
-        raise
-@router.post("/submit", response_model=DiagnosticSubmitResponse)
-async def submit_diagnostic(request: DiagnosticSubmitRequest, req: Request):
-    user = getattr(req.state, "user", None)
-    if not user or not getattr(user, "uid", None):
-        raise HTTPException(status_code=401, detail="Authentication required")
-    try:
-        firestore_client = fs.client()
-    except Exception as e:
-        raise HTTPException(status_code=503, detail="Database unavailable")
-    try:
-        session_doc = firestore_client.collection("diagnosticSessions").document(request.test_id).get()
-        if not session_doc.exists:
-            raise HTTPException(status_code=404, detail="Diagnostic session not found")
-        session_data = session_doc.to_dict() or {}
-        stored_questions = session_data.get("questions", [])
-        strand = session_data.get("strand", "STEM")
-        grade_level = session_data.get("gradeLevel", "Grade 11")
-        if not stored_questions:
-            raise HTTPException(status_code=400, detail="No questions found for this session")
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Session retrieval error: {e}")
-        raise HTTPException(status_code=500, detail="Failed to retrieve diagnostic session")
-    scored_responses, total_correct, domain_correct, domain_total, _ = _score_responses(
-        stored_questions, request.responses
-    )
-    total_items = len(stored_questions)
-    domain_scores = _compute_domain_scores(domain_correct, domain_total)
-    risk_profile = _compute_risk_profile(total_correct, total_items, scored_responses, domain_scores)
-    await _save_results(
-        firestore_client,
-        user.uid,
-        request.test_id,
-        strand,
-        grade_level,
-        scored_responses,
-        domain_scores,
-        risk_profile,
-        total_correct,
-        total_items,
-    )
-    mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))
-    return DiagnosticSubmitResponse(
-        success=True,
-        overall_risk=risk_profile["overall_risk"],
-        overall_score_percent=risk_profile["overall_score_percent"],
-        mastery_summary=MasterySummary(**risk_profile["mastery_summary"]),
-        recommended_intervention=risk_profile["recommended_intervention"],
-        xp_earned=50 + mastered_count * 10,
-        badge_unlocked="first_assessment",
-        redirect_to="/dashboard",
-    )

routes/quiz_battle.py DELETED Viewed

@@ -1,205 +0,0 @@
-"""
-Quiz Battle API Routes.
-Endpoints:
-- POST /api/quiz-battle/generate       → Generate varied questions for a battle session
-- POST /api/quiz-battle/ingest-pdf     → Trigger PDF ingestion (teacher/admin)
-- GET  /api/quiz-battle/bank-status    → List processed PDFs (teacher/admin)
-"""
-import os
-from typing import List, Optional, Dict, Any
-from datetime import datetime, timezone
-from fastapi import APIRouter, Request, HTTPException, Depends
-from pydantic import BaseModel, Field
-from rag.pdf_ingestion import ingest_pdf, IngestionResult
-from services.question_bank_service import get_questions_for_battle, cache_session_questions, get_cached_session
-from services.variance_engine import apply_variance
-router = APIRouter(prefix="/api/quiz-battle", tags=["quiz-battle"])
-# ── Pydantic Models ──────────────────────────────────────────────────
-class GenerateRequest(BaseModel):
-    grade_level: int = Field(..., ge=7, le=12)
-    topic: str = Field(..., min_length=1)
-    question_count: int = Field(default=10, ge=1, le=50)
-    session_id: str = Field(..., min_length=1)
-    player_ids: List[str] = Field(default_factory=list)
-class GenerateResponse(BaseModel):
-    questions: List[Dict[str, Any]]
-    session_id: str
-class IngestPdfRequest(BaseModel):
-    storage_path: str = Field(..., min_length=1)
-    grade_level: int = Field(..., ge=7, le=12)
-    topic: str = Field(..., min_length=1)
-    force_reingest: bool = False
-class IngestPdfResponse(BaseModel):
-    status: str
-    filename: str
-    question_count: int
-    grade_level: int
-    topic: str
-    storage_path: str
-    timestamp: datetime
-class BankStatusItem(BaseModel):
-    filename: str
-    processed: bool
-    timestamp: Optional[datetime]
-    question_count: int
-    grade_level: int
-    topic: str
-    storage_path: str
-class BankStatusResponse(BaseModel):
-    pdfs: List[BankStatusItem]
-# ── Helper ───────────────────────────────────────────────────────────
-def _get_current_user(request: Request):
-    user = getattr(request.state, "user", None)
-    if user is None:
-        raise HTTPException(status_code=401, detail="Authentication required")
-    return user
-def _is_internal_request(request: Request) -> bool:
-    """Check if request is from an internal service (Cloud Functions)."""
-    internal_secret = request.headers.get("X-Internal-Service")
-    expected = os.getenv("QUIZ_BATTLE_INTERNAL_SECRET")
-    if expected and internal_secret == expected:
-        return True
-    return False
-# ── Endpoints ────────────────────────────────────────────────────────
-@router.post("/generate", response_model=GenerateResponse)
-async def generate_questions(
-    body: GenerateRequest,
-    request: Request,
-):
-    """
-    Generate varied questions for a quiz battle session.
-    Returns questions with choices but WITHOUT correct_answer (unless called
-    by an internal service with X-Internal-Service header).
-    """
-    # 1. Fetch base questions
-    questions = await get_questions_for_battle(
-        body.grade_level,
-        body.topic,
-        body.question_count,
-    )
-    if not questions:
-        raise HTTPException(
-            status_code=404,
-            detail=f"No questions found for grade {body.grade_level}, topic '{body.topic}'",
-        )
-    # 2. Apply variance (with 24h cache)
-    varied = await apply_variance(questions, body.session_id)
-    # 3. Cache session metadata
-    await cache_session_questions(
-        body.session_id,
-        varied,
-        body.player_ids,
-        body.grade_level,
-        body.topic,
-    )
-    # 4. Prepare response
-    is_internal = _is_internal_request(request)
-    response_questions = []
-    for q in varied:
-        q_copy = dict(q)
-        if not is_internal:
-            q_copy.pop("correct_answer", None)
-        response_questions.append(q_copy)
-    return GenerateResponse(questions=response_questions, session_id=body.session_id)
-@router.post("/ingest-pdf", response_model=IngestPdfResponse)
-async def ingest_pdf_endpoint(
-    body: IngestPdfRequest,
-    user=Depends(_get_current_user),
-):
-    """
-    Trigger PDF ingestion into the question bank.
-    Requires teacher or admin role.
-    """
-    if user.role not in ("teacher", "admin"):
-        raise HTTPException(status_code=403, detail="Teacher or admin access required")
-    try:
-        result = await ingest_pdf(
-            storage_path=body.storage_path,
-            grade_level=body.grade_level,
-            topic=body.topic,
-            force_reingest=body.force_reingest,
-        )
-    except FileNotFoundError as e:
-        raise HTTPException(status_code=404, detail=str(e))
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}")
-    return IngestPdfResponse(
-        status="processed" if result.processed else "skipped",
-        filename=result.filename,
-        question_count=result.question_count,
-        grade_level=result.grade_level,
-        topic=result.topic,
-        storage_path=result.storage_path,
-        timestamp=result.timestamp,
-    )
-@router.get("/bank-status", response_model=BankStatusResponse)
-async def bank_status(
-    user=Depends(_get_current_user),
-):
-    """
-    Get the status of all processed PDFs in the question bank.
-    Requires teacher or admin role.
-    """
-    if user.role not in ("teacher", "admin"):
-        raise HTTPException(status_code=403, detail="Teacher or admin access required")
-    from google.cloud import firestore
-    db = firestore.Client(project=os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026"))
-    docs = db.collection("pdf_processing_status").stream()
-    pdfs = []
-    for doc in docs:
-        data = doc.to_dict()
-        pdfs.append(BankStatusItem(
-            filename=doc.id,
-            processed=data.get("processed", False),
-            timestamp=data.get("timestamp"),
-            question_count=data.get("question_count", 0),
-            grade_level=data.get("grade_level", 0),
-            topic=data.get("topic", ""),
-            storage_path=data.get("storage_path", ""),
-        ))
-    return BankStatusResponse(pdfs=pdfs)

routes/quiz_generation_routes.py DELETED Viewed

@@ -1,356 +0,0 @@
-"""
-Unified Quiz Generation Routes.
-Generates dynamic quiz questions using DeepSeek AI + RAG curriculum context.
-Used by: lesson practice quizzes, module quizzes, and quiz battle.
-When new PDFs are ingested into the vectorstore, this endpoint automatically
-picks up the new content via RAG retrieval.
-"""
-from __future__ import annotations
-import json
-import logging
-import random
-import re
-from typing import Any, Dict, List, Optional
-from fastapi import APIRouter, HTTPException, Request
-from pydantic import BaseModel, Field
-from rag.curriculum_rag import (
-    retrieve_curriculum_context,
-    summarize_retrieval_confidence,
-)
-from services.inference_client import (
-    InferenceRequest,
-    create_default_client,
-    get_model_for_task,
-)
-logger = logging.getLogger("mathpulse.quiz_generation")
-router = APIRouter(prefix="/api/quiz", tags=["quiz-generation"])
-_inference_client = None
-def _get_inference_client():
-    global _inference_client
-    if _inference_client is None:
-        _inference_client = create_default_client()
-    return _inference_client
-# ── Request/Response Models ────────────────────────────────────────────
-class QuizGenerationRequest(BaseModel):
-    topic: str = Field(..., min_length=1, description="Lesson topic or competency")
-    subject: str = Field(..., min_length=1, description="Subject name (e.g., 'General Mathematics')")
-    lessonTitle: Optional[str] = Field(default=None, description="Full lesson title")
-    questionCount: int = Field(default=6, ge=1, le=20, description="Number of questions to generate")
-    questionTypes: List[str] = Field(
-        default=["multiple-choice", "true-false", "fill-in-blank"],
-        description="Question types to include",
-    )
-    difficulty: str = Field(default="medium", pattern="^(easy|medium|hard)$")
-    quarter: Optional[int] = Field(default=1, ge=1, le=4)
-    moduleId: Optional[str] = Field(default=None)
-    lessonId: Optional[str] = Field(default=None)
-    competencyCode: Optional[str] = Field(default=None)
-    storagePath: Optional[str] = Field(default=None)
-    userId: Optional[str] = Field(default=None)
-    varianceSeed: Optional[int] = Field(default=None, description="Random seed for variance across generations")
-class QuizQuestion(BaseModel):
-    id: int
-    type: str
-    question: str
-    options: Optional[List[str]] = None
-    correctAnswer: str
-    explanation: str
-class QuizGenerationResponse(BaseModel):
-    questions: List[QuizQuestion]
-    retrievalConfidence: Dict[str, Any]
-    sourceChunks: int
-    generatedAt: str
-# ── Prompt Builder ─────────────────────────────────────────────────────
-def _build_quiz_generation_prompt(
-    topic: str,
-    subject: str,
-    lesson_title: Optional[str],
-    question_count: int,
-    question_types: List[str],
-    difficulty: str,
-    retrieved_context: str,
-    variance_seed: Optional[int] = None,
-) -> str:
-    """Build the DeepSeek prompt for quiz generation with variance."""
-    # Build variance instruction based on seed
-    variance_instruction = ""
-    if variance_seed is not None:
-        variance_instruction = f"""
-8. VARIANCE REQUIREMENT: Use seed {variance_seed} to ensure variety. Generate DIFFERENT questions each time.
-   - Paraphrase concepts in fresh ways
-   - Use different numerical values and scenarios
-   - Vary question phrasing and structure
-   - Avoid repeating similar question patterns"""
-    return f"""You are a DepEd-aligned mathematics quiz generator for Filipino Senior High School students (Grades 11-12).
-Given the following curriculum context about "{topic}" from {subject}, generate {question_count} {difficulty}-difficulty quiz questions.
-## Retrieved Curriculum Context
-{retrieved_context}
-## Instructions
-1. Generate exactly {question_count} questions covering the topic above.
-2. Question types to use: {', '.join(question_types)}
-3. DISTRIBUTION (for {question_count} questions):
-   - 2 items: Recall and Basics (simple recall, definitions, fundamental facts)
-   - 4 items: Direct Application (real-world context with pesos, jeepney, sari-sari store, etc.)
-   - 3 items: Mixed/Interleaved Problems (combine concepts, multi-step reasoning)
-   - 1 item: Metacognitive/Reflective (explain reasoning, justify approach, identify errors)
-4. Difficulty: {difficulty} — appropriate for Grade 11-12 Filipino STEM students.
-5. Use Filipino-localized context where possible (pesos, jeepney, barangay, sari-sari store, etc.).
-6. Each question must be mathematically accurate and curriculum-aligned.
-7. Provide clear explanations for the correct answer.{variance_instruction}
-## Question Type Rules
-- multiple-choice: 4 options (A/B/C/D format), exactly one correct answer
-- true-false: statement that is either True or False
-- fill-in-blank: question with a single numeric or short text answer
-## Output Format
-Return ONLY a valid JSON array. No markdown, no extra text. Format:
-[
-  {{
-    "type": "multiple-choice",
-    "question": "What is the derivative of f(x) = x³?",
-    "options": ["2x²", "3x²", "x²", "3x"],
-    "correctAnswer": "3x²",
-    "explanation": "Using the power rule: d/dx(xⁿ) = nxⁿ⁻¹. So d/dx(x³) = 3x²."
-  }},
-  {{
-    "type": "true-false",
-    "question": "The sum of angles in a triangle is 180 degrees.",
-    "options": ["True", "False"],
-    "correctAnswer": "True",
-    "explanation": "By the triangle angle sum theorem, the interior angles of any Euclidean triangle sum to 180°."
-  }},
-  {{
-    "type": "fill-in-blank",
-    "question": "If f(x) = 2x + 3, then f(4) = ___",
-    "options": null,
-    "correctAnswer": "11",
-    "explanation": "Substitute x = 4: f(4) = 2(4) + 3 = 8 + 3 = 11."
-  }}
-]
-IMPORTANT:
-- Return ONLY the JSON array, no other text
-- Ensure correctAnswer exactly matches one of the options (for MC/TF)
-- For fill-in-blank, correctAnswer is the exact text that fills the blank
-- Generate FRESH, VARIED questions - no two questions should be identical or nearly identical
-- Questions should feel like they were created independently, not templated"""
-# ── Response Parser ────────────────────────────────────────────────────
-def _parse_quiz_response(text: str, expected_count: int) -> List[Dict[str, Any]]:
-    """Parse and validate DeepSeek quiz generation response."""
-    cleaned = text.strip()
-    # Strip markdown fences
-    cleaned = re.sub(r"^```json\s*", "", cleaned, flags=re.IGNORECASE)
-    cleaned = re.sub(r"^```\s*", "", cleaned)
-    cleaned = re.sub(r"\s*```$", "", cleaned)
-    cleaned = cleaned.strip()
-    try:
-        questions = json.loads(cleaned)
-    except json.JSONDecodeError as e:
-        logger.error(f"Failed to parse quiz response as JSON: {e}")
-        # Try to extract JSON array from text
-        match = re.search(r"\[.*\]", cleaned, re.DOTALL)
-        if match:
-            try:
-                questions = json.loads(match.group())
-            except json.JSONDecodeError:
-                raise ValueError(f"Invalid JSON in quiz response: {e}")
-        else:
-            raise ValueError(f"No JSON array found in quiz response")
-    if not isinstance(questions, list):
-        raise ValueError("Quiz response is not a JSON array")
-    validated = []
-    for i, q in enumerate(questions):
-        if not isinstance(q, dict):
-            continue
-        # Ensure required fields
-        if "question" not in q or "correctAnswer" not in q:
-            continue
-        # Normalize field names
-        normalized = {
-            "id": i + 1,
-            "type": q.get("type", "multiple-choice"),
-            "question": q["question"],
-            "correctAnswer": q["correctAnswer"],
-            "explanation": q.get("explanation", ""),
-        }
-        # Handle options
-        if "options" in q and q["options"]:
-            normalized["options"] = q["options"]
-        elif "choices" in q and q["choices"]:
-            normalized["options"] = q["choices"]
-        else:
-            # For true-false, auto-populate options
-            if normalized["type"] == "true-false":
-                normalized["options"] = ["True", "False"]
-            else:
-                normalized["options"] = None
-        validated.append(normalized)
-    if len(validated) < min(expected_count, 3):
-        raise ValueError(f"Only {len(validated)} valid questions parsed, expected at least {min(expected_count, 3)}")
-    return validated[:expected_count]
-# ── Variance Application ───────────────────────────────────────────────
-def _apply_variance(questions: List[Dict[str, Any]], seed: int) -> List[Dict[str, Any]]:
-    """Apply deterministic variance to questions (shuffle choices, etc.)."""
-    rng = random.Random(seed)
-    for q in questions:
-        # Shuffle multiple-choice options while tracking correct answer
-        if q.get("type") == "multiple-choice" and q.get("options"):
-            options = q["options"].copy()
-            correct = q["correctAnswer"]
-            # Only shuffle if correct answer is in options
-            if correct in options:
-                rng.shuffle(options)
-                q["options"] = options
-                q["correctAnswer"] = correct  # Keep original correct answer text
-    return questions
-# ── Endpoints ──────────────────────────────────────────────────────────
-@router.post("/generate", response_model=QuizGenerationResponse)
-async def generate_quiz(request: QuizGenerationRequest):
-    """
-    Generate a dynamic quiz using DeepSeek AI + RAG curriculum context.
-    This endpoint retrieves relevant curriculum chunks from the vectorstore,
-    then calls DeepSeek to generate varied quiz questions based on that context.
-    When new PDFs are ingested, they automatically become available via RAG.
-    """
-    try:
-        # 1. Retrieve curriculum context via RAG
-        query = request.lessonTitle or request.topic
-        chunks = retrieve_curriculum_context(
-            query=query,
-            subject=request.subject,
-            quarter=request.quarter,
-            module_id=request.moduleId,
-            lesson_id=request.lessonId,
-            competency_code=request.competencyCode,
-            storage_path=request.storagePath,
-            top_k=8,
-        )
-        if not chunks:
-            logger.warning(f"No curriculum chunks found for topic '{request.topic}' in subject '{request.subject}'")
-            raise HTTPException(
-                status_code=404,
-                detail=f"No curriculum content found for topic '{request.topic}'. Please ensure PDFs are ingested.",
-            )
-        # Shuffle retrieved chunks for variance BEFORE formatting prompt context
-        # This ensures different lessons → different curriculum context → different generated questions
-        seed = request.varianceSeed if request.varianceSeed else hash(f"{request.topic}:{request.subject}:{request.lessonTitle or ''}:{request.userId or 'anon'}") % (2**32)
-        rng = random.Random(seed)
-        rng.shuffle(chunks)  # In-place shuffle for deterministic variety per seed
-        # Format retrieved chunks for the prompt
-        formatted_context = "\n\n---\n\n".join(
-            f"[Source: {chunk.get('metadata', {}).get('source_file', 'Unknown')}, Page {chunk.get('metadata', {}).get('page', 'N/A')}]\n{chunk.get('document', '')}"
-            for chunk in chunks
-        )
-        confidence = summarize_retrieval_confidence(chunks)
-        # 2. Build generation prompt
-        prompt = _build_quiz_generation_prompt(
-            topic=request.topic,
-            subject=request.subject,
-            lesson_title=request.lessonTitle,
-            question_count=request.questionCount,
-            question_types=request.questionTypes,
-            difficulty=request.difficulty,
-            retrieved_context=formatted_context,
-            variance_seed=request.varianceSeed,
-        )
-        # 3. Call DeepSeek with higher temperature for variance
-        inference_request = InferenceRequest(
-            messages=[
-                {"role": "system", "content": "You are a precise DepEd-aligned curriculum quiz generator. Generate FRESH, VARIED questions each time - do not repeat patterns."},
-                {"role": "user", "content": prompt},
-            ],
-            task_type="quiz_generation",
-            max_new_tokens=3000,
-            temperature=0.7,  # Higher temp for variance
-            top_p=0.9,
-        )
-        raw_response = _get_inference_client().generate_from_messages(inference_request)
-        # 4. Parse response
-        questions = _parse_quiz_response(raw_response, request.questionCount)
-        # 5. Apply variance (shuffle options) with user-based seed for consistency
-        seed = request.varianceSeed if request.varianceSeed else hash(f"{request.topic}:{request.subject}:{request.lessonTitle or ''}:{request.userId or 'anon'}") % (2**32)
-        varied_questions = _apply_variance(questions, seed)
-        # 6. Build response
-        return QuizGenerationResponse(
-            questions=[QuizQuestion(**q) for q in varied_questions],
-            retrievalConfidence=confidence,
-            sourceChunks=len(chunks),
-            generatedAt=__import__("datetime").datetime.now(__import__("datetime").timezone.utc).isoformat(),
-        )
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Quiz generation failed")
-        raise HTTPException(status_code=500, detail=f"Quiz generation failed: {str(e)}")
-@router.get("/health")
-async def quiz_generation_health():
-    """Check quiz generation service health."""
-    model = get_model_for_task("quiz_generation")
-    return {
-        "status": "ok",
-        "activeModel": model,
-        "endpoint": "/api/quiz/generate",
-        "features": ["rag-retrieval", "deepseek-generation", "choice-shuffling", "auto-pdf-updates"],
-    }

routes/rag_routes.py CHANGED Viewed

@@ -2,8 +2,6 @@ from __future__ import annotations
 import json
 import logging
-import os
-import re
 from datetime import datetime, timezone
 from threading import Lock
 from typing import Any, Dict, List, Optional
@@ -11,28 +9,21 @@ from typing import Any, Dict, List, Optional
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel, Field
-from services.inference_client import (
-    InferenceRequest,
-    create_default_client,
-    is_sequential_model,
-    get_model_for_task,
-)
 from rag.curriculum_rag import (
     build_analysis_curriculum_context,
     build_lesson_prompt,
     build_lesson_query,
     build_problem_generation_prompt,
-    format_retrieved_chunks,
     retrieve_curriculum_context,
-    retrieve_lesson_pdf_context,
     summarize_retrieval_confidence,
 )
-from rag.vectorstore_loader import get_vectorstore_health, reset_vectorstore_singleton
 try:
-    from firebase_admin import firestore as firebase_firestore
 except Exception:
-    firebase_firestore = None
 logger = logging.getLogger("mathpulse.rag")
 router = APIRouter(prefix="/api/rag", tags=["rag"])
@@ -50,12 +41,7 @@ def _get_inference_client():
     return _inference_client
-async def _generate_text(
-    prompt: str,
-    task_type: str,
-    max_new_tokens: int = 900,
-    enable_thinking: bool = False,
-) -> str:
     request = InferenceRequest(
         messages=[
             {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
@@ -65,7 +51,6 @@ async def _generate_text(
         max_new_tokens=max_new_tokens,
         temperature=0.2,
         top_p=0.9,
-        enable_thinking=enable_thinking,
     )
     return _get_inference_client().generate_from_messages(request)
@@ -103,21 +88,6 @@ def _log_rag_usage(
         logger.warning("rag_usage logging skipped: %s", exc)
-def _strip_thinking_and_parse(text: str) -> dict:
-    cleaned = text.strip()
-    cleaned = re.sub(r" </think>", "", cleaned, flags=re.DOTALL).strip()
-    if "{" in cleaned and "}" in cleaned:
-        try:
-            start = cleaned.find("{")
-            end = cleaned.rfind("}") + 1
-            parsed = json.loads(cleaned[start:end])
-            if isinstance(parsed, dict):
-                return parsed
-        except Exception:
-            pass
-    return {"explanation": text}
 class RagLessonRequest(BaseModel):
     topic: str
     subject: str
@@ -127,10 +97,6 @@ class RagLessonRequest(BaseModel):
     moduleUnit: Optional[str] = None
     learnerLevel: Optional[str] = None
     userId: Optional[str] = None
-    moduleId: Optional[str] = None
-    lessonId: Optional[str] = None
-    competencyCode: Optional[str] = None
-    storagePath: Optional[str] = None
 class RagProblemRequest(BaseModel):
@@ -149,8 +115,6 @@ class RagAnalysisContextRequest(BaseModel):
 @router.get("/health")
 async def rag_health():
-    active_model = get_model_for_task("rag_lesson")
-    is_seq = is_sequential_model(active_model)
     try:
         health = get_vectorstore_health()
         return {
@@ -158,8 +122,6 @@ async def rag_health():
             "chunkCount": health["chunkCount"],
             "subjects": health["subjects"],
             "lastIngested": datetime.now(timezone.utc).isoformat(),
-            "activeModel": active_model,
-            "isSequentialModel": is_seq,
         }
     except Exception as exc:
         return {
@@ -167,273 +129,68 @@ async def rag_health():
             "chunkCount": 0,
             "subjects": {},
             "lastIngested": None,
-            "activeModel": active_model,
-            "isSequentialModel": is_seq,
             "warning": str(exc),
         }
-def _fetch_youtube_videos(
-    lesson_title: str,
-    subject: str,
-    competency: str,
-    quarter: int,
-    lesson_id: Optional[str] = None,
-) -> List[Dict]:
-    """Fetch up to 3 relevant YouTube videos for a lesson."""
-    try:
-        from services.youtube_service import get_video_search_results
-    except ImportError:
-        return []
-    try:
-        result = get_video_search_results(
-            topic=lesson_title,
-            subject=subject,
-            lesson_context=competency,
-            grade_level=f"Grade {quarter + 10}",
-            lesson_id=lesson_id,
-            max_results=3,
-        )
-        return result.get("videos", [])
-    except Exception as e:
-        logger.warning("YouTube video search failed: %s", e)
-        return []
-def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
-    sections = lesson_data.get("sections", [])
-    section_types = {s.get("type") for s in sections}
-    required = ["introduction", "key_concepts", "video", "worked_examples", "important_notes", "try_it_yourself", "summary"]
-    default_content = {
-        "introduction": {"type": "introduction", "title": "Introduction", "content": f"Welcome to the lesson on {lesson_title}. This topic builds foundational skills for your mathematics journey."},
-        "key_concepts": {"type": "key_concepts", "title": "Key Concepts", "content": f"The following key concepts are essential for mastering {lesson_title}:", "callouts": [{"type": "important", "text": "Review the curriculum PDF for detailed explanations of each concept."}]},
-        "video": {"type": "video", "title": "Video Lesson", "content": "Watch the video explanation below to understand the concepts visually.", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},
-        "worked_examples": {"type": "worked_examples", "title": "Worked Examples", "examples": [{"problem": f"Sample problem for {lesson_title}", "steps": ["Step 1: Identify the given information.", "Step 2: Apply the appropriate formula or method.", "Step 3: Solve step-by-step.", "Step 4: Verify your answer."], "answer": "Solution will vary based on specific problem parameters."}]},
-        "important_notes": {"type": "important_notes", "title": "Important Notes", "bulletPoints": [f"Always read problems carefully before solving {lesson_title} questions.", "Check your units and ensure consistency throughout calculations.", "Practice regularly to build fluency with these concepts."]},
-        "try_it_yourself": {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": [{"question": f"Practice applying {lesson_title} concepts. Solve a similar problem from your textbook or worksheets.", "solution": "Compare your solution with the worked examples above. If stuck, re-read the key concepts section or ask your teacher for guidance."}]},
-        "summary": {"type": "summary", "title": "Summary", "content": f"In this lesson on {lesson_title}, you explored key concepts, worked through examples, and practiced problem-solving techniques. Continue reviewing these materials and seek additional practice to strengthen your understanding."},
-    }
-    def _is_section_blank(section: dict, s_type: str) -> bool:
-        """Check if a section has effectively no content."""
-        if not section:
-            return True
-        text_content = (section.get("content") or "").strip()
-        if s_type in ("introduction", "key_concepts", "video", "summary"):
-            return len(text_content) < 10
-        if s_type == "worked_examples":
-            examples = section.get("examples") or []
-            return not examples or all(not (ex.get("problem") or "").strip() for ex in examples)
-        if s_type == "important_notes":
-            bullets = section.get("bulletPoints") or []
-            return not bullets or all(not (b or "").strip() for b in bullets)
-        if s_type == "try_it_yourself":
-            problems = section.get("practiceProblems") or []
-            return not problems or all(not (p.get("question") or "").strip() for p in problems)
-        return False
-    filled = {}
-    for req_type in required:
-        for existing in sections:
-            if existing.get("type") == req_type:
-                filled[req_type] = existing
-                break
-        else:
-            filled[req_type] = default_content[req_type]
-    # Validate and replace blank sections with defaults
-    for req_type in required:
-        if _is_section_blank(filled[req_type], req_type):
-            filled[req_type] = default_content[req_type]
-    ordered = [filled[t] for t in required]
-    for i, section in enumerate(ordered):
-        s_type = section.get("type")
-        if s_type == "key_concepts" and not section.get("callouts"):
-            section["callouts"] = []
-        if s_type == "worked_examples" and not section.get("examples"):
-            section["examples"] = []
-        if s_type == "important_notes" and not section.get("bulletPoints"):
-            section["bulletPoints"] = []
-        if s_type == "try_it_yourself" and not section.get("practiceProblems"):
-            section["practiceProblems"] = []
-        ordered[i] = section
-    return {**lesson_data, "sections": ordered}
 @router.post("/lesson")
 async def rag_lesson(request: Request, payload: RagLessonRequest):
-    # ── Step 1: Retrieve curriculum chunks ───────────────────────────────────
-    try:
-        chunks, retrieval_mode = retrieve_lesson_pdf_context(
-            topic=build_lesson_query(
-                payload.topic,
-                payload.subject,
-                payload.quarter,
-                lesson_title=payload.lessonTitle,
-                competency=payload.learningCompetency,
-                module_unit=payload.moduleUnit,
-                learner_level=payload.learnerLevel,
-            ),
-            subject=payload.subject,
-            quarter=payload.quarter,
-            lesson_title=payload.lessonTitle,
-            competency=payload.learningCompetency,
-            module_id=payload.moduleId,
-            lesson_id=payload.lessonId,
-            competency_code=payload.competencyCode,
-            storage_path=payload.storagePath,
-            top_k=8,
-        )
-    except Exception as exc:
-        import traceback
-        logger.error(f"RAG retrieval error: {type(exc).__name__}: {exc}\n{traceback.format_exc()}")
-        raise HTTPException(
-            status_code=503,
-            detail={
-                "error": "retrieval_failed",
-                "message": f"Curriculum retrieval failed: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-    if not chunks:
-        raise HTTPException(
-            status_code=404,
-            detail={
-                "error": "no_curriculum_context",
-                "message": f"No curriculum content found for lesson '{payload.lessonTitle}' ({payload.subject} Q{payload.quarter}). Please ensure the PDF has been ingested.",
-                "retrievalBand": "low",
-                "sources": [],
-            },
-        )
-    # ── Step 2: Build prompt ─────────────────────────────────────────────────
-    try:
-        prompt = build_lesson_prompt(
-            lesson_title=payload.lessonTitle or payload.topic,
-            competency=payload.learningCompetency or payload.topic,
-            grade_level="Grade 11-12",
-            subject=payload.subject,
-            quarter=payload.quarter,
-            learner_level=payload.learnerLevel,
-            module_unit=payload.moduleUnit,
-            curriculum_chunks=chunks,
-            competency_code=payload.competencyCode,
-        )
-    except Exception as exc:
-        logger.error(f"RAG prompt build error: {type(exc).__name__}: {exc}")
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": "prompt_build_failed",
-                "message": f"Failed to build lesson prompt: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-    # ── Step 3: AI inference ─────────────────────────────────────────────────
-    try:
-        raw_explanation = await _generate_text(
-            prompt,
-            task_type="rag_lesson",
-            max_new_tokens=1800,
-            enable_thinking=True,
-        )
-    except Exception as exc:
-        logger.error(f"RAG inference error: {type(exc).__name__}: {exc}")
-        raise HTTPException(
-            status_code=502,
-            detail={
-                "error": "inference_failed",
-                "message": f"AI model call failed: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-    # ── Step 4: Parse & validate response ────────────────────────────────────
-    try:
-        parsed_lesson = _strip_thinking_and_parse(raw_explanation)
-        parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)
-    except Exception as exc:
-        logger.error(f"RAG parse error: {type(exc).__name__}: {exc}")
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": "parse_failed",
-                "message": f"Failed to parse AI response: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-    # ── Step 5: Enrich with videos ───────────────────────────────────────────
-    if parsed_lesson.get("sections"):
-        video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
-        if video_section:
-            try:
-                videos = _fetch_youtube_videos(
-                    payload.lessonTitle or payload.topic,
-                    payload.subject,
-                    payload.learningCompetency or "",
-                    payload.quarter,
-                    lesson_id=payload.lessonId,
-                )
-                if videos:
-                    # Primary video for backwards compatibility
-                    primary = videos[0]
-                    video_section["videoId"] = primary.get("videoId", "")
-                    video_section["videoTitle"] = primary.get("title", "")
-                    video_section["videoChannel"] = primary.get("channelTitle", "")
-                    video_section["embedUrl"] = f"https://www.youtube.com/embed/{primary.get('videoId', '')}"
-                    video_section["thumbnailUrl"] = primary.get("thumbnailUrl", "")
-                    # New: full videos array for Smart Video Integration
-                    video_section["videos"] = videos
-            except Exception as exc:
-                logger.warning("YouTube enrichment skipped: %s", exc)
-    # ── Step 6: Assemble response ────────────────────────────────────────────
     retrieval_summary = summarize_retrieval_confidence(chunks)
-    try:
-        _log_rag_usage(
-            request,
-            event_type="lesson",
-            topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
-            subject=payload.subject,
-            quarter=payload.quarter,
-            chunks=chunks,
-        )
-    except Exception as exc:
-        logger.warning("RAG usage logging skipped: %s", exc)
-    needs_review = parsed_lesson.get("needsReview", False)
-    if retrieval_summary.get("band") == "low":
-        needs_review = True
     return {
-        **parsed_lesson,
         "retrievalConfidence": retrieval_summary.get("confidence", 0.0),
         "retrievalBand": retrieval_summary.get("band", "low"),
-        "retrievalMode": retrieval_mode,
-        "needsReview": needs_review,
         "sources": [
             {
                 "subject": row.get("subject"),
                 "quarter": row.get("quarter"),
                 "source_file": row.get("source_file"),
-                "storage_path": row.get("storage_path"),
                 "page": row.get("page"),
                 "score": row.get("score"),
                 "content_domain": row.get("content_domain"),
                 "chunk_type": row.get("chunk_type"),
-                "content": row.get("content"),
             }
             for row in chunks
         ],
-        "activeModel": get_model_for_task("rag_lesson"),
     }
@@ -446,20 +203,19 @@ async def rag_generate_problem(request: Request, payload: RagProblemRequest):
         top_k=5,
     )
     prompt = build_problem_generation_prompt(payload.topic, payload.difficulty, chunks)
-    raw = await _generate_text(
-        prompt,
-        task_type="quiz_generation",
-        max_new_tokens=600,
-        enable_thinking=False,
-    )
-    parsed = _strip_thinking_and_parse(raw)
     problem = str(parsed.get("problem") or raw)
-    if not problem or problem.startswith("{"):
-        problem = str(parsed.get("content") or str(parsed))
-    if len(problem) < 3 or problem.startswith("{"):
-        problem = raw
     solution = str(parsed.get("solution") or "")
     competency_ref = str(parsed.get("competencyReference") or "DepEd competency-aligned")
@@ -511,4 +267,4 @@ async def rag_analysis_context(request: Request, payload: RagAnalysisContextRequ
         chunks=chunks,
     )
-    return {"curriculumContext": "\n".join(lines)}

 import json
 import logging
 from datetime import datetime, timezone
 from threading import Lock
 from typing import Any, Dict, List, Optional
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel, Field
+from services.inference_client import InferenceRequest, create_default_client
 from rag.curriculum_rag import (
     build_analysis_curriculum_context,
     build_lesson_prompt,
     build_lesson_query,
     build_problem_generation_prompt,
     retrieve_curriculum_context,
     summarize_retrieval_confidence,
 )
+from rag.vectorstore_loader import get_vectorstore_health
 try:
+    from firebase_admin import firestore as firebase_firestore  # type: ignore[import-not-found]
 except Exception:
+    firebase_firestore = None  # type: ignore[assignment]
 logger = logging.getLogger("mathpulse.rag")
 router = APIRouter(prefix="/api/rag", tags=["rag"])
     return _inference_client
+async def _generate_text(prompt: str, task_type: str, max_new_tokens: int = 900) -> str:
     request = InferenceRequest(
         messages=[
             {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
         max_new_tokens=max_new_tokens,
         temperature=0.2,
         top_p=0.9,
     )
     return _get_inference_client().generate_from_messages(request)
         logger.warning("rag_usage logging skipped: %s", exc)
 class RagLessonRequest(BaseModel):
     topic: str
     subject: str
     moduleUnit: Optional[str] = None
     learnerLevel: Optional[str] = None
     userId: Optional[str] = None
 class RagProblemRequest(BaseModel):
 @router.get("/health")
 async def rag_health():
     try:
         health = get_vectorstore_health()
         return {
             "chunkCount": health["chunkCount"],
             "subjects": health["subjects"],
             "lastIngested": datetime.now(timezone.utc).isoformat(),
         }
     except Exception as exc:
         return {
             "chunkCount": 0,
             "subjects": {},
             "lastIngested": None,
             "warning": str(exc),
         }
 @router.post("/lesson")
 async def rag_lesson(request: Request, payload: RagLessonRequest):
+    retrieval_query = build_lesson_query(
+        payload.topic,
+        payload.subject,
+        payload.quarter,
+        lesson_title=payload.lessonTitle,
+        competency=payload.learningCompetency,
+        module_unit=payload.moduleUnit,
+        learner_level=payload.learnerLevel,
+    )
+    chunks = retrieve_curriculum_context(
+        query=retrieval_query,
+        subject=payload.subject,
+        quarter=payload.quarter,
+        top_k=5,
+    )
+    prompt = build_lesson_prompt(
+        lesson_title=payload.lessonTitle or payload.topic,
+        competency=payload.learningCompetency or payload.topic,
+        grade_level="Grade 11-12",
+        subject=payload.subject,
+        quarter=payload.quarter,
+        learner_level=payload.learnerLevel,
+        module_unit=payload.moduleUnit,
+        curriculum_chunks=chunks,
+    )
+    explanation = await _generate_text(prompt, task_type="lesson_generation")
     retrieval_summary = summarize_retrieval_confidence(chunks)
+    _log_rag_usage(
+        request,
+        event_type="lesson",
+        topic=retrieval_query,
+        subject=payload.subject,
+        quarter=payload.quarter,
+        chunks=chunks,
+    )
     return {
+        "explanation": explanation,
         "retrievalConfidence": retrieval_summary.get("confidence", 0.0),
         "retrievalBand": retrieval_summary.get("band", "low"),
+        "retrievalQuery": retrieval_query,
+        "needsReview": retrieval_summary.get("band", "low") == "low",
         "sources": [
             {
                 "subject": row.get("subject"),
                 "quarter": row.get("quarter"),
                 "source_file": row.get("source_file"),
                 "page": row.get("page"),
                 "score": row.get("score"),
+                "content": row.get("content"),
                 "content_domain": row.get("content_domain"),
                 "chunk_type": row.get("chunk_type"),
             }
             for row in chunks
         ],
     }
         top_k=5,
     )
     prompt = build_problem_generation_prompt(payload.topic, payload.difficulty, chunks)
+    raw = await _generate_text(prompt, task_type="quiz_generation")
+    parsed: Dict[str, Any] = {}
+    cleaned = raw.strip()
+    if "{" in cleaned and "}" in cleaned:
+        try:
+            start = cleaned.find("{")
+            end = cleaned.rfind("}") + 1
+            parsed = json.loads(cleaned[start:end])
+        except Exception:
+            parsed = {}
     problem = str(parsed.get("problem") or raw)
     solution = str(parsed.get("solution") or "")
     competency_ref = str(parsed.get("competencyReference") or "DepEd competency-aligned")
         chunks=chunks,
     )
+    return {"curriculumContext": "\n".join(lines)}

routes/video_routes.py DELETED Viewed

@@ -1,102 +0,0 @@
-"""
-Video lesson search routes for MathPulse AI.
-POST /api/lessons/videos/search — smart YouTube video search with RAG enrichment.
-"""
-from __future__ import annotations
-import logging
-from typing import List, Optional
-from fastapi import APIRouter, HTTPException, Request
-from pydantic import BaseModel, Field
-from services.youtube_service import (
-    get_video_search_results,
-    YOUTUBE_API_KEY,
-)
-logger = logging.getLogger("mathpulse.videos")
-router = APIRouter(prefix="/api/lessons/videos", tags=["videos"])
-class VideoSearchRequest(BaseModel):
-    topic: str = Field(..., min_length=1, max_length=200)
-    grade_level: str = Field(default="Grade 11", max_length=50)
-    subject: str = Field(default="General Mathematics", max_length=100)
-    lesson_context: str = Field(default="", max_length=1000)
-    lesson_id: Optional[str] = Field(default=None, max_length=100)
-class VideoResult(BaseModel):
-    videoId: str
-    title: str
-    channelTitle: str
-    thumbnailUrl: str
-    durationSeconds: int
-class VideoSearchResponse(BaseModel):
-    videos: List[VideoResult]
-    cached: bool = False
-@router.post("/search", response_model=VideoSearchResponse)
-async def search_videos(request: Request, payload: VideoSearchRequest):
-    """
-    Search for relevant educational YouTube videos for a lesson topic.
-    - Checks Firestore video_cache first (7-day TTL)
-    - Enriches the search query with RAG curriculum keywords
-    - Filters for educational channels, medium/long duration, HD quality
-    - Returns up to 3 video results
-    """
-    # Graceful degradation: if YouTube API key is not configured, return 503
-    # so the frontend can hide the video section silently
-    if not YOUTUBE_API_KEY:
-        logger.warning("YouTube API key not configured")
-        raise HTTPException(
-            status_code=503,
-            detail={
-                "error": "youtube_api_not_configured",
-                "message": "YouTube API key is not configured on the server.",
-            },
-        )
-    try:
-        result = get_video_search_results(
-            topic=payload.topic,
-            subject=payload.subject,
-            lesson_context=payload.lesson_context,
-            grade_level=payload.grade_level,
-            lesson_id=payload.lesson_id,
-            max_results=3,
-        )
-        videos = [
-            VideoResult(
-                videoId=v["videoId"],
-                title=v["title"],
-                channelTitle=v["channelTitle"],
-                thumbnailUrl=v["thumbnailUrl"],
-                durationSeconds=v["durationSeconds"],
-            )
-            for v in result.get("videos", [])
-        ]
-        return VideoSearchResponse(
-            videos=videos,
-            cached=result.get("cached", False),
-        )
-    except HTTPException:
-        raise
-    except Exception as exc:
-        logger.error("Video search endpoint error: %s", exc)
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": "video_search_failed",
-                "message": f"Failed to search videos: {exc}",
-            },
-        )

scripts/download_vectorstore_from_firebase.py CHANGED Viewed

@@ -1,11 +1,10 @@
 """
 Download vectorstore directory from Firebase Storage at container startup.
-Run: python /app/scripts/download_vectorstore_from_firebase.py
 """
 from __future__ import annotations
-import json
 import logging
 import os
 import sys
@@ -13,66 +12,17 @@ from pathlib import Path
 logger = logging.getLogger("mathpulse.download_vectorstore")
-REMOTE_PREFIX = "vectorstore/"
-_FIREBASE_INITIALIZED = False
-def _init_firebase() -> any | None:
-    global _FIREBASE_INITIALIZED
-    if _FIREBASE_INITIALIZED:
-        try:
-            from firebase_admin import storage as fb_storage
-            return fb_storage.bucket()
-        except Exception as e:
-            logger.warning("Firebase storage unavailable: %s", e)
-            _FIREBASE_INITIALIZED = False
-            return None
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage
-    except ImportError:
-        logger.warning("firebase_admin not installed")
-        return None
-    if firebase_admin._apps:
-        _FIREBASE_INITIALIZED = True
-        try:
-            return storage.bucket()
-        except Exception as e:
-            logger.warning("Firebase storage bucket unavailable: %s", e)
-            return None
-    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
-    try:
-        if sa_json:
-            creds = credentials.Certificate(json.loads(sa_json))
-        elif sa_file and Path(sa_file).exists():
-            creds = credentials.Certificate(sa_file)
-        else:
-            creds = credentials.ApplicationDefault()
-        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-        _FIREBASE_INITIALIZED = True
-        return storage.bucket()
-    except Exception as e:
-        logger.error("Firebase init failed: %s", e)
-        return None
-def _resolve_dest_dir() -> Path:
-    raw = os.getenv("CURRICULUM_VECTORSTORE_DIR") or os.getenv("VECTORSTORE_DIR")
-    if raw:
-        return Path(raw)
-    return Path("/app/datasets/vectorstore")
 def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
-    bucket = _init_firebase()
     if bucket is None:
         logger.warning("Firebase Storage not available, vectorstore download skipped")
         return False
@@ -85,7 +35,6 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
         return False
     downloaded = 0
-    skipped = 0
     errors = 0
     for blob in blobs:
@@ -97,10 +46,6 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
         local_path.parent.mkdir(parents=True, exist_ok=True)
         try:
-            if local_path.exists() and blob.size is not None and local_path.stat().st_size == blob.size:
-                logger.info("Skipped (already up-to-date): %s", blob.name)
-                skipped += 1
-                continue
             blob.download_to_filename(str(local_path))
             logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
             downloaded += 1
@@ -108,20 +53,10 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
             logger.error("Failed to download %s: %s", blob.name, e)
             errors += 1
-    logger.info("Download complete: %d downloaded, %d skipped, %d errors", downloaded, skipped, errors)
     return errors == 0
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-    dest_dir = _resolve_dest_dir()
-    print(f"INFO: Using vectorstore destination: {dest_dir}")
-    print(f"INFO: CURRICULUM_VECTORSTORE_DIR env: {os.environ.get('CURRICULUM_VECTORSTORE_DIR', 'not set')}")
-    print(f"INFO: VECTORSTORE_DIR env: {os.environ.get('VECTORSTORE_DIR', 'not set')}")
-    print(f"INFO: FIREBASE_STORAGE_BUCKET env: {os.environ.get('FIREBASE_STORAGE_BUCKET', 'not set')}")
-    print(f"INFO: FIREBASE_SERVICE_ACCOUNT_JSON length: {len(os.environ.get('FIREBASE_SERVICE_ACCOUNT_JSON', ''))}")
-    result = download_vectorstore(dest_dir, REMOTE_PREFIX)
-    if result:
-        print("SUCCESS: Vectorstore download completed")
-    else:
-        print("FAILURE: Vectorstore download failed")

 """
 Download vectorstore directory from Firebase Storage at container startup.
+Run: python -m hf_space_test.scripts.download_vectorstore_from_firebase
 """
 from __future__ import annotations
 import logging
 import os
 import sys
 logger = logging.getLogger("mathpulse.download_vectorstore")
+sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
+from backend.rag.firebase_storage_loader import _init_firebase_storage
+REMOTE_PREFIX = "vectorstore/"
+LOCAL_DEST_DIR = Path("/app/datasets/vectorstore")
 def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
+    """Download all files under a prefix from Firebase Storage, preserving structure."""
+    _, bucket = _init_firebase_storage()
     if bucket is None:
         logger.warning("Firebase Storage not available, vectorstore download skipped")
         return False
         return False
     downloaded = 0
     errors = 0
     for blob in blobs:
         local_path.parent.mkdir(parents=True, exist_ok=True)
         try:
             blob.download_to_filename(str(local_path))
             logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
             downloaded += 1
             logger.error("Failed to download %s: %s", blob.name, e)
             errors += 1
+    logger.info("Download complete: %d files downloaded, %d errors", downloaded, errors)
     return errors == 0
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+    download_vectorstore(LOCAL_DEST_DIR, REMOTE_PREFIX)

scripts/ingest_curriculum.py CHANGED Viewed

@@ -1,159 +1,244 @@
 from __future__ import annotations
-import argparse
-import hashlib
 import json
-import logging
 import os
-import sys
 from pathlib import Path
-from typing import Any, Dict, List
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-from rag.vectorstore_loader import (
-    get_vectorstore_components,
-    reset_vectorstore_singleton,
-)
-logger = logging.getLogger(__name__)
-def _resolve_data_dir(raw: str | None) -> Path:
-    if raw:
-        p = Path(raw)
-        if p.is_absolute():
-            return p
-        p = Path.cwd() / raw
-        if p.exists():
-            return p
-    default = Path(__file__).resolve().parents[1] / "datasets"
-    return default
-def _iter_json_files(data_dir: Path):
-    for file in sorted(data_dir.rglob("*")):
-        if file.suffix not in {".json", ".jsonl"}:
-            continue
-        yield file
-def _load_records(file_path: Path) -> List[Dict[str, Any]]:
-    records: List[Dict[str, Any]] = []
-    try:
-        raw = file_path.read_text(encoding="utf-8").strip()
-        if file_path.suffix == ".jsonl":
-            for lineno, line in enumerate(raw.splitlines(), start=1):
-                line = line.strip()
-                if not line:
-                    continue
-                try:
-                    records.append(json.loads(line))
-                except json.JSONDecodeError:
-                    logger.warning("Skipping malformed JSONL line %s:%d", file_path.name, lineno)
-        else:
-            parsed = json.loads(raw)
-            if isinstance(parsed, list):
-                records.extend(parsed)
-            elif isinstance(parsed, dict):
-                records.append(parsed)
-    except Exception as exc:
-        logger.warning("Failed to parse %s: %s", file_path.name, exc)
-    return records
-def _build_id(source_file: str, page: int, content: str) -> str:
-    key = f"{source_file}::{page}::{content[:120]}"
-    return hashlib.sha256(key.encode()).hexdigest()[:40]
 def main() -> None:
-    parser = argparse.ArgumentParser(description="Ingest DepEd SHS curriculum JSON/JSONL into ChromaDB")
-    parser.add_argument("--data-dir", default=None, help="Directory containing .json/.jsonl files")
-    parser.add_argument("--reset", action="store_true", help="Reset the vectorstore singleton before ingestion")
-    args = parser.parse_args()
-    data_dir = _resolve_data_dir(args.data_dir)
-    logger.info("Ingesting from: %s", data_dir)
-    if args.reset:
-        reset_vectorstore_singleton()
-        _, collection, _ = get_vectorstore_components()
-        try:
-            collection.delete(ids=collection.get(include=[])["ids"])
-        except Exception:
-            pass
-        reset_vectorstore_singleton()
-    total_processed = 0
-    total_upserted = 0
-    total_errors = 0
-    _, collection, embedder = get_vectorstore_components()
-    for file_path in _iter_json_files(data_dir):
-        records = _load_records(file_path)
-        documents: List[str] = []
-        metadatas: List[Dict[str, Any]] = []
-        ids: List[str] = []
-        embeddings_list: List[List[float]] = []
-        for record in records:
-            total_processed += 1
-            content = str(record.get("content") or "").strip()
-            if not content:
-                logger.debug("Skipping empty content in %s", file_path.name)
-                continue
-            try:
-                subject = str(record.get("subject") or "unknown")
-                quarter = int(record.get("quarter") or 0)
-                page = int(record.get("page") or 0)
-                content_domain = str(record.get("content_domain") or "unknown")
-                chunk_type = str(record.get("chunk_type") or "unknown")
-                source_file = str(record.get("source_file") or file_path.name)
-                embedding = embedder.encode(content).tolist()
-                chunk_id = _build_id(source_file, page, content)
                 metadata = {
                     "subject": subject,
                     "quarter": quarter,
-                    "content_domain": content_domain,
                     "chunk_type": chunk_type,
-                    "source_file": source_file,
-                    "page": page,
                 }
-                documents.append(content)
                 metadatas.append(metadata)
                 ids.append(chunk_id)
-                embeddings_list.append(embedding)
-            except Exception as exc:
-                total_errors += 1
-                logger.warning("Error processing record in %s: %s", file_path.name, exc)
-        if documents:
-            try:
-                collection.upsert(
-                    ids=ids,
-                    documents=documents,
-                    metadatas=metadatas,
-                    embeddings=embeddings_list,
-                )
-                total_upserted += len(documents)
-                logger.info("Upserted %d chunks from %s", len(documents), file_path.name)
-            except Exception as exc:
-                total_errors += len(documents)
-                logger.warning("Failed to upsert batch from %s: %s", file_path.name, exc)
-    print(f"=== Ingestion Summary ===")
-    print(f"Total records processed: {total_processed}")
-    print(f"Total chunks upserted:  {total_upserted}")
-    print(f"Total errors:           {total_errors}")
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    main()

 from __future__ import annotations
 import json
 import os
+import re
+from collections import Counter
+from datetime import datetime, timezone
 from pathlib import Path
+from typing import Dict, List
+import chromadb
+import pdfplumber
+from huggingface_hub import snapshot_download
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from sentence_transformers import SentenceTransformer
+BASE_DIR = Path(__file__).resolve().parents[1]
+def _resolve_default_dir(local_path: Path, data_path: Path) -> Path:
+    return data_path if data_path.parent.exists() else local_path
# Directory holding the source curriculum PDFs. The CURRICULUM_DIR environment
# variable wins; otherwise /data/curriculum is used when /data exists, else the
# repo-local datasets/curriculum folder.
CURRICULUM_DIR = Path(
    os.getenv(
        "CURRICULUM_DIR",
        str(_resolve_default_dir(BASE_DIR / "datasets" / "curriculum", Path("/data/curriculum"))),
    )
)
# Directory of the persistent Chroma store; resolved the same way as CURRICULUM_DIR
# (VECTORSTORE_DIR env var, then /data/vectorstore, then datasets/vectorstore).
VECTORSTORE_DIR = Path(
    os.getenv(
        "VECTORSTORE_DIR",
        str(_resolve_default_dir(BASE_DIR / "datasets" / "vectorstore", Path("/data/vectorstore"))),
    )
)
# Name of the Chroma collection that main() drops and recreates on every run.
COLLECTION_NAME = "curriculum_chunks"
# SentenceTransformer model used to embed every chunk.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
# Optional Hugging Face repo that _ensure_curriculum_pdfs() downloads PDFs from
# when CURRICULUM_DIR contains none. An empty repo id disables the fallback.
CURRICULUM_SOURCE_REPO_ID = os.getenv("CURRICULUM_SOURCE_REPO_ID", "").strip()
CURRICULUM_SOURCE_REPO_TYPE = os.getenv("CURRICULUM_SOURCE_REPO_TYPE", "dataset").strip() or "dataset"
CURRICULUM_SOURCE_REVISION = os.getenv("CURRICULUM_SOURCE_REVISION", "main").strip() or "main"
# Exact PDF filename -> subject key stored in chunk metadata. main() falls back
# to "general_math" for any filename not listed here.
SUBJECT_MAP = {
    "SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": "general_math",
    "GENERAL-MATHEMATICS-1-2.pdf": "general_math",
    "GENERAL-MATHEMATICS-1.pdf": "general_math",
    "SDO_Navotas_Bus.Math_SHS_1stSem.FV-5.pdf": "business_math",
    "SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": "business_math",
    "SDO_Navotas_STAT_PROB_SHS_1stSem.FV-3.pdf": "stat_prob",
    "SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf": "stat_prob",
    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV-4.pdf": "org_management",
    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": "org_management",
}
# Quarter number -> lowercase substrings. infer_quarter() returns the FIRST
# quarter (in this insertion order) with any hint present in the chunk, so
# earlier quarters take precedence when several match.
QUARTER_HINTS = {
    1: ["q1", "quarter 1", "business", "finance", "arithmetic sequence", "geometric sequence", "series"],
    2: ["q2", "quarter 2", "measurement", "conversion", "functions", "piecewise", "statistics"],
    3: ["q3", "quarter 3", "trigonometry", "practical measurements", "random variable", "sampling"],
    4: ["q4", "quarter 4", "compound interest", "annuities", "loan", "hypothesis testing", "linear regression", "logic"],
}
# Content-domain code -> lowercase substrings. infer_domain() picks the code
# with the most hints present in the chunk.
# NOTE(review): the codes presumably abbreviate curriculum strands (e.g. "DP" for
# data/probability) — confirm the expansions with the curriculum owners.
DOMAIN_HINTS = {
    "NA": ["number", "algebra", "sequence", "series", "interest", "annuity", "loan", "logic"],
    "MG": ["measurement", "geometry", "trigonometry", "graph", "function", "piecewise"],
    "DP": ["data", "probability", "statistics", "random variable", "sampling", "hypothesis", "regression"],
}
# Chunk-type label -> lowercase substrings. infer_chunk_type() picks the label
# with the most hints present and defaults to "content_explanation".
CHUNK_TYPE_HINTS = {
    "learning_competency": ["learning competency", "code", "most essential learning", "melc", "competency"],
    "example_problem": ["example", "solve", "problem", "exercise", "activity"],
    "content_explanation": ["discussion", "content", "concept", "definition", "explain"],
}
+def _norm(text: str) -> str:
+    return re.sub(r"\s+", " ", text.strip().lower())
def infer_quarter(text: str) -> int:
    """Guess which quarter a chunk belongs to from keyword hints.

    The normalized text is scanned against ``QUARTER_HINTS`` in the mapping's
    iteration order; the first quarter with any hint occurring as a substring
    is returned.

    Returns:
        The matching quarter key, or ``1`` when no hint matches.
    """
    probe = _norm(text)
    return next(
        (
            quarter
            for quarter, hints in QUARTER_HINTS.items()
            if any(hint in probe for hint in hints)
        ),
        1,
    )
def infer_domain(text: str) -> str:
    """Guess the content-domain code of a chunk from keyword hints.

    Each domain in ``DOMAIN_HINTS`` is scored by how many of its hints occur as
    substrings of the normalized text.

    Returns:
        The highest-scoring domain code (ties go to the domain listed first),
        or ``"NA"`` when no hint matches at all.
    """
    probe = _norm(text)
    scores: Dict[str, int] = {
        domain: sum(hint in probe for hint in hints)
        for domain, hints in DOMAIN_HINTS.items()
    }
    if not any(scores.values()):
        return "NA"
    return max(scores, key=scores.get)
def infer_chunk_type(text: str) -> str:
    """Guess the chunk type of a piece of curriculum text from keyword hints.

    Each label in ``CHUNK_TYPE_HINTS`` is scored by how many of its hints occur
    as substrings of the normalized text.

    Returns:
        The highest-scoring label (ties go to the label listed first), or
        ``"content_explanation"`` when the best score is zero.
    """
    probe = _norm(text)
    scores: Dict[str, int] = {
        label: sum(hint in probe for hint in hints)
        for label, hints in CHUNK_TYPE_HINTS.items()
    }
    best = max(scores, key=scores.get)
    if scores[best] > 0:
        return best
    return "content_explanation"
def extract_pdf_pages(pdf_path: Path) -> List[Dict[str, object]]:
    """Extract the text of every non-empty page of a PDF.

    For each page the plain text is followed by one line per table row, with
    the row's non-empty cells joined by ``" | "``.

    Args:
        pdf_path: Location of the PDF to open with ``pdfplumber``.

    Returns:
        One ``{"page": <1-based page number>, "text": <combined text>}`` dict per
        page that yielded any text; pages with no text and no table rows are
        omitted.
    """
    pages: List[Dict[str, object]] = []
    with pdfplumber.open(str(pdf_path)) as pdf:
        for page_number, page in enumerate(pdf.pages, start=1):
            body_text = page.extract_text() or ""
            table_lines: List[str] = []
            for table in page.extract_tables() or []:
                for raw_row in table:
                    # Falsy cells (None, "") and whitespace-only cells are dropped.
                    kept = [str(cell).strip() for cell in (raw_row or []) if str(cell or "").strip()]
                    if kept:
                        table_lines.append(" | ".join(kept))
            table_text = "\n".join(table_lines)
            combined = f"{body_text}\n{table_text}".strip()
            if not combined:
                continue
            pages.append({"page": page_number, "text": combined})
    return pages
def chunk_text(page_text: str) -> List[str]:
    """Split one page of text into overlapping chunks for embedding.

    Uses ``RecursiveCharacterTextSplitter`` with ``chunk_size=2000`` and
    ``chunk_overlap=200``, trying paragraph, line, sentence, and word
    separators in that order.

    Returns:
        The chunks with surrounding whitespace removed; chunks that are empty
        after stripping are discarded.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    stripped = (piece.strip() for piece in splitter.split_text(page_text))
    return [piece for piece in stripped if piece]
def _ensure_curriculum_pdfs() -> List[Path]:
    """Return the curriculum PDFs, downloading them first when none are present locally.

    PDFs directly inside ``CURRICULUM_DIR`` are used when any exist. Otherwise
    a snapshot of the repo named by ``CURRICULUM_SOURCE_REPO_ID`` is fetched and
    every PDF in it (at any depth) is copied flat into ``CURRICULUM_DIR``, which
    is created if needed. Because the copy is flat, snapshot files sharing a
    basename overwrite one another.

    Returns:
        The sorted list of ``*.pdf`` paths in ``CURRICULUM_DIR``.

    Raises:
        SystemExit: If there are no local PDFs and no source repo is configured,
            or the source repo contains no PDFs.
    """
    local_pdfs = sorted(CURRICULUM_DIR.glob("*.pdf"))
    if local_pdfs:
        return local_pdfs

    if not CURRICULUM_SOURCE_REPO_ID:
        raise SystemExit(
            "No PDF files found in datasets/curriculum/ and CURRICULUM_SOURCE_REPO_ID is not set. "
            "Upload the PDFs to a Hugging Face repo and point CURRICULUM_SOURCE_REPO_ID at it."
        )

    snapshot_path = snapshot_download(
        repo_id=CURRICULUM_SOURCE_REPO_ID,
        repo_type=CURRICULUM_SOURCE_REPO_TYPE,
        revision=CURRICULUM_SOURCE_REVISION,
        allow_patterns=["*.pdf", "**/*.pdf"],
    )
    remote_pdfs = sorted(Path(snapshot_path).rglob("*.pdf"))
    if not remote_pdfs:
        raise SystemExit(
            f"No PDF files found in Hugging Face repo {CURRICULUM_SOURCE_REPO_TYPE}:{CURRICULUM_SOURCE_REPO_ID}@{CURRICULUM_SOURCE_REVISION}."
        )

    CURRICULUM_DIR.mkdir(parents=True, exist_ok=True)
    for remote_pdf in remote_pdfs:
        (CURRICULUM_DIR / remote_pdf.name).write_bytes(remote_pdf.read_bytes())
    return sorted(CURRICULUM_DIR.glob("*.pdf"))
 def main() -> None:
+    if not CURRICULUM_DIR.exists():
+        raise SystemExit(f"Missing curriculum directory: {CURRICULUM_DIR}")
+    pdf_files = _ensure_curriculum_pdfs()
+    if not pdf_files:
+        raise SystemExit("No PDF files found in datasets/curriculum/")
+    VECTORSTORE_DIR.mkdir(parents=True, exist_ok=True)
+    documents: List[str] = []
+    metadatas: List[Dict[str, object]] = []
+    ids: List[str] = []
+    per_subject = Counter()
+    per_quarter = Counter()
+    per_domain = Counter()
+    for pdf_file in pdf_files:
+        subject = SUBJECT_MAP.get(pdf_file.name, "general_math")
+        page_rows = extract_pdf_pages(pdf_file)
+        for page_row in page_rows:
+            page_number = int(page_row["page"])
+            text = str(page_row["text"])
+            for idx, chunk in enumerate(chunk_text(text), start=1):
+                quarter = infer_quarter(chunk)
+                domain = infer_domain(chunk)
+                chunk_type = infer_chunk_type(chunk)
                 metadata = {
                     "subject": subject,
                     "quarter": quarter,
+                    "content_domain": domain,
                     "chunk_type": chunk_type,
+                    "source_file": pdf_file.name,
+                    "page": page_number,
                 }
+                chunk_id = f"{pdf_file.stem}-{page_number}-{idx}"
+                documents.append(chunk)
                 metadatas.append(metadata)
                 ids.append(chunk_id)
+                per_subject[subject] += 1
+                per_quarter[str(quarter)] += 1
+                per_domain[domain] += 1
+    embedder = SentenceTransformer(EMBED_MODEL_NAME)
+    embeddings = embedder.encode(documents, show_progress_bar=True).tolist()
+    client = chromadb.PersistentClient(path=str(VECTORSTORE_DIR))
+    existing = [c.name for c in client.list_collections()]
+    if COLLECTION_NAME in existing:
+        client.delete_collection(COLLECTION_NAME)
+    collection = client.create_collection(name=COLLECTION_NAME)
+    collection.add(ids=ids, documents=documents, metadatas=metadatas, embeddings=embeddings)
+    summary = {
+        "lastIngested": datetime.now(timezone.utc).isoformat(),
+        "totalChunks": len(documents),
+        "chunksPerSubject": dict(per_subject),
+        "chunksPerQuarter": dict(per_quarter),
+        "chunksPerDomain": dict(per_domain),
+        "sourceFiles": [pdf.name for pdf in pdf_files],
+    }
+    (VECTORSTORE_DIR / "ingest_summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
+    print("=== Curriculum Ingestion Summary ===")
+    print(f"Total chunks: {summary['totalChunks']}")
+    print("Chunks per subject:")
+    for subject, count in sorted(per_subject.items()):
+        print(f"  - {subject}: {count}")
+    print("Chunks per quarter:")
+    for quarter, count in sorted(per_quarter.items()):
+        print(f"  - Q{quarter}: {count}")
+    print("Chunks per domain:")
+    for domain, count in sorted(per_domain.items()):
+        print(f"  - {domain}: {count}")
 if __name__ == "__main__":
+    main()

scripts/ingest_from_storage.py DELETED Viewed

@@ -1,285 +0,0 @@
-"""
-Ingest curriculum PDFs from Firebase Storage into ChromaDB.
-Run: python -m backend.scripts.ingest_from_storage
-"""
-from __future__ import annotations
-import logging
-import os
-import sys
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-logger = logging.getLogger("mathpulse.ingest")
-sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
-from rag.firebase_storage_loader import (
-    PDF_METADATA,
-    download_pdf_from_storage,
-    list_curriculum_blobs,
-)
-_CONTENT_DOMAIN_CLASSIFIERS = [  # ordered (domain label, trigger keywords) pairs; the classifier functions below return the first entry with a hit, so earlier entries take priority
-    ("introduction", ["introduction", "welcome", "overview", "objectives", "learning objectives"]),
-    ("key_concepts", ["key concepts", "key ideas", "main concepts", "definitions", "key terms"]),
-    ("worked_examples", ["example", "worked example", "illustrative example", "sample problem", "solution"]),
-    ("important_notes", ["important", "note", "remember", "tip", "caution", "warning", "key point"]),  # NOTE(review): keywords are matched as raw substrings, so short ones like "note" or "tip" also hit inside longer words
-    ("practice", ["practice", "exercise", "try it", "your turn", "activity", "problem set"]),
-    ("summary", ["summary", "recap", "key takeaways", "wrap-up", "conclusion"]),
-    ("assessment", ["assessment", "quiz", "test", "evaluation", "exam"]),
-]
-_CONTENT_TYPE_CLASSIFIERS = [  # ordered (chunk type, trigger keywords) pairs used by _classify_chunk; first match wins
-    ("definition", ["definition", "define", "means", "is defined as"]),
-    ("formula", ["formula", "equation", "expression", "rule"]),
-    ("procedure", ["step", "method", "how to", "procedure", "process"]),
-    ("concept", ["concept", "idea", "principle", "theory"]),
-    ("application", ["application", "use", "example", "solve", "problem"]),
-]
-def _classify_chunk(content: str) -> tuple[str, str]:
-    content_lower = content.lower()
-    content_domain = "general"
-    chunk_type = "concept"
-    for domain, keywords in _CONTENT_DOMAIN_CLASSIFIERS:
-        if any(kw in content_lower for kw in keywords):
-            content_domain = domain
-            break
-    for ctype, keywords in _CONTENT_TYPE_CLASSIFIERS:
-        if any(kw in content_lower for kw in keywords):
-            chunk_type = ctype
-            break
-    return content_domain, chunk_type
-def _classify_lesson_section(content: str) -> str:
-    content_lower = content.lower().strip()
-    first_sentence = content_lower[:200]
-    for domain, keywords in _CONTENT_DOMAIN_CLASSIFIERS:
-        if any(kw in first_sentence for kw in keywords):
-            return domain
-    return "general"
-def chunk_text_preserve_pages(text: str, page_starts: List[int], chunk_size: int = 500, overlap: int = 80) -> List[Dict[str, Any]]:
-    """Split text into overlapping chunks, preserving page traceability."""
-    # Filter out None/empty entries that can result from malformed PDF text extraction
-    words = [w for w in text.split() if w is not None and str(w).strip()]
-    chunks = []
-    i = 0
-    chunk_idx = 0
-    while i < len(words):
-        chunk_words = words[i : i + chunk_size]
-        chunk_text = " ".join(str(w) for w in chunk_words)
-        estimated_page = max(1, (i // chunk_size) + 1)
-        content_domain, chunk_type = _classify_chunk(chunk_text)
-        chunks.append({
-            "text": chunk_text,
-            "chunk_index": chunk_idx,
-            "estimated_page": estimated_page,
-            "content_domain": content_domain,
-            "chunk_type": chunk_type,
-        })
-        i += chunk_size - overlap
-        chunk_idx += 1
-    return chunks
-def extract_pdf_text_and_pages(pdf_bytes: bytes) -> tuple[str, List[int]]:
-    """Extract text from PDF bytes, returning full text and page start positions."""
-    try:
-        from pypdf import PdfReader
-    except ImportError:
-        try:
-            import PyPDF2 as PdfReaderModule
-            from PyPDF2 import PdfReader
-        except ImportError:
-            logger.error("No PDF library available. Install: pip install pypdf")
-            return "", []
-    import io
-    reader = PdfReader(io.BytesIO(pdf_bytes))
-    pages: List[str] = []
-    for page in reader.pages:
-        text = page.extract_text() or ""
-        pages.append(text)
-    page_starts = []
-    position = 0
-    for page_text in pages:
-        page_starts.append(position)
-        position += len(page_text) + 1
-    full_text = "\n".join(pages)
-    return full_text, page_starts
-def get_firestore_client():
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
-            if sa_json:
-                import json as _json
-                from firebase_admin import credentials
-                creds = credentials.Certificate(_json.loads(sa_json))
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            elif sa_file and Path(sa_file).exists():
-                from firebase_admin import credentials
-                creds = credentials.Certificate(sa_file)
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            else:
-                firebase_admin.initialize_app(options={"storageBucket": bucket_name})
-        return firestore.client()
-    except Exception as e:
-        logger.warning("Firestore unavailable: %s", e)
-        return None
-def ingest_from_firebase_storage(force_reindex: bool = False):
-    """Download PDFs from Firebase Storage and ingest into ChromaDB."""
-    try:
-        from sentence_transformers import SentenceTransformer
-        import chromadb
-    except ImportError:
-        logger.error("Missing dependencies. Install: pip install chromadb sentence-transformers pypdf")
-        return
-    chroma_path = os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore")
-    chroma_client = chromadb.PersistentClient(path=chroma_path)
-    collection = chroma_client.get_or_create_collection(
-        name="curriculum_chunks",
-        metadata={"hnsw:space": "cosine"},
-    )
-    embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")
-    db = get_firestore_client()
-    logger.info("Starting ingestion from Firebase Storage...")
-    ingested_count = 0
-    skipped_count = 0
-    error_count = 0
-    for storage_path, metadata in PDF_METADATA.items():
-        doc_id = storage_path.replace("/", "_").replace(".pdf", "")
-        if db:
-            try:
-                doc_ref = db.collection("curriculumDocuments").document(doc_id)
-                existing = doc_ref.get()
-                if existing.exists:
-                    if not force_reindex and existing.to_dict().get("status") == "ingested":
-                        logger.info("[SKIP] %s already ingested", storage_path)
-                        skipped_count += 1
-                        continue
-            except Exception as e:
-                logger.warning("Firestore check failed for %s: %s", storage_path, e)
-        logger.info("Downloading: %s", storage_path)
-        pdf_bytes = download_pdf_from_storage(storage_path)
-        if pdf_bytes is None:
-            logger.error("[ERROR] Failed to download: %s", storage_path)
-            if db:
-                try:
-                    doc_ref.set({
-                        "storagePath": storage_path,
-                        "status": "failed",
-                        "error": "download_failed",
-                        **metadata,
-                    }, merge=True)
-                except:
-                    pass
-            error_count += 1
-            continue
-        logger.info("Extracting text from: %s (%d bytes)", storage_path, len(pdf_bytes))
-        full_text, page_starts = extract_pdf_text_and_pages(pdf_bytes)
-        if not full_text.strip():
-            logger.warning("[WARN] No text extracted from: %s", storage_path)
-            error_count += 1
-            continue
-        chunks = chunk_text_preserve_pages(full_text, page_starts)
-        logger.info("  -> %d chunks created", len(chunks))
-        existing_ids = [cid for cid in collection.get()["ids"] if cid.startswith(f"{doc_id}_chunk_")]
-        if existing_ids:
-            collection.delete(ids=existing_ids)
-            logger.info("  Removed %d existing chunks", len(existing_ids))
-        for chunk in chunks:
-            chunk_text = chunk.get("text", "")
-            if not isinstance(chunk_text, str) or not chunk_text.strip():
-                logger.warning("  Skipping empty/invalid chunk %s (type=%s, len=%d)", chunk.get("chunk_index"), type(chunk_text), len(chunk_text))
-                continue
-            chunk_id = f"{doc_id}_chunk_{chunk['chunk_index']}"
-            try:
-                embedding = embedder.encode(chunk_text, normalize_embeddings=True).tolist()
-            except Exception as enc_err:
-                logger.warning("  Skipping unencodable chunk %s: %s", chunk.get("chunk_index"), enc_err)
-                continue
-            collection.add(
-                embeddings=[embedding],
-                documents=[chunk_text],
-                metadatas=[{
-                    "document_id": doc_id,
-                    "module_id": metadata.get("subjectId", ""),
-                    "lesson_id": f"lesson-{doc_id}",
-                    "title": metadata.get("subject", ""),
-                    "subject": metadata.get("subject", ""),
-                    "subjectId": metadata.get("subjectId", ""),
-                    "quarter": metadata.get("quarter", 1),
-                    "competency_code": metadata.get("competency_code", ""),
-                    "content_domain": chunk["content_domain"],
-                    "chunk_type": chunk["chunk_type"],
-                    "source_file": storage_path.split("/")[-1],
-                    "storage_path": storage_path,
-                    "page": chunk["estimated_page"],
-                    "chunk_index": chunk["chunk_index"],
-                    "type": metadata.get("type", ""),
-                }],
-                ids=[chunk_id],
-            )
-        if db:
-            try:
-                doc_ref.set({
-                    "id": doc_id,
-                    "storagePath": storage_path,
-                    "status": "ingested",
-                    "ingestedAt": __import__("firebase_admin").firestore.SERVER_TIMESTAMP,
-                    "chunkCount": len(chunks),
-                    **metadata,
-                }, merge=True)
-            except Exception as e:
-                logger.warning("Firestore update failed: %s", e)
-        logger.info("[OK] Ingested %s (%d chunks)", storage_path, len(chunks))
-        ingested_count += 1
-    logger.info("=" * 50)
-    logger.info("Ingestion complete: %d ingested, %d skipped, %d errors", ingested_count, skipped_count, error_count)
-    logger.info("Total chunks in ChromaDB: %d", collection.count())
-if __name__ == "__main__":  # command-line entry point
-    import argparse
-    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")  # emit INFO-and-above records as "LEVEL: message"
-    parser = argparse.ArgumentParser(description="Ingest curriculum PDFs from Firebase Storage into ChromaDB")
-    parser.add_argument("--force", action="store_true", help="Re-ingest even if already ingested")
-    args = parser.parse_args()
-    ingest_from_firebase_storage(force_reindex=args.force)  # --force maps directly onto force_reindex

scripts/migrate_grade12_to_grade11.py DELETED Viewed

@@ -1,107 +0,0 @@
-"""
-Migrate Grade 12 users to Grade 11.
-Run this to convert all existing Grade 12 users to Grade 11:
-    python backend/scripts/migrate_grade12_to_grade11.py
-This handles:
-- Firestore user profiles
-- Progress records
-- Any references to Grade 12
-"""
-import logging
-import os
-import sys
-from pathlib import Path
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-logger = logging.getLogger(__name__)
-def migrate_grade_12_to_grade_11():
-    """Migrate all Grade 12 users to Grade 11."""
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        svc_account = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-        if svc_account:
-            import json
-            from firebase_admin import credentials
-            creds = credentials.Certificate(json.loads(svc_account))
-            firebase_admin.initialize_app(creds)
-        else:
-            firebase_admin.initialize_app()
-        db = firestore.client()
-        print("Firebase initialized")
-    except Exception as e:
-        print(f"Failed to initialize Firebase: {e}")
-        return
-    # Count migrations
-    users_migrated = 0
-    progress_migrated = 0
-    # Migrate users collection
-    print("\n--- Migrating users ---")
-    users_ref = db.collection("users")
-    # Batch update for users
-    batch = db.batch()
-    user_count = 0
-    for doc in users_ref.stream():
-        data = doc.to_dict()
-        if data.get("grade") == "Grade 12":
-            batch.update(doc.reference, {"grade": "Grade 11"})
-            user_count += 1
-            print(f"  Migrating user: {doc.id} ({data.get('name', 'Unknown')})")
-            if user_count >= 500:
-                batch.commit()
-                users_migrated += user_count
-                user_count = 0
-                batch = db.batch()
-    if user_count > 0:
-        batch.commit()
-        users_migrated += user_count
-    print(f"  => Migrated {users_migrated} users to Grade 11")
-    # Migrate progress collection
-    print("\n--- Migrating progress ---")
-    progress_ref = db.collection("progress")
-    batch = db.batch()
-    progress_count = 0
-    for doc in progress_ref.stream():
-        data = doc.to_dict()
-        if data.get("gradeLevel") == "Grade 12":
-            batch.update(doc.reference, {"gradeLevel": "Grade 11"})
-            progress_count += 1
-            if progress_count >= 500:
-                batch.commit()
-                progress_migrated += progress_count
-                progress_count = 0
-                batch = db.batch()
-    if progress_count > 0:
-        batch.commit()
-        progress_migrated += progress_count
-    print(f"  => Migrated {progress_migrated} progress records to Grade 11")
-    print(f"\n=== Migration complete ===")
-    print(f"Users migrated: {users_migrated}")
-    print(f"Progress migrated: {progress_migrated}")
-if __name__ == "__main__":  # script entry point
-    logging.basicConfig(level=logging.INFO)  # configure root logging at INFO before running the migration
-    migrate_grade_12_to_grade_11()

scripts/register_firestore_metadata.py DELETED Viewed

@@ -1,183 +0,0 @@
-"""
-Register curriculum document metadata in Firestore.
-Populates the curriculumDocuments collection so the app can display
-lessons mapped to their source PDFs before ingestion.
-Run: python backend/scripts/register_firestore_metadata.py
-"""
-from __future__ import annotations
-import os
-import sys
-from pathlib import Path
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-def _get_firestore_client():
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
-            if sa_json:
-                import json as _json
-                from firebase_admin import credentials
-                creds = credentials.Certificate(_json.loads(sa_json))
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            elif sa_file and Path(sa_file).exists():
-                from firebase_admin import credentials
-                creds = credentials.Certificate(sa_file)
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            else:
-                firebase_admin.initialize_app(options={"storageBucket": bucket_name})
-        return firestore.client()
-    except Exception as e:
-        print(f"Firestore init failed: {e}")
-        return None
-CURRICULUM_DOCUMENTS = [  # static records written by register_metadata(); each "id" becomes the document ID in the curriculumDocuments collection
-    {
-        "id": "gm_lesson_1",
-        "moduleId": "gm-q1-business-finance",
-        "lessonId": "gm-q1-bf-1",
-        "title": "Represent business transactions and financial goals using variables and equations.",
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "quarter": 1,
-        "competencyCode": "GM11-BF-1",
-        "learningCompetency": "Represent business transactions and financial goals using variables and equations.",
-        "storagePath": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "gm_navotas_lesson_1",
-        "moduleId": "gm-q1-patterns-sequences-series",
-        "lessonId": "gm-q1-pss-1",
-        "title": "Identify and describe arithmetic and geometric patterns in data.",
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "quarter": 1,
-        "competencyCode": "GM11-PSS-1",
-        "learningCompetency": "Identify and describe arithmetic and geometric patterns in data.",
-        "storagePath": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "bm_lesson_1",
-        "moduleId": "bm-q1-business-math",
-        "lessonId": "bm-q1-1",
-        "title": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
-        "subject": "Business Mathematics",
-        "subjectId": "business-math",
-        "quarter": 1,
-        "competencyCode": "ABM_BM11BS-Ia-b-1",
-        "learningCompetency": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
-        "storagePath": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "stat_lesson_1",
-        "moduleId": "stat-q1-probability",
-        "lessonId": "stat-q1-1",
-        "title": "Define and describe random variables and their types.",
-        "subject": "Statistics and Probability",
-        "subjectId": "stats-prob",
-        "quarter": 1,
-        "competencyCode": "SP_SHS11-Ia-1",
-        "learningCompetency": "Define and describe random variables and their types.",
-        "storagePath": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf",  # NOTE(review): scripts/upload_curriculum_pdfs.py stores this file as "..._1stSem_FV.pdf" (underscore before FV) - confirm which name exists in the bucket
-        "status": "uploaded",
-    },
-    {
-        "id": "fm1_lesson_1",
-        "moduleId": "fm1-q1-counting",
-        "lessonId": "fm1-q1-fpc-1",
-        "title": "Apply the fundamental counting principle in contextual problems.",
-        "subject": "Finite Mathematics 1",
-        "subjectId": "finite-math-1",
-        "quarter": 1,
-        "competencyCode": "FM1-SHS11-Ia-1",
-        "learningCompetency": "Apply the fundamental counting principle in contextual problems.",
-        "storagePath": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "fm2_lesson_1",
-        "moduleId": "fm2-q1-matrices",
-        "lessonId": "fm2-q1-matrices-1",
-        "title": "Represent contextual data using matrix notation.",
-        "subject": "Finite Mathematics 2",
-        "subjectId": "finite-math-2",
-        "quarter": 1,
-        "competencyCode": "FM2-SHS11-Ia-1",
-        "learningCompetency": "Represent contextual data using matrix notation.",
-        "storagePath": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "org_mgmt_lesson_1",
-        "moduleId": "org-mgmt-q1",
-        "lessonId": "org-mgmt-q1-1",
-        "title": "Understand the fundamental concepts of organization and management.",
-        "subject": "Organization and Management",
-        "subjectId": "org-mgmt",
-        "quarter": 1,
-        "competencyCode": "ABM_OM11-Ia-1",
-        "learningCompetency": "Understand the fundamental concepts of organization and management.",
-        "storagePath": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
-        "status": "uploaded",
-    },
-]
-def register_metadata(force: bool = False):
-    db = _get_firestore_client()
-    if db is None:
-        print("ERROR: Cannot connect to Firestore. Check credentials.")
-        print("Set FIREBASE_SERVICE_ACCOUNT_JSON or place mathpulse-sa.json in backend/ directory.")
-        return
-    print("Connected to Firestore.")
-    print("-" * 50)
-    registered = 0
-    skipped = 0
-    updated = 0
-    for doc in CURRICULUM_DOCUMENTS:
-        doc_id = doc["id"]
-        doc_ref = db.collection("curriculumDocuments").document(doc_id)
-        existing = doc_ref.get()
-        if existing.exists and not force:
-            print(f"[SKIP] {doc_id} already registered")
-            skipped += 1
-            continue
-        if existing.exists and force:
-            updated += 1
-        else:
-            registered += 1
-        data = {
-            **doc,
-            "uploadedAt": None,
-        }
-        doc_ref.set(data, merge=True)
-        print(f"[OK]  {'Updated' if force and existing.exists else 'Registered'} {doc_id} -> {doc.get('storagePath')}")
-    print("-" * 50)
-    print(f"Done: {registered} registered, {skipped} skipped, {updated} updated.")
-if __name__ == "__main__":  # command-line entry point
-    import argparse
-    parser = argparse.ArgumentParser(description="Register curriculum document metadata in Firestore")
-    parser.add_argument("--force", action="store_true", help="Overwrite existing records")
-    args = parser.parse_args()
-    register_metadata(force=args.force)  # --force maps directly onto the force parameter

scripts/seed_curriculum.py DELETED Viewed

@@ -1,64 +0,0 @@
-"""
-Seed Firestore curriculum collection from static data.
-Run this ONCE to migrate static curriculum to Firestore:
-    python backend/scripts/seed_curriculum.py
-After seeding, the curriculum API will read from Firestore.
-"""
-import logging
-import json
-import os
-import sys
-from pathlib import Path
-# Add backend to path
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-from services.curriculum_service import _STATIC_SUBJECTS
-logger = logging.getLogger(__name__)
-def seed_curriculum():
-    """Seed curriculum subjects to Firestore."""
-    try:
-        import firebase_admin
-        from firebase_admin import firestore, credentials
-        # Initialize Firebase
-        svc_account = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-        if svc_account:
-            sa_creds = credentials.Certificate(json.loads(svc_account))
-            firebase_admin.initialize_app(sa_creds)
-        else:
-            firebase_admin.initialize_app()
-        db = firestore.client()
-        print("Firebase initialized")
-    except Exception as e:
-        print(f"Failed to initialize Firebase: {e}")
-        return
-    # Seed new subjects
-    subjects_ref = db.collection("subjects")
-    count = 0
-    for subject in _STATIC_SUBJECTS:
-        doc_ref = subjects_ref.document(subject["id"])
-        doc_ref.set(subject)
-        count += 1
-        print(f"  Seeded: {subject['id']} - {subject['name']} ({len(subject.get('topics', []))} topics)")
-    print(f"\nSeeded {count} subjects to Firestore")
-    print("\nCurriculum is now available at:")
-    print("  GET /api/curriculum/subjects")
-    print("  GET /api/curriculum/subjects/{id}")
-    print("  GET /api/curriculum/subjects/{id}/topics")
-if __name__ == "__main__":  # script entry point
-    logging.basicConfig(level=logging.INFO)  # configure root logging at INFO before seeding
-    seed_curriculum()

scripts/upload_curriculum_pdfs.py DELETED Viewed

@@ -1,264 +0,0 @@
-"""
-Upload DepEd curriculum PDFs to Firebase Storage.
-Run once during initial setup: python scripts/upload_curriculum_pdfs.py
-"""
-from __future__ import annotations
-import os
-import sys
-from pathlib import Path
-from typing import Dict, List
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-LOCAL_PDF_DIR = r"C:\Users\Deign\Downloads\Documents"
-PDF_METADATA: Dict[str, Dict[str, object]] = {  # keyed by the PDF's filename inside LOCAL_PDF_DIR; "storage_path" is the blob name used when uploading to the bucket
-    "GENERAL-MATHEMATICS-1.pdf": {
-        "subject": "General Mathematics",
-        "type": "curriculum_guide",
-        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
-        "quarters": ["Q1", "Q2", "Q3", "Q4"],
-        "storage_path": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
-    },
-    "Finite-Mathematics-1-1.pdf": {
-        "subject": "Finite Mathematics 1",
-        "type": "curriculum_guide",
-        "strand": ["STEM", "ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
-    },
-    "Finite-Mathematics-2-1.pdf": {
-        "subject": "Finite Mathematics 2",
-        "type": "curriculum_guide",
-        "strand": ["STEM", "ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
-    },
-    "SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
-        "subject": "General Mathematics",
-        "type": "sdo_module",
-        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
-    },
-    "SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": {
-        "subject": "Business Mathematics",
-        "type": "sdo_module",
-        "strand": ["ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
-    },
-    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": {
-        "subject": "Organization and Management",
-        "type": "sdo_module",
-        "strand": ["ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
-    },
-    "SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf": {  # NOTE(review): scripts/register_firestore_metadata.py refers to this file as "..._1stSem.FV.pdf" (dot before FV) - confirm which spelling is correct
-        "subject": "Statistics and Probability",
-        "type": "sdo_module",
-        "strand": ["STEM", "ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf",
-    },
-}
-def chunk_text(text: str, chunk_size: int = 600, overlap: int = 100) -> List[str]:
-    """Split text into overlapping chunks."""
-    words = text.split()
-    chunks: List[str] = []
-    i = 0
-    while i < len(words):
-        chunk = " ".join(words[i : i + chunk_size])
-        chunks.append(chunk)
-        i += chunk_size - overlap
-    return chunks
-def upload_pdfs():
-    """Upload PDFs from local directory to Firebase Storage."""
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage, firestore
-    except ImportError:
-        print("ERROR: firebase-admin not installed. Run: pip install firebase-admin")
-        return
-    service_account_path = Path(__file__).resolve().parents[1] / "serviceAccountKey.json"
-    if not service_account_path.exists():
-        print(f"ERROR: Service account key not found at {service_account_path}")
-        return
-    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "").strip()
-    if not bucket_name:
-        print("ERROR: FIREBASE_STORAGE_BUCKET not set in environment")
-        return
-    cred = credentials.Certificate(str(service_account_path))
-    firebase_admin.initialize_app(cred, {"storageBucket": bucket_name})
-    bucket = storage.bucket()
-    db = firestore.client()
-    print(f"Scanning: {LOCAL_PDF_DIR}")
-    print("-" * 50)
-    uploaded = 0
-    skipped = 0
-    for filename, meta in PDF_METADATA.items():
-        local_path = Path(LOCAL_PDF_DIR) / filename
-        if not local_path.exists():
-            print(f"[SKIP] {filename} not found in {LOCAL_PDF_DIR}")
-            skipped += 1
-            continue
-        doc_ref = db.collection("curriculumDocs").document(filename)
-        if doc_ref.get().exists:
-            print(f"[SKIP] {filename} already uploaded")
-            skipped += 1
-            continue
-        try:
-            blob = bucket.blob(meta["storage_path"])
-            blob.upload_from_filename(str(local_path), content_type="application/pdf")
-            doc_ref.set(
-                {
-                    "filename": filename,
-                    "subject": meta["subject"],
-                    "type": meta["type"],
-                    "strand": meta["strand"],
-                    "quarters": meta["quarters"],
-                    "storage_path": meta["storage_path"],
-                    "uploaded_at": firestore.SERVER_TIMESTAMP,
-                    "indexed": False,
-                }
-            )
-            print(f"[OK] Uploaded {filename}")
-            uploaded += 1
-        except Exception as e:
-            print(f"[ERROR] {filename}: {e}")
-    print("-" * 50)
-    print(f"Upload complete: {uploaded} uploaded, {skipped} skipped")
-def index_pdfs():
-    """Extract text from PDFs, chunk, embed, and store in ChromaDB."""
-    try:
-        from pypdf import PdfReader
-        import chromadb
-        from sentence_transformers import SentenceTransformer
-        from firebase_admin import firestore
-    except ImportError:
-        print("ERROR: Missing dependencies. Run: pip install pypdf chromadb sentence-transformers firebase-admin")
-        return
-    chroma_path = os.getenv("CHROMA_PERSIST_PATH", "./datasets/vectorstore")
-    chroma_client = chromadb.PersistentClient(path=chroma_path)
-    collection = chroma_client.get_or_create_collection(
-        name="curriculum_chunks",
-        metadata={"hnsw:space": "cosine"},
-    )
-    embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")
-    try:
-        import firebase_admin
-        from firebase_admin import firestore as FS
-        db = FS.client()
-    except Exception:
-        db = None
-    print(f"Indexing PDFs from: {LOCAL_PDF_DIR}")
-    print("-" * 50)
-    indexed = 0
-    skipped = 0
-    for filename, meta in PDF_METADATA.items():
-        if db:
-            doc_ref = db.collection("curriculumDocs").document(filename)
-            doc = doc_ref.get()
-            if doc and doc.to_dict().get("indexed", False):
-                print(f"[SKIP] {filename} already indexed")
-                skipped += 1
-                continue
-        local_path = Path(LOCAL_PDF_DIR) / filename
-        if not local_path.exists():
-            print(f"[SKIP] {filename} not found")
-            skipped += 1
-            continue
-        try:
-            reader = PdfReader(str(local_path))
-            full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
-            if not full_text.strip():
-                print(f"[WARN] {filename} has no extractable text")
-                continue
-            chunks = chunk_text(full_text)
-            print(f"[INFO] {filename} -> {len(chunks)} chunks")
-            for i, chunk in enumerate(chunks):
-                chunk_id = f"{filename}_chunk_{i}"
-                existing = collection.get(ids=[chunk_id])
-                if existing and existing.get("ids"):
-                    continue
-                chunk_embedding = embedder.encode(
-                    chunk,
-                    normalize_embeddings=True,
-                ).tolist()
-                collection.add(
-                    embeddings=[chunk_embedding],
-                    documents=[chunk],
-                    metadatas=[
-                        {
-                            "source_file": filename,
-                            "subject": meta["subject"],
-                            "strand": ",".join(meta["strand"]),
-                            "quarter": ",".join(meta["quarters"]),
-                            "chunk_index": i,
-                            "type": meta["type"],
-                        }
-                    ],
-                    ids=[chunk_id],
-                )
-            if db:
-                doc_ref.update({"indexed": True})
-            print(f"[OK] Indexed {filename}")
-            indexed += 1
-        except Exception as e:
-            print(f"[ERROR] {filename}: {e}")
-    print("-" * 50)
-    print(f"Indexing complete: {indexed} indexed, {skipped} skipped")
-    print(f"Total chunks in ChromaDB: {collection.count()}")
-if __name__ == "__main__":  # command-line entry point
-    import argparse
-    parser = argparse.ArgumentParser(description="Upload and index DepEd curriculum PDFs")
-    parser.add_argument("action", choices=["upload", "index", "both"], help="Action to perform")
-    args = parser.parse_args()
-    if args.action in ("upload", "both"):  # with "both", the upload runs before the indexing
-        upload_pdfs()
-    if args.action in ("index", "both"):
-        index_pdfs()

scripts/upload_lesson_modules.py DELETED Viewed

@@ -1,142 +0,0 @@
-"""
-Merge DepEd lesson module PDFs and upload to Firebase Storage.
-Run: python backend/scripts/upload_lesson_modules.py
-"""
-from __future__ import annotations
-import os
-import sys
-from pathlib import Path
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-from pypdf import PdfWriter, PdfReader
-LOCAL_MODULES_DIR = Path(__file__).resolve().parents[1].parent / "datasets" / "lesson_modules"
-FIREBASE_STORAGE_BUCKET = "mathpulse-ai-2026.firebasestorage.app"
-# Upload plan
-UPLOAD_JOBS = [
-    {
-        "id": "basic-calc-q3",
-        "display_name": "Basic Calculus Q3",
-        "subject": "Basic Calculus",
-        "subjectId": "basic-calc",
-        "quarter": 3,
-        "storage_path": "curriculum/basic_calc/SDO_Navotas_BasicCalc_SHS_Q3.FV.pdf",
-        "local_dir": LOCAL_MODULES_DIR / "basic_calculus_q3",
-        "filename": "Basic Calculus-Q3-Module-{n}.pdf",
-        "modules": list(range(1, 9)),  # Modules 1-8
-    },
-    {
-        "id": "gen-math-q2",
-        "display_name": "General Mathematics Q2",
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "quarter": 2,
-        "storage_path": "curriculum/gen_math_q2/SDO_Navotas_GenMath_SHS_Q2.FV.pdf",
-        "local_dir": LOCAL_MODULES_DIR / "genmath_q2",
-        "filename": "genmath_q2_mod{n}_*.pdf",
-        "modules": [2, 3],  # Modules 2 and 3 only
-    },
-]
-def merge_pdfs(job: dict) -> Path | None:
-    """Merge multiple PDFs into a single output file. Returns output path."""
-    output_dir = LOCAL_MODULES_DIR / "merged"
-    output_dir.mkdir(parents=True, exist_ok=True)
-    output_path = output_dir / f"{job['id']}_merged.pdf"
-    writer = PdfWriter()
-    for mod_num in job["modules"]:
-        if job["id"] == "basic-calc-q3":
-            fname = job["filename"].format(n=mod_num)
-        else:
-            # GenMath modules have specific naming
-            fname = None
-            pattern = job["filename"].format(n=mod_num)
-            for f in job["local_dir"].glob(pattern):
-                fname = f.name
-                break
-            if fname is None:
-                print(f"  [WARN] Could not find file for module {mod_num}")
-                continue
-        src_path = job["local_dir"] / fname
-        if not src_path.exists():
-            print(f"  [WARN] File not found: {src_path}")
-            continue
-        reader = PdfReader(str(src_path))
-        print(f"  + {src_path.name} ({len(reader.pages)} pages)")
-        for page in reader.pages:
-            writer.add_page(page)
-    print(f"  Writing {output_path.name} ({len(writer.pages)} total pages)")
-    with open(output_path, "wb") as f:
-        writer.write(f)
-    return output_path
-def upload_to_firebase(local_path: Path, storage_path: str) -> bool:
-    """Upload a PDF file to Firebase Storage."""
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage
-    except ImportError:
-        print("  ERROR: firebase-admin not installed")
-        return False
-    sa_file = Path(__file__).resolve().parents[1].parent / ".secrets" / "firebase-service-account.json"
-    if not sa_file.exists():
-        print(f"  ERROR: Service account not found at {sa_file}")
-        return False
-    if not firebase_admin._apps:
-        cred = credentials.Certificate(str(sa_file))
-        firebase_admin.initialize_app(cred, {"storageBucket": FIREBASE_STORAGE_BUCKET})
-    bucket = storage.bucket()
-    blob = bucket.blob(storage_path)
-    print(f"  Uploading to gs://{bucket.name}/{storage_path}")
-    blob.upload_from_filename(str(local_path), content_type="application/pdf")
-    print(f"  Upload complete!")
-    return True
-def main():
-    print("=" * 60)
-    print("MathPulse AI — Lesson Module PDF Uploader")
-    print("=" * 60)
-    for job in UPLOAD_JOBS:
-        print(f"\n[{job['display_name']}]")
-        print("-" * 40)
-        # Step 1: Merge PDFs
-        output_path = merge_pdfs(job)
-        if not output_path or not output_path.exists():
-            print(f"  [FAIL] Merge failed for {job['id']}")
-            continue
-        # Step 2: Upload to Firebase
-        success = upload_to_firebase(output_path, job["storage_path"])
-        if not success:
-            print(f"  [FAIL] Upload failed for {job['id']}")
-            continue
-        print(f"\n  SUCCESS: {job['display_name']}")
-        print(f"  Storage path: gs://{FIREBASE_STORAGE_BUCKET}/{job['storage_path']}")
-        print(f"  Pages: {len(PdfReader(str(output_path)).pages)}")
-    print("\n" + "=" * 60)
-    print("Done!")
-if __name__ == "__main__":
-    main()

scripts/upload_vectorstore_to_firebase.py DELETED Viewed

@@ -1,71 +0,0 @@
-"""
-Upload vectorstore directory to Firebase Storage.
-Run: python -m backend.scripts.upload_vectorstore_to_firebase
-"""
-from __future__ import annotations
-import logging
-import os
-import sys
-from pathlib import Path
-logger = logging.getLogger("mathpulse.upload_vectorstore")
-sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
-from backend.rag.firebase_storage_loader import _init_firebase_storage
-VECTORSTORE_SOURCE_DIR = Path(__file__).resolve().parents[3] / "datasets" / "vectorstore"
-REMOTE_PREFIX = "vectorstore/"
-def upload_directory(local_dir: Path, bucket, prefix: str):
-    """Recursively upload a local directory to Firebase Storage prefix."""
-    uploaded = 0
-    skipped = 0
-    for root, dirs, files in os.walk(local_dir):
-        for filename in files:
-            local_path = Path(root) / filename
-            relative_path = local_path.relative_to(local_dir)
-            remote_path = f"{prefix}{relative_path.as_posix()}"
-            try:
-                blob = bucket.blob(remote_path)
-                blob.upload_from_filename(str(local_path))
-                logger.info("Uploaded: %s (%d bytes)", remote_path, local_path.stat().st_size)
-                uploaded += 1
-            except Exception as e:
-                logger.error("Failed to upload %s: %s", remote_path, e)
-                skipped += 1
-    return uploaded, skipped
-if __name__ == "__main__":
-    import argparse
-    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-    parser = argparse.ArgumentParser(description="Upload vectorstore to Firebase Storage")
-    parser.add_argument("--source", type=str, default=str(VECTORSTORE_SOURCE_DIR),
-                        help="Local vectorstore directory")
-    parser.add_argument("--prefix", type=str, default=REMOTE_PREFIX,
-                        help="Remote path prefix in Firebase Storage")
-    args = parser.parse_args()
-    source_dir = Path(args.source)
-    if not source_dir.exists():
-        logger.error("Source directory does not exist: %s", source_dir)
-        sys.exit(1)
-    _, bucket = _init_firebase_storage()
-    if bucket is None:
-        logger.error("Firebase Storage not available")
-        sys.exit(1)
-    logger.info("Uploading vectorstore from %s to gs://%s/%s",
-                source_dir, bucket.name, args.prefix)
-    uploaded, skipped = upload_directory(source_dir, bucket, args.prefix)
-    logger.info("Upload complete: %d uploaded, %d skipped", uploaded, skipped)

services/__init__.py CHANGED Viewed

@@ -1,44 +1 @@
 """Backend service helpers for inference, logging, and integrations."""
-from .inference_client import (
-    create_default_client,
-    InferenceRequest,
-    InferenceClient,
-    is_sequential_model,
-    get_current_runtime_config,
-    get_model_for_task,
-    set_runtime_model_profile,
-    set_runtime_model_override,
-    reset_runtime_overrides,
-    model_supports_thinking,
-    _MODEL_PROFILES,
-)
-from .ai_client import (
-    get_deepseek_client,
-    CHAT_MODEL,
-    REASONER_MODEL,
-    APIError,
-    RateLimitError,
-    APITimeoutError,
-)
-__all__ = [
-    "create_default_client",
-    "InferenceRequest",
-    "InferenceClient",
-    "is_sequential_model",
-    "get_current_runtime_config",
-    "get_model_for_task",
-    "set_runtime_model_profile",
-    "set_runtime_model_override",
-    "reset_runtime_overrides",
-    "model_supports_thinking",
-    "_MODEL_PROFILES",
-    "get_deepseek_client",
-    "CHAT_MODEL",
-    "REASONER_MODEL",
-    "APIError",
-    "RateLimitError",
-    "APITimeoutError",
-]


1	"""Backend service helpers for inference, logging, and integrations."""

services/ai_client.py DELETED Viewed

@@ -1,28 +0,0 @@
-import os
-from openai import OpenAI, APIError, RateLimitError, APITimeoutError
-from functools import lru_cache
-__all__ = [
-    "get_deepseek_client",
-    "CHAT_MODEL",
-    "REASONER_MODEL",
-    "DEEPSEEK_BASE_URL",
-    "APIError",
-    "RateLimitError",
-    "APITimeoutError",
-]
-DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
-CHAT_MODEL = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
-REASONER_MODEL = os.getenv("DEEPSEEK_REASONER_MODEL", "deepseek-reasoner")
-@lru_cache(maxsize=1)
-def get_deepseek_client() -> OpenAI:
-    api_key = os.getenv("DEEPSEEK_API_KEY")
-    if not api_key:
-        raise ValueError("DEEPSEEK_API_KEY environment variable not set")
-    return OpenAI(
-        api_key=api_key,
-        base_url=DEEPSEEK_BASE_URL,
-    )

services/curriculum_service.py DELETED Viewed

@@ -1,232 +0,0 @@
-"""
-Curriculum Service - Firestore-backed curriculum data.
-Fetches subjects, topics, and modules from Firestore.
-Falls back to static data if Firestore is unavailable.
-"""
-import logging
-import os
-from typing import Any, Dict, List, Optional
-logger = logging.getLogger(__name__)
-# Static curriculum data as fallback
-_STATIC_SUBJECTS = [
-    {
-        "id": "gen-math",
-        "code": "GEN MATH",
-        "name": "General Mathematics",
-        "gradeLevel": "Grade 11",
-        "semester": "1st Semester",
-        "color": "from-blue-500 to-cyan-500",
-        "pdfAvailable": True,
-        "topics": [
-            {"id": "gen-math-001", "name": "Patterns and Real-Life Relationships", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-002", "name": "Functions as Mathematical Models", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-003", "name": "Function Notation and Evaluation", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-004", "name": "Domain and Range of Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-005", "name": "Operations on Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-006", "name": "Composite Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-007", "name": "Inverse Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-008", "name": "Graphs of Rational Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-009", "name": "Graphs of Exponential Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-010", "name": "Graphs of Logarithmic Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-011", "name": "Simple and Compound Interest", "unit": "Financial Mathematics"},
-            {"id": "gen-math-012", "name": "Simple and General Annuities", "unit": "Financial Mathematics"},
-            {"id": "gen-math-013", "name": "Present and Future Value", "unit": "Financial Mathematics"},
-            {"id": "gen-math-014", "name": "Loans, Amortization, and Sinking Funds", "unit": "Financial Mathematics"},
-            {"id": "gen-math-015", "name": "Stocks, Bonds, and Market Indices", "unit": "Financial Mathematics"},
-            {"id": "gen-math-016", "name": "Business Decision-Making with Mathematical Models", "unit": "Financial Mathematics"},
-            {"id": "gen-math-017", "name": "Propositions and Logical Connectives", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-018", "name": "Truth Values and Truth Tables", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-019", "name": "Logical Equivalence and Implication", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-020", "name": "Quantifiers and Negation", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-021", "name": "Validity of Arguments", "unit": "Logic and Mathematical Reasoning"},
-        ]
-    },
-    {
-        "id": "stats-prob",
-        "code": "STAT&PROB",
-        "name": "Statistics and Probability",
-        "gradeLevel": "Grade 11",
-        "semester": "2nd Semester",
-        "color": "from-sky-500 to-cyan-500",
-        "pdfAvailable": True,
-        "topics": [
-            {"id": "stat-001", "name": "Random Variables", "unit": "Random Variables"},
-            {"id": "stat-002", "name": "Discrete Probability Distributions", "unit": "Random Variables"},
-            {"id": "stat-003", "name": "Mean and Variance of Discrete RV", "unit": "Random Variables"},
-            {"id": "stat-004", "name": "Normal Distribution", "unit": "Normal Distribution"},
-            {"id": "stat-005", "name": "Standard Normal Distribution and Z-scores", "unit": "Normal Distribution"},
-            {"id": "stat-006", "name": "Areas Under the Normal Curve", "unit": "Normal Distribution"},
-            {"id": "stat-007", "name": "Sampling Distributions", "unit": "Sampling and Estimation"},
-            {"id": "stat-008", "name": "Central Limit Theorem", "unit": "Sampling and Estimation"},
-            {"id": "stat-009", "name": "Point Estimation", "unit": "Sampling and Estimation"},
-            {"id": "stat-010", "name": "Confidence Intervals", "unit": "Sampling and Estimation"},
-            {"id": "stat-011", "name": "Hypothesis Testing Concepts", "unit": "Hypothesis Testing"},
-            {"id": "stat-012", "name": "T-test", "unit": "Hypothesis Testing"},
-            {"id": "stat-013", "name": "Z-test", "unit": "Hypothesis Testing"},
-            {"id": "stat-014", "name": "Correlation and Regression", "unit": "Correlation and Regression"},
-        ]
-    },
-    {
-        "id": "pre-calc",
-        "code": "PRE-CALC",
-        "name": "Pre-Calculus",
-        "gradeLevel": "Grade 12",
-        "semester": "1st Semester",
-        "color": "from-orange-500 to-red-500",
-        "pdfAvailable": False,
-        "topics": [
-            {"id": "pre-calc-001", "name": "Conic Sections - Parabola", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-002", "name": "Conic Sections - Ellipse", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-003", "name": "Conic Sections - Hyperbola", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-004", "name": "Conic Sections - Circle", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-005", "name": "Systems of Nonlinear Equations", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-006", "name": "Sequences and Series", "unit": "Series and Induction"},
-            {"id": "pre-calc-007", "name": "Arithmetic Sequences", "unit": "Series and Induction"},
-            {"id": "pre-calc-008", "name": "Geometric Sequences", "unit": "Series and Induction"},
-            {"id": "pre-calc-009", "name": "Mathematical Induction", "unit": "Series and Induction"},
-            {"id": "pre-calc-010", "name": "Binomial Theorem", "unit": "Series and Induction"},
-            {"id": "pre-calc-011", "name": "Angles and Unit Circle", "unit": "Trigonometry"},
-            {"id": "pre-calc-012", "name": "Trigonometric Functions", "unit": "Trigonometry"},
-            {"id": "pre-calc-013", "name": "Trigonometric Identities", "unit": "Trigonometry"},
-            {"id": "pre-calc-014", "name": "Sum and Difference Formulas", "unit": "Trigonometry"},
-            {"id": "pre-calc-015", "name": "Inverse Trigonometric Functions", "unit": "Trigonometry"},
-            {"id": "pre-calc-016", "name": "Polar Coordinates", "unit": "Trigonometry"},
-        ]
-    },
-    {
-        "id": "basic-calc",
-        "code": "BASIC CALC",
-        "name": "Basic Calculus",
-        "gradeLevel": "Grade 12",
-        "semester": "2nd Semester",
-        "color": "from-green-500 to-teal-500",
-        "pdfAvailable": True,
-        "topics": [
-            {"id": "calc-001", "name": "Limits of Functions", "unit": "Limits"},
-            {"id": "calc-002", "name": "Limit Theorems", "unit": "Limits"},
-            {"id": "calc-003", "name": "One-Sided Limits", "unit": "Limits"},
-            {"id": "calc-004", "name": "Infinite Limits and Limits at Infinity", "unit": "Limits"},
-            {"id": "calc-005", "name": "Continuity of Functions", "unit": "Limits"},
-            {"id": "calc-006", "name": "Definition of the Derivative", "unit": "Derivatives"},
-            {"id": "calc-007", "name": "Differentiation Rules", "unit": "Derivatives"},
-            {"id": "calc-008", "name": "Chain Rule", "unit": "Derivatives"},
-            {"id": "calc-009", "name": "Implicit Differentiation", "unit": "Derivatives"},
-            {"id": "calc-010", "name": "Higher-Order Derivatives", "unit": "Derivatives"},
-            {"id": "calc-011", "name": "Related Rates", "unit": "Derivatives"},
-            {"id": "calc-012", "name": "Extrema and the First Derivative Test", "unit": "Derivatives"},
-            {"id": "calc-013", "name": "Concavity and the Second Derivative Test", "unit": "Derivatives"},
-            {"id": "calc-014", "name": "Optimization Problems", "unit": "Derivatives"},
-            {"id": "calc-015", "name": "Antiderivatives and Indefinite Integrals", "unit": "Integration"},
-            {"id": "calc-016", "name": "Definite Integrals and the FTC", "unit": "Integration"},
-            {"id": "calc-017", "name": "Integration by Substitution", "unit": "Integration"},
-            {"id": "calc-018", "name": "Area Under a Curve", "unit": "Integration"},
-        ]
-    },
-]
-_firestore_db = None
-def _get_firestore_db():
-    """Initialize Firestore client."""
-    global _firestore_db
-    if _firestore_db is not None:
-        return _firestore_db
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            # Try service account from env or default credentials
-            import json
-            svc_account = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-            if svc_account:
-                sa_creds = json.loads(svc_account)
-                firebase_admin.initialize_app(firebase_admin.Certificate(sa_creds))
-            else:
-                firebase_admin.initialize_app()
-        _firestore_db = firestore.client()
-        return _firestore_db
-    except Exception as e:
-        logger.warning(f"Could not initialize Firestore: {e}")
-        return None
-def get_subjects(grade_level: Optional[str] = None) -> List[Dict[str, Any]]:
-    """
-    Fetch all subjects from Firestore.
-    Falls back to static data if Firestore unavailable.
-    Defaults to Grade 11 (SHS) if no grade specified.
-    """
-    # Default to Grade 11 (SHS) - only serve Grade 11 students for now
-    if grade_level is None:
-        grade_level = "Grade 11"
-    db = _get_firestore_db()
-    if db is not None:
-        try:
-            subjects_ref = db.collection("subjects")
-            if grade_level:
-                subjects_ref = subjects_ref.where("gradeLevel", "==", grade_level)
-            docs = subjects_ref.stream()
-            subjects = []
-            for doc in docs:
-                data = doc.to_dict()
-                if data:
-                    data["id"] = doc.id
-                    subjects.append(data)
-            if subjects:
-                logger.info(f"Loaded {len(subjects)} subjects from Firestore")
-                return subjects
-        except Exception as e:
-            logger.warning(f"Firestore fetch failed, using static data: {e}")
-    # Static fallback
-    if grade_level:
-        return [s for s in _STATIC_SUBJECTS if s.get("gradeLevel") == grade_level]
-    return list(_STATIC_SUBJECTS)
-def get_subject(subject_id: str) -> Optional[Dict[str, Any]]:
-    """Fetch a single subject by ID."""
-    db = _get_firestore_db()
-    if db is not None:
-        try:
-            doc = db.collection("subjects").document(subject_id).get()
-            if doc.exists:
-                data = doc.to_dict()
-                data["id"] = doc.id
-                return data
-        except Exception as e:
-            logger.warning(f"Firestore fetch failed for {subject_id}: {e}")
-    # Static fallback
-    for subject in _STATIC_SUBJECTS:
-        if subject["id"] == subject_id:
-            return dict(subject)
-    return None
-def get_topics(subject_id: str) -> List[Dict[str, Any]]:
-    """Fetch all topics for a subject."""
-    subject = get_subject(subject_id)
-    if subject:
-        return subject.get("topics", [])
-    return []
-def get_topic(subject_id: str, topic_id: str) -> Optional[Dict[str, Any]]:
-    """Fetch a single topic."""
-    topics = get_topics(subject_id)
-    for topic in topics:
-        if topic["id"] == topic_id:
-            return topic
-    return None

services/inference_client.py CHANGED Viewed

@@ -10,198 +10,13 @@ from typing import Any, Dict, List, Optional, Tuple
 import requests
 import yaml
-from openai import OpenAI, APIError, RateLimitError, APITimeoutError
-from .ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL, DEEPSEEK_BASE_URL
 from .logging_utils import configure_structured_logging, log_model_call
 LOGGER = configure_structured_logging("mathpulse.inference")
 TEMP_CHAT_MODEL_OVERRIDE_ENV = "INFERENCE_CHAT_MODEL_TEMP_OVERRIDE"
-# ── Model Profiles ────────────────────────────────────────────────────────────
-# A profile sets multiple env defaults in one shot.
-# Individual env vars (DEEPSEEK_MODEL, DEEPSEEK_REASONER_MODEL, etc.) still override.
-# Usage: MODEL_PROFILE=dev  or  MODEL_PROFILE=prod  or  MODEL_PROFILE=budget
-# Profiles can also be applied at runtime via the admin panel without restart.
-_MODEL_PROFILES: dict[str, dict[str, str]] = {
-    "dev": {
-        "INFERENCE_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
-        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
-        "HF_RAG_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
-    },
-    "prod": {
-        "INFERENCE_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
-        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
-        "HF_RAG_MODEL_ID": REASONER_MODEL,
-        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
-    },
-    "budget": {
-        "INFERENCE_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
-        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
-        "HF_RAG_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
-    },
-}
-# ── Runtime Override Store ────────────────────────────────────────────────────
-# Mutated at runtime by the admin panel via /api/admin/model-config.
-# Priority: above env vars, below INFERENCE_ENFORCE_LOCK_MODEL.
-# Persisted to Firestore so backend cold-restarts restore the last admin-set config.
-_RUNTIME_OVERRIDES: dict[str, str] = {}
-_RUNTIME_PROFILE: str = ""
-_FS_COLLECTION = "system_config"
-_FS_DOC = "active_model_config"
-def _save_runtime_config_to_firestore() -> None:
-    try:
-        from firebase_admin import firestore as fs
-        db = fs.client()
-        db.collection(_FS_COLLECTION).document(_FS_DOC).set(
-            {
-                "profile": _RUNTIME_PROFILE,
-                "overrides": _RUNTIME_OVERRIDES,
-                "updatedAt": fs.SERVER_TIMESTAMP,
-            }
-        )
-    except Exception as e:
-        LOGGER.warning("Could not persist model config to Firestore: %s", e)
-def _load_runtime_config_from_firestore() -> None:
-    try:
-        from firebase_admin import firestore as fs
-        db = fs.client()
-        doc = db.collection(_FS_COLLECTION).document(_FS_DOC).get()
-        if not doc.exists:
-            return
-        data = doc.to_dict() or {}
-        profile = str(data.get("profile", "")).strip().lower()
-        overrides = data.get("overrides", {})
-        if profile and profile in _MODEL_PROFILES:
-            global _RUNTIME_PROFILE
-            _RUNTIME_PROFILE = profile
-            _RUNTIME_OVERRIDES.clear()
-            _RUNTIME_OVERRIDES.update(_MODEL_PROFILES[profile])
-        if isinstance(overrides, dict):
-            for key, value in overrides.items():
-                _RUNTIME_OVERRIDES[str(key)] = str(value)
-        LOGGER.info("Restored runtime model config from Firestore: profile=%s", profile)
-    except ImportError:
-        LOGGER.debug("Firebase not available (optional for DeepSeek-only)")
-    except Exception as e:
-        LOGGER.warning("Could not restore model config from Firestore: %s", e)
-def _apply_model_profile() -> None:
-    profile_name = os.getenv("MODEL_PROFILE", "").strip().lower()
-    if not profile_name:
-        return
-    profile = _MODEL_PROFILES.get(profile_name)
-    if profile is None:
-        LOGGER.warning("MODEL_PROFILE='%s' is not a known profile.", profile_name)
-        return
-    for key, value in profile.items():
-        if not os.environ.get(key):
-            os.environ[key] = value
-    LOGGER.info("Startup model profile applied: %s", profile_name)
-_apply_model_profile()
-_load_runtime_config_from_firestore()
-def set_runtime_model_profile(profile_name: str) -> None:
-    """Apply a named profile at runtime without restarting the process."""
-    global _RUNTIME_PROFILE, _RUNTIME_OVERRIDES
-    normalized = profile_name.strip().lower()
-    profile = _MODEL_PROFILES.get(normalized)
-    if not profile:
-        raise ValueError(
-            f"Unknown profile: '{profile_name}'. Valid values: {list(_MODEL_PROFILES.keys())}"
-        )
-    _RUNTIME_PROFILE = normalized
-    _RUNTIME_OVERRIDES.clear()
-    _RUNTIME_OVERRIDES.update(profile)
-    LOGGER.info("Runtime model profile switched to: %s", profile_name)
-    _save_runtime_config_to_firestore()
-def set_runtime_model_override(key: str, value: str) -> None:
-    """Set a single model env key at runtime."""
-    _RUNTIME_OVERRIDES[key] = value
-    LOGGER.info("Runtime model override set: %s = %s", key, value)
-    _save_runtime_config_to_firestore()
-def reset_runtime_overrides() -> None:
-    """Clear all runtime overrides."""
-    global _RUNTIME_PROFILE
-    _RUNTIME_OVERRIDES.clear()
-    _RUNTIME_PROFILE = ""
-    LOGGER.info("Runtime model overrides cleared.")
-    _save_runtime_config_to_firestore()
-def get_current_runtime_config() -> dict:
-    resolved: dict[str, str] = {}
-    for key in {
-        "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
-        "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
-    }:
-        resolved[key] = _resolve_key(key)
-    return {
-        "profile": _RUNTIME_PROFILE,
-        "overrides": dict(_RUNTIME_OVERRIDES),
-        "resolved": resolved,
-    }
-def _resolve_key(key: str) -> str:
-    if value := _RUNTIME_OVERRIDES.get(key):
-        return value
-    if _RUNTIME_PROFILE and _RUNTIME_PROFILE in _MODEL_PROFILES:
-        if value := _MODEL_PROFILES[_RUNTIME_PROFILE].get(key):
-            return value
-    return os.getenv(key, "")
-def get_model_for_task(task_type: str) -> str:
-    task = (task_type or "default").strip().lower()
-    enforce_lock = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
-    if enforce_lock:
-        override = (
-            _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID")
-            or os.getenv("INFERENCE_LOCK_MODEL_ID")
-            or CHAT_MODEL
-        )
-        return override
-    task_key_map = {
-        "chat": "INFERENCE_CHAT_MODEL_ID",
-        "quiz_generation": "HF_QUIZ_MODEL_ID",
-        "rag_lesson": "HF_RAG_MODEL_ID",
-        "rag_problem": "HF_RAG_MODEL_ID",
-        "rag_analysis_context": "HF_RAG_MODEL_ID",
-    }
-    if env_key := task_key_map.get(task):
-        if resolved := _resolve_key(env_key):
-            return resolved
-    return _resolve_key("INFERENCE_MODEL_ID") or CHAT_MODEL
-def model_supports_thinking(model_id: str = "") -> bool:
-    mid = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
-    return mid == REASONER_MODEL
 def _normalize_local_space_url(raw_url: str) -> str:
     """Accept either hf.space host or huggingface.co/spaces URL for local_space provider."""
@@ -209,6 +24,8 @@ def _normalize_local_space_url(raw_url: str) -> str:
     if not cleaned:
         return "http://127.0.0.1:7860"
     match = re.match(r"^https?://huggingface\.co/spaces/([^/]+)/([^/]+)$", cleaned, re.IGNORECASE)
     if match:
         owner = match.group(1).strip().lower()
@@ -224,41 +41,38 @@ class InferenceRequest:
     model: Optional[str] = None
     task_type: str = "default"
     request_tag: str = ""
-    max_new_tokens: int = 900
     temperature: float = 0.2
     top_p: float = 0.9
     repetition_penalty: float = 1.15
     timeout_sec: Optional[int] = None
-    enable_thinking: bool = False
 class InferenceClient:
-    def __init__(self, firestore_client: Optional[Any] = None) -> None:
-        self.firestore = firestore_client
-        self._last_persist_time = 0.0
-        self._persist_throttle_sec = 30.0
         config_paths = [
-            Path("./config/models.yaml"),
-            Path("/config/models.yaml"),
-            Path("/app/config/models.yaml"),
-            Path.cwd() / "config" / "models.yaml",
-            Path(__file__).resolve().parents[2] / "config" / "models.yaml",
         ]
         config: Dict[str, object] = {}
         config_path = None
         for path in config_paths:
             if path.exists():
                 config_path = path
                 with path.open("r", encoding="utf-8") as fh:
                     config = yaml.safe_load(fh) or {}
-                LOGGER.info(f"??? Loaded config from {config_path}")
                 break
         if not config_path:
-            LOGGER.warning(f"??????  Config file not found. Checked: {[str(p) for p in config_paths]}")
             LOGGER.warning(f"    CWD: {Path.cwd()}")
             LOGGER.warning(f"    Using hardcoded defaults")
@@ -270,43 +84,69 @@ class InferenceClient:
                 if isinstance(primary_cfg, dict):
                     primary = primary_cfg
-        self.provider = "deepseek"
-        self.ds_api_key = os.getenv("DEEPSEEK_API_KEY", "")
-        self.ds_base_url = os.getenv("DEEPSEEK_BASE_URL", DEEPSEEK_BASE_URL)
-        self.ds_chat_model = os.getenv("DEEPSEEK_MODEL", CHAT_MODEL)
-        self.ds_reasoner_model = os.getenv("DEEPSEEK_REASONER_MODEL", REASONER_MODEL)
         self.local_space_url = _normalize_local_space_url(
             os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
         )
         self.local_generate_path = os.getenv("INFERENCE_LOCAL_SPACE_GENERATE_PATH", "/gradio_api/call/generate")
-        self.enforce_lock_model = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
-        self.lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", CHAT_MODEL).strip() or CHAT_MODEL
-        default_model_fallback = str(primary.get("id") or CHAT_MODEL)
         env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
         self.default_model = env_model_id or default_model_fallback
         default_max_tokens = str(primary.get("max_new_tokens") or 512)
         self.default_max_new_tokens = int(os.getenv("INFERENCE_MAX_NEW_TOKENS", default_max_tokens))
         default_temp = str(primary.get("temperature") or 0.2)
         self.default_temperature = float(os.getenv("INFERENCE_TEMPERATURE", default_temp))
         default_top_p = str(primary.get("top_p") or 0.9)
         self.default_top_p = float(os.getenv("INFERENCE_TOP_P", default_top_p))
         self.chat_model_override = os.getenv("INFERENCE_CHAT_MODEL_ID", "").strip()
         self.chat_model_temp_override = os.getenv(TEMP_CHAT_MODEL_OVERRIDE_ENV, "").strip()
         self.chat_strict_model_only = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
-        self.ds_timeout_sec = int(os.getenv("INFERENCE_HF_TIMEOUT_SEC", "90"))
         self.local_timeout_sec = int(os.getenv("INFERENCE_LOCAL_SPACE_TIMEOUT_SEC", "90"))
         self.max_retries = int(os.getenv("INFERENCE_MAX_RETRIES", "3"))
         self.backoff_sec = float(os.getenv("INFERENCE_BACKOFF_SEC", "1.5"))
-        self.interactive_timeout_sec = int(os.getenv("INFERENCE_INTERACTIVE_TIMEOUT_SEC", str(self.ds_timeout_sec)))
-        self.background_timeout_sec = int(os.getenv("INFERENCE_BACKGROUND_TIMEOUT_SEC", str(self.ds_timeout_sec)))
         self.interactive_max_retries = int(os.getenv("INFERENCE_INTERACTIVE_MAX_RETRIES", str(self.max_retries)))
         self.background_max_retries = int(os.getenv("INFERENCE_BACKGROUND_MAX_RETRIES", str(self.max_retries)))
         self.interactive_backoff_sec = float(os.getenv("INFERENCE_INTERACTIVE_BACKOFF_SEC", str(self.backoff_sec)))
@@ -327,6 +167,12 @@ class InferenceClient:
         )
         self.cpu_only_tasks = {v.strip().lower() for v in cpu_tasks_raw.split(",") if v.strip()}
         interactive_tasks_raw = os.getenv(
             "INFERENCE_INTERACTIVE_TASKS",
             "chat,verify_solution,daily_insight",
@@ -338,20 +184,29 @@ class InferenceClient:
         )
         # Default task-to-model routing.
         self.task_model_map: Dict[str, str] = {
-            "chat": CHAT_MODEL,
-            "verify_solution": CHAT_MODEL,
-            "lesson_generation": CHAT_MODEL,
-            "quiz_generation": CHAT_MODEL,
-            "learning_path": CHAT_MODEL,
-            "daily_insight": CHAT_MODEL,
-            "risk_classification": CHAT_MODEL,
-            "risk_narrative": CHAT_MODEL,
         }
         self.task_fallback_model_map: Dict[str, List[str]] = {
-            "chat": [CHAT_MODEL],
-            "verify_solution": [CHAT_MODEL],
         }
         self.model_provider_map: Dict[str, str] = {}
         self.task_provider_map: Dict[str, str] = {}
         if isinstance(config, dict):
@@ -364,6 +219,7 @@ class InferenceClient:
                         for task, model in task_models.items()
                         if str(task).strip() and str(model).strip()
                     }
                     self.task_model_map.update(config_task_models)
                 task_fallback_models = routing_cfg.get("task_fallback_model_map", {})
                 if isinstance(task_fallback_models, dict):
@@ -395,7 +251,7 @@ class InferenceClient:
             for task_key in list(self.task_model_map.keys()):
                 self.task_model_map[task_key] = env_model_id
             LOGGER.info(
-                f"???? INFERENCE_MODEL_ID env var override applied: {env_model_id}"
             )
             LOGGER.info(
                 f"   Task model mappings changed from: {original_map}"
@@ -404,27 +260,29 @@ class InferenceClient:
         else:
             env_override_note = ""
-        if self.enforce_lock_model:
-            lock_map_before = dict(self.task_model_map)
-            self.default_model = self.lock_model_id
             for task_key in list(self.task_model_map.keys()):
-                self.task_model_map[task_key] = self.lock_model_id
             self.fallback_models = []
             self.task_fallback_model_map = {
                 task_key: [] for task_key in self.task_model_map.keys()
             }
-            LOGGER.info(f"???? INFERENCE_ENFORCE_LOCK_MODEL enabled: locking all inference tasks to {self.lock_model_id}")
-            LOGGER.info(f"   Cleared fallback models")
-            LOGGER.info(f"   Task model mappings forced from: {lock_map_before}")
         config_status = "from file" if config_path else "hardcoded defaults (no config file found)"
         effective_chat_model_for_logs = self.chat_model_override or self.task_model_map.get("chat", self.default_model)
-        LOGGER.info(f"??? InferenceClient initialized {config_status}{env_override_note}")
         LOGGER.info(f"   Default model: {self.default_model}")
         LOGGER.info(f"   Chat model: {effective_chat_model_for_logs}")
         LOGGER.info(f"   Chat temp override ({TEMP_CHAT_MODEL_OVERRIDE_ENV}): {self.chat_model_temp_override or 'disabled'}")
         LOGGER.info(f"   Chat strict model lock: {self.chat_strict_model_only}")
-        LOGGER.info(f"   Global model lock: {self.enforce_lock_model}")
         LOGGER.info(f"   Verify solution model: {self.task_model_map.get('verify_solution', self.default_model)}")
         LOGGER.info(f"   Full task_model_map: {self.task_model_map}")
@@ -436,23 +294,18 @@ class InferenceClient:
             "requests_error": 0,
             "retries_total": 0,
             "fallback_attempts": 0,
-            "latency_sum_ms": 0.0,
-            "latency_count": 0,
             "route_counts": {},
             "task_counts": {},
             "provider_counts": {},
             "status_code_counts": {},
         }
-        self._load_persistent_metrics()
     def _bump_metric(self, key: str, inc: int = 1) -> None:
         with self._metrics_lock:
             current = self._metrics.get(key) or 0
             if not isinstance(current, int):
                 current = 0
             self._metrics[key] = current + inc
-        self._persist_metrics()
     def _bump_bucket(self, key: str, bucket: str, inc: int = 1) -> None:
         with self._metrics_lock:
@@ -464,50 +317,6 @@ class InferenceClient:
             if not isinstance(current, int):
                 current = 0
             mapping[bucket] = current + inc
-        self._persist_metrics()
-    def _record_completion(self, *, latency_ms: float) -> None:
-        with self._metrics_lock:
-            self._metrics["latency_sum_ms"] = (self._metrics.get("latency_sum_ms") or 0.0) + latency_ms
-            self._metrics["latency_count"] = (self._metrics.get("latency_count") or 0) + 1
-        self._persist_metrics()
-    def _load_persistent_metrics(self) -> None:
-        if not self.firestore:
-            return
-        try:
-            doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
-            doc = doc_ref.get()
-            if doc.exists:
-                data = doc.to_dict() or {}
-                with self._metrics_lock:
-                    for k, v in data.items():
-                        if k in self._metrics:
-                            if isinstance(v, (int, float)):
-                                self._metrics[k] = v
-                            elif isinstance(v, dict) and isinstance(self._metrics[k], dict):
-                                self._metrics[k].update(v)
-                LOGGER.info("??? Persistent inference metrics loaded from Firestore")
-        except Exception as e:
-            LOGGER.warning(f"?????? Failed to load persistent metrics: {e}")
-    def _persist_metrics(self, force: bool = False) -> None:
-        if not self.firestore:
-            return
-        now = time.time()
-        if not force and (now - self._last_persist_time < self._persist_throttle_sec):
-            return
-        try:
-            self._last_persist_time = now
-            doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
-            with self._metrics_lock:
-                snapshot = dict(self._metrics)
-            doc_ref.set(snapshot, merge=True)
-        except Exception as e:
-            LOGGER.warning(f"?????? Failed to persist metrics: {e}")
     def _record_attempt(self, *, task_type: str, provider: str, route: str, fallback_depth: int) -> None:
         self._bump_metric("requests_total", 1)
@@ -519,10 +328,6 @@ class InferenceClient:
     def snapshot_metrics(self) -> Dict[str, Any]:
         with self._metrics_lock:
-            l_sum = self._metrics.get("latency_sum_ms") or 0.0
-            l_count = self._metrics.get("latency_count") or 0
-            avg_latency = round(l_sum / l_count, 2) if l_count > 0 else 0.0
             snapshot = {
                 "uptime_sec": round(max(0.0, time.time() - self._metrics_started_at), 2),
                 "requests_total": self._metrics.get("requests_total") or 0,
@@ -530,9 +335,6 @@ class InferenceClient:
                 "requests_error": self._metrics.get("requests_error") or 0,
                 "retries_total": self._metrics.get("retries_total") or 0,
                 "fallback_attempts": self._metrics.get("fallback_attempts") or 0,
-                "avg_latency_ms": avg_latency,
-                "active_model": self.default_model,
-                "primary_provider": self.provider,
                 "route_counts": dict(self._metrics.get("route_counts") or {}),
                 "task_counts": dict(self._metrics.get("task_counts") or {}),
                 "provider_counts": dict(self._metrics.get("provider_counts") or {}),
@@ -544,18 +346,22 @@ class InferenceClient:
         effective_task = (req.task_type or "default").strip().lower()
         request_tag = req.request_tag.strip() or f"{effective_task}-{int(time.time() * 1000)}"
         selected_model, model_selection_source = self._resolve_primary_model(req)
         model_chain = self._model_chain_for_task(effective_task, selected_model)
         last_error: Optional[Exception] = None
-        model_base = selected_model
         LOGGER.info(
-            f"???? request_tag={request_tag} task={effective_task} source={model_selection_source} "
-            f"selected_model={model_base} (primary)"
         )
         LOGGER.info(f"   fallback_chain={model_chain[1:] if len(model_chain) > 1 else 'none'}")
         for fallback_depth, model_name in enumerate(model_chain):
             request_for_model = InferenceRequest(
                 messages=req.messages,
@@ -568,19 +374,20 @@ class InferenceClient:
                 repetition_penalty=req.repetition_penalty,
                 timeout_sec=req.timeout_sec,
             )
-            try:
-                result = self._call_deepseek(request_for_model, fallback_depth)
-                if fallback_depth > 0:
-                    LOGGER.info(f"??? Fallback succeeded at depth={fallback_depth} model={model_name}")
-                return result
-            except Exception as exc:
-                last_error = exc
-                fallback_hint = f" (depth {fallback_depth})" if fallback_depth > 0 else ""
-                LOGGER.warning(
-                    f"??????  Attempt failed{fallback_hint}: task={request_for_model.task_type} "
-                    f"model={model_name} error={exc.__class__.__name__}: {str(exc)[:100]}"
-                )
         if last_error:
             raise last_error
@@ -593,6 +400,10 @@ class InferenceClient:
         effective_task = (req.task_type or "default").strip().lower()
         runtime_chat_override = self._runtime_chat_model_override()
         if effective_task == "chat" and runtime_chat_override:
             selected_model = runtime_chat_override
             model_selection_source = "chat_temp_override_env"
@@ -606,39 +417,107 @@ class InferenceClient:
             selected_model = self.task_model_map.get(effective_task, self.default_model)
             model_selection_source = "task_map"
-        if self.enforce_lock_model:
-            effective_lock_model_id = self.lock_model_id
             if effective_task == "chat":
-                effective_lock_model_id = runtime_chat_override or self.chat_model_override or self.lock_model_id
-            selected_base = (selected_model or "").split(":", 1)[0].strip()
-            lock_base = (effective_lock_model_id or "").split(":", 1)[0].strip()
             if selected_base != lock_base:
                 LOGGER.warning(
-                    f"?????? Model lock replaced requested model {selected_model} with {effective_lock_model_id}"
                 )
-            selected_model = effective_lock_model_id
-            model_selection_source = f"{model_selection_source}:model_lock"
         if effective_task == "chat" and self.chat_strict_model_only:
             return selected_model, f"{model_selection_source}:chat_strict_model_only"
         return selected_model, model_selection_source
     def _model_chain_for_task(self, task_type: str, selected_model: str) -> List[str]:
         normalized = (task_type or "default").strip().lower()
         runtime_chat_override = self._runtime_chat_model_override() if normalized == "chat" else ""
-        chat_lock_model_id = runtime_chat_override or (self.chat_model_override if normalized == "chat" else "")
-        if self.enforce_lock_model:
             if normalized == "chat":
-                locked_model = (chat_lock_model_id or self.lock_model_id or "").strip()
             else:
-                locked_model = (self.lock_model_id or "").strip()
             return [locked_model] if locked_model else []
         if normalized == "chat" and self.chat_strict_model_only:
-            chat_model = (chat_lock_model_id or selected_model or "").strip()
             return [chat_model] if chat_model else []
         per_task_candidates = self.task_fallback_model_map.get(task_type, [])
@@ -658,6 +537,34 @@ class InferenceClient:
             return deduped[:max_models]
         return deduped
     def _retry_profile(self, task_type: str) -> Tuple[int, float]:
         normalized = (task_type or "default").strip().lower()
         if normalized in self.interactive_tasks:
@@ -674,6 +581,20 @@ class InferenceClient:
             return self.interactive_timeout_sec
         return self.background_timeout_sec
     def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
         parts: List[str] = []
         for msg in messages:
@@ -686,9 +607,9 @@ class InferenceClient:
                 prefix = "SYSTEM"
             elif role == "assistant":
                 prefix = "ASSISTANT"
-            parts.append(f"{prefix}:\n{content}")
         parts.append("ASSISTANT:")
-        return "\n\n".join(parts)
     def _latest_user_message(self, messages: List[Dict[str, str]]) -> str:
         for msg in reversed(messages):
@@ -698,223 +619,160 @@ class InferenceClient:
                 return content
         return self._messages_to_prompt(messages)
-    def _call_deepseek(self, req: InferenceRequest, fallback_depth: int) -> str:
-        """Call DeepSeek API with OpenAI-compatible chat completions."""
-        if not self.ds_api_key:
-            raise RuntimeError("DEEPSEEK_API_KEY is not set")
-        target_model = req.model or self.default_model
-        route = "deepseek"
-        task_type = req.task_type or "default"
-        LOGGER.debug(
-            f"???? Calling DeepSeek: task={task_type} model={target_model} "
-            f"route={route} depth={fallback_depth}"
         )
-        timeout = self._timeout_for(req, "deepseek")
         max_retries, backoff_sec = self._retry_profile(task_type)
-        client = get_deepseek_client()
-        # Build chat completions params
-        params: Dict[str, Any] = {
-            "model": target_model,
-            "messages": req.messages,
-            "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
-        }
-        if target_model == REASONER_MODEL:
-            params["max_tokens"] = req.max_new_tokens or 1024
-        else:
-            params["temperature"] = req.temperature
-            params["top_p"] = req.top_p
-        # Use JSON mode for quiz generation
-        if task_type == "quiz_generation" and target_model != REASONER_MODEL:
-            params["response_format"] = {"type": "json_object"}
-        for attempt in range(max_retries):
-            self._record_attempt(
-                task_type=task_type,
-                provider="deepseek",
-                route=route,
-                fallback_depth=fallback_depth,
-            )
             start = time.perf_counter()
             try:
-                response = client.chat.completions.create(**params, timeout=timeout)
                 latency_ms = (time.perf_counter() - start) * 1000
-                content = response.choices[0].message.content or ""
-                reasoning = getattr(response.choices[0].message, "reasoning_content", None)
-                text = content.strip()
-                if reasoning:
-                    text = f"{reasoning}\n{text}"
                 log_model_call(
                     LOGGER,
-                    provider="deepseek",
-                    model=target_model,
-                    endpoint=self.ds_base_url,
                     latency_ms=latency_ms,
                     input_tokens=None,
                     output_tokens=None,
-                    status="ok",
                     task_type=task_type,
-                    request_tag=req.request_tag,
                     retry_attempt=attempt + 1,
                     fallback_depth=fallback_depth,
                     route=route,
                 )
-                self._record_attempt(
-                    task_type=task_type,
-                    provider="deepseek",
-                    route=route,
-                    fallback_depth=fallback_depth,
-                )
-                self._record_completion(latency_ms=latency_ms)
-                self._bump_metric("requests_ok", 1)
-                return text
-            except RateLimitError:
-                latency_ms = (time.perf_counter() - start) * 1000
-                if attempt < max_retries - 1:
-                    log_model_call(
-                        LOGGER,
-                        provider="deepseek",
-                        model=target_model,
-                        endpoint=self.ds_base_url,
-                        latency_ms=latency_ms,
-                        input_tokens=None,
-                        output_tokens=None,
-                        status="error",
-                        error_class="RateLimitError",
-                        error_message="rate limited",
-                        task_type=task_type,
-                        request_tag=req.request_tag,
-                        retry_attempt=attempt + 1,
-                        fallback_depth=fallback_depth,
-                        route=route,
-                    )
-                    self._bump_metric("retries_total", 1)
-                    time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
-                    continue
-                self._bump_metric("requests_error", 1)
-                raise RuntimeError("DeepSeek API rate limit reached. Please try again shortly.")
-            except APITimeoutError:
-                latency_ms = (time.perf_counter() - start) * 1000
-                if attempt < max_retries - 1:
-                    log_model_call(
-                        LOGGER,
-                        provider="deepseek",
-                        model=target_model,
-                        endpoint=self.ds_base_url,
-                        latency_ms=latency_ms,
-                        input_tokens=None,
-                        output_tokens=None,
-                        status="error",
-                        error_class="APITimeoutError",
-                        error_message="timeout",
-                        task_type=task_type,
-                        request_tag=req.request_tag,
-                        retry_attempt=attempt + 1,
-                        fallback_depth=fallback_depth,
-                        route=route,
-                    )
-                    self._bump_metric("retries_total", 1)
-                    time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
-                    continue
-                self._bump_metric("requests_error", 1)
-                raise RuntimeError("DeepSeek API timed out. Please retry.")
-            except APIError as e:
-                latency_ms = (time.perf_counter() - start) * 1000
-                if attempt < max_retries - 1:
-                    log_model_call(
-                        LOGGER,
-                        provider="deepseek",
-                        model=target_model,
-                        endpoint=self.ds_base_url,
-                        latency_ms=latency_ms,
-                        input_tokens=None,
-                        output_tokens=None,
-                        status="error",
-                        error_class="APIError",
-                        error_message=str(e)[:200],
-                        task_type=task_type,
-                        request_tag=req.request_tag,
-                        retry_attempt=attempt + 1,
-                        fallback_depth=fallback_depth,
-                        route=route,
-                    )
-                    self._bump_metric("retries_total", 1)
-                    time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
-                    continue
-                self._bump_metric("requests_error", 1)
-                raise RuntimeError(f"DeepSeek API error: {str(e)}")
-            except Exception as exc:
-                latency_ms = (time.perf_counter() - start) * 1000
-                self._bump_metric("requests_error", 1)
                 log_model_call(
                     LOGGER,
-                    provider="deepseek",
-                    model=target_model,
-                    endpoint=self.ds_base_url,
                     latency_ms=latency_ms,
                     input_tokens=None,
                     output_tokens=None,
                     status="error",
-                    error_class=exc.__class__.__name__,
-                    error_message=str(exc)[:200],
                     task_type=task_type,
-                    request_tag=req.request_tag,
                     retry_attempt=attempt + 1,
                     fallback_depth=fallback_depth,
                     route=route,
                 )
-                raise
-        raise RuntimeError(f"DeepSeek call failed after {max_retries} attempts")
-    def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
         target_model = req.model or self.default_model
-        url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"
-        prompt = self._messages_to_prompt(req.messages)
-        payload: Dict[str, object] = {
-            "data": [
-                prompt,
-                [],
-                req.temperature,
-                req.top_p,
-                req.max_new_tokens,
-            ]
-        }
-        headers = {"Content-Type": "application/json"}
         timeout = self._timeout_for(req, provider)
-        self._record_attempt(
-            task_type=req.task_type,
-            provider=provider,
-            route=route,
-            fallback_depth=fallback_depth,
-        )
         start = time.perf_counter()
         try:
-            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
         except Exception as exc:
             latency_ms = (time.perf_counter() - start) * 1000
             log_model_call(
                 LOGGER,
-                provider=provider,
-                model=target_model,
-                endpoint=url,
                 latency_ms=latency_ms,
                 input_tokens=None,
                 output_tokens=None,
@@ -927,10 +785,182 @@ class InferenceClient:
                 fallback_depth=fallback_depth,
                 route=route,
             )
-            self._bump_metric("requests_error", 1)
             raise
-        latency_ms = (time.perf_counter() - start) * 1000
         self._bump_bucket("status_code_counts", str(resp.status_code), 1)
         if resp.status_code != 200:
@@ -969,7 +999,7 @@ class InferenceClient:
             status="ok",
             task_type=req.task_type,
             request_tag=req.request_tag,
-            retry_attempt=1,
             fallback_depth=fallback_depth,
             route=route,
         )
@@ -1010,39 +1040,32 @@ class InferenceClient:
     def _clean_response_text(self, text: str) -> str:
         """Strip JSON braces, template artifacts, and whitespace from response text."""
         text = text.strip()
         if text.startswith("{") and text.endswith("}"):
             try:
                 parsed = json.loads(text)
                 if isinstance(parsed, dict):
                     if "content" in parsed:
                         text = str(parsed["content"]).strip()
                     elif "text" in parsed:
                         text = str(parsed["text"]).strip()
             except json.JSONDecodeError:
                 text = text.strip("{}")
         if text.startswith("```json") or text.startswith("```"):
             text = re.sub(r"^```(?:json)?", "", text).strip()
         if text.endswith("```"):
             text = text[:-3].strip()
         return text.strip()
-def create_default_client(firestore_client: Optional[Any] = None) -> InferenceClient:
-    return InferenceClient(firestore_client=firestore_client)
-def is_sequential_model(model_id: str = "") -> bool:
-    mid = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
-    if not mid:
-        return False
-    if mid == REASONER_MODEL:
-        return True
-    if _RUNTIME_OVERRIDES:
-        lock = _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID", "")
-        if lock == REASONER_MODEL:
-            return True
-    return False

 import requests
 import yaml
+from huggingface_hub import InferenceClient as HFInferenceClient
 from .logging_utils import configure_structured_logging, log_model_call
 LOGGER = configure_structured_logging("mathpulse.inference")
 TEMP_CHAT_MODEL_OVERRIDE_ENV = "INFERENCE_CHAT_MODEL_TEMP_OVERRIDE"
 def _normalize_local_space_url(raw_url: str) -> str:
     """Accept either hf.space host or huggingface.co/spaces URL for local_space provider."""
     if not cleaned:
         return "http://127.0.0.1:7860"
+    # Convert page URL format to runtime host format:
+    # https://huggingface.co/spaces/{owner}/{space} -> https://{owner}-{space}.hf.space
     match = re.match(r"^https?://huggingface\.co/spaces/([^/]+)/([^/]+)$", cleaned, re.IGNORECASE)
     if match:
         owner = match.group(1).strip().lower()
     model: Optional[str] = None
     task_type: str = "default"
     request_tag: str = ""
+    max_new_tokens: int = 512
     temperature: float = 0.2
     top_p: float = 0.9
     repetition_penalty: float = 1.15
     timeout_sec: Optional[int] = None
 class InferenceClient:
+    def __init__(self) -> None:
+        # Try multiple config paths (HF Space, Docker, local development)
+        # The deploy script uploads config/ to the space root
         config_paths = [
+            Path("./config/models.yaml"),  # Current working directory (most reliable)
+            Path("/config/models.yaml"),  # HF Space root
+            Path("/app/config/models.yaml"),  # App directory
+            Path.cwd() / "config" / "models.yaml",  # CWD with config subdir
+            Path(__file__).resolve().parents[2] / "config" / "models.yaml",  # Package root
         ]
         config: Dict[str, object] = {}
         config_path = None
         for path in config_paths:
             if path.exists():
                 config_path = path
                 with path.open("r", encoding="utf-8") as fh:
                     config = yaml.safe_load(fh) or {}
+                LOGGER.info(f"✅ Loaded config from {config_path}")
                 break
         if not config_path:
+            LOGGER.warning(f"⚠️  Config file not found. Checked: {[str(p) for p in config_paths]}")
             LOGGER.warning(f"    CWD: {Path.cwd()}")
             LOGGER.warning(f"    Using hardcoded defaults")
                 if isinstance(primary_cfg, dict):
                     primary = primary_cfg
+        self.provider = os.getenv("INFERENCE_PROVIDER", "hf_inference").strip().lower()
+        self.pro_provider = os.getenv("INFERENCE_PRO_PROVIDER", "hf_inference").strip().lower()
+        self.gpu_provider = os.getenv("INFERENCE_GPU_PROVIDER", "hf_inference").strip().lower()
+        self.cpu_provider = os.getenv("INFERENCE_CPU_PROVIDER", "hf_inference").strip().lower()
+        self.enable_provider_fallback = os.getenv("INFERENCE_ENABLE_PROVIDER_FALLBACK", "true").strip().lower() in {"1", "true", "yes", "on"}
+        self.pro_enabled = os.getenv("INFERENCE_PRO_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
+        self.hf_token = os.getenv(
+            "HF_TOKEN",
+            os.getenv("HUGGING_FACE_API_TOKEN", os.getenv("HUGGINGFACE_API_TOKEN", "")),
+        )
+        self.hf_base_url = os.getenv("INFERENCE_HF_BASE_URL", "https://router.huggingface.co/hf-inference/models")
+        self.hf_chat_url = os.getenv("INFERENCE_HF_CHAT_URL", "https://router.huggingface.co/v1/chat/completions")
+        # Featherless AI for Qwen math models (used as fallback when HF router fails)
+        self.featherless_api_key = os.getenv("FEATHERLESS_API_KEY", "")
+        self.featherless_chat_url = os.getenv("FEATHERLESS_CHAT_URL", "https://api.featherless.ai/openai/v1/chat/completions")
         self.local_space_url = _normalize_local_space_url(
             os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
         )
         self.local_generate_path = os.getenv("INFERENCE_LOCAL_SPACE_GENERATE_PATH", "/gradio_api/call/generate")
+        self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
+        self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
+        self.enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
+        self.qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen3-32B").strip() or "Qwen/Qwen3-32B"
+        default_model_fallback = str(primary.get("id") or "Qwen/Qwen3-32B")
         env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
         self.default_model = env_model_id or default_model_fallback
         default_max_tokens = str(primary.get("max_new_tokens") or 512)
         self.default_max_new_tokens = int(os.getenv("INFERENCE_MAX_NEW_TOKENS", default_max_tokens))
         default_temp = str(primary.get("temperature") or 0.2)
         self.default_temperature = float(os.getenv("INFERENCE_TEMPERATURE", default_temp))
         default_top_p = str(primary.get("top_p") or 0.9)
         self.default_top_p = float(os.getenv("INFERENCE_TOP_P", default_top_p))
+        # Task-specific model overrides via environment variables
         self.chat_model_override = os.getenv("INFERENCE_CHAT_MODEL_ID", "").strip()
         self.chat_model_temp_override = os.getenv(TEMP_CHAT_MODEL_OVERRIDE_ENV, "").strip()
         self.chat_strict_model_only = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
+        self.chat_hard_model = os.getenv("INFERENCE_CHAT_HARD_MODEL_ID", "meta-llama/Meta-Llama-3-70B-Instruct").strip()
+        self.chat_hard_trigger_enabled = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
+        self.chat_hard_prompt_chars = max(256, int(os.getenv("INFERENCE_CHAT_HARD_PROMPT_CHARS", "800")))
+        self.chat_hard_history_chars = max(
+            self.chat_hard_prompt_chars,
+            int(os.getenv("INFERENCE_CHAT_HARD_HISTORY_CHARS", "1800")),
+        )
+        hard_keywords_raw = os.getenv(
+            "INFERENCE_CHAT_HARD_KEYWORDS",
+            "step-by-step,show all steps,derive,proof,prove,rigorous,multi-step,word problem",
+        )
+        self.chat_hard_keywords = [kw.strip().lower() for kw in hard_keywords_raw.split(",") if kw.strip()]
+        self.hf_timeout_sec = int(os.getenv("INFERENCE_HF_TIMEOUT_SEC", "90"))
         self.local_timeout_sec = int(os.getenv("INFERENCE_LOCAL_SPACE_TIMEOUT_SEC", "90"))
         self.max_retries = int(os.getenv("INFERENCE_MAX_RETRIES", "3"))
         self.backoff_sec = float(os.getenv("INFERENCE_BACKOFF_SEC", "1.5"))
+        self.interactive_timeout_sec = int(os.getenv("INFERENCE_INTERACTIVE_TIMEOUT_SEC", str(self.hf_timeout_sec)))
+        self.background_timeout_sec = int(os.getenv("INFERENCE_BACKGROUND_TIMEOUT_SEC", str(self.hf_timeout_sec)))
         self.interactive_max_retries = int(os.getenv("INFERENCE_INTERACTIVE_MAX_RETRIES", str(self.max_retries)))
         self.background_max_retries = int(os.getenv("INFERENCE_BACKGROUND_MAX_RETRIES", str(self.max_retries)))
         self.interactive_backoff_sec = float(os.getenv("INFERENCE_INTERACTIVE_BACKOFF_SEC", str(self.backoff_sec)))
         )
         self.cpu_only_tasks = {v.strip().lower() for v in cpu_tasks_raw.split(",") if v.strip()}
+        pro_tasks_raw = os.getenv(
+            "INFERENCE_PRO_PRIORITY_TASKS",
+            "chat,quiz_generation,lesson_generation,learning_path,verify_solution",
+        )
+        self.pro_priority_tasks = {v.strip().lower() for v in pro_tasks_raw.split(",") if v.strip()}
         interactive_tasks_raw = os.getenv(
             "INFERENCE_INTERACTIVE_TASKS",
             "chat,verify_solution,daily_insight",
         )
         # Default task-to-model routing.
+        # Keep all tasks pinned to Qwen3-32B when qwen-only lock is active.
         self.task_model_map: Dict[str, str] = {
+            "chat": "Qwen/Qwen3-32B",
+            "verify_solution": "Qwen/Qwen3-32B",
+            "lesson_generation": "Qwen/Qwen3-32B",
+            "quiz_generation": "Qwen/Qwen3-32B",
+            "learning_path": "Qwen/Qwen3-32B",
+            "daily_insight": "Qwen/Qwen3-32B",
+            "risk_classification": "Qwen/Qwen3-32B",
+            "risk_narrative": "Qwen/Qwen3-32B",
         }
+        # Fallback chains (only to other HF-supported models, no featherless-ai)
         self.task_fallback_model_map: Dict[str, List[str]] = {
+            "chat": [
+                "meta-llama/Llama-3.1-8B-Instruct",
+                "google/gemma-2-2b-it",
+            ],
+            "verify_solution": [
+                "meta-llama/Llama-3.1-8B-Instruct",
+                "google/gemma-2-2b-it",
+            ],
         }
+        # Model-to-provider mappings (not needed when using model:provider syntax directly)
         self.model_provider_map: Dict[str, str] = {}
         self.task_provider_map: Dict[str, str] = {}
         if isinstance(config, dict):
                         for task, model in task_models.items()
                         if str(task).strip() and str(model).strip()
                     }
+                    # Merge config models with defaults (config overrides defaults)
                     self.task_model_map.update(config_task_models)
                 task_fallback_models = routing_cfg.get("task_fallback_model_map", {})
                 if isinstance(task_fallback_models, dict):
             for task_key in list(self.task_model_map.keys()):
                 self.task_model_map[task_key] = env_model_id
             LOGGER.info(
+                f"🔄 INFERENCE_MODEL_ID env var override applied: {env_model_id}"
             )
             LOGGER.info(
                 f"   Task model mappings changed from: {original_map}"
         else:
             env_override_note = ""
+        if self.enforce_qwen_only:
+            qwen_map_before = dict(self.task_model_map)
+            self.default_model = self.qwen_lock_model
             for task_key in list(self.task_model_map.keys()):
+                self.task_model_map[task_key] = self.qwen_lock_model
             self.fallback_models = []
             self.task_fallback_model_map = {
                 task_key: [] for task_key in self.task_model_map.keys()
             }
+            self.chat_hard_trigger_enabled = False
+            LOGGER.info(f"🔒 INFERENCE_ENFORCE_QWEN_ONLY enabled: locking all inference tasks to {self.qwen_lock_model}")
+            LOGGER.info(f"   Cleared fallback models and hard-escalation path")
+            LOGGER.info(f"   Task model mappings forced from: {qwen_map_before}")
+        # Log configuration loaded for debugging
         config_status = "from file" if config_path else "hardcoded defaults (no config file found)"
         effective_chat_model_for_logs = self.chat_model_override or self.task_model_map.get("chat", self.default_model)
+        LOGGER.info(f"✅ InferenceClient initialized {config_status}{env_override_note}")
         LOGGER.info(f"   Default model: {self.default_model}")
         LOGGER.info(f"   Chat model: {effective_chat_model_for_logs}")
         LOGGER.info(f"   Chat temp override ({TEMP_CHAT_MODEL_OVERRIDE_ENV}): {self.chat_model_temp_override or 'disabled'}")
         LOGGER.info(f"   Chat strict model lock: {self.chat_strict_model_only}")
+        LOGGER.info(f"   Global Qwen-only lock: {self.enforce_qwen_only}")
         LOGGER.info(f"   Verify solution model: {self.task_model_map.get('verify_solution', self.default_model)}")
         LOGGER.info(f"   Full task_model_map: {self.task_model_map}")
             "requests_error": 0,
             "retries_total": 0,
             "fallback_attempts": 0,
             "route_counts": {},
             "task_counts": {},
             "provider_counts": {},
             "status_code_counts": {},
         }
     def _bump_metric(self, key: str, inc: int = 1) -> None:
         with self._metrics_lock:
             current = self._metrics.get(key) or 0
             if not isinstance(current, int):
                 current = 0
             self._metrics[key] = current + inc
     def _bump_bucket(self, key: str, bucket: str, inc: int = 1) -> None:
         with self._metrics_lock:
             if not isinstance(current, int):
                 current = 0
             mapping[bucket] = current + inc
     def _record_attempt(self, *, task_type: str, provider: str, route: str, fallback_depth: int) -> None:
         self._bump_metric("requests_total", 1)
     def snapshot_metrics(self) -> Dict[str, Any]:
         with self._metrics_lock:
             snapshot = {
                 "uptime_sec": round(max(0.0, time.time() - self._metrics_started_at), 2),
                 "requests_total": self._metrics.get("requests_total") or 0,
                 "requests_error": self._metrics.get("requests_error") or 0,
                 "retries_total": self._metrics.get("retries_total") or 0,
                 "fallback_attempts": self._metrics.get("fallback_attempts") or 0,
                 "route_counts": dict(self._metrics.get("route_counts") or {}),
                 "task_counts": dict(self._metrics.get("task_counts") or {}),
                 "provider_counts": dict(self._metrics.get("provider_counts") or {}),
         effective_task = (req.task_type or "default").strip().lower()
         request_tag = req.request_tag.strip() or f"{effective_task}-{int(time.time() * 1000)}"
         selected_model, model_selection_source = self._resolve_primary_model(req)
         model_chain = self._model_chain_for_task(effective_task, selected_model)
         last_error: Optional[Exception] = None
+        provider_chain = self._provider_chain_for_task(req.task_type)
+        # Normalize model name (remove any provider suffix since we use hf_inference router)
+        model_base = selected_model.split(":")[0] if ":" in selected_model else selected_model
+        # Log model selection for debugging - confirm which model will actually be used
         LOGGER.info(
+            f"🎯 request_tag={request_tag} task={effective_task} source={model_selection_source} "
+            f"selected_model={model_base} (primary) provider_chain={provider_chain}"
         )
         LOGGER.info(f"   fallback_chain={model_chain[1:] if len(model_chain) > 1 else 'none'}")
         for fallback_depth, model_name in enumerate(model_chain):
             request_for_model = InferenceRequest(
                 messages=req.messages,
                 repetition_penalty=req.repetition_penalty,
                 timeout_sec=req.timeout_sec,
             )
+            for provider in provider_chain:
+                try:
+                    result = self._generate_with_provider(request_for_model, provider, fallback_depth)
+                    if fallback_depth > 0:
+                        LOGGER.info(f"✅ Fallback succeeded at depth={fallback_depth} model={model_name} provider={provider}")
+                    return result
+                except Exception as exc:
+                    last_error = exc
+                    fallback_hint = f" (depth {fallback_depth})" if fallback_depth > 0 else ""
+                    LOGGER.warning(
+                        f"⚠️  Attempt failed{fallback_hint}: task={request_for_model.task_type} "
+                        f"provider={provider} model={model_name} error={exc.__class__.__name__}: {str(exc)[:100]}"
+                    )
         if last_error:
             raise last_error
         effective_task = (req.task_type or "default").strip().lower()
         runtime_chat_override = self._runtime_chat_model_override()
+        def _base_model(model_name: str) -> str:
+            return (model_name or "").split(":", 1)[0].strip()
+        # Check explicit request model first, then chat override env, then task map/default.
         if effective_task == "chat" and runtime_chat_override:
             selected_model = runtime_chat_override
             model_selection_source = "chat_temp_override_env"
             selected_model = self.task_model_map.get(effective_task, self.default_model)
             model_selection_source = "task_map"
+        if self.enforce_qwen_only:
+            effective_qwen_lock_model = self.qwen_lock_model
             if effective_task == "chat":
+                effective_qwen_lock_model = runtime_chat_override or self.chat_model_override or self.qwen_lock_model
+            selected_base = _base_model(selected_model)
+            lock_base = _base_model(effective_qwen_lock_model)
             if selected_base != lock_base:
                 LOGGER.warning(
+                    f"⚠️ Qwen-only lock replaced requested model {selected_model} with {effective_qwen_lock_model}"
                 )
+            selected_model = effective_qwen_lock_model
+            model_selection_source = f"{model_selection_source}:qwen_only"
         if effective_task == "chat" and self.chat_strict_model_only:
             return selected_model, f"{model_selection_source}:chat_strict_model_only"
+        if effective_task == "chat" and self.chat_hard_trigger_enabled and self.chat_hard_model:
+            should_escalate, reason = self._should_escalate_chat_to_hard_model(req.messages)
+            if should_escalate and selected_model != self.chat_hard_model:
+                return self.chat_hard_model, f"chat_hard_escalation:{reason}"
         return selected_model, model_selection_source
+    def _should_escalate_chat_to_hard_model(self, messages: List[Dict[str, str]]) -> Tuple[bool, str]:
+        latest_user = self._latest_user_message(messages)
+        if not latest_user:
+            return False, "no_user_message"
+        latest_norm = latest_user.lower()
+        prompt_chars = len(latest_user)
+        history_chars = 0
+        for msg in messages:
+            content = (msg.get("content") or "") if isinstance(msg, dict) else ""
+            history_chars += len(content)
+        keyword_hit = ""
+        for kw in self.chat_hard_keywords:
+            if kw and kw in latest_norm:
+                keyword_hit = kw
+                break
+        math_marker_count = len(
+            re.findall(
+                r"(=|\bintegral\b|\bderivative\b|\bmatrix\b|\blimit\b|\bproof\b|\bderive\b|\bsolve\b)",
+                latest_norm,
+            )
+        )
+        long_prompt = prompt_chars >= self.chat_hard_prompt_chars
+        long_history = history_chars >= self.chat_hard_history_chars
+        immediate_hard_request = any(
+            phrase in latest_norm
+            for phrase in (
+                "show all steps",
+                "step-by-step",
+                "step by step",
+                "rigorous proof",
+                "formal proof",
+            )
+        )
+        # Escalate immediately for long step-by-step prompts or heavy math density.
+        escalate = bool(keyword_hit and immediate_hard_request)
+        if not escalate:
+            escalate = bool(keyword_hit and (long_prompt or long_history or math_marker_count >= 2))
+        if not escalate and long_prompt and math_marker_count >= 2:
+            escalate = True
+        if not escalate and long_history and math_marker_count >= 2:
+            escalate = True
+        if not escalate:
+            return False, "normal"
+        reasons: List[str] = []
+        if long_prompt:
+            reasons.append(f"prompt_chars={prompt_chars}")
+        if long_history:
+            reasons.append(f"history_chars={history_chars}")
+        if keyword_hit:
+            reasons.append(f"keyword={keyword_hit}")
+        if immediate_hard_request:
+            reasons.append("immediate_hard_request")
+        if math_marker_count >= 2:
+            reasons.append(f"math_markers={math_marker_count}")
+        return True, ",".join(reasons) if reasons else "hard_prompt"
     def _model_chain_for_task(self, task_type: str, selected_model: str) -> List[str]:
         normalized = (task_type or "default").strip().lower()
         runtime_chat_override = self._runtime_chat_model_override() if normalized == "chat" else ""
+        chat_qwen_lock_model = runtime_chat_override or (self.chat_model_override if normalized == "chat" else "")
+        if self.enforce_qwen_only:
             if normalized == "chat":
+                locked_model = (chat_qwen_lock_model or self.qwen_lock_model or "").strip()
             else:
+                locked_model = (self.qwen_lock_model or "").strip()
             return [locked_model] if locked_model else []
         if normalized == "chat" and self.chat_strict_model_only:
+            chat_model = (chat_qwen_lock_model or selected_model or "").strip()
             return [chat_model] if chat_model else []
         per_task_candidates = self.task_fallback_model_map.get(task_type, [])
             return deduped[:max_models]
         return deduped
+    def _provider_chain_for_task(self, task_type: str) -> List[str]:
+        normalized = (task_type or "default").strip().lower()
+        forced_provider = self.task_provider_map.get(normalized)
+        if forced_provider:
+            return [forced_provider]
+        if normalized in self.cpu_only_tasks:
+            return [self.cpu_provider]
+        if self.pro_enabled and normalized in self.pro_priority_tasks:
+            chain = [self.pro_provider]
+            if self.enable_provider_fallback and self.gpu_provider not in chain:
+                chain.append(self.gpu_provider)
+            if self.enable_provider_fallback and self.provider not in chain:
+                chain.append(self.provider)
+            return chain
+        if normalized in self.gpu_required_tasks:
+            chain = [self.gpu_provider]
+            if self.enable_provider_fallback and self.cpu_provider != self.gpu_provider:
+                chain.append(self.cpu_provider)
+            return chain
+        chain = [self.provider]
+        if self.enable_provider_fallback and self.cpu_provider not in chain:
+            chain.append(self.cpu_provider)
+        return chain
     def _retry_profile(self, task_type: str) -> Tuple[int, float]:
         normalized = (task_type or "default").strip().lower()
         if normalized in self.interactive_tasks:
             return self.interactive_timeout_sec
         return self.background_timeout_sec
+    def _resolve_route_label(self, provider: str, task_type: str) -> str:
+        normalized = (task_type or "default").strip().lower()
+        if self.pro_enabled and normalized in self.pro_priority_tasks and provider == self.pro_provider:
+            return "pro-priority"
+        return "standard"
+    def _generate_with_provider(self, req: InferenceRequest, provider: str, fallback_depth: int) -> str:
+        route = self._resolve_route_label(provider, req.task_type)
+        if provider == "local_space":
+            return self._call_local_space(req, provider=provider, route=route, fallback_depth=fallback_depth)
+        # All models use HF inference router directly (including Qwen/Qwen3-32B)
+        return self._call_hf_inference(req, provider=provider, route=route, fallback_depth=fallback_depth)
     def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
         parts: List[str] = []
         for msg in messages:
                 prefix = "SYSTEM"
             elif role == "assistant":
                 prefix = "ASSISTANT"
+            parts.append(f"{prefix}:\\n{content}")
         parts.append("ASSISTANT:")
+        return "\\n\\n".join(parts)
     def _latest_user_message(self, messages: List[Dict[str, str]]) -> str:
         for msg in reversed(messages):
                 return content
         return self._messages_to_prompt(messages)
+    def _post_with_retry(
+        self,
+        url: str,
+        *,
+        headers: Dict[str, str],
+        payload: Dict[str, object],
+        timeout: int,
+        provider: str,
+        model: str,
+        task_type: str,
+        request_tag: str,
+        fallback_depth: int,
+        route: str,
+    ) -> Tuple[requests.Response, float, int]:
+        """POST ``payload`` as JSON to ``url``, retrying transient failures.
+
+        The call is recorded once through ``_record_attempt`` before the first
+        try; individual retries bump ``retries_total`` instead. The retry
+        budget and base backoff come from ``_retry_profile(task_type)``.
+
+        Two situations trigger a retry while attempts remain:
+        an exception raised by ``requests.post``, or a response with status
+        408, 429, 500, 502, 503 or 504. Each failed try is logged through
+        ``log_model_call`` with ``status="error"``.
+
+        Returns:
+            ``(response, latency_ms, attempts)`` where ``latency_ms`` is the
+            duration of the last try and ``attempts`` is the number of tries
+            made. The response is returned whatever its status code: a
+            retryable status on the last allowed try is handed back rather
+            than raised, and ``requests_error`` is not bumped for it here.
+
+        Raises:
+            Exception: whatever ``requests.post`` raised on the last allowed
+                try; ``requests_error`` is bumped before re-raising.
+        """
+        self._record_attempt(
+            task_type=task_type,
+            provider=provider,
+            route=route,
+            fallback_depth=fallback_depth,
         )
         max_retries, backoff_sec = self._retry_profile(task_type)
+        # Zero-based index of the current try; incremented before each retry.
+        attempt = 0
+        def _retry_sleep(retry_attempt: int) -> None:
+            # Small jitter reduces synchronized retry storms during transient provider issues.
+            jitter_factor = random.uniform(0.9, 1.2)
+            # The delay grows linearly with the retry number.
+            time.sleep(backoff_sec * retry_attempt * jitter_factor)
+        while True:
             start = time.perf_counter()
             try:
+                resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
+            except Exception as exc:
                 latency_ms = (time.perf_counter() - start) * 1000
                 log_model_call(
                     LOGGER,
+                    provider=provider,
+                    model=model,
+                    endpoint=url,
                     latency_ms=latency_ms,
                     input_tokens=None,
                     output_tokens=None,
+                    status="error",
+                    error_class=exc.__class__.__name__,
+                    error_message=str(exc),
                     task_type=task_type,
+                    request_tag=request_tag,
                     retry_attempt=attempt + 1,
                     fallback_depth=fallback_depth,
                     route=route,
                 )
+                # Budget exhausted: count the failure and let the caller see
+                # the original exception.
+                if attempt >= max_retries - 1:
+                    self._bump_metric("requests_error", 1)
+                    raise
+                attempt += 1
+                self._bump_metric("retries_total", 1)
+                _retry_sleep(attempt)
+                continue
+            latency_ms = (time.perf_counter() - start) * 1000
+            # Retry only the listed status codes, and only while tries remain;
+            # otherwise fall through and return the response unchanged.
+            if resp.status_code in {408, 429, 500, 502, 503, 504} and attempt < max_retries - 1:
                 log_model_call(
                     LOGGER,
+                    provider=provider,
+                    model=model,
+                    endpoint=url,
                     latency_ms=latency_ms,
                     input_tokens=None,
                     output_tokens=None,
                     status="error",
+                    error_class="HTTPRetry",
+                    error_message=f"status={resp.status_code}",
                     task_type=task_type,
+                    request_tag=request_tag,
                     retry_attempt=attempt + 1,
                     fallback_depth=fallback_depth,
                     route=route,
                 )
+                attempt += 1
+                self._bump_metric("retries_total", 1)
+                _retry_sleep(attempt)
+                continue
+            # attempt is zero-based, so attempt + 1 is the number of tries made.
+            return resp, latency_ms, attempt + 1
+    def _call_hf_inference_direct(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        """
+        Call Qwen models via Featherless AI provider.
+        Uses HF InferenceClient with provider="featherless-ai" for direct model access.
+        """
+        if not self.hf_token:
+            raise RuntimeError("HF_TOKEN is not set")
         target_model = req.model or self.default_model
+        target_model_base = target_model.split(":")[0] if ":" in target_model else target_model
         timeout = self._timeout_for(req, provider)
         start = time.perf_counter()
         try:
+            # Use HF InferenceClient with featherless-ai provider for Qwen models.
+            client = HFInferenceClient(
+                model=target_model_base,
+                token=self.hf_token,
+                provider="featherless-ai",
+                timeout=timeout
+            )
+            response = client.chat_completion(
+                messages=req.messages,
+                max_tokens=req.max_new_tokens or self.default_max_new_tokens,
+                temperature=req.temperature or self.default_temperature,
+                top_p=req.top_p or self.default_top_p,
+            )
+            latency_ms = (time.perf_counter() - start) * 1000
+            # Extract text from response
+            if hasattr(response, "choices") and response.choices:
+                content = response.choices[0].message.content or ""
+                text = content.strip()
+            else:
+                text = self._extract_text(response)
+            log_model_call(
+                LOGGER,
+                provider="featherless-ai",
+                model=target_model_base,
+                endpoint="featherless-ai_inference",
+                latency_ms=latency_ms,
+                input_tokens=None,
+                output_tokens=None,
+                status="ok",
+                task_type=req.task_type,
+                request_tag=req.request_tag,
+                retry_attempt=1,
+                fallback_depth=fallback_depth,
+                route=route,
+            )
+            self._record_attempt(
+                task_type=req.task_type,
+                provider="featherless-ai",
+                route=route,
+                fallback_depth=fallback_depth,
+            )
+            self._bump_metric("requests_ok", 1)
+            return text
         except Exception as exc:
             latency_ms = (time.perf_counter() - start) * 1000
+            self._bump_metric("requests_error", 1)
             log_model_call(
                 LOGGER,
+                provider="featherless-ai",
+                model=target_model_base,
+                endpoint="featherless-ai_inference",
                 latency_ms=latency_ms,
                 input_tokens=None,
                 output_tokens=None,
                 fallback_depth=fallback_depth,
                 route=route,
             )
+            LOGGER.warning(
+                "task=%s provider=featherless-ai model=%s fallback_depth=%s failed: %s",
+                req.task_type,
+                target_model_base,
+                fallback_depth,
+                exc,
+            )
             raise
+    def _call_hf_inference(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        if not self.hf_token:
+            raise RuntimeError("HF_TOKEN is not set")
+        target_model = req.model or self.default_model
+        chat_model = target_model if ":" in target_model else f"{target_model}:fastest"
+        url = self.hf_chat_url
+        # Log which model is actually being used
+        model_base = target_model.split(":")[0] if ":" in target_model else target_model
+        LOGGER.debug(
+            f"📌 Calling HF inference: task={req.task_type} model={model_base} "
+            f"route={route} depth={fallback_depth}"
+        )
+        payload: Dict[str, object] = {
+            "model": chat_model,
+            "messages": req.messages,
+            "stream": False,
+            "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
+            "temperature": req.temperature,
+            "top_p": req.top_p,
+        }
+        headers = {
+            "Authorization": f"Bearer {self.hf_token}",
+            "Content-Type": "application/json",
+            "X-MathPulse-Task": (req.task_type or "default").strip().lower(),
+        }
+        if route == "pro-priority" and self.pro_route_header_name.strip():
+            headers[self.pro_route_header_name.strip()] = self.pro_route_header_value
+        timeout = self._timeout_for(req, provider)
+        resp, latency_ms, retry_attempt = self._post_with_retry(
+            url,
+            headers=headers,
+            payload=payload,
+            timeout=timeout,
+            provider=provider,
+            model=target_model,
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_bucket("status_code_counts", str(resp.status_code), 1)
+        if resp.status_code != 200:
+            self._bump_metric("requests_error", 1)
+            raise RuntimeError(f"HF Inference error {resp.status_code}: {resp.text}")
+        data = resp.json()
+        text = self._extract_text(data)
+        # Log successful inference with actual model and response time
+        LOGGER.info(
+            f"✅ HF inference success: task={req.task_type} model={model_base} "
+            f"latency={latency_ms:.0f}ms tokens_out={len(text.split())}"
+        )
+        log_model_call(
+            LOGGER,
+            provider=provider,
+            model=target_model,
+            endpoint=url,
+            latency_ms=latency_ms,
+            input_tokens=None,
+            output_tokens=None,
+            status="ok",
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            retry_attempt=retry_attempt,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_metric("requests_ok", 1)
+        return text
+    def _call_featherless(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        if not self.featherless_api_key:
+            raise RuntimeError("FEATHERLESS_API_KEY is not set")
+        target_model = req.model or self.default_model
+        url = self.featherless_chat_url
+        payload: Dict[str, object] = {
+            "model": target_model,
+            "messages": req.messages,
+            "stream": False,
+            "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
+            "temperature": req.temperature,
+            "top_p": req.top_p,
+        }
+        headers = {
+            "Authorization": f"Bearer {self.featherless_api_key}",
+            "Content-Type": "application/json",
+            "X-MathPulse-Task": (req.task_type or "default").strip().lower(),
+        }
+        timeout = self._timeout_for(req, provider)
+        resp, latency_ms, retry_attempt = self._post_with_retry(
+            url,
+            headers=headers,
+            payload=payload,
+            timeout=timeout,
+            provider=provider,
+            model=target_model,
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_bucket("status_code_counts", str(resp.status_code), 1)
+        if resp.status_code != 200:
+            self._bump_metric("requests_error", 1)
+            raise RuntimeError(f"Featherless API error {resp.status_code}: {resp.text}")
+        data = resp.json()
+        text = self._extract_text(data)
+        log_model_call(
+            LOGGER,
+            provider=provider,
+            model=target_model,
+            endpoint=url,
+            latency_ms=latency_ms,
+            input_tokens=None,
+            output_tokens=None,
+            status="ok",
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            retry_attempt=retry_attempt,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_metric("requests_ok", 1)
+        return text
+    def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        target_model = req.model or self.default_model
+        url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"
+        prompt = self._messages_to_prompt(req.messages)
+        payload: Dict[str, object] = {
+            "data": [
+                prompt,
+                [],
+                req.temperature,
+                req.top_p,
+                req.max_new_tokens,
+            ]
+        }
+        headers = {"Content-Type": "application/json"}
+        timeout = self._timeout_for(req, provider)
+        resp, latency_ms, retry_attempt = self._post_with_retry(
+            url,
+            headers=headers,
+            payload=payload,
+            timeout=timeout,
+            provider=provider,
+            model=target_model,
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
         self._bump_bucket("status_code_counts", str(resp.status_code), 1)
         if resp.status_code != 200:
             status="ok",
             task_type=req.task_type,
             request_tag=req.request_tag,
+            retry_attempt=retry_attempt,
             fallback_depth=fallback_depth,
             route=route,
         )
     def _clean_response_text(self, text: str) -> str:
         """Strip JSON braces, template artifacts, and whitespace from response text."""
+        # Strip leading/trailing whitespace
         text = text.strip()
+        # Remove wrapping JSON braces or artifact markers
         if text.startswith("{") and text.endswith("}"):
             try:
+                # Try to parse as JSON - if it fails, return as-is
                 parsed = json.loads(text)
+                # If it's a dict with a "content" or "text" field, use that
                 if isinstance(parsed, dict):
                     if "content" in parsed:
                         text = str(parsed["content"]).strip()
                     elif "text" in parsed:
                         text = str(parsed["text"]).strip()
             except json.JSONDecodeError:
+                # Not valid JSON, just clean up braces
                 text = text.strip("{}")
+        # Remove any trailing artifact markers
         if text.startswith("```json") or text.startswith("```"):
             text = re.sub(r"^```(?:json)?", "", text).strip()
         if text.endswith("```"):
             text = text[:-3].strip()
         return text.strip()
+def create_default_client() -> InferenceClient:
+    """Return an ``InferenceClient`` constructed with no arguments."""
+    return InferenceClient()

services/question_bank_service.py DELETED Viewed

@@ -1,123 +0,0 @@
-"""
-Question Bank Service for Quiz Battle.
-Handles querying the question bank with random ordering,
-caching session questions, and 24-hour debounce for variance results.
-"""
-import os
-import random
-from datetime import datetime, timezone, timedelta
-from typing import List, Dict, Optional
-from google.cloud import firestore
-DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
def _get_db() -> firestore.Client:
    """Create a Firestore client bound to ``DEFAULT_FIREBASE_PROJECT``."""
    client = firestore.Client(project=DEFAULT_FIREBASE_PROJECT)
    return client
async def get_questions_for_battle(
    grade_level: int,
    topic: str,
    count: int = 10,
) -> List[Dict]:
    """
    Pick up to `count` pseudo-random questions for a battle session.

    A random threshold is drawn and the questions whose `random_seed` is at
    or above it are read in ascending `random_seed` order.  When that yields
    fewer than `count` documents, the shortfall is filled from the documents
    below the threshold.  Documents that are empty or lack any required field
    are dropped, so fewer than `count` questions may be returned.
    """
    required_fields = ("question", "choices", "correct_answer", "difficulty")
    questions_ref = _get_db().collection(
        f"question_bank/{grade_level}/{topic}/questions"
    )
    pivot = random.random()

    # First pass: everything at or above the random pivot.
    snapshots = list(
        questions_ref.where("random_seed", ">=", pivot)
        .order_by("random_seed")
        .limit(count)
        .stream()
    )

    # Second pass: wrap around to the start of the range for the remainder.
    if len(snapshots) < count:
        shortfall = count - len(snapshots)
        snapshots.extend(
            list(
                questions_ref.where("random_seed", "<", pivot)
                .order_by("random_seed")
                .limit(shortfall)
                .stream()
            )
        )

    return [
        data
        for data in (snapshot.to_dict() for snapshot in snapshots)
        if data and all(field in data for field in required_fields)
    ]
async def cache_session_questions(
    session_id: str,
    questions: List[Dict],
    player_ids: List[str],
    grade_level: int,
    topic: str,
) -> None:
    """Store a battle session's questions together with a 24-hour expiry mark.

    The session document `quiz_battle_sessions/{session_id}` is written with
    the session metadata, then every question goes into its `questions`
    subcollection (document id = position in the list) through one batch.
    """
    db = _get_db()
    session_ref = db.collection("quiz_battle_sessions").document(session_id)

    expires_at = datetime.now(timezone.utc) + timedelta(hours=24)
    session_metadata = {
        "player_ids": player_ids,
        "grade_level": grade_level,
        "topic": topic,
        "created_at": firestore.SERVER_TIMESTAMP,
        "variance_cached_until": expires_at,
    }
    session_ref.set(session_metadata)

    # All question writes are committed together.
    batch = db.batch()
    for position, question in enumerate(questions):
        question_ref = session_ref.collection("questions").document(str(position))
        batch.set(question_ref, question)
    batch.commit()
async def get_cached_session(session_id: str) -> Optional[List[Dict]]:
    """
    Return a session's cached questions while its cache window is still open.

    Gives None when the session document does not exist, carries no
    `variance_cached_until` value, has expired, or holds no questions.
    """
    session_ref = _get_db().collection("quiz_battle_sessions").document(session_id)
    snapshot = session_ref.get()
    if not snapshot.exists:
        return None

    expires_at = snapshot.to_dict().get("variance_cached_until")
    if not expires_at:
        return None

    if isinstance(expires_at, datetime):
        # A naive datetime is taken to be UTC.
        if expires_at.tzinfo is None:
            expires_at = expires_at.replace(tzinfo=timezone.utc)
    elif hasattr(expires_at, 'timestamp'):
        # Firestore Timestamp object
        expires_at = datetime.fromtimestamp(expires_at.timestamp(), tz=timezone.utc)

    if not (expires_at > datetime.now(timezone.utc)):
        return None

    cached = [doc.to_dict() for doc in session_ref.collection("questions").stream()]
    return cached or None

services/user_provisioning_service.py CHANGED Viewed

@@ -185,6 +185,7 @@ class UserProvisioningService:
                     "level": 1,
                     "currentXP": 0,
                     "totalXP": 0,
                     "atRiskSubjects": [],
                     "hasTakenDiagnostic": False,
                 }

                     "level": 1,
                     "currentXP": 0,
                     "totalXP": 0,
+                    "streak": 0,
                     "atRiskSubjects": [],
                     "hasTakenDiagnostic": False,
                 }

services/variance_engine.py DELETED Viewed

@@ -1,115 +0,0 @@
-"""
-Variance Engine for Quiz Battle Questions.
-Applies per-session variance techniques via DeepSeek,
-with pure-Python fallback for choice shuffling.
-"""
-import json
-import random
-import re
-from typing import List, Dict
-from services.ai_client import get_deepseek_client, CHAT_MODEL
-from services.question_bank_service import get_cached_session, cache_session_questions
-def _fallback_shuffle(questions: List[Dict], seed: int) -> List[Dict]:
-    """
-    Pure-Python fallback: shuffle choices deterministically.
-    """
-    rng = random.Random(seed)
-    for q in questions:
-        choices = q["choices"].copy()
-        correct_letter = q["correct_answer"]
-        correct_index = ord(correct_letter) - ord("A")
-        correct_text = choices[correct_index]
-        rng.shuffle(choices)
-        q["choices"] = choices
-        q["correct_answer"] = chr(ord("A") + choices.index(correct_text))
-        q["variance_applied"] = ["choice_shuffle"]
-    return questions
async def apply_variance(questions: List[Dict], session_id: str) -> List[Dict]:
    """
    Apply per-session variance to a list of questions.

    1. Return the cached result for `session_id` when one is still valid
    2. Derive a stable seed from `session_id`
    3. Ask the chat model to vary the questions and validate its JSON reply
    4. Fall back to the pure-Python choice shuffle if step 3 fails in any way
    5. Cache the result for 24 hours

    Args:
        questions: Source question dicts; they are not modified.
        session_id: Battle session identifier, used as cache key and seed source.

    Returns:
        The varied questions (same length as `questions` unless served from cache).
    """
    # 1. Check cache
    cached = await get_cached_session(session_id)
    if cached:
        return cached

    # 2. Generate deterministic seed from session_id.
    # hash() of a str is salted per interpreter process, so it cannot promise
    # "same session -> same seed" across restarts or workers; a digest can.
    import hashlib  # local import keeps this block self-contained

    digest = hashlib.sha256(session_id.encode("utf-8")).digest()
    seed = int.from_bytes(digest[:4], "big")  # 0 <= seed < 2**32, same range as before

    # 3. Call DeepSeek
    client = get_deepseek_client()
    system_prompt = (
        "You are a math quiz variance engine for MathPulse AI, an educational platform for "
        "Filipino high school students following the DepEd K-12 curriculum. "
        "Your job is to make quiz questions feel fresh each session WITHOUT changing the "
        "correct answer or difficulty level."
    )
    user_prompt = f"""Given these {len(questions)} quiz battle questions as JSON:
{json.dumps(questions, indent=2)}
Apply the following variance techniques. Use session_seed={seed} for deterministic but varied output:
PARAPHRASE (30% chance per question): Reword the question stem using different phrasing, synonyms, or sentence structure. Do NOT change the math or the answer.
CHOICE SHUFFLE (always): Randomize the order of answer choices A/B/C/D. Update "correct_answer" to reflect the new position.
DISTRACTOR REFRESH (20% chance per question): Replace 1-2 wrong choices with new plausible-but-incorrect distractors that represent common student misconceptions for this topic. Keep the correct answer unchanged.
CONTEXT SWAP (10% chance per question): Replace real-world context variables (names, objects, currencies) with Filipino-localized equivalents (e.g., "pesos", "jeepney", "barangay") to increase cultural relevance.
NUMERIC SCALING (10% chance, only for computation problems): Scale numbers by a small integer factor (2x or 3x) so the method remains the same but the answer changes. Recompute the correct answer and all distractors accordingly.
Return the full modified questions array as valid JSON only. Keep all original fields.
Add a "variance_applied": ["paraphrase", "distractor_refresh", ...] field per question.
Do NOT change "topic", "difficulty", "grade_level", or "source_chunk_id"."""
    try:
        # NOTE(review): this looks like a blocking call inside a coroutine —
        # confirm whether get_deepseek_client() returns a sync client.
        response = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.5,
            max_tokens=4000,
        )
        content = response.choices[0].message.content.strip()
        # Strip markdown code fences, with or without the "json" tag
        content = re.sub(r"^```(?:json)?\s*", "", content)
        content = re.sub(r"\s*```$", "", content)
        varied_questions = json.loads(content)
        if not isinstance(varied_questions, list) or len(varied_questions) != len(questions):
            raise ValueError("Invalid response format from DeepSeek")
        # Validate required fields
        for q in varied_questions:
            if not all(k in q for k in ("question", "choices", "correct_answer", "variance_applied")):
                raise ValueError("Missing required fields in varied question")
    except Exception as e:
        # Deliberate best-effort: any failure of the AI path degrades to the shuffle.
        print(f"[variance_engine] DeepSeek variance failed, falling back to shuffle: {e}")
        # Hand over copies so the caller's question dicts are never rewritten.
        varied_questions = _fallback_shuffle([dict(q) for q in questions], seed)

    # 4. Cache for 24 hours
    # player_ids are not known here; grade_level and topic come from the
    # first original question when available.
    player_ids = []
    first = questions[0] if questions else {}
    grade_level = first.get("grade_level", 11)
    topic = first.get("topic", "general_mathematics")
    await cache_session_questions(session_id, varied_questions, player_ids, grade_level, topic)
    return varied_questions

services/youtube_service.py DELETED Viewed

@@ -1,1017 +0,0 @@
-"""
-Smart YouTube Video Search Service for MathPulse AI.
-Uses YouTube Data API v3 (googleapiclient.discovery) to find relevant
-educational math videos, enriched with RAG curriculum context and DeepSeek
-query generation for contextual fallback when exact matches don't exist.
-Results are cached in Firestore video_cache/{lessonId} with 7-day TTL.
-"""
-from __future__ import annotations
-import hashlib
-import json
-import logging
-import os
-import re
-from datetime import datetime, timezone
-from typing import Dict, List, Optional
-logger = logging.getLogger("mathpulse.youtube")
-YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "").strip()
-# Known educational channel keywords and exact names for post-filtering
-_EDUCATIONAL_CHANNEL_KEYWORDS = [
-    "khan", "math", "academy", "education", "teacher", "professor",
-    "tutorial", "lesson", "school", "university", "college", "deped",
-    "philippines", "filipino", "pinoy", "stem", "learning", "study",
-    "organic chemistry tutor", "patrickjmt", "3blue1brown", "numberphile",
-    "math antics", "bright side", "crashcourse", "ted-ed", "ted ed",
-    "nancy pi", "professor leonard", "mit", "stanford", "harvard",
-    "mashup math", "mathcoach", "mathologer", "stand-up maths",
-    "eddie woo", "black pen red pen", "michel van biezen", "brian mclogan",
-    "mathbff", "krista king", "mathMeeting", "mathbyfives", "yourteacher",
-    "virtual nerd", "study.com", "coursera", "edx", "brilliant",
-    "filipino math", "tagalog math", "pinoy teacher", "math philippines",
-    "shs math", "senior high school math", "grade 11 math", "grade 12 math",
-    "general mathematics", "business math", "statistics", "probability",
-    "finite math", "precalculus", "calculus", "algebra", "geometry",
-    "trigonometry", "functions", "equations", "problem solving",
-]
-_EDUCATIONAL_CHANNEL_EXACT = {
-    "khan academy", "patrickjmt", "3blue1brown", "numberphile",
-    "math antics", "the organic chemistry tutor", "professor leonard",
-    "nancy pi", "ted-ed", "crashcourse", "bright side",
-    "mit opencourseware", "stanford", "harvard", "mashup math",
-    "mathcoach", "mathologer", "stand-up maths", "eddie woo",
-    "black pen red pen", "michel van biezen", "brian mclogan",
-    "mathbff", "krista king", "mathmeeting", "mathbyfives", "yourteacher",
-    "virtual nerd", "study.com", "coursera", "brilliant.org",
-}
-# Duration filters
-_MIN_DURATION_SECONDS = 120   # 2 minutes (allow shorter tutorials)
-_MAX_DURATION_SECONDS = 3600  # 60 minutes
-_TARGET_MIN_SECONDS = 300     # 5 minutes (ideal)
-_TARGET_MAX_SECONDS = 1200    # 20 minutes (ideal)
-# Cache TTL in seconds (7 days)
-_CACHE_TTL_SECONDS = 7 * 24 * 60 * 60
-# Guaranteed fallback videos by subject — these are well-known educational videos
-# that are extremely likely to exist and be relevant. Used as nuclear option
-# when YouTube API returns nothing for all search strategies.
-_GUARANTEED_FALLBACK_VIDEOS = {
-    "default": [
-        {
-            "videoId": "p6j8HhfJ5Mc",
-            "title": "The Essence of Calculus",
-            "channelTitle": "3Blue1Brown",
-            "thumbnailUrl": "https://img.youtube.com/vi/p6j8HhfJ5Mc/hqdefault.jpg",
-            "durationSeconds": 1024,
-            "description": "A beautiful introduction to calculus concepts.",
-        },
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-    ],
-    "general mathematics": [
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-        {
-            "videoId": "5I_1G5CNA5E",
-            "title": "Functions and Their Graphs",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/5I_1G5CNA5E/hqdefault.jpg",
-            "durationSeconds": 685,
-            "description": "Understanding functions, domain, range, and graphing.",
-        },
-    ],
-    "business math": [
-        {
-            "videoId": "Dc2V7_ur_yY",
-            "title": "Simple Interest and Compound Interest",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/Dc2V7_ur_yY/hqdefault.jpg",
-            "durationSeconds": 780,
-            "description": "Understanding interest calculations for business applications.",
-        },
-        {
-            "videoId": "BFGj4mkHbHc",
-            "title": "Business Mathematics Tutorial",
-            "channelTitle": "Math Meeting",
-            "thumbnailUrl": "https://img.youtube.com/vi/BFGj4mkHbHc/hqdefault.jpg",
-            "durationSeconds": 890,
-            "description": "Essential business math concepts and problem solving.",
-        },
-    ],
-    "statistics": [
-        {
-            "videoId": "qBigTkBLU6g",
-            "title": "Statistics Intro: Mean, Median, and Mode",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/qBigTkBLU6g/hqdefault.jpg",
-            "durationSeconds": 512,
-            "description": "Introduction to measures of central tendency.",
-        },
-        {
-            "videoId": "oXdM3XVCzIM",
-            "title": "Standard Deviation Explained",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/oXdM3XVCzIM/hqdefault.jpg",
-            "durationSeconds": 635,
-            "description": "Understanding variance and standard deviation.",
-        },
-    ],
-    "probability": [
-        {
-            "videoId": "uzkc-qNVoOk",
-            "title": "Probability Explained",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/uzkc-qNVoOk/hqdefault.jpg",
-            "durationSeconds": 480,
-            "description": "Introduction to probability concepts and calculations.",
-        },
-        {
-            "videoId": "SkidyvDkNYQ",
-            "title": "Probability of Independent Events",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/SkidyvDkNYQ/hqdefault.jpg",
-            "durationSeconds": 520,
-            "description": "Calculating probabilities for independent and dependent events.",
-        },
-    ],
-    "finite math": [
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-        {
-            "videoId": "5I_1G5CNA5E",
-            "title": "Functions and Their Graphs",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/5I_1G5CNA5E/hqdefault.jpg",
-            "durationSeconds": 685,
-            "description": "Understanding functions, domain, range, and graphing.",
-        },
-    ],
-    "calculus": [
-        {
-            "videoId": "p6j8HhfJ5Mc",
-            "title": "The Essence of Calculus",
-            "channelTitle": "3Blue1Brown",
-            "thumbnailUrl": "https://img.youtube.com/vi/p6j8HhfJ5Mc/hqdefault.jpg",
-            "durationSeconds": 1024,
-            "description": "A beautiful introduction to calculus concepts.",
-        },
-        {
-            "videoId": "WUvTyaaNkzM",
-            "title": "Limits and Continuity",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/WUvTyaaNkzM/hqdefault.jpg",
-            "durationSeconds": 780,
-            "description": "Understanding limits and continuity in calculus.",
-        },
-    ],
-    "algebra": [
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-        {
-            "videoId": "5I_1G5CNA5E",
-            "title": "Functions and Their Graphs",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/5I_1G5CNA5E/hqdefault.jpg",
-            "durationSeconds": 685,
-            "description": "Understanding functions, domain, range, and graphing.",
-        },
-    ],
-    "geometry": [
-        {
-            "videoId": "302eJ3TzJQU",
-            "title": "Geometry Introduction",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/302eJ3TzJQU/hqdefault.jpg",
-            "durationSeconds": 540,
-            "description": "Basic geometry concepts and terminology.",
-        },
-        {
-            "videoId": "Jn0YxbqEjHk",
-            "title": "Trigonometry Introduction",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/Jn0YxbqEjHk/hqdefault.jpg",
-            "durationSeconds": 680,
-            "description": "Introduction to trigonometric functions and identities.",
-        },
-    ],
-    "trigonometry": [
-        {
-            "videoId": "Jn0YxbqEjHk",
-            "title": "Trigonometry Introduction",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/Jn0YxbqEjHk/hqdefault.jpg",
-            "durationSeconds": 680,
-            "description": "Introduction to trigonometric functions and identities.",
-        },
-        {
-            "videoId": "PUB0TaZ7bhA",
-            "title": "Unit Circle Definition of Trig Functions",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/PUB0TaZ7bhA/hqdefault.jpg",
-            "durationSeconds": 590,
-            "description": "Understanding sine and cosine on the unit circle.",
-        },
-    ],
-}
def _get_guaranteed_fallback_videos(subject: str = "", max_results: int = 3) -> List[Dict]:
    """Return guaranteed fallback videos when YouTube API returns nothing.

    Lookup order: exact (case-insensitive) subject key, then the first key
    that contains the subject or is contained in it, then the "default" list.
    A blank subject goes straight to "default": the empty string is a
    substring of every key, so letting it reach the partial match would
    always select the first subject-specific list.

    Args:
        subject: Subject name; may be empty.
        max_results: Maximum number of videos to return.

    Returns:
        Up to `max_results` video dicts.  They are copies, so callers may
        modify them without altering the shared fallback table.
    """
    subject_key = (subject or "").lower().strip()

    videos = None
    if subject_key:
        # Try exact subject match first, then a partial match.
        videos = _GUARANTEED_FALLBACK_VIDEOS.get(subject_key)
        if videos is None:
            videos = next(
                (
                    candidates
                    for key, candidates in _GUARANTEED_FALLBACK_VIDEOS.items()
                    if key != "default" and (key in subject_key or subject_key in key)
                ),
                None,
            )
    if videos is None:
        videos = _GUARANTEED_FALLBACK_VIDEOS["default"]
    return [dict(video) for video in videos[:max_results]]
def _build_youtube_client():
    """Create the googleapiclient YouTube v3 client on demand.

    Returns None when no API key is configured, or when importing or
    building the client fails (the failure is logged as a warning).
    """
    if YOUTUBE_API_KEY:
        try:
            from googleapiclient.discovery import build

            return build("youtube", "v3", developerKey=YOUTUBE_API_KEY, cache_discovery=False)
        except Exception as exc:
            logger.warning("Failed to build YouTube client: %s", exc)
    return None
-def _parse_iso8601_duration(duration: str) -> int:
-    """Parse ISO 8601 duration string like 'PT5M30S' to seconds."""
-    if not duration:
-        return 0
-    hours_match = re.search(r"(\d+)H", duration)
-    minutes_match = re.search(r"(\d+)M", duration)
-    seconds_match = re.search(r"(\d+)S", duration)
-    hours = int(hours_match.group(1)) if hours_match else 0
-    minutes = int(minutes_match.group(1)) if minutes_match else 0
-    seconds = int(seconds_match.group(1)) if seconds_match else 0
-    return hours * 3600 + minutes * 60 + seconds
def _is_educational_channel(channel_title: str) -> bool:
    """Tell whether a channel title looks educational.

    True when the lower-cased, trimmed title is one of the known exact
    channel names or contains any of the educational keywords.
    """
    normalized = channel_title.lower().strip()
    return normalized in _EDUCATIONAL_CHANNEL_EXACT or any(
        keyword in normalized for keyword in _EDUCATIONAL_CHANNEL_KEYWORDS
    )
def _score_video_result(item: dict, query: str, topic: str, subject: str) -> float:
    """Compute a relevance score for one video result (higher is better).

    Points are awarded for topic, subject and query words found in the title
    or description, for an educational-looking channel, for teaching-related
    terms in the title, and for how well the duration fits the target window.
    """
    title = (item.get("title") or "").lower()
    description = (item.get("description") or "").lower()
    channel = (item.get("channelTitle") or "").lower()
    query_lower = query.lower()
    topic_lower = topic.lower()
    subject_lower = subject.lower() if subject else ""

    def _long_words(text: str) -> List[str]:
        # Words of one or two characters are ignored.
        return [word for word in text.split() if len(word) > 2]

    score = 0.0

    # Topic words weigh the most.
    for word in _long_words(topic_lower):
        if word in title:
            score += 4.0
        if word in description:
            score += 1.5

    # Subject words weigh half as much in the title, a third in the description.
    for word in _long_words(subject_lower):
        if word in title:
            score += 2.0
        if word in description:
            score += 0.5

    # One point per query word present in the title.
    score += 1.0 * sum(1 for word in _long_words(query_lower) if word in title)

    if _is_educational_channel(channel):
        score += 3.0

    # Teaching vocabulary in the title.
    math_terms = ["tutorial", "lesson", "explain", "math", "mathematics",
                  "solution", "problem", "example", "learn", "how to",
                  "introduction", "basics", "overview", "guide"]
    score += 1.5 * sum(1 for term in math_terms if term in title)

    # Duration: ideal window, then acceptable window, then anything non-zero.
    duration = item.get("durationSeconds", 0)
    if _TARGET_MIN_SECONDS <= duration <= _TARGET_MAX_SECONDS:
        score += 2.0
    elif _MIN_DURATION_SECONDS <= duration <= _MAX_DURATION_SECONDS:
        score += 1.0
    elif duration > 0:
        score += 0.3
    return score
-def _extract_meaningful_keywords(chunks: List[dict]) -> List[str]:
-    """Extract meaningful keywords from curriculum chunks."""
-    keywords: List[str] = []
-    for chunk in chunks[:3]:
-        content = str(chunk.get("content", "")).strip()
-        if not content:
-            continue
-        # Split into sentences and take first few
-        sentences = content.split('.')[:2]
-        for sentence in sentences:
-            # Extract important words (nouns, concepts) - heuristic approach
-            words = re.findall(r'\b[A-Za-z][a-z]{3,}\b', sentence)
-            # Filter out common stop words
-            stop_words = {
-                'this', 'that', 'with', 'from', 'they', 'have', 'will',
-                'would', 'there', 'their', 'what', 'said', 'each',
-                'which', 'about', 'could', 'other', 'after', 'first',
-                'these', 'think', 'where', 'being', 'every', 'great',
-                'might', 'shall', 'while', 'through', 'during', 'before',
-                'between', 'among', 'within', 'without', 'against',
-                'students', 'student', 'learning', 'learn', 'understand',
-                'objective', 'objectives', 'competency', 'competencies',
-            }
-            meaningful = [w.lower() for w in words if w.lower() not in stop_words]
-            keywords.extend(meaningful[:8])
-    # Deduplicate while preserving order
-    seen = set()
-    unique = []
-    for kw in keywords:
-        if kw not in seen and len(kw) > 3:
-            seen.add(kw)
-            unique.append(kw)
-    return unique[:12]
def _enrich_query_with_rag(topic: str, subject: str, lesson_context: str = "") -> str:
    """
    Build a YouTube search query from the topic plus curriculum keywords.

    The query starts as the topic, followed by the subject (when given) and
    by the lesson context unless that context already appears in the topic.
    Keywords extracted from retrieved curriculum chunks are appended when
    retrieval succeeds; any retrieval failure is logged at debug level and
    ignored.  A fixed DepEd/Philippines suffix is always added and the result
    is capped at 300 characters.
    """
    query = topic
    if subject:
        query = f"{query} {subject}"
    # Skip the lesson context when the topic already contains it.
    if lesson_context and lesson_context.lower() not in topic.lower():
        query = f"{query} {lesson_context}"

    try:
        from rag.curriculum_rag import retrieve_curriculum_context

        chunks = retrieve_curriculum_context(
            query=topic,
            subject=subject or None,
            top_k=5,
        )
        keywords = _extract_meaningful_keywords(chunks) if chunks else []
        if keywords:
            query = f"{query} {' '.join(keywords[:10])}"
    except Exception as exc:
        logger.debug("RAG enrichment skipped: %s", exc)

    # Append standard DepEd/Philippines math context
    query = f"{query} DepEd Philippines mathematics tutorial"
    return query[:300]
def _generate_search_queries_with_ai(
    topic: str,
    subject: str,
    lesson_context: str,
    grade_level: str,
) -> List[str]:
    """
    Ask the default inference client for several YouTube search queries.

    The model is prompted for a JSON array of four queries ordered from most
    specific to most general.  The reply is accepted when it contains a JSON
    list with at least two usable strings (longer than 3 characters, each cut
    to 200 characters).  On any error, or when the reply is unusable, the
    heuristic queries from `_generate_fallback_queries` are returned instead.
    """
    try:
        from services.inference_client import InferenceRequest, create_default_client

        prompt = (
            f"You are helping find educational YouTube videos for a Filipino senior high school math lesson.\n"
            f"Topic: {topic}\n"
            f"Subject: {subject}\n"
            f"Context: {lesson_context or 'General mathematics lesson'}\n"
            f"Grade: {grade_level or 'Grade 11-12'}\n\n"
            f"Generate exactly 4 YouTube search queries that would find the most relevant educational videos.\n"
            f"Rules:\n"
            f"1. Query 1: Most specific - exact topic with 'tutorial' or 'lesson'\n"
            f"2. Query 2: Slightly broader - related concepts or prerequisite topics\n"
            f"3. Query 3: Even broader - the general subject area with key concepts\n"
            f"4. Query 4: Last resort - basic subject + 'introduction' or 'basics'\n"
            f"5. Each query should be 3-8 words\n"
            f"6. Use terms that real educational channels would use\n"
            f"7. If the exact topic is very specific/niche, include related more common topics\n\n"
            f"Return ONLY a JSON array of 4 strings, nothing else:\n"
            f'["query1", "query2", "query3", "query4"]'
        )
        client = create_default_client()
        request = InferenceRequest(
            messages=[
                {"role": "system", "content": "You generate YouTube search queries. Return only JSON arrays."},
                {"role": "user", "content": prompt},
            ],
            task_type="lesson_generation",
            max_new_tokens=200,
            temperature=0.3,
            top_p=0.9,
        )
        reply = client.generate_from_messages(request).strip()

        # The array may be surrounded by other text; take the bracketed span.
        found = re.search(r'\[.*\]', reply, re.DOTALL)
        candidates = json.loads(found.group()) if found else None
        if isinstance(candidates, list) and len(candidates) >= 2:
            usable = [
                candidate.strip()[:200]
                for candidate in candidates
                if isinstance(candidate, str) and len(candidate.strip()) > 3
            ]
            if len(usable) >= 2:
                logger.info("AI generated %d search queries", len(usable))
                return usable
    except Exception as exc:
        logger.debug("AI query generation failed, using fallback: %s", exc)

    # Fallback heuristic queries
    return _generate_fallback_queries(topic, subject, lesson_context)
-def _generate_fallback_queries(topic: str, subject: str, lesson_context: str) -> List[str]:
-    """Generate fallback search queries when AI is unavailable."""
-    queries = [
-        f"{topic} {subject} tutorial lesson",
-        f"{topic} mathematics explained",
-        f"{subject} {topic} how to",
-    ]
-    # Add broader queries
-    if lesson_context and lesson_context.lower() not in topic.lower():
-        queries.insert(1, f"{lesson_context} tutorial")
-    # Extract core concept from topic (e.g., "quadratic equations" -> "quadratic")
-    core_words = [w for w in topic.split() if len(w) > 3]
-    if core_words:
-        core = core_words[0]
-        queries.append(f"{core} math lesson introduction")
-    # Add subject-level query as last resort
-    queries.append(f"{subject} basics tutorial")
-    # Remove duplicates while preserving order
-    seen = set()
-    unique = []
-    for q in queries:
-        if q.lower() not in seen:
-            seen.add(q.lower())
-            unique.append(q)
-    return unique[:5]
def _find_related_topics_with_ai(topic: str, subject: str) -> List[str]:
    """
    Ask the default inference client for related, more commonly taught topics.

    Used when the exact topic turns up no videos.  The reply is accepted when
    it contains a JSON list with at least one string longer than 2 characters
    (each cut to 100 characters).  On any error, or when nothing usable comes
    back, `_generate_fallback_related_topics` supplies the result.
    """
    try:
        from services.inference_client import InferenceRequest, create_default_client

        prompt = (
            f"The topic '{topic}' in {subject} has very few or no YouTube videos.\n"
            f"Suggest 3 related, more commonly taught topics that would have educational videos.\n"
            f"These should cover similar or prerequisite concepts.\n"
            f"Return ONLY a JSON array of 3 short topic phrases (2-4 words each).\n"
            f'["topic1", "topic2", "topic3"]'
        )
        client = create_default_client()
        request = InferenceRequest(
            messages=[
                {"role": "system", "content": "You suggest related math topics. Return only JSON arrays."},
                {"role": "user", "content": prompt},
            ],
            task_type="lesson_generation",
            max_new_tokens=150,
            temperature=0.4,
            top_p=0.9,
        )
        reply = client.generate_from_messages(request).strip()

        # The array may be surrounded by other text; take the bracketed span.
        found = re.search(r'\[.*\]', reply, re.DOTALL)
        suggestions = json.loads(found.group()) if found else None
        if isinstance(suggestions, list):
            usable = [
                suggestion.strip()[:100]
                for suggestion in suggestions
                if isinstance(suggestion, str) and len(suggestion.strip()) > 2
            ]
            if usable:
                logger.info("AI suggested %d related topics for '%s'", len(usable), topic)
                return usable
    except Exception as exc:
        logger.debug("AI related topics failed: %s", exc)

    # Fallback: generate simple related topics
    return _generate_fallback_related_topics(topic, subject)
-def _generate_fallback_related_topics(topic: str, subject: str) -> List[str]:
-    """Return up to three canned related-topic phrases picked by keywords in *topic*."""
-    needle = topic.lower()
-    # Ordered rules: the first one whose keyword occurs in the topic wins.
-    rules = (
-        (("equation",), [f"{subject} functions", f"{subject} graphing"]),
-        (("function",), [f"{subject} equations", f"{subject} domain range"]),
-        (("probability",), [f"{subject} statistics", "basic probability concepts"]),
-        (("statistics",), [f"{subject} data analysis", "measures of central tendency"]),
-        (("geometry", "angle"), [f"{subject} trigonometry", "basic geometry concepts"]),
-        (("calculus", "derivative"), ["limits and continuity", f"{subject} functions"]),
-    )
-    for keywords, suggestions in rules:
-        if any(word in needle for word in keywords):
-            return suggestions[:3]
-    # No keyword matched: fall back to generic subject-level phrases.
-    generic = [
-        f"{subject} fundamentals",
-        f"{subject} basic concepts",
-        f"{subject} introduction",
-    ]
-    return generic[:3]
-def _execute_youtube_search(
-    client,
-    query: str,
-    max_results: int = 15,
-    video_duration: Optional[str] = "medium",
-    video_definition: Optional[str] = "high",
-    language: str = "en",
-) -> List[dict]:
-    """Run one YouTube search and return the hits enriched with video details.
-    Args:
-        client: YouTube API client exposing ``search().list()`` and ``videos().list()``.
-        query: Search string.
-        max_results: Value passed as ``maxResults`` to the search call.
-        video_duration: ``videoDuration`` filter; omitted from the request when falsy.
-        video_definition: ``videoDefinition`` filter; omitted from the request when falsy.
-        language: Value passed as ``relevanceLanguage``.
-    Returns:
-        A list of dicts with keys ``videoId``, ``title``, ``channelTitle``,
-        ``thumbnailUrl``, ``durationSeconds`` and ``description`` (first 300
-        characters). Returns an empty list when nothing is found or when any
-        API call fails (the failure is logged as a warning).
-    """
-    try:
-        search_params = {
-            "part": "snippet",
-            "q": query,
-            "type": "video",
-            "maxResults": max_results,
-            "relevanceLanguage": language,
-            "order": "relevance",
-        }
-        if video_duration:
-            search_params["videoDuration"] = video_duration
-        if video_definition:
-            search_params["videoDefinition"] = video_definition
-        search_response = client.search().list(**search_params).execute()
-        items = search_response.get("items", [])
-        video_ids = [item["id"]["videoId"] for item in items if item.get("id", {}).get("videoId")]
-        if not video_ids:
-            return []
-        # One batched lookup for duration and full snippet of every hit.
-        details_response = client.videos().list(
-            part="contentDetails,statistics,snippet",
-            id=",".join(video_ids),
-        ).execute()
-        details_map = {
-            detail["id"]: detail
-            for detail in details_response.get("items", [])
-            if detail.get("id")
-        }
-        results = []
-        for item in items:
-            video_id = item.get("id", {}).get("videoId", "")
-            if not video_id:
-                continue
-            detail = details_map.get(video_id, {})
-            # Prefer the snippet from the details call; fall back to the search snippet.
-            snippet = detail.get("snippet", item.get("snippet", {}))
-            duration = detail.get("contentDetails", {}).get("duration", "")
-            # Default thumbnail is derived from the video id; prefer an API-provided
-            # one. Lookups are tolerant so that a single entry without "url" does
-            # not raise and discard the whole batch via the except below.
-            thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"
-            thumbs = snippet.get("thumbnails") or {}
-            for size in ("high", "medium"):
-                candidate = (thumbs.get(size) or {}).get("url")
-                if candidate:
-                    thumbnail_url = candidate
-                    break
-            results.append({
-                "videoId": video_id,
-                "title": snippet.get("title", ""),
-                "channelTitle": snippet.get("channelTitle", ""),
-                "thumbnailUrl": thumbnail_url,
-                "durationSeconds": _parse_iso8601_duration(duration),
-                # "or" guards against an explicit null description.
-                "description": (snippet.get("description") or "")[:300],
-            })
-        return results
-    except Exception as exc:
-        logger.warning("YouTube search execution failed for query '%s': %s", query, exc)
-        return []
-def _filter_and_score_results(
-    items: List[dict],
-    query: str,
-    topic: str,
-    subject: str,
-    require_educational: bool = True,
-    min_duration: int = 120,
-    max_duration: int = 3600,
-) -> List[dict]:
-    """Drop unsuitable videos and return the rest ordered best-score first.
-    A video is kept when its ``durationSeconds`` lies within
-    ``[min_duration, max_duration]`` and, if *require_educational* is set, it
-    either comes from a channel accepted by `_is_educational_channel` or its
-    title contains a tutorial-style keyword. Kept dicts are the same objects
-    that were passed in; the temporary ``_score`` key is removed before return.
-    """
-    tutorial_markers = (
-        "tutorial", "lesson", "math", "explain", "how to",
-        "introduction", "basics", "learn", "example", "problem",
-    )
-    kept = []
-    for video in items:
-        length = video.get("durationSeconds", 0)
-        channel = video.get("channelTitle", "")
-        heading = video.get("title", "")
-        if length < min_duration or length > max_duration:
-            continue
-        from_edu_channel = _is_educational_channel(channel)
-        if require_educational and not from_edu_channel:
-            # Unknown channel: keep only if the title itself looks like a lesson.
-            lowered = heading.lower()
-            if all(marker not in lowered for marker in tutorial_markers):
-                continue
-        video["_score"] = _score_video_result(video, query, topic, subject)
-        kept.append(video)
-    kept.sort(key=lambda v: v["_score"], reverse=True)
-    for video in kept:
-        video.pop("_score", None)
-    return kept
-def _get_cache_key(topic: str, subject: str, grade_level: str) -> str:
-    """Derive a stable 32-hex-character cache document ID from subject, topic and grade."""
-    fingerprint = "{}|{}|{}".format(subject, topic, grade_level)
-    digest = hashlib.sha256(fingerprint.encode("utf-8"))
-    return digest.hexdigest()[:32]
-def get_cached_videos(lesson_id: str) -> Optional[List[Dict]]:
-    """Return the cached video list stored at Firestore ``video_cache/{lesson_id}``.
-    Returns:
-        The cached list, or ``None`` on a miss. A miss is any of: Firebase not
-        initialised, document absent or empty, entry older than
-        ``_CACHE_TTL_SECONDS``, no non-empty ``videos`` list, or any error while
-        reading (logged at debug level). An entry without a ``cachedAt`` value
-        is never treated as expired.
-    """
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        # No initialised Firebase app means there is no cache to consult.
-        if not firebase_admin._apps:
-            return None
-        db = firestore.client()
-        doc = db.collection("video_cache").document(lesson_id).get()
-        if not doc.exists:
-            return None
-        data = doc.to_dict()
-        if not data:
-            return None
-        cached_at = data.get("cachedAt")
-        if cached_at:
-            # Objects exposing .timestamp() (datetime included) are converted
-            # through it; anything else is interpreted as epoch seconds.
-            if hasattr(cached_at, "timestamp"):
-                cached_epoch = cached_at.timestamp()
-            else:
-                cached_epoch = float(cached_at)
-            now_epoch = datetime.now(timezone.utc).timestamp()
-            if (now_epoch - cached_epoch) > _CACHE_TTL_SECONDS:
-                logger.info("Video cache expired for lesson %s", lesson_id)
-                return None
-        videos = data.get("videos")
-        if isinstance(videos, list) and videos:
-            logger.info("Video cache hit for lesson %s (%d videos)", lesson_id, len(videos))
-            return videos
-    except Exception as exc:
-        # The cache is an optimisation only; a read failure degrades to a miss.
-        logger.debug("Could not read video cache: %s", exc)
-    return None
-def cache_videos(lesson_id: str, videos: List[Dict], topic: str) -> None:
-    """Best-effort write of *videos* to Firestore ``video_cache/{lesson_id}``.
-    Does nothing when no Firebase app is initialised; a failed write is
-    logged as a warning and never raised.
-    """
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if firebase_admin._apps:
-            cache_doc = firestore.client().collection("video_cache").document(lesson_id)
-            payload = {
-                "videos": videos,
-                "cachedAt": firestore.SERVER_TIMESTAMP,
-                "topic": topic,
-            }
-            cache_doc.set(payload)
-            logger.info("Cached %d videos for lesson %s", len(videos), lesson_id)
-    except Exception as exc:
-        logger.warning("Could not cache videos in Firestore: %s", exc)
-def _collect_strategy_results(
-    client,
-    queries,
-    topic: str,
-    subject: str,
-    language: str,
-    all_results: List[dict],
-    seen_video_ids: set,
-    *,
-    per_query: int,
-    stop_at: int,
-    strict: bool = False,
-    unfiltered: bool = False,
-) -> None:
-    """Run *queries* in order, appending not-yet-seen videos to *all_results* in place.
-    Args:
-        per_query: ``max_results`` requested from YouTube for each query.
-        stop_at: Stop issuing queries once this many results are collected.
-        strict: Use the standard search filters (medium duration, high
-            definition, educational sources, module duration limits);
-            otherwise any duration/quality with a 60-7200 second window.
-        unfiltered: Accept raw search hits without filtering or scoring.
-    """
-    for query in queries:
-        # Never spend an API call on a blank query.
-        if not query or not query.strip():
-            continue
-        items = _execute_youtube_search(
-            client, query,
-            max_results=per_query,
-            video_duration="medium" if strict else None,
-            video_definition="high" if strict else None,
-            language=language,
-        )
-        if not unfiltered:
-            items = _filter_and_score_results(
-                items, query, topic, subject,
-                require_educational=strict,
-                min_duration=_MIN_DURATION_SECONDS if strict else 60,
-                max_duration=_MAX_DURATION_SECONDS if strict else 7200,
-            )
-        for item in items:
-            vid = item["videoId"]
-            if vid not in seen_video_ids:
-                seen_video_ids.add(vid)
-                all_results.append(item)
-        if len(all_results) >= stop_at:
-            break
-def search_youtube_videos(
-    topic: str,
-    subject: str = "",
-    lesson_context: str = "",
-    grade_level: str = "",
-    max_results: int = 3,
-    language: str = "en",
-) -> List[Dict]:
-    """
-    Search YouTube Data API v3 for relevant educational math videos.
-    Strategies are tried in order until enough videos are collected:
-    1. Generated queries with the standard (strict) filters
-    2. The same queries with relaxed filters
-    3. Broader subject-level queries (only if nothing was found)
-    4. Related topics suggested by AI (only if nothing was found)
-    5. Emergency unfiltered search (only if nothing was found)
-    If every strategy comes back empty, the guaranteed fallback videos for
-    the subject are returned (possibly an empty list).
-    Returns up to `max_results` videos, or an empty list when no YouTube
-    client could be built.
-    """
-    client = _build_youtube_client()
-    if client is None:
-        logger.warning("YOUTUBE_API_KEY not set. Video search disabled.")
-        return []
-    all_results: List[dict] = []
-    seen_video_ids = set()
-    queries = _generate_search_queries_with_ai(topic, subject, lesson_context, grade_level)
-    logger.info("YouTube search queries: %s", queries)
-    # ─── Strategy 1: generated queries with standard filters ────────────────
-    _collect_strategy_results(
-        client, queries, topic, subject, language, all_results, seen_video_ids,
-        per_query=10, stop_at=max_results, strict=True,
-    )
-    # ─── Strategy 2: same queries, relaxed filters ──────────────────────────
-    if len(all_results) < max_results:
-        _collect_strategy_results(
-            client, queries, topic, subject, language, all_results, seen_video_ids,
-            per_query=10, stop_at=max_results,
-        )
-    # ─── Strategy 3: broader subject-level searches ─────────────────────────
-    if not all_results:
-        # split() of a whitespace-only topic is empty, so guard the [0] lookup.
-        topic_words = topic.split() if topic else []
-        first_word = topic_words[0] if topic_words else ""
-        broad_queries = [
-            f"{subject} {first_word} tutorial",
-            f"{subject} mathematics lesson",
-            f"{topic} explained simply",
-        ]
-        _collect_strategy_results(
-            client, broad_queries, topic, subject, language, all_results, seen_video_ids,
-            per_query=10, stop_at=max_results,
-        )
-    # ─── Strategy 4: AI-suggested related topics ────────────────────────────
-    if not all_results:
-        related_topics = _find_related_topics_with_ai(topic, subject)
-        related_queries = [f"{related_topic} tutorial" for related_topic in related_topics]
-        _collect_strategy_results(
-            client, related_queries, topic, subject, language, all_results, seen_video_ids,
-            per_query=8, stop_at=max_results,
-        )
-    # ─── Strategy 5: emergency unfiltered search ────────────────────────────
-    if not all_results:
-        # Accept ANY result in emergency mode and stop at the first hit.
-        _collect_strategy_results(
-            client, [topic, f"{topic} math", subject], topic, subject, language,
-            all_results, seen_video_ids,
-            per_query=5, stop_at=1, unfiltered=True,
-        )
-    # ─── Final: return top results or guaranteed fallback ───────────────────
-    if not all_results:
-        logger.warning(
-            "All YouTube search strategies failed for topic: %s. Using guaranteed fallback videos.",
-            topic,
-        )
-        fallback = _get_guaranteed_fallback_videos(subject, max_results)
-        if fallback:
-            logger.info("Returning %d guaranteed fallback videos for subject: %s", len(fallback), subject)
-            return fallback
-        return []
-    # Results were scored per query above; re-score everything against the
-    # original topic so videos from different strategies are comparable.
-    for item in all_results:
-        item["_score"] = _score_video_result(item, topic, topic, subject)
-    all_results.sort(key=lambda x: x["_score"], reverse=True)
-    for item in all_results:
-        item.pop("_score", None)
-    top_results = all_results[:max_results]
-    logger.info("YouTube search returned %d results (top %d) for topic: %s",
-                len(all_results), len(top_results), topic)
-    return top_results
-def get_video_search_results(
-    topic: str,
-    subject: str = "",
-    lesson_context: str = "",
-    grade_level: str = "",
-    lesson_id: Optional[str] = None,
-    max_results: int = 3,
-) -> Dict:
-    """
-    Cache-aware entry point for video lookup.
-    The cache key is `lesson_id` when given, otherwise a hash of subject,
-    topic and grade level. A cache hit is returned as-is; on a miss YouTube
-    is searched and a non-empty result is written back to the cache.
-    Returns {"videos": [...], "cached": bool}.
-    """
-    cache_key = lesson_id if lesson_id else _get_cache_key(topic, subject, grade_level)
-    hit = get_cached_videos(cache_key)
-    if hit is not None:
-        return {"videos": hit, "cached": True}
-    search_args = {
-        "topic": topic,
-        "subject": subject,
-        "lesson_context": lesson_context,
-        "grade_level": grade_level,
-        "max_results": max_results,
-    }
-    found = search_youtube_videos(**search_args)
-    # Only non-empty results are cached, so a failed search is retried next time.
-    if found:
-        cache_videos(cache_key, found, topic)
-    return {"videos": found, "cached": False}

startup.sh CHANGED Viewed

@@ -11,33 +11,12 @@ fi
 export CURRICULUM_DIR
 export VECTORSTORE_DIR
-export CURRICULUM_VECTORSTORE_DIR="${VECTORSTORE_DIR}"
-echo "=========================================="
-echo "MathPulse AI Startup"
-echo "=========================================="
-echo "VECTORSTORE_DIR=${VECTORSTORE_DIR}"
-echo "CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
-echo "CURRICULUM_SOURCE_REPO_ID set: $(if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then echo YES; else echo NO; fi)"
-echo "FIREBASE_SERVICE_ACCOUNT_JSON set: $(if [ -n "${FIREBASE_SERVICE_ACCOUNT_JSON:-}" ]; then echo YES; else echo NO; fi)"
-echo "FIREBASE_STORAGE_BUCKET=${FIREBASE_STORAGE_BUCKET:-not set}"
-echo "=========================================="
 mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
-_vectorstore_cache_dir="${VECTORSTORE_DIR}/.chroma"
-if [ ! -d "${_vectorstore_cache_dir}" ]; then
-    mkdir -p "${_vectorstore_cache_dir}"
-    echo "INFO: Initialized ChromaDB cache dir at ${_vectorstore_cache_dir}"
-fi
 _ingest_script="/app/scripts/ingest_curriculum.py"
 if [ -f "${_ingest_script}" ]; then
-    _has_pdfs=false
-    if [ -d "${CURRICULUM_DIR}" ] && find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
-        _has_pdfs=true
-    fi
-    if [ "${_has_pdfs}" = true ] || [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then
         echo "INFO: Running curriculum ingestion (optional)..."
         python "${_ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
     else
@@ -47,27 +26,12 @@ else
     echo "INFO: Curriculum ingestion script not found at ${_ingest_script}; skipping (curriculum is optional)"
 fi
-_vectorstore_download_script="/app/scripts/download_vectorstore_from_firebase.py"
-if [ -f "${_vectorstore_download_script}" ]; then
-    echo "INFO: Vectorstore files present before download:"
-    ls -la "${VECTORSTORE_DIR}/"
     echo "INFO: Downloading vectorstore from Firebase Storage..."
-    python "${_vectorstore_download_script}" && _result=0 || _result=1
-    if [ $_result -eq 0 ]; then
-        echo "INFO: Vectorstore download succeeded"
-    else
-        echo "WARNING: Vectorstore download failed, continuing anyway"
-    fi
-    echo "INFO: Vectorstore files present after download:"
-    ls -la "${VECTORSTORE_DIR}/"
-    _vectorstore_summary_file="${VECTORSTORE_DIR}/ingest_summary.json"
-    if [ -f "${_vectorstore_summary_file}" ]; then
-        echo "INFO: Vectorstore summary found at ${_vectorstore_summary_file}"
-    else
-        echo "WARNING: Vectorstore summary not found at ${_vectorstore_summary_file}"
-    fi
 else
-    echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
 fi
 exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1

 export CURRICULUM_DIR
 export VECTORSTORE_DIR
 mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
 _ingest_script="/app/scripts/ingest_curriculum.py"
 if [ -f "${_ingest_script}" ]; then
+    # find exits 0 even when nothing matches, so test its output (the first
+    # PDF path, if any) instead of its exit status.
+    if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ] || [ -n "$(find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit 2>/dev/null)" ]; then
         echo "INFO: Running curriculum ingestion (optional)..."
         python "${_ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
     else
     echo "INFO: Curriculum ingestion script not found at ${_ingest_script}; skipping (curriculum is optional)"
 fi
+_download_script="/app/scripts/download_vectorstore_from_firebase.py"
+if [ -f "${_download_script}" ]; then
     echo "INFO: Downloading vectorstore from Firebase Storage..."
+    python "${_download_script}" && echo "INFO: Vectorstore download completed" || echo "WARNING: Vectorstore download failed, continuing anyway"
 else
+    echo "INFO: Vectorstore download script not found at ${_download_script}; skipping (vectorstore is optional)"
 fi
 exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1

startup_validation.py CHANGED Viewed

@@ -32,12 +32,7 @@ def validate_imports() -> None:
         logger.info("   ✓ FastAPI, Uvicorn, Pydantic OK")
         # Backend services (use ABSOLUTE imports like deployed code)
-        from services.inference_client import (
-            InferenceClient, create_default_client, is_sequential_model,
-            get_current_runtime_config, get_model_for_task, model_supports_thinking,
-            set_runtime_model_profile, set_runtime_model_override, reset_runtime_overrides,
-            _MODEL_PROFILES,
-        )  # noqa
         logger.info("   ✓ InferenceClient imports OK")
         from automation_engine import automation_engine  # noqa
@@ -54,8 +49,8 @@ def validate_imports() -> None:
             logger.warning("   ⚠ firebase_admin not available (OK if Firebase not needed)")
         # ML & inference
-        from services.ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL  # noqa
-        logger.info("   ✓ DeepSeek AI client imports OK")
         logger.info("✅ All critical imports validated")
     except ImportError as e:
@@ -78,37 +73,36 @@ def validate_environment() -> None:
     """Verify required environment variables are set."""
     logger.info("🔍 Validating environment variables...")
-    # CRITICAL: DEEPSEEK_API_KEY for inference
-    ds_api_key = os.environ.get("DEEPSEEK_API_KEY")
-    if not ds_api_key:
         logger.warning(
-            "⚠  WARNING: DEEPSEEK_API_KEY is not set as an environment variable.\n"
             "   AI inference will fail without this token.\n"
-            "   Use: Set DEEPSEEK_API_KEY in your .env or space secrets."
         )
     else:
-        logger.info("   ✓ DEEPSEEK_API_KEY is set")
     # Check inference provider config
-    inference_provider = os.getenv("INFERENCE_PROVIDER", "deepseek")
     logger.info(f"   ✓ INFERENCE_PROVIDER: {inference_provider}")
     # Check model IDs
-    chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "deepseek-chat"
     logger.info(f"   ✓ Chat model configured: {chat_model}")
     chat_strict = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
     chat_hard_trigger = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
-    enforce_lock_model = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
-    lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", "deepseek-chat").strip() or "deepseek-chat"
-    logger.info(f"   ✓ INFERENCE_ENFORCE_LOCK_MODEL: {enforce_lock_model}")
-    logger.info(f"   ✓ INFERENCE_LOCK_MODEL_ID: {lock_model_id}")
-    model_profile = os.getenv("MODEL_PROFILE", "").strip().lower()
-    quiz_model = os.getenv("HF_QUIZ_MODEL_ID", "").strip()
-    rag_model = os.getenv("HF_RAG_MODEL_ID", "").strip()
-    logger.info(f"   ✓ MODEL_PROFILE: {model_profile or 'not set (using individual env vars)'}")
-    logger.info(f"   ✓ HF_QUIZ_MODEL_ID: {quiz_model or 'not set (using defaults)'}")
-    logger.info(f"   ✓ HF_RAG_MODEL_ID: {rag_model or 'not set (using defaults)'}")
     if not chat_strict:
         logger.warning("   ⚠ Chat strict model lock is disabled; chat may fallback to alternate models")
     if chat_strict and chat_hard_trigger:
@@ -116,40 +110,9 @@ def validate_environment() -> None:
             "   ⚠ Chat hard trigger is enabled while strict chat lock is on; hard escalation will be bypassed"
         )
-    _validate_embedding_model()
     logger.info("✅ Environment variables OK")
-EXPECTED_EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"
-def _validate_embedding_model() -> None:
-    """Log warnings about a missing, unexpected or misused EMBEDDING_MODEL setting.
-    Nothing is raised for a bad value: an unset variable, a value other than
-    EXPECTED_EMBEDDING_MODEL, or a value equal to one of the generation
-    models each produce a warning; otherwise the value is logged at info level.
-    """
-    configured = os.getenv("EMBEDDING_MODEL", "").strip()
-    if configured == "":
-        logger.warning(
-            "WARNING: EMBEDDING_MODEL env var is not set. "
-            f"Expected: {EXPECTED_EMBEDDING_MODEL}. "
-            "RAG retrieval will fail without an embedding model."
-        )
-    elif configured != EXPECTED_EMBEDDING_MODEL:
-        logger.warning(
-            f"WARNING: EMBEDDING_MODEL is set to '{configured}' — "
-            f"expected '{EXPECTED_EMBEDDING_MODEL}'. "
-            "Confirm this is intentional before deploying."
-        )
-    from services.ai_client import CHAT_MODEL, REASONER_MODEL  # noqa
-    # A chat/reasoner model ID used for embeddings is a misconfiguration.
-    if configured not in (CHAT_MODEL, REASONER_MODEL):
-        logger.info(f"   EMBEDDING_MODEL: {configured or 'not set'}")
-        return
-    logger.warning(
-        f"CRITICAL: EMBEDDING_MODEL is set to a generation model ('{configured}'). "
-        "This will break RAG retrieval. Set it to 'BAAI/bge-small-en-v1.5'."
-    )
 def validate_config_files() -> None:
     """Verify config files exist and are readable."""
     logger.info("🔍 Validating configuration files...")
@@ -191,9 +154,7 @@ def validate_config_files() -> None:
         )
     logger.info(f"   ✓ Using model config: {readable_model_config}")
-    _validate_model_config_fields(readable_model_config)
     logger.info("✅ Configuration files OK")
@@ -297,40 +258,6 @@ def validate_inference_client_config() -> None:
         ) from e
-def _validate_model_config_fields(config_path: str) -> None:
-    """Check that the model config YAML contains the sections RAG routing needs.
-    Args:
-        config_path: Path to the YAML model configuration file.
-    Raises:
-        StartupError: If the file cannot be read or parsed, its top level is
-            not a mapping, ``models`` is not a mapping, ``models.rag_primary``
-            or ``models.model_capabilities`` is missing, or
-            ``routing.task_model_map`` lacks any of the RAG task names.
-    """
-    try:
-        import yaml
-        with open(config_path, "r", encoding="utf-8") as f:
-            config = yaml.safe_load(f) or {}
-    except Exception as e:
-        raise StartupError(f"❌ Cannot parse {config_path} as YAML: {e}") from e
-    # A YAML root that is a list or scalar would otherwise fail on .get() below.
-    if not isinstance(config, dict):
-        raise StartupError(f"❌ {config_path}: top-level YAML value must be a mapping")
-    models = config.get("models", {})
-    if not isinstance(models, dict):
-        raise StartupError(f"❌ {config_path}: 'models' section missing or invalid")
-    if "rag_primary" not in models:
-        raise StartupError(f"❌ {config_path}: missing 'models.rag_primary' field")
-    rag_primary = models["rag_primary"]
-    if isinstance(rag_primary, dict):
-        logger.info(f"   ✓ rag_primary model: {rag_primary.get('id', 'UNSET')}")
-    else:
-        logger.warning("   ⚠ rag_primary is not a dict, may cause issues")
-    capabilities = models.get("model_capabilities")
-    if not isinstance(capabilities, dict):
-        raise StartupError(f"❌ {config_path}: missing 'models.model_capabilities' section")
-    logger.info(f"   ✓ model_capabilities: sequential_only={capabilities.get('sequential_only')}, supports_thinking={capabilities.get('supports_thinking')}")
-    # Treat a null or non-mapping routing/task_model_map as "no tasks mapped"
-    # so it is reported as missing mappings rather than an AttributeError.
-    routing = config.get("routing")
-    tasks = routing.get("task_model_map") if isinstance(routing, dict) else None
-    if not isinstance(tasks, dict):
-        tasks = {}
-    rag_tasks = {"rag_lesson", "rag_problem", "rag_analysis_context"}
-    configured_tasks = {str(t).strip().lower() for t in tasks}
-    missing_rag = rag_tasks - configured_tasks
-    if missing_rag:
-        raise StartupError(f"❌ {config_path}: missing RAG task mappings: {missing_rag}")
-    logger.info("   ✓ All RAG task mappings present")
 def run_all_validations() -> None:
     """Run comprehensive startup validation.

         logger.info("   ✓ FastAPI, Uvicorn, Pydantic OK")
         # Backend services (use ABSOLUTE imports like deployed code)
+        from services.inference_client import InferenceClient, create_default_client  # noqa
         logger.info("   ✓ InferenceClient imports OK")
         from automation_engine import automation_engine  # noqa
             logger.warning("   ⚠ firebase_admin not available (OK if Firebase not needed)")
         # ML & inference
+        from huggingface_hub import InferenceClient as HFInferenceClient  # noqa
+        logger.info("   ✓ HuggingFace Hub imports OK")
         logger.info("✅ All critical imports validated")
     except ImportError as e:
     """Verify required environment variables are set."""
     logger.info("🔍 Validating environment variables...")
+    # CRITICAL: HF_TOKEN for inference
+    hf_token = os.environ.get("HF_TOKEN")
+    api_key = os.environ.get("HUGGING_FACE_API_TOKEN")
+    legacy_api_key = os.environ.get("HUGGINGFACE_API_TOKEN")
+    if not hf_token and not api_key and not legacy_api_key:
         logger.warning(
+            "⚠  WARNING: HF_TOKEN is not set as an environment variable.\n"
+            "   On HF Spaces, this should be set as a SPACE SECRET.\n"
             "   AI inference will fail without this token.\n"
+            "   Use: python set-hf-secrets.py to set the secret."
         )
     else:
+        logger.info("   ✓ HF_TOKEN/HUGGING_FACE_API_TOKEN/HUGGINGFACE_API_TOKEN is set")
     # Check inference provider config
+    inference_provider = os.getenv("INFERENCE_PROVIDER", "hf_inference")
     logger.info(f"   ✓ INFERENCE_PROVIDER: {inference_provider}")
     # Check model IDs
+    chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "Qwen/Qwen3-32B"
     logger.info(f"   ✓ Chat model configured: {chat_model}")
     chat_strict = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
     chat_hard_trigger = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
+    enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
+    qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen3-32B").strip() or "Qwen/Qwen3-32B"
+    logger.info(f"   ✓ INFERENCE_CHAT_STRICT_MODEL_ONLY: {chat_strict}")
+    logger.info(f"   ✓ INFERENCE_CHAT_HARD_TRIGGER_ENABLED: {chat_hard_trigger}")
+    logger.info(f"   ✓ INFERENCE_ENFORCE_QWEN_ONLY: {enforce_qwen_only}")
+    logger.info(f"   ✓ INFERENCE_QWEN_LOCK_MODEL: {qwen_lock_model}")
     if not chat_strict:
         logger.warning("   ⚠ Chat strict model lock is disabled; chat may fallback to alternate models")
     if chat_strict and chat_hard_trigger:
             "   ⚠ Chat hard trigger is enabled while strict chat lock is on; hard escalation will be bypassed"
         )
     logger.info("✅ Environment variables OK")
 def validate_config_files() -> None:
     """Verify config files exist and are readable."""
     logger.info("🔍 Validating configuration files...")
         )
     logger.info(f"   ✓ Using model config: {readable_model_config}")
     logger.info("✅ Configuration files OK")
         ) from e
 def run_all_validations() -> None:
     """Run comprehensive startup validation.

test_full_rag.py DELETED Viewed

@@ -1,75 +0,0 @@
-import sys
-import os
-sys.path.insert(0, 'backend')
-# Set required env vars
-os.environ['DEEPSEEK_API_KEY'] = os.getenv('DEEPSEEK_API_KEY', '')
-os.environ['DEEPSEEK_BASE_URL'] = os.getenv('DEEPSEEK_BASE_URL', 'https://api.deepseek.com')
-from rag.curriculum_rag import retrieve_lesson_pdf_context, build_lesson_prompt
-from services.inference_client import InferenceClient, InferenceRequest
-# Test retrieval
-print("Testing retrieval...")
-try:
-    chunks, mode = retrieve_lesson_pdf_context(
-        topic="Represent real-life relationships as functions and interpret domain/range.",
-        subject="General Mathematics",
-        quarter=2,
-        lesson_title="Represent real-life relationships as functions and interpret domain/range.",
-        module_id="gen-math",
-        lesson_id="gm-q2-functions-graphs-l1",
-        competency_code="GM11-FG-1",
-        top_k=8,
-    )
-    print(f"Retrieved {len(chunks)} chunks, mode={mode}")
-except Exception as e:
-    print(f"Retrieval ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
-    sys.exit(1)
-# Test prompt building
-print("\nTesting prompt building...")
-try:
-    prompt = build_lesson_prompt(
-        lesson_title="Represent real-life relationships as functions and interpret domain/range.",
-        competency="Represent real-life relationships as functions and interpret domain/range.",
-        grade_level="Grade 11-12",
-        subject="General Mathematics",
-        quarter=2,
-        learner_level="Grade 11-12",
-        module_unit="n/a",
-        curriculum_chunks=chunks,
-        competency_code="GM11-FG-1",
-    )
-    print(f"Prompt length: {len(prompt)} chars")
-    print(f"Prompt preview: {prompt[:200]}...")
-except Exception as e:
-    print(f"Prompt building ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
-    sys.exit(1)
-# Test inference (optional - might cost money)
-print("\nTesting inference...")
-try:
-    client = InferenceClient()
-    req = InferenceRequest(
-        messages=[
-            {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
-            {"role": "user", "content": prompt},
-        ],
-        task_type="lesson_generation",
-        max_new_tokens=100,  # Small for testing
-        temperature=0.2,
-        top_p=0.9,
-        enable_thinking=True,
-    )
-    result = client.generate_from_messages(req)
-    print(f"Inference result: {result[:200]}...")
-    print("SUCCESS!")
-except Exception as e:
-    print(f"Inference ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()

test_retrieval.py DELETED Viewed

@@ -1,39 +0,0 @@
-import sys
-sys.path.insert(0, '.')
-from rag.curriculum_rag import retrieve_lesson_pdf_context, retrieve_curriculum_context
-# Test retrieval with the same params as the frontend
-try:
-    chunks, mode = retrieve_lesson_pdf_context(
-        topic="Represent real-life relationships as functions and interpret domain/range.",
-        subject="General Mathematics",
-        quarter=2,
-        lesson_title="Represent real-life relationships as functions and interpret domain/range.",
-        module_id="gen-math",
-        lesson_id="gm-q2-functions-graphs-l1",
-        competency_code="GM11-FG-1",
-        top_k=8,
-    )
-    print(f"Retrieved {len(chunks)} chunks, mode={mode}")
-    for i, chunk in enumerate(chunks[:3]):
-        print(f"  Chunk {i}: score={chunk.get('score')}, domain={chunk.get('content_domain')}, source={chunk.get('source_file')}")
-        print(f"    Content: {chunk.get('content', '')[:100]}...")
-except Exception as e:
-    print(f"ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
-# Also test without module/lesson filters
-try:
-    chunks2 = retrieve_curriculum_context(
-        query="Represent real-life relationships as functions and interpret domain/range.",
-        subject="General Mathematics",
-        quarter=2,
-        top_k=8,
-    )
-    print(f"\nGeneral retrieval: {len(chunks2)} chunks")
-except Exception as e:
-    print(f"\nGeneral ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()

tests/README.md DELETED Viewed

@@ -1,46 +0,0 @@
-# Backend Tests Safe Runner
-## Test Pollution Issue
-The test suite has pollution when run in default pytest order. Tests pass in isolation or in specific groupings.
-## Running Tests Safely
-### Option 1: Run core API tests only (137 tests, all green)
-```bash
-cd backend
-python -m pytest tests/test_api.py tests/test_rag_pipeline.py tests/test_quiz_battle.py tests/test_model_profiles.py -v
-```
-### Option 2: Run key test files in correct order
-```bash
-python -m pytest tests/ -v --ignore=tests/test_video_routes.py --ignore=tests/test_admin_model_routes.py --ignore=tests/test_hf_monitoring_routes.py
-```
-### Option 3: Individual test files (all green individually)
-```bash
-# Each passes individually
-python -m pytest tests/test_api.py -v  # 90 passed
-python -m pytest tests/test_rag_pipeline.py -v  # 13 passed
-python -m pytest tests/test_quiz_battle.py -v  # 19 passed
-python -m pytest tests/test_model_profiles.py -v  # 15 passed
-python -m pytest tests/test_video_routes.py -v  # 11 passed
-python -m pytest tests/test_admin_model_routes.py -v  # 19 passed
-python -m pytest tests/test_hf_monitoring_routes.py -v  # 8 passed
-```
-## Root Cause
-- Different test files set different auth roles at module level
-- `test_api.py`: teacher role
-- `test_video_routes.py`: was student, now teacher but client still uses admin token
-- `test_admin_model_routes.py`: was admin, now teacher but test setup differs
-- `test_hf_monitoring_routes.py`: was admin, tests need admin via separate client
-## Fix Attempts
-1. conftest.py - doesn't work (MagicMock doesn't reset properly with @patch)
-2. Using pytest fixtures - doesn't work (@patch doesn't override MagicMock)
-3. Changing module-level auth - causes different tests to fail
-## Status
-- 177/180 tests pass when run in safe combinations
-- 3 tests fail only when test_video_routes runs before test_api in default order
-- Tests pass individually or in safe groupings

tests/test_admin_model_routes.py DELETED Viewed

@@ -1,213 +0,0 @@
-"""
-Route-level tests for the /api/admin/model-config endpoints.
-Follows the auth mock pattern from test_api.py.
-"""
-import os
-from unittest.mock import MagicMock, patch
-import pytest
-from fastapi.testclient import TestClient
-import main as main_module
-from main import app
-from services.inference_client import reset_runtime_overrides
-main_module._firebase_ready = True
-main_module._init_firebase_admin = lambda: None
-main_module.firebase_firestore = None
-main_module.firebase_auth = MagicMock()
-main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-    "uid": "test-teacher-uid",
-    "email": "teacher@example.com",
-    "role": "teacher",
-})
-admin_client = TestClient(app, headers={"Authorization": "Bearer admin-token"})
-_RESOLVED_KEYS = {
-    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
-    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
-}
-_KNOWN_PROFILES = {"dev", "budget", "prod"}
-_BASE_CONFIG_KEYS = {"profile", "overrides", "resolved"}
-@pytest.fixture(autouse=True)
-def _mock_firestore():
-    with patch("services.inference_client._save_runtime_config_to_firestore", side_effect=None):
-        yield
-@pytest.fixture(autouse=True)
-def _reset_overrides():
-    reset_runtime_overrides()
-    yield
-    reset_runtime_overrides()
-# ─── Auth Enforcement ────────────────────────────────────────
-class TestAuth:
-    def test_get_rejects_bad_token(self):
-        main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
-        c = TestClient(app, headers={"Authorization": "Bearer bad-token"})
-        response = c.get("/api/admin/model-config")
-        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-            "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
-        })
-        assert response.status_code in {401, 403}
-    def test_get_rejects_student_role(self):
-        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-            "uid": "student-uid", "email": "s@example.com", "role": "student",
-        })
-        c = TestClient(app, headers={"Authorization": "Bearer student-token"})
-        response = c.get("/api/admin/model-config")
-        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-            "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
-        })
-        assert response.status_code == 403
-# ─── GET Model Config ─────────────────────────────────────────
-class TestGetModelConfig:
-    def test_returns_base_keys(self):
-        response = admin_client.get("/api/admin/model-config")
-        assert response.status_code == 200
-        data = response.json()
-        for key in _BASE_CONFIG_KEYS:
-            assert key in data
-    def test_resolved_contains_expected_keys(self):
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        resolved = data.get("resolved", {})
-        for key in _RESOLVED_KEYS:
-            assert key in resolved
-    def test_available_profiles_present(self):
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        profiles = data.get("availableProfiles", [])
-        for p in _KNOWN_PROFILES:
-            assert p in profiles
-    def test_profile_descriptions_present(self):
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        descriptions = data.get("profileDescriptions", {})
-        for p in _KNOWN_PROFILES:
-            assert p in descriptions
-    def test_resolved_models_are_non_empty_strings(self):
-        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        resolved = data.get("resolved", {})
-        for key, value in resolved.items():
-            assert isinstance(value, str), f"{key} is not a string: {value}"
-            assert len(value) > 0, f"Resolved key {key} is empty"
-# ─── POST Profile Switch ─────────────────────────────────────
-class TestPostProfileSwitch:
-    def test_switch_to_dev_succeeds(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
-        assert response.status_code == 200
-        assert response.json()["success"] is True
-    def test_switch_to_budget_succeeds(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "budget"})
-        assert response.status_code == 200
-        data = response.json()
-        assert data["success"] is True
-        assert data["applied"]["profile"] == "budget"
-    def test_switch_to_prod_succeeds(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "prod"})
-        assert response.status_code == 200
-        data = response.json()
-        assert data["success"] is True
-        assert data["applied"]["profile"] == "prod"
-    def test_switch_to_invalid_profile_returns_400(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "nonexistent"})
-        assert response.status_code == 400
-    def test_switch_missing_profile_field(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={})
-        assert response.status_code == 422
-# ─── POST Override ───────────────────────────────────────────
-class TestPostOverride:
-    def test_set_valid_override_key_succeeds(self):
-        response = admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "INFERENCE_MODEL_ID", "value": "test/override-model"},
-        )
-        assert response.status_code == 200
-        assert response.json()["success"] is True
-    def test_set_invalid_override_key_returns_400(self):
-        response = admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "EMBEDDING_MODEL", "value": "test/emb"},
-        )
-        assert response.status_code == 400
-    def test_override_is_visible_in_subsequent_get(self):
-        admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "INFERENCE_MODEL_ID", "value": "custom/model-v2"},
-        )
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        overrides = data.get("overrides", {})
-        assert "INFERENCE_MODEL_ID" in overrides
-        assert overrides["INFERENCE_MODEL_ID"] == "custom/model-v2"
-# ─── DELETE Reset ───────────────────────────────────────────
-class TestDeleteReset:
-    def test_reset_returns_success(self):
-        response = admin_client.delete("/api/admin/model-config/reset")
-        assert response.status_code == 200
-        assert response.json()["success"] is True
-    def test_reset_clears_override(self):
-        admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "INFERENCE_MODEL_ID", "value": "temp/model"},
-        )
-        response = admin_client.delete("/api/admin/model-config/reset")
-        assert response.status_code == 200
-        overrides = response.json()["current"]["overrides"]
-        assert overrides == {}
-    def test_reset_clears_profile(self):
-        admin_client.post("/api/admin/model-config/profile", json={"profile": "budget"})
-        response = admin_client.delete("/api/admin/model-config/reset")
-        assert response.status_code == 200
-        assert response.json()["current"]["profile"] == ""
-# ─── Profile after switch ────────────────────────────────────
-class TestProfileAfterSwitch:
-    def test_switched_profile_visible_in_get(self):
-        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
-        response = admin_client.get("/api/admin/model-config")
-        assert response.json()["profile"] == "dev"

tests/test_api.py CHANGED Viewed

@@ -4,7 +4,8 @@ Comprehensive tests for all FastAPI endpoints.
 Tests cover:
   - Successful requests with valid data
-  - AI inference API failures (502 fallback)
   - Timeout handling
   - Malformed response data
   - Error status-code mapping
@@ -84,9 +85,8 @@ mock_ae.ContentUpdatePayload = _ContentUpdatePayload
 mock_ae.AutomationResult = _AutomationResult
 sys.modules["automation_engine"] = mock_ae
-# Override tokens so client init doesn't fail
 os.environ["HF_TOKEN"] = "test-token-for-testing"
-os.environ["DEEPSEEK_API_KEY"] = "test-ds-key-for-testing"
 # analytics.py is importable directly (its heavy deps are guarded)
 import main as main_module  # noqa: E402
@@ -97,7 +97,8 @@ app = main_module.app
 main_module._firebase_ready = True
 main_module._init_firebase_admin = lambda: None
 main_module.firebase_firestore = None
-main_module.firebase_auth = MagicMock()
 main_module.firebase_auth.verify_id_token = MagicMock(
     return_value={
         "uid": "test-teacher-uid",
@@ -112,22 +113,33 @@ client = TestClient(app, headers={"Authorization": "Bearer test-auth-token"})
 # ─── Fixtures ──────────────────────────────────────────────────
-def make_deepseek_risk_mock(
-    risk_label: str = "low risk academically stable",
-    confidence: float = 0.85,
 ):
-    """Create a mock DeepSeek client for risk prediction tests."""
-    mock_ds = MagicMock()
-    mock_choice = MagicMock()
-    mock_choice.message.content = json.dumps({
-        "risk_label": risk_label,
-        "confidence": confidence,
-        "reasoning": "Mock risk assessment."
-    })
-    mock_ds.chat.completions.create.return_value = MagicMock(
-        choices=[mock_choice]
-    )
-    return mock_ds
 # ─── Health & Root ─────────────────────────────────────────────
@@ -509,36 +521,43 @@ class TestChatEndpoint:
         mock_stream_async.assert_not_called()
-class TestChatTransport:
-    @patch("services.ai_client.get_deepseek_client")
-    def test_call_hf_chat_uses_deepseek_api(self, mock_ds_fn):
-        mock_ds = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "x = 2 or x = 3"
-        mock_ds.chat.completions.create.return_value = MagicMock(
-            choices=[mock_choice]
         )
-        mock_ds_fn.return_value = mock_ds
-        with patch.object(main_module, "get_inference_client") as mock_get_ic:
-            ic = MagicMock()
-            ic.generate_from_messages.return_value = "x = 2 or x = 3"
-            mock_get_ic.return_value = ic
-            result = main_module.call_hf_chat(
-                [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0"}],
-                max_tokens=256,
-                temperature=0.2,
-                top_p=0.9,
-            )
         assert result
 class TestInferenceRouting:
     def test_chat_strict_model_lock_keeps_single_model_chain(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
         client = InferenceClient()
         req = InferenceRequest(
@@ -549,15 +568,15 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
-        assert selected_model == "deepseek-chat"
         assert "chat_strict_model_only" in source
-        assert model_chain == ["deepseek-chat"]
-    def test_chat_env_override_wins_under_model_lock(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
-        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
-        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")
         client = InferenceClient()
         req = InferenceRequest(
@@ -568,16 +587,16 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
-        assert selected_model == "deepseek-chat"
         assert "chat_override_env" in source
-        assert model_chain == ["deepseek-chat"]
-    def test_chat_temp_override_wins_under_model_lock(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-reasoner")
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
-        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
-        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")
         client = InferenceClient()
         req = InferenceRequest(
@@ -588,14 +607,14 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
-        assert selected_model == "deepseek-chat"
         assert "chat_temp_override_env" in source
-        assert model_chain == ["deepseek-chat"]
-    def test_chat_temp_override_does_not_change_non_chat_task_under_lock(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
-        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
-        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")
         client = InferenceClient()
         req = InferenceRequest(
@@ -606,18 +625,114 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("verify_solution", selected_model)
-        assert selected_model == "deepseek-reasoner"
         assert "chat_temp_override_env" not in source
-        assert model_chain == ["deepseek-reasoner"]
 # ─── Risk Prediction ──────────────────────────────────────────
 class TestRiskPrediction:
-    @patch("main.get_deepseek_client")
-    def test_predict_risk_success(self, mock_ds_fn):
-        mock_ds_fn.return_value = make_deepseek_risk_mock()
         response = client.post("/api/predict-risk", json={
             "engagementScore": 80,
             "avgQuizScore": 75,
@@ -631,7 +746,7 @@ class TestRiskPrediction:
     def test_predict_risk_invalid_score_range(self):
         response = client.post("/api/predict-risk", json={
-            "engagementScore": 150,
             "avgQuizScore": 75,
             "attendance": 90,
             "assignmentCompletion": 85,
@@ -653,11 +768,11 @@ class TestRiskPrediction:
         })
         assert response.status_code == 422
-    @patch("main.get_deepseek_client")
-    def test_predict_risk_ai_failure(self, mock_ds_fn):
-        mock_client = MagicMock()
-        mock_client.chat.completions.create.side_effect = Exception("AI down")
-        mock_ds_fn.return_value = mock_client
         response = client.post("/api/predict-risk", json={
             "engagementScore": 80,
             "avgQuizScore": 75,
@@ -666,9 +781,9 @@ class TestRiskPrediction:
         })
         assert response.status_code == 502
-    @patch("main.get_deepseek_client")
-    def test_batch_risk_prediction(self, mock_ds_fn):
-        mock_ds_fn.return_value = make_deepseek_risk_mock()
         response = client.post("/api/predict-risk/batch", json={
             "students": [
                 {"engagementScore": 80, "avgQuizScore": 75, "attendance": 90, "assignmentCompletion": 85},
@@ -706,8 +821,8 @@ class TestLearningPath:
         assert response.status_code == 422
     @patch("main.call_hf_chat")
-    def test_learning_path_ai_failure(self, mock_chat):
-        mock_chat.side_effect = Exception("AI service down")
         response = client.post("/api/learning-path", json={
             "weaknesses": ["algebra"],
             "gradeLevel": "Grade 11",
@@ -1065,14 +1180,6 @@ class TestUploadClassRecordsGuardrails:
 class TestImportedOverviewAndTopicMastery:
     def test_imported_class_overview_returns_inferred_state_for_realistic_minimal_records(self):
-        # Ensure teacher role matches mock data
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "test-teacher-uid",
-                "email": "teacher@example.com",
-                "role": "teacher",
-            }
-        )
         firestore = _FakeFirestoreModule(
             {
                 "normalizedClassRecords": [
@@ -1221,24 +1328,15 @@ class TestAsyncGenerationTasks:
         assert cancel_payload["status"] in {"cancelled", "cancelling"}
     def test_inference_metrics_requires_admin(self):
-        # Test with a non-admin mock to verify role check works
-        with patch.object(main_module.firebase_auth, "verify_id_token", return_value={
-            "uid": "teacher-uid",
-            "email": "teacher@example.com",
-            "role": "teacher",
-        }):
-            response = client.get("/api/ops/inference-metrics")
-            assert response.status_code == 403
-    def test_inference_metrics_admin_success(self):
-        # Set admin role directly to ensure it persists
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "admin-uid",
-                "email": "admin@example.com",
-                "role": "admin",
-            }
-        )
         response = client.get("/api/ops/inference-metrics")
         assert response.status_code == 200
         payload = response.json()
@@ -1468,14 +1566,6 @@ class _FakeFirestoreModule:
 class TestRecentCourseMaterials:
     def test_recent_course_materials_respects_class_section_filter(self):
-        # Ensure teacher role matches mock data
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "test-teacher-uid",
-                "email": "teacher@example.com",
-                "role": "teacher",
-            }
-        )
         now = int(time.time())
         firestore = _FakeFirestoreModule(
             {
@@ -1518,14 +1608,6 @@ class TestRecentCourseMaterials:
         assert all(item["classSectionId"] == "grade11_a" for item in data["materials"])
     def test_recent_course_materials_reports_retention_exclusions(self):
-        # Ensure teacher role matches mock data
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "test-teacher-uid",
-                "email": "teacher@example.com",
-                "role": "teacher",
-            }
-        )
         now = int(time.time())
         firestore = _FakeFirestoreModule(
             {

 Tests cover:
   - Successful requests with valid data
+  - Input validation errors (422)
+  - HuggingFace API failures (502 fallback)
   - Timeout handling
   - Malformed response data
   - Error status-code mapping
 mock_ae.AutomationResult = _AutomationResult
 sys.modules["automation_engine"] = mock_ae
+# Override HF_TOKEN so client init doesn't fail
 os.environ["HF_TOKEN"] = "test-token-for-testing"
 # analytics.py is importable directly (its heavy deps are guarded)
 import main as main_module  # noqa: E402
 main_module._firebase_ready = True
 main_module._init_firebase_admin = lambda: None
 main_module.firebase_firestore = None
+if getattr(main_module, "firebase_auth", None) is None:
+    main_module.firebase_auth = MagicMock()
 main_module.firebase_auth.verify_id_token = MagicMock(
     return_value={
         "uid": "test-teacher-uid",
 # ─── Fixtures ──────────────────────────────────────────────────
+class FakeClassificationElement:
+    """Mimics huggingface_hub ZeroShotClassificationOutputElement."""
+    def __init__(self, label: str, score: float):
+        self.label = label
+        self.score = score
+def make_zsc_client(
+    classification: list | None = None,
 ):
+    """Create a mock InferenceClient with predictable zero-shot outputs.
+    Used only for risk-prediction tests (the only endpoint still using
+    ``get_client()`` / ``InferenceClient``).
+    """
+    mock_client = MagicMock()
+    if classification is None:
+        classification = [
+            FakeClassificationElement("low risk academically stable", 0.85),
+            FakeClassificationElement("medium academic risk", 0.10),
+            FakeClassificationElement("high risk of failing", 0.05),
+        ]
+    mock_client.zero_shot_classification.return_value = classification
+    return mock_client
 # ─── Health & Root ─────────────────────────────────────────────
         mock_stream_async.assert_not_called()
+class TestHFChatTransport:
+    @patch("main.http_requests.post")
+    def test_call_hf_chat_uses_router_chat_completions(self, mock_post):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "choices": [
+                {"message": {"content": "x = 2 or x = 3"}}
+            ]
+        }
+        mock_post.return_value = mock_response
+        result = main_module.call_hf_chat(
+            [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0"}],
+            max_tokens=256,
+            temperature=0.2,
+            top_p=0.9,
         )
         assert result
+        call_args = mock_post.call_args
+        endpoint = call_args.args[0]
+        payload = call_args.kwargs["json"]
+        assert endpoint == "https://router.huggingface.co/v1/chat/completions"
+        assert isinstance(payload["model"], str)
+        assert payload["model"]
+        assert payload["stream"] is False
+        assert isinstance(payload["messages"], list)
 class TestInferenceRouting:
     def test_chat_strict_model_lock_keeps_single_model_chain(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "true")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_MODEL_ID", "meta-llama/Meta-Llama-3-70B-Instruct")
         client = InferenceClient()
         req = InferenceRequest(
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
+        assert selected_model == "Qwen/Qwen2.5-7B-Instruct"
         assert "chat_strict_model_only" in source
+        assert model_chain == ["Qwen/Qwen2.5-7B-Instruct"]
+    def test_chat_env_override_wins_under_qwen_only_lock(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen3-32B")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen2.5-7B-Instruct")
         client = InferenceClient()
         req = InferenceRequest(
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
+        assert selected_model == "Qwen/Qwen3-32B"
         assert "chat_override_env" in source
+        assert model_chain == ["Qwen/Qwen3-32B"]
+    def test_chat_temp_override_wins_under_qwen_only_lock(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "Qwen/Qwen3-32B")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen2.5-7B-Instruct")
         client = InferenceClient()
         req = InferenceRequest(
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
+        assert selected_model == "Qwen/Qwen3-32B"
         assert "chat_temp_override_env" in source
+        assert model_chain == ["Qwen/Qwen3-32B"]
+    def test_chat_temp_override_does_not_change_non_chat_task_under_qwen_lock(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "Qwen/Qwen3-32B")
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen2.5-7B-Instruct")
         client = InferenceClient()
         req = InferenceRequest(
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("verify_solution", selected_model)
+        assert selected_model == "Qwen/Qwen2.5-7B-Instruct"
         assert "chat_temp_override_env" not in source
+        assert model_chain == ["Qwen/Qwen2.5-7B-Instruct"]
+    def test_chat_escalation_when_strict_lock_disabled(self, monkeypatch):
+        """Chat routing picks the hard model when both model locks are off.
+
+        The strict chat-model flag and the Qwen-only flag are set to "false"
+        and the hard trigger is enabled; `_resolve_primary_model` must then
+        return the configured hard model with a source tag beginning
+        "chat_hard_escalation:".
+        """
+        hard_model_id = "meta-llama/Meta-Llama-3-70B-Instruct"
+        env_settings = (
+            ("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct"),
+            ("INFERENCE_CHAT_STRICT_MODEL_ONLY", "false"),
+            ("INFERENCE_ENFORCE_QWEN_ONLY", "false"),
+            ("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "true"),
+            ("INFERENCE_CHAT_HARD_MODEL_ID", hard_model_id),
+            ("INFERENCE_CHAT_HARD_PROMPT_CHARS", "256"),
+            ("INFERENCE_CHAT_HARD_HISTORY_CHARS", "256"),
+        )
+        for env_name, env_value in env_settings:
+            monkeypatch.setenv(env_name, env_value)
+        # Built after the overrides are applied (presumably the client reads
+        # them at construction time -- confirm against InferenceClient).
+        router = InferenceClient()
+        request = InferenceRequest(
+            task_type="chat",
+            messages=[{"role": "user", "content": "Show all steps and prove the result rigorously."}],
+        )
+        resolved_model, resolution_source = router._resolve_primary_model(request)
+        assert resolved_model == hard_model_id
+        assert resolution_source.startswith("chat_hard_escalation:")
+    def test_async_chat_posts_only_qwen_when_strict_enabled(self, monkeypatch):
+        """Strict chat mode issues exactly one upstream POST, to the Qwen model.
+
+        `main_module.call_hf_chat_async` runs against a fake async HTTP
+        client that records every POST and replies 200 with a canned
+        completion. The test then checks the returned text, the number of
+        recorded requests, and the `model` field of the posted payload.
+        """
+        strict_env = {
+            "INFERENCE_CHAT_MODEL_ID": "Qwen/Qwen2.5-7B-Instruct",
+            "INFERENCE_CHAT_STRICT_MODEL_ONLY": "true",
+            "INFERENCE_CHAT_HARD_TRIGGER_ENABLED": "true",
+            "INFERENCE_HF_TIMEOUT_SEC": "15",
+        }
+        for env_name, env_value in strict_env.items():
+            monkeypatch.setenv(env_name, env_value)
+        routing_client = InferenceClient()
+        requests_seen: List[Dict[str, Any]] = []
+        class FakeAsyncResponse:
+            """Response stand-in exposing `status_code`, `text` and `json()`."""
+            def __init__(self, status_code: int, payload: Dict[str, Any]):
+                self._payload = payload
+                self.text = json.dumps(payload)
+                self.status_code = status_code
+            def json(self) -> Dict[str, Any]:
+                return self._payload
+        class FakeAsyncHttpClient:
+            """Records each POST in `requests_seen` and always answers 200."""
+            # Parameter names are the fake's interface and are kept verbatim;
+            # note that `json` shadows the json module inside this method.
+            async def post(self, _url, *, headers=None, json=None, timeout=None):
+                recorded_call = {
+                    "headers": headers,
+                    "payload": json,
+                    "timeout": timeout,
+                }
+                requests_seen.append(recorded_call)
+                completion = {"choices": [{"message": {"content": "Final answer: 42"}}]}
+                return FakeAsyncResponse(200, completion)
+        # Grab the genuine getenv before it is patched so the wrapper below
+        # delegates to the real implementation instead of to the mock.
+        real_getenv = os.getenv
+        def _patched_getenv(key: str, default=None):
+            # PYTEST_CURRENT_TEST is reported as empty; all other variables
+            # pass through. NOTE(review): presumably this disables a
+            # test-mode shortcut in main -- confirm.
+            return "" if key == "PYTEST_CURRENT_TEST" else real_getenv(key, default)
+        async def _run() -> str:
+            with patch.object(main_module, "get_inference_client", return_value=routing_client):
+                with patch.object(
+                    main_module,
+                    "_get_hf_async_http_client",
+                    new=AsyncMock(return_value=FakeAsyncHttpClient()),
+                ):
+                    with patch.object(main_module.os, "getenv", side_effect=_patched_getenv):
+                        return await main_module.call_hf_chat_async(
+                            [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0."}],
+                            task_type="chat",
+                        )
+        reply_text = asyncio.run(_run())
+        assert "42" in reply_text
+        assert len(requests_seen) == 1
+        posted_model = requests_seen[0]["payload"]["model"]
+        assert posted_model.startswith("Qwen/Qwen2.5-7B-Instruct")
+        assert "Meta-Llama" not in posted_model
+        assert "gemma" not in posted_model.lower()
+    def test_qwen_only_lock_replaces_explicit_non_qwen_model(self, monkeypatch):
+        """The Qwen-only lock overrides a model named explicitly on the request.
+
+        The request asks for a Meta-Llama model on a "verify_solution" task;
+        with the lock enabled the resolved model, and the whole model chain,
+        must be the lock model, and the source tag must mention "qwen_only".
+        """
+        locked_model = "Qwen/Qwen2.5-7B-Instruct"
+        for env_name, env_value in (
+            ("INFERENCE_ENFORCE_QWEN_ONLY", "true"),
+            ("INFERENCE_QWEN_LOCK_MODEL", locked_model),
+            ("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true"),
+        ):
+            monkeypatch.setenv(env_name, env_value)
+        router = InferenceClient()
+        request = InferenceRequest(
+            task_type="verify_solution",
+            model="meta-llama/Meta-Llama-3-70B-Instruct",
+            messages=[{"role": "user", "content": "Solve this quickly."}],
+        )
+        resolved_model, resolution_source = router._resolve_primary_model(request)
+        fallback_chain = router._model_chain_for_task("verify_solution", resolved_model)
+        assert resolved_model == locked_model
+        assert "qwen_only" in resolution_source
+        assert fallback_chain == [locked_model]
 # ─── Risk Prediction ──────────────────────────────────────────
 class TestRiskPrediction:
+    @patch("main.get_client")
+    def test_predict_risk_success(self, mock_get):
+        mock_get.return_value = make_zsc_client()
         response = client.post("/api/predict-risk", json={
             "engagementScore": 80,
             "avgQuizScore": 75,
     def test_predict_risk_invalid_score_range(self):
         response = client.post("/api/predict-risk", json={
+            "engagementScore": 150,  # > 100
             "avgQuizScore": 75,
             "attendance": 90,
             "assignmentCompletion": 85,
         })
         assert response.status_code == 422
+    @patch("main.get_client")
+    def test_predict_risk_hf_failure(self, mock_get):
+        hf = make_zsc_client()
+        hf.zero_shot_classification.side_effect = Exception("HF down")
+        mock_get.return_value = hf
         response = client.post("/api/predict-risk", json={
             "engagementScore": 80,
             "avgQuizScore": 75,
         })
         assert response.status_code == 502
+    @patch("main.get_client")
+    def test_batch_risk_prediction(self, mock_get):
+        mock_get.return_value = make_zsc_client()
         response = client.post("/api/predict-risk/batch", json={
             "students": [
                 {"engagementScore": 80, "avgQuizScore": 75, "attendance": 90, "assignmentCompletion": 85},
         assert response.status_code == 422
     @patch("main.call_hf_chat")
+    def test_learning_path_hf_failure(self, mock_chat):
+        mock_chat.side_effect = Exception("HF down")
         response = client.post("/api/learning-path", json={
             "weaknesses": ["algebra"],
             "gradeLevel": "Grade 11",
 class TestImportedOverviewAndTopicMastery:
     def test_imported_class_overview_returns_inferred_state_for_realistic_minimal_records(self):
         firestore = _FakeFirestoreModule(
             {
                 "normalizedClassRecords": [
         assert cancel_payload["status"] in {"cancelled", "cancelling"}
     def test_inference_metrics_requires_admin(self):
+        response = client.get("/api/ops/inference-metrics")
+        assert response.status_code == 403
+    @patch.object(main_module.firebase_auth, "verify_id_token", return_value={
+        "uid": "admin-uid",
+        "email": "admin@example.com",
+        "role": "admin",
+    })
+    def test_inference_metrics_admin_success(self, _mock_verify):
         response = client.get("/api/ops/inference-metrics")
         assert response.status_code == 200
         payload = response.json()
 class TestRecentCourseMaterials:
     def test_recent_course_materials_respects_class_section_filter(self):
         now = int(time.time())
         firestore = _FakeFirestoreModule(
             {
         assert all(item["classSectionId"] == "grade11_a" for item in data["materials"])
     def test_recent_course_materials_reports_retention_exclusions(self):
         now = int(time.time())
         firestore = _FakeFirestoreModule(
             {

tests/test_hf_monitoring_routes.py DELETED Viewed

@@ -1,148 +0,0 @@
-"""
-Route-level tests for /api/hf/monitoring endpoint.
-Updated for DeepSeek AI monitoring.
-"""
-import os
-from unittest.mock import MagicMock, Mock, patch
-import pytest
-from fastapi.testclient import TestClient
-import main as main_module
-from main import app
-# Mark Firebase as ready and replace its initialiser with a no-op so that
-# importing/using the app in tests does not run the real initialisation.
-main_module._firebase_ready = True
-main_module._init_firebase_admin = lambda: None
-main_module.firebase_firestore = None
-# Make sure there is a firebase_auth object to attach the verifier mock to.
-if getattr(main_module, "firebase_auth", None) is None:
-    main_module.firebase_auth = MagicMock()
-# Default verifier: every token resolves to these teacher claims.
-main_module.firebase_auth.verify_id_token = MagicMock(
-    return_value={
-        "uid": "test-teacher-uid",
-        "email": "teacher@example.com",
-        "role": "teacher",
-    }
-)
-# Shared client that sends a bearer token on every request.
-admin_client = TestClient(
-    app,
-    headers={"Authorization": "Bearer admin-token"},
-)
-# Keys the tests require in the "data" object of /api/hf/monitoring.
-EXPECTED_MONITORING_FIELDS = {
-    "modelId",
-    "modelStatus",
-    "avgResponseTimeMs",
-    "embeddingModelId",
-    "embeddingModelStatus",
-    "inferenceBalance",
-    "totalPeriodCost",
-    "hubApiCallsUsed",
-    "hubApiCallsLimit",
-    "zeroGpuMinutesUsed",
-    "zeroGpuMinutesLimit",
-    "publicStorageUsedTB",
-    "publicStorageLimitTB",
-    "lastChecked",
-    "periodStart",
-    "periodEnd",
-    "activeProfile",
-    "runtimeOverridesActive",
-    "resolvedModels",
-    "provider",
-    "apiBaseUrl",
-}
-# Alias of the same set object, not a copy.
-EXPECTED_FIELDS_AFTER_DS_REPLACEMENT = EXPECTED_MONITORING_FIELDS
-@pytest.fixture(autouse=True)
-def _mock_env():
-    """Provide a placeholder DEEPSEEK_API_KEY in os.environ around each test.
-
-    `patch.dict` restores the previous environment when the test finishes.
-    """
-    placeholder_env = {"DEEPSEEK_API_KEY": "test-ds-monitoring-key"}
-    with patch.dict(os.environ, placeholder_env):
-        yield
-# ─── Auth Enforcement ────────────────────────────────────────
-class TestMonitoringAuth:
-    """Authentication enforcement for GET /api/hf/monitoring."""
-    def test_rejects_bad_token(self):
-        """A token whose verification raises is answered with 401 or 403.
-
-        The verifier is swapped for one that always raises, the request is
-        made, and the verifier is then replaced with an admin-claims mock.
-        That replacement happens in ``finally`` so a request that raises
-        cannot leave the always-failing verifier installed for later tests.
-        The post-test state is unchanged from before: admin claims, not the
-        teacher claims installed at module import.
-        """
-        main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
-        try:
-            bad_token_client = TestClient(app, headers={"Authorization": "Bearer bad-token"})
-            response = bad_token_client.get("/api/hf/monitoring")
-        finally:
-            main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-                "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
-            })
-        assert response.status_code in {401, 403}
-# ─── Response Shape ───────────────────────────────────────────
-class TestMonitoringResponseShape:
-    """Shape checks for the /api/hf/monitoring response envelope."""
-    @patch("main.time.time", return_value=1000.0)
-    def test_success_response_contains_all_expected_fields(self, mock_time):
-        """A 200 response reports success and carries every expected data key."""
-        response = admin_client.get("/api/hf/monitoring")
-        assert response.status_code == 200
-        body = response.json()
-        assert body["success"] is True
-        monitoring_data = body["data"]
-        for field in EXPECTED_FIELDS_AFTER_DS_REPLACEMENT:
-            assert field in monitoring_data, f"Missing field: {field}"
-    @patch("main.time.time", return_value=1000.0)
-    @patch("services.ai_client.get_deepseek_client")
-    def test_all_probes_fail_gracefully(self, mock_ds_client_fn, mock_time):
-        """The endpoint still returns 200/success when the chat probe raises."""
-        # The client handed out by the factory fails on every completion call.
-        failing_create = mock_ds_client_fn.return_value.chat.completions.create
-        failing_create.side_effect = Exception("network down")
-        response = admin_client.get("/api/hf/monitoring")
-        assert response.status_code == 200
-        body = response.json()
-        assert body["success"] is True
-# ─── Response Values ──────────────────────────────────────────
-class TestMonitoringResponseValues:
-    @patch("services.ai_client.get_deepseek_client")
-    @patch("main.time.time")
-    def test_model_status_is_degraded_when_probe_fails(self, mock_time, mock_ds_client_fn):
-        mock_time.return_value = 1000.0
-        mock_client = MagicMock()
-        mock_client.chat.completions.create.side_effect = Exception("probe down")
-        mock_ds_client_fn.return_value = mock_client
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert data["data"]["modelStatus"] == "Degraded"
-    @patch("main.time.time")
-    def test_embedding_model_id_is_returned(self, mock_time):
-        mock_time.return_value = 1000.0
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert "bge-small" in data["data"]["embeddingModelId"].lower()
-    @patch("main.time.time")
-    def test_resolved_models_contains_task_keys(self, mock_time):
-        mock_time.return_value = 1000.0
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        resolved = data["data"].get("resolvedModels", {})
-        expected_tasks = {"chat", "rag_lesson", "rag_problem", "quiz_generation"}
-        for task in expected_tasks:
-            assert task in resolved, f"Missing task: {task}"
-            assert isinstance(resolved[task], str) and len(resolved[task]) > 0
-    @patch("main.time.time")
-    def test_active_profile_returned(self, mock_time):
-        mock_time.return_value = 1000.0
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert data["data"]["activeProfile"] in {"dev", "budget", "prod", ""}
-    @patch("main.time.time")
-    def test_provider_and_api_base_url_present(self, mock_time):
-        mock_time.return_value = 1000.0
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert data["data"]["provider"] == "deepseek"
-        assert "api.deepseek.com" in data["data"]["apiBaseUrl"]