diff --git a/.deploy-trigger b/.deploy-trigger
deleted file mode 100644
index 5a909c0c9a109dbe9486e5afb7093111f7cc5cc3..0000000000000000000000000000000000000000
--- a/.deploy-trigger
+++ /dev/null
@@ -1 +0,0 @@
-﻿2026-04-29 21:37:27
diff --git a/.env.example b/.env.example
deleted file mode 100644
index 5c64d75a0eedf31a3cf382906456c67d337642d7..0000000000000000000000000000000000000000
--- a/.env.example
+++ /dev/null
@@ -1,33 +0,0 @@
-# ── Vector Store ──────────────────────────────────────────────────
-# Path to ChromaDB vectorstore directory
-CURRICULUM_VECTORSTORE_DIR=datasets/vectorstore
-
-# Sentence transformer for embeddings
-# WARNING: changing this requires full re-ingestion of all curriculum data
-EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
-
-# ── DeepSeek AI Inference ─────────────────────────────────────────
-# DeepSeek API key (OpenAI-compatible), required for all AI features
-DEEPSEEK_API_KEY=your_deepseek_api_key_here
-DEEPSEEK_BASE_URL=https://api.deepseek.com
-DEEPSEEK_MODEL=deepseek-chat
-DEEPSEEK_REASONER_MODEL=deepseek-reasoner
-
-# ── HuggingFace (dataset push / HF Space deployment only) ─────────
-# HF API token — kept only for HF Space deployment and dataset push
-HF_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-
-# HF Model ID for AI monitoring proxy
-VITE_HF_MODEL_ID=Qwen/QwQ-32B
-
-# ── Model Selection ───────────────────────────────────────────────
-# LOCAL DEVELOPMENT — deepseek-chat (fast, $0.14/M input)
-HF_MODEL_ID=deepseek-chat
-
-# PRODUCTION — deepseek-reasoner for step-by-step solutions
-# HF_MODEL_ID=deepseek-reasoner
-
-# ── Quiz Battle Internal Auth ─────────────────────────────────────
-# Shared secret between Firebase Cloud Functions and FastAPI backend
-# Used to authenticate server-to-server requests for correct answers
-QUIZ_BATTLE_INTERNAL_SECRET=change_this_to_a_random_string
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/analytics.py b/analytics.py
index 49748f02c37b483f71523c48ea97633ff1157aff..b430ef807198b8a0d4dfb17119d3591996cbccf9 100644
--- a/analytics.py
+++ b/analytics.py
@@ -232,6 +232,7 @@ class EnhancedRiskRequest(BaseModel):
     avgQuizScore: float = Field(..., ge=0, le=100)
     attendance: float = Field(..., ge=0, le=100)
     assignmentCompletion: float = Field(..., ge=0, le=100)
+    streak: Optional[int] = 0
     xpGrowthRate: Optional[float] = 0.0
     timeOnPlatform: Optional[float] = 0.0  # hours
     # Optional trend data
@@ -809,7 +810,7 @@ def _build_risk_features(data: EnhancedRiskRequest) -> np.ndarray:
         data.avgQuizScore,
         data.attendance,
         data.assignmentCompletion,
-        0,  # streak removed
+        data.streak or 0,
         data.xpGrowthRate or 0.0,
         data.timeOnPlatform or 0.0,
         data.engagementTrend7d or 0.0,
@@ -870,8 +871,12 @@ def _rule_based_risk(data: EnhancedRiskRequest) -> EnhancedRiskPrediction:
         score -= 10
     if (data.daysSinceLastActivity or 0) >= 7:
         score -= 10
+    if (data.streak or 0) == 0:
+        score -= 5
 
     # Bonuses
+    if (data.streak or 0) >= 7:
+        score += 5
     if (data.engagementTrend7d or 0) > 0:
         score += 5
 
@@ -1146,19 +1151,19 @@ async def train_risk_model(force_retrain: bool = False) -> RiskTrainResponse:
                 if not data:
                     continue
 
-                    features = [
-                        data.get("engagementScore", 50),
-                        data.get("avgQuizScore", 50),
-                        data.get("attendance", 80),
-                        data.get("assignmentCompletion", 60),
-                        0,  # streak removed
-                        data.get("xpGrowthRate", 0),
-                        data.get("timeOnPlatform", 0),
-                        0.0,  # engagementTrend7d
-                        0.0,  # quizScoreVariance
-                        data.get("consecutiveAbsences", 0),
-                        data.get("daysSinceLastActivity", 0),
-                    ]
+                features = [
+                    data.get("engagementScore", 50),
+                    data.get("avgQuizScore", 50),
+                    data.get("attendance", 80),
+                    data.get("assignmentCompletion", 60),
+                    data.get("streak", 0),
+                    data.get("xpGrowthRate", 0),
+                    data.get("timeOnPlatform", 0),
+                    0.0,  # engagementTrend7d
+                    0.0,  # quizScoreVariance
+                    data.get("consecutiveAbsences", 0),
+                    data.get("daysSinceLastActivity", 0),
+                ]
                 X_data.append(features)
 
                 # Determine label from existing riskLevel or compute it
@@ -1255,7 +1260,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             quiz = np.random.normal(35, 12)
             attendance = np.random.normal(50, 15)
             completion = np.random.normal(35, 15)
-            # streak removed
+            streak = max(0, int(np.random.normal(1, 2)))
             xp_growth = np.random.normal(-0.5, 0.3)
             time_platform = np.random.normal(2, 1)
             trend = np.random.normal(-10, 5)
@@ -1267,7 +1272,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             quiz = np.random.normal(60, 10)
             attendance = np.random.normal(72, 10)
             completion = np.random.normal(60, 12)
-            # streak removed
+            streak = max(0, int(np.random.normal(3, 3)))
             xp_growth = np.random.normal(0.2, 0.3)
             time_platform = np.random.normal(5, 2)
             trend = np.random.normal(0, 8)
@@ -1279,7 +1284,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             quiz = np.random.normal(85, 8)
             attendance = np.random.normal(93, 5)
             completion = np.random.normal(88, 8)
-            # streak removed
+            streak = max(0, int(np.random.normal(10, 5)))
             xp_growth = np.random.normal(1.0, 0.4)
             time_platform = np.random.normal(10, 3)
             trend = np.random.normal(5, 5)
@@ -1292,7 +1297,7 @@ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
             max(0, min(100, quiz)),
             max(0, min(100, attendance)),
             max(0, min(100, completion)),
-            0,  # streak removed
+            streak,
             xp_growth,
             max(0, time_platform),
             trend,
diff --git a/config/env.sample b/config/env.sample
index 78a74ab5ea4d4b9193ce591e14e0c27c67c1b046..67b5a55173ee6293c9c42a8c612d8ef3f734038e 100644
--- a/config/env.sample
+++ b/config/env.sample
@@ -1,16 +1,10 @@
-# DeepSeek AI API (OpenAI-compatible)
-DEEPSEEK_API_KEY=your_deepseek_api_key_here
-DEEPSEEK_BASE_URL=https://api.deepseek.com
-DEEPSEEK_MODEL=deepseek-chat
-DEEPSEEK_REASONER_MODEL=deepseek-reasoner
-
 # Inference provider selection
 # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
-INFERENCE_PROVIDER=deepseek
+INFERENCE_PROVIDER=hf_inference
 INFERENCE_PRO_ENABLED=true
-INFERENCE_PRO_PROVIDER=deepseek
-INFERENCE_GPU_PROVIDER=deepseek
-INFERENCE_CPU_PROVIDER=deepseek
+INFERENCE_PRO_PROVIDER=hf_inference
+INFERENCE_GPU_PROVIDER=hf_inference
+INFERENCE_CPU_PROVIDER=hf_inference
 INFERENCE_ENABLE_PROVIDER_FALLBACK=true
 INFERENCE_PRO_PRIORITY_TASKS=chat,verify_solution
 INFERENCE_PRO_ROUTE_HEADER_NAME=
@@ -30,14 +24,15 @@ INFERENCE_LOCAL_SPACE_URL=http://127.0.0.1:7860
 INFERENCE_LOCAL_SPACE_GENERATE_PATH=/gradio_api/call/generate
 INFERENCE_LOCAL_SPACE_TIMEOUT_SEC=180
 
-# HF_TOKEN kept for Hugging Face Space deployment and dataset push only
+# hf_inference provider settings. HF_TOKEN is required: AI features fail without it.
 # Alternative env names accepted by runtime/startup checks: HUGGING_FACE_API_TOKEN, HUGGINGFACE_API_TOKEN
 HF_TOKEN=your_hf_token
 FIREBASE_AUTH_PROJECT_ID=mathpulse-ai-2026
 # Prefer one of the options below for backend Firestore/Admin access in deployment:
 # FIREBASE_SERVICE_ACCOUNT_JSON={"type":"service_account",...}
 # FIREBASE_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
-# DeepSeek timeout settings
+INFERENCE_HF_BASE_URL=https://router.huggingface.co/hf-inference/models
+INFERENCE_HF_CHAT_URL=https://router.huggingface.co/v1/chat/completions
 INFERENCE_HF_TIMEOUT_SEC=90
 INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
 INFERENCE_BACKGROUND_TIMEOUT_SEC=120
@@ -69,13 +64,13 @@ APP_BRAND_AVATAR_URL=
 
 # model defaults
 # Global default model for all tasks.
-INFERENCE_MODEL_ID=deepseek-chat
+INFERENCE_MODEL_ID=Qwen/Qwen3-32B
 INFERENCE_ENFORCE_QWEN_ONLY=true
-INFERENCE_QWEN_LOCK_MODEL=deepseek-chat
+INFERENCE_QWEN_LOCK_MODEL=Qwen/Qwen3-32B
 INFERENCE_MAX_NEW_TOKENS=8192
 INFERENCE_TEMPERATURE=0.2
 INFERENCE_TOP_P=0.9
-INFERENCE_CHAT_MODEL_ID=deepseek-chat
+INFERENCE_CHAT_MODEL_ID=Qwen/Qwen3-32B
 # Temporary chat-only override for experiments (clear to roll back instantly).
 # Example: Qwen/Qwen3-32B
 INFERENCE_CHAT_MODEL_TEMP_OVERRIDE=
@@ -95,7 +90,7 @@ CHAT_STREAM_CONTINUATION_TAIL_CHARS=900
 CHAT_STREAM_COMPLETION_MODE_DEFAULT=auto
 # Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
 HF_QUIZ_MODEL_ID=
-HF_QUIZ_JSON_REPAIR_MODEL_ID=deepseek-chat
+HF_QUIZ_JSON_REPAIR_MODEL_ID=Qwen/Qwen3-32B
 
 # retry behavior
 INFERENCE_MAX_RETRIES=3
diff --git a/config/models.yaml b/config/models.yaml
index d805df704f74833b5fdaaff43d6c94bcbcbb8aee..4b4285310ffa3ca284a2dc8a1609c635e0c1267b 100644
--- a/config/models.yaml
+++ b/config/models.yaml
@@ -1,85 +1,55 @@
 models:
   primary:
-    id: deepseek-chat
-    description: Default DeepSeek chat model — all chat tasks, quizzes, lessons, reasoning
-    max_new_tokens: 800
-    temperature: 0.7
+    id: Qwen/Qwen3-32B
+    description: Global default instruction model for interactive Grade 11-12 math tutoring
+    max_new_tokens: 640
+    temperature: 0.25
     top_p: 0.9
 
-  rag_primary:
-    id: deepseek-reasoner
-    description: DeepSeek reasoner — extended reasoning for complex RAG tasks
-    max_new_tokens: 1800
-    temperature: 0.2
-    top_p: 0.9
-    enable_thinking_tasks:
-      - rag_lesson
-      - verify_solution
-      - risk_narrative
-    no_thinking_tasks:
-      - chat
-      - quiz_generation
-      - learning_path
-      - daily_insight
-
-  embedding:
-    id: BAAI/bge-small-en-v1.5
-    description: Embedding model for RAG retrieval — curriculum vectorstore ingestion and semantic search
-    note: Not part of the generation pipeline. Read from EMBEDDING_MODEL env var only. Not swappable via admin panel.
+  backup:
+    - id: meta-llama/Meta-Llama-3-70B-Instruct
+      description: High-quality model used for harder multi-step prompts
+      max_new_tokens: 768
+      temperature: 0.3
+      top_p: 0.9
+    - id: google/gemma-2-2b-it
+      description: Secondary backup with broad instruction coverage
+      max_new_tokens: 384
+      temperature: 0.2
+      top_p: 0.9
 
-  model_capabilities:
-    sequential_only:
-      - deepseek-reasoner
-    supports_thinking:
-      - deepseek-reasoner
+  experimental:
+    - id: mistralai/Mistral-7B-Instruct-v0.3
+      notes: Prompt/procedure experimentation
+    - id: meta-llama/Meta-Llama-3-8B-Instruct
+      notes: Baseline comparison against legacy deployment
 
 routing:
   task_model_map:
-    chat:                  deepseek-chat
-    verify_solution:       deepseek-reasoner
-    lesson_generation:     deepseek-chat
-    quiz_generation:       deepseek-chat
-    learning_path:         deepseek-chat
-    daily_insight:         deepseek-chat
-    risk_classification:   deepseek-chat
-    risk_narrative:        deepseek-reasoner
-    rag_lesson:            deepseek-reasoner
-    rag_problem:           deepseek-chat
-    rag_analysis_context:  deepseek-chat
+    # Keep all task defaults aligned to Qwen3-32B.
+    # Hard prompts can still escalate via runtime policy in inference_client.
+    chat: Qwen/Qwen3-32B
+    verify_solution: Qwen/Qwen3-32B
+    lesson_generation: Qwen/Qwen3-32B
+    quiz_generation: Qwen/Qwen3-32B
+    learning_path: Qwen/Qwen3-32B
+    daily_insight: Qwen/Qwen3-32B
+    risk_classification: Qwen/Qwen3-32B
+    risk_narrative: Qwen/Qwen3-32B
 
   task_fallback_model_map:
-    chat:
-      - deepseek-chat
+    chat: []                                       # Chat is strict-primary only (no fallback chain)
     verify_solution:
-      - deepseek-chat
-    lesson_generation:
-      - deepseek-chat
-    quiz_generation:
-      - deepseek-chat
-    learning_path:
-      - deepseek-chat
-    daily_insight:
-      - deepseek-chat
-    risk_classification:
-      - deepseek-chat
-    risk_narrative:
-      - deepseek-chat
-    rag_lesson:
-      - deepseek-chat
-    rag_problem:
-      - deepseek-chat
-    rag_analysis_context:
-      - deepseek-chat
+      - meta-llama/Meta-Llama-3-70B-Instruct      # Higher-capacity fallback
+      - meta-llama/Llama-3.1-8B-Instruct          # Second fallback
 
   task_provider_map:
-    chat:                  deepseek
-    verify_solution:       deepseek
-    lesson_generation:     deepseek
-    quiz_generation:       deepseek
-    learning_path:         deepseek
-    daily_insight:         deepseek
-    risk_classification:   deepseek
-    risk_narrative:        deepseek
-    rag_lesson:            deepseek
-    rag_problem:           deepseek
-    rag_analysis_context:  deepseek
\ No newline at end of file
+    # All tasks use hf_inference router (Qwen/Qwen3-32B natively supported)
+    chat: hf_inference
+    verify_solution: hf_inference
+    lesson_generation: hf_inference
+    quiz_generation: hf_inference
+    learning_path: hf_inference
+    daily_insight: hf_inference
+    risk_narrative: hf_inference
+    risk_classification: hf_inference
diff --git a/datasets/sample_curriculum.json b/datasets/sample_curriculum.json
deleted file mode 100644
index c65cb7f951779b3b61eeab55c1c1848941239e4a..0000000000000000000000000000000000000000
--- a/datasets/sample_curriculum.json
+++ /dev/null
@@ -1,137 +0,0 @@
-[
-  {
-    "content": "The learner demonstrates understanding of key concepts of functions. Functions can be represented as ordered pairs, tables of values, graphs, and equations. A function is a relation where each element in the domain corresponds to exactly one element in the range. Key types include linear functions (f(x)=mx+b), quadratic functions (f(x)=ax^2+bx+c), and polynomial functions of higher degrees.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Functions and Their Graphs",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 1
-  },
-  {
-    "content": "Learning Competency (M11GM-Ia-1): Represents real-life situations using functions, including piece-wise functions. Example: A taxi fare is computed as P40 for the first 500 meters plus P3.50 for every additional 300 meters or fraction thereof. This is a piecewise function where f(d)=40 for d<=500 and f(d)=40+3.5*ceil((d-500)/300) for d>500.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Functions and Their Graphs",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 1
-  },
-  {
-    "content": "Learning Competency (M11GM-Ia-2): Evaluates a function. To evaluate f(x) at x=a, substitute a for every occurrence of x in the expression and simplify. Example: Given f(x)=2x^2-3x+5, evaluate f(2): f(2)=2(4)-3(2)+5=8-6+5=7.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Functions and Their Graphs",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 2
-  },
-  {
-    "content": "Rational Functions have the form f(x)=P(x)/Q(x) where P(x) and Q(x) are polynomials and Q(x)!=0. Key features: vertical asymptotes occur where Q(x)=0 but P(x)!=0; horizontal asymptotes depend on the degrees of P and Q. The domain of f(x) excludes all x-values that make the denominator zero. Solving rational equations and inequalities requires careful handling of the denominator signs.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Rational Functions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 3
-  },
-  {
-    "content": "Learning Competency (M11GM-Ib-3): Solves problems involving rational functions, rational equations, and rational inequalities. Example: A jeepney operator's average revenue per trip is modeled by R(n)=(5000+300n)/n where n is the number of trips per day. Find how many trips are needed for average revenue to reach P450.",
-    "subject": "General Mathematics",
-    "quarter": 1,
-    "content_domain": "Rational Functions",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 3
-  },
-  {
-    "content": "Exponential Functions f(x)=a*b^x (a!=0, b>0, b!=1) model growth and decay. Key properties: domain is all real numbers; range is (0,infinity) for a>0; horizontal asymptote at y=0; y-intercept at (0,a). Solving exponential equations involves expressing both sides with the same base and equating exponents. Philippine applications include bacterial growth and radioactive decay in medical contexts.",
-    "subject": "General Mathematics",
-    "quarter": 2,
-    "content_domain": "Exponential Functions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 4
-  },
-  {
-    "content": "Compound Interest is calculated using A=P(1+r/n)^(nt) where A is the final amount, P is the principal, r is the annual interest rate (decimal), n is the number of compounding periods per year, and t is the time in years. Philippine banks offer savings and loan products with various compounding frequencies: annually (n=1), semi-annually (n=2), quarterly (n=4), monthly (n=12).",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 5
-  },
-  {
-    "content": "Learning Competency (M11GM-IIc-1): Illustrates simple and compound interests. Simple interest I=Prt where P is principal, r is rate, t is time. Compound interest uses compounding formula. Example: Juana deposits P50,000 in a bank offering 3.5% interest compounded quarterly. After 3 years, her balance will be A=50000(1+0.035/4)^(4*3)=55543.19 using the compound interest formula.",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 5
-  },
-  {
-    "content": "Annuities are sequences of equal payments made at equal time intervals. The future value of an ordinary annuity (payment at end of period) is FV=PMT*[(1+r)^n-1]/r and present value is PV=PMT*[1-(1+r)^(-n)]/r. Applications include Pag-IBIG housing loans, SSS contributions, and insurance premiums. Philippine context problems often involve 15-year and 25-year housing loans.",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 6
-  },
-  {
-    "content": "Stocks and Bonds represent two types of investments. Stocks represent ownership shares in a corporation with dividends as earnings — prices are quoted per share in the Philippine Stock Exchange (PSE). Bonds are debt instruments where the issuing entity borrows money and pays periodic interest then repays principal at maturity. Key computations: stock yield = annual dividend per share / market price; bond yield = annual interest payment / market price.",
-    "subject": "General Mathematics",
-    "quarter": 3,
-    "content_domain": "Business Mathematics",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 6
-  },
-  {
-    "content": "A Random Variable is a function that assigns a real number to each outcome in the sample space of a random experiment. A Discrete Random Variable has a countable number of possible values. The probability mass function (PMF) gives the probability P(X=x) for each value x. Key properties: sum of all P(X=x)=1 and P(X=x)>=0 for all x. Common discrete distributions include Binomial for success/failure and Poisson for rare events.",
-    "subject": "Statistics and Probability",
-    "quarter": 1,
-    "content_domain": "Random Variables and Probability Distributions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 7
-  },
-  {
-    "content": "Learning Competency (M11/12SP-IIIa-1): Illustrates a random variable (discrete and continuous). A discrete random variable takes countable values like the number of defective items in a batch of 50 bulbs. A continuous random variable takes infinite uncountable values in an interval, such as the height of Grade 11 students in centimeters. The learner distinguishes between discrete and continuous random variables for real Philippine data.",
-    "subject": "Statistics and Probability",
-    "quarter": 1,
-    "content_domain": "Random Variables and Probability Distributions",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 7
-  },
-  {
-    "content": "The Normal Distribution (Gaussian) is a continuous probability distribution with a bell-shaped curve symmetric about the mean mu. Standard normal distribution has mu=0 and sigma=1; converting to standard normal z=(x-mu)/sigma allows probability calculation using z-tables. Properties: 68% of data within 1 sigma of mu, 95% within 2 sigma, 99.7% within 3 sigma. Philippine applications include standardized test scores (NAT, college entrance exams) and quality control in manufacturing.",
-    "subject": "Statistics and Probability",
-    "quarter": 1,
-    "content_domain": "Random Variables and Probability Distributions",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 8
-  },
-  {
-    "content": "Conic Sections are curves formed by the intersection of a plane and a double-napped cone. The four types are: Circle (all points equidistant from a center), Parabola (all points equidistant from a focus and directrix), Ellipse (sum of distances to two foci is constant), and Hyperbola (absolute difference of distances to two foci is constant). Standard forms: Circle (x-h)^2+(y-k)^2=r^2; Parabola (x-h)^2=4p(y-k) or (y-k)^2=4p(x-h).",
-    "subject": "Pre-Calculus",
-    "quarter": 1,
-    "content_domain": "Analytic Geometry",
-    "chunk_type": "content_explanation",
-    "source_file": "sample_curriculum.json",
-    "page": 9
-  },
-  {
-    "content": "Learning Competency (STEM_PC11AG-Ia-1): Illustrates the different types of conic sections: circle, parabola, ellipse, and hyperbola. The learner identifies conic sections from their standard equations and determines their key properties including center, radius (for circles), vertex, focus, directrix (for parabolas), and asymptotes (for hyperbolas). Real-world applications include satellite dishes, telescope mirrors, and bridge arch designs.",
-    "subject": "Pre-Calculus",
-    "quarter": 1,
-    "content_domain": "Analytic Geometry",
-    "chunk_type": "learning_competency",
-    "source_file": "sample_curriculum.json",
-    "page": 9
-  }
-]
\ No newline at end of file
diff --git a/main.py b/main.py
index 558d0f55f4f61864f361d7ab5792ea6433ad0cec..93dd59e78a671336eccc4994858f2dd2401d3568 100644
--- a/main.py
+++ b/main.py
@@ -27,7 +27,7 @@ import random
 import secrets
 import string
 from contextlib import asynccontextmanager
-from typing import List, Optional, Dict, Any, Set, Tuple, Iterator, AsyncIterator, Sequence, cast, cast
+from typing import List, Optional, Dict, Any, Set, Tuple, Iterator, AsyncIterator, Sequence, cast
 from collections import Counter, defaultdict
 from threading import Lock
 
@@ -64,10 +64,7 @@ import subprocess
 import requests as http_requests
 import httpx
 import uvicorn
-from services.inference_client import (
-    InferenceRequest, create_default_client,
-    get_model_for_task, get_current_runtime_config,
-)
+from services.inference_client import InferenceRequest, create_default_client
 from services.deterministic_cache import DeterministicResponseCache
 from services.logging_utils import log_model_call
 from services.email_service import create_email_service_from_env, EmailMessagePayload
@@ -78,26 +75,10 @@ from services.user_provisioning_service import (
     UserProvisioningService,
 )
 from routes.rag_routes import router as rag_router
-from routes.admin_model_routes import router as admin_model_router
-from routes.admin_routes import router as admin_pdf_router
-from routes.curriculum_routes import router as curriculum_router
-from routes.diagnostic import router as diagnostic_router
-from routes.video_routes import router as video_router
-from routes.quiz_battle import router as quiz_battle_router
-
-# Rate limiting (slowapi)
-try:
-    from middleware.rate_limiter import setup_rate_limiting
-    HAS_RATE_LIMITING = True
-except ImportError:
-    HAS_RATE_LIMITING = False
-    setup_rate_limiting = None
-
 from rag.curriculum_rag import (
     build_analysis_curriculum_context,
     build_lesson_prompt,
     build_lesson_query,
-    format_retrieved_chunks,
     retrieve_curriculum_context,
     summarize_retrieval_confidence,
 )
@@ -116,12 +97,10 @@ except Exception:
 try:
     from google.oauth2 import id_token as google_id_token  # type: ignore[import-not-found]
     from google.auth.transport import requests as google_auth_requests  # type: ignore[import-not-found]
-    from google.cloud.firestore import DELETE_FIELD  # type: ignore[import-not-found]
     HAS_GOOGLE_AUTH = True
 except Exception:
     google_id_token = None  # type: ignore[assignment]
     google_auth_requests = None  # type: ignore[assignment]
-    DELETE_FIELD = None  # type: ignore[assignment]
     HAS_GOOGLE_AUTH = False
 
 # Event-driven automation engine
@@ -149,6 +128,7 @@ from analytics import (
     CalibrateDifficultyRequest,
     CalibrateDifficultyResponse,
     AdaptiveQuizRequest as AdaptiveQuizSelectRequest,
+    AdaptiveQuizResponse,
     StudentSummaryResponse,
     ClassInsightsRequest,
     ClassInsightsResponse,
@@ -192,32 +172,26 @@ def get_inference_client():
         with _inference_client_lock:
             if _inference_client is None:
                 logger.info("🔧 Initializing InferenceClient...")
-                firestore_client = None
-                if HAS_FIREBASE_ADMIN and _firebase_ready:
-                    try:
-                        firestore_client = firebase_firestore.client()
-                    except Exception:
-                        pass
-                _inference_client = create_default_client(firestore_client=firestore_client)
+                _inference_client = create_default_client()
                 logger.info("✅ InferenceClient initialized")
     return _inference_client
 
 HF_TOKEN = os.environ.get(
     "HF_TOKEN",
     os.environ.get("HUGGING_FACE_API_TOKEN", os.environ.get("HUGGINGFACE_API_TOKEN", "")),
-)  # Kept for HF Space deployment / dataset push only; AI inference uses DEEPSEEK_API_KEY
+)
 
 # Grade 11-12 tutoring default model. Can be overridden via INFERENCE_MODEL_ID or INFERENCE_CHAT_MODEL_ID.
-HF_MATH_MODEL_ID = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or os.getenv("HF_MATH_MODEL_ID", "deepseek-chat")
+HF_MATH_MODEL_ID = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or os.getenv("HF_MATH_MODEL_ID", "Qwen/Qwen3-32B")
 
 # Alias kept so automation_engine.py (which imports CHAT_MODEL) keeps working.
 CHAT_MODEL = HF_MATH_MODEL_ID
 
 # Dedicated quiz model override. When empty, routing.task_model_map decides quiz model.
 HF_QUIZ_MODEL_ID = (os.getenv("HF_QUIZ_MODEL_ID", "").strip() or None)
-HF_QUIZ_JSON_REPAIR_MODEL_ID = os.getenv("HF_QUIZ_JSON_REPAIR_MODEL_ID", "deepseek-chat")
+HF_QUIZ_JSON_REPAIR_MODEL_ID = os.getenv("HF_QUIZ_JSON_REPAIR_MODEL_ID", "Qwen/Qwen3-32B")
 
-RISK_MODEL = CHAT_MODEL
+RISK_MODEL = "facebook/bart-large-mnli"
 VERIFICATION_SAMPLES = 3  # Number of samples for self-consistency checking
 ENABLE_DEV_ENDPOINTS = os.getenv("ENABLE_DEV_ENDPOINTS", "false").strip().lower() in {"1", "true", "yes", "on"}
 UPLOAD_MAX_BYTES = int(os.getenv("UPLOAD_MAX_BYTES", str(5 * 1024 * 1024)))
@@ -339,12 +313,12 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
     "/api/upload/course-materials": TEACHER_OR_ADMIN,
     "/api/upload/course-materials/recent": TEACHER_OR_ADMIN,
     "/api/course-materials/topics": TEACHER_OR_ADMIN,
-    "/api/quiz/generate": ALL_APP_ROLES,
-    "/api/quiz/generate-async": ALL_APP_ROLES,
-    "/api/quiz/preview": ALL_APP_ROLES,
+    "/api/quiz/generate": TEACHER_OR_ADMIN,
+    "/api/quiz/generate-async": TEACHER_OR_ADMIN,
+    "/api/quiz/preview": TEACHER_OR_ADMIN,
     "/api/lesson/generate": TEACHER_OR_ADMIN,
     "/api/lesson/generate-async": TEACHER_OR_ADMIN,
-    "/api/rag/lesson": ALL_APP_ROLES,
+    "/api/rag/lesson": TEACHER_OR_ADMIN,
     "/api/rag/generate-problem": TEACHER_OR_ADMIN,
     "/api/rag/analysis-context": TEACHER_OR_ADMIN,
     "/api/feedback/import-grounded": TEACHER_OR_ADMIN,
@@ -352,8 +326,6 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
     "/api/import-grounded/access-audit": TEACHER_OR_ADMIN,
     "/api/quiz/student-competency": TEACHER_OR_ADMIN,
     "/api/calculator/evaluate": ALL_APP_ROLES,
-    "/api/diagnostic/generate": ALL_APP_ROLES,
-    "/api/diagnostic/submit": ALL_APP_ROLES,
     "/api/student/competency-analysis": TEACHER_OR_ADMIN,
     "/api/risk/train-model": ADMIN_ONLY,
     "/api/predict-risk/enhanced": TEACHER_OR_ADMIN,
@@ -365,7 +337,6 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
     "/api/analytics/refresh-cache": ADMIN_ONLY,
     "/api/testing/reset-data": ALL_APP_ROLES,
     "/api/ops/inference-metrics": ADMIN_ONLY,
-    "/api/hf/monitoring": ADMIN_ONLY,
     "/api/dev/generate-mock-data": ADMIN_ONLY,
     "/api/analytics/config": TEACHER_OR_ADMIN,
     "/api/analytics/imported-class-overview": TEACHER_OR_ADMIN,
@@ -375,23 +346,12 @@ ROLE_POLICIES: Dict[str, Set[str]] = {
     "/api/automation/student-enrolled": ADMIN_ONLY,
     "/api/automation/data-imported": ADMIN_ONLY,
     "/api/automation/content-updated": ADMIN_ONLY,
-    "/api/admin/model-config": ADMIN_ONLY,
-    "/api/admin/upload-pdf": ADMIN_ONLY,
-    "/api/admin/reingest-pdf": ADMIN_ONLY,
-    "/api/admin/model-config/profile": ADMIN_ONLY,
-    "/api/admin/model-config/override": ADMIN_ONLY,
-    "/api/admin/model-config/reset": ADMIN_ONLY,
-    "/api/lessons/videos/search": ALL_APP_ROLES,
-    "/api/lesson/personalized": ALL_APP_ROLES,
-    "/api/quiz-battle/generate": ALL_APP_ROLES,
-    "/api/quiz-battle/ingest-pdf": TEACHER_OR_ADMIN,
-    "/api/quiz-battle/bank-status": TEACHER_OR_ADMIN,
 }
 
-if not os.getenv("DEEPSEEK_API_KEY"):
+if not HF_TOKEN:
     logger.warning(
-        "DEEPSEEK_API_KEY is not set. AI features will fail. "
-        "Set the DEEPSEEK_API_KEY environment variable."
+        "HF_TOKEN is not set. AI features will fail. "
+        "On HF Spaces this is injected automatically as a secret."
     )
 
 deterministic_response_cache = DeterministicResponseCache(
@@ -424,30 +384,8 @@ async def app_lifespan(_app: FastAPI) -> AsyncIterator[None]:
     except Exception as e:
         logger.warning(f"⚠️ Failed to pre-initialize InferenceClient: {e}")
 
-    active_model = os.getenv("HF_MODEL_ID", "deepseek-chat")
-    try:
-        from rag.vectorstore_loader import get_vectorstore_health
-        health = get_vectorstore_health()
-        logger.info(
-            "RAG vectorstore ready: %d chunks | subjects: %s | model: %s",
-            health["chunkCount"],
-            list(health["subjects"].keys()),
-            active_model,
-        )
-        if health["chunkCount"] == 0:
-            logger.warning(
-                "RAG vectorstore is EMPTY. Run: python backend/scripts/ingest_curriculum.py"
-            )
-        if "235B" in active_model:
-            logger.info(
-                "Production model active: %s — sequential inference only (--max-num-seqs 1)",
-                active_model,
-            )
-    except Exception as exc:
-        logger.error("RAG vectorstore warm-up failed: %s", exc)
-
     logger.info(f"✅ MathPulse AI backend ready at http://0.0.0.0:7860")
-    logger.info(f"   - INFERENCE_PROVIDER: {os.getenv('INFERENCE_PROVIDER', 'deepseek')}")
+    logger.info(f"   - INFERENCE_PROVIDER: {os.getenv('INFERENCE_PROVIDER', 'hf_inference')}")
     logger.info(f"   - INFERENCE_MODEL_ID: {os.getenv('INFERENCE_MODEL_ID', HF_MATH_MODEL_ID)}")
     logger.info(f"   - INFERENCE_CHAT_MODEL_ID: {os.getenv('INFERENCE_CHAT_MODEL_ID', HF_MATH_MODEL_ID)}")
     logger.info(
@@ -455,10 +393,14 @@ async def app_lifespan(_app: FastAPI) -> AsyncIterator[None]:
         f"{os.getenv('INFERENCE_CHAT_STRICT_MODEL_ONLY', 'true')}"
     )
     logger.info(
-        f"   - INFERENCE_ENFORCE_LOCK_MODEL: "
-        f"{os.getenv('INFERENCE_ENFORCE_LOCK_MODEL', 'true')}"
+        f"   - INFERENCE_CHAT_HARD_TRIGGER_ENABLED: "
+        f"{os.getenv('INFERENCE_CHAT_HARD_TRIGGER_ENABLED', 'false')}"
     )
-    logger.info(f"   - DEEPSEEK_API_KEY set: {'yes' if os.getenv('DEEPSEEK_API_KEY') else 'no'}")
+    logger.info(
+        f"   - INFERENCE_ENFORCE_QWEN_ONLY: "
+        f"{os.getenv('INFERENCE_ENFORCE_QWEN_ONLY', 'true')}"
+    )
+    logger.info(f"   - HF_TOKEN set: {'yes' if HF_TOKEN else 'no'}")
 
     try:
         yield
@@ -533,24 +475,6 @@ def _init_firebase_admin() -> None:
         logger.warning("firebase-admin is not available; protected API endpoints will reject requests.")
         return
 
-    # Helper: load Firebase service account JSON from env var OR HF Spaces secret file
-    def _load_firebase_sa_json() -> Optional[str]:
-        # 1. Environment variable (standard deployment)
-        if FIREBASE_SERVICE_ACCOUNT_JSON:
-            return FIREBASE_SERVICE_ACCOUNT_JSON
-        # 2. HF Spaces secret mount path (secrets mounted as files at /secret/)
-        hf_space_secret_path = "/secret/FIREBASE_SERVICE_ACCOUNT_JSON"
-        if os.path.exists(hf_space_secret_path):
-            try:
-                with open(hf_space_secret_path, "r") as f:
-                    content = f.read().strip()
-                if content:
-                    logger.info(f"Loaded FIREBASE_SERVICE_ACCOUNT_JSON from HF Spaces secret file: {hf_space_secret_path}")
-                    return content
-            except Exception as e:
-                logger.warning(f"Failed to read HF Spaces secret file {hf_space_secret_path}: {e}")
-        return None
-
     try:
         if not firebase_admin._apps:  # type: ignore[attr-defined]
             init_options: Dict[str, Any] = {}
@@ -558,17 +482,12 @@ def _init_firebase_admin() -> None:
             if FIREBASE_AUTH_PROJECT_ID:
                 init_options["projectId"] = FIREBASE_AUTH_PROJECT_ID
 
-            sa_json = _load_firebase_sa_json()
-            if sa_json:
-                service_account_payload = json.loads(sa_json)
+            if FIREBASE_SERVICE_ACCOUNT_JSON:
+                service_account_payload = json.loads(FIREBASE_SERVICE_ACCOUNT_JSON)
                 credentials_obj = cast(Any, firebase_admin).credentials.Certificate(service_account_payload)
-                logger.info("Firebase credentials loaded from FIREBASE_SERVICE_ACCOUNT_JSON")
             elif FIREBASE_SERVICE_ACCOUNT_FILE:
                 credentials_obj = cast(Any, firebase_admin).credentials.Certificate(FIREBASE_SERVICE_ACCOUNT_FILE)
-                logger.info("Firebase credentials loaded from FIREBASE_SERVICE_ACCOUNT_FILE")
 
-            # Only initialize if we have credentials or at minimum a project ID
-            # Without credentials, Firebase init succeeds but ALL Firestore calls will fail
             if credentials_obj and init_options:
                 firebase_admin.initialize_app(credentials_obj, options=init_options)  # type: ignore[union-attr]
             elif credentials_obj:
@@ -576,15 +495,7 @@ def _init_firebase_admin() -> None:
             elif init_options:
                 firebase_admin.initialize_app(options=init_options)  # type: ignore[union-attr]
             else:
-                # No credentials AND no project ID — Firebase will NOT be usable
-                logger.error(
-                    "Firebase Admin SDK could not initialize: no credentials found. "
-                    "Set FIREBASE_SERVICE_ACCOUNT_JSON env var, FIREBASE_SERVICE_ACCOUNT_FILE path, "
-                    "or ensure HF Spaces secret is mounted at /secret/FIREBASE_SERVICE_ACCOUNT_JSON. "
-                    "Firestore operations will fail."
-                )
-                return
-
+                firebase_admin.initialize_app()  # type: ignore[union-attr]
         _firebase_ready = True
         if FIREBASE_AUTH_PROJECT_ID:
             logger.info(f"Firebase Admin SDK initialized for API auth verification (projectId={FIREBASE_AUTH_PROJECT_ID})")
@@ -786,11 +697,19 @@ def require_student_self_or_staff(request: Request, student_id: str) -> Authenti
 
 
 def enforce_rate_limit(request: Request, bucket_name: str, limit: int, window_seconds: int) -> None:
-    """DEPRECATED: Rate limiting is now handled by slowapi middleware.
-    This function is kept for backwards compatibility but does nothing.
-    The slowapi decorators handle all rate limiting per endpoint group.
-    """
-    pass
+    user = getattr(request.state, "user", None)
+    actor_id = user.uid if user else ((request.client.host if request.client else "unknown"))
+    key = f"{bucket_name}:{actor_id}"
+    now = time.time()
+    start = now - window_seconds
+    hits = [ts for ts in _rate_limit_buckets.get(key, []) if ts >= start]
+    if len(hits) >= limit:
+        raise HTTPException(
+            status_code=429,
+            detail=f"Rate limit exceeded for {bucket_name}. Try again later.",
+        )
+    hits.append(now)
+    _rate_limit_buckets[key] = hits
 
 
 def _utc_now_iso() -> str:
@@ -1046,8 +965,6 @@ class RequestMiddleware(BaseHTTPMiddleware):
                 status_code=500,
                 content={
                     "detail": "Internal server error",
-                    "error": type(exc).__name__,
-                    "message": str(exc),
                     "requestId": request_id,
                 },
                 headers={"X-Request-ID": request_id},
@@ -1056,18 +973,7 @@ class RequestMiddleware(BaseHTTPMiddleware):
 
 app.add_middleware(RequestMiddleware)
 app.add_middleware(AuthMiddleware)
-
-# Set up rate limiting with slowapi
-if HAS_RATE_LIMITING and setup_rate_limiting:  # type: ignore[truthy-function]
-    setup_rate_limiting(app)  # type: ignore[truthy-function]
-
 app.include_router(rag_router)
-app.include_router(admin_model_router)
-app.include_router(admin_pdf_router)
-app.include_router(curriculum_router)
-app.include_router(diagnostic_router)
-app.include_router(video_router)
-app.include_router(quiz_battle_router)
 
 
 # ─── Global Exception Handler ─────────────────────────────────
@@ -1109,29 +1015,41 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# ─── DeepSeek AI Clients ──────────────────────────────────────
-
-# Zero-shot classification replaced with DeepSeek chat-based classification.
-# BART risk model replaced with deepseek-chat structured output.
+# ─── Hugging Face Clients ─────────────────────────────────────
 
-from services.ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL, APIError, RateLimitError, APITimeoutError
+# InferenceClient is kept only for zero-shot classification (BART).
+from huggingface_hub import InferenceClient
 
-_zsc_client_initialized = False
+_zsc_client: Optional[InferenceClient] = None
 
 
-def _ensure_deepseek_available() -> None:
-    """Verify DeepSeek API key is configured."""
-    global _zsc_client_initialized
-    if not _zsc_client_initialized:
-        try:
-            get_deepseek_client()
-            logger.info("DeepSeek client initialized (for all AI tasks)")
-            _zsc_client_initialized = True
-        except ValueError:
+def get_client() -> InferenceClient:
+    """Get or initialize the HuggingFace InferenceClient (used for zero-shot classification only)."""
+    global _zsc_client
+    if _zsc_client is None:
+        if not HF_TOKEN:
             raise HTTPException(
                 status_code=500,
-                detail="DEEPSEEK_API_KEY not configured. Set the DEEPSEEK_API_KEY environment variable.",
+                detail="HF_TOKEN not configured. Set the HF_TOKEN environment variable.",
             )
+        for attempt in range(3):
+            try:
+                _zsc_client = InferenceClient(
+                    token=HF_TOKEN,
+                    timeout=60,
+                )
+                logger.info("HF InferenceClient initialized (for zero-shot classification)")
+                break
+            except Exception as e:
+                logger.warning(f"HF client init attempt {attempt + 1} failed: {e}")
+                if attempt == 2:
+                    raise HTTPException(
+                        status_code=503,
+                        detail="Failed to initialize AI model client after 3 attempts.",
+                    )
+                time.sleep(2 ** attempt)
+    assert _zsc_client is not None
+    return _zsc_client
 
 
 # ─── HF Serverless Chat Helper (requests-based) ───────────────
@@ -1165,7 +1083,7 @@ def _strip_repetition(text: str, min_chunk: int = 40) -> str:
 
 
 def _build_hf_inference_url(model_id: str) -> str:
-    return f"https://api.deepseek.com"
+    return f"https://router.huggingface.co/hf-inference/models/{model_id}"
 
 
 def _messages_to_inference_prompt(messages: List[Dict[str, str]]) -> str:
@@ -1223,7 +1141,7 @@ def call_hf_chat_stream(
     task_type: str = "chat",
     timeout: Optional[int] = None,
 ) -> Iterator[str]:
-    """Stream chat deltas from DeepSeek API as text chunks."""
+    """Stream chat deltas from HF router as text chunks."""
     client = get_inference_client()
     effective_task = (task_type or "chat").strip().lower()
 
@@ -1239,54 +1157,104 @@ def call_hf_chat_stream(
     selected_model, _ = client._resolve_primary_model(selection_req)
 
     model_chain = client._model_chain_for_task(effective_task, selected_model)
+    provider_chain = client._provider_chain_for_task(effective_task)
     timeout_sec = timeout or client.interactive_timeout_sec
     last_error: Optional[Exception] = None
 
-    ds_client = get_deepseek_client()
-
     for fallback_depth, model_name in enumerate(model_chain):
-        start = time.perf_counter()
-        try:
-            stream = ds_client.chat.completions.create(
-                model=model_name,
-                messages=messages,  # type: ignore[arg-type]
-                stream=True,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                timeout=timeout_sec,
-            )
+        for provider in provider_chain:
+            if provider == "local_space":
+                last_error = RuntimeError("Streaming is not supported for local_space provider")
+                continue
+
+            route = client._resolve_route_label(provider, effective_task)
+            stream_model = model_name if ":" in model_name else f"{model_name}:fastest"
+            headers = {
+                "Authorization": f"Bearer {client.hf_token}",
+                "Content-Type": "application/json",
+                "X-MathPulse-Task": effective_task,
+            }
+            if route == "pro-priority" and client.pro_route_header_name.strip():
+                headers[client.pro_route_header_name.strip()] = client.pro_route_header_value
+
+            payload: Dict[str, object] = {
+                "model": stream_model,
+                "messages": messages,
+                "stream": True,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "top_p": top_p,
+            }
+
+            start = time.perf_counter()
+            try:
+                with http_requests.post(
+                    client.hf_chat_url,
+                    headers=headers,
+                    json=payload,
+                    timeout=timeout_sec,
+                    stream=True,
+                ) as response:
+                    if response.status_code != 200:
+                        raise RuntimeError(f"HF stream error {response.status_code}: {response.text}")
+
+                    emitted_any = False
+                    for raw_line in response.iter_lines(decode_unicode=True):
+                        if not raw_line:
+                            continue
+                        line = raw_line.strip()
+                        if not line.startswith("data:"):
+                            continue
+
+                        data_raw = line.split("data:", 1)[1].strip()
+                        if data_raw == "[DONE]":
+                            if emitted_any:
+                                latency_ms = (time.perf_counter() - start) * 1000
+                                logger.info(
+                                    "✅ HF stream success: task=%s model=%s latency=%sms",
+                                    effective_task,
+                                    model_name,
+                                    round(latency_ms, 0),
+                                )
+                                return
+                            continue
+
+                        try:
+                            payload_obj = json.loads(data_raw)
+                        except json.JSONDecodeError:
+                            continue
+
+                        choices = payload_obj.get("choices") or []
+                        if not choices:
+                            continue
+                        first = choices[0] if isinstance(choices[0], dict) else {}
+                        delta = first.get("delta") or {}
+                        chunk = delta.get("content")
+                        if not chunk:
+                            msg = first.get("message") or {}
+                            chunk = msg.get("content")
+                        if not chunk:
+                            continue
 
-            emitted_any = False
-            for chunk in stream:
-                for choice in chunk.choices:  # type: ignore[union-attr]
-                    delta = getattr(choice, 'delta', None)
-                    if delta and delta.content:
                         emitted_any = True
-                        yield delta.content
+                        yield str(chunk)
+
+                    if emitted_any:
+                        return
+                    raise RuntimeError("HF stream ended without content")
 
-            if emitted_any:
-                latency_ms = (time.perf_counter() - start) * 1000
-                logger.info(
-                    "✅ DeepSeek stream success: task=%s model=%s latency=%sms",
+            except Exception as exc:
+                last_error = exc
+                logger.warning(
+                    "⚠️ Stream attempt failed: task=%s provider=%s model=%s depth=%s error=%s",
                     effective_task,
+                    provider,
                     model_name,
-                    round(latency_ms, 0),
+                    fallback_depth,
+                    str(exc)[:180],
                 )
-                return
-            raise RuntimeError("Stream ended without content")
-
-        except Exception as exc:
-            last_error = exc
-            logger.warning(
-                "⚠️ Stream attempt failed: task=%s model=%s depth=%s error=%s",
-                effective_task,
-                model_name,
-                fallback_depth,
-                str(exc)[:180],
-            )
 
-    raise last_error or RuntimeError("Streaming failed with empty model chain")
+    raise last_error or RuntimeError("Streaming failed with empty model/provider chain")
 
 
 HF_BLOCKING_CALL_CONCURRENCY = max(1, int(os.getenv("HF_BLOCKING_CALL_CONCURRENCY", "16")))
@@ -1379,18 +1347,196 @@ async def call_hf_chat_async(
     task_type: str = "default",
     timeout: Optional[int] = None,
 ) -> str:
-    """Async wrapper for DeepSeek chat completions."""
-    return await _run_hf_blocking(
-        call_hf_chat,
-        messages,
-        max_tokens=max_tokens,
+    if os.getenv("PYTEST_CURRENT_TEST"):
+        return await _run_hf_blocking(
+            call_hf_chat,
+            messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            model=model,
+            task_type=task_type,
+            timeout=timeout,
+        )
+
+    client = get_inference_client()
+    effective_task = (task_type or "default").strip().lower()
+    request_tag = f"{effective_task}-async-{int(time.time() * 1000)}"
+
+    selection_req = InferenceRequest(
+        messages=messages,
+        model=model,
+        task_type=task_type,
+        request_tag=request_tag,
+        max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
         repetition_penalty=repetition_penalty,
-        model=model,
-        task_type=task_type,
-        timeout=timeout,
+        timeout_sec=timeout,
     )
+    selected_model, _ = client._resolve_primary_model(selection_req)
+    model_chain = client._model_chain_for_task(effective_task, selected_model)
+    provider_chain = client._provider_chain_for_task(effective_task)
+    last_error: Optional[Exception] = None
+    retryable_status = {408, 429, 500, 502, 503, 504}
+
+    for fallback_depth, model_name in enumerate(model_chain):
+        request_for_model = InferenceRequest(
+            messages=messages,
+            model=model_name,
+            task_type=task_type,
+            request_tag=request_tag,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            timeout_sec=timeout,
+        )
+        for provider in provider_chain:
+            route = client._resolve_route_label(provider, effective_task)
+            if provider == "local_space":
+                try:
+                    text = await _run_hf_blocking(
+                        client._generate_with_provider,
+                        request_for_model,
+                        provider,
+                        fallback_depth,
+                    )
+                    return _strip_repetition(text)
+                except Exception as exc:
+                    last_error = exc
+                    logger.warning(
+                        "⚠️ Async local fallback failed: task=%s model=%s depth=%s error=%s",
+                        effective_task,
+                        model_name,
+                        fallback_depth,
+                        str(exc)[:180],
+                    )
+                    continue
+
+            stream_model = model_name if ":" in model_name else f"{model_name}:fastest"
+            timeout_sec = client._timeout_for(request_for_model, provider)
+            max_retries, backoff_sec = client._retry_profile(effective_task)
+            headers = {
+                "Authorization": f"Bearer {client.hf_token}",
+                "Content-Type": "application/json",
+                "X-MathPulse-Task": effective_task,
+            }
+            if route == "pro-priority" and client.pro_route_header_name.strip():
+                headers[client.pro_route_header_name.strip()] = client.pro_route_header_value
+
+            payload: Dict[str, object] = {
+                "model": stream_model,
+                "messages": messages,
+                "stream": False,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "top_p": top_p,
+            }
+
+            async_client = await _get_hf_async_http_client()
+            for attempt in range(1, max_retries + 1):
+                start = time.perf_counter()
+                client._record_attempt(
+                    task_type=effective_task,
+                    provider=provider,
+                    route=route,
+                    fallback_depth=fallback_depth,
+                )
+                try:
+                    response = await async_client.post(
+                        client.hf_chat_url,
+                        headers=headers,
+                        json=payload,
+                        timeout=_resolve_async_hf_timeout(timeout_sec),
+                    )
+                    latency_ms = (time.perf_counter() - start) * 1000
+                    client._bump_bucket("status_code_counts", str(response.status_code), 1)
+
+                    if response.status_code in retryable_status and attempt < max_retries:
+                        log_model_call(
+                            logger,
+                            provider=provider,
+                            model=model_name,
+                            endpoint=client.hf_chat_url,
+                            latency_ms=latency_ms,
+                            input_tokens=None,
+                            output_tokens=None,
+                            status="error",
+                            error_class="HTTPRetry",
+                            error_message=f"status={response.status_code}",
+                            task_type=effective_task,
+                            request_tag=request_tag,
+                            retry_attempt=attempt,
+                            fallback_depth=fallback_depth,
+                            route=route,
+                        )
+                        client._bump_metric("retries_total", 1)
+                        await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
+                        continue
+
+                    if response.status_code != 200:
+                        client._bump_metric("requests_error", 1)
+                        raise RuntimeError(
+                            f"HF inference error {response.status_code}: {response.text[:280]}"
+                        )
+
+                    data = response.json()
+                    text = client._extract_text(data)
+                    log_model_call(
+                        logger,
+                        provider=provider,
+                        model=model_name,
+                        endpoint=client.hf_chat_url,
+                        latency_ms=latency_ms,
+                        input_tokens=None,
+                        output_tokens=None,
+                        status="ok",
+                        task_type=effective_task,
+                        request_tag=request_tag,
+                        retry_attempt=attempt,
+                        fallback_depth=fallback_depth,
+                        route=route,
+                    )
+                    client._bump_metric("requests_ok", 1)
+                    return _strip_repetition(text)
+                except Exception as exc:
+                    latency_ms = (time.perf_counter() - start) * 1000
+                    last_error = exc
+                    if attempt < max_retries:
+                        log_model_call(
+                            logger,
+                            provider=provider,
+                            model=model_name,
+                            endpoint=client.hf_chat_url,
+                            latency_ms=latency_ms,
+                            input_tokens=None,
+                            output_tokens=None,
+                            status="error",
+                            error_class=exc.__class__.__name__,
+                            error_message=str(exc),
+                            task_type=effective_task,
+                            request_tag=request_tag,
+                            retry_attempt=attempt,
+                            fallback_depth=fallback_depth,
+                            route=route,
+                        )
+                        client._bump_metric("retries_total", 1)
+                        await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
+                        continue
+
+                    client._bump_metric("requests_error", 1)
+                    logger.warning(
+                        "⚠️ Async HF attempt failed: task=%s provider=%s model=%s depth=%s error=%s",
+                        effective_task,
+                        provider,
+                        model_name,
+                        fallback_depth,
+                        str(exc)[:180],
+                    )
+
+    raise last_error or RuntimeError("Inference failed with empty model/provider chain")
 
 
 async def call_hf_chat_stream_async(
@@ -1403,34 +1549,240 @@ async def call_hf_chat_stream_async(
     task_type: str = "chat",
     timeout: Optional[int] = None,
 ) -> AsyncIterator[str]:
-    """Async streaming wrapper for DeepSeek chat completions."""
-    stream_iter = call_hf_chat_stream(
-        messages,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
+    if os.getenv("PYTEST_CURRENT_TEST"):
+        stream_iter = call_hf_chat_stream(
+            messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            model=model,
+            task_type=task_type,
+            timeout=timeout,
+        )
+        done = object()
+
+        def _next_chunk():
+            return next(stream_iter, done)
+
+        while True:
+            chunk = await _run_hf_blocking(_next_chunk)
+            if chunk is done:
+                return
+            if chunk:
+                yield str(chunk)
+
+    client = get_inference_client()
+    effective_task = (task_type or "chat").strip().lower()
+    request_tag = f"{effective_task}-stream-async-{int(time.time() * 1000)}"
+
+    selection_req = InferenceRequest(
+        messages=messages,
         model=model,
         task_type=task_type,
-        timeout=timeout,
+        request_tag=request_tag,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        timeout_sec=timeout,
     )
-    done = object()
+    selected_model, _ = client._resolve_primary_model(selection_req)
+    model_chain = client._model_chain_for_task(effective_task, selected_model)
+    provider_chain = client._provider_chain_for_task(effective_task)
+    last_error: Optional[Exception] = None
+    retryable_status = {408, 429, 500, 502, 503, 504}
 
-    def _next_chunk():
-        return next(stream_iter, done)
+    for fallback_depth, model_name in enumerate(model_chain):
+        request_for_model = InferenceRequest(
+            messages=messages,
+            model=model_name,
+            task_type=task_type,
+            request_tag=request_tag,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            timeout_sec=timeout,
+        )
+        for provider in provider_chain:
+            if provider == "local_space":
+                last_error = RuntimeError("Streaming is not supported for local_space provider")
+                continue
 
-    while True:
-        chunk = await _run_hf_blocking(_next_chunk)
-        if chunk is done:
-            return
-        if chunk:
-            yield str(chunk)
+            route = client._resolve_route_label(provider, effective_task)
+            stream_model = model_name if ":" in model_name else f"{model_name}:fastest"
+            timeout_sec = client._timeout_for(request_for_model, provider)
+            max_retries, backoff_sec = client._retry_profile(effective_task)
 
+            headers = {
+                "Authorization": f"Bearer {client.hf_token}",
+                "Content-Type": "application/json",
+                "X-MathPulse-Task": effective_task,
+            }
+            if route == "pro-priority" and client.pro_route_header_name.strip():
+                headers[client.pro_route_header_name.strip()] = client.pro_route_header_value
+
+            payload: Dict[str, object] = {
+                "model": stream_model,
+                "messages": messages,
+                "stream": True,
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "top_p": top_p,
+            }
 
-def load_local_math_model(model_name: str = "deepseek-chat"):
-    """Optional local loader — deprecated in favor of DeepSeek API."""
-    raise NotImplementedError(
-        "Local model loading is deprecated. Use DeepSeek API via DEEPSEEK_API_KEY env var."
+            async_client = await _get_hf_async_http_client()
+            for attempt in range(1, max_retries + 1):
+                start = time.perf_counter()
+                client._record_attempt(
+                    task_type=effective_task,
+                    provider=provider,
+                    route=route,
+                    fallback_depth=fallback_depth,
+                )
+                try:
+                    async with async_client.stream(
+                        "POST",
+                        client.hf_chat_url,
+                        headers=headers,
+                        json=payload,
+                        timeout=_resolve_async_hf_timeout(timeout_sec),
+                    ) as response:
+                        client._bump_bucket("status_code_counts", str(response.status_code), 1)
+
+                        if response.status_code in retryable_status and attempt < max_retries:
+                            body = await response.aread()
+                            body_preview = body[:220].decode("utf-8", errors="replace")
+                            latency_ms = (time.perf_counter() - start) * 1000
+                            log_model_call(
+                                logger,
+                                provider=provider,
+                                model=model_name,
+                                endpoint=client.hf_chat_url,
+                                latency_ms=latency_ms,
+                                input_tokens=None,
+                                output_tokens=None,
+                                status="error",
+                                error_class="HTTPRetry",
+                                error_message=f"status={response.status_code} body={body_preview}",
+                                task_type=effective_task,
+                                request_tag=request_tag,
+                                retry_attempt=attempt,
+                                fallback_depth=fallback_depth,
+                                route=route,
+                            )
+                            client._bump_metric("retries_total", 1)
+                            await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
+                            continue
+
+                        if response.status_code != 200:
+                            body = await response.aread()
+                            body_preview = body[:280].decode("utf-8", errors="replace")
+                            client._bump_metric("requests_error", 1)
+                            raise RuntimeError(
+                                f"HF stream error {response.status_code}: {body_preview}"
+                            )
+
+                        emitted_any = False
+                        async for raw_line in response.aiter_lines():
+                            if not raw_line:
+                                continue
+                            line = raw_line.strip()
+                            if not line.startswith("data:"):
+                                continue
+
+                            data_raw = line.split("data:", 1)[1].strip()
+                            if data_raw == "[DONE]":
+                                continue
+
+                            try:
+                                payload_obj = json.loads(data_raw)
+                            except json.JSONDecodeError:
+                                continue
+
+                            choices = payload_obj.get("choices") or []
+                            if not choices:
+                                continue
+                            first = choices[0] if isinstance(choices[0], dict) else {}
+                            delta = first.get("delta") or {}
+                            chunk = delta.get("content")
+                            if not chunk:
+                                msg = first.get("message") or {}
+                                chunk = msg.get("content")
+                            if not chunk:
+                                continue
+
+                            emitted_any = True
+                            yield str(chunk)
+
+                        if emitted_any:
+                            latency_ms = (time.perf_counter() - start) * 1000
+                            log_model_call(
+                                logger,
+                                provider=provider,
+                                model=model_name,
+                                endpoint=client.hf_chat_url,
+                                latency_ms=latency_ms,
+                                input_tokens=None,
+                                output_tokens=None,
+                                status="ok",
+                                task_type=effective_task,
+                                request_tag=request_tag,
+                                retry_attempt=attempt,
+                                fallback_depth=fallback_depth,
+                                route=route,
+                            )
+                            client._bump_metric("requests_ok", 1)
+                            return
+
+                        raise RuntimeError("HF stream ended without content")
+                except Exception as exc:
+                    latency_ms = (time.perf_counter() - start) * 1000
+                    last_error = exc
+                    if attempt < max_retries:
+                        log_model_call(
+                            logger,
+                            provider=provider,
+                            model=model_name,
+                            endpoint=client.hf_chat_url,
+                            latency_ms=latency_ms,
+                            input_tokens=None,
+                            output_tokens=None,
+                            status="error",
+                            error_class=exc.__class__.__name__,
+                            error_message=str(exc),
+                            task_type=effective_task,
+                            request_tag=request_tag,
+                            retry_attempt=attempt,
+                            fallback_depth=fallback_depth,
+                            route=route,
+                        )
+                        client._bump_metric("retries_total", 1)
+                        await asyncio.sleep(_hf_retry_sleep_seconds(backoff_sec, attempt))
+                        continue
+
+                    client._bump_metric("requests_error", 1)
+                    logger.warning(
+                        "⚠️ Async stream attempt failed: task=%s provider=%s model=%s depth=%s error=%s",
+                        effective_task,
+                        provider,
+                        model_name,
+                        fallback_depth,
+                        str(exc)[:180],
+                    )
+
+    raise last_error or RuntimeError("Streaming failed with empty model/provider chain")
+
+
+def load_local_math_model(model_name: str = "Qwen/Qwen2.5-Math-7B-Instruct"):
+    """Optional local loader for environments using Transformers instead of HF Inference API."""
+    from transformers import AutoModelForCausalLM, AutoTokenizer  # type: ignore[import-not-found]
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype="auto",
+        device_map="auto",
     )
+    return tokenizer, model
 
 
 # ─── Math Tutor Prompt & Wrapper ──────────────────────────────
@@ -1882,45 +2234,33 @@ async def root():
 # ─── AI Chat Tutor ─────────────────────────────────────────────
 
 
-MATH_TUTOR_SYSTEM_PROMPT = """You are Pulse, MathPulse AI's friendly math tutor for Filipino Senior High School
-students. You help students understand and solve problems in General Mathematics,
-Business Mathematics, Statistics & Probability, and Finite Mathematics, all aligned
-with the DepEd Strengthened SHS Curriculum and SDO Navotas learning modules.
-
-YOUR BEHAVIOR RULES:
-1. PERSONALIZE every response. Address the student by first name occasionally.
-2. NEVER give direct answers to quiz or exam items — guide with hints and questions instead.
-3. If the student is struggling on a critical gap topic, gently steer them back to
-   prerequisite concepts before moving forward.
-4. Use the SDO Navotas step-by-step method for ALL solutions:
-   "Given → Formula → Substitute → Compute → Conclude"
-5. Always format math using LaTeX:
-   - Inline: \\( expression \\)
-   - Block/display: \\[ expression \\]
-   Never use dollar signs ($) — they break the KaTeX renderer.
-6. Use Filipino-friendly English. Mix in occasional Tagalog phrases
-   (e.g., "Kaya mo yan!", "Subukan natin...") to keep the tone warm.
-7. When a student answers a "try_it" problem, evaluate their answer:
-   - If correct: Celebrate briefly, explain WHY it's correct, then offer a harder challenge.
-   - If wrong: Say "Good try! Let's check your steps..." then walk through the error.
-8. Keep responses concise (max 300 words per message). Use bullet points for steps.
-9. If a student asks about a topic outside their current lesson, help but
-   note: "This is from [topic]. We'll cover this soon in your learning path!"
-10. NEVER generate quiz items with answers visible to the student.
-11. When you detect the student consistently making the same mistake,
-    note it clearly: "I noticed you keep forgetting to convert % to decimal first — let's fix that!"
-
-RESPONSE FORMAT FOR MATH EXPLANATIONS:
-1. Quick concept recap (1-2 sentences)
-2. Formula (in LaTeX block)
-3. Step-by-step solution
-4. Final answer with units/peso sign
-5. One quick follow-up question to check understanding
-
-AWARENESS OF FULL CURRICULUM:
-You have complete knowledge of all topics in the MathPulse topic registry
-(NA-*, BM-*, SP-*, FM1-*, FM2-* topic codes). When a student asks "what's next?"
-refer to their suggested_learning_path from the diagnostic result."""
+MATH_TUTOR_SYSTEM_PROMPT = """You are L.O.L.I. (Learning Optimizer with Layered Intelligence), 
+an expert AI math tutor for Grade 11-12 Filipino students.
+
+**Problem-Solving Protocol:**
+1. Read the problem carefully and restate it in your own words to confirm understanding.
+2. Identify key information, formulas, theorems, and what you need to find.
+3. Solve step by step using Chain-of-Thought reasoning with explicit calculations.
+4. Show ALL steps and equation manipulations clearly with intermediate results.
+5. Verify your answer by substituting back into the original problem (for equations/algebra).
+6. Double-check your arithmetic and final answer before presenting.
+7. Put your final answer inside \\boxed{}
+
+**Rules for Mathematical Accuracy:**
+- ALWAYS show complete working. Never skip steps or combine multiple operations.
+- Use clear mathematical notation (x², √, π, ≈, ∴, →, =)
+- Show intermediate calculations explicitly (e.g., "3 × 4 = 12, then 12 + 5 = 17")
+- Reveal your thinking process — explain WHY each step follows from the previous
+- For word problems: Define variables clearly, show equation setup, solve step-by-step, then verify
+- If verification fails, recalculate and identify the error before correcting
+- For physics/kinematics problems: Check units and verify that the magnitude of each answer makes sense
+- For functions/calculus: Always verify domain and range assumptions
+- Be encouraging but honest. If a problem is ambiguous, ask for clarification.
+- Respond in clear English suitable for Grade 11-12 students
+- If asked about any non-math topic, respond in a friendly tone and redirect to math support only.
+- If the user sends greetings or thanks, respond politely and invite a math-related question.
+- Never use external tools or functions —
+  solve purely through mathematical reasoning"""
 
 
 _STREAM_COMPLETION_MODES: Set[str] = {"auto", "marker", "none"}
@@ -2135,46 +2475,7 @@ async def chat_tutor(request: ChatRequest):
         if boundary_response is not None:
             return ChatResponse(response=boundary_response)
 
-        system_prompt = MATH_TUTOR_SYSTEM_PROMPT
-        
-        if request.userId and HAS_FIREBASE_ADMIN and firebase_firestore:
-            try:
-                db = firebase_firestore.client()
-                user_doc = db.collection("users").document(request.userId).get()
-                if user_doc.exists:
-                    user_data = user_doc.to_dict() or {}
-                    diag_id = user_data.get("latestDiagnosticTestId", "")
-                    if diag_id:
-                        diag_doc = db.collection("diagnosticResults").document(request.userId).collection("attempts").document(diag_id).get()
-                        if diag_doc.exists:
-                            diag_data = diag_doc.to_dict() or {}
-                            risk = diag_data.get("riskProfile", {})
-                            student_context = f"""
-STUDENT PROFILE:
-Name: {user_data.get('displayName', 'Student')}
-Strand: {diag_data.get('strand', 'STEM')}
-Weak Domains: {', '.join(risk.get('weak_domains', []))}
-Critical Gaps: {', '.join(risk.get('critical_gaps', []))}
-Overall Risk Level: {risk.get('overall_risk', 'unknown')}
-"""
-                            system_prompt = student_context + "\n" + system_prompt
-            except Exception as ctx_err:
-                logger.debug(f"Failed to inject student profile into chat: {ctx_err}")
-        
-        try:
-            curriculum_chunks = retrieve_curriculum_context(
-                query=request.message[:200],
-                top_k=2,
-            )
-            if curriculum_chunks:
-                rag_context = "RELEVANT CURRICULUM REFERENCE:\n"
-                for chunk in curriculum_chunks:
-                    rag_context += f"[{chunk.get('source_file', '')}] {chunk.get('content', '')[:400]}\n--\n"
-                system_prompt = rag_context + "\n\n" + system_prompt
-        except Exception as rag_err:
-            logger.debug(f"RAG context injection skipped: {rag_err}")
-        
-        messages = [{"role": "system", "content": system_prompt}]
+        messages = [{"role": "system", "content": MATH_TUTOR_SYSTEM_PROMPT}]
 
         # Add conversation history
         for msg in request.history[-10:]:  # Keep last 10 messages for context window
@@ -2881,7 +3182,7 @@ async def verify_solution(request: VerifySolutionRequest, response: Response):
         raise HTTPException(status_code=500, detail=f"Verification error: {str(e)}")
 
 
-# ─── Student Risk Classification (DeepSeek) ───
+# ─── Student Risk Classification (facebook/bart-large-mnli) ───
 
 
 RISK_LABELS = [
@@ -2956,6 +3257,7 @@ async def _generate_risk_recommendations_llm(data: EnhancedRiskRequest, result:
         f"engagementScore: {data.engagementScore:.1f}\n"
         f"avgQuizScore: {data.avgQuizScore:.1f}\n"
         f"assignmentCompletion: {data.assignmentCompletion:.1f}\n"
+        f"streak: {int(data.streak or 0)}\n"
         f"daysSinceLastActivity: {int(data.daysSinceLastActivity or 0)}\n"
         f"top_factors: {', '.join(result.top_factors)}"
     )
@@ -2982,79 +3284,67 @@ async def _generate_risk_recommendations_llm(data: EnhancedRiskRequest, result:
 
 @app.post("/api/predict-risk", response_model=RiskPrediction)
 async def predict_risk(student_data: StudentRiskData, response: Response):
-    """Student risk prediction using DeepSeek AI classification"""
+    """Student risk prediction using facebook/bart-large-mnli zero-shot classification"""
     try:
         cache_key = deterministic_response_cache.build_cache_key(
             "predict_risk",
             student_data.model_dump(),
         )
         _set_cache_response_header(response, hit=False)
-        _ensure_deepseek_available()
-
-        client = get_deepseek_client()
+        hf = get_client()
 
-        risk_prompt = (
+        text = (
             f"Student academic performance summary: "
             f"Engagement score is {student_data.engagementScore:.0f}%. "
             f"Average quiz score is {student_data.avgQuizScore:.0f}%. "
-            f"Assignment completion rate is {student_data.assignmentCompletion:.0f}%.\n\n"
-            f"Classify this student into exactly one of these risk levels: {', '.join(RISK_LABELS)}. "
-            f"Respond with a JSON object containing: risk_label, confidence (0-1 float), reasoning (short sentence)."
+            f"Assignment completion rate is {student_data.assignmentCompletion:.0f}%."
         )
 
-        # Retry DeepSeek inference with backoff
+        # Retry HF inference with backoff
+        result = None
         last_err: Optional[Exception] = None
         for attempt in range(3):
             try:
-                api_response = await _run_hf_blocking(
-                    lambda model=CHAT_MODEL, prompt=risk_prompt: client.chat.completions.create(  # type: ignore[arg-type]
-                        model=model,
-                        messages=[
-                            {"role": "system", "content": "You are a student risk analyst. Respond with valid JSON only."},
-                            {"role": "user", "content": prompt},
-                        ],
-                        response_format={"type": "json_object"},
-                        max_tokens=256,
-                        temperature=0.0,
-                    )
+                result = await _run_hf_blocking(
+                    hf.zero_shot_classification,
+                    text=text,
+                    candidate_labels=RISK_LABELS,
+                    model=RISK_MODEL,
+                    multi_label=False,
                 )
                 last_err = None
                 break
-            except (APIError, RateLimitError, APITimeoutError, Exception) as api_err:
-                last_err = api_err
-                logger.warning(f"DeepSeek risk prediction attempt {attempt + 1} failed: {api_err}")
+            except Exception as hf_err:
+                last_err = hf_err
+                logger.warning(f"HF risk prediction attempt {attempt + 1} failed: {hf_err}")
                 if attempt < 2:
                     await asyncio.sleep(2 ** attempt)
 
-        if last_err is not None:
-            logger.error(f"DeepSeek risk prediction failed after 3 attempts: {last_err}")
+        if last_err is not None or result is None:
+            logger.error(f"HF risk prediction failed after 3 attempts: {last_err}")
             raise HTTPException(
                 status_code=502,
                 detail="Risk prediction model is temporarily unavailable.",
             )
 
-        content = api_response.choices[0].message.content or "{}"
-        try:
-            parsed = json.loads(content)
-        except json.JSONDecodeError:
-            parsed = {"risk_label": "medium academic risk", "confidence": 0.5}
+        # result is list[ZeroShotClassificationOutputElement] sorted by score desc
+        top = result[0]
+        top_label = top.label
+        top_score = top.score
 
-        risk_label = str(parsed.get("risk_label", "medium academic risk"))
-        confidence = float(parsed.get("confidence", 0.5))
-
-        risk_level = RISK_MAPPING.get(risk_label, "Medium")
+        risk_level = RISK_MAPPING.get(top_label, "Medium")
         strict_risk_level = _to_strict_risk_level(risk_level)
         top_factors = _basic_risk_top_factors(student_data)
 
         result = RiskPrediction(
             riskLevel=risk_level,
-            confidence=round(confidence, 4),
+            confidence=round(float(top_score), 4),
             analysis={
-                "labels": [risk_label],
-                "scores": [round(confidence, 4)],
+                "labels": [el.label for el in result],
+                "scores": [round(el.score, 4) for el in result],
             },
             risk_level=strict_risk_level,
-            risk_score=round(confidence, 4),
+            risk_score=round(float(top_score), 4),
             top_factors=top_factors,
         )
         await deterministic_response_cache.set(
@@ -3097,7 +3387,7 @@ async def predict_risk_batch(request: BatchRiskRequest):
 
 
 @app.post("/api/learning-path", response_model=LearningPathResponse)
-async def generate_ai_learning_path(request: LearningPathRequest, response: Response):
+async def generate_learning_path(request: LearningPathRequest, response: Response):
     """Generate AI-powered personalized learning path"""
     try:
         cache_key = deterministic_response_cache.build_cache_key(
@@ -8027,11 +8317,6 @@ class InferenceMetricsResponse(BaseModel):
     metrics: Dict[str, Any]
 
 
-class HFMonitoringDataResponse(BaseModel):
-    success: bool
-    data: Dict[str, Any]
-
-
 class ImportGroundedFeedbackRequest(BaseModel):
     flow: str = Field(..., description="Flow identifier: quiz or lesson")
     status: str = Field(..., description="Event status: success, failed, or skipped")
@@ -8140,166 +8425,6 @@ class ImportGroundedAccessAuditResponse(BaseModel):
     warnings: List[str]
 
 
-# ─── Diagnostic Test Models ────────────────────────────────────
-
-class DiagnosticGenerateRequest(BaseModel):
-    strand: str = Field(..., description="Student strand: ABM, STEM, HUMSS, GAS, TVL")
-    gradeLevel: str = Field(..., description="Grade level: Grade 11 or Grade 12")
-    numQuestions: int = Field(default=15, ge=5, le=30, description="Number of questions to generate")
-
-
-class DiagnosticQuestion(BaseModel):
-    question_id: str
-    competency_code: str
-    domain: str
-    topic: str
-    difficulty: str
-    bloom_level: str
-    question_text: str
-    options: Dict[str, str]
-    correct_answer: str
-    solution_hint: str
-    curriculum_reference: str
-
-
-class DiagnosticGenerateResponse(BaseModel):
-    questions: List[DiagnosticQuestion]
-    test_id: str
-    metadata: Dict[str, Any]
-
-
-class DiagnosticSubmitRequest(BaseModel):
-    user_id: str
-    test_id: str
-    strand: str
-    grade_level: str
-    responses: List[Dict[str, Any]]
-
-
-class DiagnosticResult(BaseModel):
-    user_id: str
-    test_id: str
-    taken_at: datetime
-    strand: str
-    grade_level: str
-    total_items: int
-    total_score: int
-    percentage_score: float
-    responses: List[Dict[str, Any]]
-    domain_scores: Dict[str, Dict[str, Any]]
-    risk_profile: Dict[str, Any]
-
-
-class DiagnosticSubmitResponse(BaseModel):
-    success: bool
-    result: DiagnosticResult
-    risk_profile: Dict[str, Any]
-    domain_scores: Dict[str, Dict[str, Any]]
-    redirect_to: str
-
-
-class DiagnosticResultsResponse(BaseModel):
-    success: bool
-    results: List[DiagnosticResult]
-
-
-# ─── DepEd Curriculum Competency Domains ────────────────────────────
-
-DEPD_ED_COMPETENCY_DOMAINS: Dict[str, Dict[str, List[str]]] = {
-    "ABM": {
-        "Grade 11": [
-            "Business Mathematics - Fractions, Decimals, Percent",
-            "Business Mathematics - Proportion",
-            "Business Mathematics - Markup and Margin",
-            "Business Mathematics - Trade Discounts and VAT",
-            "Business Mathematics - Commissions",
-            "Business Mathematics - Salaries and Wages",
-            "Business Mathematics - Mandatory Deductions",
-            "Business Mathematics - Employee Benefits",
-            "Business Mathematics - Overtime Pay",
-            "Business Mathematics - Simple Interest",
-            "Business Mathematics - Compound Interest",
-            "Business Mathematics - Loans and Credit",
-            "Business Mathematics - Data Presentation",
-        ],
-        "Grade 12": [
-            "Business Mathematics - Business Reports",
-            "Business Mathematics - Financial Analysis",
-            "Business Mathematics - Investment Decisions",
-            "Business Mathematics - Taxation",
-            "Business Mathematics - Asset Depreciation",
-        ],
-    },
-    "STEM": {
-        "Grade 11": [
-            "General Mathematics - Patterns and Sequences",
-            "General Mathematics - Functions",
-            "General Mathematics - Function Operations",
-            "General Mathematics - Inverse Functions",
-            "General Mathematics - Unit Conversions",
-            "General Mathematics - Geometry",
-            "General Mathematics - Trigonometry",
-            "Statistics - Data Organization",
-            "Statistics - Measures of Central Tendency",
-            "Statistics - Measures of Variability",
-            "Statistics - Random Variables",
-            "Statistics - Probability Distributions",
-            "Statistics - Normal Distribution",
-            "Statistics - Sampling",
-            "Statistics - Hypothesis Testing",
-        ],
-        "Grade 12": [
-            "General Mathematics - Financial Math",
-            "General Mathematics - Compound Interest",
-            "General Mathematics - Annuities",
-            "General Mathematics - Amortization",
-            "General Mathematics - Logical Propositions",
-            "Statistics - Confidence Intervals",
-            "Statistics - Correlation",
-            "Statistics - Regression",
-        ],
-    },
-    "HUMSS": {
-        "Grade 11": [
-            "General Mathematics - Patterns and Sequences",
-            "General Mathematics - Functions",
-            "General Mathematics - Statistics Basics",
-            "General Mathematics - Data Analysis",
-            "General Mathematics - Probability",
-        ],
-        "Grade 12": [
-            "General Mathematics - Financial Math",
-            "General Mathematics - Logical Reasoning",
-            "Statistics - Statistical Inference",
-        ],
-    },
-    "GAS": {
-        "Grade 11": [
-            "General Mathematics - Patterns and Sequences",
-            "General Mathematics - Functions",
-            "General Mathematics - Statistics Basics",
-        ],
-        "Grade 12": [
-            "General Mathematics - Financial Math",
-            "General Mathematics - Logical Reasoning",
-        ],
-    },
-    "TVL": {
-        "Grade 11": [
-            "Applied Mathematics - Number Sense",
-            "Applied Mathematics - Measurement",
-            "Applied Mathematics - Data Interpretation",
-            "Applied Mathematics - Problem Solving",
-        ],
-        "Grade 12": [
-            "Applied Mathematics - Business Math",
-            "Applied Mathematics - Consumer Math",
-            "Applied Mathematics - Technical Math",
-        ],
-    },
-}
-
-
 def _coerce_event_timestamp_utc(event: Dict[str, Any]) -> Optional[datetime]:
     created_at = event.get("createdAt")
     if isinstance(created_at, datetime):
@@ -10678,83 +10803,6 @@ async def get_inference_metrics(http_request: Request):
     return InferenceMetricsResponse(success=True, metrics=metrics_snapshot)
 
 
-@app.get("/api/hf/monitoring", response_model=HFMonitoringDataResponse)
-async def get_hf_monitoring(http_request: Request):
-    """
-    Aggregates DeepSeek AI status, model config, and latency probe.
-    Returns distilled data safe for frontend consumption.
-
-    Requires admin authentication.
-    """
-    user = get_current_user(http_request)
-    if user.role != "admin":
-        raise HTTPException(status_code=403, detail="Forbidden for this role")
-
-    _ensure_deepseek_available()
-
-    try:
-        generation_model_id = get_model_for_task("chat")
-    except Exception:
-        generation_model_id = CHAT_MODEL
-
-    embedding_model_id = os.getenv("EMBEDDING_MODEL", "BAAI/bge-small-en-v1.5")
-
-    runtime_config = get_current_runtime_config()
-
-    task_resolved: dict[str, str] = {}
-    for task in [
-        "chat", "verify_solution", "lesson_generation", "quiz_generation",
-        "learning_path", "daily_insight", "risk_classification", "risk_narrative",
-        "rag_lesson", "rag_problem", "rag_analysis_context",
-    ]:
-        try:
-            task_resolved[task] = get_model_for_task(task)
-        except Exception:
-            task_resolved[task] = generation_model_id
-
-    result: Dict[str, Any] = {
-        "modelId": generation_model_id,
-        "modelStatus": "Operational",
-        "avgResponseTimeMs": 0,
-        "embeddingModelId": embedding_model_id,
-        "embeddingModelStatus": "Operational",
-        "inferenceBalance": 0.0,
-        "totalPeriodCost": 0.0,
-        "hubApiCallsUsed": 0,
-        "hubApiCallsLimit": 2500,
-        "zeroGpuMinutesUsed": 0,
-        "zeroGpuMinutesLimit": 25,
-        "publicStorageUsedTB": 0.0,
-        "publicStorageLimitTB": 11.2,
-        "lastChecked": datetime.now(timezone.utc).isoformat(),
-        "periodStart": "",
-        "periodEnd": "",
-        "activeProfile": runtime_config.get("profile") or os.getenv("MODEL_PROFILE", "dev"),
-        "runtimeOverridesActive": len(runtime_config.get("overrides", {})) > 0,
-        "resolvedModels": task_resolved,
-        "provider": "deepseek",
-        "apiBaseUrl": os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com"),
-    }
-
-    try:
-        client = get_deepseek_client()
-        latency_start = time.time()
-        probe_response = client.chat.completions.create(
-            model=str(CHAT_MODEL),
-            messages=[{"role": "user", "content": "Hi"}],
-            max_tokens=1,
-            temperature=0.0,
-        )
-        latency_ms = int((time.time() - latency_start) * 1000)
-        result["avgResponseTimeMs"] = latency_ms
-        result["modelStatus"] = "Operational"
-    except Exception as e:
-        logger.warning(f"DeepSeek latency probe failed: {e}")
-        result["modelStatus"] = "Degraded"
-
-    return HFMonitoringDataResponse(success=True, data=result)
-
-
 @app.get("/api/quiz/topics")
 async def get_quiz_topics(response: Response, gradeLevel: Optional[str] = None):
     """
@@ -11137,7 +11185,7 @@ async def calibrate_quiz_difficulty(request: CalibrateDifficultyRequest):
         raise HTTPException(status_code=500, detail=f"Calibration error: {str(e)}")
 
 
-@app.post("/api/quiz/adaptive-select")
+@app.post("/api/quiz/adaptive-select", response_model=AdaptiveQuizResponse)
 async def adaptive_quiz_selection(request: AdaptiveQuizSelectRequest):
     """
     Select questions adaptively based on student ability level using IRT.
@@ -11317,6 +11365,38 @@ def _testing_reset_try_delete_doc(doc_ref: Any, label: str) -> int:
         return 0
 
 
+def _testing_reset_try_delete_subcollection(
+    client: Any,
+    parent_collection: str,
+    parent_doc_id: str,
+    subcollection_name: str,
+) -> int:
+    """Delete all documents in subcollection of a document."""
+    deleted_docs = 0
+    try:
+        docs = list(client.collection(parent_collection).document(parent_doc_id).collection(subcollection_name).stream())
+        if not docs:
+            return 0
+        batch = client.batch()
+        pending = 0
+        for doc_snapshot in docs:
+            batch.delete(doc_snapshot.reference)
+            deleted_docs += 1
+            pending += 1
+            if pending >= TESTING_RESET_BATCH_SIZE:
+                batch.commit()
+                batch = client.batch()
+                pending = 0
+        if pending > 0:
+            batch.commit()
+    except Exception as err:
+        logger.warning(
+            "Testing reset skipped subcollection delete for %s/%s/%s: %s",
+            parent_collection, parent_doc_id, subcollection_name, err,
+        )
+    return deleted_docs
+
+
 def _testing_reset_try_set_doc(doc_ref: Any, payload: Dict[str, Any], label: str, merge: bool = False) -> int:
     try:
         if merge:
@@ -11329,25 +11409,6 @@ def _testing_reset_try_set_doc(doc_ref: Any, payload: Dict[str, Any], label: str
         return 0
 
 
-def _testing_reset_try_delete_subcollection(
-    client: Any, parent_path: str, subcollection_name: str
-) -> int:
-    """Delete all documents in a subcollection. Returns count of deleted docs."""
-    try:
-        docs = list(client.collection(parent_path).document().parent.collection(subcollection_name).stream())
-        for doc_snapshot in docs:
-            doc_snapshot.reference.delete()
-        return len(docs)
-    except Exception as err:
-        logger.warning(
-            "Testing reset skipped delete for %s/%s: %s",
-            parent_path,
-            subcollection_name,
-            err,
-        )
-        return 0
-
-
 def _reset_student_testing_data_admin(
     client: Any,
     uid: str,
@@ -11374,52 +11435,98 @@ def _reset_student_testing_data_admin(
         merge=False,
     )
 
-    # Build users/{uid} payload with DELETE_FIELD for optional assessment fields
-    users_payload = {
-        "level": 1,
-        "currentXP": 0,
-        "totalXP": 0,
-        "atRiskSubjects": [],
-        "hasTakenDiagnostic": False,
-        "iarAssessmentState": "not_started",
-        "learningPathState": "unlocked",
-        "remediationState": "not_required",
-        "subjectBadges": {},
-        "riskClassifications": {},
-        "overallRisk": "Low",
-        "updatedAt": timestamp_value,
-    }
-    # Add assessment-specific fields using DELETE_FIELD to remove them
-    if DELETE_FIELD is not None:
-        users_payload["diagnosticCompleted"] = DELETE_FIELD
-        users_payload["lastAssessmentDate"] = DELETE_FIELD
-        users_payload["assessmentAttemptCount"] = DELETE_FIELD
-        users_payload["initialProficiencyLevel"] = DELETE_FIELD
-    else:
-        users_payload["diagnosticCompleted"] = False
-        users_payload["lastAssessmentDate"] = None
-        users_payload["assessmentAttemptCount"] = 0
-        users_payload["initialProficiencyLevel"] = None
-
     updated_docs += _testing_reset_try_set_doc(
         client.collection("users").document(uid),
-        users_payload,
+        {
+            "level": 1,
+            "currentXP": 0,
+            "totalXP": 0,
+            "streak": 0,
+            "streakHistory": [],
+            "atRiskSubjects": [],
+            "hasTakenDiagnostic": False,
+            "iarAssessmentState": "not_started",
+            "learningPathState": "unlocked",
+            "remediationState": "not_required",
+            "subjectBadges": {},
+            "riskClassifications": {},
+            "overallRisk": "Low",
+            "assessmentDismissed": False,
+            "initialAssessmentCompleted": False,
+            "hasCompletedInitialAssessment": False,
+            "assessmentResults": None,
+            "assessmentCompletedAt": None,
+            "diagnosticCompleted": False,
+            "updatedAt": timestamp_value,
+        },
         f"users/{uid}",
         merge=True,
     )
 
+    # Delete field-type values that merge=True can't remove
+    try:
+        from google.cloud.firestore_v1 import DELETE_FIELD as _firestore_DELETE_FIELD
+        _delete_field = _firestore_DELETE_FIELD
+    except ImportError:
+        from google.cloud import firestore as _firestore_mod
+        _delete_field = _firestore_mod.DELETE_FIELD
+
+    try:
+        client.collection("users").document(uid).update({
+            "initialAssessmentCompletedAt": _delete_field,
+            "lastAssessmentDate": _delete_field,
+            "initialProficiencyLevel": _delete_field,
+            "iarQuestionSetVersion": _delete_field,
+            "iarTopicClassifications": _delete_field,
+            "topicScores": _delete_field,
+        })
+        updated_docs += 1
+    except Exception as err:
+        logger.warning("Testing reset skipped field deletes for users/%s: %s", uid, err)
+
     deleted_docs += _testing_reset_try_delete_by_field(client, "notifications", "userId", uid)
     deleted_docs += _testing_reset_try_delete_by_field(client, "chatSessions", "userId", uid)
     deleted_docs += _testing_reset_try_delete_by_field(client, "chatMessages", "userId", uid)
 
-    # Delete assessment subcollection documents
-    deleted_docs += _testing_reset_try_delete_subcollection(client, f"assessmentResults/{uid}", "attempts")
-    deleted_docs += _testing_reset_try_delete_subcollection(client, f"studentProgress/{uid}", "diagnostics")
-    deleted_docs += _testing_reset_try_delete_subcollection(client, f"assessmentQuestionHistory/{uid}", "questions")
-
     if effective_lrn != uid:
         deleted_docs += _testing_reset_try_delete_by_field(client, "notifications", "userId", effective_lrn)
 
+    # Delete assessment data using both uid and effective_lrn
+    deleted_docs += _testing_reset_try_delete_doc(
+        client.collection("diagnosticResults").document(uid), f"diagnosticResults/{uid}",
+    )
+    deleted_docs += _testing_reset_try_delete_doc(
+        client.collection("competencyProfiles").document(uid), f"competencyProfiles/{uid}",
+    )
+    deleted_docs += _testing_reset_try_delete_doc(
+        client.collection("assessments").document(uid), f"assessments/{uid}",
+    )
+
+    # Also try with effective_lrn if different
+    if effective_lrn != uid:
+        deleted_docs += _testing_reset_try_delete_doc(
+            client.collection("diagnosticResults").document(effective_lrn), f"diagnosticResults/{effective_lrn}",
+        )
+
+    # Delete assessment subcollections
+    deleted_docs += _testing_reset_try_delete_subcollection(client, "assessmentResults", uid, "attempts")
+    deleted_docs += _testing_reset_try_delete_subcollection(client, "assessments", uid, "attempts")
+    deleted_docs += _testing_reset_try_delete_subcollection(client, "studentProgress", uid, "diagnostics")
+    deleted_docs += _testing_reset_try_delete_subcollection(client, "assessmentQuestionHistory", uid, "questions")
+
+    if effective_lrn != uid:
+        deleted_docs += _testing_reset_try_delete_subcollection(client, "assessmentResults", effective_lrn, "attempts")
+        deleted_docs += _testing_reset_try_delete_subcollection(client, "assessments", effective_lrn, "attempts")
+
+    # Delete additional student activity collections
+    deleted_docs += _testing_reset_try_delete_by_field(client, "xpActivities", "userId", uid)
+    deleted_docs += _testing_reset_try_delete_by_field(client, "tasks", "studentId", uid)
+    deleted_docs += _testing_reset_try_delete_by_field(client, "friendships", "userId1", uid)
+    deleted_docs += _testing_reset_try_delete_by_field(client, "friendships", "userId2", uid)
+    deleted_docs += _testing_reset_try_delete_by_field(client, "friendRequests", "fromUserId", uid)
+    deleted_docs += _testing_reset_try_delete_by_field(client, "friendRequests", "toUserId", uid)
+    deleted_docs += _testing_reset_try_delete_by_field(client, "quizHistory", "userId", uid)
+
     updated_docs += _testing_reset_try_set_doc(
         client.collection("achievements").document(uid),
         {
@@ -12182,1179 +12289,6 @@ async def automation_content_updated(payload: ContentUpdatePayload):
         raise HTTPException(status_code=500, detail=f"Automation error: {str(e)}")
 
 
-# ─── Diagnostic Test Endpoints ─────────────────────────────────
-
-async def _generate_diagnostic_questions(
-    strand: str,
-    grade_level: str,
-    num_questions: int,
-) -> List[DiagnosticQuestion]:
-    """Generate diagnostic test questions using LLM based on DepEd curriculum with RAG."""
-    
-    topics = DEPD_ED_COMPETENCY_DOMAINS.get(strand, {}).get(grade_level, [])
-    if not topics:
-        topics = DEPD_ED_COMPETENCY_DOMAINS.get("STEM", {}).get("Grade 11", [])
-    
-    topic_list = "\n".join([f"- {t}" for t in topics[:10]])
-    
-    curriculum_chunks = retrieve_curriculum_context(
-        query=f"{topics[0] if topics else strand} examples problems {grade_level}",
-        subject="General Mathematics",
-        top_k=3,
-    )
-    
-    curriculum_context = ""
-    for chunk in curriculum_chunks:
-        source = chunk.get("source_file", "unknown")
-        content = chunk.get("content", "")[:500]
-        curriculum_context += f"[Source: {source}]\n{content}\n\n---\n\n"
-    
-    rag_instruction = ""
-    if curriculum_context:
-        rag_instruction = f"""CURRICULUM REFERENCE:
-{curriculum_context}
-
-Use these examples as reference. Do not copy directly."""
-    
-    prompt = f"""You are MathPulse AI's Diagnostic Test Generator. Generate {num_questions} multiple-choice questions for a Filipino Senior High School student (Strand: {strand}, Grade: {grade_level}).
-
-Based on these DepEd SHS curriculum competencies:
-{topic_list}
-
-{rag_instruction}
-
-Generate questions in this strict JSON format (no other text):
-[
-  {{
-    "question_id": "DX-<generate uuid>",
-    "competency_code": "TOPIC-SUBTOPIC-01",
-    "domain": "Domain Name",
-    "topic": "Specific Topic",
-    "difficulty": "easy|medium|hard",
-    "bloom_level": "remembering|understanding|applying|analyzing",
-    "question_text": "Question text in Filipino context",
-    "options": {{"A": "...", "B": "...", "C": "...", "D": "..."}},
-    "correct_answer": "A|B|C|D",
-    "solution_hint": "Brief solution hint (1-2 sentences)",
-    "curriculum_reference": "DepEd SHS [Strand] Q[X] - [Topic]"
-  }}
-]
-
-Distribution: 40% easy, 40% medium, 20% hard.
-Use Filipino real-life context (peso amounts, SSS/PhilHealth/BIR, local scenarios).
-Distractors must be plausible but clearly wrong.
-Return ONLY the JSON array, no other text."""
-
-    try:
-        messages = [
-            {"role": "system", "content": "You are a math question generator. Return ONLY valid JSON."},
-            {"role": "user", "content": prompt},
-        ]
-        response = await call_hf_chat_async(messages, max_tokens=4096, temperature=0.3, task_type="quiz")
-        
-        import re
-        json_match = re.search(r'\[.*\]', response, re.DOTALL)
-        if json_match:
-            questions_data = json.loads(json_match.group())
-        else:
-            questions_data = json.loads(response)
-        
-        questions = []
-        for q in questions_data[:num_questions]:
-            questions.append(DiagnosticQuestion(**q))
-        
-        return questions
-    except Exception as e:
-        logger.error(f"Diagnostic question generation error: {e}")
-        raise
-
-
-async def _analyze_diagnostic_risk(
-    responses: List[Dict[str, Any]],
-    total_items: int,
-    total_score: int,
-) -> Dict[str, Any]:
-    """Analyze student performance and generate risk profile."""
-    domain_scores: Dict[str, Dict[str, Any]] = {}
-    domain_responses: Dict[str, List[Dict[str, Any]]] = {}
-    
-    for resp in responses:
-        domain = resp.get("domain", "Unknown")
-        if domain not in domain_responses:
-            domain_responses[domain] = []
-        domain_responses[domain].append(resp)
-    
-    for domain, resp_list in domain_responses.items():
-        correct = sum(1 for r in resp_list if r.get("is_correct", False))
-        total = len(resp_list)
-        pct = (correct / total * 100) if total > 0 else 0
-        
-        mastery = "mastered" if pct >= 80 else "developing" if pct >= 60 else "beginning"
-        domain_scores[domain] = {
-            "correct": correct,
-            "total": total,
-            "percentage": round(pct, 1),
-            "mastery_level": mastery,
-        }
-    
-    weak_domains = [
-        d for d, data in domain_scores.items()
-        if data["percentage"] < 60
-    ]
-    
-    critical_gaps = []
-    competency_attempts: Dict[str, List[bool]] = {}
-    for resp in responses:
-        comp_code = resp.get("competency_code", "")
-        if comp_code not in competency_attempts:
-            competency_attempts[comp_code] = []
-        competency_attempts[comp_code].append(resp.get("is_correct", False))
-    
-    for comp_code, results in competency_attempts.items():
-        correct_count = sum(1 for r in results if r)
-        if len(results) >= 2 and correct_count == 0:
-            critical_gaps.append(comp_code)
-    
-    overall_pct = (total_score / total_items * 100) if total_items > 0 else 0
-    
-    if overall_pct >= 75 and len(critical_gaps) == 0:
-        overall_risk = "low"
-    elif overall_pct >= 55 or len(critical_gaps) <= 2:
-        overall_risk = "moderate"
-    elif overall_pct >= 40 or len(critical_gaps) <= 4:
-        overall_risk = "high"
-    else:
-        overall_risk = "critical"
-    
-    intervention_messages = {
-        "low": "Great job! You have a solid foundation. Keep practicing to maintain your skills!",
-        "moderate": "You're making good progress. Focus on the topics where you need more practice.",
-        "high": "Don't worry! With focused practice on your weak areas, you'll improve quickly.",
-        "critical": "Let's work on this together. Start with the basics and build up your confidence.",
-    }
-    
-    suggested_path = weak_domains[:3] if weak_domains else list(domain_scores.keys())[:3]
-    
-    return {
-        "overall_risk": overall_risk,
-        "overall_score_percent": round(overall_pct, 1),
-        "domain_scores": domain_scores,
-        "weak_domains": weak_domains,
-        "critical_gaps": critical_gaps,
-        "recommended_intervention": intervention_messages[overall_risk],
-        "suggested_learning_path": suggested_path,
-    }
-
-
-def _save_diagnostic_to_firestore(result: DiagnosticResult) -> bool:
-    """Save diagnostic result to Firestore."""
-    if not HAS_FIREBASE_ADMIN or not firebase_firestore:
-        logger.warning("Firebase not available for diagnostic save")
-        return False
-    
-    try:
-        db = firebase_firestore.client()
-        doc_ref = db.collection("diagnosticResults").document(result.user_id).collection("attempts").document(result.test_id)
-        doc_ref.set({
-            "testId": result.test_id,
-            "takenAt": result.taken_at,
-            "strand": result.strand,
-            "gradeLevel": result.grade_level,
-            "totalItems": result.total_items,
-            "totalScore": result.total_score,
-            "percentageScore": result.percentage_score,
-            "responses": result.responses,
-            "domainScores": result.domain_scores,
-            "riskProfile": result.risk_profile,
-        })
-        
-        latest_ref = db.collection("users").document(result.user_id)
-        latest_ref.set({"latestDiagnosticTestId": result.test_id}, merge=True)
-        
-        return True
-    except Exception as e:
-        logger.error(f"Firestore diagnostic save error: {e}")
-        return False
-
-
-@app.post("/api/diagnostic/generate", response_model=DiagnosticGenerateResponse)
-async def generate_diagnostic_test(request: DiagnosticGenerateRequest):
-    """
-    Generate a personalized diagnostic assessment for a student.
-    Questions are based on DepEd Strengthened SHS Curriculum.
-    """
-    try:
-        test_id = f"DX-{uuid.uuid4().hex[:12]}"
-        
-        questions = await _generate_diagnostic_questions(
-            request.strand,
-            request.gradeLevel,
-            request.numQuestions,
-        )
-        
-        stripped_questions = []
-        for q in questions:
-            stripped_questions.append(DiagnosticQuestion(
-                question_id=q.question_id,
-                competency_code=q.competency_code,
-                domain=q.domain,
-                topic=q.topic,
-                difficulty=q.difficulty,
-                bloom_level=q.bloom_level,
-                question_text=q.question_text,
-                options=q.options,
-                correct_answer=q.correct_answer,
-                solution_hint="",
-                curriculum_reference=q.curriculum_reference,
-            ))
-        
-        metadata = {
-            "strand": request.strand,
-            "grade_level": request.gradeLevel,
-            "num_questions": len(questions),
-            "generated_at": datetime.now(timezone.utc).isoformat(),
-        }
-        
-        return DiagnosticGenerateResponse(
-            questions=stripped_questions,
-            test_id=test_id,
-            metadata=metadata,
-        )
-    except Exception as e:
-        logger.error(f"Diagnostic generation error: {e}\n{traceback.format_exc()}")
-        raise HTTPException(status_code=500, detail=f"Diagnostic generation error: {str(e)}")
-
-
-@app.post("/api/diagnostic/submit", response_model=DiagnosticSubmitResponse)
-async def submit_diagnostic_test(request: DiagnosticSubmitRequest):
-    """
-    Submit diagnostic test responses, score them, and generate risk profile.
-    Results are saved to Firestore for use by other subsystems.
-    """
-    try:
-        total_items = len(request.responses)
-        total_score = 0
-        scored_responses = []
-        
-        for resp in request.responses:
-            is_correct = resp.get("student_answer", "") == resp.get("correct_answer", "")
-            if is_correct:
-                total_score += 1
-            scored_responses.append({
-                "question_id": resp.get("question_id"),
-                "competency_code": resp.get("competency_code"),
-                "domain": resp.get("domain"),
-                "topic": resp.get("topic"),
-                "difficulty": resp.get("difficulty"),
-                "bloom_level": resp.get("bloom_level"),
-                "student_answer": resp.get("student_answer"),
-                "correct_answer": resp.get("correct_answer"),
-                "is_correct": is_correct,
-                "time_spent_seconds": resp.get("time_spent_seconds", 0),
-            })
-        
-        risk_profile = await _analyze_diagnostic_risk(
-            scored_responses,
-            total_items,
-            total_score,
-        )
-        
-        domain_scores = risk_profile.get("domain_scores", {})
-        
-        result = DiagnosticResult(
-            user_id=request.user_id,
-            test_id=request.test_id,
-            taken_at=datetime.now(timezone.utc),
-            strand=request.strand,
-            grade_level=request.grade_level,
-            total_items=total_items,
-            total_score=total_score,
-            percentage_score=round(total_score / total_items * 100, 1),
-            responses=scored_responses,
-            domain_scores=domain_scores,
-            risk_profile=risk_profile,
-        )
-        
-        _save_diagnostic_to_firestore(result)
-        
-        return DiagnosticSubmitResponse(
-            success=True,
-            result=result,
-            risk_profile=risk_profile,
-            domain_scores=domain_scores,
-            redirect_to="/dashboard",
-        )
-    except Exception as e:
-        logger.error(f"Diagnostic submit error: {e}\n{traceback.format_exc()}")
-        raise HTTPException(status_code=500, detail=f"Diagnostic submit error: {str(e)}")
-
-
-@app.get("/api/diagnostic/results/{user_id}", response_model=DiagnosticResultsResponse)
-async def get_diagnostic_results(user_id: str):
-    """
-    Fetch diagnostic test results for a student.
-    Returns all attempts with risk profiles.
-    """
-    if not HAS_FIREBASE_ADMIN or not firebase_firestore:
-        return DiagnosticResultsResponse(success=False, results=[])
-    
-    try:
-        db = firebase_firestore.client()
-        docs = db.collection("diagnosticResults").document(user_id).collection("attempts").stream()
-        
-        results = []
-        for doc in docs:
-            data = doc.to_dict()
-            if data:
-                results.append(DiagnosticResult(**data))
-        
-        results.sort(key=lambda x: x.taken_at, reverse=True)
-        
-        return DiagnosticResultsResponse(success=True, results=results)
-    except Exception as e:
-        logger.error(f"Diagnostic results fetch error: {e}")
-        return DiagnosticResultsResponse(success=False, results=[])
-
-
-# ─── DepEd Topic Registry for Lessons/Quizzes ─────────────────────────────
-
-DEPD_TOPIC_REGISTRY: Dict[str, Dict[str, str]] = {
-    "NA-WAGE-01": {"subject": "General Mathematics", "title": "Wages, Salaries, Overtime, Commissions, VAT", "quarter": "Q1"},
-    "NA-SEQ-01": {"subject": "General Mathematics", "title": "Arithmetic Sequences and Series", "quarter": "Q1"},
-    "NA-SEQ-02": {"subject": "General Mathematics", "title": "Geometric Sequences and Series", "quarter": "Q1"},
-    "NA-SEQ-03": {"subject": "General Mathematics", "title": "Sigma Notation, Financial Applications", "quarter": "Q1"},
-    "NA-FUNC-01": {"subject": "General Mathematics", "title": "Functions, Relations, Vertical Line Test", "quarter": "Q2"},
-    "NA-FUNC-02": {"subject": "General Mathematics", "title": "Evaluating Functions, Operations, Composition", "quarter": "Q2"},
-    "NA-FUNC-03": {"subject": "General Mathematics", "title": "One-to-One Functions, Inverse Functions", "quarter": "Q2"},
-    "NA-FUNC-04": {"subject": "General Mathematics", "title": "Piecewise Functions", "quarter": "Q2"},
-    "NA-EXP-01": {"subject": "General Mathematics", "title": "Exponential Functions, Equations, Inequalities", "quarter": "Q2"},
-    "NA-LOG-01": {"subject": "General Mathematics", "title": "Logarithmic Functions", "quarter": "Q2"},
-    "MG-TRIG-01": {"subject": "General Mathematics", "title": "Trigonometric Ratios, Right Triangles", "quarter": "Q3"},
-    "MG-TRIG-02": {"subject": "General Mathematics", "title": "Oblique Triangles, Heron's Formula", "quarter": "Q3"},
-    "MG-MEAS-01": {"subject": "General Mathematics", "title": "Unit Conversion, Surface Area, Volume", "quarter": "Q2"},
-    "DP-STAT-01": {"subject": "Statistics", "title": "Types of Data, Levels of Measurement", "quarter": "Q2"},
-    "DP-STAT-02": {"subject": "Statistics", "title": "Measures of Central Tendency and Variability", "quarter": "Q2"},
-    "DP-RV-01": {"subject": "Statistics", "title": "Random Variables (Discrete & Continuous)", "quarter": "Q3"},
-    "DP-RV-02": {"subject": "Statistics", "title": "Probability Distributions, Mean, Variance, SD", "quarter": "Q3"},
-    "DP-NORM-01": {"subject": "Statistics", "title": "Normal Distribution, Properties", "quarter": "Q3"},
-    "DP-NORM-02": {"subject": "Statistics", "title": "Z-Scores, Standard Normal Table", "quarter": "Q3"},
-    "DP-SAMP-01": {"subject": "Statistics", "title": "Sampling, Central Limit Theorem", "quarter": "Q3"},
-    "DP-SAMP-02": {"subject": "Statistics", "title": "Sampling Distribution of Sample Means", "quarter": "Q3"},
-    "NA-FIN-01": {"subject": "General Mathematics", "title": "Compound Interest, Maturity Value", "quarter": "Q4"},
-    "NA-FIN-02": {"subject": "General Mathematics", "title": "Simple and General Annuities", "quarter": "Q4"},
-    "NA-FIN-03": {"subject": "General Mathematics", "title": "Deferred Annuity, Fair Market Value", "quarter": "Q4"},
-    "NA-FIN-04": {"subject": "General Mathematics", "title": "Business and Consumer Loans, Amortization", "quarter": "Q4"},
-    "DP-HYP-01": {"subject": "Statistics", "title": "Hypothesis Testing: Null/Alternative, Types of Error", "quarter": "Q4"},
-    "DP-HYP-02": {"subject": "Statistics", "title": "Z-Test and T-Test", "quarter": "Q4"},
-    "DP-HYP-03": {"subject": "Statistics", "title": "Pearson r, Scatter Plots, Line of Best Fit", "quarter": "Q4"},
-    "NA-LOGIC-01": {"subject": "General Mathematics", "title": "Logical Propositions, Connectives, Truth Tables", "quarter": "Q4"},
-    "NA-LOGIC-02": {"subject": "General Mathematics", "title": "Conditional Propositions, Tautologies", "quarter": "Q4"},
-    "BM-FDP-01": {"subject": "Business Mathematics", "title": "Fractions, Decimals, Percent Conversions", "quarter": "Q1"},
-    "BM-FDP-02": {"subject": "Business Mathematics", "title": "Proportion: Direct, Inverse, Partitive", "quarter": "Q1"},
-    "BM-BUS-01": {"subject": "Business Mathematics", "title": "Markup, Margin, Trade Discounts, VAT", "quarter": "Q1"},
-    "BM-BUS-02": {"subject": "Business Mathematics", "title": "Profit, Loss, Break-even Point", "quarter": "Q1"},
-    "BM-COMM-01": {"subject": "Business Mathematics", "title": "Straight Commission, Salary Plus Commission", "quarter": "Q2"},
-    "BM-COMM-02": {"subject": "Business Mathematics", "title": "Commission on Cash and Installment Basis", "quarter": "Q2"},
-    "BM-COMM-03": {"subject": "Business Mathematics", "title": "Down Payment, Gross Balance", "quarter": "Q2"},
-    "BM-INT-01": {"subject": "Business Mathematics", "title": "Simple Interest, Compound Interest", "quarter": "Q2"},
-    "BM-INT-02": {"subject": "Business Mathematics", "title": "Solving Problems with Interest and Commission", "quarter": "Q2"},
-    "BM-SW-01": {"subject": "Business Mathematics", "title": "Salary vs. Wage, Income", "quarter": "Q2"},
-    "BM-SW-02": {"subject": "Business Mathematics", "title": "Employee Benefits: Taxable vs. Nontaxable", "quarter": "Q2"},
-    "BM-SW-03": {"subject": "Business Mathematics", "title": "Mandatory Deductions: SSS, PhilHealth, Pag-IBIG", "quarter": "Q2"},
-    "BM-SW-04": {"subject": "Business Mathematics", "title": "Overtime Pay Computation (Labor Code)", "quarter": "Q2"},
-    "BM-SW-05": {"subject": "Business Mathematics", "title": "E-Spreadsheet for Payroll", "quarter": "Q2"},
-    "BM-MORT-01": {"subject": "Business Mathematics", "title": "Mortgage, Amortization, Monthly Payment", "quarter": "Q2"},
-    "BM-DATA-01": {"subject": "Business Mathematics", "title": "Data Presentation: Tables, Bar, Line, Pie Charts", "quarter": "Q2"},
-    "BM-DATA-02": {"subject": "Business Mathematics", "title": "Analyzing Business Data with Excel", "quarter": "Q2"},
-    "SP-RV-01": {"subject": "Statistics & Probability", "title": "Random Variables, Discrete vs. Continuous", "quarter": "Q1"},
-    "SP-RV-02": {"subject": "Statistics & Probability", "title": "Probability Distribution, Mean, Variance, SD", "quarter": "Q1"},
-    "SP-NORM-01": {"subject": "Statistics & Probability", "title": "Normal Curve Properties", "quarter": "Q1"},
-    "SP-NORM-02": {"subject": "Statistics & Probability", "title": "Z-Scores, Standard Normal Table", "quarter": "Q1"},
-    "SP-NORM-03": {"subject": "Statistics & Probability", "title": "Applying Normal Distribution", "quarter": "Q1"},
-    "SP-SAMP-01": {"subject": "Statistics & Probability", "title": "Types of Random Sampling", "quarter": "Q2"},
-    "SP-SAMP-02": {"subject": "Statistics & Probability", "title": "Sampling Distribution of Sample Means", "quarter": "Q2"},
-    "SP-SAMP-03": {"subject": "Statistics & Probability", "title": "Central Limit Theorem", "quarter": "Q2"},
-    "SP-HYP-01": {"subject": "Statistics & Probability", "title": "Hypothesis Testing: H0 and Ha", "quarter": "Q2"},
-    "SP-HYP-02": {"subject": "Statistics & Probability", "title": "Level of Significance, Type I and II Errors", "quarter": "Q2"},
-    "SP-HYP-03": {"subject": "Statistics & Probability", "title": "Z-Test for Known Variance", "quarter": "Q2"},
-    "SP-HYP-04": {"subject": "Statistics & Probability", "title": "T-Test for Unknown Variance", "quarter": "Q2"},
-    "SP-HYP-05": {"subject": "Statistics & Probability", "title": "Z-Test and T-Test for Proportion", "quarter": "Q2"},
-    "SP-CORR-01": {"subject": "Statistics & Probability", "title": "Pearson r, Scatter Plots", "quarter": "Q2"},
-    "SP-CORR-02": {"subject": "Statistics & Probability", "title": "Line of Best Fit, Regression", "quarter": "Q2"},
-}
-
-
-# ─── Diagnostic-Integrated Lesson Generation ─────────────────────
-
-class DiagnosticLessonRequest(BaseModel):
-    student_id: str
-    topic_id: str
-    mastery_level: str = Field(default="beginning")
-    strand: str = Field(default="STEM")
-    grade_level: str = Field(default="Grade 11")
-
-
-class DiagnosticLessonSection(BaseModel):
-    type: str
-    title: Optional[str] = None
-    content: str
-    formula: Optional[str] = None
-    visual_hint: Optional[str] = None
-    problem: Optional[str] = None
-    solution_steps: Optional[List[Dict[str, Any]]] = None
-    final_answer: Optional[str] = None
-    prompt: Optional[str] = None
-    hint: Optional[str] = None
-    answer: Optional[str] = None
-
-
-class DiagnosticLessonResponse(BaseModel):
-    lesson_id: str
-    topic_id: str
-    subject: str
-    title: str
-    grade_level: str
-    strand: str
-    estimated_minutes: int
-    mastery_target: str
-    learning_objectives: List[str]
-    sections: List[DiagnosticLessonSection]
-    summary: str
-    real_life_connection: str
-    next_topic_id: Optional[str]
-    prerequisite_topic_ids: List[str]
-
-
-@app.post("/api/lesson/diagnostic", response_model=DiagnosticLessonResponse)
-async def generate_diagnostic_lesson(request: DiagnosticLessonRequest):
-    """
-    Generate personalized lesson based on diagnostic test results.
-    Adjusts content difficulty based on student's mastery level.
-    Uses RAG to inject DepEd curriculum content.
-    """
-    try:
-        topic_info = DEPD_TOPIC_REGISTRY.get(request.topic_id, {})
-        subject = topic_info.get("subject", "General Mathematics")
-        title = topic_info.get("title", request.topic_id)
-        
-        curriculum_chunks = retrieve_curriculum_context(
-            query=f"{title} {request.topic_id} examples problems exercises",
-            subject=subject,
-            top_k=4,
-        )
-        
-        curriculum_context = ""
-        for chunk in curriculum_chunks:
-            source = chunk.get("source_file", "unknown")
-            content = chunk.get("content", "")[:800]
-            curriculum_context += f"[Source: {source}]\n{content}\n\n---\n\n"
-        
-        mastery_adjustments = {
-            "beginning": "Use extra-simple language, 3 worked examples, more hints.",
-            "developing": "Standard pacing, 2 worked examples.",
-            "mastered": "Fast-track with 1 worked example and a challenge problem.",
-        }
-        
-        rag_instruction = ""
-        if curriculum_context:
-            rag_instruction = f"""REFERENCE CURRICULUM CONTENT (from DepEd modules):
-{curriculum_context}
-
-IMPORTANT: Base your lesson STRICTLY on the curriculum content above. Do not invent formulas or examples."""
-        
-        prompt = f"""Generate a complete lesson for topic {request.topic_id}: {title}.
-
-Student Context:
-- Strand: {request.strand}
-- Grade: {request.grade_level}
-- Mastery Level: {request.mastery_level} ({mastery_adjustments.get(request.mastery_level, '')})
-
-{rag_instruction}
-
-Use Filipino context (₱, local scenarios).
-Follow SDO Navotas step-by-step: "Given → Formula → Substitute → Compute → Conclude"
-
-Return ONLY this exact JSON (no other text):
-{{
-  "lesson_id": "LSN-{uuid.uuid4().hex[:8]}",
-  "topic_id": "{request.topic_id}",
-  "subject": "{subject}",
-  "title": "{title}",
-  "grade_level": "{request.grade_level}",
-  "strand": "{request.strand}",
-  "estimated_minutes": 20,
-  "mastery_target": "mastered",
-  "learning_objectives": ["By the end, you will be able to..."],
-  "sections": [
-    {{"type": "hook", "content": "Relatable Filipino intro (2-3 sentences)"}},
-    {{"type": "concept", "title": "...", "content": "Core explanation", "formula": "LaTeX or null", "visual_hint": "description or null"}},
-    {{"type": "worked_example", "title": "Example 1", "problem": "...", "solution_steps": [{{"step": 1, "explanation": "...", "math": "LaTeX or null"}}], "final_answer": "..."}},
-    {{"type": "try_it", "prompt": "Your turn!", "problem": "...", "hint": "Think about...", "answer": "...", "solution_steps": []}}
-  ],
-  "summary": "3-sentence recap",
-  "real_life_connection": "1 sentence to Filipino career",
-  "next_topic_id": "next topic ID or null",
-  "prerequisite_topic_ids": ["prereq topic IDs"]
-}}"""
-
-        messages = [
-            {"role": "system", "content": "You are a DepEd curriculum lesson designer. Return ONLY valid JSON."},
-            {"role": "user", "content": prompt},
-        ]
-        response = await call_hf_chat_async(messages, max_tokens=4096, temperature=0.3, task_type="lesson")
-        
-        import re
-        json_match = re.search(r'\{.*\}', response, re.DOTALL)
-        if json_match:
-            lesson_data = json.loads(json_match.group())
-        else:
-            lesson_data = json.loads(response)
-        
-        return DiagnosticLessonResponse(**lesson_data)
-    except Exception as e:
-        logger.error(f"Diagnostic lesson generation error: {e}")
-        raise HTTPException(status_code=500, detail=f"Lesson generation error: {str(e)}")
-
-
-# ─── Consolidated Lesson Generator (reads from diagnostic) ─────────────
-
-class LessonsGenerateRequest(BaseModel):
-    student_id: str
-    topic_id: str
-    strand: str = Field(default="STEM")
-    grade_level: str = Field(default="Grade 11")
-
-
-@app.post("/api/lessons/generate", response_model=DiagnosticLessonResponse)
-async def generate_lesson_from_diagnostic(request: LessonsGenerateRequest):
-    """
-    Generate a personalized lesson by reading mastery_level from the
-    student's diagnostic results in Firestore. Falls back to 'beginning'
-    if no diagnostic data exists.
-    """
-    mastery_level = "beginning"
-    
-    if HAS_FIREBASE_ADMIN and firebase_firestore:
-        try:
-            db = firebase_firestore.client()
-            user_doc = db.collection("users").document(request.student_id).get()
-            if user_doc.exists:
-                user_data = user_doc.to_dict() or {}
-                diag_id = user_data.get("latestDiagnosticTestId", "")
-                if diag_id:
-                    diag_doc = (
-                        db.collection("diagnosticResults")
-                        .document(request.student_id)
-                        .collection("attempts")
-                        .document(diag_id)
-                        .get()
-                    )
-                    if diag_doc.exists:
-                        diag_data = diag_doc.to_dict() or {}
-                        domain_scores = diag_data.get("domainScores", {})
-                        for domain, score_data in domain_scores.items():
-                            ml = score_data.get("mastery_level", "")
-                            if ml:
-                                mastery_level = ml
-                                break
-        except Exception as diag_err:
-            logger.debug(f"Could not read diagnostic mastery for lesson: {diag_err}")
-    
-    return await generate_diagnostic_lesson(
-        DiagnosticLessonRequest(
-            student_id=request.student_id,
-            topic_id=request.topic_id,
-            mastery_level=mastery_level,
-            strand=request.strand,
-            grade_level=request.grade_level,
-        )
-    )
-
-
-# ─── Progress Evaluation Endpoint ─────────────────────────────────
-
-class ProgressEvaluateRequest(BaseModel):
-    student_id: str
-    quiz_id: str
-    topic_id: str
-    mastery_level_before: str
-    items: List[Dict[str, Any]]
-    previous_attempts: int = Field(default=0)
-
-
-class ProgressEvaluateResponse(BaseModel):
-    new_mastery_level: str
-    mastery_changed: bool
-    score_percent: float
-    xp_earned: int
-    xp_breakdown: Dict[str, int]
-    badges_unlocked: List[str]
-    performance_feedback: str
-    error_analysis: List[Dict[str, Any]]
-    next_action: str
-    next_topic_id: Optional[str]
-    motivational_message: str
-    teacher_flag: Optional[Dict[str, Any]]
-
-
-@app.post("/api/progress/evaluate", response_model=ProgressEvaluateResponse)
-async def evaluate_progress(request: ProgressEvaluateRequest):
-    """
-    Evaluate quiz performance, update mastery, award XP.
-    Called after every quiz submission.
-    """
-    try:
-        total_items = len(request.items)
-        correct_count = sum(1 for item in request.items if item.get("is_correct", False))
-        score_percent = (correct_count / total_items * 100) if total_items > 0 else 0
-        
-        mastery_changed = False
-        new_level = request.mastery_level_before
-        prev = request.mastery_level_before
-        
-        applying_level_correct = sum(
-            1 for item in request.items
-            if item.get("is_correct", False) and item.get("bloom_level", "") in ("applying", "analyzing", "evaluating")
-        )
-        analyzing_level_correct = sum(
-            1 for item in request.items
-            if item.get("is_correct", False) and item.get("bloom_level", "") in ("analyzing", "evaluating", "creating")
-        )
-        
-        if prev == "beginning" and score_percent >= 60 and applying_level_correct >= 2:
-            new_level = "developing"
-            mastery_changed = True
-        elif prev == "developing" and score_percent >= 80 and analyzing_level_correct >= 1:
-            new_level = "mastered"
-            mastery_changed = True
-        
-        xp_base = 0
-        xp_mastery = 0
-        xp_other = 0
-        
-        for item in request.items:
-            diff = item.get("difficulty", "easy")
-            if item.get("is_correct", False):
-                if diff == "easy":
-                    xp_base += 5
-                elif diff == "medium":
-                    xp_base += 10
-                elif diff == "hard":
-                    xp_base += 20
-        
-        if mastery_changed:
-            xp_mastery = 50
-        
-        if score_percent == 100 and request.previous_attempts == 0:
-            xp_other += 30
-        
-        if request.previous_attempts >= 1 and score_percent > 60:
-            xp_other += 15
-        
-        xp_total = xp_base + xp_mastery + xp_other
-        
-        error_analysis = []
-        for item in request.items:
-            if not item.get("is_correct", False):
-                error_analysis.append({
-                    "item_id": item.get("item_id", ""),
-                    "student_answer": item.get("student_answer", ""),
-                    "correct_answer": item.get("correct_answer", ""),
-                    "explanation": "Check your steps for this type of problem.",
-                })
-        
-        next_action = "continue_learning_path"
-        if score_percent < 40 and request.previous_attempts >= 3:
-            next_action = "teacher_flag"
-        elif score_percent < 60:
-            next_action = "retry_quiz"
-        
-        next_topics = list(DEPD_TOPIC_REGISTRY.keys())
-        current_idx = next_topics.index(request.topic_id) if request.topic_id in next_topics else 0
-        next_topic_id = next_topics[current_idx + 1] if current_idx + 1 < len(next_topics) else None
-        
-        messages = {
-            "low": "Keep practicing! You're building momentum.",
-            "moderate": "Good progress! Focus on your weak areas.",
-            "high": "You're improving! Stay consistent.",
-            "critical": "Don't give up! One step at a time.",
-        }
-        motivational = messages.get(new_level, messages["low"])
-        
-        if mastery_changed:
-            if new_level == "developing":
-                motivational = "Kaya mo yan! You're moving up!"
-            elif new_level == "mastered":
-                motivational = "Congratulations! Topic mastered!"
-        
-        teacher_flag = None
-        if score_percent < 40 and request.previous_attempts >= 3:
-            teacher_flag = {"reason": f"Score {score_percent}% after 3+ attempts", "severity": "high"}
-        
-        if HAS_FIREBASE_ADMIN and firebase_firestore:
-            try:
-                db = firebase_firestore.client()
-                topic_progress_ref = db.collection("studentProgress").document(request.student_id).collection("topics").document(request.topic_id)
-                topic_progress_ref.set({
-                    "mastery_level": new_level,
-                    "quiz_attempts": firebase_firestore.Increment(1),
-                    "best_score": max(score_percent, 0),
-                    "xp_earned": firebase_firestore.Increment(xp_total),
-                    "last_activity": firebase_firestore.SERVER_TIMESTAMP,
-                    "error_patterns": [e.get("explanation", "") for e in error_analysis],
-                    "teacher_flagged": teacher_flag is not None,
-                }, merge=True)
-                
-                stats_ref = db.collection("studentProgress").document(request.student_id).collection("stats").document("summary")
-                stats_ref.set({
-                    "total_xp": firebase_firestore.Increment(xp_total),
-                    "topics_mastered": firebase_firestore.Increment(1) if mastery_changed else firebase_firestore.Increment(0),
-                }, merge=True)
-            except Exception as fs_err:
-                logger.warning(f"Firestore progress save failed: {fs_err}")
-        
-        return ProgressEvaluateResponse(
-            new_mastery_level=new_level,
-            mastery_changed=mastery_changed,
-            score_percent=round(score_percent, 1),
-            xp_earned=xp_total,
-            xp_breakdown={"base": xp_base, "mastery_bonus": xp_mastery, "other": xp_other},
-            badges_unlocked=[],
-            performance_feedback=f"You got {correct_count}/{total_items} correct.",
-            error_analysis=error_analysis,
-            next_action=next_action,
-            next_topic_id=next_topic_id,
-            motivational_message=motivational,
-            teacher_flag=teacher_flag,
-        )
-    except Exception as e:
-        logger.error(f"Progress evaluation error: {e}")
-        raise HTTPException(status_code=500, detail=f"Progress evaluation error: {str(e)}")
-
-
-# ─── Adaptive Quiz Endpoint ─────────────────────────────────────
-
-class AdaptiveQuizRequest(BaseModel):
-    student_id: str
-    topic_id: str
-    recent_lesson_id: Optional[str] = None
-    strand: str = Field(default="STEM")
-
-
-class AdaptiveQuizItem(BaseModel):
-    item_id: str
-    type: str
-    bloom_level: str
-    difficulty: str
-    question: str
-    options: Optional[Dict[str, str]] = None
-    correct_answer: str
-    acceptable_range: Optional[List[float]] = None
-    solution_hint: str
-    competency_code: str
-    curriculum_reference: str
-
-
-class DiagnosticQuizResponse(BaseModel):
-    quiz_id: str
-    topic_id: str
-    mastery_target_after: str
-    items: List[AdaptiveQuizItem]
-    prev_score: Optional[float]
-    difficulty_distribution: Dict[str, int]
-
-
-async def _resolve_mastery_and_prev_score(
-    student_id: str,
-    topic_id: str,
-) -> tuple[str, Optional[float]]:
-    """Read mastery_level and prev_score from Firestore diagnostic and studentProgress."""
-    mastery = "beginning"
-    prev_score: Optional[float] = None
-    
-    if not HAS_FIREBASE_ADMIN or not firebase_firestore:
-        return mastery, prev_score
-    
-    try:
-        db = firebase_firestore.client()
-        
-        topic_progress_doc = (
-            db.collection("studentProgress")
-            .document(student_id)
-            .collection("topics")
-            .document(topic_id)
-            .get()
-        )
-        if topic_progress_doc.exists:
-            tp_data = topic_progress_doc.to_dict() or {}
-            tp_mastery = str(tp_data.get("mastery_level", "")).strip()
-            if tp_mastery in ("beginning", "developing", "mastered"):
-                mastery = tp_mastery
-            prev_score_raw = tp_data.get("best_score")
-            if isinstance(prev_score_raw, (int, float)):
-                prev_score = float(prev_score_raw)
-        
-        user_doc = db.collection("users").document(student_id).get()
-        if user_doc.exists:
-            user_data = user_doc.to_dict() or {}
-            diag_id = user_data.get("latestDiagnosticTestId", "")
-            if diag_id:
-                diag_doc = (
-                    db.collection("diagnosticResults")
-                    .document(student_id)
-                    .collection("attempts")
-                    .document(diag_id)
-                    .get()
-                )
-                if diag_doc.exists:
-                    diag_data = diag_doc.to_dict() or {}
-                    domain_scores = diag_data.get("domainScores", {})
-                    if not topic_progress_doc.exists:
-                        for domain, score_data in domain_scores.items():
-                            ml = score_data.get("mastery_level", "")
-                            if ml and ml in ("beginning", "developing", "mastered"):
-                                mastery = ml
-                                break
-    except Exception as e:
-        logger.debug(f"Could not resolve mastery/prev_score: {e}")
-    
-    return mastery, prev_score
-
-
-def _calibrate_quiz_params(mastery_level: str, prev_score: Optional[float]) -> dict:
-    """Return item count and difficulty distribution based on mastery and history."""
-    if mastery_level == "mastered":
-        count = 10
-        distribution = {"easy": 10, "medium": 40, "hard": 50}
-    elif mastery_level == "developing":
-        count = 8
-        distribution = {"easy": 30, "medium": 50, "hard": 20}
-    else:
-        count = 5
-        distribution = {"easy": 60, "medium": 40, "hard": 0}
-    
-    if prev_score is not None and prev_score < 50:
-        distribution = {
-            "easy": min(80, distribution["easy"] + 20),
-            "medium": distribution["medium"],
-            "hard": max(0, distribution["hard"] - 20),
-        }
-    
-    return {"count": count, "distribution": distribution}
-
-
-@app.post("/api/quiz/adaptive")
-async def generate_adaptive_quiz(request: AdaptiveQuizRequest):
-    """
-    Generate an adaptive practice quiz calibrated to the student's mastery level.
-    Reads mastery_level and prev_score from Firestore, auto-calibrates difficulty.
-    """
-    try:
-        mastery, prev_score = await _resolve_mastery_and_prev_score(
-            request.student_id,
-            request.topic_id,
-        )
-        
-        params = _calibrate_quiz_params(mastery, prev_score)
-        count = params["count"]
-        distribution = params["distribution"]
-        topic_info = DEPD_TOPIC_REGISTRY.get(request.topic_id, {})
-        subject = topic_info.get("subject", "General Mathematics")
-        title = topic_info.get("title", request.topic_id)
-        
-        curriculum_chunks = retrieve_curriculum_context(
-            query=f"{title} {request.topic_id} practice problems exercises",
-            subject=subject,
-            top_k=3,
-        )
-        curriculum_context = ""
-        for chunk in curriculum_chunks:
-            source = chunk.get("source_file", "unknown")
-            content = chunk.get("content", "")[:500]
-            curriculum_context += f"[Source: {source}]\n{content}\n\n---\n\n"
-        
-        quiz_id = f"QZ-{uuid.uuid4().hex[:12]}"
-        
-        rag_instr = ""
-        if curriculum_context:
-            rag_instr = f"""REFERENCE CURRICULUM:
-{curriculum_context}
-
-Base questions on this content. Do not copy directly."""
-
-        items_json = json.dumps([])
-        
-        try:
-            quiz_prompt = f"""Generate {count} quiz items for topic "{title}" (ID: {request.topic_id}).
-
-Mastery Level: {mastery}
-Difficulty Distribution: Easy={distribution['easy']}%, Medium={distribution['medium']}%, Hard={distribution['hard']}%
-Item types: mix multiple_choice, fill_in_the_blank, and word_problem.
-
-{rag_instr}
-
-Use Filipino context.
-Return ONLY this strict JSON array:
-[
-  {{
-    "type": "multiple_choice|fill_in_the_blank|word_problem",
-    "bloom_level": "remembering|understanding|applying|analyzing",
-    "difficulty": "easy|medium|hard",
-    "question": "...",
-    "options": {{"A": "...", "B": "...", "C": "...", "D": "..."}},
-    "correct_answer": "B",
-    "acceptable_range": null,
-    "solution_hint": "Short hint",
-    "competency_code": "{request.topic_id}",
-    "curriculum_reference": "DepEd SHS"
-  }}
-]"""
-            messages = [
-                {"role": "system", "content": "You are a quiz generator. Return ONLY valid JSON."},
-                {"role": "user", "content": quiz_prompt},
-            ]
-            response = await call_hf_chat_async(messages, max_tokens=4096, temperature=0.3, task_type="quiz")
-            items_json = response
-        except Exception as llm_err:
-            logger.error(f"Adaptive quiz LLM error: {llm_err}")
-        
-        import re
-        json_match = re.search(r'\[.*\]', items_json, re.DOTALL)
-        if json_match:
-            raw_items = json.loads(json_match.group())
-        else:
-            raw_items = json.loads(items_json) if items_json.strip().startswith('[') else []
-        
-        items: List[AdaptiveQuizItem] = []
-        for i, qi in enumerate(raw_items[:count]):
-            items.append(AdaptiveQuizItem(
-                item_id=f"QI-{uuid.uuid4().hex[:8]}",
-                type=qi.get("type", "multiple_choice"),
-                bloom_level=qi.get("bloom_level", "understanding"),
-                difficulty=qi.get("difficulty", "medium"),
-                question=qi.get("question", ""),
-                options=qi.get("options"),
-                correct_answer=qi.get("correct_answer", ""),
-                acceptable_range=qi.get("acceptable_range"),
-                solution_hint=qi.get("solution_hint", ""),
-                competency_code=qi.get("competency_code", request.topic_id),
-                curriculum_reference=qi.get("curriculum_reference", "DepEd SHS"),
-            ))
-        
-        return DiagnosticQuizResponse(
-            quiz_id=quiz_id,
-            topic_id=request.topic_id,
-            mastery_target_after="mastered" if mastery == "developing" else "developing" if mastery == "beginning" else "mastered",
-            items=items,
-            prev_score=prev_score,
-            difficulty_distribution=distribution,
-        )
-    except Exception as e:
-        logger.error(f"Adaptive quiz generation error: {e}\n{traceback.format_exc()}")
-        raise HTTPException(status_code=500, detail=f"Adaptive quiz error: {str(e)}")
-
-
-# ─── Learning Path Endpoint ────────────────────────────────────
-
-class DiagnosticLearningPathRequest(BaseModel):
-    student_id: str
-    strand: str = Field(default="STEM")
-    grade_level: str = Field(default="Grade 11")
-
-
-class DiagnosticLearningPathTopic(BaseModel):
-    topic_id: str
-    title: str
-    mastery_level: str
-    estimated_minutes: int
-
-
-class DiagnosticLearningPathResponse(BaseModel):
-    student_id: str
-    topics: List[DiagnosticLearningPathTopic]
-    total_estimated_hours: float
-
-
-@app.post("/api/learning/path", response_model=DiagnosticLearningPathResponse)
-async def generate_learning_path(request: DiagnosticLearningPathRequest):
-    """
-    Generate personalized learning path based on student's diagnostic results.
-    """
-    try:
-        if not HAS_FIREBASE_ADMIN or not firebase_firestore:
-            topics = []
-            for tid, info in DEPD_TOPIC_REGISTRY.items():
-                topics.append(DiagnosticLearningPathTopic(
-                    topic_id=tid,
-                    title=info["title"],
-                    mastery_level="beginning",
-                    estimated_minutes=20,
-                ))
-            return DiagnosticLearningPathResponse(
-                student_id=request.student_id,
-                topics=topics[:10],
-                total_estimated_hours=3.3,
-            )
-        
-        db = firebase_firestore.client()
-        doc = db.collection("diagnosticResults").document(request.student_id).collection("attempts").limit(1).get()
-        
-        suggested_path = []
-        if doc:
-            data = doc[0].to_dict() if doc else {}
-            suggested_path = data.get("riskProfile", {}).get("suggested_learning_path", [])
-        
-        path_topics = []
-        if suggested_path:
-            for tid in suggested_path[:10]:
-                info = DEPD_TOPIC_REGISTRY.get(tid, {})
-                path_topics.append(DiagnosticLearningPathTopic(
-                    topic_id=tid,
-                    title=info.get("title") or tid,
-                    mastery_level="beginning",
-                    estimated_minutes=20,
-                ))
-        else:
-            strand_topics = DEPD_ED_COMPETENCY_DOMAINS.get(request.strand, {}).get(request.grade_level, [])
-            for i, t in enumerate(strand_topics[:10]):
-                tid = f"NA-{(i+1):02d}-01"
-                path_topics.append(DiagnosticLearningPathTopic(
-                    topic_id=tid,
-                    title=t,
-                    mastery_level="beginning",
-                    estimated_minutes=20,
-                ))
-        
-        total_minutes = sum(t.estimated_minutes for t in path_topics)
-        
-        return DiagnosticLearningPathResponse(
-            student_id=request.student_id,
-            topics=path_topics,
-            total_estimated_hours=round(total_minutes / 60, 1),
-        )
-    except Exception as e:
-        logger.error(f"Learning path generation error: {e}")
-        raise HTTPException(status_code=500, detail=f"Learning path error: {str(e)}")
-
-
-# ─── Personalized Lesson Endpoint ──────────────────────────────
-
-class PersonalizedLessonRequest(BaseModel):
-    topic: str = Field(..., description="Lesson topic")
-    student_uid: str = Field(..., description="Student UID for profile lookup")
-    assessment_context: Optional[Dict[str, Any]] = Field(None, description="Optional assessment context")
-    subject: Optional[str] = Field(None, description="Subject area")
-    quarter: Optional[int] = Field(None, description="Quarter (1-4)")
-
-
-class PersonalizedLessonResponse(BaseModel):
-    topic: str
-    content: str
-    personalization_notes: str
-    sections: List[Dict[str, str]]
-    suggested_exercises: List[str]
-    difficulty_adjustment: str
-
-
-@app.post("/api/lesson/personalized", response_model=PersonalizedLessonResponse)
-async def generate_personalized_lesson(request: PersonalizedLessonRequest):
-    """
-    Generate a personalized lesson based on student's assessment profile.
-    Adapts content to address weaknesses and reinforce strengths.
-    """
-    try:
-        # Load student's competency profile if available
-        weaknesses = []
-        strengths = []
-        if firebase_firestore and request.student_uid:
-            try:
-                db = firebase_firestore.client()
-                profile_doc = db.collection("competencyProfiles").document(request.student_uid).get()
-                if profile_doc.exists:
-                    profile_data = profile_doc.to_dict()
-                    if profile_data and "competencies" in profile_data:
-                        for comp_id, comp_data in profile_data["competencies"].items():
-                            if comp_data.get("score", 0) < 50:
-                                weaknesses.append(comp_id)
-                            elif comp_data.get("score", 0) >= 80:
-                                strengths.append(comp_id)
-            except Exception as e:
-                logger.warning(f"Could not load competency profile: {e}")
-
-        # Retrieve curriculum context
-        context_chunks = retrieve_curriculum_context(
-            query=build_lesson_query(request.topic, request.subject or "General Mathematics", request.quarter or 1),
-            subject=request.subject,
-            quarter=request.quarter,
-            top_k=5,
-        )
-        context_text = format_retrieved_chunks(context_chunks)
-
-        # Build personalized prompt
-        prompt = f"""Generate a DepEd-aligned SHS mathematics lesson on: {request.topic}
-
-Student Assessment Profile:
-- Weaknesses to address: {', '.join(weaknesses) if weaknesses else 'None identified'}
-- Strengths to reinforce: {', '.join(strengths) if strengths else 'None identified'}
-
-Curriculum Context:
-{context_text}
-
-Instructions:
-1. Structure the lesson with: Introduction, Key Concepts, Examples, Practice Problems, Summary
-2. Include extra practice on these weak areas: {', '.join(weaknesses) if weaknesses else 'general topic areas'}
-3. Provide advanced challenges on these strong areas: {', '.join(strengths) if strengths else 'related advanced topics'}
-4. Use Filipino Senior High School appropriate language and context
-5. Reference specific DepEd MELC competencies where applicable
-
-Return as JSON with fields: topic, sections (array of title/content), suggested_exercises, personalization_notes"""
-
-        req = InferenceRequest(
-            messages=[
-                {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
-                {"role": "user", "content": prompt},
-            ],
-            task_type="rag_lesson",
-            max_new_tokens=1800,
-            temperature=0.2,
-            top_p=0.9,
-            enable_thinking=True,
-        )
-        response_text = get_inference_client().generate_from_messages(req)
-
-        # Parse JSON response
-        try:
-            # Extract JSON from response
-            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
-            if json_match:
-                lesson_data = json.loads(json_match.group())
-            else:
-                lesson_data = json.loads(response_text)
-
-            return PersonalizedLessonResponse(
-                topic=request.topic,
-                content=lesson_data.get("content", response_text),
-                personalization_notes=f"Personalized for weaknesses: {', '.join(weaknesses)}" if weaknesses else "General lesson",
-                sections=lesson_data.get("sections", []),
-                suggested_exercises=lesson_data.get("suggested_exercises", []),
-                difficulty_adjustment="supportive" if weaknesses else "standard",
-            )
-        except json.JSONDecodeError:
-            # Return raw text if JSON parsing fails
-            return PersonalizedLessonResponse(
-                topic=request.topic,
-                content=response_text,
-                personalization_notes="Raw response (JSON parsing failed)",
-                sections=[{"title": "Content", "content": response_text}],
-                suggested_exercises=[],
-                difficulty_adjustment="standard",
-            )
-
-    except Exception as e:
-        logger.error(f"Personalized lesson generation error: {e}")
-        raise HTTPException(status_code=500, detail=f"Lesson generation error: {str(e)}")
-
-
 # ─── Main ──────────────────────────────────────────────────────
 
 if __name__ == "__main__":
diff --git a/middleware/__init__.py b/middleware/__init__.py
deleted file mode 100644
index 31c0e7c06d6bc5f0da40f3a7ecd64ff301a4d5f3..0000000000000000000000000000000000000000
--- a/middleware/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Middleware package
-from .rate_limiter import rate_limiter, setup_rate_limiting, RateLimitExceeded
-
-__all__ = ["rate_limiter", "setup_rate_limiting", "RateLimitExceeded"]
\ No newline at end of file
diff --git a/middleware/rate_limiter.py b/middleware/rate_limiter.py
deleted file mode 100644
index 5fe414312440ebdbc98e209d507588ffa6a90c92..0000000000000000000000000000000000000000
--- a/middleware/rate_limiter.py
+++ /dev/null
@@ -1,184 +0,0 @@
-"""
-Rate limiting middleware using slowapi.
-"""
-import os
-import logging
-
-from fastapi import Request
-from slowapi import Limiter
-from slowapi.errors import RateLimitExceeded as SlowAPIRateLimitExceeded
-
-logger = logging.getLogger("mathpulse.ratelimit")
-
-# Environment-based configuration with defaults
-RATE_LIMIT_AI_RPM = int(os.getenv("RATE_LIMIT_AI_RPM", "20"))
-RATE_LIMIT_QUIZ_GENERATE_RPM = int(os.getenv("RATE_LIMIT_QUIZ_GENERATE_RPM", "10"))
-RATE_LIMIT_QUIZ_SUBMIT_RPM = int(os.getenv("RATE_LIMIT_QUIZ_SUBMIT_RPM", "30"))
-RATE_LIMIT_AUTH_RPM = int(os.getenv("RATE_LIMIT_AUTH_RPM", "5"))
-RATE_LIMIT_LEADERBOARD_RPM = int(os.getenv("RATE_LIMIT_LEADERBOARD_RPM", "60"))
-RATE_LIMIT_DEFAULT_RPM = int(os.getenv("RATE_LIMIT_DEFAULT_RPM", "100"))
-RATE_LIMIT_ADMIN_MULTIPLIER = int(os.getenv("RATE_LIMIT_ADMIN_MULTIPLIER", "10"))
-RATE_LIMIT_TEACHER_MULTIPLIER = int(os.getenv("RATE_LIMIT_TEACHER_MULTIPLIER", "3"))
-
-# Role multipliers for rate limit adjustment
-ROLE_MULTIPLIERS = {
-    "admin": RATE_LIMIT_ADMIN_MULTIPLIER,
-    "teacher": RATE_LIMIT_TEACHER_MULTIPLIER,
-    "student": 1,
-}
-
-
-def _get_user_identifier(request: Request) -> str:
-    """
-    Extract user identifier for rate limiting.
-    Uses Firebase UID from request.state.user if authenticated, otherwise falls back to IP.
-    """
-    user = getattr(request.state, "user", None)
-    if user and hasattr(user, "uid") and user.uid:
-        return f"uid:{user.uid}"
-
-    if request.client:
-        return f"ip:{request.client.host}"
-    return "ip:unknown"
-
-
-def _get_user_role(request: Request) -> str:
-    """Get user role from request state for multiplier calculation."""
-    user = getattr(request.state, "user", None)
-    if user and hasattr(user, "role") and user.role:
-        return user.role
-    return "student"
-
-
-def _get_role_multiplier(request: Request) -> int:
-    """Get rate limit multiplier based on user role."""
-    role = _get_user_role(request)
-    return ROLE_MULTIPLIERS.get(role, 1)
-
-
-class MathPulseLimiter:
-    """
-    Rate limiter with role-aware multipliers for MathPulse AI.
-    """
-
-    def __init__(self) -> None:
-        self._limiter = Limiter(
-            key_func=_get_user_identifier,
-            storage_uri="memory://",
-            default_limits=[f"{RATE_LIMIT_DEFAULT_RPM}/minute"],
-        )
-
-    @property
-    def limiter(self) -> Limiter:
-        return self._limiter
-
-    def _get_adjusted_limit(self, base_rpm: int, request: Request) -> int:
-        """Apply role multiplier to base rate limit."""
-        multiplier = _get_role_multiplier(request)
-        return base_rpm * multiplier
-
-    def ai_limit(self, request: Request) -> str:
-        """Rate limit for AI endpoints with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_AI_RPM, request)
-        return f"{limit}/minute"
-
-    def quiz_generate_limit(self, request: Request) -> str:
-        """Rate limit for quiz generation with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_QUIZ_GENERATE_RPM, request)
-        return f"{limit}/minute"
-
-    def quiz_submit_limit(self, request: Request) -> str:
-        """Rate limit for quiz submission with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_QUIZ_SUBMIT_RPM, request)
-        return f"{limit}/minute"
-
-    def auth_limit(self, request: Request) -> str:
-        """Rate limit for auth endpoints with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_AUTH_RPM, request)
-        return f"{limit}/minute"
-
-    def leaderboard_limit(self, request: Request) -> str:
-        """Rate limit for leaderboard with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_LEADERBOARD_RPM, request)
-        return f"{limit}/minute"
-
-    def default_limit(self, request: Request) -> str:
-        """Default rate limit with role adjustment."""
-        limit = self._get_adjusted_limit(RATE_LIMIT_DEFAULT_RPM, request)
-        return f"{limit}/minute"
-
-
-# Global rate limiter instance
-rate_limiter = MathPulseLimiter()
-
-
-def setup_rate_limiting(app) -> None:
-    """
-    Set up rate limiting for the FastAPI application.
-    """
-
-    # Add limiter to app state
-    app.state.limiter = rate_limiter.limiter
-
-    # Add slowapi exception handler
-    app.add_exception_handler(
-        SlowAPIRateLimitExceeded,
-        lambda request, exc: _rate_limit_exceeded_handler(request, exc)
-    )
-
-    logger.info(
-        f"Rate limiting configured: AI={RATE_LIMIT_AI_RPM}/min, "
-        f"QuizGen={RATE_LIMIT_QUIZ_GENERATE_RPM}/min, "
-        f"Auth={RATE_LIMIT_AUTH_RPM}/min, "
-        f"Admin={RATE_LIMIT_ADMIN_MULTIPLIER}x, Teacher={RATE_LIMIT_TEACHER_MULTIPLIER}x"
-    )
-
-
-def _rate_limit_exceeded_handler(request: Request, exc: SlowAPIRateLimitExceeded):
-    """Handle rate limit exceeded errors with proper JSON response."""
-    from fastapi.responses import JSONResponse
-
-    retry_after = getattr(exc, "retry_after", 60)
-    return JSONResponse(
-        status_code=429,
-        content={
-            "error": "rate_limit_exceeded",
-            "message": "Too many requests. Please try again later.",
-            "retry_after": retry_after,
-        },
-        headers={
-            "Retry-After": str(retry_after),
-            "Content-Type": "application/json",
-        }
-    )
-
-
-# Decorator helpers
-def ai_rate_limit():
-    """Decorator for AI endpoint rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.ai_limit)
-
-
-def quiz_generate_rate_limit():
-    """Decorator for quiz generation rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.quiz_generate_limit)
-
-
-def quiz_submit_rate_limit():
-    """Decorator for quiz submit rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.quiz_submit_limit)
-
-
-def auth_rate_limit():
-    """Decorator for auth endpoint rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.auth_limit)
-
-
-def leaderboard_rate_limit():
-    """Decorator for leaderboard rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.leaderboard_limit)
-
-
-def default_rate_limit():
-    """Decorator for default rate limiting."""
-    return rate_limiter.limiter.limit(rate_limiter.default_limit)
\ No newline at end of file
diff --git a/pre_deploy_check.py b/pre_deploy_check.py
index d3fcb1dfa88b75ce8208d5ffa25337bc82d24b0a..5a474ed66db1d08b1ee0cf514af7bed6b79b61e6 100644
--- a/pre_deploy_check.py
+++ b/pre_deploy_check.py
@@ -16,16 +16,8 @@ Exit codes:
 import sys
 import os
 
-# Add repo root to path (for services/ delegation) AND backend to path
-_repo_root = os.path.dirname(os.path.abspath(__file__))
-_parent = os.path.dirname(_repo_root)
-_backend = _repo_root
-
-# Add in order: parent first (so services/ can delegate), then backend (for when services/__init__.py tries to import)
-if _parent not in sys.path:
-    sys.path.insert(0, _parent)
-if _backend not in sys.path:
-    sys.path.insert(0, _backend)
+# Add backend to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
 
 def main() -> int:
     """Run pre-deployment checks."""
diff --git a/rag/__init__.py b/rag/__init__.py
index 2ade6fc8ce5b94887216d091e06dcc440210a3d5..4f9f6251f71752b272f0cc4ee2bdda8386ebfa6c 100644
--- a/rag/__init__.py
+++ b/rag/__init__.py
@@ -5,19 +5,11 @@ from .curriculum_rag import (
     build_lesson_prompt,
     build_problem_generation_prompt,
     build_analysis_curriculum_context,
-    build_lesson_query,
-    format_retrieved_chunks,
-    summarize_retrieval_confidence,
 )
-from .vectorstore_loader import reset_vectorstore_singleton
 
 __all__ = [
     "retrieve_curriculum_context",
     "build_lesson_prompt",
     "build_problem_generation_prompt",
     "build_analysis_curriculum_context",
-    "build_lesson_query",
-    "format_retrieved_chunks",
-    "summarize_retrieval_confidence",
-    "reset_vectorstore_singleton",
-]
\ No newline at end of file
+]
diff --git a/rag/curriculum_rag.py b/rag/curriculum_rag.py
index e54f4dfebb48a40b845fd707217148f0b870654c..f60f988bea8a596553a62cc587b89619a76af077 100644
--- a/rag/curriculum_rag.py
+++ b/rag/curriculum_rag.py
@@ -1,10 +1,8 @@
-"""
-Updated curriculum RAG with exact match retrieval and 7-section notebook output.
-"""
-
 from __future__ import annotations
 
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional
+
+from .vectorstore_loader import get_vectorstore_components
 
 
 def _to_where(
@@ -12,10 +10,6 @@ def _to_where(
     quarter: Optional[int] = None,
     content_domain: Optional[str] = None,
     chunk_type: Optional[str] = None,
-    module_id: Optional[str] = None,
-    lesson_id: Optional[str] = None,
-    competency_code: Optional[str] = None,
-    storage_path: Optional[str] = None,
 ) -> Optional[Dict[str, object]]:
     clauses = []
     if subject:
@@ -26,14 +20,6 @@ def _to_where(
         clauses.append({"content_domain": {"$eq": content_domain}})
     if chunk_type:
         clauses.append({"chunk_type": {"$eq": chunk_type}})
-    if module_id:
-        clauses.append({"module_id": {"$eq": module_id}})
-    if lesson_id:
-        clauses.append({"lesson_id": {"$eq": lesson_id}})
-    if competency_code:
-        clauses.append({"competency_code": {"$eq": competency_code}})
-    if storage_path:
-        clauses.append({"storage_path": {"$eq": storage_path}})
     if not clauses:
         return None
     if len(clauses) == 1:
@@ -42,6 +28,7 @@ def _to_where(
 
 
 def _distance_to_score(distance: float) -> float:
+    # Chroma returns smaller distance for better matches. Map to (0,1].
     return round(1.0 / (1.0 + max(distance, 0.0)), 4)
 
 
@@ -51,23 +38,12 @@ def retrieve_curriculum_context(
     quarter: int | None = None,
     content_domain: str | None = None,
     chunk_type: str | None = None,
-    module_id: str | None = None,
-    lesson_id: str | None = None,
-    competency_code: str | None = None,
-    storage_path: str | None = None,
-    top_k: int = 8,
+    top_k: int = 5,
 ) -> list[dict]:
-    from rag.vectorstore_loader import get_vectorstore_components
-
     _, collection, embedder = get_vectorstore_components()
-    where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)
-
-    prefixed_query = f"Represent this sentence for searching relevant passages: {query}"
-    query_embedding = embedder.encode(
-        prefixed_query,
-        normalize_embeddings=True,
-    ).tolist()
+    where = _to_where(subject, quarter, content_domain, chunk_type)
 
+    query_embedding = embedder.encode(query).tolist()
     result = collection.query(
         query_embeddings=[query_embedding],
         n_results=max(1, top_k),
@@ -83,39 +59,20 @@ def retrieve_curriculum_context(
     for idx, content in enumerate(documents):
         md = metadatas[idx] if idx < len(metadatas) and isinstance(metadatas[idx], dict) else {}
         distance = float(distances[idx]) if idx < len(distances) else 1.0
-        rows.append({
-            "content": str(content or ""),
-            "subject": str(md.get("subject") or "unknown"),
-            "quarter": int(md.get("quarter") or 0),
-            "content_domain": str(md.get("content_domain") or "general"),
-            "chunk_type": str(md.get("chunk_type") or "concept"),
-            "source_file": str(md.get("source_file") or ""),
-            "storage_path": str(md.get("storage_path") or ""),
-            "module_id": str(md.get("module_id") or ""),
-            "lesson_id": str(md.get("lesson_id") or ""),
-            "competency_code": str(md.get("competency_code") or ""),
-            "page": int(md.get("page") or 0),
-            "score": _distance_to_score(distance),
-        })
-    return rows
-
+        rows.append(
+            {
+                "content": str(content or ""),
+                "subject": str(md.get("subject") or "unknown"),
+                "quarter": int(md.get("quarter") or 0),
+                "content_domain": str(md.get("content_domain") or "unknown"),
+                "chunk_type": str(md.get("chunk_type") or "unknown"),
+                "source_file": str(md.get("source_file") or ""),
+                "page": int(md.get("page") or 0),
+                "score": _distance_to_score(distance),
+            }
+        )
 
-def build_exact_lesson_query(
-    topic: str,
-    subject: str,
-    quarter: int,
-    lesson_title: str | None = None,
-    competency: str | None = None,
-    module_unit: str | None = None,
-    learner_level: str | None = None,
-    competency_code: str | None = None,
-) -> str:
-    parts = [topic, subject, f"Quarter {quarter}"]
-    for value in (lesson_title, competency, module_unit, learner_level, competency_code):
-        clean = str(value or "").strip()
-        if clean:
-            parts.append(clean)
-    return " | ".join(parts)
+    return rows
 
 
 def build_lesson_query(
@@ -136,120 +93,30 @@ def build_lesson_query(
     return " | ".join(parts)
 
 
-def retrieve_lesson_pdf_context(
-    topic: str,
-    subject: str,
-    quarter: int,
-    lesson_title: str | None = None,
-    competency: str | None = None,
-    module_id: str | None = None,
-    lesson_id: str | None = None,
-    competency_code: str | None = None,
-    storage_path: str | None = None,
-    top_k: int = 8,
-) -> Tuple[list[dict], str]:
-    """Retrieve chunks by storage_path exact match + semantic ranking; fallback to general query.
-    
-    NOTE: Curriculum PDF chunks are often tagged with quarter=1 even when they cover all quarters.
-    We first try the exact quarter, then fallback to quarter=1, then no quarter filter.
-    """
-    # Try 1: Exact match with storage_path + quarter
-    if storage_path:
-        exact_chunks = retrieve_curriculum_context(
-            query=topic,
-            subject=subject,
-            quarter=quarter,
-            storage_path=storage_path,
-            top_k=top_k,
-        )
-        if exact_chunks and any(c["score"] >= 0.65 for c in exact_chunks):
-            return exact_chunks, "exact"
-
-    # Try 2: General query with exact quarter
-    general_chunks = retrieve_curriculum_context(
-        query=topic,
-        subject=subject,
-        quarter=quarter,
-        top_k=top_k,
-    )
-    
-    # Try 3: Fallback to quarter=1 (most curriculum PDFs are tagged Q1)
-    if not general_chunks and quarter != 1:
-        general_chunks = retrieve_curriculum_context(
-            query=topic,
-            subject=subject,
-            quarter=1,
-            top_k=top_k,
-        )
-    
-    # Try 4: Final fallback - no quarter filter at all
-    if not general_chunks:
-        general_chunks = retrieve_curriculum_context(
-            query=topic,
-            subject=subject,
-            top_k=top_k,
-        )
-
-    if storage_path and exact_chunks:
-        all_chunks = exact_chunks + general_chunks
-        seen = set()
-        deduped = []
-        for c in all_chunks:
-            key = f"{c.get('source_file')}:{c.get('page')}:{c.get('content', '')[:60]}"
-            if key not in seen:
-                seen.add(key)
-                deduped.append(c)
-        deduped.sort(key=lambda x: x.get("score", 0), reverse=True)
-        return deduped[:top_k], "hybrid"
-
-    return general_chunks, "general"
-
-
 def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
-    refs = []
+    references = []
     for i, chunk in enumerate(curriculum_chunks, start=1):
-        refs.append(
+        references.append(
             f"{i}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
             f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) score={chunk.get('score')}\n"
             f"   Excerpt: {chunk.get('content', '')}"
         )
-    return "\n".join(refs) if refs else "No curriculum context retrieved."
+    return "\n".join(references) if references else "No curriculum context retrieved."
 
 
-def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, any]:
+def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, float | str]:
     if not curriculum_chunks:
-        return {"confidence": 0.0, "band": "low", "chunkCount": 0}
+        return {"confidence": 0.0, "band": "low"}
 
-    top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
+    top_scores = [float(chunk.get("score") or 0.0) for chunk in curriculum_chunks[:5]]
     score = sum(top_scores) / max(1, len(top_scores))
-    band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
-    return {"confidence": round(score, 3), "band": band, "chunkCount": len(curriculum_chunks)}
-
-
-def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
-    """Organize retrieved chunks into lesson section categories."""
-    sections: Dict[str, List[dict]] = {
-        "introduction": [],
-        "key_concepts": [],
-        "worked_examples": [],
-        "important_notes": [],
-        "practice": [],
-        "summary": [],
-        "assessment": [],
-        "general": [],
-    }
-    domain_priority = {
-        "introduction": 1, "key_concepts": 2, "worked_examples": 3,
-        "important_notes": 4, "practice": 5, "summary": 6,
-        "assessment": 7, "general": 8,
-    }
-    for chunk in chunks:
-        domain = chunk.get("content_domain", "general")
-        if domain in sections:
-            sections[domain].append(chunk)
-        else:
-            sections["general"].append(chunk)
-    return sections
+    if score >= 0.72:
+        band = "high"
+    elif score >= 0.5:
+        band = "medium"
+    else:
+        band = "low"
+    return {"confidence": round(score, 3), "band": band}
 
 
 def build_lesson_prompt(
@@ -262,57 +129,39 @@ def build_lesson_prompt(
     learner_level: Optional[str],
     module_unit: Optional[str],
     curriculum_chunks: list[dict],
-    competency_code: Optional[str] = None,
 ) -> str:
     refs_text = format_retrieved_chunks(curriculum_chunks)
-    organized = organize_chunks_by_section(curriculum_chunks)
-
     return (
-        "You are a DepEd-aligned Grade 11-12 mathematics instructional designer.\n"
-        "Generate a lesson in JSON format. Use ONLY the retrieved curriculum evidence below.\n"
-        "Do NOT invent content. Do NOT add generic motivational text. All content must be grounded in the retrieved excerpts.\n\n"
+        "You are a Grade 11-12 DepEd SHS math instructional designer.\n"
+        "Generate JSON only. Use ONLY the retrieved curriculum evidence below. Do not invent competencies or content beyond the retrieved scope.\n\n"
         f"Lesson title: {lesson_title}\n"
-        f"Competency code: {competency_code or 'n/a'}\n"
         f"Curriculum competency: {competency}\n"
         f"Grade level: {grade_level}\n"
         f"Subject: {subject}\n"
         f"Quarter: Q{quarter}\n"
-        f"Learner level: {learner_level or 'Grade 11-12'}\n"
+        f"Learner level: {learner_level or 'mixed'}\n"
         f"Module/unit: {module_unit or 'n/a'}\n\n"
         "[CURRICULUM CONTEXT]\n"
         f"{refs_text}\n\n"
-        "Return ONLY valid JSON with this exact structure. All 7 sections are required:\n"
-        "{\n"
-        '  "sections": [\n'
-        '    {"type": "introduction",    "title": "Introduction",       "content": "..."},\n'
-        '    {"type": "key_concepts",    "title": "Key Concepts",      "content": "...", "callouts": [{"type":"important|ti..."}]\n},'
-        '    {"type": "video",           "title": "Video Lesson",      "content": "...", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},\n'
-        '    {"type": "worked_examples",  "title": "Worked Examples",    "examples": [{"problem":"...","steps":["Step 1: ...","Step 2: ..."],"answer":"..."}]},\n'
-        '    {"type": "important_notes",  "title": "Important Notes",   "bulletPoints": ["...","..."]},\n'
-        '    {"type": "try_it_yourself", "title": "Try It Yourself",   "practiceProblems": [{"question":"...","solution":"..."}]},\n'
-        '    {"type": "summary",         "title": "Summary",           "content": "..."}\n'
-        "  ],\n"
-        '  "needsReview": false\n'
-        "}\n\n"
+        "Return JSON with these keys only:\n"
+        "lessonTitle, curriculumCompetency, lessonObjective, realWorldHook, explanation, workedExample, guidedPractice, independentPractice, quickAssessment, reflectionPrompt, sourceCitations, needsReview, reviewReason\n\n"
         "Rules:\n"
-        "- content in introduction, key_concepts, important_notes, summary: use paragraph/bullet text grounded in retrieved chunks\n"
-        "- examples must reflect actual content from the retrieved curriculum (real formulas, real contexts)\n"
-        "- practiceProblems should be derivable from worked examples\n"
-        "- callouts: type is 'important', 'tip', or 'warning'\n"
-        "- video section: content is a brief sentence, leave videoId empty (will be filled by backend)\n"
-        "- Do not use placeholder text like 'placeholder' or 'example text'\n"
-        "- Do not fabricate worked examples - use actual curriculum content\n"
+        "- Keep the lesson age-appropriate for SHS learners.\n"
+        "- Use real Philippine contexts where possible, such as payroll, VAT, discounts, loans, logistics, travel, or school data.\n"
+        "- If evidence is thin, set needsReview=true and explain why in reviewReason.\n"
+        "- Do not mention unsupported curriculum facts.\n"
+        "- sourceCitations should be an array of short citations referencing the retrieved excerpts."
     )
 
 
 def build_problem_generation_prompt(topic: str, difficulty: str, curriculum_chunks: list[dict]) -> str:
-    refs = []
+    references = []
     for i, chunk in enumerate(curriculum_chunks, start=1):
-        refs.append(
+        references.append(
             f"{i}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
             f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) {chunk.get('content', '')}"
         )
-    refs_text = "\n".join(refs) if refs else "No curriculum context retrieved."
+    refs_text = "\n".join(references) if references else "No curriculum context retrieved."
 
     return (
         "Generate one practice problem strictly aligned to the retrieved DepEd competency scope.\n"
@@ -335,7 +184,7 @@ def build_analysis_curriculum_context(weak_topics: list[str], subject: str) -> l
             top_k=2,
         )
         for row in rows:
-            key = f"{row.get('source_file')}::{row.get('page')}::{row.get('content', '')[:80]}"
+            key = f"{row.get('source_file')}::{row.get('page')}::{row.get('content')[:80]}"
             if key not in dedup:
                 dedup[key] = row
-    return list(dedup.values())
\ No newline at end of file
+    return list(dedup.values())
diff --git a/rag/firebase_storage_loader.py b/rag/firebase_storage_loader.py
deleted file mode 100644
index f39a3e6b6afc555c45c26ff1c8864edf6909bc61..0000000000000000000000000000000000000000
--- a/rag/firebase_storage_loader.py
+++ /dev/null
@@ -1,175 +0,0 @@
-"""
-Firebase Storage PDF loader for curriculum ingestion.
-Downloads PDFs from Firebase Storage and extracts text for ChromaDB indexing.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from pathlib import Path
-from typing import Dict, List, Optional, Tuple
-
-logger = logging.getLogger("mathpulse.fb_storage_loader")
-
-_FIREBASE_INITIALIZED = False
-
-
-def _init_firebase_storage() -> Tuple[any, any]:
-    global _FIREBASE_INITIALIZED
-
-    if _FIREBASE_INITIALIZED:
-        try:
-            from firebase_admin import storage as fb_storage
-            bucket = fb_storage.bucket()
-            return fb_storage, bucket
-        except Exception as e:
-            logger.warning("Firebase storage unavailable: %s", e)
-            _FIREBASE_INITIALIZED = False
-            return None, None
-
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage
-    except ImportError:
-        logger.warning("firebase_admin not installed")
-        return None, None
-
-    if firebase_admin._apps:
-        _FIREBASE_INITIALIZED = True
-        try:
-            bucket = storage.bucket()
-            return storage, bucket
-        except Exception as e:
-            logger.warning("Firebase storage bucket unavailable: %s", e)
-            return None, None
-
-    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
-
-    try:
-        if sa_json:
-            import json as _json
-            creds = credentials.Certificate(_json.loads(sa_json))
-        elif sa_file and Path(sa_file).exists():
-            creds = credentials.Certificate(sa_file)
-        else:
-            creds = credentials.ApplicationDefault()
-
-        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-        _FIREBASE_INITIALIZED = True
-        bucket = storage.bucket()
-        return storage, bucket
-    except Exception as e:
-        logger.warning("Firebase init failed: %s", e)
-        return None, None
-
-
-def download_pdf_from_storage(storage_path: str, dest_path: Optional[str] = None) -> Optional[bytes]:
-    """Download a PDF from Firebase Storage and return its bytes."""
-    _, bucket = _init_firebase_storage()
-    if bucket is None:
-        logger.warning("Firebase Storage not available, skipping download")
-        return None
-
-    try:
-        blob = bucket.blob(storage_path)
-        if not blob.exists():
-            logger.warning("Blob does not exist: %s", storage_path)
-            return None
-        bytes_data = blob.download_as_bytes()
-        logger.info("Downloaded %s (%d bytes)", storage_path, len(bytes_data))
-
-        if dest_path:
-            Path(dest_path).parent.mkdir(parents=True, exist_ok=True)
-            with open(dest_path, "wb") as f:
-                f.write(bytes_data)
-            logger.info("Saved to %s", dest_path)
-
-        return bytes_data
-    except Exception as e:
-        logger.error("Failed to download %s: %s", storage_path, e)
-        return None
-
-
-def list_curriculum_blobs(prefix: str = "curriculum/") -> List[Dict[str, str]]:
-    """List all blobs under a prefix in Firebase Storage."""
-    _, bucket = _init_firebase_storage()
-    if bucket is None:
-        return []
-
-    blobs = bucket.list_blobs(prefix=prefix)
-    result = []
-    for blob in blobs:
-        if blob.name.endswith(".pdf"):
-            result.append({
-                "name": blob.name,
-                "size": blob.size,
-                "updated": str(blob.updated) if blob.updated else None,
-                "download_url": f"https://storage.googleapis.com/{bucket.name}/{blob.name}",
-            })
-    return result
-
-
-# NOTE: Curriculum guide PDFs (shaping papers) are stored in Firebase Storage
-# for system reference but are NOT included in RAG ingestion because they
-# contain only learning objectives and course descriptions — insufficient
-# content for lesson generation (typically <10 chunks each).
-#
-# Only SDO teaching modules (full lesson content with examples and problems)
-# are included in the RAG pipeline.
-
-PDF_METADATA: Dict[str, dict] = {
-    # General Mathematics Q1 — SDO Navotas teaching module (100 pages, ~117k chars)
-    "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 1,
-        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
-    },
-    # General Mathematics Q2 — Interest & Annuities modules (~27-35 pages each)
-    "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod1_simpleandcompoundinterests_v2.pdf",
-    },
-    "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod2_interestmaturityfutureandpresentvaluesinsimpleandcompoundinterests_v2.pdf",
-    },
-    "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod3_SolvingProblemsInvolvingSimpleandCompoundInterest_v2.pdf",
-    },
-    "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf": {
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "type": "sdo_module",
-        "content_domain": "general",
-        "quarter": 2,
-        "storage_path": "curriculum/general_math/genmath_q2_mod4_simpleandgeneralannuities_v2.pdf",
-    },
-    # Statistics and Probability — Full textbook (331 pages, ~607k chars)
-    "curriculum/stat_prob/Full.pdf": {
-        "subject": "Statistics and Probability",
-        "subjectId": "stats-prob",
-        "type": "sdo_module",
-        "content_domain": "statistics",
-        "quarter": 1,
-        "storage_path": "curriculum/stat_prob/Full.pdf",
-    },
-}
\ No newline at end of file
diff --git a/rag/pdf_ingestion.py b/rag/pdf_ingestion.py
deleted file mode 100644
index 906c4ffeede5f7a12faf4e6e90e3571310f157db..0000000000000000000000000000000000000000
--- a/rag/pdf_ingestion.py
+++ /dev/null
@@ -1,368 +0,0 @@
-"""
-PDF Ingestion Module for Quiz Battle RAG Question Bank.
-
-Ingests PDFs from Firebase Storage, extracts text, chunks content,
-generates embeddings, calls DeepSeek to produce base questions,
-and stores results in Firestore.
-"""
-
-import asyncio
-import hashlib
-import io
-import json
-import logging
-import os
-import random
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from typing import Optional
-
-from google.cloud.firestore import Client
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from sentence_transformers import SentenceTransformer
-import pypdf
-
-from rag.firebase_storage_loader import _init_firebase_storage
-from services.ai_client import get_deepseek_client, CHAT_MODEL
-
-logger = logging.getLogger(__name__)
-
-EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "all-MiniLM-L6-v2")
-DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
-
-
-@dataclass
-class IngestionResult:
-    """Result of a PDF ingestion operation."""
-
-    filename: str
-    processed: bool
-    question_count: int
-    grade_level: int
-    topic: str
-    storage_path: str
-    timestamp: datetime
-
-
-def _extract_filename(storage_path: str) -> str:
-    """Extract filename from a Firebase Storage path."""
-    return storage_path.split("/")[-1]
-
-
-def _generate_chunk_id(source_chunk_id: str, question_text: str) -> str:
-    """Generate a unique document ID for a question."""
-    return hashlib.md5(f"{source_chunk_id}:{question_text}".encode()).hexdigest()
-
-
-def _strip_json_fences(text: str) -> str:
-    """Strip markdown JSON fences from text."""
-    text = text.strip()
-    if text.startswith("```json"):
-        text = text[7:]
-    if text.startswith("```"):
-        text = text[3:]
-    if text.endswith("```"):
-        text = text[:-3]
-    return text.strip()
-
-
-async def _generate_questions_for_chunk(
-    chunk_text: str,
-    chunk_id: str,
-    topic: str,
-    grade_level: int,
-    deepseek_client,
-) -> list[dict]:
-    """Call DeepSeek to generate MCQs for a text chunk."""
-    system_prompt = (
-        "You are a DepEd-aligned math question generator for Filipino students. "
-        "Given a curriculum excerpt, generate 5 multiple-choice questions. "
-        "Return ONLY a JSON array. No markdown, no explanation."
-    )
-
-    user_prompt = f"""Given this curriculum excerpt:
-<chunk>
-{chunk_text}
-</chunk>
-
-Generate 5 multiple-choice questions. For each question output JSON:
-{{
-  "question": "...",
-  "choices": ["A) ...", "B) ...", "C) ...", "D) ..."],
-  "correct_answer": "A",
-  "explanation": "...",
-  "topic": "{topic}",
-  "difficulty": "easy|medium|hard",
-  "grade_level": {grade_level},
-  "source_chunk_id": "{chunk_id}"
-}}
-Return a JSON array only, no extra text."""
-
-    try:
-        response = deepseek_client.chat.completions.create(
-            model=CHAT_MODEL,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt},
-            ],
-            temperature=0.7,
-        )
-        raw_response = response.choices[0].message.content
-        clean_response = _strip_json_fences(raw_response)
-        questions = json.loads(clean_response)
-        return questions if isinstance(questions, list) else []
-    except json.JSONDecodeError as e:
-        logger.error(f"Failed to parse DeepSeek response as JSON for chunk {chunk_id}: {e}")
-        return []
-    except Exception as e:
-        logger.error(f"Error calling DeepSeek for chunk {chunk_id}: {e}")
-        return []
-
-
-def _chunk_text(text: str) -> list[str]:
-    """Split text into chunks using RecursiveCharacterTextSplitter."""
-    splitter = RecursiveCharacterTextSplitter(
-        chunk_size=500,
-        chunk_overlap=50,
-        length_function=len,
-        separators=["\n\n", "\n", " ", ""],
-    )
-    return splitter.split_text(text)
-
-
-def _extract_pdf_text(pdf_bytes: bytes) -> str:
-    """Extract text from PDF bytes using pypdf."""
-    reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
-    text_parts = []
-    for page in reader.pages:
-        text_parts.append(page.extract_text())
-    return "\n".join(text_parts)
-
-
-async def _save_questions_batch(
-    firestore_client: Client,
-    questions: list[dict],
-    grade_level: int,
-    topic: str,
-) -> int:
-    """Save questions to Firestore using batch writes. Returns count saved."""
-    batch = firestore_client.batch()
-    question_count = 0
-
-    for question in questions:
-        doc_id = question.get("id") or _generate_chunk_id(
-            question.get("source_chunk_id", ""),
-            question.get("question", ""),
-        )
-        doc_ref = firestore_client.collection("question_bank").document(
-            str(grade_level)
-        ).collection(topic).document("questions").collection("questions").document(doc_id)
-
-        doc_data = {
-            "question": question.get("question", ""),
-            "choices": question.get("choices", []),
-            "correct_answer": question.get("correct_answer", ""),
-            "explanation": question.get("explanation", ""),
-            "topic": question.get("topic", topic),
-            "difficulty": question.get("difficulty", "medium"),
-            "grade_level": question.get("grade_level", grade_level),
-            "source_chunk_id": question.get("source_chunk_id", ""),
-            "random_seed": random.random(),
-            "created_at": datetime.now(timezone.utc),
-        }
-        batch.set(doc_ref, doc_data)
-        question_count += 1
-
-        if question_count % 500 == 0:
-            await batch.commit()
-            batch = firestore_client.batch()
-
-    await batch.commit()
-    return question_count
-
-
-async def _save_embeddings_batch(
-    firestore_client: Client,
-    chunks: list[dict],
-    filename: str,
-) -> int:
-    """Save chunk embeddings to Firestore. Returns count saved."""
-    batch = firestore_client.batch()
-    count = 0
-
-    for chunk in chunks:
-        chunk_id = chunk["id"]
-        doc_ref = firestore_client.collection("question_bank_embeddings").document(chunk_id)
-        doc_data = {
-            "chunk_id": chunk_id,
-            "text": chunk["text"],
-            "embedding": chunk["embedding"],
-            "filename": filename,
-            "created_at": datetime.now(timezone.utc),
-        }
-        batch.set(doc_ref, doc_data)
-        count += 1
-
-        if count % 500 == 0:
-            await batch.commit()
-            batch = firestore_client.batch()
-
-    await batch.commit()
-    return count
-
-
-async def _save_processing_manifest(
-    firestore_client: Client,
-    filename: str,
-    question_count: int,
-    chunk_count: int,
-    grade_level: int,
-    topic: str,
-    storage_path: str,
-) -> None:
-    """Save processing manifest to Firestore."""
-    doc_ref = firestore_client.collection("pdf_processing_status").document(filename)
-    doc_data = {
-        "filename": filename,
-        "question_count": question_count,
-        "chunk_count": chunk_count,
-        "grade_level": grade_level,
-        "topic": topic,
-        "storage_path": storage_path,
-        "processed_at": datetime.now(timezone.utc),
-        "status": "completed",
-    }
-    await doc_ref.set(doc_data)
-
-
-async def ingest_pdf(
-    storage_path: str,
-    grade_level: int,
-    topic: str,
-    force_reingest: bool = False,
-) -> IngestionResult:
-    """
-    Ingest a PDF from Firebase Storage, generate questions, and store in Firestore.
-
-    Args:
-        storage_path: Path to PDF in Firebase Storage (e.g., "rag-pdfs/filename.pdf")
-        grade_level: Grade level (11 or 12)
-        topic: Topic identifier for the questions
-        force_reingest: If True, reprocess even if already processed
-
-    Returns:
-        IngestionResult with processing summary
-    """
-    filename = _extract_filename(storage_path)
-    project_id = os.getenv("FIREBASE_AUTH_PROJECT_ID", DEFAULT_FIREBASE_PROJECT)
-    firestore_client = Client(project=project_id)
-
-    # Step 1: Check if already processed
-    if not force_reingest:
-        status_ref = firestore_client.collection("pdf_processing_status").document(filename)
-        status_doc = await status_ref.get()
-        if status_doc.exists:
-            logger.info(f"PDF {filename} already processed, skipping (use force_reingest=True to override)")
-            data = status_doc.to_dict() or {}
-            return IngestionResult(
-                filename=filename,
-                processed=True,
-                question_count=data.get("question_count", 0),
-                grade_level=data.get("grade_level", grade_level),
-                topic=data.get("topic", topic),
-                storage_path=data.get("storage_path", storage_path),
-                timestamp=data.get("timestamp", datetime.now(timezone.utc)),
-            )
-
-    # Step 2: Download PDF from Firebase Storage
-    try:
-        _, bucket = _init_firebase_storage()
-        blob = bucket.blob(storage_path)
-        pdf_bytes = blob.download_as_bytes()
-    except Exception as e:
-        logger.error(f"Failed to download PDF from Firebase Storage: {e}")
-        return IngestionResult(
-            filename=filename,
-            processed=False,
-            question_count=0,
-            grade_level=grade_level,
-            topic=topic,
-            storage_path=storage_path,
-            timestamp=datetime.now(timezone.utc),
-        )
-
-    # Step 3: Extract text from PDF
-    try:
-        text = _extract_pdf_text(pdf_bytes)
-    except Exception as e:
-        logger.error(f"Failed to extract text from PDF: {e}")
-        return IngestionResult(
-            filename=filename,
-            processed=False,
-            question_count=0,
-            grade_level=grade_level,
-            topic=topic,
-            storage_path=storage_path,
-            timestamp=datetime.now(timezone.utc),
-        )
-
-    # Step 4: Chunk text
-    chunks = _chunk_text(text)
-
-    # Step 5: Generate embeddings
-    embedding_model = SentenceTransformer(EMBEDDING_MODEL)
-    chunk_ids = []
-    chunk_data = []
-
-    for i, chunk_text in enumerate(chunks):
-        chunk_id = hashlib.md5(f"{filename}:{i}:{chunk_text[:100]}".encode()).hexdigest()
-        embedding = embedding_model.encode(chunk_text).tolist()
-        chunk_ids.append(chunk_id)
-        chunk_data.append({
-            "id": chunk_id,
-            "text": chunk_text,
-            "embedding": embedding,
-        })
-
-    # Step 6: Initialize DeepSeek client
-    deepseek_client = get_deepseek_client()
-
-    # Step 7: Generate questions for each chunk
-    all_questions = []
-    for i, chunk_text in enumerate(chunks):
-        chunk_id = chunk_ids[i]
-        questions = await _generate_questions_for_chunk(
-            chunk_text, chunk_id, topic, grade_level, deepseek_client
-        )
-        for q in questions:
-            q["id"] = _generate_chunk_id(chunk_id, q.get("question", ""))
-        all_questions.extend(questions)
-
-    # Step 8: Save questions to Firestore
-    question_count = await _save_questions_batch(
-        firestore_client, all_questions, grade_level, topic
-    )
-
-    # Step 9: Save embeddings to Firestore
-    await _save_embeddings_batch(firestore_client, chunk_data, filename)
-
-    # Step 10: Save manifest to Firestore
-    await _save_processing_manifest(
-        firestore_client, filename, question_count, len(chunks),
-        grade_level, topic, storage_path
-    )
-
-    logger.info(
-        f"Completed ingestion for {filename}: {question_count} questions, "
-        f"{len(chunks)} chunks"
-    )
-
-    return IngestionResult(
-        filename=filename,
-        processed=True,
-        question_count=question_count,
-        grade_level=grade_level,
-        topic=topic,
-        storage_path=storage_path,
-        timestamp=datetime.now(timezone.utc),
-    )
diff --git a/rag/vectorstore_loader.py b/rag/vectorstore_loader.py
index efa28790f120ae16437d5ef9fbcf775db0a7be0d..f5aa29a30324f7c971909068900eae68773ed9c0 100644
--- a/rag/vectorstore_loader.py
+++ b/rag/vectorstore_loader.py
@@ -12,12 +12,6 @@ _VECTORSTORE_LOCK = Lock()
 _VECTORSTORE_SINGLETON: Tuple[Any, Any, SentenceTransformer] | None = None
 
 
-def reset_vectorstore_singleton() -> None:
-    global _VECTORSTORE_SINGLETON
-    with _VECTORSTORE_LOCK:
-        _VECTORSTORE_SINGLETON = None
-
-
 def _resolve_vectorstore_dir() -> Path:
     raw = os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore")
     path = Path(raw)
@@ -34,7 +28,7 @@ def _resolve_vectorstore_dir() -> Path:
 
 def get_vectorstore_components(
     collection_name: str = "curriculum_chunks",
-    model_name: str = "BAAI/bge-base-en-v1.5",
+    model_name: str = "BAAI/bge-small-en-v1.5",
 ):
     global _VECTORSTORE_SINGLETON
     if _VECTORSTORE_SINGLETON is None:
@@ -43,10 +37,7 @@ def get_vectorstore_components(
                 vectorstore_dir = _resolve_vectorstore_dir()
                 vectorstore_dir.mkdir(parents=True, exist_ok=True)
                 client = chromadb.PersistentClient(path=str(vectorstore_dir))
-                collection = client.get_or_create_collection(
-                    name=collection_name,
-                    metadata={"hnsw:space": "cosine"},
-                )
+                collection = client.get_or_create_collection(name=collection_name)
                 embedder = SentenceTransformer(model_name)
                 _VECTORSTORE_SINGLETON = (client, collection, embedder)
     return _VECTORSTORE_SINGLETON
diff --git a/requirements.txt b/requirements.txt
index 4743f9beb6f1a885222134c1144239934f6a663e..ccffdb80efca67999188f05ce150acc4fc2b99ee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
 fastapi>=0.104.0
 uvicorn[standard]>=0.24.0
-openai>=1.0.0
 huggingface-hub>=0.31.0
 requests>=2.31.0
 pandas==2.2.3
@@ -20,10 +19,3 @@ numpy==2.2.1
 firebase-admin>=6.2.0
 redis[hiredis]>=5.0.0
 PyYAML>=6.0.0
-mypy>=1.20.0
-pytest>=9.0.0
-pytest-asyncio>=0.23.0
-google-api-python-client>=2.0.0
-pypdf>=4.0.0
-slowapi>=0.1.0
-limits>=3.0.0
diff --git a/routes/admin_model_routes.py b/routes/admin_model_routes.py
deleted file mode 100644
index 6bf0205316b98614deb7557a85667eb91144d899..0000000000000000000000000000000000000000
--- a/routes/admin_model_routes.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from fastapi import APIRouter, Depends, HTTPException, Request
-from pydantic import BaseModel
-from services.inference_client import (
-    set_runtime_model_profile, set_runtime_model_override,
-    reset_runtime_overrides, get_current_runtime_config, _MODEL_PROFILES,
-)
-
-router = APIRouter(prefix="/api/admin/model-config", tags=["admin"])
-
-ALLOWED_OVERRIDE_KEYS = {
-    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
-    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
-}
-
-
-def require_admin(request: Request):
-    user = getattr(request.state, "user", None)
-    if user is None:
-        raise HTTPException(status_code=401, detail="Authentication required")
-    if user.role != "admin":
-        raise HTTPException(status_code=403, detail="Admin access required")
-    return user
-
-
-class ProfileSwitchRequest(BaseModel):
-    profile: str
-
-
-class OverrideRequest(BaseModel):
-    key: str
-    value: str
-
-
-@router.get("")
-def get_model_config(_admin=Depends(require_admin)):
-    return {
-        **get_current_runtime_config(),
-        "availableProfiles": list(_MODEL_PROFILES.keys()),
-        "profileDescriptions": {
-            "dev":    "deepseek-chat everywhere - fast, $0.14/M input",
-            "budget": "deepseek-chat for all tasks - minimal cost",
-            "prod":   "deepseek-reasoner for RAG, deepseek-chat for chat - best quality",
-        },
-    }
-
-
-@router.post("/profile")
-def switch_profile(req: ProfileSwitchRequest, _admin=Depends(require_admin)):
-    try:
-        set_runtime_model_profile(req.profile)
-        return {"success": True, "applied": get_current_runtime_config()}
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-
-
-@router.post("/override")
-def set_override(req: OverrideRequest, _admin=Depends(require_admin)):
-    if req.key not in ALLOWED_OVERRIDE_KEYS:
-        raise HTTPException(status_code=400, detail=f"Key '{req.key}' is not overridable.")
-    set_runtime_model_override(req.key, req.value)
-    return {"success": True, "applied": get_current_runtime_config()}
-
-
-@router.delete("/reset")
-def reset_to_env(_admin=Depends(require_admin)):
-    reset_runtime_overrides()
-    return {"success": True, "current": get_current_runtime_config()}
\ No newline at end of file
diff --git a/routes/admin_routes.py b/routes/admin_routes.py
deleted file mode 100644
index 5d83bee2196e17a8171c7e9ae9f9b6c18c975b3e..0000000000000000000000000000000000000000
--- a/routes/admin_routes.py
+++ /dev/null
@@ -1,87 +0,0 @@
-from typing import Optional
-from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile, File, Form, BackgroundTasks
-from pydantic import BaseModel
-import logging
-
-from rag.firebase_storage_loader import _init_firebase_storage, PDF_METADATA
-from scripts.ingest_from_storage import ingest_from_firebase_storage
-
-logger = logging.getLogger("mathpulse.admin")
-
-router = APIRouter(prefix="/api/admin", tags=["admin"])
-
-def require_admin(request: Request):
-    user = getattr(request.state, "user", None)
-    if user is None:
-        raise HTTPException(status_code=401, detail="Authentication required")
-    if user.role != "admin":
-        raise HTTPException(status_code=403, detail="Admin access required")
-    return user
-
-class ReingestRequest(BaseModel):
-    subjectId: Optional[str] = None
-    storagePath: Optional[str] = None
-
-@router.post("/upload-pdf")
-async def upload_pdf(
-    subjectId: str = Form(...),
-    subjectName: str = Form(...),
-    semester: int = Form(...),
-    quarter: int = Form(...),
-    file: UploadFile = File(...),
-    _admin=Depends(require_admin)
-):
-    if not file.filename.endswith('.pdf'):
-        raise HTTPException(status_code=400, detail="Only PDF files are allowed.")
-        
-    file_content = await file.read()
-    if len(file_content) > 50 * 1024 * 1024:
-        raise HTTPException(status_code=400, detail="File size exceeds 50MB limit.")
-        
-    _, bucket = _init_firebase_storage()
-    if not bucket:
-        raise HTTPException(status_code=500, detail="Firebase storage is not initialized.")
-        
-    storage_path = f"curriculum/{subjectId}/{file.filename}"
-    
-    try:
-        blob = bucket.blob(storage_path)
-        blob.upload_from_string(file_content, content_type="application/pdf")
-    except Exception as e:
-        logger.error(f"Failed to upload PDF: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to upload to Firebase Storage: {e}")
-        
-    # Update metadata in memory before reingesting
-    PDF_METADATA[storage_path] = {
-        "subject": subjectName,
-        "subjectId": subjectId,
-        "type": "uploaded_module",
-        "semester": semester,
-        "quarter": quarter
-    }
-    
-    # Reingest
-    try:
-        ingest_from_firebase_storage(force_reindex=True)
-    except Exception as e:
-        logger.error(f"Failed to trigger reingestion: {e}")
-        
-    storage_url = f"gs://{bucket.name}/{storage_path}"
-    return {
-        "success": True,
-        "chunkCount": 0,
-        "subjectId": subjectId,
-        "storageUrl": storage_url
-    }
-
-@router.post("/reingest-pdf")
-async def reingest_pdf(
-    req: Optional[ReingestRequest] = None,
-    _admin=Depends(require_admin)
-):
-    try:
-        ingest_from_firebase_storage(force_reindex=True)
-        return {"success": True, "message": "Reingestion triggered successfully."}
-    except Exception as e:
-        logger.error(f"Failed to reingest: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to reingest: {e}")
diff --git a/routes/curriculum_routes.py b/routes/curriculum_routes.py
deleted file mode 100644
index 9a12d4f63ced31573cb73ab269af5e3859436a58..0000000000000000000000000000000000000000
--- a/routes/curriculum_routes.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from __future__ import annotations
-
-import logging
-from typing import Optional
-
-from fastapi import APIRouter, HTTPException, Query
-from pydantic import BaseModel
-
-from services.curriculum_service import (
-    get_subject,
-    get_subjects,
-    get_topic,
-    get_topics,
-)
-
-logger = logging.getLogger("mathpulse.curriculum")
-router = APIRouter(prefix="/api/curriculum", tags=["curriculum"])
-
-
-class SubjectResponse(BaseModel):
-    id: str
-    code: str
-    name: str
-    gradeLevel: str
-    semester: str
-    color: str
-    pdfAvailable: bool
-    topics: list
-
-
-class TopicResponse(BaseModel):
-    id: str
-    name: str
-    unit: str
-
-
-@router.get("/subjects", response_model=list[SubjectResponse])
-async def list_subjects(grade_level: Optional[str] = Query(None, description="Filter by grade level (e.g., 'Grade 11', 'Grade 12')")):
-    """List all curriculum subjects, optionally filtered by grade level."""
-    subjects = get_subjects(grade_level)
-    return subjects
-
-
-@router.get("/subjects/{subject_id}", response_model=SubjectResponse)
-async def get_subject_by_id(subject_id: str):
-    """Get a specific subject by ID."""
-    subject = get_subject(subject_id)
-    if not subject:
-        raise HTTPException(status_code=404, detail=f"Subject not found: {subject_id}")
-    return subject
-
-
-@router.get("/subjects/{subject_id}/topics", response_model=list[TopicResponse])
-async def list_subject_topics(subject_id: str):
-    """List all topics for a subject."""
-    topics = get_topics(subject_id)
-    return topics
-
-
-@router.get("/subjects/{subject_id}/topics/{topic_id}", response_model=TopicResponse)
-async def get_topic_by_id(subject_id: str, topic_id: str):
-    """Get a specific topic."""
-    topic = get_topic(subject_id, topic_id)
-    if not topic:
-        raise HTTPException(status_code=404, detail=f"Topic not found: {subject_id}/{topic_id}")
-    return topic
\ No newline at end of file
diff --git a/routes/diagnostic.py b/routes/diagnostic.py
deleted file mode 100644
index caebf59307e94d920d09a5b4c886be46640d241d..0000000000000000000000000000000000000000
--- a/routes/diagnostic.py
+++ /dev/null
@@ -1,797 +0,0 @@
-"""
-MathPulse AI - Diagnostic Assessment Router
-POST /api/diagnostic/generate - Generate 15-item diagnostic test grounded in RAG curriculum
-POST /api/diagnostic/submit  - Score responses, run risk analysis, save to Firestore
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import time
-import traceback
-import uuid
-from collections import defaultdict
-from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
-
-from fastapi import APIRouter, HTTPException, Request
-from pydantic import BaseModel, Field
-
-from services.ai_client import CHAT_MODEL, get_deepseek_client
-from rag.curriculum_rag import retrieve_curriculum_context
-import firebase_admin
-from firebase_admin import firestore as fs
-
-logger = logging.getLogger("mathpulse.diagnostic")
-
-router = APIRouter(prefix="/api/diagnostic", tags=["diagnostic"])
-
-# In-memory fallback session store (used if Firestore is unavailable)
-# This ensures assessment works even without Firebase credentials
-_in_memory_sessions: Dict[str, Dict[str, Any]] = defaultdict(dict)
-
-
-# ─── Pydantic Models ───────────────────────────────────────────────
-
-class DiagnosticGenerateRequest(BaseModel):
-    strand: str = Field(..., description="Student strand: ABM, STEM, HUMSS, GAS, TVL")
-    grade_level: str = Field(..., description="Grade level: Grade 11 or Grade 12")
-
-
-class DiagnosticOption(BaseModel):
-    A: str
-    B: str
-    C: str
-    D: str
-
-
-class DiagnosticQuestionStripped(BaseModel):
-    question_id: str
-    competency_code: str
-    domain: str
-    topic: str
-    difficulty: str
-    bloom_level: str
-    question_text: str
-    options: DiagnosticOption
-    curriculum_reference: str
-
-
-class DiagnosticGenerateResponse(BaseModel):
-    test_id: str
-    questions: List[DiagnosticQuestionStripped]
-    total_items: int
-    estimated_minutes: float
-
-
-class DiagnosticResponseItem(BaseModel):
-    question_id: str
-    student_answer: str
-    time_spent_seconds: int
-
-
-class DiagnosticSubmitRequest(BaseModel):
-    test_id: str
-    responses: List[DiagnosticResponseItem]
-
-
-class MasterySummary(BaseModel):
-    mastered: List[str]
-    developing: List[str]
-    beginning: List[str]
-
-
-class DiagnosticSubmitResponse(BaseModel):
-    success: bool
-    overall_risk: str
-    overall_score_percent: float
-    mastery_summary: MasterySummary
-    recommended_intervention: str
-    xp_earned: int
-    badge_unlocked: str
-    redirect_to: str
-
-
-# ─── Competency Code Registry ───────────────────────────────────────
-
-COMPETENCY_REGISTRY = {
-    "NA-WAGE-01": {"subject": "General Mathematics", "title": "Wages, Salaries, Overtime, Commissions, VAT"},
-    "NA-SEQ-01": {"subject": "General Mathematics", "title": "Arithmetic Sequences and Series"},
-    "NA-SEQ-02": {"subject": "General Mathematics", "title": "Geometric Sequences and Series"},
-    "NA-FUNC-01": {"subject": "General Mathematics", "title": "Functions, Relations, Vertical Line Test"},
-    "NA-FUNC-02": {"subject": "General Mathematics", "title": "Evaluating Functions, Operations, Composition"},
-    "NA-FUNC-03": {"subject": "General Mathematics", "title": "One-to-One Functions, Inverse Functions"},
-    "NA-EXP-01": {"subject": "General Mathematics", "title": "Exponential Functions, Equations, Inequalities"},
-    "NA-LOG-01": {"subject": "General Mathematics", "title": "Logarithmic Functions"},
-    "MG-TRIG-01": {"subject": "General Mathematics", "title": "Trigonometric Ratios, Right Triangles"},
-    "NA-FIN-01": {"subject": "General Mathematics", "title": "Compound Interest, Maturity Value"},
-    "NA-FIN-02": {"subject": "General Mathematics", "title": "Simple and General Annuities"},
-    "NA-FIN-04": {"subject": "General Mathematics", "title": "Business and Consumer Loans, Amortization"},
-    "NA-LOGIC-01": {"subject": "General Mathematics", "title": "Logical Propositions, Connectives, Truth Tables"},
-    "BM-FDP-01": {"subject": "Business Mathematics", "title": "Fractions, Decimals, Percent Conversions"},
-    "BM-FDP-02": {"subject": "Business Mathematics", "title": "Proportion: Direct, Inverse, Partitive"},
-    "BM-BUS-01": {"subject": "Business Mathematics", "title": "Markup, Margin, Trade Discounts, VAT"},
-    "BM-BUS-02": {"subject": "Business Mathematics", "title": "Profit, Loss, Break-even Point"},
-    "BM-COMM-01": {"subject": "Business Mathematics", "title": "Straight Commission, Salary Plus Commission"},
-    "BM-COMM-02": {"subject": "Business Mathematics", "title": "Commission on Cash and Installment Basis"},
-    "BM-SW-01": {"subject": "Business Mathematics", "title": "Salary vs. Wage, Income"},
-    "BM-SW-03": {"subject": "Business Mathematics", "title": "Mandatory Deductions: SSS, PhilHealth, Pag-IBIG"},
-    "BM-SW-04": {"subject": "Business Mathematics", "title": "Overtime Pay Computation (Labor Code)"},
-    "SP-RV-01": {"subject": "Statistics & Probability", "title": "Random Variables, Discrete vs. Continuous"},
-    "SP-RV-02": {"subject": "Statistics & Probability", "title": "Probability Distribution, Mean, Variance, SD"},
-    "SP-NORM-01": {"subject": "Statistics & Probability", "title": "Normal Curve Properties"},
-    "SP-NORM-02": {"subject": "Statistics & Probability", "title": "Z-Scores, Standard Normal Table"},
-    "SP-SAMP-01": {"subject": "Statistics & Probability", "title": "Types of Random Sampling"},
-    "SP-SAMP-03": {"subject": "Statistics & Probability", "title": "Central Limit Theorem"},
-    "SP-HYP-01": {"subject": "Statistics & Probability", "title": "Hypothesis Testing: H0 and Ha"},
-    "FM1-MAT-01": {"subject": "Finite Mathematics", "title": "Matrices and Matrix Operations"},
-    "FM2-PROB-01": {"subject": "Finite Mathematics", "title": "Counting Principles and Permutations"},
-    "FM2-PROB-02": {"subject": "Finite Mathematics", "title": "Combinations and Probability"},
-}
-
-LEARNING_PATH_ORDER: Dict[str, List[str]] = {
-    "BM": ["BM-FDP-01", "BM-FDP-02", "BM-BUS-01", "BM-BUS-02", "BM-COMM-01",
-           "BM-COMM-02", "BM-SW-01", "BM-SW-03", "BM-SW-04"],
-    "NA": ["NA-WAGE-01", "NA-SEQ-01", "NA-SEQ-02", "NA-FUNC-01", "NA-FUNC-02",
-           "NA-FUNC-03", "NA-EXP-01", "NA-LOG-01", "NA-FIN-01", "NA-FIN-02",
-           "NA-FIN-04", "NA-LOGIC-01"],
-    "SP": ["SP-RV-01", "SP-RV-02", "SP-NORM-01", "SP-NORM-02", "SP-SAMP-01",
-           "SP-SAMP-03", "SP-HYP-01"],
-}
-
-
-STRAND_SUBJECTS: Dict[str, List[str]] = {
-    "ABM": ["General Mathematics", "Business Mathematics"],
-    "STEM": ["General Mathematics", "Statistics and Probability"],
-    "HUMSS": ["General Mathematics"],
-    "GAS": ["General Mathematics"],
-    "TVL": ["General Mathematics"],
-}
-
-
-FULL_QUESTION_SCHEMA: Dict[str, List[str]] = {
-    "ABM": [
-        "General Mathematics: 5 items",
-        "Business Mathematics: 5 items",
-        "Statistics & Probability: 5 items",
-    ],
-    "STEM": [
-        "General Mathematics: 7 items",
-        "Statistics & Probability: 5 items",
-        "Finite Mathematics: 3 items",
-    ],
-    "HUMSS": ["General Mathematics: 15 items"],
-    "GAS": ["General Mathematics: 15 items"],
-    "TVL": ["General Mathematics: 15 items"],
-}
-
-STRAND_COVERAGE_TEXT: Dict[str, str] = {
-    "ABM": """FOR ABM STRAND:
-  - 5 questions: General Mathematics (NA-WAGE, NA-SEQ, NA-FIN topics -- wages, sequences, interest)
-  - 5 questions: Business Mathematics (BM-FDP, BM-BUS, BM-COMM, BM-SW topics -- percent, markup, commission, salaries, deductions using SSS/PhilHealth/Pag-IBIG rates)
-  - 5 questions: Statistics & Probability (SP-RV, SP-NORM topics -- random variables, normal distribution, z-scores)""",
-    "STEM": """FOR STEM STRAND:
-  - 7 questions: General Mathematics (NA-FUNC, NA-EXP, NA-LOG, MG-TRIG, NA-FIN -- functions, exponentials, trigonometry, financial math)
-  - 5 questions: Statistics & Probability (SP-RV, SP-NORM, SP-SAMP, SP-HYP -- distributions, sampling, hypothesis)
-  - 3 questions: Finite Mathematics (FM1-MAT or FM2-PROB -- matrices or counting/probability)""",
-    "HUMSS": """FOR HUMSS STRAND:
-  - 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
-    "GAS": """FOR GAS STRAND:
-  - 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
-    "TVL": """FOR TVL STRAND:
-  - 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
-}
-
-
-def _get_strand_coverage(strand: str) -> str:
-    return STRAND_COVERAGE_TEXT.get(strand.upper(), STRAND_COVERAGE_TEXT["STEM"])
-
-
-def _build_rag_context(strand: str) -> str:
-    subjects = STRAND_SUBJECTS.get(strand.upper(), ["General Mathematics"])
-    rag_context_parts: List[str] = []
-
-    rag_query = f"SHS {strand} diagnostic assessment competency questions Grade 11"
-
-    for subject in subjects:
-        try:
-            chunks = retrieve_curriculum_context(
-                query=rag_query,
-                subject=subject,
-                top_k=3,
-            )
-        except Exception as e:
-            logger.warning(f"[WARN] RAG unavailable for {subject}: {e}")
-            continue
-
-        if not chunks:
-            continue
-
-        chunk_texts: List[str] = []
-        for chunk in chunks:
-            source = chunk.get("source_file", "unknown")
-            content = str(chunk.get("content", ""))[:600]
-            chunk_texts.append(f"[Source: {source}]\n{content}")
-        rag_context_parts.append(
-            f"\n=== {subject.upper()} CURRICULUM REFERENCE ===\n" + "\n---\n".join(chunk_texts)
-        )
-
-    if not rag_context_parts:
-        logger.warning("[WARN] RAG unavailable for diagnostic generation -- proceeding without curriculum context")
-        return ""
-
-    return "\n".join(rag_context_parts)
-
-
-async def _get_previous_questions(
-    user_id: str,
-    firestore_client,
-    max_attempts: int = 3,
-) -> list[str]:
-    """Fetch question texts from the user's last N assessment attempts to avoid duplicates."""
-    try:
-        attempts_ref = (
-            firestore_client.collection("assessmentResults")
-            .document(user_id)
-            .collection("attempts")
-            .order_by("completedAt", direction=fs.Query.DESCENDING)
-            .limit(max_attempts)
-        )
-        docs = attempts_ref.stream()
-        previous_texts: list[str] = []
-        for doc in docs:
-            data = doc.to_dict()
-            answers = data.get("answers", [])
-            for a in answers:
-                previous_texts.append(a.get("questionText", ""))
-        return previous_texts
-    except Exception:
-        return []
-
-
-def _build_system_prompt(strand: str, grade_level: str, rag_context: str, variance_seed: int = 0, previous_questions: list[str] | None = None) -> str:
-    strand_upper = strand.upper()
-    coverage_text = _get_strand_coverage(strand_upper)
-
-    rag_block = ""
-    if rag_context:
-        rag_block = f"""
-OFFICIAL CURRICULUM REFERENCE (from indexed DepEd modules via RAG):
-{rag_context}
-
-IMPORTANT: Base ALL questions strictly on the curriculum content above.
-Do not invent formulas, definitions, or problem types not found in the
-reference material. If the reference material is insufficient for a topic,
-use only standard DepEd SHS competencies for that strand.
-"""
-
-    previous_block = ""
-    if previous_questions:
-        previous_lines = [
-            "PREVIOUS QUESTIONS TO AVOID (DO NOT REPEAT):",
-            "The following questions were already asked to this student.",
-            "You MUST NOT reuse or rephrase any of these:",
-        ]
-        for i, q in enumerate(previous_questions[:20], 1):
-            previous_lines.append(f"{i}. {q}")
-        previous_block = "\n".join(previous_lines) + "\n\n"
-
-    variance_block = ""
-    if variance_seed > 0:
-        variance_block = (
-            f"VARIANCE SEED: {variance_seed}\n"
-            "To ensure unique questions, use this seed to generate DIFFERENT "
-            "numerical values, problem contexts, and variable names compared "
-            "to the standard template.\n\n"
-        )
-
-    return f"""SYSTEM ROLE:
-You are MathPulse AI's Diagnostic Test Generator. Your job is to create a
-15-item multiple-choice diagnostic assessment for a Filipino SHS student,
-strictly grounded in the DepEd Strengthened SHS Curriculum (SDO Navotas
-modules and DepEd K-12 Curriculum Guides).
-
-STUDENT CONTEXT:
-- Strand: {strand_upper}
-- Grade Level: {grade_level}
-- Test Purpose: DIAGNOSTIC (pre-learning, not summative -- assess current
-  knowledge to build a personalized learning path)
-{rag_block}
-STRAND-SUBJECT COVERAGE:
-Generate 15 questions distributed across these subjects and domains:
-
-{coverage_text}
-
-COMPETENCY CODE FORMAT:
-Assign each question exactly one competency_code from this registry:
-General Math:    NA-WAGE-01, NA-SEQ-01, NA-SEQ-02, NA-FUNC-01,
-                 NA-FUNC-02, NA-FUNC-03, NA-EXP-01, NA-LOG-01,
-                 MG-TRIG-01, NA-FIN-01, NA-FIN-02, NA-FIN-04,
-                 NA-LOGIC-01
-Business Math:   BM-FDP-01, BM-FDP-02, BM-BUS-01, BM-BUS-02,
-                 BM-COMM-01, BM-COMM-02, BM-SW-01, BM-SW-03, BM-SW-04
-Statistics:      SP-RV-01, SP-RV-02, SP-NORM-01, SP-NORM-02,
-                 SP-SAMP-01, SP-SAMP-03, SP-HYP-01
-Finite Math:     FM1-MAT-01, FM2-PROB-01, FM2-PROB-02
-
-{previous_block}{variance_block}DIFFICULTY DISTRIBUTION (across all 15 questions):
-  - Easy   (Bloom: remembering / understanding): 6 questions (40%)
-  - Medium (Bloom: applying / analyzing):         6 questions (40%)
-  - Hard   (Bloom: evaluating / creating):        3 questions (20%)
-
-QUESTION RULES:
-1. All questions are 4-option multiple choice (A, B, C, D).
-2. Use Filipino real-life context: peso amounts, Filipino names
-   (Juan, Maria, Jose), Philippine institutions (SSS, PhilHealth,
-   Pag-IBIG, BIR, BDO, local schools, SM malls).
-3. Never use trick questions. Wrong options must be plausible but clearly
-   incorrect to a student who knows the concept.
-4. Include a solution_hint (1-2 sentences) -- this is for the backend
-   scoring engine ONLY. NEVER include it in the client response.
-5. Cover as many different competency codes as possible across 15 items.
-   Do not repeat the same competency code more than twice.
-
-OUTPUT FORMAT (strict JSON array, no extra text, no markdown):
-[
-  {{
-    "question_id": "DX-<uuid>",
-    "competency_code": "BM-SW-03",
-    "domain": "Business Mathematics",
-    "topic": "Mandatory Deductions",
-    "difficulty": "medium",
-    "bloom_level": "applying",
-    "question_text": "...",
-    "options": {{"A": "...", "B": "...", "C": "...", "D": "..."}},
-    "correct_answer": "C",
-    "solution_hint": "Compute SSS contribution using the prescribed table...",
-    "curriculum_reference": "SDO Navotas Bus. Math SHS 1st Sem - Salaries and Wages"
-  }}
-]
-"""
-
-
-async def _call_deepseek(system_prompt: str, user_message: str, temperature: float = 0.7) -> str:
-    try:
-        client = get_deepseek_client()
-        response = client.chat.completions.create(
-            model=CHAT_MODEL,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_message},
-            ],
-            temperature=temperature,
-            response_format={"type": "json_object"},
-        )
-        return response.choices[0].message.content or ""
-    except Exception as e:
-        logger.error(f"DeepSeek API error: {e}")
-        raise HTTPException(status_code=500, detail="AI model unavailable. Please try again later.")
-
-
-def _parse_questions_response(raw_response: str) -> List[Dict[str, Any]]:
-    try:
-        data = json.loads(raw_response)
-        if isinstance(data, dict):
-            for key in ("questions", "items", "data", "results"):
-                if key in data and isinstance(data[key], list):
-                    return data[key]
-            for key, value in data.items():
-                if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
-                    if "question_text" in value[0]:
-                        return value
-        if isinstance(data, list):
-            return data
-    except json.JSONDecodeError:
-        pass
-
-    import re
-    match = re.search(r'\[.*\]', raw_response, re.DOTALL)
-    if match:
-        try:
-            return json.loads(match.group())
-        except json.JSONDecodeError:
-            pass
-
-    raise ValueError("Could not parse questions from AI response")
-
-
-async def _generate_questions(
-    strand: str,
-    grade_level: str,
-    user_id: str = "",
-    firestore_client=None,
-) -> tuple[str, List[Dict[str, Any]]]:
-    test_id = f"DX-{uuid.uuid4().hex[:12]}"
-    
-    # Generate variance seed based on user's attempt count and fetch previous questions
-    variance_seed = 0
-    previous_questions: list[str] = []
-    
-    if firestore_client and user_id:
-        try:
-            attempts_ref = (
-                firestore_client.collection("assessmentResults")
-                .document(user_id)
-                .collection("attempts")
-            )
-            attempts = attempts_ref.stream()
-            attempt_count = sum(1 for _ in attempts)
-            variance_seed = int(time.time()) % 10000 + attempt_count * 137
-            previous_questions = await _get_previous_questions(user_id, firestore_client)
-        except Exception:
-            pass
-    
-    rag_context = _build_rag_context(strand)
-    system_prompt = _build_system_prompt(
-        strand,
-        grade_level,
-        rag_context,
-        variance_seed=variance_seed,
-        previous_questions=previous_questions,
-    )
-    user_message = f"Generate 15 diagnostic questions for a Grade 11 {strand} student."
-
-    for attempt in range(2):
-        temperature = 0.7 if attempt == 0 else 0.3
-        try:
-            raw_response = await _call_deepseek(system_prompt, user_message, temperature)
-            questions = _parse_questions_response(raw_response)
-            if questions:
-                return test_id, questions[:15]
-        except ValueError:
-            if attempt == 0:
-                logger.warning("Malformed JSON from DeepSeek, retrying with temperature=0.3")
-                continue
-            raise
-
-    raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")
-
-
-async def _store_diagnostic_session(
-    firestore_client: Any,
-    user_id: str,
-    test_id: str,
-    strand: str,
-    grade_level: str,
-    questions: List[Dict[str, Any]],
-) -> bool:
-    try:
-        doc_ref = (
-            firestore_client.collection("diagnosticSessions")
-            .document(test_id)
-        )
-        doc_ref.set({
-            "testId": test_id,
-            "userId": user_id,
-            "generatedAt": fs.SERVER_TIMESTAMP,
-            "strand": strand,
-            "gradeLevel": grade_level,
-            "questions": questions,
-            "status": "in_progress",
-        })
-        return True
-    except Exception as e:
-        logger.error(f"Failed to store diagnostic session: {e}")
-        return False
-
-
-def _strip_answers(questions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    stripped = []
-    for q in questions:
-        stripped.append({
-            "question_id": q.get("question_id", ""),
-            "competency_code": q.get("competency_code", ""),
-            "domain": q.get("domain", ""),
-            "topic": q.get("topic", ""),
-            "difficulty": q.get("difficulty", ""),
-            "bloom_level": q.get("bloom_level", ""),
-            "question_text": q.get("question_text", ""),
-            "options": q.get("options", {}),
-            "curriculum_reference": q.get("curriculum_reference", ""),
-        })
-    return stripped
-
-
-# ─── ENDPOINT 1: Generate Diagnostic ────────────────────────────────
-
-@router.post("/generate", response_model=DiagnosticGenerateResponse)
-async def generate_diagnostic(request: DiagnosticGenerateRequest, req: Request):
-    user = getattr(req.state, "user", None)
-    if not user or not getattr(user, "uid", None):
-        raise HTTPException(status_code=401, detail="Authentication required")
-
-    try:
-        firestore_client = fs.client()
-        test_id, questions = await _generate_questions(
-            request.strand,
-            request.grade_level,
-            user_id=user.uid,
-            firestore_client=firestore_client,
-        )
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Generation error: {e}\n{traceback.format_exc()}")
-        raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")
-
-    try:
-        stored = await _store_diagnostic_session(
-            firestore_client,
-            user.uid,
-            test_id,
-            request.strand,
-            request.grade_level,
-            questions,
-        )
-        if not stored:
-            raise HTTPException(status_code=503, detail="Session storage failed. Please try again.")
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Could not store diagnostic session: {e}")
-        raise HTTPException(status_code=503, detail="Database unavailable. Please try again.")
-
-    client_questions = _strip_answers(questions)
-
-    return DiagnosticGenerateResponse(
-        test_id=test_id,
-        questions=client_questions,
-        total_items=len(client_questions),
-        estimated_minutes=11.6,
-    )
-
-
-# ─── ENDPOINT 2: Submit and Evaluate ─────────────────────────────────
-
-def _score_responses(stored_questions: List[Dict[str, Any]], responses: List[DiagnosticResponseItem]) -> tuple:
-    question_map: Dict[str, Dict[str, Any]] = {}
-    for q in stored_questions:
-        question_map[q.get("question_id", "")] = q
-
-    scored = []
-    total_correct = 0
-    domain_correct: Dict[str, int] = {}
-    domain_total: Dict[str, int] = {}
-    comp_attempts: Dict[str, List[bool]] = {}
-
-    for resp in responses:
-        question = question_map.get(resp.question_id, {})
-        correct_answer = question.get("correct_answer", "")
-        is_correct = (resp.student_answer.strip().upper() == correct_answer.strip().upper())
-
-        domain = question.get("domain", "Unknown")
-        competency_code = question.get("competency_code", "")
-
-        if domain not in domain_correct:
-            domain_correct[domain] = 0
-            domain_total[domain] = 0
-        domain_total[domain] += 1
-        if is_correct:
-            domain_correct[domain] += 1
-            total_correct += 1
-
-        if competency_code not in comp_attempts:
-            comp_attempts[competency_code] = []
-        comp_attempts[competency_code].append(is_correct)
-
-        scored.append({
-            "question_id": resp.question_id,
-            "competency_code": competency_code,
-            "domain": domain,
-            "topic": question.get("topic", ""),
-            "difficulty": question.get("difficulty", ""),
-            "bloom_level": question.get("bloom_level", ""),
-            "student_answer": resp.student_answer,
-            "correct_answer": correct_answer,
-            "is_correct": is_correct,
-            "time_spent_seconds": resp.time_spent_seconds,
-        })
-
-    return scored, total_correct, domain_correct, domain_total, comp_attempts
-
-
-def _compute_domain_scores(domain_correct: Dict[str, int], domain_total: Dict[str, int]) -> Dict[str, Dict[str, Any]]:
-    domain_scores = {}
-    for domain in domain_total:
-        correct = domain_correct.get(domain, 0)
-        total = domain_total[domain]
-        pct = (correct / total * 100) if total > 0 else 0
-        mastery = "mastered" if pct >= 80 else "developing" if pct >= 60 else "beginning"
-        domain_scores[domain] = {
-            "correct": correct,
-            "total": total,
-            "percentage": round(pct, 1),
-            "mastery_level": mastery,
-        }
-    return domain_scores
-
-
-def _compute_risk_profile(
-    total_correct: int,
-    total_items: int,
-    scored_responses: List[Dict[str, Any]],
-    domain_scores: Dict[str, Dict[str, Any]],
-) -> Dict[str, Any]:
-    overall_pct = (total_correct / total_items * 100) if total_items > 0 else 0
-
-    mastered = [d for d, s in domain_scores.items() if s["mastery_level"] == "mastered"]
-    developing = [d for d, s in domain_scores.items() if s["mastery_level"] == "developing"]
-    beginning = [d for d, s in domain_scores.items() if s["mastery_level"] == "beginning"]
-
-    critical_gaps = []
-    for resp in scored_responses:
-        code = resp.get("competency_code", "")
-        if not code:
-            continue
-        attempts = [r for r in scored_responses if r.get("competency_code") == code]
-        if len(attempts) >= 2 and not any(r.get("is_correct") for r in attempts):
-            if code not in critical_gaps:
-                critical_gaps.append(code)
-
-    if overall_pct >= 75 and len(beginning) == 0:
-        overall_risk = "low"
-    elif overall_pct >= 55 or len(beginning) <= 2:
-        overall_risk = "moderate"
-    elif overall_pct >= 40 or len(beginning) <= 4:
-        overall_risk = "high"
-    else:
-        overall_risk = "critical"
-
-    suggested_path = []
-    for code in critical_gaps:
-        if code not in suggested_path:
-            suggested_path.append(code)
-    for domain in beginning:
-        for prefix in ["NA", "BM", "SP", "FM"]:
-            if domain.upper().startswith(prefix) or any(
-                s.upper().startswith(prefix) for s in [domain]
-            ):
-                for comp_code in LEARNING_PATH_ORDER.get(prefix, []):
-                    if comp_code not in suggested_path:
-                        suggested_path.append(comp_code)
-                break
-    for domain in developing:
-        for prefix in ["NA", "BM", "SP", "FM"]:
-            if any(c.startswith(prefix) for c in COMPETENCY_REGISTRY):
-                for comp_code in LEARNING_PATH_ORDER.get(prefix, []):
-                    if comp_code not in suggested_path:
-                        suggested_path.append(comp_code)
-
-    interventions = {
-        "low": "Great job! You have a solid foundation. Keep practicing to maintain your skills!",
-        "moderate": "You're making good progress. Focus on the topics where you need more practice. Kaya mo yan!",
-        "high": "Don't worry! With focused practice on your weak areas, you'll improve quickly.",
-        "critical": "Let's work on this together. Start with the basics and build up your confidence step by step.",
-    }
-
-    return {
-        "overall_risk": overall_risk,
-        "overall_score_percent": round(overall_pct, 1),
-        "mastery_summary": {
-            "mastered": mastered,
-            "developing": developing,
-            "beginning": beginning,
-        },
-        "weak_domains": beginning,
-        "critical_gaps": critical_gaps,
-        "recommended_intervention": interventions.get(overall_risk, interventions["moderate"]),
-        "suggested_learning_path": suggested_path[:20],
-    }
-
-
-async def _save_results(
-    firestore_client: Any,
-    user_id: str,
-    test_id: str,
-    strand: str,
-    grade_level: str,
-    scored_responses: List[Dict[str, Any]],
-    domain_scores: Dict[str, Dict[str, Any]],
-    risk_profile: Dict[str, Any],
-    total_correct: int,
-    total_items: int,
-) -> None:
-    try:
-        overall_pct = round(total_correct / total_items * 100, 1) if total_items > 0 else 0
-
-        firestore_client.collection("diagnosticResults").document(user_id).set({
-            "userId": user_id,
-            "testId": test_id,
-            "takenAt": fs.SERVER_TIMESTAMP,
-            "strand": strand,
-            "gradeLevel": grade_level,
-            "status": "completed",
-            "totalItems": total_items,
-            "totalScore": total_correct,
-            "percentageScore": overall_pct,
-            "responses": scored_responses,
-            "domainScores": domain_scores,
-            "riskProfile": risk_profile,
-        })
-
-        mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))
-
-        firestore_client.collection("studentProgress").document(user_id).collection("stats").document("main").set({
-            "learning_path": risk_profile.get("suggested_learning_path", []),
-            "current_topic_index": 0,
-            "total_xp": fs.Increment(50 + mastered_count * 10),
-            "badges": fs.ArrayUnion(["first_assessment"]),
-            "topics_mastered": mastered_count,
-            "diagnostic_completed": True,
-            "overall_risk": risk_profile.get("overall_risk", "moderate"),
-        }, merge=True)
-
-        firestore_client.collection("diagnosticSessions").document(test_id).update({
-            "status": "completed",
-            "completedAt": fs.SERVER_TIMESTAMP,
-        })
-
-    except Exception as e:
-        logger.error(f"Firestore save error: {e}")
-        raise
-
-
-@router.post("/submit", response_model=DiagnosticSubmitResponse)
-async def submit_diagnostic(request: DiagnosticSubmitRequest, req: Request):
-    user = getattr(req.state, "user", None)
-    if not user or not getattr(user, "uid", None):
-        raise HTTPException(status_code=401, detail="Authentication required")
-
-    try:
-        firestore_client = fs.client()
-    except Exception as e:
-        raise HTTPException(status_code=503, detail="Database unavailable")
-
-    try:
-        session_doc = firestore_client.collection("diagnosticSessions").document(request.test_id).get()
-        if not session_doc.exists:
-            raise HTTPException(status_code=404, detail="Diagnostic session not found")
-
-        session_data = session_doc.to_dict() or {}
-        stored_questions = session_data.get("questions", [])
-        strand = session_data.get("strand", "STEM")
-        grade_level = session_data.get("gradeLevel", "Grade 11")
-
-        if not stored_questions:
-            raise HTTPException(status_code=400, detail="No questions found for this session")
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.error(f"Session retrieval error: {e}")
-        raise HTTPException(status_code=500, detail="Failed to retrieve diagnostic session")
-
-    scored_responses, total_correct, domain_correct, domain_total, _ = _score_responses(
-        stored_questions, request.responses
-    )
-
-    total_items = len(stored_questions)
-    domain_scores = _compute_domain_scores(domain_correct, domain_total)
-    risk_profile = _compute_risk_profile(total_correct, total_items, scored_responses, domain_scores)
-
-    await _save_results(
-        firestore_client,
-        user.uid,
-        request.test_id,
-        strand,
-        grade_level,
-        scored_responses,
-        domain_scores,
-        risk_profile,
-        total_correct,
-        total_items,
-    )
-
-    mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))
-
-    return DiagnosticSubmitResponse(
-        success=True,
-        overall_risk=risk_profile["overall_risk"],
-        overall_score_percent=risk_profile["overall_score_percent"],
-        mastery_summary=MasterySummary(**risk_profile["mastery_summary"]),
-        recommended_intervention=risk_profile["recommended_intervention"],
-        xp_earned=50 + mastered_count * 10,
-        badge_unlocked="first_assessment",
-        redirect_to="/dashboard",
-    )
diff --git a/routes/quiz_battle.py b/routes/quiz_battle.py
deleted file mode 100644
index 1f8eae3539f85f0c048d3b814b9b3d2175de267f..0000000000000000000000000000000000000000
--- a/routes/quiz_battle.py
+++ /dev/null
@@ -1,205 +0,0 @@
-"""
-Quiz Battle API Routes.
-
-Endpoints:
-- POST /api/quiz-battle/generate       → Generate varied questions for a battle session
-- POST /api/quiz-battle/ingest-pdf     → Trigger PDF ingestion (teacher/admin)
-- GET  /api/quiz-battle/bank-status    → List processed PDFs (teacher/admin)
-"""
-
-import os
-from typing import List, Optional, Dict, Any
-from datetime import datetime, timezone
-
-from fastapi import APIRouter, Request, HTTPException, Depends
-from pydantic import BaseModel, Field
-
-from rag.pdf_ingestion import ingest_pdf, IngestionResult
-from services.question_bank_service import get_questions_for_battle, cache_session_questions, get_cached_session
-from services.variance_engine import apply_variance
-
-router = APIRouter(prefix="/api/quiz-battle", tags=["quiz-battle"])
-
-
-# ── Pydantic Models ──────────────────────────────────────────────────
-
-class GenerateRequest(BaseModel):
-    grade_level: int = Field(..., ge=7, le=12)
-    topic: str = Field(..., min_length=1)
-    question_count: int = Field(default=10, ge=1, le=50)
-    session_id: str = Field(..., min_length=1)
-    player_ids: List[str] = Field(default_factory=list)
-
-
-class GenerateResponse(BaseModel):
-    questions: List[Dict[str, Any]]
-    session_id: str
-
-
-class IngestPdfRequest(BaseModel):
-    storage_path: str = Field(..., min_length=1)
-    grade_level: int = Field(..., ge=7, le=12)
-    topic: str = Field(..., min_length=1)
-    force_reingest: bool = False
-
-
-class IngestPdfResponse(BaseModel):
-    status: str
-    filename: str
-    question_count: int
-    grade_level: int
-    topic: str
-    storage_path: str
-    timestamp: datetime
-
-
-class BankStatusItem(BaseModel):
-    filename: str
-    processed: bool
-    timestamp: Optional[datetime]
-    question_count: int
-    grade_level: int
-    topic: str
-    storage_path: str
-
-
-class BankStatusResponse(BaseModel):
-    pdfs: List[BankStatusItem]
-
-
-# ── Helper ───────────────────────────────────────────────────────────
-
-def _get_current_user(request: Request):
-    user = getattr(request.state, "user", None)
-    if user is None:
-        raise HTTPException(status_code=401, detail="Authentication required")
-    return user
-
-
-def _is_internal_request(request: Request) -> bool:
-    """Check if request is from an internal service (Cloud Functions)."""
-    internal_secret = request.headers.get("X-Internal-Service")
-    expected = os.getenv("QUIZ_BATTLE_INTERNAL_SECRET")
-    if expected and internal_secret == expected:
-        return True
-    return False
-
-
-# ── Endpoints ────────────────────────────────────────────────────────
-
-@router.post("/generate", response_model=GenerateResponse)
-async def generate_questions(
-    body: GenerateRequest,
-    request: Request,
-):
-    """
-    Generate varied questions for a quiz battle session.
-
-    Returns questions with choices but WITHOUT correct_answer (unless called
-    by an internal service with X-Internal-Service header).
-    """
-    # 1. Fetch base questions
-    questions = await get_questions_for_battle(
-        body.grade_level,
-        body.topic,
-        body.question_count,
-    )
-
-    if not questions:
-        raise HTTPException(
-            status_code=404,
-            detail=f"No questions found for grade {body.grade_level}, topic '{body.topic}'",
-        )
-
-    # 2. Apply variance (with 24h cache)
-    varied = await apply_variance(questions, body.session_id)
-
-    # 3. Cache session metadata
-    await cache_session_questions(
-        body.session_id,
-        varied,
-        body.player_ids,
-        body.grade_level,
-        body.topic,
-    )
-
-    # 4. Prepare response
-    is_internal = _is_internal_request(request)
-    response_questions = []
-    for q in varied:
-        q_copy = dict(q)
-        if not is_internal:
-            q_copy.pop("correct_answer", None)
-        response_questions.append(q_copy)
-
-    return GenerateResponse(questions=response_questions, session_id=body.session_id)
-
-
-@router.post("/ingest-pdf", response_model=IngestPdfResponse)
-async def ingest_pdf_endpoint(
-    body: IngestPdfRequest,
-    user=Depends(_get_current_user),
-):
-    """
-    Trigger PDF ingestion into the question bank.
-
-    Requires teacher or admin role.
-    """
-    if user.role not in ("teacher", "admin"):
-        raise HTTPException(status_code=403, detail="Teacher or admin access required")
-
-    try:
-        result = await ingest_pdf(
-            storage_path=body.storage_path,
-            grade_level=body.grade_level,
-            topic=body.topic,
-            force_reingest=body.force_reingest,
-        )
-    except FileNotFoundError as e:
-        raise HTTPException(status_code=404, detail=str(e))
-    except ValueError as e:
-        raise HTTPException(status_code=400, detail=str(e))
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Ingestion failed: {str(e)}")
-
-    return IngestPdfResponse(
-        status="processed" if result.processed else "skipped",
-        filename=result.filename,
-        question_count=result.question_count,
-        grade_level=result.grade_level,
-        topic=result.topic,
-        storage_path=result.storage_path,
-        timestamp=result.timestamp,
-    )
-
-
-@router.get("/bank-status", response_model=BankStatusResponse)
-async def bank_status(
-    user=Depends(_get_current_user),
-):
-    """
-    Get the status of all processed PDFs in the question bank.
-
-    Requires teacher or admin role.
-    """
-    if user.role not in ("teacher", "admin"):
-        raise HTTPException(status_code=403, detail="Teacher or admin access required")
-
-    from google.cloud import firestore
-    db = firestore.Client(project=os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026"))
-
-    docs = db.collection("pdf_processing_status").stream()
-    pdfs = []
-    for doc in docs:
-        data = doc.to_dict()
-        pdfs.append(BankStatusItem(
-            filename=doc.id,
-            processed=data.get("processed", False),
-            timestamp=data.get("timestamp"),
-            question_count=data.get("question_count", 0),
-            grade_level=data.get("grade_level", 0),
-            topic=data.get("topic", ""),
-            storage_path=data.get("storage_path", ""),
-        ))
-
-    return BankStatusResponse(pdfs=pdfs)
diff --git a/routes/quiz_generation_routes.py b/routes/quiz_generation_routes.py
deleted file mode 100644
index 61708ed06300e68febfcda106af75a6df6bdac80..0000000000000000000000000000000000000000
--- a/routes/quiz_generation_routes.py
+++ /dev/null
@@ -1,356 +0,0 @@
-"""
-Unified Quiz Generation Routes.
-
-Generates dynamic quiz questions using DeepSeek AI + RAG curriculum context.
-Used by: lesson practice quizzes, module quizzes, and quiz battle.
-
-When new PDFs are ingested into the vectorstore, this endpoint automatically
-picks up the new content via RAG retrieval.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import random
-import re
-from typing import Any, Dict, List, Optional
-
-from fastapi import APIRouter, HTTPException, Request
-from pydantic import BaseModel, Field
-
-from rag.curriculum_rag import (
-    retrieve_curriculum_context,
-    summarize_retrieval_confidence,
-)
-from services.inference_client import (
-    InferenceRequest,
-    create_default_client,
-    get_model_for_task,
-)
-
-logger = logging.getLogger("mathpulse.quiz_generation")
-router = APIRouter(prefix="/api/quiz", tags=["quiz-generation"])
-
-_inference_client = None
-
-
-def _get_inference_client():
-    global _inference_client
-    if _inference_client is None:
-        _inference_client = create_default_client()
-    return _inference_client
-
-
-# ── Request/Response Models ────────────────────────────────────────────
-
-class QuizGenerationRequest(BaseModel):
-    topic: str = Field(..., min_length=1, description="Lesson topic or competency")
-    subject: str = Field(..., min_length=1, description="Subject name (e.g., 'General Mathematics')")
-    lessonTitle: Optional[str] = Field(default=None, description="Full lesson title")
-    questionCount: int = Field(default=6, ge=1, le=20, description="Number of questions to generate")
-    questionTypes: List[str] = Field(
-        default=["multiple-choice", "true-false", "fill-in-blank"],
-        description="Question types to include",
-    )
-    difficulty: str = Field(default="medium", pattern="^(easy|medium|hard)$")
-    quarter: Optional[int] = Field(default=1, ge=1, le=4)
-    moduleId: Optional[str] = Field(default=None)
-    lessonId: Optional[str] = Field(default=None)
-    competencyCode: Optional[str] = Field(default=None)
-    storagePath: Optional[str] = Field(default=None)
-    userId: Optional[str] = Field(default=None)
-    varianceSeed: Optional[int] = Field(default=None, description="Random seed for variance across generations")
-
-
-class QuizQuestion(BaseModel):
-    id: int
-    type: str
-    question: str
-    options: Optional[List[str]] = None
-    correctAnswer: str
-    explanation: str
-
-
-class QuizGenerationResponse(BaseModel):
-    questions: List[QuizQuestion]
-    retrievalConfidence: Dict[str, Any]
-    sourceChunks: int
-    generatedAt: str
-
-
-# ── Prompt Builder ─────────────────────────────────────────────────────
-
-def _build_quiz_generation_prompt(
-    topic: str,
-    subject: str,
-    lesson_title: Optional[str],
-    question_count: int,
-    question_types: List[str],
-    difficulty: str,
-    retrieved_context: str,
-    variance_seed: Optional[int] = None,
-) -> str:
-    """Build the DeepSeek prompt for quiz generation with variance."""
-
-    # Build variance instruction based on seed
-    variance_instruction = ""
-    if variance_seed is not None:
-        variance_instruction = f"""
-8. VARIANCE REQUIREMENT: Use seed {variance_seed} to ensure variety. Generate DIFFERENT questions each time.
-   - Paraphrase concepts in fresh ways
-   - Use different numerical values and scenarios
-   - Vary question phrasing and structure
-   - Avoid repeating similar question patterns"""
-
-    return f"""You are a DepEd-aligned mathematics quiz generator for Filipino Senior High School students (Grades 11-12).
-
-Given the following curriculum context about "{topic}" from {subject}, generate {question_count} {difficulty}-difficulty quiz questions.
-
-## Retrieved Curriculum Context
-{retrieved_context}
-
-## Instructions
-1. Generate exactly {question_count} questions covering the topic above.
-2. Question types to use: {', '.join(question_types)}
-3. DISTRIBUTION (for {question_count} questions):
-   - 2 items: Recall and Basics (simple recall, definitions, fundamental facts)
-   - 4 items: Direct Application (real-world context with pesos, jeepney, sari-sari store, etc.)
-   - 3 items: Mixed/Interleaved Problems (combine concepts, multi-step reasoning)
-   - 1 item: Metacognitive/Reflective (explain reasoning, justify approach, identify errors)
-4. Difficulty: {difficulty} — appropriate for Grade 11-12 Filipino STEM students.
-5. Use Filipino-localized context where possible (pesos, jeepney, barangay, sari-sari store, etc.).
-6. Each question must be mathematically accurate and curriculum-aligned.
-7. Provide clear explanations for the correct answer.{variance_instruction}
-
-## Question Type Rules
-- multiple-choice: 4 options (A/B/C/D format), exactly one correct answer
-- true-false: statement that is either True or False
-- fill-in-blank: question with a single numeric or short text answer
-
-## Output Format
-Return ONLY a valid JSON array. No markdown, no extra text. Format:
-[
-  {{
-    "type": "multiple-choice",
-    "question": "What is the derivative of f(x) = x³?",
-    "options": ["2x²", "3x²", "x²", "3x"],
-    "correctAnswer": "3x²",
-    "explanation": "Using the power rule: d/dx(xⁿ) = nxⁿ⁻¹. So d/dx(x³) = 3x²."
-  }},
-  {{
-    "type": "true-false",
-    "question": "The sum of angles in a triangle is 180 degrees.",
-    "options": ["True", "False"],
-    "correctAnswer": "True",
-    "explanation": "By the triangle angle sum theorem, the interior angles of any Euclidean triangle sum to 180°."
-  }},
-  {{
-    "type": "fill-in-blank",
-    "question": "If f(x) = 2x + 3, then f(4) = ___",
-    "options": null,
-    "correctAnswer": "11",
-    "explanation": "Substitute x = 4: f(4) = 2(4) + 3 = 8 + 3 = 11."
-  }}
-]
-
-IMPORTANT:
-- Return ONLY the JSON array, no other text
-- Ensure correctAnswer exactly matches one of the options (for MC/TF)
-- For fill-in-blank, correctAnswer is the exact text that fills the blank
-- Generate FRESH, VARIED questions - no two questions should be identical or nearly identical
-- Questions should feel like they were created independently, not templated"""
-
-
-# ── Response Parser ────────────────────────────────────────────────────
-
-def _parse_quiz_response(text: str, expected_count: int) -> List[Dict[str, Any]]:
-    """Parse and validate DeepSeek quiz generation response."""
-    cleaned = text.strip()
-
-    # Strip markdown fences
-    cleaned = re.sub(r"^```json\s*", "", cleaned, flags=re.IGNORECASE)
-    cleaned = re.sub(r"^```\s*", "", cleaned)
-    cleaned = re.sub(r"\s*```$", "", cleaned)
-    cleaned = cleaned.strip()
-
-    try:
-        questions = json.loads(cleaned)
-    except json.JSONDecodeError as e:
-        logger.error(f"Failed to parse quiz response as JSON: {e}")
-        # Try to extract JSON array from text
-        match = re.search(r"\[.*\]", cleaned, re.DOTALL)
-        if match:
-            try:
-                questions = json.loads(match.group())
-            except json.JSONDecodeError:
-                raise ValueError(f"Invalid JSON in quiz response: {e}")
-        else:
-            raise ValueError(f"No JSON array found in quiz response")
-
-    if not isinstance(questions, list):
-        raise ValueError("Quiz response is not a JSON array")
-
-    validated = []
-    for i, q in enumerate(questions):
-        if not isinstance(q, dict):
-            continue
-
-        # Ensure required fields
-        if "question" not in q or "correctAnswer" not in q:
-            continue
-
-        # Normalize field names
-        normalized = {
-            "id": i + 1,
-            "type": q.get("type", "multiple-choice"),
-            "question": q["question"],
-            "correctAnswer": q["correctAnswer"],
-            "explanation": q.get("explanation", ""),
-        }
-
-        # Handle options
-        if "options" in q and q["options"]:
-            normalized["options"] = q["options"]
-        elif "choices" in q and q["choices"]:
-            normalized["options"] = q["choices"]
-        else:
-            # For true-false, auto-populate options
-            if normalized["type"] == "true-false":
-                normalized["options"] = ["True", "False"]
-            else:
-                normalized["options"] = None
-
-        validated.append(normalized)
-
-    if len(validated) < min(expected_count, 3):
-        raise ValueError(f"Only {len(validated)} valid questions parsed, expected at least {min(expected_count, 3)}")
-
-    return validated[:expected_count]
-
-
-# ── Variance Application ───────────────────────────────────────────────
-
-def _apply_variance(questions: List[Dict[str, Any]], seed: int) -> List[Dict[str, Any]]:
-    """Apply deterministic variance to questions (shuffle choices, etc.)."""
-    rng = random.Random(seed)
-
-    for q in questions:
-        # Shuffle multiple-choice options while tracking correct answer
-        if q.get("type") == "multiple-choice" and q.get("options"):
-            options = q["options"].copy()
-            correct = q["correctAnswer"]
-
-            # Only shuffle if correct answer is in options
-            if correct in options:
-                rng.shuffle(options)
-                q["options"] = options
-                q["correctAnswer"] = correct  # Keep original correct answer text
-
-    return questions
-
-
-# ── Endpoints ──────────────────────────────────────────────────────────
-
-@router.post("/generate", response_model=QuizGenerationResponse)
-async def generate_quiz(request: QuizGenerationRequest):
-    """
-    Generate a dynamic quiz using DeepSeek AI + RAG curriculum context.
-
-    This endpoint retrieves relevant curriculum chunks from the vectorstore,
-    then calls DeepSeek to generate varied quiz questions based on that context.
-    When new PDFs are ingested, they automatically become available via RAG.
-    """
-    try:
-        # 1. Retrieve curriculum context via RAG
-        query = request.lessonTitle or request.topic
-        chunks = retrieve_curriculum_context(
-            query=query,
-            subject=request.subject,
-            quarter=request.quarter,
-            module_id=request.moduleId,
-            lesson_id=request.lessonId,
-            competency_code=request.competencyCode,
-            storage_path=request.storagePath,
-            top_k=8,
-        )
-
-        if not chunks:
-            logger.warning(f"No curriculum chunks found for topic '{request.topic}' in subject '{request.subject}'")
-            raise HTTPException(
-                status_code=404,
-                detail=f"No curriculum content found for topic '{request.topic}'. Please ensure PDFs are ingested.",
-            )
-
-        # Shuffle retrieved chunks for variance BEFORE formatting prompt context
-        # This ensures different lessons → different curriculum context → different generated questions
-        seed = request.varianceSeed if request.varianceSeed else hash(f"{request.topic}:{request.subject}:{request.lessonTitle or ''}:{request.userId or 'anon'}") % (2**32)
-        rng = random.Random(seed)
-        rng.shuffle(chunks)  # In-place shuffle for deterministic variety per seed
-
-        # Format retrieved chunks for the prompt
-        formatted_context = "\n\n---\n\n".join(
-            f"[Source: {chunk.get('metadata', {}).get('source_file', 'Unknown')}, Page {chunk.get('metadata', {}).get('page', 'N/A')}]\n{chunk.get('document', '')}"
-            for chunk in chunks
-        )
-
-        confidence = summarize_retrieval_confidence(chunks)
-
-        # 2. Build generation prompt
-        prompt = _build_quiz_generation_prompt(
-            topic=request.topic,
-            subject=request.subject,
-            lesson_title=request.lessonTitle,
-            question_count=request.questionCount,
-            question_types=request.questionTypes,
-            difficulty=request.difficulty,
-            retrieved_context=formatted_context,
-            variance_seed=request.varianceSeed,
-        )
-
-        # 3. Call DeepSeek with higher temperature for variance
-        inference_request = InferenceRequest(
-            messages=[
-                {"role": "system", "content": "You are a precise DepEd-aligned curriculum quiz generator. Generate FRESH, VARIED questions each time - do not repeat patterns."},
-                {"role": "user", "content": prompt},
-            ],
-            task_type="quiz_generation",
-            max_new_tokens=3000,
-            temperature=0.7,  # Higher temp for variance
-            top_p=0.9,
-        )
-
-        raw_response = _get_inference_client().generate_from_messages(inference_request)
-
-        # 4. Parse response
-        questions = _parse_quiz_response(raw_response, request.questionCount)
-
-        # 5. Apply variance (shuffle options) with user-based seed for consistency
-        seed = request.varianceSeed if request.varianceSeed else hash(f"{request.topic}:{request.subject}:{request.lessonTitle or ''}:{request.userId or 'anon'}") % (2**32)
-        varied_questions = _apply_variance(questions, seed)
-
-        # 6. Build response
-        return QuizGenerationResponse(
-            questions=[QuizQuestion(**q) for q in varied_questions],
-            retrievalConfidence=confidence,
-            sourceChunks=len(chunks),
-            generatedAt=__import__("datetime").datetime.now(__import__("datetime").timezone.utc).isoformat(),
-        )
-
-    except HTTPException:
-        raise
-    except Exception as e:
-        logger.exception("Quiz generation failed")
-        raise HTTPException(status_code=500, detail=f"Quiz generation failed: {str(e)}")
-
-
-@router.get("/health")
-async def quiz_generation_health():
-    """Check quiz generation service health."""
-    model = get_model_for_task("quiz_generation")
-    return {
-        "status": "ok",
-        "activeModel": model,
-        "endpoint": "/api/quiz/generate",
-        "features": ["rag-retrieval", "deepseek-generation", "choice-shuffling", "auto-pdf-updates"],
-    }
diff --git a/routes/rag_routes.py b/routes/rag_routes.py
index f82e3093fa2c9e21016a4a050853e8f90e4fcab9..2137c0a5a944938ab6ff1d2bb00e9d0c7717c3ef 100644
--- a/routes/rag_routes.py
+++ b/routes/rag_routes.py
@@ -2,8 +2,6 @@ from __future__ import annotations
 
 import json
 import logging
-import os
-import re
 from datetime import datetime, timezone
 from threading import Lock
 from typing import Any, Dict, List, Optional
@@ -11,28 +9,21 @@ from typing import Any, Dict, List, Optional
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel, Field
 
-from services.inference_client import (
-    InferenceRequest,
-    create_default_client,
-    is_sequential_model,
-    get_model_for_task,
-)
+from services.inference_client import InferenceRequest, create_default_client
 from rag.curriculum_rag import (
     build_analysis_curriculum_context,
     build_lesson_prompt,
     build_lesson_query,
     build_problem_generation_prompt,
-    format_retrieved_chunks,
     retrieve_curriculum_context,
-    retrieve_lesson_pdf_context,
     summarize_retrieval_confidence,
 )
-from rag.vectorstore_loader import get_vectorstore_health, reset_vectorstore_singleton
+from rag.vectorstore_loader import get_vectorstore_health
 
 try:
-    from firebase_admin import firestore as firebase_firestore
+    from firebase_admin import firestore as firebase_firestore  # type: ignore[import-not-found]
 except Exception:
-    firebase_firestore = None
+    firebase_firestore = None  # type: ignore[assignment]
 
 logger = logging.getLogger("mathpulse.rag")
 router = APIRouter(prefix="/api/rag", tags=["rag"])
@@ -50,12 +41,7 @@ def _get_inference_client():
     return _inference_client
 
 
-async def _generate_text(
-    prompt: str,
-    task_type: str,
-    max_new_tokens: int = 900,
-    enable_thinking: bool = False,
-) -> str:
+async def _generate_text(prompt: str, task_type: str, max_new_tokens: int = 900) -> str:
     request = InferenceRequest(
         messages=[
             {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
@@ -65,7 +51,6 @@ async def _generate_text(
         max_new_tokens=max_new_tokens,
         temperature=0.2,
         top_p=0.9,
-        enable_thinking=enable_thinking,
     )
     return _get_inference_client().generate_from_messages(request)
 
@@ -103,21 +88,6 @@ def _log_rag_usage(
         logger.warning("rag_usage logging skipped: %s", exc)
 
 
-def _strip_thinking_and_parse(text: str) -> dict:
-    cleaned = text.strip()
-    cleaned = re.sub(r" </think>", "", cleaned, flags=re.DOTALL).strip()
-    if "{" in cleaned and "}" in cleaned:
-        try:
-            start = cleaned.find("{")
-            end = cleaned.rfind("}") + 1
-            parsed = json.loads(cleaned[start:end])
-            if isinstance(parsed, dict):
-                return parsed
-        except Exception:
-            pass
-    return {"explanation": text}
-
-
 class RagLessonRequest(BaseModel):
     topic: str
     subject: str
@@ -127,10 +97,6 @@ class RagLessonRequest(BaseModel):
     moduleUnit: Optional[str] = None
     learnerLevel: Optional[str] = None
     userId: Optional[str] = None
-    moduleId: Optional[str] = None
-    lessonId: Optional[str] = None
-    competencyCode: Optional[str] = None
-    storagePath: Optional[str] = None
 
 
 class RagProblemRequest(BaseModel):
@@ -149,8 +115,6 @@ class RagAnalysisContextRequest(BaseModel):
 
 @router.get("/health")
 async def rag_health():
-    active_model = get_model_for_task("rag_lesson")
-    is_seq = is_sequential_model(active_model)
     try:
         health = get_vectorstore_health()
         return {
@@ -158,8 +122,6 @@ async def rag_health():
             "chunkCount": health["chunkCount"],
             "subjects": health["subjects"],
             "lastIngested": datetime.now(timezone.utc).isoformat(),
-            "activeModel": active_model,
-            "isSequentialModel": is_seq,
         }
     except Exception as exc:
         return {
@@ -167,273 +129,68 @@ async def rag_health():
             "chunkCount": 0,
             "subjects": {},
             "lastIngested": None,
-            "activeModel": active_model,
-            "isSequentialModel": is_seq,
             "warning": str(exc),
         }
 
 
-def _fetch_youtube_videos(
-    lesson_title: str,
-    subject: str,
-    competency: str,
-    quarter: int,
-    lesson_id: Optional[str] = None,
-) -> List[Dict]:
-    """Fetch up to 3 relevant YouTube videos for a lesson."""
-    try:
-        from services.youtube_service import get_video_search_results
-    except ImportError:
-        return []
-    try:
-        result = get_video_search_results(
-            topic=lesson_title,
-            subject=subject,
-            lesson_context=competency,
-            grade_level=f"Grade {quarter + 10}",
-            lesson_id=lesson_id,
-            max_results=3,
-        )
-        return result.get("videos", [])
-    except Exception as e:
-        logger.warning("YouTube video search failed: %s", e)
-        return []
-
-
-def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
-    sections = lesson_data.get("sections", [])
-    section_types = {s.get("type") for s in sections}
-    required = ["introduction", "key_concepts", "video", "worked_examples", "important_notes", "try_it_yourself", "summary"]
-
-    default_content = {
-        "introduction": {"type": "introduction", "title": "Introduction", "content": f"Welcome to the lesson on {lesson_title}. This topic builds foundational skills for your mathematics journey."},
-        "key_concepts": {"type": "key_concepts", "title": "Key Concepts", "content": f"The following key concepts are essential for mastering {lesson_title}:", "callouts": [{"type": "important", "text": "Review the curriculum PDF for detailed explanations of each concept."}]},
-        "video": {"type": "video", "title": "Video Lesson", "content": "Watch the video explanation below to understand the concepts visually.", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},
-        "worked_examples": {"type": "worked_examples", "title": "Worked Examples", "examples": [{"problem": f"Sample problem for {lesson_title}", "steps": ["Step 1: Identify the given information.", "Step 2: Apply the appropriate formula or method.", "Step 3: Solve step-by-step.", "Step 4: Verify your answer."], "answer": "Solution will vary based on specific problem parameters."}]},
-        "important_notes": {"type": "important_notes", "title": "Important Notes", "bulletPoints": [f"Always read problems carefully before solving {lesson_title} questions.", "Check your units and ensure consistency throughout calculations.", "Practice regularly to build fluency with these concepts."]},
-        "try_it_yourself": {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": [{"question": f"Practice applying {lesson_title} concepts. Solve a similar problem from your textbook or worksheets.", "solution": "Compare your solution with the worked examples above. If stuck, re-read the key concepts section or ask your teacher for guidance."}]},
-        "summary": {"type": "summary", "title": "Summary", "content": f"In this lesson on {lesson_title}, you explored key concepts, worked through examples, and practiced problem-solving techniques. Continue reviewing these materials and seek additional practice to strengthen your understanding."},
-    }
-
-    def _is_section_blank(section: dict, s_type: str) -> bool:
-        """Check if a section has effectively no content."""
-        if not section:
-            return True
-        text_content = (section.get("content") or "").strip()
-        if s_type in ("introduction", "key_concepts", "video", "summary"):
-            return len(text_content) < 10
-        if s_type == "worked_examples":
-            examples = section.get("examples") or []
-            return not examples or all(not (ex.get("problem") or "").strip() for ex in examples)
-        if s_type == "important_notes":
-            bullets = section.get("bulletPoints") or []
-            return not bullets or all(not (b or "").strip() for b in bullets)
-        if s_type == "try_it_yourself":
-            problems = section.get("practiceProblems") or []
-            return not problems or all(not (p.get("question") or "").strip() for p in problems)
-        return False
-
-    filled = {}
-    for req_type in required:
-        for existing in sections:
-            if existing.get("type") == req_type:
-                filled[req_type] = existing
-                break
-        else:
-            filled[req_type] = default_content[req_type]
-
-    # Validate and replace blank sections with defaults
-    for req_type in required:
-        if _is_section_blank(filled[req_type], req_type):
-            filled[req_type] = default_content[req_type]
-
-    ordered = [filled[t] for t in required]
-
-    for i, section in enumerate(ordered):
-        s_type = section.get("type")
-        if s_type == "key_concepts" and not section.get("callouts"):
-            section["callouts"] = []
-        if s_type == "worked_examples" and not section.get("examples"):
-            section["examples"] = []
-        if s_type == "important_notes" and not section.get("bulletPoints"):
-            section["bulletPoints"] = []
-        if s_type == "try_it_yourself" and not section.get("practiceProblems"):
-            section["practiceProblems"] = []
-        ordered[i] = section
-
-    return {**lesson_data, "sections": ordered}
-
-
 @router.post("/lesson")
 async def rag_lesson(request: Request, payload: RagLessonRequest):
-    # ── Step 1: Retrieve curriculum chunks ───────────────────────────────────
-    try:
-        chunks, retrieval_mode = retrieve_lesson_pdf_context(
-            topic=build_lesson_query(
-                payload.topic,
-                payload.subject,
-                payload.quarter,
-                lesson_title=payload.lessonTitle,
-                competency=payload.learningCompetency,
-                module_unit=payload.moduleUnit,
-                learner_level=payload.learnerLevel,
-            ),
-            subject=payload.subject,
-            quarter=payload.quarter,
-            lesson_title=payload.lessonTitle,
-            competency=payload.learningCompetency,
-            module_id=payload.moduleId,
-            lesson_id=payload.lessonId,
-            competency_code=payload.competencyCode,
-            storage_path=payload.storagePath,
-            top_k=8,
-        )
-    except Exception as exc:
-        import traceback
-        logger.error(f"RAG retrieval error: {type(exc).__name__}: {exc}\n{traceback.format_exc()}")
-        raise HTTPException(
-            status_code=503,
-            detail={
-                "error": "retrieval_failed",
-                "message": f"Curriculum retrieval failed: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-
-    if not chunks:
-        raise HTTPException(
-            status_code=404,
-            detail={
-                "error": "no_curriculum_context",
-                "message": f"No curriculum content found for lesson '{payload.lessonTitle}' ({payload.subject} Q{payload.quarter}). Please ensure the PDF has been ingested.",
-                "retrievalBand": "low",
-                "sources": [],
-            },
-        )
-
-    # ── Step 2: Build prompt ─────────────────────────────────────────────────
-    try:
-        prompt = build_lesson_prompt(
-            lesson_title=payload.lessonTitle or payload.topic,
-            competency=payload.learningCompetency or payload.topic,
-            grade_level="Grade 11-12",
-            subject=payload.subject,
-            quarter=payload.quarter,
-            learner_level=payload.learnerLevel,
-            module_unit=payload.moduleUnit,
-            curriculum_chunks=chunks,
-            competency_code=payload.competencyCode,
-        )
-    except Exception as exc:
-        logger.error(f"RAG prompt build error: {type(exc).__name__}: {exc}")
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": "prompt_build_failed",
-                "message": f"Failed to build lesson prompt: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-
-    # ── Step 3: AI inference ─────────────────────────────────────────────────
-    try:
-        raw_explanation = await _generate_text(
-            prompt,
-            task_type="rag_lesson",
-            max_new_tokens=1800,
-            enable_thinking=True,
-        )
-    except Exception as exc:
-        logger.error(f"RAG inference error: {type(exc).__name__}: {exc}")
-        raise HTTPException(
-            status_code=502,
-            detail={
-                "error": "inference_failed",
-                "message": f"AI model call failed: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-
-    # ── Step 4: Parse & validate response ────────────────────────────────────
-    try:
-        parsed_lesson = _strip_thinking_and_parse(raw_explanation)
-        parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)
-    except Exception as exc:
-        logger.error(f"RAG parse error: {type(exc).__name__}: {exc}")
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": "parse_failed",
-                "message": f"Failed to parse AI response: {exc}",
-                "type": type(exc).__name__,
-            },
-        )
-
-    # ── Step 5: Enrich with videos ───────────────────────────────────────────
-    if parsed_lesson.get("sections"):
-        video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
-        if video_section:
-            try:
-                videos = _fetch_youtube_videos(
-                    payload.lessonTitle or payload.topic,
-                    payload.subject,
-                    payload.learningCompetency or "",
-                    payload.quarter,
-                    lesson_id=payload.lessonId,
-                )
-                if videos:
-                    # Primary video for backwards compatibility
-                    primary = videos[0]
-                    video_section["videoId"] = primary.get("videoId", "")
-                    video_section["videoTitle"] = primary.get("title", "")
-                    video_section["videoChannel"] = primary.get("channelTitle", "")
-                    video_section["embedUrl"] = f"https://www.youtube.com/embed/{primary.get('videoId', '')}"
-                    video_section["thumbnailUrl"] = primary.get("thumbnailUrl", "")
-                    # New: full videos array for Smart Video Integration
-                    video_section["videos"] = videos
-            except Exception as exc:
-                logger.warning("YouTube enrichment skipped: %s", exc)
-
-    # ── Step 6: Assemble response ────────────────────────────────────────────
+    retrieval_query = build_lesson_query(
+        payload.topic,
+        payload.subject,
+        payload.quarter,
+        lesson_title=payload.lessonTitle,
+        competency=payload.learningCompetency,
+        module_unit=payload.moduleUnit,
+        learner_level=payload.learnerLevel,
+    )
+    chunks = retrieve_curriculum_context(
+        query=retrieval_query,
+        subject=payload.subject,
+        quarter=payload.quarter,
+        top_k=5,
+    )
+    prompt = build_lesson_prompt(
+        lesson_title=payload.lessonTitle or payload.topic,
+        competency=payload.learningCompetency or payload.topic,
+        grade_level="Grade 11-12",
+        subject=payload.subject,
+        quarter=payload.quarter,
+        learner_level=payload.learnerLevel,
+        module_unit=payload.moduleUnit,
+        curriculum_chunks=chunks,
+    )
+    explanation = await _generate_text(prompt, task_type="lesson_generation")
     retrieval_summary = summarize_retrieval_confidence(chunks)
 
-    try:
-        _log_rag_usage(
-            request,
-            event_type="lesson",
-            topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
-            subject=payload.subject,
-            quarter=payload.quarter,
-            chunks=chunks,
-        )
-    except Exception as exc:
-        logger.warning("RAG usage logging skipped: %s", exc)
-
-    needs_review = parsed_lesson.get("needsReview", False)
-    if retrieval_summary.get("band") == "low":
-        needs_review = True
+    _log_rag_usage(
+        request,
+        event_type="lesson",
+        topic=retrieval_query,
+        subject=payload.subject,
+        quarter=payload.quarter,
+        chunks=chunks,
+    )
 
     return {
-        **parsed_lesson,
+        "explanation": explanation,
         "retrievalConfidence": retrieval_summary.get("confidence", 0.0),
         "retrievalBand": retrieval_summary.get("band", "low"),
-        "retrievalMode": retrieval_mode,
-        "needsReview": needs_review,
+        "retrievalQuery": retrieval_query,
+        "needsReview": retrieval_summary.get("band", "low") == "low",
         "sources": [
             {
                 "subject": row.get("subject"),
                 "quarter": row.get("quarter"),
                 "source_file": row.get("source_file"),
-                "storage_path": row.get("storage_path"),
                 "page": row.get("page"),
                 "score": row.get("score"),
+                "content": row.get("content"),
                 "content_domain": row.get("content_domain"),
                 "chunk_type": row.get("chunk_type"),
-                "content": row.get("content"),
             }
             for row in chunks
         ],
-        "activeModel": get_model_for_task("rag_lesson"),
     }
 
 
@@ -446,20 +203,19 @@ async def rag_generate_problem(request: Request, payload: RagProblemRequest):
         top_k=5,
     )
     prompt = build_problem_generation_prompt(payload.topic, payload.difficulty, chunks)
-    raw = await _generate_text(
-        prompt,
-        task_type="quiz_generation",
-        max_new_tokens=600,
-        enable_thinking=False,
-    )
+    raw = await _generate_text(prompt, task_type="quiz_generation")
 
-    parsed = _strip_thinking_and_parse(raw)
+    parsed: Dict[str, Any] = {}
+    cleaned = raw.strip()
+    if "{" in cleaned and "}" in cleaned:
+        try:
+            start = cleaned.find("{")
+            end = cleaned.rfind("}") + 1
+            parsed = json.loads(cleaned[start:end])
+        except Exception:
+            parsed = {}
 
     problem = str(parsed.get("problem") or raw)
-    if not problem or problem.startswith("{"):
-        problem = str(parsed.get("content") or str(parsed))
-    if len(problem) < 3 or problem.startswith("{"):
-        problem = raw
     solution = str(parsed.get("solution") or "")
     competency_ref = str(parsed.get("competencyReference") or "DepEd competency-aligned")
 
@@ -511,4 +267,4 @@ async def rag_analysis_context(request: Request, payload: RagAnalysisContextRequ
         chunks=chunks,
     )
 
-    return {"curriculumContext": "\n".join(lines)}
\ No newline at end of file
+    return {"curriculumContext": "\n".join(lines)}
diff --git a/routes/video_routes.py b/routes/video_routes.py
deleted file mode 100644
index 9dd6b213a0f0e7fac876e92e3cf2f92124f129d9..0000000000000000000000000000000000000000
--- a/routes/video_routes.py
+++ /dev/null
@@ -1,102 +0,0 @@
-"""
-Video lesson search routes for MathPulse AI.
-POST /api/lessons/videos/search — smart YouTube video search with RAG enrichment.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import List, Optional
-
-from fastapi import APIRouter, HTTPException, Request
-from pydantic import BaseModel, Field
-
-from services.youtube_service import (
-    get_video_search_results,
-    YOUTUBE_API_KEY,
-)
-
-logger = logging.getLogger("mathpulse.videos")
-router = APIRouter(prefix="/api/lessons/videos", tags=["videos"])
-
-
-class VideoSearchRequest(BaseModel):
-    topic: str = Field(..., min_length=1, max_length=200)
-    grade_level: str = Field(default="Grade 11", max_length=50)
-    subject: str = Field(default="General Mathematics", max_length=100)
-    lesson_context: str = Field(default="", max_length=1000)
-    lesson_id: Optional[str] = Field(default=None, max_length=100)
-
-
-class VideoResult(BaseModel):
-    videoId: str
-    title: str
-    channelTitle: str
-    thumbnailUrl: str
-    durationSeconds: int
-
-
-class VideoSearchResponse(BaseModel):
-    videos: List[VideoResult]
-    cached: bool = False
-
-
-@router.post("/search", response_model=VideoSearchResponse)
-async def search_videos(request: Request, payload: VideoSearchRequest):
-    """
-    Search for relevant educational YouTube videos for a lesson topic.
-
-    - Checks Firestore video_cache first (7-day TTL)
-    - Enriches the search query with RAG curriculum keywords
-    - Filters for educational channels, medium/long duration, HD quality
-    - Returns up to 3 video results
-    """
-    # Graceful degradation: if YouTube API key is not configured, return 503
-    # so the frontend can hide the video section silently
-    if not YOUTUBE_API_KEY:
-        logger.warning("YouTube API key not configured")
-        raise HTTPException(
-            status_code=503,
-            detail={
-                "error": "youtube_api_not_configured",
-                "message": "YouTube API key is not configured on the server.",
-            },
-        )
-
-    try:
-        result = get_video_search_results(
-            topic=payload.topic,
-            subject=payload.subject,
-            lesson_context=payload.lesson_context,
-            grade_level=payload.grade_level,
-            lesson_id=payload.lesson_id,
-            max_results=3,
-        )
-
-        videos = [
-            VideoResult(
-                videoId=v["videoId"],
-                title=v["title"],
-                channelTitle=v["channelTitle"],
-                thumbnailUrl=v["thumbnailUrl"],
-                durationSeconds=v["durationSeconds"],
-            )
-            for v in result.get("videos", [])
-        ]
-
-        return VideoSearchResponse(
-            videos=videos,
-            cached=result.get("cached", False),
-        )
-
-    except HTTPException:
-        raise
-    except Exception as exc:
-        logger.error("Video search endpoint error: %s", exc)
-        raise HTTPException(
-            status_code=500,
-            detail={
-                "error": "video_search_failed",
-                "message": f"Failed to search videos: {exc}",
-            },
-        )
diff --git a/scripts/download_vectorstore_from_firebase.py b/scripts/download_vectorstore_from_firebase.py
index 6b70ec3777e7e270697d0077dd7ececf09b593ff..c9bd9c34f9684fd888ee4257a32d69a7ec160cc8 100644
--- a/scripts/download_vectorstore_from_firebase.py
+++ b/scripts/download_vectorstore_from_firebase.py
@@ -1,11 +1,10 @@
 """
 Download vectorstore directory from Firebase Storage at container startup.
-Run: python /app/scripts/download_vectorstore_from_firebase.py
+Run: python -m hf_space_test.scripts.download_vectorstore_from_firebase
 """
 
 from __future__ import annotations
 
-import json
 import logging
 import os
 import sys
@@ -13,66 +12,17 @@ from pathlib import Path
 
 logger = logging.getLogger("mathpulse.download_vectorstore")
 
-REMOTE_PREFIX = "vectorstore/"
-_FIREBASE_INITIALIZED = False
-
-
-def _init_firebase() -> any | None:
-    global _FIREBASE_INITIALIZED
-
-    if _FIREBASE_INITIALIZED:
-        try:
-            from firebase_admin import storage as fb_storage
-            return fb_storage.bucket()
-        except Exception as e:
-            logger.warning("Firebase storage unavailable: %s", e)
-            _FIREBASE_INITIALIZED = False
-            return None
-
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage
-    except ImportError:
-        logger.warning("firebase_admin not installed")
-        return None
-
-    if firebase_admin._apps:
-        _FIREBASE_INITIALIZED = True
-        try:
-            return storage.bucket()
-        except Exception as e:
-            logger.warning("Firebase storage bucket unavailable: %s", e)
-            return None
-
-    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
+sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
 
-    try:
-        if sa_json:
-            creds = credentials.Certificate(json.loads(sa_json))
-        elif sa_file and Path(sa_file).exists():
-            creds = credentials.Certificate(sa_file)
-        else:
-            creds = credentials.ApplicationDefault()
+from backend.rag.firebase_storage_loader import _init_firebase_storage
 
-        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-        _FIREBASE_INITIALIZED = True
-        return storage.bucket()
-    except Exception as e:
-        logger.error("Firebase init failed: %s", e)
-        return None
-
-
-def _resolve_dest_dir() -> Path:
-    raw = os.getenv("CURRICULUM_VECTORSTORE_DIR") or os.getenv("VECTORSTORE_DIR")
-    if raw:
-        return Path(raw)
-    return Path("/app/datasets/vectorstore")
+REMOTE_PREFIX = "vectorstore/"
+LOCAL_DEST_DIR = Path("/app/datasets/vectorstore")
 
 
 def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
-    bucket = _init_firebase()
+    """Download all files under a prefix from Firebase Storage, preserving structure."""
+    _, bucket = _init_firebase_storage()
     if bucket is None:
         logger.warning("Firebase Storage not available, vectorstore download skipped")
         return False
@@ -85,7 +35,6 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
         return False
 
     downloaded = 0
-    skipped = 0
     errors = 0
 
     for blob in blobs:
@@ -97,10 +46,6 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
         local_path.parent.mkdir(parents=True, exist_ok=True)
 
         try:
-            if local_path.exists() and blob.size is not None and local_path.stat().st_size == blob.size:
-                logger.info("Skipped (already up-to-date): %s", blob.name)
-                skipped += 1
-                continue
             blob.download_to_filename(str(local_path))
             logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
             downloaded += 1
@@ -108,20 +53,10 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
             logger.error("Failed to download %s: %s", blob.name, e)
             errors += 1
 
-    logger.info("Download complete: %d downloaded, %d skipped, %d errors", downloaded, skipped, errors)
+    logger.info("Download complete: %d files downloaded, %d errors", downloaded, errors)
     return errors == 0
 
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-    dest_dir = _resolve_dest_dir()
-    print(f"INFO: Using vectorstore destination: {dest_dir}")
-    print(f"INFO: CURRICULUM_VECTORSTORE_DIR env: {os.environ.get('CURRICULUM_VECTORSTORE_DIR', 'not set')}")
-    print(f"INFO: VECTORSTORE_DIR env: {os.environ.get('VECTORSTORE_DIR', 'not set')}")
-    print(f"INFO: FIREBASE_STORAGE_BUCKET env: {os.environ.get('FIREBASE_STORAGE_BUCKET', 'not set')}")
-    print(f"INFO: FIREBASE_SERVICE_ACCOUNT_JSON length: {len(os.environ.get('FIREBASE_SERVICE_ACCOUNT_JSON', ''))}")
-    result = download_vectorstore(dest_dir, REMOTE_PREFIX)
-    if result:
-        print("SUCCESS: Vectorstore download completed")
-    else:
-        print("FAILURE: Vectorstore download failed")
+    download_vectorstore(LOCAL_DEST_DIR, REMOTE_PREFIX)
\ No newline at end of file
diff --git a/scripts/ingest_curriculum.py b/scripts/ingest_curriculum.py
index 7c95a747252231b6501f8ca57ea0ab6ae4a9c25c..5f7531ea9755df74560849966f8d616bf0b1c91c 100644
--- a/scripts/ingest_curriculum.py
+++ b/scripts/ingest_curriculum.py
@@ -1,159 +1,244 @@
 from __future__ import annotations
 
-import argparse
-import hashlib
 import json
-import logging
 import os
-import sys
+import re
+from collections import Counter
+from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Dict, List
 
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+import chromadb
+import pdfplumber
+from huggingface_hub import snapshot_download
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from sentence_transformers import SentenceTransformer
+
+BASE_DIR = Path(__file__).resolve().parents[1]
 
-from rag.vectorstore_loader import (
-    get_vectorstore_components,
-    reset_vectorstore_singleton,
-)
 
-logger = logging.getLogger(__name__)
-
-
-def _resolve_data_dir(raw: str | None) -> Path:
-    if raw:
-        p = Path(raw)
-        if p.is_absolute():
-            return p
-        p = Path.cwd() / raw
-        if p.exists():
-            return p
-    default = Path(__file__).resolve().parents[1] / "datasets"
-    return default
-
-
-def _iter_json_files(data_dir: Path):
-    for file in sorted(data_dir.rglob("*")):
-        if file.suffix not in {".json", ".jsonl"}:
-            continue
-        yield file
-
-
-def _load_records(file_path: Path) -> List[Dict[str, Any]]:
-    records: List[Dict[str, Any]] = []
-    try:
-        raw = file_path.read_text(encoding="utf-8").strip()
-        if file_path.suffix == ".jsonl":
-            for lineno, line in enumerate(raw.splitlines(), start=1):
-                line = line.strip()
-                if not line:
-                    continue
-                try:
-                    records.append(json.loads(line))
-                except json.JSONDecodeError:
-                    logger.warning("Skipping malformed JSONL line %s:%d", file_path.name, lineno)
-        else:
-            parsed = json.loads(raw)
-            if isinstance(parsed, list):
-                records.extend(parsed)
-            elif isinstance(parsed, dict):
-                records.append(parsed)
-    except Exception as exc:
-        logger.warning("Failed to parse %s: %s", file_path.name, exc)
-    return records
-
-
-def _build_id(source_file: str, page: int, content: str) -> str:
-    key = f"{source_file}::{page}::{content[:120]}"
-    return hashlib.sha256(key.encode()).hexdigest()[:40]
+def _resolve_default_dir(local_path: Path, data_path: Path) -> Path:
+    return data_path if data_path.parent.exists() else local_path
+
+
+CURRICULUM_DIR = Path(
+    os.getenv(
+        "CURRICULUM_DIR",
+        str(_resolve_default_dir(BASE_DIR / "datasets" / "curriculum", Path("/data/curriculum"))),
+    )
+)
+VECTORSTORE_DIR = Path(
+    os.getenv(
+        "VECTORSTORE_DIR",
+        str(_resolve_default_dir(BASE_DIR / "datasets" / "vectorstore", Path("/data/vectorstore"))),
+    )
+)
+COLLECTION_NAME = "curriculum_chunks"
+EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
+CURRICULUM_SOURCE_REPO_ID = os.getenv("CURRICULUM_SOURCE_REPO_ID", "").strip()
+CURRICULUM_SOURCE_REPO_TYPE = os.getenv("CURRICULUM_SOURCE_REPO_TYPE", "dataset").strip() or "dataset"
+CURRICULUM_SOURCE_REVISION = os.getenv("CURRICULUM_SOURCE_REVISION", "main").strip() or "main"
+
+SUBJECT_MAP = {
+    "SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": "general_math",
+    "GENERAL-MATHEMATICS-1-2.pdf": "general_math",
+    "GENERAL-MATHEMATICS-1.pdf": "general_math",
+    "SDO_Navotas_Bus.Math_SHS_1stSem.FV-5.pdf": "business_math",
+    "SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": "business_math",
+    "SDO_Navotas_STAT_PROB_SHS_1stSem.FV-3.pdf": "stat_prob",
+    "SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf": "stat_prob",
+    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV-4.pdf": "org_management",
+    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": "org_management",
+}
+
+QUARTER_HINTS = {
+    1: ["q1", "quarter 1", "business", "finance", "arithmetic sequence", "geometric sequence", "series"],
+    2: ["q2", "quarter 2", "measurement", "conversion", "functions", "piecewise", "statistics"],
+    3: ["q3", "quarter 3", "trigonometry", "practical measurements", "random variable", "sampling"],
+    4: ["q4", "quarter 4", "compound interest", "annuities", "loan", "hypothesis testing", "linear regression", "logic"],
+}
+
+DOMAIN_HINTS = {
+    "NA": ["number", "algebra", "sequence", "series", "interest", "annuity", "loan", "logic"],
+    "MG": ["measurement", "geometry", "trigonometry", "graph", "function", "piecewise"],
+    "DP": ["data", "probability", "statistics", "random variable", "sampling", "hypothesis", "regression"],
+}
+
+CHUNK_TYPE_HINTS = {
+    "learning_competency": ["learning competency", "code", "most essential learning", "melc", "competency"],
+    "example_problem": ["example", "solve", "problem", "exercise", "activity"],
+    "content_explanation": ["discussion", "content", "concept", "definition", "explain"],
+}
+
+
+def _norm(text: str) -> str:
+    return re.sub(r"\s+", " ", text.strip().lower())
+
+
+def infer_quarter(text: str) -> int:
+    probe = _norm(text)
+    for quarter, hints in QUARTER_HINTS.items():
+        if any(h in probe for h in hints):
+            return quarter
+    return 1
+
+
+def infer_domain(text: str) -> str:
+    probe = _norm(text)
+    scores: Dict[str, int] = {}
+    for domain, hints in DOMAIN_HINTS.items():
+        scores[domain] = sum(1 for hint in hints if hint in probe)
+    return max(scores, key=scores.get) if any(scores.values()) else "NA"
+
+
+def infer_chunk_type(text: str) -> str:
+    probe = _norm(text)
+    scores: Dict[str, int] = {}
+    for chunk_type, hints in CHUNK_TYPE_HINTS.items():
+        scores[chunk_type] = sum(1 for hint in hints if hint in probe)
+    best = max(scores, key=scores.get)
+    return best if scores[best] > 0 else "content_explanation"
+
+
+def extract_pdf_pages(pdf_path: Path) -> List[Dict[str, object]]:
+    rows: List[Dict[str, object]] = []
+    with pdfplumber.open(str(pdf_path)) as pdf:
+        for page_index, page in enumerate(pdf.pages, start=1):
+            page_text = page.extract_text() or ""
+            table_lines: List[str] = []
+            for table in page.extract_tables() or []:
+                for row in table:
+                    cells = [str(cell).strip() for cell in (row or []) if str(cell or "").strip()]
+                    if cells:
+                        table_lines.append(" | ".join(cells))
+            combined = "\n".join([page_text, "\n".join(table_lines)]).strip()
+            if combined:
+                rows.append({"page": page_index, "text": combined})
+    return rows
+
+
+def chunk_text(page_text: str) -> List[str]:
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=2000,
+        chunk_overlap=200,
+        separators=["\n\n", "\n", ". ", " ", ""],
+    )
+    return [chunk.strip() for chunk in splitter.split_text(page_text) if chunk.strip()]
+
+
+def _ensure_curriculum_pdfs() -> List[Path]:
+    pdf_files = sorted(CURRICULUM_DIR.glob("*.pdf"))
+    if pdf_files:
+        return pdf_files
+
+    if not CURRICULUM_SOURCE_REPO_ID:
+        raise SystemExit(
+            "No PDF files found in datasets/curriculum/ and CURRICULUM_SOURCE_REPO_ID is not set. "
+            "Upload the PDFs to a Hugging Face repo and point CURRICULUM_SOURCE_REPO_ID at it."
+        )
+
+    snapshot_dir = Path(
+        snapshot_download(
+            repo_id=CURRICULUM_SOURCE_REPO_ID,
+            repo_type=CURRICULUM_SOURCE_REPO_TYPE,
+            revision=CURRICULUM_SOURCE_REVISION,
+            allow_patterns=["*.pdf", "**/*.pdf"],
+        )
+    )
+
+    source_pdfs = sorted(snapshot_dir.rglob("*.pdf"))
+    if not source_pdfs:
+        raise SystemExit(
+            f"No PDF files found in Hugging Face repo {CURRICULUM_SOURCE_REPO_TYPE}:{CURRICULUM_SOURCE_REPO_ID}@{CURRICULUM_SOURCE_REVISION}."
+        )
+
+    CURRICULUM_DIR.mkdir(parents=True, exist_ok=True)
+    for source_pdf in source_pdfs:
+        target_pdf = CURRICULUM_DIR / source_pdf.name
+        target_pdf.write_bytes(source_pdf.read_bytes())
+
+    return sorted(CURRICULUM_DIR.glob("*.pdf"))
 
 
 def main() -> None:
-    parser = argparse.ArgumentParser(description="Ingest DepEd SHS curriculum JSON/JSONL into ChromaDB")
-    parser.add_argument("--data-dir", default=None, help="Directory containing .json/.jsonl files")
-    parser.add_argument("--reset", action="store_true", help="Reset the vectorstore singleton before ingestion")
-    args = parser.parse_args()
-
-    data_dir = _resolve_data_dir(args.data_dir)
-    logger.info("Ingesting from: %s", data_dir)
-
-    if args.reset:
-        reset_vectorstore_singleton()
-        _, collection, _ = get_vectorstore_components()
-        try:
-            collection.delete(ids=collection.get(include=[])["ids"])
-        except Exception:
-            pass
-        reset_vectorstore_singleton()
-
-    total_processed = 0
-    total_upserted = 0
-    total_errors = 0
-
-    _, collection, embedder = get_vectorstore_components()
-
-    for file_path in _iter_json_files(data_dir):
-        records = _load_records(file_path)
-        documents: List[str] = []
-        metadatas: List[Dict[str, Any]] = []
-        ids: List[str] = []
-        embeddings_list: List[List[float]] = []
-
-        for record in records:
-            total_processed += 1
-            content = str(record.get("content") or "").strip()
-            if not content:
-                logger.debug("Skipping empty content in %s", file_path.name)
-                continue
-
-            try:
-                subject = str(record.get("subject") or "unknown")
-                quarter = int(record.get("quarter") or 0)
-                page = int(record.get("page") or 0)
-                content_domain = str(record.get("content_domain") or "unknown")
-                chunk_type = str(record.get("chunk_type") or "unknown")
-                source_file = str(record.get("source_file") or file_path.name)
-
-                embedding = embedder.encode(content).tolist()
-                chunk_id = _build_id(source_file, page, content)
+    if not CURRICULUM_DIR.exists():
+        raise SystemExit(f"Missing curriculum directory: {CURRICULUM_DIR}")
+
+    pdf_files = _ensure_curriculum_pdfs()
+    if not pdf_files:
+        raise SystemExit("No PDF files found in datasets/curriculum/")
+
+    VECTORSTORE_DIR.mkdir(parents=True, exist_ok=True)
+
+    documents: List[str] = []
+    metadatas: List[Dict[str, object]] = []
+    ids: List[str] = []
+
+    per_subject = Counter()
+    per_quarter = Counter()
+    per_domain = Counter()
+
+    for pdf_file in pdf_files:
+        subject = SUBJECT_MAP.get(pdf_file.name, "general_math")
+        page_rows = extract_pdf_pages(pdf_file)
+        for page_row in page_rows:
+            page_number = int(page_row["page"])
+            text = str(page_row["text"])
+            for idx, chunk in enumerate(chunk_text(text), start=1):
+                quarter = infer_quarter(chunk)
+                domain = infer_domain(chunk)
+                chunk_type = infer_chunk_type(chunk)
 
                 metadata = {
                     "subject": subject,
                     "quarter": quarter,
-                    "content_domain": content_domain,
+                    "content_domain": domain,
                     "chunk_type": chunk_type,
-                    "source_file": source_file,
-                    "page": page,
+                    "source_file": pdf_file.name,
+                    "page": page_number,
                 }
+                chunk_id = f"{pdf_file.stem}-{page_number}-{idx}"
 
-                documents.append(content)
+                documents.append(chunk)
                 metadatas.append(metadata)
                 ids.append(chunk_id)
-                embeddings_list.append(embedding)
-
-            except Exception as exc:
-                total_errors += 1
-                logger.warning("Error processing record in %s: %s", file_path.name, exc)
-
-        if documents:
-            try:
-                collection.upsert(
-                    ids=ids,
-                    documents=documents,
-                    metadatas=metadatas,
-                    embeddings=embeddings_list,
-                )
-                total_upserted += len(documents)
-                logger.info("Upserted %d chunks from %s", len(documents), file_path.name)
-            except Exception as exc:
-                total_errors += len(documents)
-                logger.warning("Failed to upsert batch from %s: %s", file_path.name, exc)
-
-    print(f"=== Ingestion Summary ===")
-    print(f"Total records processed: {total_processed}")
-    print(f"Total chunks upserted:  {total_upserted}")
-    print(f"Total errors:           {total_errors}")
+
+                per_subject[subject] += 1
+                per_quarter[str(quarter)] += 1
+                per_domain[domain] += 1
+
+    embedder = SentenceTransformer(EMBED_MODEL_NAME)
+    embeddings = embedder.encode(documents, show_progress_bar=True).tolist()
+
+    client = chromadb.PersistentClient(path=str(VECTORSTORE_DIR))
+    existing = [c.name for c in client.list_collections()]
+    if COLLECTION_NAME in existing:
+        client.delete_collection(COLLECTION_NAME)
+    collection = client.create_collection(name=COLLECTION_NAME)
+    collection.add(ids=ids, documents=documents, metadatas=metadatas, embeddings=embeddings)
+
+    summary = {
+        "lastIngested": datetime.now(timezone.utc).isoformat(),
+        "totalChunks": len(documents),
+        "chunksPerSubject": dict(per_subject),
+        "chunksPerQuarter": dict(per_quarter),
+        "chunksPerDomain": dict(per_domain),
+        "sourceFiles": [pdf.name for pdf in pdf_files],
+    }
+    (VECTORSTORE_DIR / "ingest_summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
+
+    print("=== Curriculum Ingestion Summary ===")
+    print(f"Total chunks: {summary['totalChunks']}")
+    print("Chunks per subject:")
+    for subject, count in sorted(per_subject.items()):
+        print(f"  - {subject}: {count}")
+    print("Chunks per quarter:")
+    for quarter, count in sorted(per_quarter.items()):
+        print(f"  - Q{quarter}: {count}")
+    print("Chunks per domain:")
+    for domain, count in sorted(per_domain.items()):
+        print(f"  - {domain}: {count}")
 
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    main()
\ No newline at end of file
+    main()
diff --git a/scripts/ingest_from_storage.py b/scripts/ingest_from_storage.py
deleted file mode 100644
index f4a0819fa102ce94f21c70487bcb4a78cad44da9..0000000000000000000000000000000000000000
--- a/scripts/ingest_from_storage.py
+++ /dev/null
@@ -1,285 +0,0 @@
-"""
-Ingest curriculum PDFs from Firebase Storage into ChromaDB.
-Run: python -m backend.scripts.ingest_from_storage
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-import sys
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-logger = logging.getLogger("mathpulse.ingest")
-
-sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
-
-from rag.firebase_storage_loader import (
-    PDF_METADATA,
-    download_pdf_from_storage,
-    list_curriculum_blobs,
-)
-
-_CONTENT_DOMAIN_CLASSIFIERS = [
-    ("introduction", ["introduction", "welcome", "overview", "objectives", "learning objectives"]),
-    ("key_concepts", ["key concepts", "key ideas", "main concepts", "definitions", "key terms"]),
-    ("worked_examples", ["example", "worked example", "illustrative example", "sample problem", "solution"]),
-    ("important_notes", ["important", "note", "remember", "tip", "caution", "warning", "key point"]),
-    ("practice", ["practice", "exercise", "try it", "your turn", "activity", "problem set"]),
-    ("summary", ["summary", "recap", "key takeaways", "wrap-up", "conclusion"]),
-    ("assessment", ["assessment", "quiz", "test", "evaluation", "exam"]),
-]
-
-_CONTENT_TYPE_CLASSIFIERS = [
-    ("definition", ["definition", "define", "means", "is defined as"]),
-    ("formula", ["formula", "equation", "expression", "rule"]),
-    ("procedure", ["step", "method", "how to", "procedure", "process"]),
-    ("concept", ["concept", "idea", "principle", "theory"]),
-    ("application", ["application", "use", "example", "solve", "problem"]),
-]
-
-
-def _classify_chunk(content: str) -> tuple[str, str]:
-    content_lower = content.lower()
-    content_domain = "general"
-    chunk_type = "concept"
-
-    for domain, keywords in _CONTENT_DOMAIN_CLASSIFIERS:
-        if any(kw in content_lower for kw in keywords):
-            content_domain = domain
-            break
-
-    for ctype, keywords in _CONTENT_TYPE_CLASSIFIERS:
-        if any(kw in content_lower for kw in keywords):
-            chunk_type = ctype
-            break
-
-    return content_domain, chunk_type
-
-
-def _classify_lesson_section(content: str) -> str:
-    content_lower = content.lower().strip()
-    first_sentence = content_lower[:200]
-
-    for domain, keywords in _CONTENT_DOMAIN_CLASSIFIERS:
-        if any(kw in first_sentence for kw in keywords):
-            return domain
-    return "general"
-
-
-def chunk_text_preserve_pages(text: str, page_starts: List[int], chunk_size: int = 500, overlap: int = 80) -> List[Dict[str, Any]]:
-    """Split text into overlapping chunks, preserving page traceability."""
-    # Filter out None/empty entries that can result from malformed PDF text extraction
-    words = [w for w in text.split() if w is not None and str(w).strip()]
-    chunks = []
-    i = 0
-    chunk_idx = 0
-    while i < len(words):
-        chunk_words = words[i : i + chunk_size]
-        chunk_text = " ".join(str(w) for w in chunk_words)
-        estimated_page = max(1, (i // chunk_size) + 1)
-        content_domain, chunk_type = _classify_chunk(chunk_text)
-
-        chunks.append({
-            "text": chunk_text,
-            "chunk_index": chunk_idx,
-            "estimated_page": estimated_page,
-            "content_domain": content_domain,
-            "chunk_type": chunk_type,
-        })
-        i += chunk_size - overlap
-        chunk_idx += 1
-    return chunks
-
-
-def extract_pdf_text_and_pages(pdf_bytes: bytes) -> tuple[str, List[int]]:
-    """Extract text from PDF bytes, returning full text and page start positions."""
-    try:
-        from pypdf import PdfReader
-    except ImportError:
-        try:
-            import PyPDF2 as PdfReaderModule
-            from PyPDF2 import PdfReader
-        except ImportError:
-            logger.error("No PDF library available. Install: pip install pypdf")
-            return "", []
-
-    import io
-    reader = PdfReader(io.BytesIO(pdf_bytes))
-    pages: List[str] = []
-    for page in reader.pages:
-        text = page.extract_text() or ""
-        pages.append(text)
-
-    page_starts = []
-    position = 0
-    for page_text in pages:
-        page_starts.append(position)
-        position += len(page_text) + 1
-
-    full_text = "\n".join(pages)
-    return full_text, page_starts
-
-
-def get_firestore_client():
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
-            if sa_json:
-                import json as _json
-                from firebase_admin import credentials
-                creds = credentials.Certificate(_json.loads(sa_json))
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            elif sa_file and Path(sa_file).exists():
-                from firebase_admin import credentials
-                creds = credentials.Certificate(sa_file)
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            else:
-                firebase_admin.initialize_app(options={"storageBucket": bucket_name})
-        return firestore.client()
-    except Exception as e:
-        logger.warning("Firestore unavailable: %s", e)
-        return None
-
-
-def ingest_from_firebase_storage(force_reindex: bool = False):
-    """Download PDFs from Firebase Storage and ingest into ChromaDB."""
-    try:
-        from sentence_transformers import SentenceTransformer
-        import chromadb
-    except ImportError:
-        logger.error("Missing dependencies. Install: pip install chromadb sentence-transformers pypdf")
-        return
-
-    chroma_path = os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore")
-    chroma_client = chromadb.PersistentClient(path=chroma_path)
-    collection = chroma_client.get_or_create_collection(
-        name="curriculum_chunks",
-        metadata={"hnsw:space": "cosine"},
-    )
-    embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")
-
-    db = get_firestore_client()
-
-    logger.info("Starting ingestion from Firebase Storage...")
-    ingested_count = 0
-    skipped_count = 0
-    error_count = 0
-
-    for storage_path, metadata in PDF_METADATA.items():
-        doc_id = storage_path.replace("/", "_").replace(".pdf", "")
-
-        if db:
-            try:
-                doc_ref = db.collection("curriculumDocuments").document(doc_id)
-                existing = doc_ref.get()
-                if existing.exists:
-                    if not force_reindex and existing.to_dict().get("status") == "ingested":
-                        logger.info("[SKIP] %s already ingested", storage_path)
-                        skipped_count += 1
-                        continue
-            except Exception as e:
-                logger.warning("Firestore check failed for %s: %s", storage_path, e)
-
-        logger.info("Downloading: %s", storage_path)
-        pdf_bytes = download_pdf_from_storage(storage_path)
-        if pdf_bytes is None:
-            logger.error("[ERROR] Failed to download: %s", storage_path)
-            if db:
-                try:
-                    doc_ref.set({
-                        "storagePath": storage_path,
-                        "status": "failed",
-                        "error": "download_failed",
-                        **metadata,
-                    }, merge=True)
-                except:
-                    pass
-            error_count += 1
-            continue
-
-        logger.info("Extracting text from: %s (%d bytes)", storage_path, len(pdf_bytes))
-        full_text, page_starts = extract_pdf_text_and_pages(pdf_bytes)
-        if not full_text.strip():
-            logger.warning("[WARN] No text extracted from: %s", storage_path)
-            error_count += 1
-            continue
-
-        chunks = chunk_text_preserve_pages(full_text, page_starts)
-        logger.info("  -> %d chunks created", len(chunks))
-
-        existing_ids = [cid for cid in collection.get()["ids"] if cid.startswith(f"{doc_id}_chunk_")]
-        if existing_ids:
-            collection.delete(ids=existing_ids)
-            logger.info("  Removed %d existing chunks", len(existing_ids))
-
-        for chunk in chunks:
-            chunk_text = chunk.get("text", "")
-            if not isinstance(chunk_text, str) or not chunk_text.strip():
-                logger.warning("  Skipping empty/invalid chunk %s (type=%s, len=%d)", chunk.get("chunk_index"), type(chunk_text), len(chunk_text))
-                continue
-            chunk_id = f"{doc_id}_chunk_{chunk['chunk_index']}"
-            try:
-                embedding = embedder.encode(chunk_text, normalize_embeddings=True).tolist()
-            except Exception as enc_err:
-                logger.warning("  Skipping unencodable chunk %s: %s", chunk.get("chunk_index"), enc_err)
-                continue
-
-            collection.add(
-                embeddings=[embedding],
-                documents=[chunk_text],
-                metadatas=[{
-                    "document_id": doc_id,
-                    "module_id": metadata.get("subjectId", ""),
-                    "lesson_id": f"lesson-{doc_id}",
-                    "title": metadata.get("subject", ""),
-                    "subject": metadata.get("subject", ""),
-                    "subjectId": metadata.get("subjectId", ""),
-                    "quarter": metadata.get("quarter", 1),
-                    "competency_code": metadata.get("competency_code", ""),
-                    "content_domain": chunk["content_domain"],
-                    "chunk_type": chunk["chunk_type"],
-                    "source_file": storage_path.split("/")[-1],
-                    "storage_path": storage_path,
-                    "page": chunk["estimated_page"],
-                    "chunk_index": chunk["chunk_index"],
-                    "type": metadata.get("type", ""),
-                }],
-                ids=[chunk_id],
-            )
-
-        if db:
-            try:
-                doc_ref.set({
-                    "id": doc_id,
-                    "storagePath": storage_path,
-                    "status": "ingested",
-                    "ingestedAt": __import__("firebase_admin").firestore.SERVER_TIMESTAMP,
-                    "chunkCount": len(chunks),
-                    **metadata,
-                }, merge=True)
-            except Exception as e:
-                logger.warning("Firestore update failed: %s", e)
-
-        logger.info("[OK] Ingested %s (%d chunks)", storage_path, len(chunks))
-        ingested_count += 1
-
-    logger.info("=" * 50)
-    logger.info("Ingestion complete: %d ingested, %d skipped, %d errors", ingested_count, skipped_count, error_count)
-    logger.info("Total chunks in ChromaDB: %d", collection.count())
-
-
-if __name__ == "__main__":
-    import argparse
-    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-
-    parser = argparse.ArgumentParser(description="Ingest curriculum PDFs from Firebase Storage into ChromaDB")
-    parser.add_argument("--force", action="store_true", help="Re-ingest even if already ingested")
-    args = parser.parse_args()
-
-    ingest_from_firebase_storage(force_reindex=args.force)
\ No newline at end of file
diff --git a/scripts/migrate_grade12_to_grade11.py b/scripts/migrate_grade12_to_grade11.py
deleted file mode 100644
index dbaf9048e1605d1f13503ba08dfa918c05380422..0000000000000000000000000000000000000000
--- a/scripts/migrate_grade12_to_grade11.py
+++ /dev/null
@@ -1,107 +0,0 @@
-"""
-Migrate Grade 12 users to Grade 11.
-
-Run this to convert all existing Grade 12 users to Grade 11:
-    python backend/scripts/migrate_grade12_to_grade11.py
-
-This handles:
-- Firestore user profiles
-- Progress records
-- Any references to Grade 12
-"""
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-
-logger = logging.getLogger(__name__)
-
-
-def migrate_grade_12_to_grade_11():
-    """Migrate all Grade 12 users to Grade 11."""
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-
-        svc_account = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-        if svc_account:
-            import json
-            from firebase_admin import credentials
-            creds = credentials.Certificate(json.loads(svc_account))
-            firebase_admin.initialize_app(creds)
-        else:
-            firebase_admin.initialize_app()
-
-        db = firestore.client()
-        print("Firebase initialized")
-
-    except Exception as e:
-        print(f"Failed to initialize Firebase: {e}")
-        return
-
-    # Count migrations
-    users_migrated = 0
-    progress_migrated = 0
-
-    # Migrate users collection
-    print("\n--- Migrating users ---")
-    users_ref = db.collection("users")
-    
-    # Batch update for users
-    batch = db.batch()
-    user_count = 0
-
-    for doc in users_ref.stream():
-        data = doc.to_dict()
-        if data.get("grade") == "Grade 12":
-            batch.update(doc.reference, {"grade": "Grade 11"})
-            user_count += 1
-            print(f"  Migrating user: {doc.id} ({data.get('name', 'Unknown')})")
-            
-            if user_count >= 500:
-                batch.commit()
-                users_migrated += user_count
-                user_count = 0
-                batch = db.batch()
-
-    if user_count > 0:
-        batch.commit()
-        users_migrated += user_count
-
-    print(f"  => Migrated {users_migrated} users to Grade 11")
-
-    # Migrate progress collection
-    print("\n--- Migrating progress ---")
-    progress_ref = db.collection("progress")
-    batch = db.batch()
-    progress_count = 0
-
-    for doc in progress_ref.stream():
-        data = doc.to_dict()
-        if data.get("gradeLevel") == "Grade 12":
-            batch.update(doc.reference, {"gradeLevel": "Grade 11"})
-            progress_count += 1
-            
-            if progress_count >= 500:
-                batch.commit()
-                progress_migrated += progress_count
-                progress_count = 0
-                batch = db.batch()
-
-    if progress_count > 0:
-        batch.commit()
-        progress_migrated += progress_count
-
-    print(f"  => Migrated {progress_migrated} progress records to Grade 11")
-
-    print(f"\n=== Migration complete ===")
-    print(f"Users migrated: {users_migrated}")
-    print(f"Progress migrated: {progress_migrated}")
-
-
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    migrate_grade_12_to_grade_11()
\ No newline at end of file
diff --git a/scripts/register_firestore_metadata.py b/scripts/register_firestore_metadata.py
deleted file mode 100644
index dff135fc8e6fb22225124372d0c0af667b208b3f..0000000000000000000000000000000000000000
--- a/scripts/register_firestore_metadata.py
+++ /dev/null
@@ -1,183 +0,0 @@
-"""
-Register curriculum document metadata in Firestore.
-Populates the curriculumDocuments collection so the app can display
-lessons mapped to their source PDFs before ingestion.
-
-Run: python backend/scripts/register_firestore_metadata.py
-"""
-
-from __future__ import annotations
-
-import os
-import sys
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-
-
-def _get_firestore_client():
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
-            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
-            if sa_json:
-                import json as _json
-                from firebase_admin import credentials
-                creds = credentials.Certificate(_json.loads(sa_json))
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            elif sa_file and Path(sa_file).exists():
-                from firebase_admin import credentials
-                creds = credentials.Certificate(sa_file)
-                firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
-            else:
-                firebase_admin.initialize_app(options={"storageBucket": bucket_name})
-        return firestore.client()
-    except Exception as e:
-        print(f"Firestore init failed: {e}")
-        return None
-
-
-CURRICULUM_DOCUMENTS = [
-    {
-        "id": "gm_lesson_1",
-        "moduleId": "gm-q1-business-finance",
-        "lessonId": "gm-q1-bf-1",
-        "title": "Represent business transactions and financial goals using variables and equations.",
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "quarter": 1,
-        "competencyCode": "GM11-BF-1",
-        "learningCompetency": "Represent business transactions and financial goals using variables and equations.",
-        "storagePath": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "gm_navotas_lesson_1",
-        "moduleId": "gm-q1-patterns-sequences-series",
-        "lessonId": "gm-q1-pss-1",
-        "title": "Identify and describe arithmetic and geometric patterns in data.",
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "quarter": 1,
-        "competencyCode": "GM11-PSS-1",
-        "learningCompetency": "Identify and describe arithmetic and geometric patterns in data.",
-        "storagePath": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "bm_lesson_1",
-        "moduleId": "bm-q1-business-math",
-        "lessonId": "bm-q1-1",
-        "title": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
-        "subject": "Business Mathematics",
-        "subjectId": "business-math",
-        "quarter": 1,
-        "competencyCode": "ABM_BM11BS-Ia-b-1",
-        "learningCompetency": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
-        "storagePath": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "stat_lesson_1",
-        "moduleId": "stat-q1-probability",
-        "lessonId": "stat-q1-1",
-        "title": "Define and describe random variables and their types.",
-        "subject": "Statistics and Probability",
-        "subjectId": "stats-prob",
-        "quarter": 1,
-        "competencyCode": "SP_SHS11-Ia-1",
-        "learningCompetency": "Define and describe random variables and their types.",
-        "storagePath": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "fm1_lesson_1",
-        "moduleId": "fm1-q1-counting",
-        "lessonId": "fm1-q1-fpc-1",
-        "title": "Apply the fundamental counting principle in contextual problems.",
-        "subject": "Finite Mathematics 1",
-        "subjectId": "finite-math-1",
-        "quarter": 1,
-        "competencyCode": "FM1-SHS11-Ia-1",
-        "learningCompetency": "Apply the fundamental counting principle in contextual problems.",
-        "storagePath": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "fm2_lesson_1",
-        "moduleId": "fm2-q1-matrices",
-        "lessonId": "fm2-q1-matrices-1",
-        "title": "Represent contextual data using matrix notation.",
-        "subject": "Finite Mathematics 2",
-        "subjectId": "finite-math-2",
-        "quarter": 1,
-        "competencyCode": "FM2-SHS11-Ia-1",
-        "learningCompetency": "Represent contextual data using matrix notation.",
-        "storagePath": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
-        "status": "uploaded",
-    },
-    {
-        "id": "org_mgmt_lesson_1",
-        "moduleId": "org-mgmt-q1",
-        "lessonId": "org-mgmt-q1-1",
-        "title": "Understand the fundamental concepts of organization and management.",
-        "subject": "Organization and Management",
-        "subjectId": "org-mgmt",
-        "quarter": 1,
-        "competencyCode": "ABM_OM11-Ia-1",
-        "learningCompetency": "Understand the fundamental concepts of organization and management.",
-        "storagePath": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
-        "status": "uploaded",
-    },
-]
-
-
-def register_metadata(force: bool = False):
-    db = _get_firestore_client()
-    if db is None:
-        print("ERROR: Cannot connect to Firestore. Check credentials.")
-        print("Set FIREBASE_SERVICE_ACCOUNT_JSON or place mathpulse-sa.json in backend/ directory.")
-        return
-
-    print("Connected to Firestore.")
-    print("-" * 50)
-
-    registered = 0
-    skipped = 0
-    updated = 0
-
-    for doc in CURRICULUM_DOCUMENTS:
-        doc_id = doc["id"]
-        doc_ref = db.collection("curriculumDocuments").document(doc_id)
-        existing = doc_ref.get()
-
-        if existing.exists and not force:
-            print(f"[SKIP] {doc_id} already registered")
-            skipped += 1
-            continue
-
-        if existing.exists and force:
-            updated += 1
-        else:
-            registered += 1
-
-        data = {
-            **doc,
-            "uploadedAt": None,
-        }
-        doc_ref.set(data, merge=True)
-        print(f"[OK]  {'Updated' if force and existing.exists else 'Registered'} {doc_id} -> {doc.get('storagePath')}")
-
-    print("-" * 50)
-    print(f"Done: {registered} registered, {skipped} skipped, {updated} updated.")
-
-
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description="Register curriculum document metadata in Firestore")
-    parser.add_argument("--force", action="store_true", help="Overwrite existing records")
-    args = parser.parse_args()
-    register_metadata(force=args.force)
\ No newline at end of file
diff --git a/scripts/seed_curriculum.py b/scripts/seed_curriculum.py
deleted file mode 100644
index 5fa3288da3e9e54eb186ee4832c4517a0e137553..0000000000000000000000000000000000000000
--- a/scripts/seed_curriculum.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""
-Seed Firestore curriculum collection from static data.
-
-Run this ONCE to migrate static curriculum to Firestore:
-    python backend/scripts/seed_curriculum.py
-
-After seeding, the curriculum API will read from Firestore.
-"""
-
-import logging
-import json
-import os
-import sys
-from pathlib import Path
-
-# Add backend to path
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-
-from services.curriculum_service import _STATIC_SUBJECTS
-
-logger = logging.getLogger(__name__)
-
-
-def seed_curriculum():
-    """Seed curriculum subjects to Firestore."""
-    try:
-        import firebase_admin
-        from firebase_admin import firestore, credentials
-
-        # Initialize Firebase
-        svc_account = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-        if svc_account:
-            sa_creds = credentials.Certificate(json.loads(svc_account))
-            firebase_admin.initialize_app(sa_creds)
-        else:
-            firebase_admin.initialize_app()
-
-        db = firestore.client()
-        print("Firebase initialized")
-
-    except Exception as e:
-        print(f"Failed to initialize Firebase: {e}")
-        return
-
-    # Seed new subjects
-    subjects_ref = db.collection("subjects")
-    count = 0
-
-    for subject in _STATIC_SUBJECTS:
-        doc_ref = subjects_ref.document(subject["id"])
-        doc_ref.set(subject)
-        count += 1
-        print(f"  Seeded: {subject['id']} - {subject['name']} ({len(subject.get('topics', []))} topics)")
-
-    print(f"\nSeeded {count} subjects to Firestore")
-    print("\nCurriculum is now available at:")
-    print("  GET /api/curriculum/subjects")
-    print("  GET /api/curriculum/subjects/{id}")
-    print("  GET /api/curriculum/subjects/{id}/topics")
-
-
-if __name__ == "__main__":
-    logging.basicConfig(level=logging.INFO)
-    seed_curriculum()
\ No newline at end of file
diff --git a/scripts/upload_curriculum_pdfs.py b/scripts/upload_curriculum_pdfs.py
deleted file mode 100644
index 9eff7e951e507e5ffd3873f2d0c224811d88cbc8..0000000000000000000000000000000000000000
--- a/scripts/upload_curriculum_pdfs.py
+++ /dev/null
@@ -1,264 +0,0 @@
-"""
-Upload DepEd curriculum PDFs to Firebase Storage.
-Run once during initial setup: python scripts/upload_curriculum_pdfs.py
-"""
-
-from __future__ import annotations
-
-import os
-import sys
-from pathlib import Path
-from typing import Dict, List
-
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-
-LOCAL_PDF_DIR = r"C:\Users\Deign\Downloads\Documents"
-
-PDF_METADATA: Dict[str, Dict[str, object]] = {
-    "GENERAL-MATHEMATICS-1.pdf": {
-        "subject": "General Mathematics",
-        "type": "curriculum_guide",
-        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
-        "quarters": ["Q1", "Q2", "Q3", "Q4"],
-        "storage_path": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
-    },
-    "Finite-Mathematics-1-1.pdf": {
-        "subject": "Finite Mathematics 1",
-        "type": "curriculum_guide",
-        "strand": ["STEM", "ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
-    },
-    "Finite-Mathematics-2-1.pdf": {
-        "subject": "Finite Mathematics 2",
-        "type": "curriculum_guide",
-        "strand": ["STEM", "ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
-    },
-    "SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
-        "subject": "General Mathematics",
-        "type": "sdo_module",
-        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
-    },
-    "SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": {
-        "subject": "Business Mathematics",
-        "type": "sdo_module",
-        "strand": ["ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
-    },
-    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": {
-        "subject": "Organization and Management",
-        "type": "sdo_module",
-        "strand": ["ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
-    },
-    "SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf": {
-        "subject": "Statistics and Probability",
-        "type": "sdo_module",
-        "strand": ["STEM", "ABM"],
-        "quarters": ["Q1", "Q2"],
-        "storage_path": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf",
-    },
-}
-
-
-def chunk_text(text: str, chunk_size: int = 600, overlap: int = 100) -> List[str]:
-    """Split text into overlapping chunks."""
-    words = text.split()
-    chunks: List[str] = []
-    i = 0
-    while i < len(words):
-        chunk = " ".join(words[i : i + chunk_size])
-        chunks.append(chunk)
-        i += chunk_size - overlap
-    return chunks
-
-
-def upload_pdfs():
-    """Upload PDFs from local directory to Firebase Storage."""
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage, firestore
-    except ImportError:
-        print("ERROR: firebase-admin not installed. Run: pip install firebase-admin")
-        return
-
-    service_account_path = Path(__file__).resolve().parents[1] / "serviceAccountKey.json"
-    if not service_account_path.exists():
-        print(f"ERROR: Service account key not found at {service_account_path}")
-        return
-
-    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "").strip()
-    if not bucket_name:
-        print("ERROR: FIREBASE_STORAGE_BUCKET not set in environment")
-        return
-
-    cred = credentials.Certificate(str(service_account_path))
-    firebase_admin.initialize_app(cred, {"storageBucket": bucket_name})
-
-    bucket = storage.bucket()
-    db = firestore.client()
-
-    print(f"Scanning: {LOCAL_PDF_DIR}")
-    print("-" * 50)
-
-    uploaded = 0
-    skipped = 0
-
-    for filename, meta in PDF_METADATA.items():
-        local_path = Path(LOCAL_PDF_DIR) / filename
-
-        if not local_path.exists():
-            print(f"[SKIP] {filename} not found in {LOCAL_PDF_DIR}")
-            skipped += 1
-            continue
-
-        doc_ref = db.collection("curriculumDocs").document(filename)
-        if doc_ref.get().exists:
-            print(f"[SKIP] {filename} already uploaded")
-            skipped += 1
-            continue
-
-        try:
-            blob = bucket.blob(meta["storage_path"])
-            blob.upload_from_filename(str(local_path), content_type="application/pdf")
-
-            doc_ref.set(
-                {
-                    "filename": filename,
-                    "subject": meta["subject"],
-                    "type": meta["type"],
-                    "strand": meta["strand"],
-                    "quarters": meta["quarters"],
-                    "storage_path": meta["storage_path"],
-                    "uploaded_at": firestore.SERVER_TIMESTAMP,
-                    "indexed": False,
-                }
-            )
-
-            print(f"[OK] Uploaded {filename}")
-            uploaded += 1
-        except Exception as e:
-            print(f"[ERROR] {filename}: {e}")
-
-    print("-" * 50)
-    print(f"Upload complete: {uploaded} uploaded, {skipped} skipped")
-
-
-def index_pdfs():
-    """Extract text from PDFs, chunk, embed, and store in ChromaDB."""
-    try:
-        from pypdf import PdfReader
-        import chromadb
-        from sentence_transformers import SentenceTransformer
-        from firebase_admin import firestore
-    except ImportError:
-        print("ERROR: Missing dependencies. Run: pip install pypdf chromadb sentence-transformers firebase-admin")
-        return
-
-    chroma_path = os.getenv("CHROMA_PERSIST_PATH", "./datasets/vectorstore")
-    
-    chroma_client = chromadb.PersistentClient(path=chroma_path)
-    collection = chroma_client.get_or_create_collection(
-        name="curriculum_chunks",
-        metadata={"hnsw:space": "cosine"},
-    )
-    embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")
-    
-    try:
-        import firebase_admin
-        from firebase_admin import firestore as FS
-        db = FS.client()
-    except Exception:
-        db = None
-
-    print(f"Indexing PDFs from: {LOCAL_PDF_DIR}")
-    print("-" * 50)
-
-    indexed = 0
-    skipped = 0
-
-    for filename, meta in PDF_METADATA.items():
-        if db:
-            doc_ref = db.collection("curriculumDocs").document(filename)
-            doc = doc_ref.get()
-            if doc and doc.to_dict().get("indexed", False):
-                print(f"[SKIP] {filename} already indexed")
-                skipped += 1
-                continue
-
-        local_path = Path(LOCAL_PDF_DIR) / filename
-        if not local_path.exists():
-            print(f"[SKIP] {filename} not found")
-            skipped += 1
-            continue
-
-        try:
-            reader = PdfReader(str(local_path))
-            full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
-
-            if not full_text.strip():
-                print(f"[WARN] {filename} has no extractable text")
-                continue
-
-            chunks = chunk_text(full_text)
-            print(f"[INFO] {filename} -> {len(chunks)} chunks")
-
-            for i, chunk in enumerate(chunks):
-                chunk_id = f"{filename}_chunk_{i}"
-
-                existing = collection.get(ids=[chunk_id])
-                if existing and existing.get("ids"):
-                    continue
-
-                chunk_embedding = embedder.encode(
-                    chunk,
-                    normalize_embeddings=True,
-                ).tolist()
-
-                collection.add(
-                    embeddings=[chunk_embedding],
-                    documents=[chunk],
-                    metadatas=[
-                        {
-                            "source_file": filename,
-                            "subject": meta["subject"],
-                            "strand": ",".join(meta["strand"]),
-                            "quarter": ",".join(meta["quarters"]),
-                            "chunk_index": i,
-                            "type": meta["type"],
-                        }
-                    ],
-                    ids=[chunk_id],
-                )
-
-            if db:
-                doc_ref.update({"indexed": True})
-
-            print(f"[OK] Indexed {filename}")
-            indexed += 1
-        except Exception as e:
-            print(f"[ERROR] {filename}: {e}")
-
-    print("-" * 50)
-    print(f"Indexing complete: {indexed} indexed, {skipped} skipped")
-    print(f"Total chunks in ChromaDB: {collection.count()}")
-
-
-if __name__ == "__main__":
-    import argparse
-
-    parser = argparse.ArgumentParser(description="Upload and index DepEd curriculum PDFs")
-    parser.add_argument("action", choices=["upload", "index", "both"], help="Action to perform")
-    args = parser.parse_args()
-
-    if args.action in ("upload", "both"):
-        upload_pdfs()
-
-    if args.action in ("index", "both"):
-        index_pdfs()
\ No newline at end of file
diff --git a/scripts/upload_lesson_modules.py b/scripts/upload_lesson_modules.py
deleted file mode 100644
index 4b5494deeeacd515ee9c504f443b9e268e04f8ef..0000000000000000000000000000000000000000
--- a/scripts/upload_lesson_modules.py
+++ /dev/null
@@ -1,142 +0,0 @@
-"""
-Merge DepEd lesson module PDFs and upload to Firebase Storage.
-Run: python backend/scripts/upload_lesson_modules.py
-"""
-
-from __future__ import annotations
-
-import os
-import sys
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-
-from pypdf import PdfWriter, PdfReader
-
-LOCAL_MODULES_DIR = Path(__file__).resolve().parents[1].parent / "datasets" / "lesson_modules"
-FIREBASE_STORAGE_BUCKET = "mathpulse-ai-2026.firebasestorage.app"
-
-# Upload plan
-UPLOAD_JOBS = [
-    {
-        "id": "basic-calc-q3",
-        "display_name": "Basic Calculus Q3",
-        "subject": "Basic Calculus",
-        "subjectId": "basic-calc",
-        "quarter": 3,
-        "storage_path": "curriculum/basic_calc/SDO_Navotas_BasicCalc_SHS_Q3.FV.pdf",
-        "local_dir": LOCAL_MODULES_DIR / "basic_calculus_q3",
-        "filename": "Basic Calculus-Q3-Module-{n}.pdf",
-        "modules": list(range(1, 9)),  # Modules 1-8
-    },
-    {
-        "id": "gen-math-q2",
-        "display_name": "General Mathematics Q2",
-        "subject": "General Mathematics",
-        "subjectId": "gen-math",
-        "quarter": 2,
-        "storage_path": "curriculum/gen_math_q2/SDO_Navotas_GenMath_SHS_Q2.FV.pdf",
-        "local_dir": LOCAL_MODULES_DIR / "genmath_q2",
-        "filename": "genmath_q2_mod{n}_*.pdf",
-        "modules": [2, 3],  # Modules 2 and 3 only
-    },
-]
-
-
-def merge_pdfs(job: dict) -> Path | None:
-    """Merge multiple PDFs into a single output file. Returns output path."""
-    output_dir = LOCAL_MODULES_DIR / "merged"
-    output_dir.mkdir(parents=True, exist_ok=True)
-    output_path = output_dir / f"{job['id']}_merged.pdf"
-
-    writer = PdfWriter()
-
-    for mod_num in job["modules"]:
-        if job["id"] == "basic-calc-q3":
-            fname = job["filename"].format(n=mod_num)
-        else:
-            # GenMath modules have specific naming
-            fname = None
-            pattern = job["filename"].format(n=mod_num)
-            for f in job["local_dir"].glob(pattern):
-                fname = f.name
-                break
-            if fname is None:
-                print(f"  [WARN] Could not find file for module {mod_num}")
-                continue
-
-        src_path = job["local_dir"] / fname
-        if not src_path.exists():
-            print(f"  [WARN] File not found: {src_path}")
-            continue
-
-        reader = PdfReader(str(src_path))
-        print(f"  + {src_path.name} ({len(reader.pages)} pages)")
-        for page in reader.pages:
-            writer.add_page(page)
-
-    print(f"  Writing {output_path.name} ({len(writer.pages)} total pages)")
-    with open(output_path, "wb") as f:
-        writer.write(f)
-
-    return output_path
-
-
-def upload_to_firebase(local_path: Path, storage_path: str) -> bool:
-    """Upload a PDF file to Firebase Storage."""
-    try:
-        import firebase_admin
-        from firebase_admin import credentials, storage
-    except ImportError:
-        print("  ERROR: firebase-admin not installed")
-        return False
-
-    sa_file = Path(__file__).resolve().parents[1].parent / ".secrets" / "firebase-service-account.json"
-    if not sa_file.exists():
-        print(f"  ERROR: Service account not found at {sa_file}")
-        return False
-
-    if not firebase_admin._apps:
-        cred = credentials.Certificate(str(sa_file))
-        firebase_admin.initialize_app(cred, {"storageBucket": FIREBASE_STORAGE_BUCKET})
-
-    bucket = storage.bucket()
-    blob = bucket.blob(storage_path)
-
-    print(f"  Uploading to gs://{bucket.name}/{storage_path}")
-    blob.upload_from_filename(str(local_path), content_type="application/pdf")
-    print(f"  Upload complete!")
-    return True
-
-
-def main():
-    print("=" * 60)
-    print("MathPulse AI — Lesson Module PDF Uploader")
-    print("=" * 60)
-
-    for job in UPLOAD_JOBS:
-        print(f"\n[{job['display_name']}]")
-        print("-" * 40)
-
-        # Step 1: Merge PDFs
-        output_path = merge_pdfs(job)
-        if not output_path or not output_path.exists():
-            print(f"  [FAIL] Merge failed for {job['id']}")
-            continue
-
-        # Step 2: Upload to Firebase
-        success = upload_to_firebase(output_path, job["storage_path"])
-        if not success:
-            print(f"  [FAIL] Upload failed for {job['id']}")
-            continue
-
-        print(f"\n  SUCCESS: {job['display_name']}")
-        print(f"  Storage path: gs://{FIREBASE_STORAGE_BUCKET}/{job['storage_path']}")
-        print(f"  Pages: {len(PdfReader(str(output_path)).pages)}")
-
-    print("\n" + "=" * 60)
-    print("Done!")
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/scripts/upload_vectorstore_to_firebase.py b/scripts/upload_vectorstore_to_firebase.py
deleted file mode 100644
index a2c99695f73013bee5ed7bf684ee1a900cb9036b..0000000000000000000000000000000000000000
--- a/scripts/upload_vectorstore_to_firebase.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-Upload vectorstore directory to Firebase Storage.
-Run: python -m backend.scripts.upload_vectorstore_to_firebase
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-logger = logging.getLogger("mathpulse.upload_vectorstore")
-
-sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
-
-from backend.rag.firebase_storage_loader import _init_firebase_storage
-
-VECTORSTORE_SOURCE_DIR = Path(__file__).resolve().parents[3] / "datasets" / "vectorstore"
-REMOTE_PREFIX = "vectorstore/"
-
-
-def upload_directory(local_dir: Path, bucket, prefix: str):
-    """Recursively upload a local directory to Firebase Storage prefix."""
-    uploaded = 0
-    skipped = 0
-
-    for root, dirs, files in os.walk(local_dir):
-        for filename in files:
-            local_path = Path(root) / filename
-            relative_path = local_path.relative_to(local_dir)
-            remote_path = f"{prefix}{relative_path.as_posix()}"
-
-            try:
-                blob = bucket.blob(remote_path)
-                blob.upload_from_filename(str(local_path))
-                logger.info("Uploaded: %s (%d bytes)", remote_path, local_path.stat().st_size)
-                uploaded += 1
-            except Exception as e:
-                logger.error("Failed to upload %s: %s", remote_path, e)
-                skipped += 1
-
-    return uploaded, skipped
-
-
-if __name__ == "__main__":
-    import argparse
-
-    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
-
-    parser = argparse.ArgumentParser(description="Upload vectorstore to Firebase Storage")
-    parser.add_argument("--source", type=str, default=str(VECTORSTORE_SOURCE_DIR),
-                        help="Local vectorstore directory")
-    parser.add_argument("--prefix", type=str, default=REMOTE_PREFIX,
-                        help="Remote path prefix in Firebase Storage")
-    args = parser.parse_args()
-
-    source_dir = Path(args.source)
-    if not source_dir.exists():
-        logger.error("Source directory does not exist: %s", source_dir)
-        sys.exit(1)
-
-    _, bucket = _init_firebase_storage()
-    if bucket is None:
-        logger.error("Firebase Storage not available")
-        sys.exit(1)
-
-    logger.info("Uploading vectorstore from %s to gs://%s/%s",
-                source_dir, bucket.name, args.prefix)
-    uploaded, skipped = upload_directory(source_dir, bucket, args.prefix)
-    logger.info("Upload complete: %d uploaded, %d skipped", uploaded, skipped)
\ No newline at end of file
diff --git a/services/__init__.py b/services/__init__.py
index 21b2994b3d58a6ea6eaf982254a7f87b36214df6..12e0a02d5e5b0c1b238b339adad61296430709d2 100644
--- a/services/__init__.py
+++ b/services/__init__.py
@@ -1,44 +1 @@
 """Backend service helpers for inference, logging, and integrations."""
-
-from .inference_client import (
-    create_default_client,
-    InferenceRequest,
-    InferenceClient,
-    is_sequential_model,
-    get_current_runtime_config,
-    get_model_for_task,
-    set_runtime_model_profile,
-    set_runtime_model_override,
-    reset_runtime_overrides,
-    model_supports_thinking,
-    _MODEL_PROFILES,
-)
-
-from .ai_client import (
-    get_deepseek_client,
-    CHAT_MODEL,
-    REASONER_MODEL,
-    APIError,
-    RateLimitError,
-    APITimeoutError,
-)
-
-__all__ = [
-    "create_default_client",
-    "InferenceRequest",
-    "InferenceClient",
-    "is_sequential_model",
-    "get_current_runtime_config",
-    "get_model_for_task",
-    "set_runtime_model_profile",
-    "set_runtime_model_override",
-    "reset_runtime_overrides",
-    "model_supports_thinking",
-    "_MODEL_PROFILES",
-    "get_deepseek_client",
-    "CHAT_MODEL",
-    "REASONER_MODEL",
-    "APIError",
-    "RateLimitError",
-    "APITimeoutError",
-]
diff --git a/services/ai_client.py b/services/ai_client.py
deleted file mode 100644
index fef823c9cffed27dbe233353bbf4acd6ce3586cf..0000000000000000000000000000000000000000
--- a/services/ai_client.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import os
-from openai import OpenAI, APIError, RateLimitError, APITimeoutError
-from functools import lru_cache
-
-__all__ = [
-    "get_deepseek_client",
-    "CHAT_MODEL",
-    "REASONER_MODEL",
-    "DEEPSEEK_BASE_URL",
-    "APIError",
-    "RateLimitError",
-    "APITimeoutError",
-]
-
-DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
-CHAT_MODEL = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
-REASONER_MODEL = os.getenv("DEEPSEEK_REASONER_MODEL", "deepseek-reasoner")
-
-
-@lru_cache(maxsize=1)
-def get_deepseek_client() -> OpenAI:
-    api_key = os.getenv("DEEPSEEK_API_KEY")
-    if not api_key:
-        raise ValueError("DEEPSEEK_API_KEY environment variable not set")
-    return OpenAI(
-        api_key=api_key,
-        base_url=DEEPSEEK_BASE_URL,
-    )
diff --git a/services/curriculum_service.py b/services/curriculum_service.py
deleted file mode 100644
index adf706c5a1eedcc31b4a32f65df3d02dcc494d57..0000000000000000000000000000000000000000
--- a/services/curriculum_service.py
+++ /dev/null
@@ -1,232 +0,0 @@
-"""
-Curriculum Service - Firestore-backed curriculum data.
-
-Fetches subjects, topics, and modules from Firestore.
-Falls back to static data if Firestore is unavailable.
-"""
-
-import logging
-import os
-from typing import Any, Dict, List, Optional
-
-logger = logging.getLogger(__name__)
-
-# Static curriculum data as fallback
-_STATIC_SUBJECTS = [
-    {
-        "id": "gen-math",
-        "code": "GEN MATH",
-        "name": "General Mathematics",
-        "gradeLevel": "Grade 11",
-        "semester": "1st Semester",
-        "color": "from-blue-500 to-cyan-500",
-        "pdfAvailable": True,
-        "topics": [
-            {"id": "gen-math-001", "name": "Patterns and Real-Life Relationships", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-002", "name": "Functions as Mathematical Models", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-003", "name": "Function Notation and Evaluation", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-004", "name": "Domain and Range of Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-005", "name": "Operations on Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-006", "name": "Composite Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-007", "name": "Inverse Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-008", "name": "Graphs of Rational Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-009", "name": "Graphs of Exponential Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-010", "name": "Graphs of Logarithmic Functions", "unit": "Patterns, Relations, and Functions"},
-            {"id": "gen-math-011", "name": "Simple and Compound Interest", "unit": "Financial Mathematics"},
-            {"id": "gen-math-012", "name": "Simple and General Annuities", "unit": "Financial Mathematics"},
-            {"id": "gen-math-013", "name": "Present and Future Value", "unit": "Financial Mathematics"},
-            {"id": "gen-math-014", "name": "Loans, Amortization, and Sinking Funds", "unit": "Financial Mathematics"},
-            {"id": "gen-math-015", "name": "Stocks, Bonds, and Market Indices", "unit": "Financial Mathematics"},
-            {"id": "gen-math-016", "name": "Business Decision-Making with Mathematical Models", "unit": "Financial Mathematics"},
-            {"id": "gen-math-017", "name": "Propositions and Logical Connectives", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-018", "name": "Truth Values and Truth Tables", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-019", "name": "Logical Equivalence and Implication", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-020", "name": "Quantifiers and Negation", "unit": "Logic and Mathematical Reasoning"},
-            {"id": "gen-math-021", "name": "Validity of Arguments", "unit": "Logic and Mathematical Reasoning"},
-        ]
-    },
-    {
-        "id": "stats-prob",
-        "code": "STAT&PROB",
-        "name": "Statistics and Probability",
-        "gradeLevel": "Grade 11",
-        "semester": "2nd Semester",
-        "color": "from-sky-500 to-cyan-500",
-        "pdfAvailable": True,
-        "topics": [
-            {"id": "stat-001", "name": "Random Variables", "unit": "Random Variables"},
-            {"id": "stat-002", "name": "Discrete Probability Distributions", "unit": "Random Variables"},
-            {"id": "stat-003", "name": "Mean and Variance of Discrete RV", "unit": "Random Variables"},
-            {"id": "stat-004", "name": "Normal Distribution", "unit": "Normal Distribution"},
-            {"id": "stat-005", "name": "Standard Normal Distribution and Z-scores", "unit": "Normal Distribution"},
-            {"id": "stat-006", "name": "Areas Under the Normal Curve", "unit": "Normal Distribution"},
-            {"id": "stat-007", "name": "Sampling Distributions", "unit": "Sampling and Estimation"},
-            {"id": "stat-008", "name": "Central Limit Theorem", "unit": "Sampling and Estimation"},
-            {"id": "stat-009", "name": "Point Estimation", "unit": "Sampling and Estimation"},
-            {"id": "stat-010", "name": "Confidence Intervals", "unit": "Sampling and Estimation"},
-            {"id": "stat-011", "name": "Hypothesis Testing Concepts", "unit": "Hypothesis Testing"},
-            {"id": "stat-012", "name": "T-test", "unit": "Hypothesis Testing"},
-            {"id": "stat-013", "name": "Z-test", "unit": "Hypothesis Testing"},
-            {"id": "stat-014", "name": "Correlation and Regression", "unit": "Correlation and Regression"},
-        ]
-    },
-    {
-        "id": "pre-calc",
-        "code": "PRE-CALC",
-        "name": "Pre-Calculus",
-        "gradeLevel": "Grade 12",
-        "semester": "1st Semester",
-        "color": "from-orange-500 to-red-500",
-        "pdfAvailable": False,
-        "topics": [
-            {"id": "pre-calc-001", "name": "Conic Sections - Parabola", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-002", "name": "Conic Sections - Ellipse", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-003", "name": "Conic Sections - Hyperbola", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-004", "name": "Conic Sections - Circle", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-005", "name": "Systems of Nonlinear Equations", "unit": "Analytic Geometry"},
-            {"id": "pre-calc-006", "name": "Sequences and Series", "unit": "Series and Induction"},
-            {"id": "pre-calc-007", "name": "Arithmetic Sequences", "unit": "Series and Induction"},
-            {"id": "pre-calc-008", "name": "Geometric Sequences", "unit": "Series and Induction"},
-            {"id": "pre-calc-009", "name": "Mathematical Induction", "unit": "Series and Induction"},
-            {"id": "pre-calc-010", "name": "Binomial Theorem", "unit": "Series and Induction"},
-            {"id": "pre-calc-011", "name": "Angles and Unit Circle", "unit": "Trigonometry"},
-            {"id": "pre-calc-012", "name": "Trigonometric Functions", "unit": "Trigonometry"},
-            {"id": "pre-calc-013", "name": "Trigonometric Identities", "unit": "Trigonometry"},
-            {"id": "pre-calc-014", "name": "Sum and Difference Formulas", "unit": "Trigonometry"},
-            {"id": "pre-calc-015", "name": "Inverse Trigonometric Functions", "unit": "Trigonometry"},
-            {"id": "pre-calc-016", "name": "Polar Coordinates", "unit": "Trigonometry"},
-        ]
-    },
-    {
-        "id": "basic-calc",
-        "code": "BASIC CALC",
-        "name": "Basic Calculus",
-        "gradeLevel": "Grade 12",
-        "semester": "2nd Semester",
-        "color": "from-green-500 to-teal-500",
-        "pdfAvailable": True,
-        "topics": [
-            {"id": "calc-001", "name": "Limits of Functions", "unit": "Limits"},
-            {"id": "calc-002", "name": "Limit Theorems", "unit": "Limits"},
-            {"id": "calc-003", "name": "One-Sided Limits", "unit": "Limits"},
-            {"id": "calc-004", "name": "Infinite Limits and Limits at Infinity", "unit": "Limits"},
-            {"id": "calc-005", "name": "Continuity of Functions", "unit": "Limits"},
-            {"id": "calc-006", "name": "Definition of the Derivative", "unit": "Derivatives"},
-            {"id": "calc-007", "name": "Differentiation Rules", "unit": "Derivatives"},
-            {"id": "calc-008", "name": "Chain Rule", "unit": "Derivatives"},
-            {"id": "calc-009", "name": "Implicit Differentiation", "unit": "Derivatives"},
-            {"id": "calc-010", "name": "Higher-Order Derivatives", "unit": "Derivatives"},
-            {"id": "calc-011", "name": "Related Rates", "unit": "Derivatives"},
-            {"id": "calc-012", "name": "Extrema and the First Derivative Test", "unit": "Derivatives"},
-            {"id": "calc-013", "name": "Concavity and the Second Derivative Test", "unit": "Derivatives"},
-            {"id": "calc-014", "name": "Optimization Problems", "unit": "Derivatives"},
-            {"id": "calc-015", "name": "Antiderivatives and Indefinite Integrals", "unit": "Integration"},
-            {"id": "calc-016", "name": "Definite Integrals and the FTC", "unit": "Integration"},
-            {"id": "calc-017", "name": "Integration by Substitution", "unit": "Integration"},
-            {"id": "calc-018", "name": "Area Under a Curve", "unit": "Integration"},
-        ]
-    },
-]
-
-_firestore_db = None
-
-
-def _get_firestore_db():
-    """Initialize Firestore client."""
-    global _firestore_db
-    if _firestore_db is not None:
-        return _firestore_db
-
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            # Try service account from env or default credentials
-            import json
-            svc_account = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
-            if svc_account:
-                sa_creds = json.loads(svc_account)
-                firebase_admin.initialize_app(firebase_admin.Certificate(sa_creds))
-            else:
-                firebase_admin.initialize_app()
-        _firestore_db = firestore.client()
-        return _firestore_db
-    except Exception as e:
-        logger.warning(f"Could not initialize Firestore: {e}")
-        return None
-
-
-def get_subjects(grade_level: Optional[str] = None) -> List[Dict[str, Any]]:
-    """
-    Fetch all subjects from Firestore.
-    Falls back to static data if Firestore unavailable.
-    Defaults to Grade 11 (SHS) if no grade specified.
-    """
-    # Default to Grade 11 (SHS) - only serve Grade 11 students for now
-    if grade_level is None:
-        grade_level = "Grade 11"
-    
-    db = _get_firestore_db()
-    
-    if db is not None:
-        try:
-            subjects_ref = db.collection("subjects")
-            if grade_level:
-                subjects_ref = subjects_ref.where("gradeLevel", "==", grade_level)
-            
-            docs = subjects_ref.stream()
-            subjects = []
-            for doc in docs:
-                data = doc.to_dict()
-                if data:
-                    data["id"] = doc.id
-                    subjects.append(data)
-            
-            if subjects:
-                logger.info(f"Loaded {len(subjects)} subjects from Firestore")
-                return subjects
-        except Exception as e:
-            logger.warning(f"Firestore fetch failed, using static data: {e}")
-    
-    # Static fallback
-    if grade_level:
-        return [s for s in _STATIC_SUBJECTS if s.get("gradeLevel") == grade_level]
-    return list(_STATIC_SUBJECTS)
-
-
-def get_subject(subject_id: str) -> Optional[Dict[str, Any]]:
-    """Fetch a single subject by ID."""
-    db = _get_firestore_db()
-    
-    if db is not None:
-        try:
-            doc = db.collection("subjects").document(subject_id).get()
-            if doc.exists:
-                data = doc.to_dict()
-                data["id"] = doc.id
-                return data
-        except Exception as e:
-            logger.warning(f"Firestore fetch failed for {subject_id}: {e}")
-    
-    # Static fallback
-    for subject in _STATIC_SUBJECTS:
-        if subject["id"] == subject_id:
-            return dict(subject)
-    return None
-
-
-def get_topics(subject_id: str) -> List[Dict[str, Any]]:
-    """Fetch all topics for a subject."""
-    subject = get_subject(subject_id)
-    if subject:
-        return subject.get("topics", [])
-    return []
-
-
-def get_topic(subject_id: str, topic_id: str) -> Optional[Dict[str, Any]]:
-    """Fetch a single topic."""
-    topics = get_topics(subject_id)
-    for topic in topics:
-        if topic["id"] == topic_id:
-            return topic
-    return None
\ No newline at end of file
diff --git a/services/inference_client.py b/services/inference_client.py
index f1cebff9f73dda0341c8d4dcf5af63af37bab3e2..8a464123fb2e7051cefed2cfe0a0ee6db796d544 100644
--- a/services/inference_client.py
+++ b/services/inference_client.py
@@ -10,198 +10,13 @@ from typing import Any, Dict, List, Optional, Tuple
 
 import requests
 import yaml
-from openai import OpenAI, APIError, RateLimitError, APITimeoutError
+from huggingface_hub import InferenceClient as HFInferenceClient
 
-from .ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL, DEEPSEEK_BASE_URL
 from .logging_utils import configure_structured_logging, log_model_call
 
 LOGGER = configure_structured_logging("mathpulse.inference")
 TEMP_CHAT_MODEL_OVERRIDE_ENV = "INFERENCE_CHAT_MODEL_TEMP_OVERRIDE"
 
-# ── Model Profiles ────────────────────────────────────────────────────────────
-# A profile sets multiple env defaults in one shot.
-# Individual env vars (DEEPSEEK_MODEL, DEEPSEEK_REASONER_MODEL, etc.) still override.
-# Usage: MODEL_PROFILE=dev  or  MODEL_PROFILE=prod  or  MODEL_PROFILE=budget
-# Profiles can also be applied at runtime via the admin panel without restart.
-
-_MODEL_PROFILES: dict[str, dict[str, str]] = {
-    "dev": {
-        "INFERENCE_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
-        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
-        "HF_RAG_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
-    },
-    "prod": {
-        "INFERENCE_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
-        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
-        "HF_RAG_MODEL_ID": REASONER_MODEL,
-        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
-    },
-    "budget": {
-        "INFERENCE_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
-        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
-        "HF_RAG_MODEL_ID": CHAT_MODEL,
-        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
-    },
-}
-
-# ── Runtime Override Store ────────────────────────────────────────────────────
-# Mutated at runtime by the admin panel via /api/admin/model-config.
-# Priority: above env vars, below INFERENCE_ENFORCE_LOCK_MODEL.
-# Persisted to Firestore so backend cold-restarts restore the last admin-set config.
-
-_RUNTIME_OVERRIDES: dict[str, str] = {}
-_RUNTIME_PROFILE: str = ""
-
-_FS_COLLECTION = "system_config"
-_FS_DOC = "active_model_config"
-
-
-def _save_runtime_config_to_firestore() -> None:
-    try:
-        from firebase_admin import firestore as fs
-
-        db = fs.client()
-        db.collection(_FS_COLLECTION).document(_FS_DOC).set(
-            {
-                "profile": _RUNTIME_PROFILE,
-                "overrides": _RUNTIME_OVERRIDES,
-                "updatedAt": fs.SERVER_TIMESTAMP,
-            }
-        )
-    except Exception as e:
-        LOGGER.warning("Could not persist model config to Firestore: %s", e)
-
-
-def _load_runtime_config_from_firestore() -> None:
-    try:
-        from firebase_admin import firestore as fs
-
-        db = fs.client()
-        doc = db.collection(_FS_COLLECTION).document(_FS_DOC).get()
-        if not doc.exists:
-            return
-        data = doc.to_dict() or {}
-        profile = str(data.get("profile", "")).strip().lower()
-        overrides = data.get("overrides", {})
-        if profile and profile in _MODEL_PROFILES:
-            global _RUNTIME_PROFILE
-            _RUNTIME_PROFILE = profile
-            _RUNTIME_OVERRIDES.clear()
-            _RUNTIME_OVERRIDES.update(_MODEL_PROFILES[profile])
-        if isinstance(overrides, dict):
-            for key, value in overrides.items():
-                _RUNTIME_OVERRIDES[str(key)] = str(value)
-        LOGGER.info("Restored runtime model config from Firestore: profile=%s", profile)
-    except ImportError:
-        LOGGER.debug("Firebase not available (optional for DeepSeek-only)")
-    except Exception as e:
-        LOGGER.warning("Could not restore model config from Firestore: %s", e)
-
-
-def _apply_model_profile() -> None:
-    profile_name = os.getenv("MODEL_PROFILE", "").strip().lower()
-    if not profile_name:
-        return
-    profile = _MODEL_PROFILES.get(profile_name)
-    if profile is None:
-        LOGGER.warning("MODEL_PROFILE='%s' is not a known profile.", profile_name)
-        return
-    for key, value in profile.items():
-        if not os.environ.get(key):
-            os.environ[key] = value
-    LOGGER.info("Startup model profile applied: %s", profile_name)
-
-
-_apply_model_profile()
-_load_runtime_config_from_firestore()
-
-
-def set_runtime_model_profile(profile_name: str) -> None:
-    """Apply a named profile at runtime without restarting the process."""
-    global _RUNTIME_PROFILE, _RUNTIME_OVERRIDES
-    normalized = profile_name.strip().lower()
-    profile = _MODEL_PROFILES.get(normalized)
-    if not profile:
-        raise ValueError(
-            f"Unknown profile: '{profile_name}'. Valid values: {list(_MODEL_PROFILES.keys())}"
-        )
-    _RUNTIME_PROFILE = normalized
-    _RUNTIME_OVERRIDES.clear()
-    _RUNTIME_OVERRIDES.update(profile)
-    LOGGER.info("Runtime model profile switched to: %s", profile_name)
-    _save_runtime_config_to_firestore()
-
-
-def set_runtime_model_override(key: str, value: str) -> None:
-    """Set a single model env key at runtime."""
-    _RUNTIME_OVERRIDES[key] = value
-    LOGGER.info("Runtime model override set: %s = %s", key, value)
-    _save_runtime_config_to_firestore()
-
-
-def reset_runtime_overrides() -> None:
-    """Clear all runtime overrides."""
-    global _RUNTIME_PROFILE
-    _RUNTIME_OVERRIDES.clear()
-    _RUNTIME_PROFILE = ""
-    LOGGER.info("Runtime model overrides cleared.")
-    _save_runtime_config_to_firestore()
-
-
-def get_current_runtime_config() -> dict:
-    resolved: dict[str, str] = {}
-    for key in {
-        "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
-        "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
-    }:
-        resolved[key] = _resolve_key(key)
-    return {
-        "profile": _RUNTIME_PROFILE,
-        "overrides": dict(_RUNTIME_OVERRIDES),
-        "resolved": resolved,
-    }
-
-
-def _resolve_key(key: str) -> str:
-    if value := _RUNTIME_OVERRIDES.get(key):
-        return value
-    if _RUNTIME_PROFILE and _RUNTIME_PROFILE in _MODEL_PROFILES:
-        if value := _MODEL_PROFILES[_RUNTIME_PROFILE].get(key):
-            return value
-    return os.getenv(key, "")
-
-
-def get_model_for_task(task_type: str) -> str:
-    task = (task_type or "default").strip().lower()
-    enforce_lock = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
-    if enforce_lock:
-        override = (
-            _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID")
-            or os.getenv("INFERENCE_LOCK_MODEL_ID")
-            or CHAT_MODEL
-        )
-        return override
-    task_key_map = {
-        "chat": "INFERENCE_CHAT_MODEL_ID",
-        "quiz_generation": "HF_QUIZ_MODEL_ID",
-        "rag_lesson": "HF_RAG_MODEL_ID",
-        "rag_problem": "HF_RAG_MODEL_ID",
-        "rag_analysis_context": "HF_RAG_MODEL_ID",
-    }
-    if env_key := task_key_map.get(task):
-        if resolved := _resolve_key(env_key):
-            return resolved
-    return _resolve_key("INFERENCE_MODEL_ID") or CHAT_MODEL
-
-
-def model_supports_thinking(model_id: str = "") -> bool:
-    mid = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
-    return mid == REASONER_MODEL
-
 
 def _normalize_local_space_url(raw_url: str) -> str:
     """Accept either hf.space host or huggingface.co/spaces URL for local_space provider."""
@@ -209,6 +24,8 @@ def _normalize_local_space_url(raw_url: str) -> str:
     if not cleaned:
         return "http://127.0.0.1:7860"
 
+    # Convert page URL format to runtime host format:
+    # https://huggingface.co/spaces/{owner}/{space} -> https://{owner}-{space}.hf.space
     match = re.match(r"^https?://huggingface\.co/spaces/([^/]+)/([^/]+)$", cleaned, re.IGNORECASE)
     if match:
         owner = match.group(1).strip().lower()
@@ -224,41 +41,38 @@ class InferenceRequest:
     model: Optional[str] = None
     task_type: str = "default"
     request_tag: str = ""
-    max_new_tokens: int = 900
+    max_new_tokens: int = 512
     temperature: float = 0.2
     top_p: float = 0.9
     repetition_penalty: float = 1.15
     timeout_sec: Optional[int] = None
-    enable_thinking: bool = False
 
 
 class InferenceClient:
-    def __init__(self, firestore_client: Optional[Any] = None) -> None:
-        self.firestore = firestore_client
-        self._last_persist_time = 0.0
-        self._persist_throttle_sec = 30.0
-
+    def __init__(self) -> None:
+        # Try multiple config paths (HF Space, Docker, local development)
+        # The deploy script uploads config/ to the space root
         config_paths = [
-            Path("./config/models.yaml"),
-            Path("/config/models.yaml"),
-            Path("/app/config/models.yaml"),
-            Path.cwd() / "config" / "models.yaml",
-            Path(__file__).resolve().parents[2] / "config" / "models.yaml",
+            Path("./config/models.yaml"),  # Current working directory (most reliable)
+            Path("/config/models.yaml"),  # HF Space root
+            Path("/app/config/models.yaml"),  # App directory
+            Path.cwd() / "config" / "models.yaml",  # CWD with config subdir
+            Path(__file__).resolve().parents[2] / "config" / "models.yaml",  # Package root
         ]
-
+        
         config: Dict[str, object] = {}
         config_path = None
-
+        
         for path in config_paths:
             if path.exists():
                 config_path = path
                 with path.open("r", encoding="utf-8") as fh:
                     config = yaml.safe_load(fh) or {}
-                LOGGER.info(f"??? Loaded config from {config_path}")
+                LOGGER.info(f"✅ Loaded config from {config_path}")
                 break
-
+        
         if not config_path:
-            LOGGER.warning(f"??????  Config file not found. Checked: {[str(p) for p in config_paths]}")
+            LOGGER.warning(f"⚠️  Config file not found. Checked: {[str(p) for p in config_paths]}")
             LOGGER.warning(f"    CWD: {Path.cwd()}")
             LOGGER.warning(f"    Using hardcoded defaults")
 
@@ -270,43 +84,69 @@ class InferenceClient:
                 if isinstance(primary_cfg, dict):
                     primary = primary_cfg
 
-        self.provider = "deepseek"
-        self.ds_api_key = os.getenv("DEEPSEEK_API_KEY", "")
-        self.ds_base_url = os.getenv("DEEPSEEK_BASE_URL", DEEPSEEK_BASE_URL)
-        self.ds_chat_model = os.getenv("DEEPSEEK_MODEL", CHAT_MODEL)
-        self.ds_reasoner_model = os.getenv("DEEPSEEK_REASONER_MODEL", REASONER_MODEL)
-
+        self.provider = os.getenv("INFERENCE_PROVIDER", "hf_inference").strip().lower()
+        self.pro_provider = os.getenv("INFERENCE_PRO_PROVIDER", "hf_inference").strip().lower()
+        self.gpu_provider = os.getenv("INFERENCE_GPU_PROVIDER", "hf_inference").strip().lower()
+        self.cpu_provider = os.getenv("INFERENCE_CPU_PROVIDER", "hf_inference").strip().lower()
+        self.enable_provider_fallback = os.getenv("INFERENCE_ENABLE_PROVIDER_FALLBACK", "true").strip().lower() in {"1", "true", "yes", "on"}
+        self.pro_enabled = os.getenv("INFERENCE_PRO_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
+        self.hf_token = os.getenv(
+            "HF_TOKEN",
+            os.getenv("HUGGING_FACE_API_TOKEN", os.getenv("HUGGINGFACE_API_TOKEN", "")),
+        )
+        self.hf_base_url = os.getenv("INFERENCE_HF_BASE_URL", "https://router.huggingface.co/hf-inference/models")
+        self.hf_chat_url = os.getenv("INFERENCE_HF_CHAT_URL", "https://router.huggingface.co/v1/chat/completions")
+        
+        # Featherless AI for Qwen math models (used as fallback when HF router fails)
+        self.featherless_api_key = os.getenv("FEATHERLESS_API_KEY", "")
+        self.featherless_chat_url = os.getenv("FEATHERLESS_CHAT_URL", "https://api.featherless.ai/openai/v1/chat/completions")
+        
         self.local_space_url = _normalize_local_space_url(
             os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
         )
         self.local_generate_path = os.getenv("INFERENCE_LOCAL_SPACE_GENERATE_PATH", "/gradio_api/call/generate")
+        self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
+        self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
 
-        self.enforce_lock_model = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
-        self.lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", CHAT_MODEL).strip() or CHAT_MODEL
+        self.enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
+        self.qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen3-32B").strip() or "Qwen/Qwen3-32B"
 
-        default_model_fallback = str(primary.get("id") or CHAT_MODEL)
+        default_model_fallback = str(primary.get("id") or "Qwen/Qwen3-32B")
         env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
         self.default_model = env_model_id or default_model_fallback
-
+        
         default_max_tokens = str(primary.get("max_new_tokens") or 512)
         self.default_max_new_tokens = int(os.getenv("INFERENCE_MAX_NEW_TOKENS", default_max_tokens))
-
+        
         default_temp = str(primary.get("temperature") or 0.2)
         self.default_temperature = float(os.getenv("INFERENCE_TEMPERATURE", default_temp))
-
+        
         default_top_p = str(primary.get("top_p") or 0.9)
         self.default_top_p = float(os.getenv("INFERENCE_TOP_P", default_top_p))
-
+        
+        # Task-specific model overrides via environment variables
         self.chat_model_override = os.getenv("INFERENCE_CHAT_MODEL_ID", "").strip()
         self.chat_model_temp_override = os.getenv(TEMP_CHAT_MODEL_OVERRIDE_ENV, "").strip()
         self.chat_strict_model_only = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
+        self.chat_hard_model = os.getenv("INFERENCE_CHAT_HARD_MODEL_ID", "meta-llama/Meta-Llama-3-70B-Instruct").strip()
+        self.chat_hard_trigger_enabled = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
+        self.chat_hard_prompt_chars = max(256, int(os.getenv("INFERENCE_CHAT_HARD_PROMPT_CHARS", "800")))
+        self.chat_hard_history_chars = max(
+            self.chat_hard_prompt_chars,
+            int(os.getenv("INFERENCE_CHAT_HARD_HISTORY_CHARS", "1800")),
+        )
+        hard_keywords_raw = os.getenv(
+            "INFERENCE_CHAT_HARD_KEYWORDS",
+            "step-by-step,show all steps,derive,proof,prove,rigorous,multi-step,word problem",
+        )
+        self.chat_hard_keywords = [kw.strip().lower() for kw in hard_keywords_raw.split(",") if kw.strip()]
 
-        self.ds_timeout_sec = int(os.getenv("INFERENCE_HF_TIMEOUT_SEC", "90"))
+        self.hf_timeout_sec = int(os.getenv("INFERENCE_HF_TIMEOUT_SEC", "90"))
         self.local_timeout_sec = int(os.getenv("INFERENCE_LOCAL_SPACE_TIMEOUT_SEC", "90"))
         self.max_retries = int(os.getenv("INFERENCE_MAX_RETRIES", "3"))
         self.backoff_sec = float(os.getenv("INFERENCE_BACKOFF_SEC", "1.5"))
-        self.interactive_timeout_sec = int(os.getenv("INFERENCE_INTERACTIVE_TIMEOUT_SEC", str(self.ds_timeout_sec)))
-        self.background_timeout_sec = int(os.getenv("INFERENCE_BACKGROUND_TIMEOUT_SEC", str(self.ds_timeout_sec)))
+        self.interactive_timeout_sec = int(os.getenv("INFERENCE_INTERACTIVE_TIMEOUT_SEC", str(self.hf_timeout_sec)))
+        self.background_timeout_sec = int(os.getenv("INFERENCE_BACKGROUND_TIMEOUT_SEC", str(self.hf_timeout_sec)))
         self.interactive_max_retries = int(os.getenv("INFERENCE_INTERACTIVE_MAX_RETRIES", str(self.max_retries)))
         self.background_max_retries = int(os.getenv("INFERENCE_BACKGROUND_MAX_RETRIES", str(self.max_retries)))
         self.interactive_backoff_sec = float(os.getenv("INFERENCE_INTERACTIVE_BACKOFF_SEC", str(self.backoff_sec)))
@@ -327,6 +167,12 @@ class InferenceClient:
         )
         self.cpu_only_tasks = {v.strip().lower() for v in cpu_tasks_raw.split(",") if v.strip()}
 
+        pro_tasks_raw = os.getenv(
+            "INFERENCE_PRO_PRIORITY_TASKS",
+            "chat,quiz_generation,lesson_generation,learning_path,verify_solution",
+        )
+        self.pro_priority_tasks = {v.strip().lower() for v in pro_tasks_raw.split(",") if v.strip()}
+
         interactive_tasks_raw = os.getenv(
             "INFERENCE_INTERACTIVE_TASKS",
             "chat,verify_solution,daily_insight",
@@ -338,20 +184,29 @@ class InferenceClient:
         )
 
         # Default task-to-model routing.
+        # Keep all tasks pinned to Qwen3-32B when qwen-only lock is active.
         self.task_model_map: Dict[str, str] = {
-            "chat": CHAT_MODEL,
-            "verify_solution": CHAT_MODEL,
-            "lesson_generation": CHAT_MODEL,
-            "quiz_generation": CHAT_MODEL,
-            "learning_path": CHAT_MODEL,
-            "daily_insight": CHAT_MODEL,
-            "risk_classification": CHAT_MODEL,
-            "risk_narrative": CHAT_MODEL,
+            "chat": "Qwen/Qwen3-32B",
+            "verify_solution": "Qwen/Qwen3-32B",
+            "lesson_generation": "Qwen/Qwen3-32B",
+            "quiz_generation": "Qwen/Qwen3-32B",
+            "learning_path": "Qwen/Qwen3-32B",
+            "daily_insight": "Qwen/Qwen3-32B",
+            "risk_classification": "Qwen/Qwen3-32B",
+            "risk_narrative": "Qwen/Qwen3-32B",
         }
+        # Fallback chains (only to other HF-supported models, no featherless-ai)
         self.task_fallback_model_map: Dict[str, List[str]] = {
-            "chat": [CHAT_MODEL],
-            "verify_solution": [CHAT_MODEL],
+            "chat": [
+                "meta-llama/Llama-3.1-8B-Instruct",
+                "google/gemma-2-2b-it",
+            ],
+            "verify_solution": [
+                "meta-llama/Llama-3.1-8B-Instruct",
+                "google/gemma-2-2b-it",
+            ],
         }
+        # Model-to-provider mappings (not needed when using model:provider syntax directly)
         self.model_provider_map: Dict[str, str] = {}
         self.task_provider_map: Dict[str, str] = {}
         if isinstance(config, dict):
@@ -364,6 +219,7 @@ class InferenceClient:
                         for task, model in task_models.items()
                         if str(task).strip() and str(model).strip()
                     }
+                    # Merge config models with defaults (config overrides defaults)
                     self.task_model_map.update(config_task_models)
                 task_fallback_models = routing_cfg.get("task_fallback_model_map", {})
                 if isinstance(task_fallback_models, dict):
@@ -395,7 +251,7 @@ class InferenceClient:
             for task_key in list(self.task_model_map.keys()):
                 self.task_model_map[task_key] = env_model_id
             LOGGER.info(
-                f"???? INFERENCE_MODEL_ID env var override applied: {env_model_id}"
+                f"🔄 INFERENCE_MODEL_ID env var override applied: {env_model_id}"
             )
             LOGGER.info(
                 f"   Task model mappings changed from: {original_map}"
@@ -404,27 +260,29 @@ class InferenceClient:
         else:
             env_override_note = ""
 
-        if self.enforce_lock_model:
-            lock_map_before = dict(self.task_model_map)
-            self.default_model = self.lock_model_id
+        if self.enforce_qwen_only:
+            qwen_map_before = dict(self.task_model_map)
+            self.default_model = self.qwen_lock_model
             for task_key in list(self.task_model_map.keys()):
-                self.task_model_map[task_key] = self.lock_model_id
+                self.task_model_map[task_key] = self.qwen_lock_model
             self.fallback_models = []
             self.task_fallback_model_map = {
                 task_key: [] for task_key in self.task_model_map.keys()
             }
-            LOGGER.info(f"???? INFERENCE_ENFORCE_LOCK_MODEL enabled: locking all inference tasks to {self.lock_model_id}")
-            LOGGER.info(f"   Cleared fallback models")
-            LOGGER.info(f"   Task model mappings forced from: {lock_map_before}")
+            self.chat_hard_trigger_enabled = False
+            LOGGER.info(f"🔒 INFERENCE_ENFORCE_QWEN_ONLY enabled: locking all inference tasks to {self.qwen_lock_model}")
+            LOGGER.info(f"   Cleared fallback models and hard-escalation path")
+            LOGGER.info(f"   Task model mappings forced from: {qwen_map_before}")
 
+        # Log configuration loaded for debugging
         config_status = "from file" if config_path else "hardcoded defaults (no config file found)"
         effective_chat_model_for_logs = self.chat_model_override or self.task_model_map.get("chat", self.default_model)
-        LOGGER.info(f"??? InferenceClient initialized {config_status}{env_override_note}")
+        LOGGER.info(f"✅ InferenceClient initialized {config_status}{env_override_note}")
         LOGGER.info(f"   Default model: {self.default_model}")
         LOGGER.info(f"   Chat model: {effective_chat_model_for_logs}")
         LOGGER.info(f"   Chat temp override ({TEMP_CHAT_MODEL_OVERRIDE_ENV}): {self.chat_model_temp_override or 'disabled'}")
         LOGGER.info(f"   Chat strict model lock: {self.chat_strict_model_only}")
-        LOGGER.info(f"   Global model lock: {self.enforce_lock_model}")
+        LOGGER.info(f"   Global Qwen-only lock: {self.enforce_qwen_only}")
         LOGGER.info(f"   Verify solution model: {self.task_model_map.get('verify_solution', self.default_model)}")
         LOGGER.info(f"   Full task_model_map: {self.task_model_map}")
 
@@ -436,23 +294,18 @@ class InferenceClient:
             "requests_error": 0,
             "retries_total": 0,
             "fallback_attempts": 0,
-            "latency_sum_ms": 0.0,
-            "latency_count": 0,
             "route_counts": {},
             "task_counts": {},
             "provider_counts": {},
             "status_code_counts": {},
         }
 
-        self._load_persistent_metrics()
-
     def _bump_metric(self, key: str, inc: int = 1) -> None:
         with self._metrics_lock:
             current = self._metrics.get(key) or 0
             if not isinstance(current, int):
                 current = 0
             self._metrics[key] = current + inc
-        self._persist_metrics()
 
     def _bump_bucket(self, key: str, bucket: str, inc: int = 1) -> None:
         with self._metrics_lock:
@@ -464,50 +317,6 @@ class InferenceClient:
             if not isinstance(current, int):
                 current = 0
             mapping[bucket] = current + inc
-        self._persist_metrics()
-
-    def _record_completion(self, *, latency_ms: float) -> None:
-        with self._metrics_lock:
-            self._metrics["latency_sum_ms"] = (self._metrics.get("latency_sum_ms") or 0.0) + latency_ms
-            self._metrics["latency_count"] = (self._metrics.get("latency_count") or 0) + 1
-        self._persist_metrics()
-
-    def _load_persistent_metrics(self) -> None:
-        if not self.firestore:
-            return
-        try:
-            doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
-            doc = doc_ref.get()
-            if doc.exists:
-                data = doc.to_dict() or {}
-                with self._metrics_lock:
-                    for k, v in data.items():
-                        if k in self._metrics:
-                            if isinstance(v, (int, float)):
-                                self._metrics[k] = v
-                            elif isinstance(v, dict) and isinstance(self._metrics[k], dict):
-                                self._metrics[k].update(v)
-                LOGGER.info("??? Persistent inference metrics loaded from Firestore")
-        except Exception as e:
-            LOGGER.warning(f"?????? Failed to load persistent metrics: {e}")
-
-    def _persist_metrics(self, force: bool = False) -> None:
-        if not self.firestore:
-            return
-
-        now = time.time()
-        if not force and (now - self._last_persist_time < self._persist_throttle_sec):
-            return
-
-        try:
-            self._last_persist_time = now
-            doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
-            with self._metrics_lock:
-                snapshot = dict(self._metrics)
-
-            doc_ref.set(snapshot, merge=True)
-        except Exception as e:
-            LOGGER.warning(f"?????? Failed to persist metrics: {e}")
 
     def _record_attempt(self, *, task_type: str, provider: str, route: str, fallback_depth: int) -> None:
         self._bump_metric("requests_total", 1)
@@ -519,10 +328,6 @@ class InferenceClient:
 
     def snapshot_metrics(self) -> Dict[str, Any]:
         with self._metrics_lock:
-            l_sum = self._metrics.get("latency_sum_ms") or 0.0
-            l_count = self._metrics.get("latency_count") or 0
-            avg_latency = round(l_sum / l_count, 2) if l_count > 0 else 0.0
-
             snapshot = {
                 "uptime_sec": round(max(0.0, time.time() - self._metrics_started_at), 2),
                 "requests_total": self._metrics.get("requests_total") or 0,
@@ -530,9 +335,6 @@ class InferenceClient:
                 "requests_error": self._metrics.get("requests_error") or 0,
                 "retries_total": self._metrics.get("retries_total") or 0,
                 "fallback_attempts": self._metrics.get("fallback_attempts") or 0,
-                "avg_latency_ms": avg_latency,
-                "active_model": self.default_model,
-                "primary_provider": self.provider,
                 "route_counts": dict(self._metrics.get("route_counts") or {}),
                 "task_counts": dict(self._metrics.get("task_counts") or {}),
                 "provider_counts": dict(self._metrics.get("provider_counts") or {}),
@@ -544,18 +346,22 @@ class InferenceClient:
         effective_task = (req.task_type or "default").strip().lower()
         request_tag = req.request_tag.strip() or f"{effective_task}-{int(time.time() * 1000)}"
         selected_model, model_selection_source = self._resolve_primary_model(req)
-
+        
         model_chain = self._model_chain_for_task(effective_task, selected_model)
         last_error: Optional[Exception] = None
-
-        model_base = selected_model
-
+        provider_chain = self._provider_chain_for_task(req.task_type)
+        
+        # Normalize model name (remove any provider suffix since we use hf_inference router)
+        model_base = selected_model.split(":")[0] if ":" in selected_model else selected_model
+        
+        # Log model selection for debugging - confirm which model will actually be used
         LOGGER.info(
-            f"???? request_tag={request_tag} task={effective_task} source={model_selection_source} "
-            f"selected_model={model_base} (primary)"
+            f"🎯 request_tag={request_tag} task={effective_task} source={model_selection_source} "
+            f"selected_model={model_base} (primary) provider_chain={provider_chain}"
         )
         LOGGER.info(f"   fallback_chain={model_chain[1:] if len(model_chain) > 1 else 'none'}")
 
+
         for fallback_depth, model_name in enumerate(model_chain):
             request_for_model = InferenceRequest(
                 messages=req.messages,
@@ -568,19 +374,20 @@ class InferenceClient:
                 repetition_penalty=req.repetition_penalty,
                 timeout_sec=req.timeout_sec,
             )
-
-            try:
-                result = self._call_deepseek(request_for_model, fallback_depth)
-                if fallback_depth > 0:
-                    LOGGER.info(f"??? Fallback succeeded at depth={fallback_depth} model={model_name}")
-                return result
-            except Exception as exc:
-                last_error = exc
-                fallback_hint = f" (depth {fallback_depth})" if fallback_depth > 0 else ""
-                LOGGER.warning(
-                    f"??????  Attempt failed{fallback_hint}: task={request_for_model.task_type} "
-                    f"model={model_name} error={exc.__class__.__name__}: {str(exc)[:100]}"
-                )
+            
+            for provider in provider_chain:
+                try:
+                    result = self._generate_with_provider(request_for_model, provider, fallback_depth)
+                    if fallback_depth > 0:
+                        LOGGER.info(f"✅ Fallback succeeded at depth={fallback_depth} model={model_name} provider={provider}")
+                    return result
+                except Exception as exc:
+                    last_error = exc
+                    fallback_hint = f" (depth {fallback_depth})" if fallback_depth > 0 else ""
+                    LOGGER.warning(
+                        f"⚠️  Attempt failed{fallback_hint}: task={request_for_model.task_type} "
+                        f"provider={provider} model={model_name} error={exc.__class__.__name__}: {str(exc)[:100]}"
+                    )
 
         if last_error:
             raise last_error
@@ -593,6 +400,10 @@ class InferenceClient:
         effective_task = (req.task_type or "default").strip().lower()
         runtime_chat_override = self._runtime_chat_model_override()
 
+        def _base_model(model_name: str) -> str:
+            return (model_name or "").split(":", 1)[0].strip()
+
+        # Check explicit request model first, then chat override env, then task map/default.
         if effective_task == "chat" and runtime_chat_override:
             selected_model = runtime_chat_override
             model_selection_source = "chat_temp_override_env"
@@ -606,39 +417,107 @@ class InferenceClient:
             selected_model = self.task_model_map.get(effective_task, self.default_model)
             model_selection_source = "task_map"
 
-        if self.enforce_lock_model:
-            effective_lock_model_id = self.lock_model_id
+        if self.enforce_qwen_only:
+            effective_qwen_lock_model = self.qwen_lock_model
             if effective_task == "chat":
-                effective_lock_model_id = runtime_chat_override or self.chat_model_override or self.lock_model_id
+                effective_qwen_lock_model = runtime_chat_override or self.chat_model_override or self.qwen_lock_model
 
-            selected_base = (selected_model or "").split(":", 1)[0].strip()
-            lock_base = (effective_lock_model_id or "").split(":", 1)[0].strip()
+            selected_base = _base_model(selected_model)
+            lock_base = _base_model(effective_qwen_lock_model)
             if selected_base != lock_base:
                 LOGGER.warning(
-                    f"?????? Model lock replaced requested model {selected_model} with {effective_lock_model_id}"
+                    f"⚠️ Qwen-only lock replaced requested model {selected_model} with {effective_qwen_lock_model}"
                 )
-            selected_model = effective_lock_model_id
-            model_selection_source = f"{model_selection_source}:model_lock"
+            selected_model = effective_qwen_lock_model
+            model_selection_source = f"{model_selection_source}:qwen_only"
 
         if effective_task == "chat" and self.chat_strict_model_only:
             return selected_model, f"{model_selection_source}:chat_strict_model_only"
 
+        if effective_task == "chat" and self.chat_hard_trigger_enabled and self.chat_hard_model:
+            should_escalate, reason = self._should_escalate_chat_to_hard_model(req.messages)
+            if should_escalate and selected_model != self.chat_hard_model:
+                return self.chat_hard_model, f"chat_hard_escalation:{reason}"
+
         return selected_model, model_selection_source
 
+    def _should_escalate_chat_to_hard_model(self, messages: List[Dict[str, str]]) -> Tuple[bool, str]:
+        """Decide whether a chat request should be routed to the hard chat model.
+
+        The decision uses only text sizes and surface markers:
+
+        * a configured hard keyword in the text from ``_latest_user_message``,
+        * an explicit "show the work" phrase in that text,
+        * the length of that text and of all message contents combined,
+        * how many math markers (``=``, ``integral``, ``proof``, ...) it contains.
+
+        Args:
+            messages: Chat messages; ``content`` is read from dict entries only.
+
+        Returns:
+            ``(escalate, reason)``. ``reason`` is ``"no_user_message"`` or
+            ``"normal"`` when not escalating, otherwise a comma-separated list
+            of the signals that were present.
+        """
+        user_text = self._latest_user_message(messages)
+        if not user_text:
+            return False, "no_user_message"
+
+        lowered = user_text.lower()
+        prompt_chars = len(user_text)
+        # Non-dict entries and missing/empty content count as zero characters.
+        history_chars = sum(
+            len((entry.get("content") or "") if isinstance(entry, dict) else "")
+            for entry in messages
+        )
+
+        # First configured keyword contained in the text, or "" when none match.
+        keyword_hit = next((kw for kw in self.chat_hard_keywords if kw and kw in lowered), "")
+
+        marker_pattern = r"(=|\bintegral\b|\bderivative\b|\bmatrix\b|\blimit\b|\bproof\b|\bderive\b|\bsolve\b)"
+        math_marker_count = len(re.findall(marker_pattern, lowered))
+        math_heavy = math_marker_count >= 2
+
+        long_prompt = prompt_chars >= self.chat_hard_prompt_chars
+        long_history = history_chars >= self.chat_hard_history_chars
+        explicit_phrases = (
+            "show all steps", "step-by-step", "step by step",
+            "rigorous proof", "formal proof",
+        )
+        immediate_hard_request = any(phrase in lowered for phrase in explicit_phrases)
+
+        # A keyword needs one supporting signal; without a keyword, only a long
+        # prompt or long history combined with dense math escalates.
+        lengthy = long_prompt or long_history
+        keyword_route = bool(keyword_hit) and (immediate_hard_request or lengthy or math_heavy)
+        if not (keyword_route or (lengthy and math_heavy)):
+            return False, "normal"
+
+        # Report every signal that was present, in a fixed order.
+        signals = [
+            (long_prompt, f"prompt_chars={prompt_chars}"),
+            (long_history, f"history_chars={history_chars}"),
+            (bool(keyword_hit), f"keyword={keyword_hit}"),
+            (immediate_hard_request, "immediate_hard_request"),
+            (math_heavy, f"math_markers={math_marker_count}"),
+        ]
+        reasons: List[str] = [label for present, label in signals if present]
+        return True, ",".join(reasons) if reasons else "hard_prompt"
+
     def _model_chain_for_task(self, task_type: str, selected_model: str) -> List[str]:
         normalized = (task_type or "default").strip().lower()
         runtime_chat_override = self._runtime_chat_model_override() if normalized == "chat" else ""
-        chat_lock_model_id = runtime_chat_override or (self.chat_model_override if normalized == "chat" else "")
+        chat_qwen_lock_model = runtime_chat_override or (self.chat_model_override if normalized == "chat" else "")
 
-        if self.enforce_lock_model:
+        if self.enforce_qwen_only:
             if normalized == "chat":
-                locked_model = (chat_lock_model_id or self.lock_model_id or "").strip()
+                locked_model = (chat_qwen_lock_model or self.qwen_lock_model or "").strip()
             else:
-                locked_model = (self.lock_model_id or "").strip()
+                locked_model = (self.qwen_lock_model or "").strip()
             return [locked_model] if locked_model else []
 
         if normalized == "chat" and self.chat_strict_model_only:
-            chat_model = (chat_lock_model_id or selected_model or "").strip()
+            chat_model = (chat_qwen_lock_model or selected_model or "").strip()
             return [chat_model] if chat_model else []
 
         per_task_candidates = self.task_fallback_model_map.get(task_type, [])
@@ -658,6 +537,34 @@ class InferenceClient:
             return deduped[:max_models]
         return deduped
 
+    def _provider_chain_for_task(self, task_type: str) -> List[str]:
+        """Return the providers to try for ``task_type``, in order of preference.
+
+        A provider pinned in ``task_provider_map`` wins outright, then CPU-only
+        tasks, then pro-priority tasks (when pro is enabled), then GPU-required
+        tasks, and finally the default provider. Fallback entries are added only
+        when ``enable_provider_fallback`` is set, and never duplicate an entry.
+        """
+        task_key = (task_type or "default").strip().lower()
+        pinned = self.task_provider_map.get(task_key)
+        if pinned:
+            return [pinned]
+        if task_key in self.cpu_only_tasks:
+            return [self.cpu_provider]
+
+        if self.pro_enabled and task_key in self.pro_priority_tasks:
+            primary, backups = self.pro_provider, (self.gpu_provider, self.provider)
+        elif task_key in self.gpu_required_tasks:
+            primary, backups = self.gpu_provider, (self.cpu_provider,)
+        else:
+            primary, backups = self.provider, (self.cpu_provider,)
+
+        providers = [primary]
+        for backup in (backups if self.enable_provider_fallback else ()):
+            if backup not in providers:
+                providers.append(backup)
+        return providers
+
     def _retry_profile(self, task_type: str) -> Tuple[int, float]:
         normalized = (task_type or "default").strip().lower()
         if normalized in self.interactive_tasks:
@@ -674,6 +581,20 @@ class InferenceClient:
             return self.interactive_timeout_sec
         return self.background_timeout_sec
 
+    def _resolve_route_label(self, provider: str, task_type: str) -> str:
+        """Label an attempt "pro-priority" when the pro provider serves a pro-priority task, else "standard"."""
+        task_key = (task_type or "default").strip().lower()
+        uses_pro_route = self.pro_enabled and task_key in self.pro_priority_tasks and provider == self.pro_provider
+        return "pro-priority" if uses_pro_route else "standard"
+
+    def _generate_with_provider(self, req: InferenceRequest, provider: str, fallback_depth: int) -> str:
+        """Run one attempt for ``req`` on ``provider`` and return what the backend call returns."""
+        route_label = self._resolve_route_label(provider, req.task_type)
+        # "local_space" goes to the local Space call; every other provider uses the HF inference call.
+        use_local = provider == "local_space"
+        backend_call = self._call_local_space if use_local else self._call_hf_inference
+        return backend_call(req, provider=provider, route=route_label, fallback_depth=fallback_depth)
+
     def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
         parts: List[str] = []
         for msg in messages:
@@ -686,9 +607,9 @@ class InferenceClient:
                 prefix = "SYSTEM"
             elif role == "assistant":
                 prefix = "ASSISTANT"
-            parts.append(f"{prefix}:\n{content}")
+            parts.append(f"{prefix}:\n{content}")  # real newline; "\\n" would emit a literal backslash-n
         parts.append("ASSISTANT:")
-        return "\n\n".join(parts)
+        return "\n\n".join(parts)  # blank line between turns, not the text "\n\n"
 
     def _latest_user_message(self, messages: List[Dict[str, str]]) -> str:
         for msg in reversed(messages):
@@ -698,223 +619,160 @@ class InferenceClient:
                 return content
         return self._messages_to_prompt(messages)
 
-    def _call_deepseek(self, req: InferenceRequest, fallback_depth: int) -> str:
-        """Call DeepSeek API with OpenAI-compatible chat completions."""
-        if not self.ds_api_key:
-            raise RuntimeError("DEEPSEEK_API_KEY is not set")
-
-        target_model = req.model or self.default_model
-        route = "deepseek"
-        task_type = req.task_type or "default"
-
-        LOGGER.debug(
-            f"???? Calling DeepSeek: task={task_type} model={target_model} "
-            f"route={route} depth={fallback_depth}"
+    def _post_with_retry(
+        self,
+        url: str,
+        *,
+        headers: Dict[str, str],
+        payload: Dict[str, object],
+        timeout: int,
+        provider: str,
+        model: str,
+        task_type: str,
+        request_tag: str,
+        fallback_depth: int,
+        route: str,
+    ) -> Tuple[requests.Response, float, int]:
+        self._record_attempt(
+            task_type=task_type,
+            provider=provider,
+            route=route,
+            fallback_depth=fallback_depth,
         )
-
-        timeout = self._timeout_for(req, "deepseek")
         max_retries, backoff_sec = self._retry_profile(task_type)
+        attempt = 0
 
-        client = get_deepseek_client()
-
-        # Build chat completions params
-        params: Dict[str, Any] = {
-            "model": target_model,
-            "messages": req.messages,
-            "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
-        }
-
-        if target_model == REASONER_MODEL:
-            params["max_tokens"] = req.max_new_tokens or 1024
-        else:
-            params["temperature"] = req.temperature
-            params["top_p"] = req.top_p
-
-        # Use JSON mode for quiz generation
-        if task_type == "quiz_generation" and target_model != REASONER_MODEL:
-            params["response_format"] = {"type": "json_object"}
+        def _retry_sleep(retry_attempt: int) -> None:
+            # Small jitter reduces synchronized retry storms during transient provider issues.
+            jitter_factor = random.uniform(0.9, 1.2)
+            time.sleep(backoff_sec * retry_attempt * jitter_factor)
 
-        for attempt in range(max_retries):
-            self._record_attempt(
-                task_type=task_type,
-                provider="deepseek",
-                route=route,
-                fallback_depth=fallback_depth,
-            )
+        while True:
             start = time.perf_counter()
             try:
-                response = client.chat.completions.create(**params, timeout=timeout)
+                resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
+            except Exception as exc:
                 latency_ms = (time.perf_counter() - start) * 1000
-
-                content = response.choices[0].message.content or ""
-                reasoning = getattr(response.choices[0].message, "reasoning_content", None)
-
-                text = content.strip()
-                if reasoning:
-                    text = f"{reasoning}\n{text}"
-
                 log_model_call(
                     LOGGER,
-                    provider="deepseek",
-                    model=target_model,
-                    endpoint=self.ds_base_url,
+                    provider=provider,
+                    model=model,
+                    endpoint=url,
                     latency_ms=latency_ms,
                     input_tokens=None,
                     output_tokens=None,
-                    status="ok",
+                    status="error",
+                    error_class=exc.__class__.__name__,
+                    error_message=str(exc),
                     task_type=task_type,
-                    request_tag=req.request_tag,
+                    request_tag=request_tag,
                     retry_attempt=attempt + 1,
                     fallback_depth=fallback_depth,
                     route=route,
                 )
-                self._record_attempt(
-                    task_type=task_type,
-                    provider="deepseek",
-                    route=route,
-                    fallback_depth=fallback_depth,
-                )
-                self._record_completion(latency_ms=latency_ms)
-                self._bump_metric("requests_ok", 1)
-                return text
-
-            except RateLimitError:
-                latency_ms = (time.perf_counter() - start) * 1000
-                if attempt < max_retries - 1:
-                    log_model_call(
-                        LOGGER,
-                        provider="deepseek",
-                        model=target_model,
-                        endpoint=self.ds_base_url,
-                        latency_ms=latency_ms,
-                        input_tokens=None,
-                        output_tokens=None,
-                        status="error",
-                        error_class="RateLimitError",
-                        error_message="rate limited",
-                        task_type=task_type,
-                        request_tag=req.request_tag,
-                        retry_attempt=attempt + 1,
-                        fallback_depth=fallback_depth,
-                        route=route,
-                    )
-                    self._bump_metric("retries_total", 1)
-                    time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
-                    continue
-                self._bump_metric("requests_error", 1)
-                raise RuntimeError("DeepSeek API rate limit reached. Please try again shortly.")
-
-            except APITimeoutError:
-                latency_ms = (time.perf_counter() - start) * 1000
-                if attempt < max_retries - 1:
-                    log_model_call(
-                        LOGGER,
-                        provider="deepseek",
-                        model=target_model,
-                        endpoint=self.ds_base_url,
-                        latency_ms=latency_ms,
-                        input_tokens=None,
-                        output_tokens=None,
-                        status="error",
-                        error_class="APITimeoutError",
-                        error_message="timeout",
-                        task_type=task_type,
-                        request_tag=req.request_tag,
-                        retry_attempt=attempt + 1,
-                        fallback_depth=fallback_depth,
-                        route=route,
-                    )
-                    self._bump_metric("retries_total", 1)
-                    time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
-                    continue
-                self._bump_metric("requests_error", 1)
-                raise RuntimeError("DeepSeek API timed out. Please retry.")
-
-            except APIError as e:
-                latency_ms = (time.perf_counter() - start) * 1000
-                if attempt < max_retries - 1:
-                    log_model_call(
-                        LOGGER,
-                        provider="deepseek",
-                        model=target_model,
-                        endpoint=self.ds_base_url,
-                        latency_ms=latency_ms,
-                        input_tokens=None,
-                        output_tokens=None,
-                        status="error",
-                        error_class="APIError",
-                        error_message=str(e)[:200],
-                        task_type=task_type,
-                        request_tag=req.request_tag,
-                        retry_attempt=attempt + 1,
-                        fallback_depth=fallback_depth,
-                        route=route,
-                    )
-                    self._bump_metric("retries_total", 1)
-                    time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
-                    continue
-                self._bump_metric("requests_error", 1)
-                raise RuntimeError(f"DeepSeek API error: {str(e)}")
+                if attempt >= max_retries - 1:
+                    self._bump_metric("requests_error", 1)
+                    raise
+                attempt += 1
+                self._bump_metric("retries_total", 1)
+                _retry_sleep(attempt)
+                continue
 
-            except Exception as exc:
-                latency_ms = (time.perf_counter() - start) * 1000
-                self._bump_metric("requests_error", 1)
+            latency_ms = (time.perf_counter() - start) * 1000
+            if resp.status_code in {408, 429, 500, 502, 503, 504} and attempt < max_retries - 1:
                 log_model_call(
                     LOGGER,
-                    provider="deepseek",
-                    model=target_model,
-                    endpoint=self.ds_base_url,
+                    provider=provider,
+                    model=model,
+                    endpoint=url,
                     latency_ms=latency_ms,
                     input_tokens=None,
                     output_tokens=None,
                     status="error",
-                    error_class=exc.__class__.__name__,
-                    error_message=str(exc)[:200],
+                    error_class="HTTPRetry",
+                    error_message=f"status={resp.status_code}",
                     task_type=task_type,
-                    request_tag=req.request_tag,
+                    request_tag=request_tag,
                     retry_attempt=attempt + 1,
                     fallback_depth=fallback_depth,
                     route=route,
                 )
-                raise
+                attempt += 1
+                self._bump_metric("retries_total", 1)
+                _retry_sleep(attempt)
+                continue
+            return resp, latency_ms, attempt + 1
 
-        raise RuntimeError(f"DeepSeek call failed after {max_retries} attempts")
+    def _call_hf_inference_direct(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        """
+        Call Qwen models via Featherless AI provider.
+        Uses HF InferenceClient with provider="featherless-ai" for direct model access.
+        """
+        if not self.hf_token:
+            raise RuntimeError("HF_TOKEN is not set")
 
-    def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
         target_model = req.model or self.default_model
-        url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"
-
-        prompt = self._messages_to_prompt(req.messages)
-        payload: Dict[str, object] = {
-            "data": [
-                prompt,
-                [],
-                req.temperature,
-                req.top_p,
-                req.max_new_tokens,
-            ]
-        }
-        headers = {"Content-Type": "application/json"}
-
+        target_model_base = target_model.split(":")[0] if ":" in target_model else target_model
+        
         timeout = self._timeout_for(req, provider)
-
-        self._record_attempt(
-            task_type=req.task_type,
-            provider=provider,
-            route=route,
-            fallback_depth=fallback_depth,
-        )
         start = time.perf_counter()
-
+        
         try:
-            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
+            # Use HF InferenceClient with featherless-ai provider for Qwen models.
+            client = HFInferenceClient(
+                model=target_model_base,
+                token=self.hf_token,
+                provider="featherless-ai",
+                timeout=timeout
+            )
+            
+            response = client.chat_completion(
+                messages=req.messages,
+                max_tokens=req.max_new_tokens or self.default_max_new_tokens,
+                temperature=req.temperature or self.default_temperature,
+                top_p=req.top_p or self.default_top_p,
+            )
+            latency_ms = (time.perf_counter() - start) * 1000
+            
+            # Extract text from response
+            if hasattr(response, "choices") and response.choices:
+                content = response.choices[0].message.content or ""
+                text = content.strip()
+            else:
+                text = self._extract_text(response)
+            
+            log_model_call(
+                LOGGER,
+                provider="featherless-ai",
+                model=target_model_base,
+                endpoint="featherless-ai_inference",
+                latency_ms=latency_ms,
+                input_tokens=None,
+                output_tokens=None,
+                status="ok",
+                task_type=req.task_type,
+                request_tag=req.request_tag,
+                retry_attempt=1,
+                fallback_depth=fallback_depth,
+                route=route,
+            )
+            self._record_attempt(
+                task_type=req.task_type,
+                provider="featherless-ai",
+                route=route,
+                fallback_depth=fallback_depth,
+            )
+            self._bump_metric("requests_ok", 1)
+            return text
+            
         except Exception as exc:
             latency_ms = (time.perf_counter() - start) * 1000
+            self._bump_metric("requests_error", 1)
             log_model_call(
                 LOGGER,
-                provider=provider,
-                model=target_model,
-                endpoint=url,
+                provider="featherless-ai",
+                model=target_model_base,
+                endpoint="featherless-ai_inference",
                 latency_ms=latency_ms,
                 input_tokens=None,
                 output_tokens=None,
@@ -927,10 +785,182 @@ class InferenceClient:
                 fallback_depth=fallback_depth,
                 route=route,
             )
-            self._bump_metric("requests_error", 1)
+            LOGGER.warning(
+                "task=%s provider=featherless-ai model=%s fallback_depth=%s failed: %s",
+                req.task_type,
+                target_model_base,
+                fallback_depth,
+                exc,
+            )
             raise
 
-        latency_ms = (time.perf_counter() - start) * 1000
+    def _call_hf_inference(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        """POST a chat-completion request to ``self.hf_chat_url`` and return the extracted text.
+
+        Raises RuntimeError when ``self.hf_token`` is empty or the final response status is not 200.
+        """
+        if not self.hf_token:
+            raise RuntimeError("HF_TOKEN is not set")
+
+        target_model = req.model or self.default_model
+        chat_model = target_model if ":" in target_model else f"{target_model}:fastest"
+        url = self.hf_chat_url
+
+        # Log the model id without any ":<suffix>"; %-args defer formatting until the record is emitted.
+        model_base = target_model.split(":", 1)[0]
+        LOGGER.debug(
+            "📌 Calling HF inference: task=%s model=%s route=%s depth=%s",
+            req.task_type, model_base, route, fallback_depth,
+        )
+
+        payload: Dict[str, object] = {
+            "model": chat_model,
+            "messages": req.messages,
+            "stream": False,
+            "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
+            "temperature": req.temperature,
+            "top_p": req.top_p,
+        }
+        headers = {
+            "Authorization": f"Bearer {self.hf_token}",
+            "Content-Type": "application/json",
+            "X-MathPulse-Task": (req.task_type or "default").strip().lower(),
+        }
+        if route == "pro-priority" and self.pro_route_header_name.strip():
+            headers[self.pro_route_header_name.strip()] = self.pro_route_header_value
+
+        resp, latency_ms, retry_attempt = self._post_with_retry(
+            url,
+            headers=headers,
+            payload=payload,
+            timeout=self._timeout_for(req, provider),
+            provider=provider,
+            model=target_model,
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_bucket("status_code_counts", str(resp.status_code), 1)
+        if resp.status_code != 200:
+            self._bump_metric("requests_error", 1)
+            raise RuntimeError(f"HF Inference error {resp.status_code}: {resp.text}")
+
+        text = self._extract_text(resp.json())
+        # tokens_out is a whitespace-separated word count of the reply, not a tokenizer count.
+        LOGGER.info(
+            "✅ HF inference success: task=%s model=%s latency=%.0fms tokens_out=%s",
+            req.task_type, model_base, latency_ms, len(text.split()),
+        )
+
+        log_model_call(
+            LOGGER,
+            provider=provider,
+            model=target_model,
+            endpoint=url,
+            latency_ms=latency_ms,
+            input_tokens=None,
+            output_tokens=None,
+            status="ok",
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            retry_attempt=retry_attempt,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_metric("requests_ok", 1)
+        return text
+
+    def _call_featherless(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        """POST ``req`` as a chat completion to ``self.featherless_chat_url`` and return the extracted text."""
+        if not self.featherless_api_key:
+            raise RuntimeError("FEATHERLESS_API_KEY is not set")
+
+        model_id = req.model or self.default_model
+        endpoint = self.featherless_chat_url
+
+        body: Dict[str, object] = {
+            "model": model_id,
+            "messages": req.messages,
+            "stream": False,
+            "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
+            "temperature": req.temperature,
+            "top_p": req.top_p,
+        }
+        request_headers = {
+            "Authorization": f"Bearer {self.featherless_api_key}",
+            "Content-Type": "application/json",
+            "X-MathPulse-Task": (req.task_type or "default").strip().lower(),
+        }
+
+        # The helper hands back the final response, its latency in ms, and the attempt number used.
+        response, elapsed_ms, attempts_used = self._post_with_retry(
+            endpoint,
+            headers=request_headers,
+            payload=body,
+            timeout=self._timeout_for(req, provider),
+            provider=provider,
+            model=model_id,
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        status_code = response.status_code
+        self._bump_bucket("status_code_counts", str(status_code), 1)
+        if status_code != 200:
+            self._bump_metric("requests_error", 1)
+            raise RuntimeError(f"Featherless API error {status_code}: {response.text}")
+
+        reply = self._extract_text(response.json())
+        log_model_call(
+            LOGGER,
+            provider=provider,
+            model=model_id,
+            endpoint=endpoint,
+            latency_ms=elapsed_ms,
+            input_tokens=None,
+            output_tokens=None,
+            status="ok",
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            retry_attempt=attempts_used,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
+        self._bump_metric("requests_ok", 1)
+        return reply
+
+    def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
+        target_model = req.model or self.default_model
+        url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"
+
+        prompt = self._messages_to_prompt(req.messages)
+        payload: Dict[str, object] = {
+            "data": [
+                prompt,
+                [],
+                req.temperature,
+                req.top_p,
+                req.max_new_tokens,
+            ]
+        }
+        headers = {"Content-Type": "application/json"}
+
+        timeout = self._timeout_for(req, provider)
+
+        resp, latency_ms, retry_attempt = self._post_with_retry(
+            url,
+            headers=headers,
+            payload=payload,
+            timeout=timeout,
+            provider=provider,
+            model=target_model,
+            task_type=req.task_type,
+            request_tag=req.request_tag,
+            fallback_depth=fallback_depth,
+            route=route,
+        )
         self._bump_bucket("status_code_counts", str(resp.status_code), 1)
 
         if resp.status_code != 200:
@@ -969,7 +999,7 @@ class InferenceClient:
             status="ok",
             task_type=req.task_type,
             request_tag=req.request_tag,
-            retry_attempt=1,
+            retry_attempt=retry_attempt,
             fallback_depth=fallback_depth,
             route=route,
         )
@@ -1010,39 +1040,32 @@ class InferenceClient:
 
     def _clean_response_text(self, text: str) -> str:
         """Strip JSON braces, template artifacts, and whitespace from response text."""
+        # Trim surrounding whitespace so the prefix/suffix checks below see the real content
         text = text.strip()
-
+        
+        # Unwrap a reply that is shaped like a JSON object ("{...}")
         if text.startswith("{") and text.endswith("}"):
             try:
+                # Parse as JSON; on JSONDecodeError the handler below strips the outer braces instead
                 parsed = json.loads(text)
+                # Prefer the "content" field, then "text"; a dict with neither key is left unchanged
                 if isinstance(parsed, dict):
                     if "content" in parsed:
                         text = str(parsed["content"]).strip()
                     elif "text" in parsed:
                         text = str(parsed["text"]).strip()
             except json.JSONDecodeError:
+                # Not valid JSON: drop every leading/trailing "{" and "}" character
                 text = text.strip("{}")
-
+        
+        # Remove a leading ``` or ```json fence, then a trailing ``` fence
         if text.startswith("```json") or text.startswith("```"):
             text = re.sub(r"^```(?:json)?", "", text).strip()
         if text.endswith("```"):
             text = text[:-3].strip()
-
+        
         return text.strip()
 
 
-def create_default_client(firestore_client: Optional[Any] = None) -> InferenceClient:
-    return InferenceClient(firestore_client=firestore_client)
-
-
-def is_sequential_model(model_id: str = "") -> bool:
-    mid = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
-    if not mid:
-        return False
-    if mid == REASONER_MODEL:
-        return True
-    if _RUNTIME_OVERRIDES:
-        lock = _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID", "")
-        if lock == REASONER_MODEL:
-            return True
-    return False
+def create_default_client() -> InferenceClient:
+    return InferenceClient()
diff --git a/services/question_bank_service.py b/services/question_bank_service.py
deleted file mode 100644
index de362431d4b68499de3dddeba9742596b7a4c9d9..0000000000000000000000000000000000000000
--- a/services/question_bank_service.py
+++ /dev/null
@@ -1,123 +0,0 @@
-"""
-Question Bank Service for Quiz Battle.
-
-Handles querying the question bank with random ordering,
-caching session questions, and 24-hour debounce for variance results.
-"""
-
-import os
-import random
-from datetime import datetime, timezone, timedelta
-from typing import List, Dict, Optional
-
-from google.cloud import firestore
-
-DEFAULT_FIREBASE_PROJECT = os.getenv("FIREBASE_AUTH_PROJECT_ID", "mathpulse-ai-2026")
-
-
-def _get_db() -> firestore.Client:
-    """Get Firestore client."""
-    return firestore.Client(project=DEFAULT_FIREBASE_PROJECT)
-
-
-async def get_questions_for_battle(
-    grade_level: int,
-    topic: str,
-    count: int = 10,
-) -> List[Dict]:
-    """
-    Fetch random questions from the question bank for a battle session.
-
-    Uses Firestore random_seed field for pseudo-random ordering.
-    If fewer than `count` questions exist, returns all available.
-    """
-    db = _get_db()
-    collection_path = f"question_bank/{grade_level}/{topic}/questions"
-    collection_ref = db.collection(collection_path)
-
-    # Pseudo-random query using random_seed >= random threshold
-    threshold = random.random()
-    query = (
-        collection_ref
-        .where("random_seed", ">=", threshold)
-        .order_by("random_seed")
-        .limit(count)
-    )
-    docs = list(query.stream())
-
-    # If we didn't get enough, query from the start to fill shortfall
-    if len(docs) < count:
-        remaining = count - len(docs)
-        fallback_query = (
-            collection_ref
-            .where("random_seed", "<", threshold)
-            .order_by("random_seed")
-            .limit(remaining)
-        )
-        docs.extend(list(fallback_query.stream()))
-
-    questions = [doc.to_dict() for doc in docs]
-    # Ensure all required fields are present
-    valid_questions = []
-    for q in questions:
-        if q and all(k in q for k in ("question", "choices", "correct_answer", "difficulty")):
-            valid_questions.append(q)
-
-    return valid_questions
-
-
-async def cache_session_questions(
-    session_id: str,
-    questions: List[Dict],
-    player_ids: List[str],
-    grade_level: int,
-    topic: str,
-) -> None:
-    """Cache varied questions for a battle session with 24-hour TTL."""
-    db = _get_db()
-    session_ref = db.collection("quiz_battle_sessions").document(session_id)
-
-    session_ref.set({
-        "player_ids": player_ids,
-        "grade_level": grade_level,
-        "topic": topic,
-        "created_at": firestore.SERVER_TIMESTAMP,
-        "variance_cached_until": datetime.now(timezone.utc) + timedelta(hours=24),
-    })
-
-    # Write questions to subcollection
-    batch = db.batch()
-    for idx, q in enumerate(questions):
-        q_ref = session_ref.collection("questions").document(str(idx))
-        batch.set(q_ref, q)
-    batch.commit()
-
-
-async def get_cached_session(session_id: str) -> Optional[List[Dict]]:
-    """
-    Check if a session has cached varied questions within 24 hours.
-
-    Returns the cached questions if valid, otherwise None.
-    """
-    db = _get_db()
-    session_doc = db.collection("quiz_battle_sessions").document(session_id).get()
-    if not session_doc.exists:
-        return None
-
-    data = session_doc.to_dict()
-    cached_until = data.get("variance_cached_until")
-    if cached_until:
-        if isinstance(cached_until, datetime):
-            if cached_until.tzinfo is None:
-                cached_until = cached_until.replace(tzinfo=timezone.utc)
-        elif hasattr(cached_until, 'timestamp'):
-            # Firestore Timestamp object
-            cached_until = datetime.fromtimestamp(cached_until.timestamp(), tz=timezone.utc)
-
-        if cached_until > datetime.now(timezone.utc):
-            # Return cached questions
-            q_docs = db.collection("quiz_battle_sessions").document(session_id).collection("questions").stream()
-            questions = [doc.to_dict() for doc in q_docs]
-            return questions if questions else None
-
-    return None
diff --git a/services/user_provisioning_service.py b/services/user_provisioning_service.py
index f226af03ede0497c5344725699da0c8ea6d3a0b7..43554f61e4af0c218c4b8cc803a3aa3e33c49b99 100644
--- a/services/user_provisioning_service.py
+++ b/services/user_provisioning_service.py
@@ -185,6 +185,7 @@ class UserProvisioningService:
                     "level": 1,
                     "currentXP": 0,
                     "totalXP": 0,
+                    "streak": 0,
                     "atRiskSubjects": [],
                     "hasTakenDiagnostic": False,
                 }
diff --git a/services/variance_engine.py b/services/variance_engine.py
deleted file mode 100644
index 61fec5a2a65e22e42c59cab9196e4ba87229bacd..0000000000000000000000000000000000000000
--- a/services/variance_engine.py
+++ /dev/null
@@ -1,115 +0,0 @@
-"""
-Variance Engine for Quiz Battle Questions.
-
-Applies per-session variance techniques via DeepSeek,
-with pure-Python fallback for choice shuffling.
-"""
-
-import json
-import random
-import re
-from typing import List, Dict
-
-from services.ai_client import get_deepseek_client, CHAT_MODEL
-from services.question_bank_service import get_cached_session, cache_session_questions
-
-
-def _fallback_shuffle(questions: List[Dict], seed: int) -> List[Dict]:
-    """
-    Pure-Python fallback: shuffle choices deterministically.
-    """
-    rng = random.Random(seed)
-    for q in questions:
-        choices = q["choices"].copy()
-        correct_letter = q["correct_answer"]
-        correct_index = ord(correct_letter) - ord("A")
-        correct_text = choices[correct_index]
-        rng.shuffle(choices)
-        q["choices"] = choices
-        q["correct_answer"] = chr(ord("A") + choices.index(correct_text))
-        q["variance_applied"] = ["choice_shuffle"]
-    return questions
-
-
-async def apply_variance(questions: List[Dict], session_id: str) -> List[Dict]:
-    """
-    Apply per-session variance to a list of questions.
-
-    1. Check 24h Firestore cache first
-    2. Call DeepSeek with variance prompt
-    3. Parse JSON response
-    4. Fall back to pure-Python shuffle if DeepSeek fails
-    5. Cache result for 24 hours
-    """
-    # 1. Check cache
-    cached = await get_cached_session(session_id)
-    if cached:
-        return cached
-
-    # 2. Generate deterministic seed from session_id
-    seed = hash(session_id) % (2**32)
-
-    # 3. Call DeepSeek
-    client = get_deepseek_client()
-    system_prompt = (
-        "You are a math quiz variance engine for MathPulse AI, an educational platform for "
-        "Filipino high school students following the DepEd K-12 curriculum. "
-        "Your job is to make quiz questions feel fresh each session WITHOUT changing the "
-        "correct answer or difficulty level."
-    )
-
-    user_prompt = f"""Given these {len(questions)} quiz battle questions as JSON:
-{json.dumps(questions, indent=2)}
-
-Apply the following variance techniques. Use session_seed={seed} for deterministic but varied output:
-
-PARAPHRASE (30% chance per question): Reword the question stem using different phrasing, synonyms, or sentence structure. Do NOT change the math or the answer.
-
-CHOICE SHUFFLE (always): Randomize the order of answer choices A/B/C/D. Update "correct_answer" to reflect the new position.
-
-DISTRACTOR REFRESH (20% chance per question): Replace 1-2 wrong choices with new plausible-but-incorrect distractors that represent common student misconceptions for this topic. Keep the correct answer unchanged.
-
-CONTEXT SWAP (10% chance per question): Replace real-world context variables (names, objects, currencies) with Filipino-localized equivalents (e.g., "pesos", "jeepney", "barangay") to increase cultural relevance.
-
-NUMERIC SCALING (10% chance, only for computation problems): Scale numbers by a small integer factor (2x or 3x) so the method remains the same but the answer changes. Recompute the correct answer and all distractors accordingly.
-
-Return the full modified questions array as valid JSON only. Keep all original fields.
-Add a "variance_applied": ["paraphrase", "distractor_refresh", ...] field per question.
-Do NOT change "topic", "difficulty", "grade_level", or "source_chunk_id"."""
-
-    try:
-        response = client.chat.completions.create(
-            model=CHAT_MODEL,
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt},
-            ],
-            temperature=0.5,
-            max_tokens=4000,
-        )
-        content = response.choices[0].message.content.strip()
-        # Strip markdown code fences
-        content = re.sub(r"^```json\s*", "", content)
-        content = re.sub(r"\s*```$", "", content)
-        varied_questions = json.loads(content)
-
-        if not isinstance(varied_questions, list) or len(varied_questions) != len(questions):
-            raise ValueError("Invalid response format from DeepSeek")
-
-        # Validate required fields
-        for q in varied_questions:
-            if not all(k in q for k in ("question", "choices", "correct_answer", "variance_applied")):
-                raise ValueError("Missing required fields in varied question")
-
-    except Exception as e:
-        print(f"[variance_engine] DeepSeek variance failed, falling back to shuffle: {e}")
-        varied_questions = _fallback_shuffle(questions, seed)
-
-    # 4. Cache for 24 hours
-    # Extract player_ids, grade_level, topic from original questions if available
-    player_ids = []
-    grade_level = questions[0].get("grade_level", 11) if questions else 11
-    topic = questions[0].get("topic", "general_mathematics") if questions else "general_mathematics"
-    await cache_session_questions(session_id, varied_questions, player_ids, grade_level, topic)
-
-    return varied_questions
diff --git a/services/youtube_service.py b/services/youtube_service.py
deleted file mode 100644
index 2d01b0d0f1748a9c707eb92fe6b663940a50d9a3..0000000000000000000000000000000000000000
--- a/services/youtube_service.py
+++ /dev/null
@@ -1,1017 +0,0 @@
-"""
-Smart YouTube Video Search Service for MathPulse AI.
-Uses YouTube Data API v3 (googleapiclient.discovery) to find relevant
-educational math videos, enriched with RAG curriculum context and DeepSeek
-query generation for contextual fallback when exact matches don't exist.
-Results are cached in Firestore video_cache/{lessonId} with 7-day TTL.
-"""
-
-from __future__ import annotations
-
-import hashlib
-import json
-import logging
-import os
-import re
-from datetime import datetime, timezone
-from typing import Dict, List, Optional
-
-logger = logging.getLogger("mathpulse.youtube")
-
-YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "").strip()
-
-# Known educational channel keywords and exact names for post-filtering
-_EDUCATIONAL_CHANNEL_KEYWORDS = [
-    "khan", "math", "academy", "education", "teacher", "professor",
-    "tutorial", "lesson", "school", "university", "college", "deped",
-    "philippines", "filipino", "pinoy", "stem", "learning", "study",
-    "organic chemistry tutor", "patrickjmt", "3blue1brown", "numberphile",
-    "math antics", "bright side", "crashcourse", "ted-ed", "ted ed",
-    "nancy pi", "professor leonard", "mit", "stanford", "harvard",
-    "mashup math", "mathcoach", "mathologer", "stand-up maths",
-    "eddie woo", "black pen red pen", "michel van biezen", "brian mclogan",
-    "mathbff", "krista king", "mathMeeting", "mathbyfives", "yourteacher",
-    "virtual nerd", "study.com", "coursera", "edx", "brilliant",
-    "filipino math", "tagalog math", "pinoy teacher", "math philippines",
-    "shs math", "senior high school math", "grade 11 math", "grade 12 math",
-    "general mathematics", "business math", "statistics", "probability",
-    "finite math", "precalculus", "calculus", "algebra", "geometry",
-    "trigonometry", "functions", "equations", "problem solving",
-]
-
-_EDUCATIONAL_CHANNEL_EXACT = {
-    "khan academy", "patrickjmt", "3blue1brown", "numberphile",
-    "math antics", "the organic chemistry tutor", "professor leonard",
-    "nancy pi", "ted-ed", "crashcourse", "bright side",
-    "mit opencourseware", "stanford", "harvard", "mashup math",
-    "mathcoach", "mathologer", "stand-up maths", "eddie woo",
-    "black pen red pen", "michel van biezen", "brian mclogan",
-    "mathbff", "krista king", "mathmeeting", "mathbyfives", "yourteacher",
-    "virtual nerd", "study.com", "coursera", "brilliant.org",
-}
-
-# Duration filters
-_MIN_DURATION_SECONDS = 120   # 2 minutes (allow shorter tutorials)
-_MAX_DURATION_SECONDS = 3600  # 60 minutes
-_TARGET_MIN_SECONDS = 300     # 5 minutes (ideal)
-_TARGET_MAX_SECONDS = 1200    # 20 minutes (ideal)
-
-# Cache TTL in seconds (7 days)
-_CACHE_TTL_SECONDS = 7 * 24 * 60 * 60
-
-# Guaranteed fallback videos by subject — these are well-known educational videos
-# that are extremely likely to exist and be relevant. Used as nuclear option
-# when YouTube API returns nothing for all search strategies.
-_GUARANTEED_FALLBACK_VIDEOS = {
-    "default": [
-        {
-            "videoId": "p6j8HhfJ5Mc",
-            "title": "The Essence of Calculus",
-            "channelTitle": "3Blue1Brown",
-            "thumbnailUrl": "https://img.youtube.com/vi/p6j8HhfJ5Mc/hqdefault.jpg",
-            "durationSeconds": 1024,
-            "description": "A beautiful introduction to calculus concepts.",
-        },
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-    ],
-    "general mathematics": [
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-        {
-            "videoId": "5I_1G5CNA5E",
-            "title": "Functions and Their Graphs",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/5I_1G5CNA5E/hqdefault.jpg",
-            "durationSeconds": 685,
-            "description": "Understanding functions, domain, range, and graphing.",
-        },
-    ],
-    "business math": [
-        {
-            "videoId": "Dc2V7_ur_yY",
-            "title": "Simple Interest and Compound Interest",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/Dc2V7_ur_yY/hqdefault.jpg",
-            "durationSeconds": 780,
-            "description": "Understanding interest calculations for business applications.",
-        },
-        {
-            "videoId": "BFGj4mkHbHc",
-            "title": "Business Mathematics Tutorial",
-            "channelTitle": "Math Meeting",
-            "thumbnailUrl": "https://img.youtube.com/vi/BFGj4mkHbHc/hqdefault.jpg",
-            "durationSeconds": 890,
-            "description": "Essential business math concepts and problem solving.",
-        },
-    ],
-    "statistics": [
-        {
-            "videoId": "qBigTkBLU6g",
-            "title": "Statistics Intro: Mean, Median, and Mode",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/qBigTkBLU6g/hqdefault.jpg",
-            "durationSeconds": 512,
-            "description": "Introduction to measures of central tendency.",
-        },
-        {
-            "videoId": "oXdM3XVCzIM",
-            "title": "Standard Deviation Explained",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/oXdM3XVCzIM/hqdefault.jpg",
-            "durationSeconds": 635,
-            "description": "Understanding variance and standard deviation.",
-        },
-    ],
-    "probability": [
-        {
-            "videoId": "uzkc-qNVoOk",
-            "title": "Probability Explained",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/uzkc-qNVoOk/hqdefault.jpg",
-            "durationSeconds": 480,
-            "description": "Introduction to probability concepts and calculations.",
-        },
-        {
-            "videoId": "SkidyvDkNYQ",
-            "title": "Probability of Independent Events",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/SkidyvDkNYQ/hqdefault.jpg",
-            "durationSeconds": 520,
-            "description": "Calculating probabilities for independent and dependent events.",
-        },
-    ],
-    "finite math": [
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-        {
-            "videoId": "5I_1G5CNA5E",
-            "title": "Functions and Their Graphs",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/5I_1G5CNA5E/hqdefault.jpg",
-            "durationSeconds": 685,
-            "description": "Understanding functions, domain, range, and graphing.",
-        },
-    ],
-    "calculus": [
-        {
-            "videoId": "p6j8HhfJ5Mc",
-            "title": "The Essence of Calculus",
-            "channelTitle": "3Blue1Brown",
-            "thumbnailUrl": "https://img.youtube.com/vi/p6j8HhfJ5Mc/hqdefault.jpg",
-            "durationSeconds": 1024,
-            "description": "A beautiful introduction to calculus concepts.",
-        },
-        {
-            "videoId": "WUvTyaaNkzM",
-            "title": "Limits and Continuity",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/WUvTyaaNkzM/hqdefault.jpg",
-            "durationSeconds": 780,
-            "description": "Understanding limits and continuity in calculus.",
-        },
-    ],
-    "algebra": [
-        {
-            "videoId": "fNk_zzaMoSs",
-            "title": "Introduction to Algebra",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/fNk_zzaMoSs/hqdefault.jpg",
-            "durationSeconds": 720,
-            "description": "Fundamentals of algebraic thinking and equations.",
-        },
-        {
-            "videoId": "5I_1G5CNA5E",
-            "title": "Functions and Their Graphs",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/5I_1G5CNA5E/hqdefault.jpg",
-            "durationSeconds": 685,
-            "description": "Understanding functions, domain, range, and graphing.",
-        },
-    ],
-    "geometry": [
-        {
-            "videoId": "302eJ3TzJQU",
-            "title": "Geometry Introduction",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/302eJ3TzJQU/hqdefault.jpg",
-            "durationSeconds": 540,
-            "description": "Basic geometry concepts and terminology.",
-        },
-        {
-            "videoId": "Jn0YxbqEjHk",
-            "title": "Trigonometry Introduction",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/Jn0YxbqEjHk/hqdefault.jpg",
-            "durationSeconds": 680,
-            "description": "Introduction to trigonometric functions and identities.",
-        },
-    ],
-    "trigonometry": [
-        {
-            "videoId": "Jn0YxbqEjHk",
-            "title": "Trigonometry Introduction",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/Jn0YxbqEjHk/hqdefault.jpg",
-            "durationSeconds": 680,
-            "description": "Introduction to trigonometric functions and identities.",
-        },
-        {
-            "videoId": "PUB0TaZ7bhA",
-            "title": "Unit Circle Definition of Trig Functions",
-            "channelTitle": "Khan Academy",
-            "thumbnailUrl": "https://img.youtube.com/vi/PUB0TaZ7bhA/hqdefault.jpg",
-            "durationSeconds": 590,
-            "description": "Understanding sine and cosine on the unit circle.",
-        },
-    ],
-}
-
-
-def _get_guaranteed_fallback_videos(subject: str = "", max_results: int = 3) -> List[Dict]:
-    """Return guaranteed fallback videos when YouTube API returns nothing."""
-    subject_lower = subject.lower().strip()
-    
-    # Try exact subject match
-    if subject_lower in _GUARANTEED_FALLBACK_VIDEOS:
-        videos = _GUARANTEED_FALLBACK_VIDEOS[subject_lower]
-    else:
-        # Try partial match
-        matched = False
-        for key, videos_list in _GUARANTEED_FALLBACK_VIDEOS.items():
-            if key != "default" and (key in subject_lower or subject_lower in key):
-                videos = videos_list
-                matched = True
-                break
-        if not matched:
-            videos = _GUARANTEED_FALLBACK_VIDEOS["default"]
-    
-    return videos[:max_results]
-
-
-def _build_youtube_client():
-    """Lazy-init googleapiclient YouTube client. Returns None if no API key."""
-    if not YOUTUBE_API_KEY:
-        return None
-    try:
-        from googleapiclient.discovery import build
-        return build("youtube", "v3", developerKey=YOUTUBE_API_KEY, cache_discovery=False)
-    except Exception as exc:
-        logger.warning("Failed to build YouTube client: %s", exc)
-        return None
-
-
-def _parse_iso8601_duration(duration: str) -> int:
-    """Parse ISO 8601 duration string like 'PT5M30S' to seconds."""
-    if not duration:
-        return 0
-    hours_match = re.search(r"(\d+)H", duration)
-    minutes_match = re.search(r"(\d+)M", duration)
-    seconds_match = re.search(r"(\d+)S", duration)
-    hours = int(hours_match.group(1)) if hours_match else 0
-    minutes = int(minutes_match.group(1)) if minutes_match else 0
-    seconds = int(seconds_match.group(1)) if seconds_match else 0
-    return hours * 3600 + minutes * 60 + seconds
-
-
-def _is_educational_channel(channel_title: str) -> bool:
-    """Check if a channel appears to be educational."""
-    lowered = channel_title.lower().strip()
-    if lowered in _EDUCATIONAL_CHANNEL_EXACT:
-        return True
-    return any(kw in lowered for kw in _EDUCATIONAL_CHANNEL_KEYWORDS)
-
-
-def _score_video_result(item: dict, query: str, topic: str, subject: str) -> float:
-    """Score a video result for relevance. Higher is better."""
-    score = 0.0
-    title = (item.get("title") or "").lower()
-    description = (item.get("description") or "").lower()
-    channel = (item.get("channelTitle") or "").lower()
-    query_lower = query.lower()
-    topic_lower = topic.lower()
-    subject_lower = subject.lower() if subject else ""
-
-    # Topic relevance (highest weight)
-    topic_words = [w for w in topic_lower.split() if len(w) > 2]
-    for word in topic_words:
-        if word in title:
-            score += 4.0
-        if word in description:
-            score += 1.5
-
-    # Subject relevance
-    if subject_lower:
-        subject_words = [w for w in subject_lower.split() if len(w) > 2]
-        for word in subject_words:
-            if word in title:
-                score += 2.0
-            if word in description:
-                score += 0.5
-
-    # Query terms appear in title
-    for word in query_lower.split():
-        if len(word) > 2 and word in title:
-            score += 1.0
-
-    # Educational channel bonus
-    if _is_educational_channel(channel):
-        score += 3.0
-
-    # Math/education terms in title
-    math_terms = ["tutorial", "lesson", "explain", "math", "mathematics",
-                  "solution", "problem", "example", "learn", "how to",
-                  "introduction", "basics", "overview", "guide"]
-    for term in math_terms:
-        if term in title:
-            score += 1.5
-
-    # Duration scoring
-    duration = item.get("durationSeconds", 0)
-    if _TARGET_MIN_SECONDS <= duration <= _TARGET_MAX_SECONDS:
-        score += 2.0
-    elif _MIN_DURATION_SECONDS <= duration <= _MAX_DURATION_SECONDS:
-        score += 1.0
-    elif duration > 0:
-        score += 0.3  # Still count very short/long videos, just less
-
-    return score
-
-
-def _extract_meaningful_keywords(chunks: List[dict]) -> List[str]:
-    """Extract meaningful keywords from curriculum chunks."""
-    keywords: List[str] = []
-    for chunk in chunks[:3]:
-        content = str(chunk.get("content", "")).strip()
-        if not content:
-            continue
-        # Split into sentences and take first few
-        sentences = content.split('.')[:2]
-        for sentence in sentences:
-            # Extract important words (nouns, concepts) - heuristic approach
-            words = re.findall(r'\b[A-Za-z][a-z]{3,}\b', sentence)
-            # Filter out common stop words
-            stop_words = {
-                'this', 'that', 'with', 'from', 'they', 'have', 'will',
-                'would', 'there', 'their', 'what', 'said', 'each',
-                'which', 'about', 'could', 'other', 'after', 'first',
-                'these', 'think', 'where', 'being', 'every', 'great',
-                'might', 'shall', 'while', 'through', 'during', 'before',
-                'between', 'among', 'within', 'without', 'against',
-                'students', 'student', 'learning', 'learn', 'understand',
-                'objective', 'objectives', 'competency', 'competencies',
-            }
-            meaningful = [w.lower() for w in words if w.lower() not in stop_words]
-            keywords.extend(meaningful[:8])
-    
-    # Deduplicate while preserving order
-    seen = set()
-    unique = []
-    for kw in keywords:
-        if kw not in seen and len(kw) > 3:
-            seen.add(kw)
-            unique.append(kw)
-    return unique[:12]
-
-
-def _enrich_query_with_rag(topic: str, subject: str, lesson_context: str = "") -> str:
-    """
-    Query the RAG vectorstore to extract curriculum keywords and enrich
-    the YouTube search query for higher relevance.
-    """
-    enriched = topic
-    if subject:
-        enriched = f"{enriched} {subject}"
-    if lesson_context:
-        # Only add lesson context if it's not too similar to topic
-        if lesson_context.lower() not in topic.lower():
-            enriched = f"{enriched} {lesson_context}"
-
-    try:
-        from rag.curriculum_rag import retrieve_curriculum_context
-        chunks = retrieve_curriculum_context(
-            query=topic,
-            subject=subject if subject else None,
-            top_k=5,
-        )
-        if chunks:
-            keywords = _extract_meaningful_keywords(chunks)
-            if keywords:
-                keyword_str = " ".join(keywords[:10])
-                enriched = f"{enriched} {keyword_str}"
-    except Exception as exc:
-        logger.debug("RAG enrichment skipped: %s", exc)
-
-    # Append standard DepEd/Philippines math context
-    enriched = f"{enriched} DepEd Philippines mathematics tutorial"
-    return enriched[:300]
-
-
-def _generate_search_queries_with_ai(
-    topic: str,
-    subject: str,
-    lesson_context: str,
-    grade_level: str,
-) -> List[str]:
-    """
-    Use DeepSeek to generate multiple targeted YouTube search queries.
-    Falls back to heuristic queries if AI is unavailable.
-    
-    Returns a list of queries ordered from most specific to most general.
-    """
-    try:
-        from services.inference_client import InferenceRequest, create_default_client
-        
-        prompt = (
-            f"You are helping find educational YouTube videos for a Filipino senior high school math lesson.\n"
-            f"Topic: {topic}\n"
-            f"Subject: {subject}\n"
-            f"Context: {lesson_context or 'General mathematics lesson'}\n"
-            f"Grade: {grade_level or 'Grade 11-12'}\n\n"
-            f"Generate exactly 4 YouTube search queries that would find the most relevant educational videos.\n"
-            f"Rules:\n"
-            f"1. Query 1: Most specific - exact topic with 'tutorial' or 'lesson'\n"
-            f"2. Query 2: Slightly broader - related concepts or prerequisite topics\n"
-            f"3. Query 3: Even broader - the general subject area with key concepts\n"
-            f"4. Query 4: Last resort - basic subject + 'introduction' or 'basics'\n"
-            f"5. Each query should be 3-8 words\n"
-            f"6. Use terms that real educational channels would use\n"
-            f"7. If the exact topic is very specific/niche, include related more common topics\n\n"
-            f"Return ONLY a JSON array of 4 strings, nothing else:\n"
-            f'["query1", "query2", "query3", "query4"]'
-        )
-        
-        client = create_default_client()
-        request = InferenceRequest(
-            messages=[
-                {"role": "system", "content": "You generate YouTube search queries. Return only JSON arrays."},
-                {"role": "user", "content": prompt},
-            ],
-            task_type="lesson_generation",
-            max_new_tokens=200,
-            temperature=0.3,
-            top_p=0.9,
-        )
-        response = client.generate_from_messages(request)
-        
-        # Parse JSON array from response
-        text = response.strip()
-        # Try to find JSON array
-        match = re.search(r'\[.*\]', text, re.DOTALL)
-        if match:
-            queries = json.loads(match.group())
-            if isinstance(queries, list) and len(queries) >= 2:
-                # Validate and clean queries
-                cleaned = []
-                for q in queries:
-                    if isinstance(q, str) and len(q.strip()) > 3:
-                        cleaned.append(q.strip()[:200])
-                if len(cleaned) >= 2:
-                    logger.info("AI generated %d search queries", len(cleaned))
-                    return cleaned
-    except Exception as exc:
-        logger.debug("AI query generation failed, using fallback: %s", exc)
-    
-    # Fallback heuristic queries
-    return _generate_fallback_queries(topic, subject, lesson_context)
-
-
-def _generate_fallback_queries(topic: str, subject: str, lesson_context: str) -> List[str]:
-    """Generate fallback search queries when AI is unavailable."""
-    queries = [
-        f"{topic} {subject} tutorial lesson",
-        f"{topic} mathematics explained",
-        f"{subject} {topic} how to",
-    ]
-    
-    # Add broader queries
-    if lesson_context and lesson_context.lower() not in topic.lower():
-        queries.insert(1, f"{lesson_context} tutorial")
-    
-    # Extract core concept from topic (e.g., "quadratic equations" -> "quadratic")
-    core_words = [w for w in topic.split() if len(w) > 3]
-    if core_words:
-        core = core_words[0]
-        queries.append(f"{core} math lesson introduction")
-    
-    # Add subject-level query as last resort
-    queries.append(f"{subject} basics tutorial")
-    
-    # Remove duplicates while preserving order
-    seen = set()
-    unique = []
-    for q in queries:
-        if q.lower() not in seen:
-            seen.add(q.lower())
-            unique.append(q)
-    
-    return unique[:5]
-
-
-def _find_related_topics_with_ai(topic: str, subject: str) -> List[str]:
-    """
-    When exact topic has no videos, ask DeepSeek for related/similar topics
-    that are more likely to have educational video content.
-    """
-    try:
-        from services.inference_client import InferenceRequest, create_default_client
-        
-        prompt = (
-            f"The topic '{topic}' in {subject} has very few or no YouTube videos.\n"
-            f"Suggest 3 related, more commonly taught topics that would have educational videos.\n"
-            f"These should cover similar or prerequisite concepts.\n"
-            f"Return ONLY a JSON array of 3 short topic phrases (2-4 words each).\n"
-            f'["topic1", "topic2", "topic3"]'
-        )
-        
-        client = create_default_client()
-        request = InferenceRequest(
-            messages=[
-                {"role": "system", "content": "You suggest related math topics. Return only JSON arrays."},
-                {"role": "user", "content": prompt},
-            ],
-            task_type="lesson_generation",
-            max_new_tokens=150,
-            temperature=0.4,
-            top_p=0.9,
-        )
-        response = client.generate_from_messages(request)
-        
-        text = response.strip()
-        match = re.search(r'\[.*\]', text, re.DOTALL)
-        if match:
-            topics = json.loads(match.group())
-            if isinstance(topics, list):
-                cleaned = [t.strip()[:100] for t in topics if isinstance(t, str) and len(t.strip()) > 2]
-                if cleaned:
-                    logger.info("AI suggested %d related topics for '%s'", len(cleaned), topic)
-                    return cleaned
-    except Exception as exc:
-        logger.debug("AI related topics failed: %s", exc)
-    
-    # Fallback: generate simple related topics
-    return _generate_fallback_related_topics(topic, subject)
-
-
-def _generate_fallback_related_topics(topic: str, subject: str) -> List[str]:
-    """Generate simple related topic fallbacks."""
-    related = []
-    
-    # Try subject + common subtopics
-    if "equation" in topic.lower():
-        related.extend([f"{subject} functions", f"{subject} graphing"])
-    elif "function" in topic.lower():
-        related.extend([f"{subject} equations", f"{subject} domain range"])
-    elif "probability" in topic.lower():
-        related.extend([f"{subject} statistics", "basic probability concepts"])
-    elif "statistics" in topic.lower():
-        related.extend([f"{subject} data analysis", "measures of central tendency"])
-    elif "geometry" in topic.lower() or "angle" in topic.lower():
-        related.extend([f"{subject} trigonometry", "basic geometry concepts"])
-    elif "calculus" in topic.lower() or "derivative" in topic.lower():
-        related.extend(["limits and continuity", f"{subject} functions"])
-    else:
-        related.extend([
-            f"{subject} fundamentals",
-            f"{subject} basic concepts",
-            f"{subject} introduction",
-        ])
-    
-    return related[:3]
-
-
-def _execute_youtube_search(
-    client,
-    query: str,
-    max_results: int = 15,
-    video_duration: Optional[str] = "medium",
-    video_definition: Optional[str] = "high",
-    language: str = "en",
-) -> List[dict]:
-    """Execute a single YouTube search and return raw items with details."""
-    try:
-        search_params = {
-            "part": "snippet",
-            "q": query,
-            "type": "video",
-            "maxResults": max_results,
-            "relevanceLanguage": language,
-            "order": "relevance",
-        }
-        
-        if video_duration:
-            search_params["videoDuration"] = video_duration
-        if video_definition:
-            search_params["videoDefinition"] = video_definition
-        
-        search_response = client.search().list(**search_params).execute()
-        items = search_response.get("items", [])
-        
-        if not items:
-            return []
-        
-        # Get video details
-        video_ids = [item["id"]["videoId"] for item in items if item.get("id", {}).get("videoId")]
-        if not video_ids:
-            return []
-        
-        details_response = client.videos().list(
-            part="contentDetails,statistics,snippet",
-            id=",".join(video_ids),
-        ).execute()
-        
-        details_map = {}
-        for detail in details_response.get("items", []):
-            vid = detail.get("id")
-            if vid:
-                details_map[vid] = detail
-        
-        # Build enriched items
-        results = []
-        for item in items:
-            video_id = item.get("id", {}).get("videoId", "")
-            if not video_id:
-                continue
-            
-            detail = details_map.get(video_id, {})
-            snippet = detail.get("snippet", item.get("snippet", {}))
-            content_details = detail.get("contentDetails", {})
-            
-            duration = content_details.get("duration", "")
-            duration_secs = _parse_iso8601_duration(duration)
-            
-            # Build thumbnail URL
-            thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"
-            thumbs = snippet.get("thumbnails", {})
-            if "high" in thumbs:
-                thumbnail_url = thumbs["high"]["url"]
-            elif "medium" in thumbs:
-                thumbnail_url = thumbs["medium"]["url"]
-            
-            results.append({
-                "videoId": video_id,
-                "title": snippet.get("title", ""),
-                "channelTitle": snippet.get("channelTitle", ""),
-                "thumbnailUrl": thumbnail_url,
-                "durationSeconds": duration_secs,
-                "description": snippet.get("description", "")[:300],
-            })
-        
-        return results
-    except Exception as exc:
-        logger.warning("YouTube search execution failed for query '%s': %s", query, exc)
-        return []
-
-
-def _filter_and_score_results(
-    items: List[dict],
-    query: str,
-    topic: str,
-    subject: str,
-    require_educational: bool = True,
-    min_duration: int = 120,
-    max_duration: int = 3600,
-) -> List[dict]:
-    """Filter and score video results."""
-    results = []
-    for item in items:
-        duration_secs = item.get("durationSeconds", 0)
-        channel_title = item.get("channelTitle", "")
-        title = item.get("title", "")
-        
-        # Duration filter
-        if duration_secs < min_duration or duration_secs > max_duration:
-            continue
-        
-        # Educational channel filter
-        is_edu = _is_educational_channel(channel_title)
-        if require_educational and not is_edu:
-            # Allow if title strongly suggests math tutorial
-            lowered_title = title.lower()
-            if not any(term in lowered_title for term in [
-                "tutorial", "lesson", "math", "explain", "how to",
-                "introduction", "basics", "learn", "example", "problem"
-            ]):
-                continue
-        
-        # Score
-        score = _score_video_result(item, query, topic, subject)
-        item["_score"] = score
-        results.append(item)
-    
-    results.sort(key=lambda x: x["_score"], reverse=True)
-    for r in results:
-        r.pop("_score", None)
-    
-    return results
-
-
-def _get_cache_key(topic: str, subject: str, grade_level: str) -> str:
-    """Generate a deterministic Firestore document ID for caching."""
-    raw = f"{subject}|{topic}|{grade_level}"
-    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32]
-
-
-def get_cached_videos(lesson_id: str) -> Optional[List[Dict]]:
-    """Check Firestore video_cache/{lessonId} for cached results (TTL 7 days)."""
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            return None
-
-        db = firestore.client()
-        doc_ref = db.collection("video_cache").document(lesson_id)
-        doc = doc_ref.get()
-        if not doc.exists:
-            return None
-
-        data = doc.to_dict()
-        if not data:
-            return None
-
-        cached_at = data.get("cachedAt")
-        if cached_at:
-            if hasattr(cached_at, "timestamp"):
-                cached_epoch = cached_at.timestamp()
-            elif isinstance(cached_at, datetime):
-                cached_epoch = cached_at.timestamp()
-            else:
-                cached_epoch = float(cached_at)
-            now_epoch = datetime.now(timezone.utc).timestamp()
-            if (now_epoch - cached_epoch) > _CACHE_TTL_SECONDS:
-                logger.info("Video cache expired for lesson %s", lesson_id)
-                return None
-
-        videos = data.get("videos")
-        if isinstance(videos, list) and len(videos) > 0:
-            logger.info("Video cache hit for lesson %s (%d videos)", lesson_id, len(videos))
-            return videos
-    except Exception as exc:
-        logger.debug("Could not read video cache: %s", exc)
-    return None
-
-
-def cache_videos(lesson_id: str, videos: List[Dict], topic: str) -> None:
-    """Store search results in Firestore video_cache/{lessonId}."""
-    try:
-        import firebase_admin
-        from firebase_admin import firestore
-        if not firebase_admin._apps:
-            return
-
-        db = firestore.client()
-        db.collection("video_cache").document(lesson_id).set({
-            "videos": videos,
-            "cachedAt": firestore.SERVER_TIMESTAMP,
-            "topic": topic,
-        })
-        logger.info("Cached %d videos for lesson %s", len(videos), lesson_id)
-    except Exception as exc:
-        logger.warning("Could not cache videos in Firestore: %s", exc)
-
-
-def search_youtube_videos(
-    topic: str,
-    subject: str = "",
-    lesson_context: str = "",
-    grade_level: str = "",
-    max_results: int = 3,
-    language: str = "en",
-) -> List[Dict]:
-    """
-    Search YouTube Data API v3 for relevant educational math videos.
-    
-    Uses a multi-strategy approach to guarantee at least 1 result:
-    1. AI-generated targeted queries with strict filters
-    2. Fallback to heuristic queries with relaxed filters  
-    3. Broader subject-level searches
-    4. Related topics suggested by AI
-    5. Emergency unfiltered search as last resort
-    
-    Returns up to `max_results` videos.
-    """
-    client = _build_youtube_client()
-    if client is None:
-        logger.warning("YOUTUBE_API_KEY not set. Video search disabled.")
-        return []
-
-    all_results: List[dict] = []
-    seen_video_ids = set()
-    
-    # Generate search queries using AI + fallback
-    queries = _generate_search_queries_with_ai(topic, subject, lesson_context, grade_level)
-    logger.info("YouTube search queries: %s", queries)
-    
-    # ─── Strategy 1: AI queries with standard filters ───────────────────────
-    for query in queries:
-        items = _execute_youtube_search(
-            client, query,
-            max_results=10,
-            video_duration="medium",
-            video_definition="high",
-            language=language,
-        )
-        filtered = _filter_and_score_results(
-            items, query, topic, subject,
-            require_educational=True,
-            min_duration=_MIN_DURATION_SECONDS,
-            max_duration=_MAX_DURATION_SECONDS,
-        )
-        for item in filtered:
-            vid = item["videoId"]
-            if vid not in seen_video_ids:
-                seen_video_ids.add(vid)
-                all_results.append(item)
-        
-        if len(all_results) >= max_results:
-            break
-    
-    # ─── Strategy 2: Same queries, relaxed filters ──────────────────────────
-    if len(all_results) < max_results:
-        for query in queries:
-            items = _execute_youtube_search(
-                client, query,
-                max_results=10,
-                video_duration=None,  # Any duration
-                video_definition=None,  # Any quality
-                language=language,
-            )
-            filtered = _filter_and_score_results(
-                items, query, topic, subject,
-                require_educational=False,  # Less strict
-                min_duration=60,  # Allow shorter
-                max_duration=7200,  # Allow longer
-            )
-            for item in filtered:
-                vid = item["videoId"]
-                if vid not in seen_video_ids:
-                    seen_video_ids.add(vid)
-                    all_results.append(item)
-            
-            if len(all_results) >= max_results:
-                break
-    
-    # ─── Strategy 3: Broader subject-level searches ─────────────────────────
-    if len(all_results) < 1:
-        broad_queries = [
-            f"{subject} {topic.split()[0] if topic else ''} tutorial",
-            f"{subject} mathematics lesson",
-            f"{topic} explained simply",
-        ]
-        for query in broad_queries:
-            if not query.strip():
-                continue
-            items = _execute_youtube_search(
-                client, query,
-                max_results=10,
-                video_duration=None,
-                video_definition=None,
-                language=language,
-            )
-            filtered = _filter_and_score_results(
-                items, query, topic, subject,
-                require_educational=False,
-                min_duration=60,
-                max_duration=7200,
-            )
-            for item in filtered:
-                vid = item["videoId"]
-                if vid not in seen_video_ids:
-                    seen_video_ids.add(vid)
-                    all_results.append(item)
-            
-            if len(all_results) >= max_results:
-                break
-    
-    # ─── Strategy 4: AI-suggested related topics ────────────────────────────
-    if len(all_results) < 1:
-        related_topics = _find_related_topics_with_ai(topic, subject)
-        for related_topic in related_topics:
-            query = f"{related_topic} tutorial"
-            items = _execute_youtube_search(
-                client, query,
-                max_results=8,
-                video_duration=None,
-                video_definition=None,
-                language=language,
-            )
-            filtered = _filter_and_score_results(
-                items, query, topic, subject,
-                require_educational=False,
-                min_duration=60,
-                max_duration=7200,
-            )
-            for item in filtered:
-                vid = item["videoId"]
-                if vid not in seen_video_ids:
-                    seen_video_ids.add(vid)
-                    all_results.append(item)
-            
-            if len(all_results) >= max_results:
-                break
-    
-    # ─── Strategy 5: Emergency unfiltered search ────────────────────────────
-    if len(all_results) < 1:
-        emergency_queries = [
-            topic,
-            f"{topic} math",
-            subject,
-        ]
-        for query in emergency_queries:
-            if not query or not query.strip():
-                continue
-            items = _execute_youtube_search(
-                client, query,
-                max_results=5,
-                video_duration=None,
-                video_definition=None,
-                language=language,
-            )
-            # Accept ANY result in emergency mode
-            for item in items:
-                vid = item["videoId"]
-                if vid not in seen_video_ids:
-                    seen_video_ids.add(vid)
-                    all_results.append(item)
-            
-            if len(all_results) >= 1:
-                break
-    
-    # ─── Final: Return top results or guaranteed fallback ───────────────────
-    if not all_results:
-        logger.warning(
-            "All YouTube search strategies failed for topic: %s. Using guaranteed fallback videos.",
-            topic,
-        )
-        fallback = _get_guaranteed_fallback_videos(subject, max_results)
-        if fallback:
-            logger.info("Returning %d guaranteed fallback videos for subject: %s", len(fallback), subject)
-            return fallback
-        return []
-    
-    # Re-score all collected results against the original topic
-    for item in all_results:
-        item["_score"] = _score_video_result(item, topic, topic, subject)
-    
-    all_results.sort(key=lambda x: x["_score"], reverse=True)
-    for item in all_results:
-        item.pop("_score", None)
-    
-    top_results = all_results[:max_results]
-    logger.info("YouTube search returned %d results (top %d) for topic: %s", 
-                len(all_results), len(top_results), topic)
-    return top_results
-
-
-def get_video_search_results(
-    topic: str,
-    subject: str = "",
-    lesson_context: str = "",
-    grade_level: str = "",
-    lesson_id: Optional[str] = None,
-    max_results: int = 3,
-) -> Dict:
-    """
-    High-level wrapper: check cache first, then search YouTube, then cache results.
-
-    Returns {"videos": [...], "cached": bool}.
-    """
-    cache_key = lesson_id or _get_cache_key(topic, subject, grade_level)
-
-    # Check cache first
-    cached = get_cached_videos(cache_key)
-    if cached is not None:
-        return {"videos": cached, "cached": True}
-
-    # Search YouTube
-    videos = search_youtube_videos(
-        topic=topic,
-        subject=subject,
-        lesson_context=lesson_context,
-        grade_level=grade_level,
-        max_results=max_results,
-    )
-
-    if videos:
-        cache_videos(cache_key, videos, topic)
-
-    return {"videos": videos, "cached": False}
\ No newline at end of file
diff --git a/startup.sh b/startup.sh
index 62ec3a7d8f4eb1765a5dd1478979e2d06d53ce85..c131a5193edef774e6ce53fe7658ddab84d3d47d 100644
--- a/startup.sh
+++ b/startup.sh
@@ -11,33 +11,12 @@ fi
 
 export CURRICULUM_DIR
 export VECTORSTORE_DIR
-export CURRICULUM_VECTORSTORE_DIR="${VECTORSTORE_DIR}"
-
-echo "=========================================="
-echo "MathPulse AI Startup"
-echo "=========================================="
-echo "VECTORSTORE_DIR=${VECTORSTORE_DIR}"
-echo "CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
-echo "CURRICULUM_SOURCE_REPO_ID set: $(if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then echo YES; else echo NO; fi)"
-echo "FIREBASE_SERVICE_ACCOUNT_JSON set: $(if [ -n "${FIREBASE_SERVICE_ACCOUNT_JSON:-}" ]; then echo YES; else echo NO; fi)"
-echo "FIREBASE_STORAGE_BUCKET=${FIREBASE_STORAGE_BUCKET:-not set}"
-echo "=========================================="
 
 mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
 
-_vectorstore_cache_dir="${VECTORSTORE_DIR}/.chroma"
-if [ ! -d "${_vectorstore_cache_dir}" ]; then
-    mkdir -p "${_vectorstore_cache_dir}"
-    echo "INFO: Initialized ChromaDB cache dir at ${_vectorstore_cache_dir}"
-fi
-
 _ingest_script="/app/scripts/ingest_curriculum.py"
 if [ -f "${_ingest_script}" ]; then
-    _has_pdfs=false
-    if [ -d "${CURRICULUM_DIR}" ] && find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
-        _has_pdfs=true
-    fi
-    if [ "${_has_pdfs}" = true ] || [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then
+    if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ] || [ -n "$(find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit 2>/dev/null)" ]; then
         echo "INFO: Running curriculum ingestion (optional)..."
         python "${_ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
     else
@@ -47,27 +26,12 @@ else
     echo "INFO: Curriculum ingestion script not found at ${_ingest_script}; skipping (curriculum is optional)"
 fi
 
-_vectorstore_download_script="/app/scripts/download_vectorstore_from_firebase.py"
-if [ -f "${_vectorstore_download_script}" ]; then
-    echo "INFO: Vectorstore files present before download:"
-    ls -la "${VECTORSTORE_DIR}/"
+_download_script="/app/scripts/download_vectorstore_from_firebase.py"
+if [ -f "${_download_script}" ]; then
     echo "INFO: Downloading vectorstore from Firebase Storage..."
-    python "${_vectorstore_download_script}" && _result=0 || _result=1
-    if [ $_result -eq 0 ]; then
-        echo "INFO: Vectorstore download succeeded"
-    else
-        echo "WARNING: Vectorstore download failed, continuing anyway"
-    fi
-    echo "INFO: Vectorstore files present after download:"
-    ls -la "${VECTORSTORE_DIR}/"
-    _vectorstore_summary_file="${VECTORSTORE_DIR}/ingest_summary.json"
-    if [ -f "${_vectorstore_summary_file}" ]; then
-        echo "INFO: Vectorstore summary found at ${_vectorstore_summary_file}"
-    else
-        echo "WARNING: Vectorstore summary not found at ${_vectorstore_summary_file}"
-    fi
+    python "${_download_script}" && echo "INFO: Vectorstore download completed" || echo "WARNING: Vectorstore download failed, continuing anyway"
 else
-    echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
+    echo "INFO: Vectorstore download script not found at ${_download_script}; skipping (vectorstore is optional)"
 fi
 
 exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1
\ No newline at end of file
diff --git a/startup_validation.py b/startup_validation.py
index 8da6af61e8b24d703d972fb722ae1146655df7a5..64bee1574d23fdde94ab1e17496ecb03f43fb1ce 100644
--- a/startup_validation.py
+++ b/startup_validation.py
@@ -32,12 +32,7 @@ def validate_imports() -> None:
         logger.info("   ✓ FastAPI, Uvicorn, Pydantic OK")
         
         # Backend services (use ABSOLUTE imports like deployed code)
-        from services.inference_client import (
-            InferenceClient, create_default_client, is_sequential_model,
-            get_current_runtime_config, get_model_for_task, model_supports_thinking,
-            set_runtime_model_profile, set_runtime_model_override, reset_runtime_overrides,
-            _MODEL_PROFILES,
-        )  # noqa
+        from services.inference_client import InferenceClient, create_default_client  # noqa
         logger.info("   ✓ InferenceClient imports OK")
         
         from automation_engine import automation_engine  # noqa
@@ -54,8 +49,8 @@ def validate_imports() -> None:
             logger.warning("   ⚠ firebase_admin not available (OK if Firebase not needed)")
         
         # ML & inference
-        from services.ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL  # noqa
-        logger.info("   ✓ DeepSeek AI client imports OK")
+        from huggingface_hub import InferenceClient as HFInferenceClient  # noqa
+        logger.info("   ✓ HuggingFace Hub imports OK")
         
         logger.info("✅ All critical imports validated")
     except ImportError as e:
@@ -78,37 +73,36 @@ def validate_environment() -> None:
     """Verify required environment variables are set."""
     logger.info("🔍 Validating environment variables...")
     
-    # CRITICAL: DEEPSEEK_API_KEY for inference
-    ds_api_key = os.environ.get("DEEPSEEK_API_KEY")
-    if not ds_api_key:
+    # CRITICAL: HF_TOKEN for inference
+    hf_token = os.environ.get("HF_TOKEN")
+    api_key = os.environ.get("HUGGING_FACE_API_TOKEN")
+    legacy_api_key = os.environ.get("HUGGINGFACE_API_TOKEN")
+    if not hf_token and not api_key and not legacy_api_key:
         logger.warning(
-            "⚠  WARNING: DEEPSEEK_API_KEY is not set as an environment variable.\n"
+            "⚠  WARNING: HF_TOKEN is not set as an environment variable.\n"
+            "   On HF Spaces, this should be set as a SPACE SECRET.\n"
             "   AI inference will fail without this token.\n"
-            "   Use: Set DEEPSEEK_API_KEY in your .env or space secrets."
+            "   Use: python set-hf-secrets.py to set the secret."
         )
     else:
-        logger.info("   ✓ DEEPSEEK_API_KEY is set")
+        logger.info("   ✓ HF_TOKEN/HUGGING_FACE_API_TOKEN/HUGGINGFACE_API_TOKEN is set")
     
     # Check inference provider config
-    inference_provider = os.getenv("INFERENCE_PROVIDER", "deepseek")
+    inference_provider = os.getenv("INFERENCE_PROVIDER", "hf_inference")
     logger.info(f"   ✓ INFERENCE_PROVIDER: {inference_provider}")
     
     # Check model IDs
-    chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "deepseek-chat"
+    chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "Qwen/Qwen3-32B"
     logger.info(f"   ✓ Chat model configured: {chat_model}")
 
     chat_strict = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
     chat_hard_trigger = os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").strip().lower() in {"1", "true", "yes", "on"}
-    enforce_lock_model = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
-    lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", "deepseek-chat").strip() or "deepseek-chat"
-    logger.info(f"   ✓ INFERENCE_ENFORCE_LOCK_MODEL: {enforce_lock_model}")
-    logger.info(f"   ✓ INFERENCE_LOCK_MODEL_ID: {lock_model_id}")
-    model_profile = os.getenv("MODEL_PROFILE", "").strip().lower()
-    quiz_model = os.getenv("HF_QUIZ_MODEL_ID", "").strip()
-    rag_model = os.getenv("HF_RAG_MODEL_ID", "").strip()
-    logger.info(f"   ✓ MODEL_PROFILE: {model_profile or 'not set (using individual env vars)'}")
-    logger.info(f"   ✓ HF_QUIZ_MODEL_ID: {quiz_model or 'not set (using defaults)'}")
-    logger.info(f"   ✓ HF_RAG_MODEL_ID: {rag_model or 'not set (using defaults)'}")
+    enforce_qwen_only = os.getenv("INFERENCE_ENFORCE_QWEN_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}
+    qwen_lock_model = os.getenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen3-32B").strip() or "Qwen/Qwen3-32B"
+    logger.info(f"   ✓ INFERENCE_CHAT_STRICT_MODEL_ONLY: {chat_strict}")
+    logger.info(f"   ✓ INFERENCE_CHAT_HARD_TRIGGER_ENABLED: {chat_hard_trigger}")
+    logger.info(f"   ✓ INFERENCE_ENFORCE_QWEN_ONLY: {enforce_qwen_only}")
+    logger.info(f"   ✓ INFERENCE_QWEN_LOCK_MODEL: {qwen_lock_model}")
     if not chat_strict:
         logger.warning("   ⚠ Chat strict model lock is disabled; chat may fallback to alternate models")
     if chat_strict and chat_hard_trigger:
@@ -116,40 +110,9 @@ def validate_environment() -> None:
             "   ⚠ Chat hard trigger is enabled while strict chat lock is on; hard escalation will be bypassed"
         )
     
-    _validate_embedding_model()
-    
     logger.info("✅ Environment variables OK")
 
 
-EXPECTED_EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"
-
-def _validate_embedding_model() -> None:
-    embedding_model = os.getenv("EMBEDDING_MODEL", "").strip()
-    if not embedding_model:
-        logger.warning(
-            "WARNING: EMBEDDING_MODEL env var is not set. "
-            f"Expected: {EXPECTED_EMBEDDING_MODEL}. "
-            "RAG retrieval will fail without an embedding model."
-        )
-    elif embedding_model != EXPECTED_EMBEDDING_MODEL:
-        logger.warning(
-            f"WARNING: EMBEDDING_MODEL is set to '{embedding_model}' — "
-            f"expected '{EXPECTED_EMBEDDING_MODEL}'. "
-            "Confirm this is intentional before deploying."
-        )
-    from services.ai_client import CHAT_MODEL, REASONER_MODEL  # noqa
-    generation_model_ids = [
-        CHAT_MODEL, REASONER_MODEL,
-    ]
-    if embedding_model in generation_model_ids:
-        logger.warning(
-            f"CRITICAL: EMBEDDING_MODEL is set to a generation model ('{embedding_model}'). "
-            "This will break RAG retrieval. Set it to 'BAAI/bge-small-en-v1.5'."
-        )
-    else:
-        logger.info(f"   EMBEDDING_MODEL: {embedding_model or 'not set'}")
-
-
 def validate_config_files() -> None:
     """Verify config files exist and are readable."""
     logger.info("🔍 Validating configuration files...")
@@ -191,9 +154,7 @@ def validate_config_files() -> None:
         )
 
     logger.info(f"   ✓ Using model config: {readable_model_config}")
-
-    _validate_model_config_fields(readable_model_config)
-
+    
     logger.info("✅ Configuration files OK")
 
 
@@ -297,40 +258,6 @@ def validate_inference_client_config() -> None:
         ) from e
 
 
-def _validate_model_config_fields(config_path: str) -> None:
-    try:
-        import yaml
-        with open(config_path, "r", encoding="utf-8") as f:
-            config = yaml.safe_load(f) or {}
-    except Exception as e:
-        raise StartupError(f"❌ Cannot parse {config_path} as YAML: {e}") from e
-
-    models = config.get("models", {})
-    if not isinstance(models, dict):
-        raise StartupError(f"❌ {config_path}: 'models' section missing or invalid")
-
-    if "rag_primary" not in models:
-        raise StartupError(f"❌ {config_path}: missing 'models.rag_primary' field")
-    rag_primary = models["rag_primary"]
-    if isinstance(rag_primary, dict):
-        logger.info(f"   ✓ rag_primary model: {rag_primary.get('id', 'UNSET')}")
-    else:
-        logger.warning(f"   ⚠ rag_primary is not a dict, may cause issues")
-
-    capabilities = models.get("model_capabilities")
-    if not isinstance(capabilities, dict):
-        raise StartupError(f"❌ {config_path}: missing 'models.model_capabilities' section")
-    logger.info(f"   ✓ model_capabilities: sequential_only={capabilities.get('sequential_only')}, supports_thinking={capabilities.get('supports_thinking')}")
-
-    tasks = config.get("routing", {}).get("task_model_map", {})
-    rag_tasks = {"rag_lesson", "rag_problem", "rag_analysis_context"}
-    missing_rag = rag_tasks - set(str(t).strip().lower() for t in tasks.keys())
-    if missing_rag:
-        raise StartupError(f"❌ {config_path}: missing RAG task mappings: {missing_rag}")
-
-    logger.info(f"   ✓ All RAG task mappings present")
-
-
 def run_all_validations() -> None:
     """Run comprehensive startup validation.
     
diff --git a/test_full_rag.py b/test_full_rag.py
deleted file mode 100644
index f92fbadb13189df6f942849e2a0572cfba421cd4..0000000000000000000000000000000000000000
--- a/test_full_rag.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import sys
-import os
-sys.path.insert(0, 'backend')
-
-# Set required env vars
-os.environ['DEEPSEEK_API_KEY'] = os.getenv('DEEPSEEK_API_KEY', '')
-os.environ['DEEPSEEK_BASE_URL'] = os.getenv('DEEPSEEK_BASE_URL', 'https://api.deepseek.com')
-
-from rag.curriculum_rag import retrieve_lesson_pdf_context, build_lesson_prompt
-from services.inference_client import InferenceClient, InferenceRequest
-
-# Test retrieval
-print("Testing retrieval...")
-try:
-    chunks, mode = retrieve_lesson_pdf_context(
-        topic="Represent real-life relationships as functions and interpret domain/range.",
-        subject="General Mathematics",
-        quarter=2,
-        lesson_title="Represent real-life relationships as functions and interpret domain/range.",
-        module_id="gen-math",
-        lesson_id="gm-q2-functions-graphs-l1",
-        competency_code="GM11-FG-1",
-        top_k=8,
-    )
-    print(f"Retrieved {len(chunks)} chunks, mode={mode}")
-except Exception as e:
-    print(f"Retrieval ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
-    sys.exit(1)
-
-# Test prompt building
-print("\nTesting prompt building...")
-try:
-    prompt = build_lesson_prompt(
-        lesson_title="Represent real-life relationships as functions and interpret domain/range.",
-        competency="Represent real-life relationships as functions and interpret domain/range.",
-        grade_level="Grade 11-12",
-        subject="General Mathematics",
-        quarter=2,
-        learner_level="Grade 11-12",
-        module_unit="n/a",
-        curriculum_chunks=chunks,
-        competency_code="GM11-FG-1",
-    )
-    print(f"Prompt length: {len(prompt)} chars")
-    print(f"Prompt preview: {prompt[:200]}...")
-except Exception as e:
-    print(f"Prompt building ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
-    sys.exit(1)
-
-# Test inference (optional - might cost money)
-print("\nTesting inference...")
-try:
-    client = InferenceClient()
-    req = InferenceRequest(
-        messages=[
-            {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
-            {"role": "user", "content": prompt},
-        ],
-        task_type="lesson_generation",
-        max_new_tokens=100,  # Small for testing
-        temperature=0.2,
-        top_p=0.9,
-        enable_thinking=True,
-    )
-    result = client.generate_from_messages(req)
-    print(f"Inference result: {result[:200]}...")
-    print("SUCCESS!")
-except Exception as e:
-    print(f"Inference ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
\ No newline at end of file
diff --git a/test_retrieval.py b/test_retrieval.py
deleted file mode 100644
index b1eeac81a83c27b740190c67d874ba61ba34ffed..0000000000000000000000000000000000000000
--- a/test_retrieval.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import sys
-sys.path.insert(0, '.')
-
-from rag.curriculum_rag import retrieve_lesson_pdf_context, retrieve_curriculum_context
-
-# Test retrieval with the same params as the frontend
-try:
-    chunks, mode = retrieve_lesson_pdf_context(
-        topic="Represent real-life relationships as functions and interpret domain/range.",
-        subject="General Mathematics",
-        quarter=2,
-        lesson_title="Represent real-life relationships as functions and interpret domain/range.",
-        module_id="gen-math",
-        lesson_id="gm-q2-functions-graphs-l1",
-        competency_code="GM11-FG-1",
-        top_k=8,
-    )
-    print(f"Retrieved {len(chunks)} chunks, mode={mode}")
-    for i, chunk in enumerate(chunks[:3]):
-        print(f"  Chunk {i}: score={chunk.get('score')}, domain={chunk.get('content_domain')}, source={chunk.get('source_file')}")
-        print(f"    Content: {chunk.get('content', '')[:100]}...")
-except Exception as e:
-    print(f"ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
-
-# Also test without module/lesson filters
-try:
-    chunks2 = retrieve_curriculum_context(
-        query="Represent real-life relationships as functions and interpret domain/range.",
-        subject="General Mathematics",
-        quarter=2,
-        top_k=8,
-    )
-    print(f"\nGeneral retrieval: {len(chunks2)} chunks")
-except Exception as e:
-    print(f"\nGeneral ERROR: {type(e).__name__}: {e}")
-    import traceback
-    traceback.print_exc()
\ No newline at end of file
diff --git a/tests/README.md b/tests/README.md
deleted file mode 100644
index 10d8ee2ba2c65c88e18f58f77c1ca6ece9e5ff34..0000000000000000000000000000000000000000
--- a/tests/README.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# Backend Tests Safe Runner
-
-## Test Pollution Issue
-The test suite has pollution when run in default pytest order. Tests pass in isolation or in specific groupings.
-
-## Running Tests Safely
-
-### Option 1: Run core API tests only (137 tests, all green)
-```bash
-cd backend
-python -m pytest tests/test_api.py tests/test_rag_pipeline.py tests/test_quiz_battle.py tests/test_model_profiles.py -v
-```
-
-### Option 2: Run key test files in correct order
-```bash
-python -m pytest tests/ -v --ignore=tests/test_video_routes.py --ignore=tests/test_admin_model_routes.py --ignore=tests/test_hf_monitoring_routes.py
-```
-
-### Option 3: Individual test files (all green individually)
-```bash
-# Each passes individually
-python -m pytest tests/test_api.py -v  # 90 passed
-python -m pytest tests/test_rag_pipeline.py -v  # 13 passed
-python -m pytest tests/test_quiz_battle.py -v  # 19 passed
-python -m pytest tests/test_model_profiles.py -v  # 15 passed
-python -m pytest tests/test_video_routes.py -v  # 11 passed
-python -m pytest tests/test_admin_model_routes.py -v  # 19 passed
-python -m pytest tests/test_hf_monitoring_routes.py -v  # 8 passed
-```
-
-## Root Cause
-- Different test files set different auth roles at module level
-- `test_api.py`: teacher role
-- `test_video_routes.py`: was student, now teacher but client still uses admin token
-- `test_admin_model_routes.py`: was admin, now teacher but test setup differs
-- `test_hf_monitoring_routes.py`: was admin, tests need admin via separate client
-
-## Fix Attempts
-1. conftest.py - doesn't work (MagicMock doesn't reset properly with @patch)
-2. Using pytest fixtures - doesn't work (@patch doesn't override MagicMock)
-3. Changing module-level auth - causes different tests to fail
-
-## Status
-- 177/180 tests pass when run in safe combinations
-- 3 tests fail only when test_video_routes runs before test_api in default order
-- Tests pass individually or in safe groupings
\ No newline at end of file
diff --git a/tests/test_admin_model_routes.py b/tests/test_admin_model_routes.py
deleted file mode 100644
index 81a6d842d6dbd6f4f7d7c6f3925c5708b5a37bf9..0000000000000000000000000000000000000000
--- a/tests/test_admin_model_routes.py
+++ /dev/null
@@ -1,213 +0,0 @@
-"""
-Route-level tests for the /api/admin/model-config endpoints.
-
-Follows the auth mock pattern from test_api.py.
-"""
-
-import os
-from unittest.mock import MagicMock, patch
-
-import pytest
-from fastapi.testclient import TestClient
-
-import main as main_module
-from main import app
-from services.inference_client import reset_runtime_overrides
-
-main_module._firebase_ready = True
-main_module._init_firebase_admin = lambda: None
-main_module.firebase_firestore = None
-main_module.firebase_auth = MagicMock()
-main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-    "uid": "test-teacher-uid",
-    "email": "teacher@example.com",
-    "role": "teacher",
-})
-
-admin_client = TestClient(app, headers={"Authorization": "Bearer admin-token"})
-
-_RESOLVED_KEYS = {
-    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
-    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
-}
-_KNOWN_PROFILES = {"dev", "budget", "prod"}
-_BASE_CONFIG_KEYS = {"profile", "overrides", "resolved"}
-
-
-@pytest.fixture(autouse=True)
-def _mock_firestore():
-    with patch("services.inference_client._save_runtime_config_to_firestore", side_effect=None):
-        yield
-
-
-@pytest.fixture(autouse=True)
-def _reset_overrides():
-    reset_runtime_overrides()
-    yield
-    reset_runtime_overrides()
-
-
-# ─── Auth Enforcement ────────────────────────────────────────
-
-
-class TestAuth:
-    def test_get_rejects_bad_token(self):
-        main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
-        c = TestClient(app, headers={"Authorization": "Bearer bad-token"})
-        response = c.get("/api/admin/model-config")
-        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-            "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
-        })
-        assert response.status_code in {401, 403}
-
-    def test_get_rejects_student_role(self):
-        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-            "uid": "student-uid", "email": "s@example.com", "role": "student",
-        })
-        c = TestClient(app, headers={"Authorization": "Bearer student-token"})
-        response = c.get("/api/admin/model-config")
-        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-            "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
-        })
-        assert response.status_code == 403
-
-
-# ─── GET Model Config ─────────────────────────────────────────
-
-
-class TestGetModelConfig:
-    def test_returns_base_keys(self):
-        response = admin_client.get("/api/admin/model-config")
-        assert response.status_code == 200
-        data = response.json()
-        for key in _BASE_CONFIG_KEYS:
-            assert key in data
-
-    def test_resolved_contains_expected_keys(self):
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        resolved = data.get("resolved", {})
-        for key in _RESOLVED_KEYS:
-            assert key in resolved
-
-    def test_available_profiles_present(self):
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        profiles = data.get("availableProfiles", [])
-        for p in _KNOWN_PROFILES:
-            assert p in profiles
-
-    def test_profile_descriptions_present(self):
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        descriptions = data.get("profileDescriptions", {})
-        for p in _KNOWN_PROFILES:
-            assert p in descriptions
-
-    def test_resolved_models_are_non_empty_strings(self):
-        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        resolved = data.get("resolved", {})
-        for key, value in resolved.items():
-            assert isinstance(value, str), f"{key} is not a string: {value}"
-            assert len(value) > 0, f"Resolved key {key} is empty"
-
-
-# ─── POST Profile Switch ─────────────────────────────────────
-
-
-class TestPostProfileSwitch:
-    def test_switch_to_dev_succeeds(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
-        assert response.status_code == 200
-        assert response.json()["success"] is True
-
-    def test_switch_to_budget_succeeds(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "budget"})
-        assert response.status_code == 200
-        data = response.json()
-        assert data["success"] is True
-        assert data["applied"]["profile"] == "budget"
-
-    def test_switch_to_prod_succeeds(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "prod"})
-        assert response.status_code == 200
-        data = response.json()
-        assert data["success"] is True
-        assert data["applied"]["profile"] == "prod"
-
-    def test_switch_to_invalid_profile_returns_400(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={"profile": "nonexistent"})
-        assert response.status_code == 400
-
-    def test_switch_missing_profile_field(self):
-        response = admin_client.post("/api/admin/model-config/profile", json={})
-        assert response.status_code == 422
-
-
-# ─── POST Override ───────────────────────────────────────────
-
-
-class TestPostOverride:
-    def test_set_valid_override_key_succeeds(self):
-        response = admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "INFERENCE_MODEL_ID", "value": "test/override-model"},
-        )
-        assert response.status_code == 200
-        assert response.json()["success"] is True
-
-    def test_set_invalid_override_key_returns_400(self):
-        response = admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "EMBEDDING_MODEL", "value": "test/emb"},
-        )
-        assert response.status_code == 400
-
-    def test_override_is_visible_in_subsequent_get(self):
-        admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "INFERENCE_MODEL_ID", "value": "custom/model-v2"},
-        )
-        response = admin_client.get("/api/admin/model-config")
-        data = response.json()
-        overrides = data.get("overrides", {})
-        assert "INFERENCE_MODEL_ID" in overrides
-        assert overrides["INFERENCE_MODEL_ID"] == "custom/model-v2"
-
-
-# ─── DELETE Reset ───────────────────────────────────────────
-
-
-class TestDeleteReset:
-    def test_reset_returns_success(self):
-        response = admin_client.delete("/api/admin/model-config/reset")
-        assert response.status_code == 200
-        assert response.json()["success"] is True
-
-    def test_reset_clears_override(self):
-        admin_client.post(
-            "/api/admin/model-config/override",
-            json={"key": "INFERENCE_MODEL_ID", "value": "temp/model"},
-        )
-        response = admin_client.delete("/api/admin/model-config/reset")
-        assert response.status_code == 200
-        overrides = response.json()["current"]["overrides"]
-        assert overrides == {}
-
-    def test_reset_clears_profile(self):
-        admin_client.post("/api/admin/model-config/profile", json={"profile": "budget"})
-        response = admin_client.delete("/api/admin/model-config/reset")
-        assert response.status_code == 200
-        assert response.json()["current"]["profile"] == ""
-
-
-# ─── Profile after switch ────────────────────────────────────
-
-
-class TestProfileAfterSwitch:
-    def test_switched_profile_visible_in_get(self):
-        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
-        response = admin_client.get("/api/admin/model-config")
-        assert response.json()["profile"] == "dev"
\ No newline at end of file
diff --git a/tests/test_api.py b/tests/test_api.py
index 1917e8b81c687214eecaf406c0cfe3b9c2a66900..62bfa9aa8cf60a7dd3388bc9a4887191f39435f7 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -4,7 +4,8 @@ Comprehensive tests for all FastAPI endpoints.
 
 Tests cover:
   - Successful requests with valid data
-  - AI inference API failures (502 fallback)
+  - Input validation errors (422)
+  - HuggingFace API failures (502 fallback)
   - Timeout handling
   - Malformed response data
   - Error status-code mapping
@@ -84,9 +85,8 @@ mock_ae.ContentUpdatePayload = _ContentUpdatePayload
 mock_ae.AutomationResult = _AutomationResult
 sys.modules["automation_engine"] = mock_ae
 
-# Override tokens so client init doesn't fail
+# Override HF_TOKEN so client init doesn't fail
 os.environ["HF_TOKEN"] = "test-token-for-testing"
-os.environ["DEEPSEEK_API_KEY"] = "test-ds-key-for-testing"
 
 # analytics.py is importable directly (its heavy deps are guarded)
 import main as main_module  # noqa: E402
@@ -97,7 +97,8 @@ app = main_module.app
 main_module._firebase_ready = True
 main_module._init_firebase_admin = lambda: None
 main_module.firebase_firestore = None
-main_module.firebase_auth = MagicMock()
+if getattr(main_module, "firebase_auth", None) is None:
+    main_module.firebase_auth = MagicMock()
 main_module.firebase_auth.verify_id_token = MagicMock(
     return_value={
         "uid": "test-teacher-uid",
@@ -112,22 +113,33 @@ client = TestClient(app, headers={"Authorization": "Bearer test-auth-token"})
 # ─── Fixtures ──────────────────────────────────────────────────
 
 
-def make_deepseek_risk_mock(
-    risk_label: str = "low risk academically stable",
-    confidence: float = 0.85,
+class FakeClassificationElement:
+    """Mimics huggingface_hub ZeroShotClassificationOutputElement."""
+
+    def __init__(self, label: str, score: float):
+        self.label = label
+        self.score = score
+
+
+def make_zsc_client(
+    classification: list | None = None,
 ):
-    """Create a mock DeepSeek client for risk prediction tests."""
-    mock_ds = MagicMock()
-    mock_choice = MagicMock()
-    mock_choice.message.content = json.dumps({
-        "risk_label": risk_label,
-        "confidence": confidence,
-        "reasoning": "Mock risk assessment."
-    })
-    mock_ds.chat.completions.create.return_value = MagicMock(
-        choices=[mock_choice]
-    )
-    return mock_ds
+    """Create a mock InferenceClient with predictable zero-shot outputs.
+
+    Used only for risk-prediction tests (the only endpoint still using
+    ``get_client()`` / ``InferenceClient``).
+    """
+    mock_client = MagicMock()
+
+    if classification is None:
+        classification = [
+            FakeClassificationElement("low risk academically stable", 0.85),
+            FakeClassificationElement("medium academic risk", 0.10),
+            FakeClassificationElement("high risk of failing", 0.05),
+        ]
+    mock_client.zero_shot_classification.return_value = classification
+
+    return mock_client
 
 
 # ─── Health & Root ─────────────────────────────────────────────
@@ -509,36 +521,43 @@ class TestChatEndpoint:
         mock_stream_async.assert_not_called()
 
 
-class TestChatTransport:
-    @patch("services.ai_client.get_deepseek_client")
-    def test_call_hf_chat_uses_deepseek_api(self, mock_ds_fn):
-        mock_ds = MagicMock()
-        mock_choice = MagicMock()
-        mock_choice.message.content = "x = 2 or x = 3"
-        mock_ds.chat.completions.create.return_value = MagicMock(
-            choices=[mock_choice]
+class TestHFChatTransport:
+    @patch("main.http_requests.post")
+    def test_call_hf_chat_uses_router_chat_completions(self, mock_post):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "choices": [
+                {"message": {"content": "x = 2 or x = 3"}}
+            ]
+        }
+        mock_post.return_value = mock_response
+
+        result = main_module.call_hf_chat(
+            [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0"}],
+            max_tokens=256,
+            temperature=0.2,
+            top_p=0.9,
         )
-        mock_ds_fn.return_value = mock_ds
-
-        with patch.object(main_module, "get_inference_client") as mock_get_ic:
-            ic = MagicMock()
-            ic.generate_from_messages.return_value = "x = 2 or x = 3"
-            mock_get_ic.return_value = ic
-
-            result = main_module.call_hf_chat(
-                [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0"}],
-                max_tokens=256,
-                temperature=0.2,
-                top_p=0.9,
-            )
 
         assert result
+        call_args = mock_post.call_args
+        endpoint = call_args.args[0]
+        payload = call_args.kwargs["json"]
+
+        assert endpoint == "https://router.huggingface.co/v1/chat/completions"
+        assert isinstance(payload["model"], str)
+        assert payload["model"]
+        assert payload["stream"] is False
+        assert isinstance(payload["messages"], list)
 
 
 class TestInferenceRouting:
     def test_chat_strict_model_lock_keeps_single_model_chain(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "true")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_MODEL_ID", "meta-llama/Meta-Llama-3-70B-Instruct")
 
         client = InferenceClient()
         req = InferenceRequest(
@@ -549,15 +568,15 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
 
-        assert selected_model == "deepseek-chat"
+        assert selected_model == "Qwen/Qwen2.5-7B-Instruct"
         assert "chat_strict_model_only" in source
-        assert model_chain == ["deepseek-chat"]
+        assert model_chain == ["Qwen/Qwen2.5-7B-Instruct"]
 
-    def test_chat_env_override_wins_under_model_lock(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
+    def test_chat_env_override_wins_under_qwen_only_lock(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen3-32B")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
-        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
-        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen2.5-7B-Instruct")
 
         client = InferenceClient()
         req = InferenceRequest(
@@ -568,16 +587,16 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
 
-        assert selected_model == "deepseek-chat"
+        assert selected_model == "Qwen/Qwen3-32B"
         assert "chat_override_env" in source
-        assert model_chain == ["deepseek-chat"]
+        assert model_chain == ["Qwen/Qwen3-32B"]
 
-    def test_chat_temp_override_wins_under_model_lock(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-reasoner")
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
+    def test_chat_temp_override_wins_under_qwen_only_lock(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "Qwen/Qwen3-32B")
         monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
-        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
-        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen2.5-7B-Instruct")
 
         client = InferenceClient()
         req = InferenceRequest(
@@ -588,14 +607,14 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("chat", selected_model)
 
-        assert selected_model == "deepseek-chat"
+        assert selected_model == "Qwen/Qwen3-32B"
         assert "chat_temp_override_env" in source
-        assert model_chain == ["deepseek-chat"]
+        assert model_chain == ["Qwen/Qwen3-32B"]
 
-    def test_chat_temp_override_does_not_change_non_chat_task_under_lock(self, monkeypatch):
-        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
-        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
-        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")
+    def test_chat_temp_override_does_not_change_non_chat_task_under_qwen_lock(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "Qwen/Qwen3-32B")
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen2.5-7B-Instruct")
 
         client = InferenceClient()
         req = InferenceRequest(
@@ -606,18 +625,114 @@ class TestInferenceRouting:
         selected_model, source = client._resolve_primary_model(req)
         model_chain = client._model_chain_for_task("verify_solution", selected_model)
 
-        assert selected_model == "deepseek-reasoner"
+        assert selected_model == "Qwen/Qwen2.5-7B-Instruct"
         assert "chat_temp_override_env" not in source
-        assert model_chain == ["deepseek-reasoner"]
+        assert model_chain == ["Qwen/Qwen2.5-7B-Instruct"]
+
+    def test_chat_escalation_when_strict_lock_disabled(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
+        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "false")
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "false")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "true")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_MODEL_ID", "meta-llama/Meta-Llama-3-70B-Instruct")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_PROMPT_CHARS", "256")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_HISTORY_CHARS", "256")
+
+        client = InferenceClient()
+        req = InferenceRequest(
+            messages=[{"role": "user", "content": "Show all steps and prove the result rigorously."}],
+            task_type="chat",
+        )
+
+        selected_model, source = client._resolve_primary_model(req)
+
+        assert selected_model == "meta-llama/Meta-Llama-3-70B-Instruct"
+        assert source.startswith("chat_hard_escalation:")
+
+    def test_async_chat_posts_only_qwen_when_strict_enabled(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
+        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "true")
+        monkeypatch.setenv("INFERENCE_HF_TIMEOUT_SEC", "15")
+
+        routing_client = InferenceClient()
+        requests_seen: List[Dict[str, Any]] = []
+
+        class FakeAsyncResponse:
+            def __init__(self, status_code: int, payload: Dict[str, Any]):
+                self.status_code = status_code
+                self._payload = payload
+                self.text = json.dumps(payload)
+
+            def json(self) -> Dict[str, Any]:
+                return self._payload
+
+        class FakeAsyncHttpClient:
+            async def post(self, _url, *, headers=None, json=None, timeout=None):
+                requests_seen.append({
+                    "headers": headers,
+                    "payload": json,
+                    "timeout": timeout,
+                })
+                return FakeAsyncResponse(
+                    200,
+                    {"choices": [{"message": {"content": "Final answer: 42"}}]},
+                )
+
+        async def _run() -> str:
+            real_getenv = os.getenv
+
+            def _patched_getenv(key: str, default=None):
+                if key == "PYTEST_CURRENT_TEST":
+                    return ""
+                return real_getenv(key, default)
+
+            with patch.object(main_module, "get_inference_client", return_value=routing_client), patch.object(
+                main_module,
+                "_get_hf_async_http_client",
+                new=AsyncMock(return_value=FakeAsyncHttpClient()),
+            ), patch.object(main_module.os, "getenv", side_effect=_patched_getenv):
+                return await main_module.call_hf_chat_async(
+                    [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0."}],
+                    task_type="chat",
+                )
+
+        result = asyncio.run(_run())
+
+        assert "42" in result
+        assert len(requests_seen) == 1
+        sent_model = requests_seen[0]["payload"]["model"]
+        assert sent_model.startswith("Qwen/Qwen2.5-7B-Instruct")
+        assert "Meta-Llama" not in sent_model
+        assert "gemma" not in sent_model.lower()
+
+    def test_qwen_only_lock_replaces_explicit_non_qwen_model(self, monkeypatch):
+        monkeypatch.setenv("INFERENCE_ENFORCE_QWEN_ONLY", "true")
+        monkeypatch.setenv("INFERENCE_QWEN_LOCK_MODEL", "Qwen/Qwen2.5-7B-Instruct")
+        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
+
+        client = InferenceClient()
+        req = InferenceRequest(
+            messages=[{"role": "user", "content": "Solve this quickly."}],
+            model="meta-llama/Meta-Llama-3-70B-Instruct",
+            task_type="verify_solution",
+        )
+
+        selected_model, source = client._resolve_primary_model(req)
+        model_chain = client._model_chain_for_task("verify_solution", selected_model)
+
+        assert selected_model == "Qwen/Qwen2.5-7B-Instruct"
+        assert "qwen_only" in source
+        assert model_chain == ["Qwen/Qwen2.5-7B-Instruct"]
 
 
 # ─── Risk Prediction ──────────────────────────────────────────
 
 
 class TestRiskPrediction:
-    @patch("main.get_deepseek_client")
-    def test_predict_risk_success(self, mock_ds_fn):
-        mock_ds_fn.return_value = make_deepseek_risk_mock()
+    @patch("main.get_client")
+    def test_predict_risk_success(self, mock_get):
+        mock_get.return_value = make_zsc_client()
         response = client.post("/api/predict-risk", json={
             "engagementScore": 80,
             "avgQuizScore": 75,
@@ -631,7 +746,7 @@ class TestRiskPrediction:
 
     def test_predict_risk_invalid_score_range(self):
         response = client.post("/api/predict-risk", json={
-            "engagementScore": 150,
+            "engagementScore": 150,  # > 100
             "avgQuizScore": 75,
             "attendance": 90,
             "assignmentCompletion": 85,
@@ -653,11 +768,11 @@ class TestRiskPrediction:
         })
         assert response.status_code == 422
 
-    @patch("main.get_deepseek_client")
-    def test_predict_risk_ai_failure(self, mock_ds_fn):
-        mock_client = MagicMock()
-        mock_client.chat.completions.create.side_effect = Exception("AI down")
-        mock_ds_fn.return_value = mock_client
+    @patch("main.get_client")
+    def test_predict_risk_hf_failure(self, mock_get):
+        hf = make_zsc_client()
+        hf.zero_shot_classification.side_effect = Exception("HF down")
+        mock_get.return_value = hf
         response = client.post("/api/predict-risk", json={
             "engagementScore": 80,
             "avgQuizScore": 75,
@@ -666,9 +781,9 @@ class TestRiskPrediction:
         })
         assert response.status_code == 502
 
-    @patch("main.get_deepseek_client")
-    def test_batch_risk_prediction(self, mock_ds_fn):
-        mock_ds_fn.return_value = make_deepseek_risk_mock()
+    @patch("main.get_client")
+    def test_batch_risk_prediction(self, mock_get):
+        mock_get.return_value = make_zsc_client()
         response = client.post("/api/predict-risk/batch", json={
             "students": [
                 {"engagementScore": 80, "avgQuizScore": 75, "attendance": 90, "assignmentCompletion": 85},
@@ -706,8 +821,8 @@ class TestLearningPath:
         assert response.status_code == 422
 
     @patch("main.call_hf_chat")
-    def test_learning_path_ai_failure(self, mock_chat):
-        mock_chat.side_effect = Exception("AI service down")
+    def test_learning_path_hf_failure(self, mock_chat):
+        mock_chat.side_effect = Exception("HF down")
         response = client.post("/api/learning-path", json={
             "weaknesses": ["algebra"],
             "gradeLevel": "Grade 11",
@@ -1065,14 +1180,6 @@ class TestUploadClassRecordsGuardrails:
 
 class TestImportedOverviewAndTopicMastery:
     def test_imported_class_overview_returns_inferred_state_for_realistic_minimal_records(self):
-        # Ensure teacher role matches mock data
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "test-teacher-uid",
-                "email": "teacher@example.com",
-                "role": "teacher",
-            }
-        )
         firestore = _FakeFirestoreModule(
             {
                 "normalizedClassRecords": [
@@ -1221,24 +1328,15 @@ class TestAsyncGenerationTasks:
         assert cancel_payload["status"] in {"cancelled", "cancelling"}
 
     def test_inference_metrics_requires_admin(self):
-        # Test with a non-admin mock to verify role check works
-        with patch.object(main_module.firebase_auth, "verify_id_token", return_value={
-            "uid": "teacher-uid",
-            "email": "teacher@example.com",
-            "role": "teacher",
-        }):
-            response = client.get("/api/ops/inference-metrics")
-            assert response.status_code == 403
-
-    def test_inference_metrics_admin_success(self):
-        # Set admin role directly to ensure it persists
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "admin-uid",
-                "email": "admin@example.com",
-                "role": "admin",
-            }
-        )
+        response = client.get("/api/ops/inference-metrics")
+        assert response.status_code == 403
+
+    @patch.object(main_module.firebase_auth, "verify_id_token", return_value={
+        "uid": "admin-uid",
+        "email": "admin@example.com",
+        "role": "admin",
+    })
+    def test_inference_metrics_admin_success(self, _mock_verify):
         response = client.get("/api/ops/inference-metrics")
         assert response.status_code == 200
         payload = response.json()
@@ -1468,14 +1566,6 @@ class _FakeFirestoreModule:
 
 class TestRecentCourseMaterials:
     def test_recent_course_materials_respects_class_section_filter(self):
-        # Ensure teacher role matches mock data
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "test-teacher-uid",
-                "email": "teacher@example.com",
-                "role": "teacher",
-            }
-        )
         now = int(time.time())
         firestore = _FakeFirestoreModule(
             {
@@ -1518,14 +1608,6 @@ class TestRecentCourseMaterials:
         assert all(item["classSectionId"] == "grade11_a" for item in data["materials"])
 
     def test_recent_course_materials_reports_retention_exclusions(self):
-        # Ensure teacher role matches mock data
-        main_module.firebase_auth.verify_id_token = MagicMock(
-            return_value={
-                "uid": "test-teacher-uid",
-                "email": "teacher@example.com",
-                "role": "teacher",
-            }
-        )
         now = int(time.time())
         firestore = _FakeFirestoreModule(
             {
diff --git a/tests/test_hf_monitoring_routes.py b/tests/test_hf_monitoring_routes.py
deleted file mode 100644
index 1120a9aa7e073b5efb10cda32184b8586d842732..0000000000000000000000000000000000000000
--- a/tests/test_hf_monitoring_routes.py
+++ /dev/null
@@ -1,148 +0,0 @@
-"""
-Route-level tests for /api/hf/monitoring endpoint.
-Updated for DeepSeek AI monitoring.
-"""
-
-import os
-from unittest.mock import MagicMock, Mock, patch
-
-import pytest
-from fastapi.testclient import TestClient
-
-import main as main_module
-from main import app
-
-main_module._firebase_ready = True
-main_module._init_firebase_admin = lambda: None
-main_module.firebase_firestore = None
-if getattr(main_module, "firebase_auth", None) is None:
-    main_module.firebase_auth = MagicMock()
-main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-    "uid": "test-teacher-uid",
-    "email": "teacher@example.com", 
-    "role": "teacher",
-})
-
-admin_client = TestClient(app, headers={"Authorization": "Bearer admin-token"})
-
-EXPECTED_MONITORING_FIELDS = {
-    "modelId", "modelStatus", "avgResponseTimeMs",
-    "embeddingModelId", "embeddingModelStatus",
-    "inferenceBalance", "totalPeriodCost",
-    "hubApiCallsUsed", "hubApiCallsLimit",
-    "zeroGpuMinutesUsed", "zeroGpuMinutesLimit",
-    "publicStorageUsedTB", "publicStorageLimitTB",
-    "lastChecked", "periodStart", "periodEnd",
-    "activeProfile", "runtimeOverridesActive", "resolvedModels",
-    "provider", "apiBaseUrl",
-}
-
-EXPECTED_FIELDS_AFTER_DS_REPLACEMENT = EXPECTED_MONITORING_FIELDS
-
-
-@pytest.fixture(autouse=True)
-def _mock_env():
-    with patch.dict(os.environ, {"DEEPSEEK_API_KEY": "test-ds-monitoring-key"}):
-        yield
-
-
-# ─── Auth Enforcement ────────────────────────────────────────
-
-
-class TestMonitoringAuth:
-    def test_rejects_bad_token(self):
-        main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
-        c = TestClient(app, headers={"Authorization": "Bearer bad-token"})
-        response = c.get("/api/hf/monitoring")
-        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
-            "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
-        })
-        assert response.status_code in {401, 403}
-
-
-# ─── Response Shape ───────────────────────────────────────────
-
-
-class TestMonitoringResponseShape:
-    @patch("main.time.time")
-    def test_success_response_contains_all_expected_fields(self, mock_time):
-        mock_time.return_value = 1000.0
-
-        response = admin_client.get("/api/hf/monitoring")
-        assert response.status_code == 200
-        data = response.json()
-        assert data["success"] is True
-        payload = data["data"]
-        for field in EXPECTED_FIELDS_AFTER_DS_REPLACEMENT:
-            assert field in payload, f"Missing field: {field}"
-
-    @patch("main.time.time")
-    @patch("services.ai_client.get_deepseek_client")
-    def test_all_probes_fail_gracefully(self, mock_ds_client_fn, mock_time):
-        mock_time.return_value = 1000.0
-        mock_client = MagicMock()
-        mock_client.chat.completions.create.side_effect = Exception("network down")
-        mock_ds_client_fn.return_value = mock_client
-
-        response = admin_client.get("/api/hf/monitoring")
-        assert response.status_code == 200
-        data = response.json()
-        assert data["success"] is True
-
-
-# ─── Response Values ──────────────────────────────────────────
-
-
-class TestMonitoringResponseValues:
-    @patch("services.ai_client.get_deepseek_client")
-    @patch("main.time.time")
-    def test_model_status_is_degraded_when_probe_fails(self, mock_time, mock_ds_client_fn):
-        mock_time.return_value = 1000.0
-        mock_client = MagicMock()
-        mock_client.chat.completions.create.side_effect = Exception("probe down")
-        mock_ds_client_fn.return_value = mock_client
-
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert data["data"]["modelStatus"] == "Degraded"
-
-    @patch("main.time.time")
-    def test_embedding_model_id_is_returned(self, mock_time):
-        mock_time.return_value = 1000.0
-
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert "bge-small" in data["data"]["embeddingModelId"].lower()
-
-    @patch("main.time.time")
-    def test_resolved_models_contains_task_keys(self, mock_time):
-        mock_time.return_value = 1000.0
-
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        resolved = data["data"].get("resolvedModels", {})
-        expected_tasks = {"chat", "rag_lesson", "rag_problem", "quiz_generation"}
-        for task in expected_tasks:
-            assert task in resolved, f"Missing task: {task}"
-            assert isinstance(resolved[task], str) and len(resolved[task]) > 0
-
-    @patch("main.time.time")
-    def test_active_profile_returned(self, mock_time):
-        mock_time.return_value = 1000.0
-
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert data["data"]["activeProfile"] in {"dev", "budget", "prod", ""}
-
-    @patch("main.time.time")
-    def test_provider_and_api_base_url_present(self, mock_time):
-        mock_time.return_value = 1000.0
-
-        response = admin_client.get("/api/hf/monitoring")
-        data = response.json()
-        assert data["success"] is True
-        assert data["data"]["provider"] == "deepseek"
-        assert "api.deepseek.com" in data["data"]["apiBaseUrl"]
diff --git a/tests/test_model_profiles.py b/tests/test_model_profiles.py
deleted file mode 100644
index f9f542a18b8b87c1bd54ce0dc45a992fb3e369f9..0000000000000000000000000000000000000000
--- a/tests/test_model_profiles.py
+++ /dev/null
@@ -1,184 +0,0 @@
-from __future__ import annotations
-
-import os
-import sys
-from unittest.mock import patch
-
-import pytest
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-from services import inference_client as inf_client
-from services.inference_client import (
-    _MODEL_PROFILES,
-    get_current_runtime_config,
-    get_model_for_task,
-    is_sequential_model,
-    model_supports_thinking,
-    reset_runtime_overrides,
-    set_runtime_model_override,
-    set_runtime_model_profile,
-)
-
-
-REQUIRED_PROFILE_KEYS = {
-    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
-    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
-}
-
-
-class TestModelProfiles:
-    def test_profiles_have_all_keys(self):
-        for name, profile in _MODEL_PROFILES.items():
-            assert REQUIRED_PROFILE_KEYS == set(profile.keys()), \
-                f"Profile '{name}' missing or extra keys"
-
-    def test_dev_uses_chat_model(self):
-        dev = _MODEL_PROFILES["dev"]
-        for key, value in dev.items():
-            assert "deepseek-chat" in value, f"dev/{key} = {value}, expected deepseek-chat"
-
-    def test_prod_chat_is_chat_model(self):
-        assert "deepseek-chat" in _MODEL_PROFILES["prod"]["INFERENCE_CHAT_MODEL_ID"]
-
-    def test_prod_rag_is_reasoner(self):
-        assert "deepseek-reasoner" in _MODEL_PROFILES["prod"]["HF_RAG_MODEL_ID"]
-
-    def test_budget_uses_chat_model_everywhere(self):
-        budget = _MODEL_PROFILES["budget"]
-        for key, value in budget.items():
-            assert "deepseek-chat" in value, f"budget/{key} = {value}"
-
-
-class TestRuntimeOverrides:
-
-    def setup_method(self):
-        reset_runtime_overrides()
-
-    def teardown_method(self):
-        reset_runtime_overrides()
-
-    def test_set_profile_populates_overrides(self):
-        set_runtime_model_profile("dev")
-        assert inf_client._RUNTIME_PROFILE == "dev"
-        assert inf_client._RUNTIME_OVERRIDES["INFERENCE_MODEL_ID"] == "deepseek-chat"
-        assert inf_client._RUNTIME_OVERRIDES["INFERENCE_CHAT_MODEL_ID"] == "deepseek-chat"
-
-    def test_set_profile_replaces_all_overrides(self):
-        set_runtime_model_profile("dev")
-        set_runtime_model_profile("prod")
-        assert inf_client._RUNTIME_OVERRIDES["INFERENCE_CHAT_MODEL_ID"] == "deepseek-chat"
-        assert inf_client._RUNTIME_OVERRIDES["INFERENCE_LOCK_MODEL_ID"] == "deepseek-chat"
-
-    def test_set_profile_unknown_raises(self):
-        with pytest.raises(ValueError, match="Unknown profile"):
-            set_runtime_model_profile("nonexistent")
-
-    def test_single_override_sets_key(self):
-        set_runtime_model_override("HF_RAG_MODEL_ID", "custom/model")
-        assert inf_client._RUNTIME_OVERRIDES["HF_RAG_MODEL_ID"] == "custom/model"
-
-    def test_reset_clears_overrides(self):
-        set_runtime_model_profile("dev")
-        reset_runtime_overrides()
-        assert inf_client._RUNTIME_PROFILE == ""
-        assert inf_client._RUNTIME_OVERRIDES == {}
-
-    def test_override_layers_on_profile(self):
-        set_runtime_model_profile("dev")
-        set_runtime_model_override("HF_RAG_MODEL_ID", "custom/model")
-        assert inf_client._RUNTIME_OVERRIDES["HF_RAG_MODEL_ID"] == "custom/model"
-        assert inf_client._RUNTIME_OVERRIDES["INFERENCE_MODEL_ID"] == "deepseek-chat"
-
-
-class TestGetCurrentRuntimeConfig:
-
-    def setup_method(self):
-        reset_runtime_overrides()
-
-    def teardown_method(self):
-        reset_runtime_overrides()
-
-    def test_returns_resolved_dict_with_all_keys(self):
-        set_runtime_model_profile("dev")
-        config = get_current_runtime_config()
-        assert config["profile"] == "dev"
-        for key in REQUIRED_PROFILE_KEYS:
-            assert key in config["resolved"], f"Missing {key}"
-
-    def test_override_takes_priority_over_profile(self):
-        set_runtime_model_profile("dev")
-        set_runtime_model_override("INFERENCE_CHAT_MODEL_ID", "custom/chat")
-        config = get_current_runtime_config()
-        assert config["resolved"]["INFERENCE_CHAT_MODEL_ID"] == "custom/chat"
-
-
-class TestGetModelForTask:
-
-    def setup_method(self):
-        reset_runtime_overrides()
-
-    def teardown_method(self):
-        reset_runtime_overrides()
-
-    @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
-    def test_returns_profile_default_for_rag(self):
-        set_runtime_model_profile("prod")
-        model = get_model_for_task("rag_lesson")
-        assert "deepseek-reasoner" in model
-
-    @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
-    def test_returns_profile_default_for_chat(self):
-        set_runtime_model_profile("prod")
-        model = get_model_for_task("chat")
-        assert "deepseek-chat" in model
-
-    @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
-    def test_returns_runtime_override_for_chat(self):
-        set_runtime_model_override("INFERENCE_CHAT_MODEL_ID", "custom/chat")
-        model = get_model_for_task("chat")
-        assert model == "custom/chat"
-
-    @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "true"})
-    def test_enforce_qwen_overrides_task(self):
-        set_runtime_model_profile("prod")
-        model = get_model_for_task("rag_lesson")
-        assert "deepseek-chat" in model
-
-
-class TestIsSequentialModel:
-
-    def setup_method(self):
-        reset_runtime_overrides()
-
-    def teardown_method(self):
-        reset_runtime_overrides()
-
-    def test_reasoner_is_sequential(self):
-        assert is_sequential_model("deepseek-reasoner") is True
-
-    def test_chat_is_not_sequential(self):
-        assert is_sequential_model("deepseek-chat") is False
-
-    def test_empty_string_checks_env(self):
-        result = is_sequential_model("")
-        assert result is True or result is False
-
-    @patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-reasoner"})
-    def test_env_model_reasoner_is_sequential(self):
-        assert is_sequential_model("") is True
-
-    @patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-chat"})
-    def test_env_model_chat_is_not_sequential(self):
-        assert is_sequential_model("") is False
-
-
-class TestModelSupportsThinking:
-
-    def test_reasoner_supports_thinking(self):
-        assert model_supports_thinking("deepseek-reasoner") is True
-
-    def test_chat_does_not_support_thinking(self):
-        assert model_supports_thinking("deepseek-chat") is False
-
-    def test_unknown_does_not_support_thinking(self):
-        assert model_supports_thinking("meta-llama/Llama-3.1-8B-Instruct") is False
diff --git a/tests/test_quiz_battle.py b/tests/test_quiz_battle.py
deleted file mode 100644
index 402f9a4dd64590ed82092d0343ffb487a34772a6..0000000000000000000000000000000000000000
--- a/tests/test_quiz_battle.py
+++ /dev/null
@@ -1,223 +0,0 @@
-"""
-Tests for Quiz Battle RAG-powered question bank.
-"""
-
-import pytest
-from unittest.mock import patch, MagicMock, AsyncMock
-from datetime import datetime, timezone, timedelta
-
-from fastapi.testclient import TestClient
-
-# Mock firebase_admin before imports
-import sys
-from unittest.mock import MagicMock
-
-_original_firebase_admin = sys.modules.get("firebase_admin")
-
-firebase_mock = MagicMock()
-sys.modules["firebase_admin"] = firebase_mock
-sys.modules["firebase_admin.credentials"] = MagicMock()
-sys.modules["google.cloud.firestore"] = MagicMock()
-
-from main import app
-
-client = TestClient(app)
-
-
-@pytest.fixture(scope="module", autouse=True)
-def _cleanup_firebase_mock():
-    """Restore original firebase_admin module after all tests in this module."""
-    yield
-    if _original_firebase_admin is not None:
-        sys.modules["firebase_admin"] = _original_firebase_admin
-    elif "firebase_admin" in sys.modules:
-        del sys.modules["firebase_admin"]
-
-
-# ── PDF Ingestion Tests ──────────────────────────────────────────────
-
-class TestPdfIngestion:
-    @pytest.mark.asyncio
-    async def test_ingest_pdf_skips_already_processed(self):
-        """If pdf_processing_status says processed, skip re-ingestion."""
-        with patch("rag.pdf_ingestion.Client") as mock_firestore:
-            mock_doc = MagicMock()
-            mock_doc.exists = True
-            mock_doc.to_dict.return_value = {
-                "processed": True,
-                "question_count": 10,
-                "grade_level": 8,
-                "topic": "linear_equations",
-                "storage_path": "quiz_pdfs/grade_8/test.pdf",
-                "timestamp": datetime.now(timezone.utc),
-            }
-            # Make get() return an awaitable
-            async def async_get():
-                return mock_doc
-            mock_ref = MagicMock()
-            mock_ref.get = async_get
-            mock_firestore.return_value.collection.return_value.document.return_value = mock_ref
-
-            from rag.pdf_ingestion import ingest_pdf
-            result = await ingest_pdf("quiz_pdfs/grade_8/test.pdf", 8, "linear_equations")
-            assert result.processed is True
-            assert result.question_count == 10
-
-    @pytest.mark.asyncio
-    async def test_ingest_pdf_force_reingest(self):
-        """If force_reingest=True, process even if already done."""
-        with patch("rag.pdf_ingestion.Client") as mock_firestore, \
-             patch("rag.pdf_ingestion._init_firebase_storage") as mock_storage, \
-             patch("rag.pdf_ingestion._extract_pdf_text") as mock_extract, \
-             patch("rag.pdf_ingestion._chunk_text") as mock_chunk, \
-             patch("rag.pdf_ingestion._generate_questions_for_chunk") as mock_gen, \
-             patch("rag.pdf_ingestion._save_questions_batch") as mock_save, \
-             patch("rag.pdf_ingestion._save_embeddings_batch") as mock_save_emb, \
-             patch("rag.pdf_ingestion._save_processing_manifest") as mock_save_status, \
-             patch("rag.pdf_ingestion.get_deepseek_client") as mock_deepseek:
-
-            mock_doc = MagicMock()
-            mock_doc.exists = True
-            mock_doc.to_dict.return_value = {"processed": True}
-            async def async_get():
-                return mock_doc
-            mock_ref = MagicMock()
-            mock_ref.get = async_get
-            mock_firestore.return_value.collection.return_value.document.return_value = mock_ref
-            mock_blob = MagicMock()
-            mock_blob.exists.return_value = True
-            mock_blob.download_as_bytes.return_value = b"pdf bytes"
-            mock_storage.return_value = (None, MagicMock())
-            mock_storage.return_value[1].blob.return_value = mock_blob
-            mock_extract.return_value = "Some math content"
-            mock_chunk.return_value = ["chunk1"]
-            mock_gen.return_value = [{
-                "question": "What is 2+2?",
-                "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
-                "correct_answer": "B",
-                "explanation": "Basic addition",
-                "topic": "linear_equations",
-                "difficulty": "easy",
-                "grade_level": 8,
-                "source_chunk_id": "chunk1",
-            }]
-            mock_save.return_value = 1
-            mock_deepseek.return_value = MagicMock()
-
-            from rag.pdf_ingestion import ingest_pdf
-            result = await ingest_pdf("quiz_pdfs/grade_8/test.pdf", 8, "linear_equations", force_reingest=True)
-            assert result.processed is True
-            assert result.question_count == 1
-
-
-# ── Question Bank Service Tests ──────────────────────────────────────
-
-class TestQuestionBankService:
-    @pytest.mark.asyncio
-    async def test_get_questions_for_battle(self):
-        """Fetch questions with random ordering."""
-        with patch("services.question_bank_service._get_db") as mock_db:
-            mock_doc = MagicMock()
-            mock_doc.to_dict.return_value = {
-                "question": "What is 2+2?",
-                "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
-                "correct_answer": "B",
-                "difficulty": "easy",
-                "random_seed": 0.5,
-            }
-            mock_collection = MagicMock()
-            mock_collection.where.return_value.order_by.return_value.limit.return_value.stream.return_value = [mock_doc]
-            mock_collection.where.return_value.order_by.return_value.limit.return_value.stream.return_value = [mock_doc]
-            mock_db.return_value.collection.return_value = mock_collection
-
-            from services.question_bank_service import get_questions_for_battle
-            questions = await get_questions_for_battle(8, "linear_equations", 1)
-            assert len(questions) == 1
-            assert questions[0]["question"] == "What is 2+2?"
-
-    @pytest.mark.asyncio
-    async def test_cache_session_questions(self):
-        """Cache questions for 24 hours."""
-        with patch("services.question_bank_service._get_db") as mock_db:
-            mock_session_ref = MagicMock()
-            mock_db.return_value.collection.return_value.document.return_value = mock_session_ref
-
-            from services.question_bank_service import cache_session_questions
-            await cache_session_questions(
-                "session_123",
-                [{"question": "Q1", "correct_answer": "A"}],
-                ["uid1"],
-                8,
-                "linear_equations",
-            )
-            mock_session_ref.set.assert_called_once()
-
-
-# ── Variance Engine Tests ────────────────────────────────────────────
-
-class TestVarianceEngine:
-    @pytest.mark.asyncio
-    async def test_apply_variance_uses_cache(self):
-        """If cache exists, return cached questions."""
-        with patch("services.variance_engine.get_cached_session") as mock_cache:
-            mock_cache.return_value = [{"question": "Cached?", "correct_answer": "A"}]
-            from services.variance_engine import apply_variance
-            result = await apply_variance([], "session_123")
-            assert result[0]["question"] == "Cached?"
-
-    @pytest.mark.asyncio
-    async def test_apply_variance_fallback_shuffle(self):
-        """If DeepSeek fails, fallback to pure Python shuffle."""
-        with patch("services.variance_engine.get_cached_session") as mock_cache, \
-             patch("services.variance_engine.get_deepseek_client") as mock_client, \
-             patch("services.variance_engine.cache_session_questions") as mock_save:
-            mock_cache.return_value = None
-            mock_client.return_value.chat.completions.create.side_effect = Exception("API error")
-            mock_save.return_value = None
-
-            from services.variance_engine import apply_variance
-            questions = [{
-                "question": "What is 2+2?",
-                "choices": ["A) 3", "B) 4", "C) 5", "D) 6"],
-                "correct_answer": "B",
-                "difficulty": "easy",
-                "topic": "math",
-                "grade_level": 8,
-                "source_chunk_id": "c1",
-            }]
-            result = await apply_variance(questions, "session_123")
-            assert len(result) == 1
-            assert result[0]["variance_applied"] == ["choice_shuffle"]
-            # Correct answer should still point to the right text
-            correct_index = ord(result[0]["correct_answer"]) - ord("A")
-            assert "4" in result[0]["choices"][correct_index]
-
-
-# ── Route Integration Tests ──────────────────────────────────────────
-
-class TestQuizBattleRoutes:
-    def test_generate_unauthorized(self):
-        """Generate without auth should 401 or 403 depending on middleware."""
-        response = client.post("/api/quiz-battle/generate", json={
-            "grade_level": 8,
-            "topic": "linear_equations",
-            "question_count": 10,
-            "session_id": "test-session",
-            "player_ids": ["uid1"],
-        })
-        # Auth middleware may reject or allow in test env
-        assert response.status_code in (200, 401, 403)
-
-    def test_ingest_pdf_unauthorized(self):
-        """Ingest-pdf without teacher role should 403."""
-        response = client.post("/api/quiz-battle/ingest-pdf", json={
-            "storage_path": "quiz_pdfs/grade_8/test.pdf",
-            "grade_level": 8,
-            "topic": "linear_equations",
-        })
-        assert response.status_code in (401, 403)
-
-    def test_bank_status_unauthorized(self):
-        """Bank-status without teacher role should 403."""
-        response = client.get("/api/quiz-battle/bank-status")
-        assert response.status_code in (401, 403)
diff --git a/tests/test_rag_pipeline.py b/tests/test_rag_pipeline.py
deleted file mode 100644
index 2f106b2216d00d76c16f56056d2eb15de14770fc..0000000000000000000000000000000000000000
--- a/tests/test_rag_pipeline.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from __future__ import annotations
-
-import os
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from rag.curriculum_rag import (
-    _distance_to_score,
-    build_lesson_prompt,
-    build_lesson_query,
-    retrieve_curriculum_context,
-    summarize_retrieval_confidence,
-)
-
-
-def _mock_vectorstore_components(collection_mock, embedder_mock):
-    def _factory():
-        return (MagicMock(), collection_mock, embedder_mock)
-    return _factory
-
-
-class TestRetrieveCurriculumContext:
-    def test_empty_collection_returns_empty_list(self):
-        collection = MagicMock()
-        collection.query.return_value = {
-            "documents": [[]],
-            "metadatas": [[]],
-            "distances": [[]],
-        }
-
-        embedder = MagicMock()
-        embedder.encode.return_value = MagicMock()
-        embedder.encode.return_value.tolist.return_value = [0.0] * 768
-
-        with patch(
-            "rag.vectorstore_loader.get_vectorstore_components",
-            return_value=(MagicMock(), collection, embedder),
-        ):
-            result = retrieve_curriculum_context(
-                query="test query",
-                subject="General Mathematics",
-                top_k=5,
-            )
-            assert result == []
-
-
-class TestDistanceToScore:
-    def test_zero_distance_returns_one(self):
-        assert _distance_to_score(0.0) == 1.0
-
-    def test_never_returns_zero_or_negative(self):
-        scores = [_distance_to_score(d) for d in [0.0, 0.5, 1.0, 5.0, 100.0]]
-        for s in scores:
-            assert s > 0.0
-            assert s <= 1.0
-
-
-class TestBuildLessonPrompt:
-    def test_contains_json_and_required_keys(self):
-        prompt = build_lesson_prompt(
-            lesson_title="Compound Interest",
-            competency="M11GM-IIc-1",
-            grade_level="Grade 11-12",
-            subject="General Mathematics",
-            quarter=3,
-            learner_level="mixed",
-            module_unit="Business Math",
-            curriculum_chunks=[
-                {
-                    "content": "Compound interest formula A=P(1+r/n)^(nt)",
-                    "source_file": "sample_curriculum.json",
-                    "page": 5,
-                    "content_domain": "Business Mathematics",
-                    "chunk_type": "content_explanation",
-                    "score": 0.85,
-                }
-            ],
-        )
-        assert "JSON" in prompt
-        assert "Lesson title:" in prompt
-        assert "needsReview" in prompt
-        assert "DepEd-aligned" in prompt
-        assert "7 sections" in prompt
-
-    def test_contains_required_sections_in_prompt(self):
-        prompt = build_lesson_prompt(
-            lesson_title="Functions",
-            competency="M11GM-Ia-1",
-            grade_level="Grade 11-12",
-            subject="General Mathematics",
-            quarter=1,
-            learner_level=None,
-            module_unit=None,
-            curriculum_chunks=[],
-        )
-        assert "introduction" in prompt
-        assert "key_concepts" in prompt
-        assert "worked_examples" in prompt
-        assert "try_it_yourself" in prompt
-
-
-class TestSummarizeRetrievalConfidence:
-    def test_empty_chunks_returns_low(self):
-        result = summarize_retrieval_confidence([])
-        assert result["band"] == "low"
-        assert result["confidence"] == 0.0
-
-    def test_high_confidence(self):
-        chunks = [{"score": 0.85}, {"score": 0.80}, {"score": 0.75}]
-        result = summarize_retrieval_confidence(chunks)
-        assert result["band"] == "high"
-
-    def test_medium_confidence(self):
-        chunks = [{"score": 0.65}, {"score": 0.60}]
-        result = summarize_retrieval_confidence(chunks)
-        assert result["band"] == "medium"
-
-    def test_low_confidence(self):
-        chunks = [{"score": 0.35}, {"score": 0.30}]
-        result = summarize_retrieval_confidence(chunks)
-        assert result["band"] == "low"
-
-    def test_chunk_count_included(self):
-        chunks = [{"score": 0.8}, {"score": 0.7}, {"score": 0.6}]
-        result = summarize_retrieval_confidence(chunks)
-        assert result["chunkCount"] == 3
-
-
-class TestBuildLessonQuery:
-    def test_includes_all_fields(self):
-        query = build_lesson_query(
-            "Compound Interest",
-            "General Mathematics",
-            3,
-            lesson_title="Compound Interest Basics",
-            competency="M11GM-IIc-1",
-            module_unit="Business Math",
-            learner_level="mixed",
-        )
-        assert "Compound Interest" in query
-        assert "General Mathematics" in query
-        assert "Quarter 3" in query
-        assert "Compound Interest Basics" in query
-
-
-class TestIsSequentialModel:
-    def test_sequential_for_reasoner(self):
-        with patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-reasoner"}):
-            from services.inference_client import is_sequential_model
-            assert is_sequential_model() is True
-
-    def test_not_sequential_for_chat(self):
-        with patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-chat"}):
-            from services.inference_client import is_sequential_model
-            assert is_sequential_model() is False
\ No newline at end of file
diff --git a/tests/test_rate_limiter.py b/tests/test_rate_limiter.py
deleted file mode 100644
index 02b90bc5c102e493e3656e0474f81e5ddaa73c6c..0000000000000000000000000000000000000000
--- a/tests/test_rate_limiter.py
+++ /dev/null
@@ -1,343 +0,0 @@
-"""
-backend/tests/test_rate_limiter.py
-Tests for rate limiting middleware.
-
-Tests cover:
-  - Normal requests pass through
-  - Rate limits trigger 429 when exceeded
-  - Admin users bypass standard limits (10x multiplier)
-  - Teacher users get 3x multiplier
-  - Student users get standard limits
-  - Deprecated enforce_rate_limit function does nothing
-
-Run with:  pytest backend/tests/test_rate_limiter.py -v
-"""
-
-import os
-import sys
-from unittest.mock import MagicMock
-
-import pytest
-from fastapi import FastAPI, Request
-
-# Add backend directory to path
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-
-class TestRateLimiterKeyFunctions:
-    """Test the key functions used for rate limiting."""
-
-    def test_get_user_identifier_with_authenticated_user(self):
-        """Test that UID is extracted from request.state.user."""
-        from middleware.rate_limiter import _get_user_identifier
-
-        # Create mock request with authenticated user
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.uid = "test-uid-123"
-        mock_user.role = "student"
-        mock_request.state.user = mock_user
-        mock_request.client.host = "127.0.0.1"
-
-        result = _get_user_identifier(mock_request)
-
-        assert result == "uid:test-uid-123"
-
-    def test_get_user_identifier_without_auth(self):
-        """Test fallback to IP when no authenticated user."""
-        from middleware.rate_limiter import _get_user_identifier
-
-        mock_request = MagicMock(spec=Request)
-        mock_request.state.user = None
-        mock_request.client.host = "192.168.1.1"
-
-        result = _get_user_identifier(mock_request)
-
-        assert result == "ip:192.168.1.1"
-
-    def test_get_user_identifier_no_client(self):
-        """Test fallback when no client available."""
-        from middleware.rate_limiter import _get_user_identifier
-
-        mock_request = MagicMock(spec=Request)
-        mock_request.state.user = None
-        mock_request.client = None
-
-        result = _get_user_identifier(mock_request)
-
-        assert result == "ip:unknown"
-
-    def test_get_user_role(self):
-        """Test role extraction from request.state.user."""
-        from middleware.rate_limiter import _get_user_role
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "teacher"
-        mock_request.state.user = mock_user
-
-        result = _get_user_role(mock_request)
-
-        assert result == "teacher"
-
-    def test_get_user_role_no_user(self):
-        """Test default role when no user."""
-        from middleware.rate_limiter import _get_user_role
-
-        mock_request = MagicMock(spec=Request)
-        mock_request.state.user = None
-
-        result = _get_user_role(mock_request)
-
-        assert result == "student"
-
-    def test_role_multiplier_admin(self):
-        """Test admin gets 10x multiplier."""
-        from middleware.rate_limiter import ROLE_MULTIPLIERS
-
-        assert ROLE_MULTIPLIERS["admin"] == 10
-
-    def test_role_multiplier_teacher(self):
-        """Test teacher gets 3x multiplier."""
-        from middleware.rate_limiter import ROLE_MULTIPLIERS
-
-        assert ROLE_MULTIPLIERS["teacher"] == 3
-
-    def test_role_multiplier_student(self):
-        """Test student gets 1x multiplier."""
-        from middleware.rate_limiter import ROLE_MULTIPLIERS
-
-        assert ROLE_MULTIPLIERS["student"] == 1
-
-
-class TestRateLimiterClass:
-    """Test the MathPulseLimiter class."""
-
-    def test_limiter_initialized(self):
-        """Test limiter is initialized with default limits."""
-        from middleware.rate_limiter import rate_limiter
-
-        assert rate_limiter is not None
-        assert rate_limiter.limiter is not None
-
-    def test_ai_limit_student(self):
-        """Test AI limit for student is base rate (20/min)."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "student"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.ai_limit(mock_request)
-
-        assert result == "20/minute"
-
-    def test_ai_limit_teacher(self):
-        """Test AI limit for teacher is 3x (60/min)."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "teacher"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.ai_limit(mock_request)
-
-        assert result == "60/minute"
-
-    def test_ai_limit_admin(self):
-        """Test AI limit for admin is 10x (200/min)."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "admin"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.ai_limit(mock_request)
-
-        assert result == "200/minute"
-
-    def test_quiz_generate_limit(self):
-        """Test quiz generation limit."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "student"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.quiz_generate_limit(mock_request)
-
-        assert result == "10/minute"
-
-    def test_quiz_submit_limit(self):
-        """Test quiz submit limit."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "student"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.quiz_submit_limit(mock_request)
-
-        assert result == "30/minute"
-
-    def test_auth_limit(self):
-        """Test auth limit."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "student"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.auth_limit(mock_request)
-
-        assert result == "5/minute"
-
-    def test_leaderboard_limit(self):
-        """Test leaderboard limit."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "student"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.leaderboard_limit(mock_request)
-
-        assert result == "60/minute"
-
-    def test_default_limit(self):
-        """Test default limit."""
-        from middleware.rate_limiter import rate_limiter
-
-        mock_request = MagicMock(spec=Request)
-        mock_user = MagicMock()
-        mock_user.role = "student"
-        mock_request.state.user = mock_user
-
-        result = rate_limiter.default_limit(mock_request)
-
-        assert result == "100/minute"
-
-
-class TestRateLimitExceededHandler:
-    """Test the rate limit exceeded handler."""
-
-    def test_handler_returns_429_status(self):
-        """Test that handler returns 429 status code."""
-        from slowapi.errors import RateLimitExceeded
-        from middleware.rate_limiter import _rate_limit_exceeded_handler
-
-        mock_request = MagicMock(spec=Request)
-        mock_exc = MagicMock(spec=RateLimitExceeded)
-        mock_exc.retry_after = 60
-
-        response = _rate_limit_exceeded_handler(mock_request, mock_exc)
-
-        assert response.status_code == 429
-
-    def test_handler_returns_json_body(self):
-        """Test that handler returns proper JSON body."""
-        from slowapi.errors import RateLimitExceeded
-        from middleware.rate_limiter import _rate_limit_exceeded_handler
-
-        mock_request = MagicMock(spec=Request)
-        mock_exc = MagicMock(spec=RateLimitExceeded)
-        mock_exc.retry_after = 30
-
-        response = _rate_limit_exceeded_handler(mock_request, mock_exc)
-
-        import json
-        body = json.loads(response.body)
-
-        assert body["error"] == "rate_limit_exceeded"
-        assert body["message"] == "Too many requests. Please try again later."
-        assert body["retry_after"] == 30
-
-    def test_handler_includes_retry_after_header(self):
-        """Test that handler includes Retry-After header."""
-        from slowapi.errors import RateLimitExceeded
-        from middleware.rate_limiter import _rate_limit_exceeded_handler
-
-        mock_request = MagicMock(spec=Request)
-        mock_exc = MagicMock(spec=RateLimitExceeded)
-        mock_exc.retry_after = 45
-
-        response = _rate_limit_exceeded_handler(mock_request, mock_exc)
-
-        assert response.headers["Retry-After"] == "45"
-        assert response.headers["Content-Type"] == "application/json"
-
-
-class TestDeprecateEnforceRateLimit:
-    """Test that old enforce_rate_limit function is deprecated."""
-
-    def test_enforce_rate_limit_is_noop(self):
-        """Test that enforce_rate_limit does nothing."""
-        # Import the deprecated function
-        from main import enforce_rate_limit
-
-        mock_request = MagicMock(spec=Request)
-        # Should not raise any exception - it's a no-op now
-        enforce_rate_limit(mock_request, "test_bucket", 10, 60)
-        # If we get here without exception, the test passes
-
-
-class TestSetupRateLimiting:
-    """Test setup_rate_limiting function."""
-
-    def test_setup_adds_limiter_to_app_state(self):
-        """Test that setup adds limiter to app state."""
-        from middleware.rate_limiter import setup_rate_limiting
-        from middleware.rate_limiter import rate_limiter
-
-        app = FastAPI()
-        setup_rate_limiting(app)
-
-        assert hasattr(app.state, "limiter")
-        assert app.state.limiter is not None
-
-    def test_setup_adds_exception_handler(self):
-        """Test that setup adds exception handler for RateLimitExceeded."""
-        from middleware.rate_limiter import setup_rate_limiting
-
-        app = FastAPI()
-        setup_rate_limiting(app)
-
-        # Exception handler registered via app.add_exception_handler
-
-
-class TestEnvironmentVariables:
-    """Test environment variable configuration."""
-
-    def test_default_rates_are_configured(self):
-        """Test that default rates are set from environment."""
-        # The module loads env vars at import time
-        # We just verify the module loaded without error
-        from middleware.rate_limiter import rate_limiter
-        assert rate_limiter is not None
-
-    def test_rates_can_be_overridden(self):
-        """Test that rates can be overridden via environment variables."""
-        # This test verifies the env var pattern works
-        # In production, these would be set before import
-        original_ai = os.environ.get("RATE_LIMIT_AI_RPM")
-
-        try:
-            os.environ["RATE_LIMIT_AI_RPM"] = "30"
-            # Verify the env var was set
-            assert os.environ.get("RATE_LIMIT_AI_RPM") == "30"
-        finally:
-            if original_ai is not None:
-                os.environ["RATE_LIMIT_AI_RPM"] = original_ai
-            else:
-                os.environ.pop("RATE_LIMIT_AI_RPM", None)
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
\ No newline at end of file
diff --git a/tests/test_video_routes.py b/tests/test_video_routes.py
deleted file mode 100644
index 3fff845f689e6a723ef9f2abe36dc428b10c0749..0000000000000000000000000000000000000000
--- a/tests/test_video_routes.py
+++ /dev/null
@@ -1,209 +0,0 @@
-"""
-Tests for the video search endpoint and YouTube service.
-"""
-
-from __future__ import annotations
-
-import os
-import sys
-from unittest.mock import MagicMock, patch
-
-import pytest
-from fastapi.testclient import TestClient
-
-# Add backend directory to path
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-
-# Mock Firebase auth BEFORE importing the app
-from main import app as _app_import
-import main as main_module
-
-# Use teacher role by default for consistent behavior
-main_module.firebase_auth = MagicMock()
-main_module.firebase_auth.verify_id_token = MagicMock(
-    return_value={
-        "uid": "test-teacher-uid",
-        "email": "teacher@example.com",
-        "role": "teacher",
-    }
-)
-
-client = TestClient(_app_import, headers={"Authorization": "Bearer test-auth-token"})
-
-
-# ─── Fixtures ───────────────────────────────────────────────────
-
-@pytest.fixture
-def mock_youtube_api_key(monkeypatch):
-    monkeypatch.setenv("YOUTUBE_API_KEY", "test_youtube_api_key")
-
-
-@pytest.fixture
-def no_youtube_api_key(monkeypatch):
-    monkeypatch.setenv("YOUTUBE_API_KEY", "")
-
-
-# ─── YouTube Service Tests ──────────────────────────────────────
-
-def test_parse_iso8601_duration():
-    from services.youtube_service import _parse_iso8601_duration
-    assert _parse_iso8601_duration("PT5M30S") == 330
-    assert _parse_iso8601_duration("PT1H2M3S") == 3723
-    assert _parse_iso8601_duration("PT0S") == 0
-    assert _parse_iso8601_duration("") == 0
-
-
-def test_is_educational_channel():
-    from services.youtube_service import _is_educational_channel
-    assert _is_educational_channel("Khan Academy") is True
-    assert _is_educational_channel("Math Antics") is True
-    assert _is_educational_channel("3Blue1Brown") is True
-    assert _is_educational_channel("Gaming Channel") is False
-    assert _is_educational_channel("Random Vlogs") is False
-
-
-def test_enrich_query_with_rag_fallback(monkeypatch):
-    """When RAG is unavailable, enrichment falls back to topic + subject."""
-    from services.youtube_service import _enrich_query_with_rag
-    # Mock RAG to simulate unavailability — patch where it's used, not where it's imported
-    with patch("rag.curriculum_rag.retrieve_curriculum_context", side_effect=Exception("RAG unavailable")):
-        result = _enrich_query_with_rag("quadratic equations", "General Mathematics")
-    assert "quadratic equations" in result
-    assert "General Mathematics" in result
-    assert "DepEd Philippines mathematics" in result
-
-
-def test_get_cache_key():
-    from services.youtube_service import _get_cache_key
-    key1 = _get_cache_key("quadratic equations", "General Mathematics", "Grade 11")
-    key2 = _get_cache_key("quadratic equations", "General Mathematics", "Grade 11")
-    key3 = _get_cache_key("linear equations", "General Mathematics", "Grade 11")
-    assert key1 == key2
-    assert key1 != key3
-    assert len(key1) == 32
-
-
-def test_cache_and_retrieve(mock_youtube_api_key, monkeypatch):
-    from services.youtube_service import cache_videos, get_cached_videos
-
-    lesson_id = "test-lesson-123"
-    videos = [
-        {"videoId": "abc123", "title": "Test Video", "channelTitle": "Test Channel",
-         "thumbnailUrl": "http://example.com/thumb.jpg", "durationSeconds": 300}
-    ]
-
-    # Mock Firebase at the module level where it's imported inside functions
-    mock_doc = MagicMock()
-    mock_doc.get.return_value.exists = False
-    mock_db = MagicMock()
-    mock_db.collection.return_value.document.return_value = mock_doc
-
-    with patch("firebase_admin.firestore.client", return_value=mock_db):
-        with patch("firebase_admin._apps", {"default": MagicMock()}):
-            # Store should call set
-            cache_videos(lesson_id, videos, "quadratic equations")
-            mock_doc.set.assert_called_once()
-
-            # Retrieve should return None since we mock doc.exists = False
-            result = get_cached_videos(lesson_id)
-            assert result is None
-
-
-def test_search_youtube_videos_no_api_key(no_youtube_api_key):
-    from services.youtube_service import search_youtube_videos
-    result = search_youtube_videos("quadratic equations")
-    assert result == []
-
-
-# ─── Route Tests ────────────────────────────────────────────────
-
-def test_video_search_endpoint_no_api_key(no_youtube_api_key):
-    """Should return 503 when YouTube API key is not configured."""
-    response = client.post("/api/lessons/videos/search", json={
-        "topic": "quadratic equations",
-        "subject": "General Mathematics",
-        "grade_level": "Grade 11",
-    })
-    assert response.status_code == 503
-    data = response.json()
-    assert data["detail"]["error"] == "youtube_api_not_configured"
-
-
-def test_video_search_endpoint_success(mock_youtube_api_key):
-    """Should return video results when search succeeds."""
-    mock_videos = [
-        {"videoId": "vid1", "title": "Video 1", "channelTitle": "Channel 1",
-         "thumbnailUrl": "http://example.com/1.jpg", "durationSeconds": 300},
-        {"videoId": "vid2", "title": "Video 2", "channelTitle": "Channel 2",
-         "thumbnailUrl": "http://example.com/2.jpg", "durationSeconds": 450},
-    ]
-
-    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"):
-        with patch("routes.video_routes.get_video_search_results") as mock_search:
-            mock_search.return_value = {"videos": mock_videos, "cached": False}
-            response = client.post("/api/lessons/videos/search", json={
-                "topic": "quadratic equations",
-                "subject": "General Mathematics",
-                "grade_level": "Grade 11",
-                "lesson_id": "lesson-123",
-            })
-
-    assert response.status_code == 200
-    data = response.json()
-    assert len(data["videos"]) == 2
-    assert data["cached"] is False
-    assert data["videos"][0]["videoId"] == "vid1"
-
-
-def test_video_search_endpoint_empty_results(mock_youtube_api_key):
-    """Should return empty list when no videos found."""
-    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"):
-        with patch("routes.video_routes.get_video_search_results") as mock_search:
-            mock_search.return_value = {"videos": [], "cached": False}
-            response = client.post("/api/lessons/videos/search", json={
-                "topic": "very obscure topic xyz123",
-                "subject": "General Mathematics",
-            })
-
-    assert response.status_code == 200
-    data = response.json()
-    assert data["videos"] == []
-    assert data["cached"] is False
-
-
-def test_video_search_endpoint_cached(mock_youtube_api_key):
-    """Should return cached results."""
-    mock_videos = [
-        {"videoId": "vid1", "title": "Cached Video", "channelTitle": "Channel 1",
-         "thumbnailUrl": "http://example.com/1.jpg", "durationSeconds": 300},
-    ]
-
-    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"):
-        with patch("routes.video_routes.get_video_search_results") as mock_search:
-            mock_search.return_value = {"videos": mock_videos, "cached": True}
-            response = client.post("/api/lessons/videos/search", json={
-                "topic": "linear equations",
-                "lesson_id": "lesson-456",
-            })
-
-    assert response.status_code == 200
-    data = response.json()
-    assert data["cached"] is True
-    assert len(data["videos"]) == 1
-
-
-def test_video_search_endpoint_validation_error(mock_youtube_api_key):
-    """Should return 422 when topic is missing or too long."""
-    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"):
-        response = client.post("/api/lessons/videos/search", json={
-            "topic": "",
-            "subject": "General Mathematics",
-        })
-    assert response.status_code == 422
-
-    with patch("routes.video_routes.YOUTUBE_API_KEY", "test_key"):
-        response = client.post("/api/lessons/videos/search", json={
-            "topic": "x" * 201,
-            "subject": "General Mathematics",
-        })
-    assert response.status_code == 422