github-actions[bot] commited on
Commit
b5cb5bb
·
1 Parent(s): e2217c1

🚀 Auto-deploy backend from GitHub (41571e6)

Browse files
.deploy-trigger ADDED
@@ -0,0 +1 @@
 
 
1
+ 2026-04-29 21:37:27
.dockerignore ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .pytest_cache/
6
+ .mypy_cache/
7
+ .ruff_cache/
8
+
9
+ .git/
10
+ .github/
11
+ .vscode/
12
+ .idea/
13
+
14
+ .env
15
+ .env.*
16
+ *.log
17
+
18
+ tests/
19
+ docs/
20
+ datasets/
21
+ jobs/
22
+ build/
23
+ dist/
24
+ node_modules/
25
+
26
+ Dockerfile*
27
+ docker-compose.yml
28
+ *.md
29
+ !README.md
.env.example ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Vector Store ──────────────────────────────────────────────────
2
+ # Path to ChromaDB vectorstore directory
3
+ CURRICULUM_VECTORSTORE_DIR=datasets/vectorstore
4
+
5
+ # Sentence transformer for embeddings
6
+ # WARNING: changing this requires full re-ingestion of all curriculum data
7
+ EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
8
+
9
+ # ── DeepSeek AI Inference ─────────────────────────────────────────
10
+ # DeepSeek API key (OpenAI-compatible), required for all AI features
11
+ DEEPSEEK_API_KEY=your_deepseek_api_key_here
12
+ DEEPSEEK_BASE_URL=https://api.deepseek.com
13
+ DEEPSEEK_MODEL=deepseek-chat
14
+ DEEPSEEK_REASONER_MODEL=deepseek-reasoner
15
+
16
+ # ── HuggingFace (dataset push / HF Space deployment only) ─────────
17
+ # HF API token — kept only for HF Space deployment and dataset push
18
+ HF_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
19
+
20
+ # HF Model ID for AI monitoring proxy
21
+ VITE_HF_MODEL_ID=Qwen/QwQ-32B
22
+
23
+ # ── Model Selection ───────────────────────────────────────────────
24
+ # LOCAL DEVELOPMENT — deepseek-chat (fast, $0.14/M input)
25
+ HF_MODEL_ID=deepseek-chat
26
+
27
+ # PRODUCTION — deepseek-reasoner for step-by-step solutions
28
+ # HF_MODEL_ID=deepseek-reasoner
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1 \
5
+ PIP_DISABLE_PIP_VERSION_CHECK=1 \
6
+ PIP_NO_CACHE_DIR=1 \
7
+ HF_HOME=/data/.huggingface \
8
+ HUGGINGFACE_HUB_CACHE=/data/.huggingface/hub \
9
+ TRANSFORMERS_CACHE=/data/.huggingface/transformers \
10
+ MPLCONFIGDIR=/tmp/matplotlib
11
+
12
+ WORKDIR /app
13
+
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ ca-certificates \
16
+ && rm -rf /var/lib/apt/lists/*
17
+
18
+ COPY requirements.txt /app/requirements.txt
19
+ RUN python -m pip install --upgrade pip setuptools wheel && \
20
+ python -m pip install --prefer-binary --retries 5 -r /app/requirements.txt
21
+
22
+ COPY . /app
23
+
24
+ EXPOSE 7860
25
+
26
+ CMD ["/bin/sh", "/app/startup.sh"]
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Mathpulse Api V3test
3
- emoji: 📊
4
- colorFrom: purple
5
- colorTo: gray
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: MathPulse AI API
3
+ emoji: "🧮"
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ # MathPulse AI Backend
12
+
13
+ FastAPI backend for the MathPulse AI educational platform.
analytics.py ADDED
@@ -0,0 +1,2110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MathPulse AI - ML-Powered Student Analytics & Adaptive Learning Module
3
+
4
+ Provides:
5
+ - Student competency assessment via IRT (Item Response Theory)
6
+ - Enhanced risk prediction with trained ML models (Random Forest / XGBoost)
7
+ - Quiz difficulty calibration engine
8
+ - Topic recommendation engine
9
+ - Learning analytics aggregation
10
+ - Mock data generation for development/testing
11
+ """
12
+
13
+ import os
14
+ import math
15
+ import time
16
+ import random
17
+ import logging
18
+ import traceback
19
+ import re
20
+ from typing import List, Optional, Dict, Any, Tuple, Literal
21
+ from datetime import datetime, timedelta
22
+ from collections import defaultdict
23
+
24
+ import numpy as np # type: ignore[import-not-found]
25
+ from scipy.optimize import minimize_scalar # type: ignore[import-not-found]
26
+ from sklearn.linear_model import LinearRegression # type: ignore[import-not-found]
27
+ from sklearn.ensemble import RandomForestClassifier # type: ignore[import-not-found]
28
+ from sklearn.model_selection import train_test_split # type: ignore[import-not-found]
29
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report # type: ignore[import-not-found]
30
+ from pydantic import BaseModel, Field
31
+
32
+ # Optional heavy dependencies — guarded imports
33
+ xgb: Any = None
34
+ shap: Any = None
35
+ joblib: Any = None
36
+ firebase_admin: Any = None
37
+ credentials: Any = None
38
+ firestore: Any = None
39
+
40
+ try:
41
+ import xgboost as xgb # type: ignore[import-not-found,no-redef]
42
+ HAS_XGBOOST = True
43
+ except ImportError:
44
+ HAS_XGBOOST = False
45
+
46
+ try:
47
+ import shap # type: ignore[import-not-found,no-redef]
48
+ HAS_SHAP = True
49
+ except ImportError:
50
+ HAS_SHAP = False
51
+
52
+ try:
53
+ import joblib # type: ignore[import-not-found,no-redef]
54
+ HAS_JOBLIB = True
55
+ except ImportError:
56
+ HAS_JOBLIB = False
57
+
58
+ try:
59
+ import firebase_admin # type: ignore[import-not-found,no-redef]
60
+ from firebase_admin import credentials, firestore # type: ignore[import-not-found,no-redef,assignment]
61
+ HAS_FIREBASE = True
62
+ except ImportError:
63
+ HAS_FIREBASE = False
64
+
65
+ logger = logging.getLogger("mathpulse.analytics")
66
+
67
+
68
+ def _normalize_topic_key(value: str) -> str:
69
+ key = re.sub(r"[^a-z0-9\s]+", " ", (value or "").lower())
70
+ key = re.sub(r"\s+", " ", key).strip()
71
+ return key
72
+
73
+
74
+ TOPIC_LABEL_ALIASES: Dict[str, str] = {
75
+ _normalize_topic_key("Functions and Relations"): "Functions as Mathematical Models",
76
+ _normalize_topic_key("Evaluating Functions"): "Function Notation and Evaluation",
77
+ _normalize_topic_key("Rational Functions"): "Graphs of Rational Functions",
78
+ _normalize_topic_key("Exponential Functions"): "Graphs of Exponential Functions",
79
+ _normalize_topic_key("Logarithmic Functions"): "Graphs of Logarithmic Functions",
80
+ _normalize_topic_key("Simple Interest"): "Simple and Compound Interest",
81
+ _normalize_topic_key("Compound Interest"): "Simple and Compound Interest",
82
+ _normalize_topic_key("Annuities"): "Simple and General Annuities",
83
+ _normalize_topic_key("Loans and Amortization"): "Loans, Amortization, and Sinking Funds",
84
+ _normalize_topic_key("Stocks and Bonds"): "Stocks, Bonds, and Market Indices",
85
+ _normalize_topic_key("Propositions and Connectives"): "Propositions and Logical Connectives",
86
+ _normalize_topic_key("Truth Tables"): "Truth Values and Truth Tables",
87
+ _normalize_topic_key("Logical Equivalence"): "Logical Equivalence and Implication",
88
+ _normalize_topic_key("Valid Arguments and Fallacies"): "Validity of Arguments",
89
+ }
90
+
91
+
92
def _canonicalize_topic_label(value: str) -> str:
    """Map a raw topic label onto its canonical curriculum name.

    Blank/None input yields "Unknown"; labels with no registered alias pass
    through unchanged.
    """
    label = str(value or "").strip()
    if label:
        return TOPIC_LABEL_ALIASES.get(_normalize_topic_key(label), label)
    return "Unknown"
97
+
98
+ # ─── Configuration ─────────────────────────────────────────────
99
+
100
+ RISK_MODEL_PATH = "models/risk_classifier.joblib"
101
+ IRT_DIFFICULTY_CACHE_TTL = 3600 # 1 hour
102
+ MIN_QUIZ_ATTEMPTS_FOR_COMPETENCY = 3
103
+ LEARNING_VELOCITY_WINDOW_DAYS = 30
104
+ COMPETENCY_THRESHOLDS = {
105
+ "beginner": (0, 40),
106
+ "developing": (40, 65),
107
+ "proficient": (65, 85),
108
+ "advanced": (85, 100),
109
+ }
110
+
111
+ # Topic dependency / prerequisite graph
112
+ TOPIC_PREREQUISITES: Dict[str, List[str]] = {
113
+ "Quadratic Equations": ["Linear Equations", "Variables & Expressions"],
114
+ "Systems of Equations": ["Linear Equations", "Slope & Rate of Change"],
115
+ "Polynomials": ["Variables & Expressions", "Exponents & Powers"],
116
+ "Factoring": ["Polynomials", "Variables & Expressions"],
117
+ "Quadratic Functions": ["Quadratic Equations", "Functions"],
118
+ "Exponential Functions": ["Exponents & Powers", "Functions"],
119
+ "Trigonometric Ratios": ["Pythagorean Theorem", "Angles", "Triangles"],
120
+ "Trigonometric Functions": ["Trigonometric Ratios", "Functions"],
121
+ "Derivatives": ["Limits", "Functions"],
122
+ "Integration": ["Derivatives", "Area Under a Curve"],
123
+ "Limits": ["Functions", "Rational Expressions"],
124
+ "Coordinate Geometry": ["Linear Equations", "Slope & Rate of Change"],
125
+ "Circle Theorems": ["Circles", "Angles"],
126
+ "Logarithmic Functions": ["Exponential Functions"],
127
+ "Rational Functions": ["Polynomials", "Factoring"],
128
+ "Complex Numbers": ["Quadratic Equations", "Radicals & Exponents"],
129
+ "Matrices (Introduction)": ["Systems of Equations"],
130
+ "Conic Sections": ["Coordinate Geometry", "Quadratic Functions"],
131
+ "Probability of Compound Events": ["Probability Basics"],
132
+ "Permutations & Combinations": ["Probability Basics", "Factorial"],
133
+ "Hypothesis Testing Basics": ["Normal Distribution Basics", "Sampling Methods"],
134
+ "Confidence Intervals": ["Normal Distribution Basics", "Sampling Methods"],
135
+ "Regression Analysis": ["Scatter Plots", "Linear Functions"],
136
+ "Statistical Inference": ["Hypothesis Testing Basics", "Confidence Intervals"],
137
+ "Multivariable Calculus": ["Derivatives", "Integration"],
138
+ "Differential Equations": ["Derivatives", "Integration"],
139
+ "Vector Calculus": ["Multivariable Calculus", "Vectors"],
140
+ "Linear Transformations": ["Matrices & Determinants", "Vector Spaces"],
141
+ "Eigenvalues & Eigenvectors": ["Matrices & Determinants"],
142
+ }
143
+
144
+
145
+ # ─── Pydantic Models ──────────────────────────────────────────
146
+
147
+ class CompetencyAnalysisRequest(BaseModel):
148
+ studentId: str
149
+ topicId: Optional[str] = None
150
+
151
+
152
+ class CompetencyAnalysis(BaseModel):
153
+ topicId: str
154
+ topicName: str
155
+ efficiencyScore: float = Field(..., ge=0, le=100)
156
+ competencyLevel: str
157
+ masteryPercentage: float
158
+ learningVelocity: float
159
+ totalAttempts: int
160
+ averageAccuracy: float
161
+ lastAttemptDate: Optional[str] = None
162
+
163
+
164
+ class CompetencyAnalysisResponse(BaseModel):
165
+ studentId: str
166
+ status: str # "success" | "insufficient_data"
167
+ analyses: List[CompetencyAnalysis]
168
+ overallCompetency: Optional[str] = None
169
+ thetaEstimate: Optional[float] = None
170
+
171
+
172
+ class TopicRecommendation(BaseModel):
173
+ topicId: str
174
+ topicName: str
175
+ recommendationScore: float
176
+ reasoning: str
177
+ estimatedTimeToMastery: int # hours
178
+ prerequisitesMet: bool
179
+ currentCompetency: str
180
+
181
+
182
+ class TopicRecommendationRequest(BaseModel):
183
+ studentId: str
184
+ numRecommendations: int = Field(default=5, ge=1, le=20)
185
+
186
+
187
+ class TopicRecommendationResponse(BaseModel):
188
+ studentId: str
189
+ recommendations: List[TopicRecommendation]
190
+ status: str
191
+
192
+
193
class EnhancedRiskPrediction(BaseModel):
    """Risk prediction payload returned to API consumers.

    NOTE(review): carries both camelCase fields (riskLevel, modelUsed, ...)
    and snake_case duplicates (risk_level, risk_score, top_factors) —
    presumably to serve two consumer naming conventions; confirm producers
    keep the mirrored fields in sync.
    """
    riskLevel: str
    confidence: float
    probabilities: Dict[str, float]
    contributingFactors: List[Dict[str, Any]]
    recommendations: List[str]
    modelUsed: str  # "ml_model" | "rule_based" | "zero_shot"
    # snake_case duplicates of the fields above
    risk_level: str
    risk_score: float
    top_factors: List[str]
203
+
204
+
205
+ def _to_strict_risk_level(level: str) -> str:
206
+ normalized = (level or "").strip().lower()
207
+ if normalized in {"high", "medium", "low"}:
208
+ return normalized
209
+ return "medium"
210
+
211
+
212
+ def _extract_top_factor_texts(factors: List[Dict[str, Any]]) -> List[str]:
213
+ texts: List[str] = []
214
+ for factor in factors[:3]:
215
+ detail = str(factor.get("detail") or "").strip()
216
+ feature = str(factor.get("feature") or "").strip()
217
+ value = factor.get("value")
218
+ if detail:
219
+ texts.append(detail)
220
+ elif feature and value is not None:
221
+ texts.append(f"{feature}={value}")
222
+ elif feature:
223
+ texts.append(feature)
224
+ if not texts:
225
+ texts.append("No major risk indicators detected")
226
+ return texts
227
+
228
+
229
+ class EnhancedRiskRequest(BaseModel):
230
+ studentId: str
231
+ engagementScore: float = Field(..., ge=0, le=100)
232
+ avgQuizScore: float = Field(..., ge=0, le=100)
233
+ attendance: float = Field(..., ge=0, le=100)
234
+ assignmentCompletion: float = Field(..., ge=0, le=100)
235
+ streak: Optional[int] = 0
236
+ xpGrowthRate: Optional[float] = 0.0
237
+ timeOnPlatform: Optional[float] = 0.0 # hours
238
+ # Optional trend data
239
+ engagementTrend7d: Optional[float] = None
240
+ quizScoreVariance: Optional[float] = None
241
+ consecutiveAbsences: Optional[int] = 0
242
+ daysSinceLastActivity: Optional[int] = 0
243
+
244
+
245
+ class StudentRiskPredictionV2(BaseModel):
246
+ risk_level: Literal["low", "medium", "high"]
247
+ risk_score: float = Field(..., ge=0.0, le=1.0)
248
+ top_factors: List[str]
249
+ probabilities: Dict[str, float]
250
+ model_used: str
251
+
252
+
253
+ class RiskTrainRequest(BaseModel):
254
+ forceRetrain: bool = False
255
+
256
+
257
+ class RiskTrainResponse(BaseModel):
258
+ status: str
259
+ accuracy: float
260
+ precision: float
261
+ recall: float
262
+ f1Score: float
263
+ samplesUsed: int
264
+ modelPath: str
265
+
266
+
267
+ class CalibrateDifficultyRequest(BaseModel):
268
+ questionId: str
269
+ studentResponses: List[Dict[str, Any]] # [{studentId, correct, timeSpent, attempts}]
270
+
271
+
272
+ class CalibrateDifficultyResponse(BaseModel):
273
+ questionId: str
274
+ difficultyParameter: float # b parameter
275
+ discriminationParameter: float # a parameter
276
+ guessingParameter: float # c parameter
277
+ difficultyLabel: str # "easy" | "medium" | "hard"
278
+ totalResponses: int
279
+ successRate: float
280
+
281
+
282
+ class AdaptiveQuizRequest(BaseModel):
283
+ studentId: str
284
+ topicId: str
285
+ numQuestions: int = Field(default=10, ge=1, le=50)
286
+ targetSuccessRate: float = Field(default=0.70, ge=0.3, le=0.95)
287
+
288
+
289
+ class AdaptiveQuizSelection(BaseModel):
290
+ questionId: str
291
+ estimatedDifficulty: float
292
+ predictedSuccessProbability: float
293
+ difficultyLabel: str
294
+
295
+
296
+ class AdaptiveQuizResponse(BaseModel):
297
+ studentId: str
298
+ topicId: str
299
+ selectedQuestions: List[AdaptiveQuizSelection]
300
+ studentAbilityEstimate: float
301
+ expectedSuccessRate: float
302
+ difficultyDistribution: Dict[str, int]
303
+
304
+
305
+ class StudentSummaryResponse(BaseModel):
306
+ studentId: str
307
+ competencyDistribution: Dict[str, int]
308
+ riskAssessment: Optional[Dict[str, Any]] = None
309
+ recommendedTopics: List[Dict[str, Any]]
310
+ learningVelocityTrend: List[Dict[str, Any]]
311
+ efficiencyScores: Dict[str, float]
312
+ predictedNextQuizScore: Optional[float] = None
313
+ engagementPatterns: Dict[str, Any]
314
+ status: str
315
+
316
+
317
+ class ClassInsightsRequest(BaseModel):
318
+ teacherId: str
319
+ classId: Optional[str] = None
320
+
321
+
322
+ class ClassInsightsResponse(BaseModel):
323
+ teacherId: str
324
+ riskDistribution: Dict[str, int]
325
+ riskTrend: List[Dict[str, Any]]
326
+ commonWeakTopics: List[Dict[str, Any]]
327
+ learningVelocityDistribution: Dict[str, float]
328
+ engagementPatterns: Dict[str, Any]
329
+ interventionRecommendations: List[Dict[str, Any]]
330
+ successPredictions: Dict[str, Any]
331
+ totalStudents: int
332
+ status: str
333
+
334
+
335
+ class MockDataRequest(BaseModel):
336
+ numStudents: int = Field(default=30, ge=1, le=200)
337
+ numQuizzes: int = Field(default=20, ge=1, le=100)
338
+ seed: Optional[int] = None
339
+
340
+
341
+ class RefreshCacheResponse(BaseModel):
342
+ status: str
343
+ cachedItems: int
344
+ timestamp: str
345
+
346
+
347
+ # ─── In-Memory Caches ─────────────────────────────────────────
348
+
349
+ _competency_cache: Dict[str, Tuple[float, Any]] = {}
350
+ _class_stats_cache: Dict[str, Tuple[float, Any]] = {}
351
+ _difficulty_cache: Dict[str, Tuple[float, Any]] = {}
352
+ _risk_model_cache: Dict[str, Any] = {}
353
+
354
+
355
+ def _cache_get(cache: Dict[str, Tuple[float, Any]], key: str, ttl: int) -> Optional[Any]:
356
+ """Get from cache if not expired."""
357
+ if key in cache:
358
+ ts, val = cache[key]
359
+ if time.time() - ts < ttl:
360
+ return val
361
+ del cache[key]
362
+ return None
363
+
364
+
365
+ def _cache_set(cache: Dict[str, Tuple[float, Any]], key: str, value: Any):
366
+ """Set a cache entry with current timestamp."""
367
+ cache[key] = (time.time(), value)
368
+
369
+
370
+ # ─── Firebase Helpers ──────────────────────────────────────────
371
+
372
+ _firestore_db = None
373
+
374
+
375
def _get_firestore_db():
    """Get or initialise the module-level Firestore client.

    Returns None (and logs a warning) when firebase-admin is missing or
    initialisation fails, so callers can degrade to mock/empty data.
    """
    # Memoised: reuse the module-level handle once it has been created.
    global _firestore_db
    if _firestore_db is not None:
        return _firestore_db

    if not HAS_FIREBASE:
        logger.warning("firebase-admin not installed; Firestore operations will use mock data")
        return None

    try:
        # Check if already initialised
        firebase_admin.get_app()
    except ValueError:
        # Initialise with default credentials or service account
        cred_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
        if cred_path and os.path.exists(cred_path):
            cred = credentials.Certificate(cred_path)
            firebase_admin.initialize_app(cred)
        else:
            # Try default credentials (e.g., GCP environment)
            try:
                firebase_admin.initialize_app()
            except Exception as e:
                # Best-effort: log and signal "no Firestore" rather than crash.
                logger.warning(f"Could not initialise Firebase: {e}")
                return None

    _firestore_db = firestore.client()
    return _firestore_db
404
+
405
+
406
async def fetch_student_quiz_history(student_id: str) -> List[Dict[str, Any]]:
    """Fetch quiz attempt history for a student from Firestore.

    Combines documents from the "progress" collection (matched on userId)
    with documents from the "quizAttempts" collection (matched on studentId,
    newest first, tagged source="quizAttempts"). Each record gains an "id"
    field holding its Firestore document id. Returns an empty list when
    Firestore is unavailable or any query fails.
    """
    db = _get_firestore_db()
    if db is None:
        logger.info(f"No Firestore connection; returning empty quiz history for {student_id}")
        return []

    try:
        # Query progress collection for the student
        progress_ref = db.collection("progress").where("userId", "==", student_id)
        docs = progress_ref.stream()
        history = []
        for doc in docs:
            data = doc.to_dict()
            if data:
                data["id"] = doc.id
                history.append(data)

        # Also check quizAttempts subcollection if it exists
        quiz_ref = db.collection("quizAttempts").where("studentId", "==", student_id).order_by(
            "completedAt", direction=firestore.Query.DESCENDING
        )
        quiz_docs = quiz_ref.stream()
        for doc in quiz_docs:
            data = doc.to_dict()
            if data:
                data["id"] = doc.id
                # Mark origin so downstream code can tell the two sources apart.
                data["source"] = "quizAttempts"
                history.append(data)

        logger.info(f"Fetched {len(history)} quiz history records for student {student_id}")
        return history

    except Exception as e:
        # Best-effort: a failed query degrades to "no history" rather than raising.
        logger.error(f"Error fetching quiz history for {student_id}: {e}")
        return []
442
+
443
+
444
async def fetch_student_engagement_metrics(student_id: str, days: int = 30) -> Dict[str, Any]:
    """Fetch engagement metrics for a student over the past N days.

    Aggregates "xpActivities" documents into total XP, an activity count,
    and per-day / per-hour activity histograms. Always returns a dict that
    contains at least totalXP, activityCount, dailyActivity, hourlyActivity,
    activeDays and avgActivitiesPerDay, so callers can rely on a stable
    shape even when Firestore is unavailable or the query fails.
    """
    empty_metrics: Dict[str, Any] = {
        "totalXP": 0,
        "activityCount": 0,
        "dailyActivity": {},
        "hourlyActivity": {},
        "activeDays": 0,
        "avgActivitiesPerDay": 0,
    }

    db = _get_firestore_db()
    if db is None:
        # Fix: this branch used to return a different key set
        # (totalTimeOnPlatform / sessionsCount / avgSessionDuration) than the
        # success and error paths; return the union of both shapes so callers
        # keyed on either keep working.
        return {
            **empty_metrics,
            "totalTimeOnPlatform": 0,
            "sessionsCount": 0,
            "avgSessionDuration": 0,
        }

    try:
        # NOTE(review): naive UTC timestamp — assumes stored document
        # timestamps compare correctly against naive UTC; confirm.
        cutoff = datetime.utcnow() - timedelta(days=days)

        # Fetch XP activities as engagement proxy
        xp_ref = db.collection("xpActivities").where(
            "userId", "==", student_id
        ).where("timestamp", ">=", cutoff)
        xp_docs = xp_ref.stream()

        daily_activity: Dict[str, int] = {}
        hourly_activity: Dict[int, int] = defaultdict(int)
        total_xp = 0
        activity_count = 0

        for doc in xp_docs:
            data = doc.to_dict()
            if data:
                activity_count += 1
                total_xp += data.get("xpAmount", 0)
                ts = data.get("timestamp")
                if ts:
                    # Firestore timestamps expose .seconds; plain datetimes pass through.
                    if hasattr(ts, "seconds"):
                        dt = datetime.utcfromtimestamp(ts.seconds)
                    elif isinstance(ts, datetime):
                        dt = ts
                    else:
                        continue
                    day_key = dt.strftime("%Y-%m-%d")
                    daily_activity[day_key] = daily_activity.get(day_key, 0) + 1
                    hourly_activity[dt.hour] += 1

        return {
            "totalXP": total_xp,
            "activityCount": activity_count,
            "dailyActivity": daily_activity,
            "hourlyActivity": dict(hourly_activity),
            "activeDays": len(daily_activity),
            "avgActivitiesPerDay": round(activity_count / max(len(daily_activity), 1), 2),
        }

    except Exception as e:
        # Degrade to the stable empty shape on any query failure.
        logger.error(f"Error fetching engagement metrics for {student_id}: {e}")
        return dict(empty_metrics)
499
+
500
+
501
def fetch_topic_dependencies() -> Dict[str, List[str]]:
    """Return a shallow defensive copy of the topic prerequisite graph."""
    return dict(TOPIC_PREREQUISITES)
504
+
505
+
506
async def store_competency_analysis(student_id: str, analysis: Dict[str, Any]):
    """Store competency analysis results in Firestore.

    Writes to competencyAnalyses/<student_id> with merge semantics and a
    server-side updatedAt timestamp. No-op (logged) when Firestore is
    unavailable; write errors are logged, never raised.
    """
    db = _get_firestore_db()
    if db is None:
        logger.info(f"No Firestore; skipping competency storage for {student_id}")
        return

    try:
        doc_ref = db.collection("competencyAnalyses").document(student_id)
        # NOTE: mutates the caller's dict by adding updatedAt.
        analysis["updatedAt"] = firestore.SERVER_TIMESTAMP
        doc_ref.set(analysis, merge=True)
        logger.info(f"Stored competency analysis for {student_id}")
    except Exception as e:
        logger.error(f"Error storing competency analysis: {e}")
520
+
521
+
522
async def store_question_difficulty(question_id: str, params: Dict[str, Any]):
    """Store question IRT difficulty parameters in Firestore.

    Writes to questions/<question_id>/difficulty_params/irt with merge
    semantics and a server-side updatedAt timestamp. No-op (logged) when
    Firestore is unavailable; write errors are logged, never raised.
    """
    db = _get_firestore_db()
    if db is None:
        logger.info(f"No Firestore; skipping difficulty storage for {question_id}")
        return

    try:
        doc_ref = db.collection("questions").document(question_id).collection(
            "difficulty_params"
        ).document("irt")
        # NOTE: mutates the caller's dict by adding updatedAt.
        params["updatedAt"] = firestore.SERVER_TIMESTAMP
        doc_ref.set(params, merge=True)
        logger.info(f"Stored difficulty params for question {question_id}")
    except Exception as e:
        logger.error(f"Error storing question difficulty: {e}")
538
+
539
+
540
+ # ─── IRT & Statistical Helpers ─────────────────────────────────
541
+
542
+
543
+ def _irt_3pl_probability(theta: float, a: float, b: float, c: float = 0.25) -> float:
544
+ """
545
+ 3-Parameter Logistic IRT model.
546
+ P(correct) = c + (1 - c) / (1 + exp(-a * (theta - b)))
547
+ theta: student ability
548
+ a: discrimination
549
+ b: difficulty
550
+ c: guessing parameter
551
+ """
552
+ exponent = -a * (theta - b)
553
+ exponent = max(-20, min(20, exponent)) # numerical stability
554
+ return c + (1 - c) / (1 + math.exp(exponent))
555
+
556
+
557
def _estimate_theta(responses: List[Dict[str, Any]], difficulty_params: Dict[str, Dict[str, float]]) -> float:
    """Estimate student ability (theta) by maximum likelihood under the 3PL model.

    responses: list of {questionId, correct: bool}
    difficulty_params: {questionId: {a, b, c}}; unknown questions default to
    a=1.0, b=0.0, c=0.25. Returns 0.0 for an empty response list, otherwise
    the bounded MLE over theta in [-4, 4], rounded to 3 decimals.
    """
    if not responses:
        return 0.0

    def neg_log_likelihood(theta: float) -> float:
        total = 0.0
        for response in responses:
            question_id = response.get("questionId", "")
            params = difficulty_params.get(question_id, {"a": 1.0, "b": 0.0, "c": 0.25})
            prob = _irt_3pl_probability(theta, params["a"], params["b"], params.get("c", 0.25))
            prob = min(1 - 1e-10, max(1e-10, prob))  # keep log() finite
            total += math.log(prob) if response.get("correct", False) else math.log(1 - prob)
        return -total

    fit = minimize_scalar(neg_log_likelihood, bounds=(-4, 4), method="bounded")
    return round(fit.x, 3)
581
+
582
+
583
+ def _calculate_learning_velocity(scores_over_time: List[Tuple[float, float]]) -> float:
584
+ """
585
+ Calculate learning velocity using weighted linear regression.
586
+ scores_over_time: list of (timestamp_as_days, score)
587
+ Returns slope (positive = improving, negative = declining).
588
+ """
589
+ if len(scores_over_time) < 2:
590
+ return 0.0
591
+
592
+ times = np.array([t for t, _ in scores_over_time]).reshape(-1, 1)
593
+ scores = np.array([s for _, s in scores_over_time])
594
+
595
+ # Exponential decay weights (more recent = higher weight)
596
+ max_time = times.max()
597
+ decay_rate = 0.05
598
+ weights = np.exp(-decay_rate * (max_time - times.flatten()))
599
+ weights = weights / weights.sum()
600
+
601
+ # Weighted linear regression
602
+ model = LinearRegression()
603
+ model.fit(times, scores, sample_weight=weights)
604
+
605
+ return round(float(model.coef_[0]), 4)
606
+
607
+
608
+ def _calculate_efficiency_score(
609
+ student_times: List[float],
610
+ student_accuracies: List[bool],
611
+ class_avg_time: float,
612
+ attempt_counts: List[int],
613
+ ) -> float:
614
+ """
615
+ Efficiency = (class_avg_time / student_time) * accuracy_multiplier * 100
616
+ Penalise multiple attempts.
617
+ """
618
+ if not student_times or class_avg_time <= 0:
619
+ return 50.0
620
+
621
+ efficiencies = []
622
+ for t, correct, attempts in zip(student_times, student_accuracies, attempt_counts):
623
+ if t <= 0:
624
+ t = 1.0
625
+ time_ratio = class_avg_time / t
626
+ accuracy_mult = 1.0 if correct else 0.3
627
+ attempt_penalty = 1.0 / max(attempts, 1)
628
+ eff = time_ratio * accuracy_mult * attempt_penalty * 100
629
+ efficiencies.append(min(eff, 150)) # cap at 150 to avoid outliers
630
+
631
+ raw = sum(efficiencies) / len(efficiencies)
632
+ return round(min(max(raw, 0), 100), 2)
633
+
634
+
635
def _get_competency_level(score: float) -> str:
    """Map a score (0-100) to a competency level label."""
    # Find the band whose [low, high) range contains the score.
    matched = next(
        (
            level
            for level, (low, high) in COMPETENCY_THRESHOLDS.items()
            if low <= score < high
        ),
        None,
    )
    if matched is not None:
        return matched
    # Scores falling outside every configured band (e.g. at the open top
    # boundary): 85 and above counts as advanced, otherwise beginner.
    return "beginner" if score < 85 else "advanced"
641
+
642
+
643
+ # ─── Competency Assessment System ─────────────────────────────
644
+
645
+
646
async def compute_competency_analysis(
    student_id: str,
    quiz_history: List[Dict[str, Any]],
    topic_filter: Optional[str] = None,
) -> CompetencyAnalysisResponse:
    """
    Full competency analysis using IRT approach.

    Groups the student's quiz history by topic, estimates a global ability
    (theta) from all responses, then computes per-topic efficiency, mastery,
    learning velocity, and a competency level. Topics are returned weakest
    first. Returns status "insufficient_data" when history is too short or
    the topic filter matches nothing.
    """
    if not quiz_history or len(quiz_history) < MIN_QUIZ_ATTEMPTS_FOR_COMPETENCY:
        return CompetencyAnalysisResponse(
            studentId=student_id,
            status="insufficient_data",
            analyses=[],
            overallCompetency=None,
            thetaEstimate=None,
        )

    # Group by topic (canonicalized so label variants collapse together).
    topic_data: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for entry in quiz_history:
        topic = _canonicalize_topic_label(str(entry.get("topicId") or entry.get("topic") or "Unknown"))
        if topic_filter and topic != _canonicalize_topic_label(topic_filter):
            continue
        topic_data[topic].append(entry)

    if not topic_data:
        return CompetencyAnalysisResponse(
            studentId=student_id,
            status="insufficient_data",
            analyses=[],
        )

    # Build difficulty params from class-wide success rates.
    # NOTE(review): every question currently gets the same default 3PL
    # parameters (a=1, b=0, c=0.25) — per-question calibration is not
    # applied here; confirm whether calibrated params should be loaded.
    difficulty_params: Dict[str, Dict[str, float]] = {}
    all_responses_for_irt: List[Dict[str, Any]] = []

    for topic, entries in topic_data.items():
        for entry in entries:
            qid = entry.get("questionId", entry.get("id", f"{topic}_{len(all_responses_for_irt)}"))
            correct = entry.get("correct", False)
            # Normalize numeric "correct" flags (e.g. 0/1 or fractions) to bool.
            if isinstance(correct, (int, float)):
                correct = correct > 0.5
            score = entry.get("score", 0)
            total = entry.get("total", 1)
            # Non-bool, non-numeric values fall back to score ratio >= 50%.
            if not isinstance(correct, bool) and total > 0:
                correct = (score / total) >= 0.5

            all_responses_for_irt.append({"questionId": qid, "correct": correct})

            # Estimate difficulty from success rate across the dataset
            if qid not in difficulty_params:
                difficulty_params[qid] = {"a": 1.0, "b": 0.0, "c": 0.25}

    # Estimate theta (student ability) over the full response set.
    theta = _estimate_theta(all_responses_for_irt, difficulty_params)

    # Per-topic analysis
    analyses: List[CompetencyAnalysis] = []

    for topic, entries in topic_data.items():
        topic_name = topic.replace("_", " ").title()

        # Accumulators for accuracy, timing, attempts, and trend data.
        correct_count = 0
        total_count = 0
        first_attempt_correct = 0
        first_attempt_total = 0
        times: List[float] = []
        accuracies: List[bool] = []
        attempt_counts: List[int] = []
        scores_over_time: List[Tuple[float, float]] = []

        for entry in entries:
            total_count += 1
            score = entry.get("score", 0)
            total = max(entry.get("total", 1), 1)
            pct = (score / total) * 100
            # An attempt counts as "correct" at >= 50% of available marks.
            correct = pct >= 50
            if correct:
                correct_count += 1

            attempts = entry.get("attempts", 1)
            if attempts <= 1 and correct:
                first_attempt_correct += 1
            first_attempt_total += 1

            time_spent = entry.get("timeTaken") or entry.get("timeSpent") or 60
            times.append(float(time_spent))
            accuracies.append(correct)
            attempt_counts.append(max(attempts, 1))

            # Timestamp for velocity — accept epoch numbers, objects with a
            # .seconds attribute (presumably Firestore timestamps — confirm),
            # datetime, or ISO strings; everything is converted to days.
            ts = entry.get("completedAt") or entry.get("timestamp") or entry.get("date")
            if ts:
                if isinstance(ts, (int, float)):
                    day_val = ts / 86400
                elif hasattr(ts, "seconds"):
                    day_val = ts.seconds / 86400
                elif isinstance(ts, datetime):
                    day_val = ts.timestamp() / 86400
                elif isinstance(ts, str):
                    try:
                        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                        day_val = dt.timestamp() / 86400
                    except Exception:
                        day_val = time.time() / 86400
                else:
                    day_val = time.time() / 86400
                scores_over_time.append((day_val, pct))

        avg_accuracy = (correct_count / max(total_count, 1)) * 100
        mastery_pct = (first_attempt_correct / max(first_attempt_total, 1)) * 100

        # Class average time (use all entries as proxy)
        class_avg_time = float(np.mean(times)) if times else 60.0

        efficiency = _calculate_efficiency_score(times, accuracies, class_avg_time, attempt_counts)
        velocity = _calculate_learning_velocity(scores_over_time)
        competency_level = _get_competency_level(avg_accuracy)

        # Last attempt date (convert most recent day value back to ISO).
        last_date = None
        if scores_over_time:
            last_ts = max(t for t, _ in scores_over_time)
            last_date = datetime.utcfromtimestamp(last_ts * 86400).isoformat()

        analyses.append(CompetencyAnalysis(
            topicId=topic,
            topicName=topic_name,
            efficiencyScore=efficiency,
            competencyLevel=competency_level,
            masteryPercentage=round(mastery_pct, 2),
            learningVelocity=velocity,
            totalAttempts=total_count,
            averageAccuracy=round(avg_accuracy, 2),
            lastAttemptDate=last_date,
        ))

    # Sort by efficiency score ascending (weakest first)
    analyses.sort(key=lambda a: a.efficiencyScore)

    # Overall competency = level of the mean efficiency across topics.
    if analyses:
        avg_eff = sum(a.efficiencyScore for a in analyses) / len(analyses)
        overall = _get_competency_level(avg_eff)
    else:
        overall = None

    return CompetencyAnalysisResponse(
        studentId=student_id,
        status="success",
        analyses=analyses,
        overallCompetency=overall,
        thetaEstimate=theta,
    )
801
+
802
+
803
+ # ─── Enhanced Risk Prediction ─────────────────────────────────
804
+
805
+
806
+ def _build_risk_features(data: EnhancedRiskRequest) -> np.ndarray:
807
+ """Build feature vector for risk prediction."""
808
+ features = [
809
+ data.engagementScore,
810
+ data.avgQuizScore,
811
+ data.attendance,
812
+ data.assignmentCompletion,
813
+ data.streak or 0,
814
+ data.xpGrowthRate or 0.0,
815
+ data.timeOnPlatform or 0.0,
816
+ data.engagementTrend7d or 0.0,
817
+ data.quizScoreVariance or 0.0,
818
+ data.consecutiveAbsences or 0,
819
+ data.daysSinceLastActivity or 0,
820
+ ]
821
+ return np.array(features).reshape(1, -1)
822
+
823
+
824
# Column names for the risk-feature vector. Order MUST stay in sync with
# _build_risk_features(): the trained model, SHAP explanations, and the
# feature-importance fallback all index into this list positionally.
RISK_FEATURE_NAMES = [
    "engagementScore",
    "avgQuizScore",
    "attendance",
    "assignmentCompletion",
    "streak",
    "xpGrowthRate",
    "timeOnPlatform",
    "engagementTrend7d",
    "quizScoreVariance",
    "consecutiveAbsences",
    "daysSinceLastActivity",
]
837
+
838
+
839
def _load_risk_model():
    """Return the trained risk classifier, loading and caching it on first use.

    Returns None when joblib is unavailable, no model file exists at
    RISK_MODEL_PATH, or deserialization fails (failure is logged).
    """
    if not HAS_JOBLIB:
        return None

    cache_key = "risk_model"
    cached = _risk_model_cache.get(cache_key)
    if cached is not None:
        # Warm path: model already deserialized this process.
        return cached

    if not os.path.exists(RISK_MODEL_PATH):
        return None

    try:
        model = joblib.load(RISK_MODEL_PATH)
    except Exception as e:
        logger.error(f"Error loading risk model: {e}")
        return None

    _risk_model_cache[cache_key] = model
    logger.info("Loaded trained risk model from disk")
    return model
858
+
859
+
860
def _rule_based_risk(data: EnhancedRiskRequest) -> EnhancedRiskPrediction:
    """Fallback rule-based risk prediction when no ML model is available.

    Blends four core indicators into a 0-100 score, applies fixed
    penalties/bonuses for disengagement and momentum signals, then maps
    the score to a risk level with canned probabilities, factors, and
    recommendations.
    """
    # Weighted blend of the four core indicators.
    score = (
        data.engagementScore * 0.25
        + data.avgQuizScore * 0.30
        + data.attendance * 0.25
        + data.assignmentCompletion * 0.20
    )

    streak = data.streak or 0

    # Penalties for disengagement signals.
    if (data.consecutiveAbsences or 0) >= 3:
        score -= 10
    if (data.daysSinceLastActivity or 0) >= 7:
        score -= 10
    if streak == 0:
        score -= 5

    # Bonuses for sustained momentum.
    if streak >= 7:
        score += 5
    if (data.engagementTrend7d or 0) > 0:
        score += 5

    score = min(100, max(0, score))

    # Map the blended score to a level and a fixed probability profile.
    if score >= 70:
        risk_level, probs = "Low", {"High": 0.05, "Medium": 0.15, "Low": 0.80}
    elif score >= 45:
        risk_level, probs = "Medium", {"High": 0.15, "Medium": 0.55, "Low": 0.30}
    else:
        risk_level, probs = "High", {"High": 0.70, "Medium": 0.20, "Low": 0.10}

    # Collect triggered explanations in fixed priority order.
    factor_rules = [
        (data.avgQuizScore < 50,
         {"feature": "avgQuizScore", "impact": -0.3, "detail": "Low quiz scores"}),
        (data.attendance < 60,
         {"feature": "attendance", "impact": -0.25, "detail": "Poor attendance"}),
        (data.engagementScore < 40,
         {"feature": "engagementScore", "impact": -0.2, "detail": "Low engagement"}),
        ((data.consecutiveAbsences or 0) >= 3,
         {"feature": "consecutiveAbsences", "impact": -0.15, "detail": "Multiple consecutive absences"}),
        (data.assignmentCompletion < 50,
         {"feature": "assignmentCompletion", "impact": -0.2, "detail": "Low assignment completion"}),
    ]
    factors = [payload for triggered, payload in factor_rules if triggered]
    if not factors:
        factors = [{"feature": "overall", "impact": 0.0, "detail": "No major risk factors identified"}]

    # Canned guidance per risk level.
    recommendations_by_level = {
        "High": [
            "Schedule immediate one-on-one check-in with student",
            "Set up tutoring sessions for weak subjects",
            "Contact parent/guardian about academic concerns",
            "Create a structured study plan with daily goals",
        ],
        "Medium": [
            "Monitor progress closely over next 2 weeks",
            "Encourage participation in study groups",
            "Assign additional practice exercises for weak areas",
        ],
        "Low": [
            "Continue current learning approach",
            "Challenge with advanced material when ready",
        ],
    }
    recommendations = recommendations_by_level[risk_level]

    return EnhancedRiskPrediction(
        riskLevel=risk_level,
        confidence=round(max(probs.values()), 3),
        probabilities=probs,
        contributingFactors=factors[:3],
        recommendations=recommendations,
        modelUsed="rule_based",
        risk_level=_to_strict_risk_level(risk_level),
        risk_score=round(float(probs.get("High", 0.0)), 4),
        top_factors=_extract_top_factor_texts(factors),
    )
940
+
941
+
942
async def predict_risk_enhanced(data: EnhancedRiskRequest) -> EnhancedRiskPrediction:
    """Enhanced risk prediction using trained ML model with SHAP explanations.

    Falls back to the rule-based predictor when no model is on disk or when
    inference fails for any reason, so this coroutine never raises to callers.
    """
    model = _load_risk_model()

    if model is None:
        logger.info("No trained ML model found; using rule-based risk prediction")
        return _rule_based_risk(data)

    try:
        features = _build_risk_features(data)
        # Class encoding used at training time: 0=High, 1=Medium, 2=Low.
        label_map = {0: "High", 1: "Medium", 2: "Low"}

        # Predict
        prediction = model.predict(features)[0]
        probabilities_raw = model.predict_proba(features)[0]
        risk_level = label_map.get(int(prediction), "Medium")

        # Map per-class probabilities to labels; pad with 0.0 if the model
        # emits fewer classes than expected.
        probs = {}
        for i, label in label_map.items():
            if i < len(probabilities_raw):
                probs[label] = round(float(probabilities_raw[i]), 4)
            else:
                probs[label] = 0.0

        confidence = round(float(max(probabilities_raw)), 4)

        # SHAP explanations (best effort — failure degrades to a stub factor).
        factors = []
        if HAS_SHAP:
            try:
                explainer = shap.TreeExplainer(model)
                shap_values = explainer.shap_values(features)

                if isinstance(shap_values, list):
                    # Multi-class: use SHAP values for predicted class
                    sv = shap_values[int(prediction)][0]
                else:
                    sv = shap_values[0]

                # Get top 3 contributing features by absolute SHAP impact.
                feature_impacts = list(zip(RISK_FEATURE_NAMES, sv))
                feature_impacts.sort(key=lambda x: abs(x[1]), reverse=True)

                for fname, impact in feature_impacts[:3]:
                    idx = RISK_FEATURE_NAMES.index(fname)
                    fval = features[0][idx]
                    factors.append({
                        "feature": fname,
                        "impact": round(float(impact), 4),
                        "value": round(float(fval), 2),
                        "detail": f"{fname} = {fval:.1f} (SHAP impact: {impact:.3f})",
                    })
            except Exception as e:
                logger.warning(f"SHAP explanation failed: {e}")
                factors = [{"feature": "model_prediction", "impact": 0.0, "detail": "SHAP unavailable"}]
        else:
            # Feature importance fallback (tree models only; factors stays
            # empty when the model exposes no feature_importances_).
            if hasattr(model, "feature_importances_"):
                importances = model.feature_importances_
                fi = list(zip(RISK_FEATURE_NAMES, importances))
                fi.sort(key=lambda x: x[1], reverse=True)
                for fname, imp in fi[:3]:
                    idx = RISK_FEATURE_NAMES.index(fname)
                    fval = features[0][idx]
                    factors.append({
                        "feature": fname,
                        "impact": round(float(imp), 4),
                        "value": round(float(fval), 2),
                        "detail": f"{fname} = {fval:.1f} (importance: {imp:.3f})",
                    })

        # Recommendations based on prediction
        if risk_level == "High":
            recommendations = [
                "Immediate intervention recommended — schedule one-on-one session",
                "Review recent quiz performance for specific skill gaps",
                "Contact parent/guardian about academic concerns",
                "Create personalised remediation plan",
            ]
        elif risk_level == "Medium":
            recommendations = [
                "Monitor student progress more frequently",
                "Assign targeted practice for weak areas",
                "Encourage peer study groups",
            ]
        else:
            recommendations = [
                "Student is performing well — maintain current pace",
                "Consider enrichment activities for advanced topics",
            ]

        return EnhancedRiskPrediction(
            riskLevel=risk_level,
            confidence=confidence,
            probabilities=probs,
            contributingFactors=factors,
            recommendations=recommendations,
            modelUsed="ml_model",
            risk_level=_to_strict_risk_level(risk_level),
            risk_score=round(float(probs.get("High", 0.0)), 4),
            top_factors=_extract_top_factor_texts(factors),
        )

    except Exception as e:
        # Any inference failure degrades gracefully to the rule-based path.
        logger.error(f"ML risk prediction failed: {e}\n{traceback.format_exc()}")
        logger.info("Falling back to rule-based prediction")
        return _rule_based_risk(data)
1049
+
1050
+
1051
+ def _humanize_risk_factor(factor: Dict[str, Any]) -> str:
1052
+ """Convert raw factor payloads into teacher-friendly explanations."""
1053
+ feature = str(factor.get("feature", "overall"))
1054
+ value = factor.get("value", None)
1055
+
1056
+ if feature == "attendance":
1057
+ if value is not None:
1058
+ return f"Low attendance rate ({value:.0f}%) over recent sessions."
1059
+ return "Low attendance rate over recent sessions."
1060
+ if feature == "avgQuizScore":
1061
+ if value is not None:
1062
+ return f"Consistently low quiz performance (average {value:.0f}%)."
1063
+ return "Consistently low quiz performance."
1064
+ if feature == "assignmentCompletion":
1065
+ if value is not None:
1066
+ return f"Missing or incomplete assignments ({value:.0f}% completion)."
1067
+ return "Missing or incomplete assignments."
1068
+ if feature == "engagementScore":
1069
+ if value is not None:
1070
+ return f"Low learning engagement indicators ({value:.0f}%)."
1071
+ return "Low learning engagement indicators."
1072
+ if feature == "consecutiveAbsences":
1073
+ if value is not None:
1074
+ return f"Frequent recent absences ({int(value)} in a row)."
1075
+ return "Frequent recent absences."
1076
+ if feature == "daysSinceLastActivity":
1077
+ if value is not None:
1078
+ return f"Long inactivity window ({int(value)} days since last activity)."
1079
+ return "Long inactivity window since last activity."
1080
+
1081
+ detail = str(factor.get("detail", "")).strip()
1082
+ if detail:
1083
+ return detail
1084
+ return "Multiple performance indicators suggest elevated support needs."
1085
+
1086
+
1087
async def predict_risk_v2(data: EnhancedRiskRequest) -> StudentRiskPredictionV2:
    """Return normalized risk payload while reusing the existing enhanced ML model."""
    enhanced = await predict_risk_enhanced(data)

    # Normalize the legacy capitalized levels to the strict lowercase enum.
    level_lookup: Dict[str, Literal["low", "medium", "high"]] = {
        "High": "high",
        "Medium": "medium",
        "Low": "low",
    }
    normalized: Literal["low", "medium", "high"] = level_lookup.get(enhanced.riskLevel, "medium")

    p_high = float(enhanced.probabilities.get("High", 0.0))
    p_medium = float(enhanced.probabilities.get("Medium", 0.0))
    p_low = float(enhanced.probabilities.get("Low", 0.0))

    # Blended scalar score: full weight on High, half on Medium, clamped [0, 1].
    blended = max(0.0, min(1.0, round(p_high + (0.5 * p_medium), 4)))

    explanations = [_humanize_risk_factor(f) for f in enhanced.contributingFactors]
    if not explanations:
        explanations = ["Limited data available; monitor student trends after next assessments."]

    return StudentRiskPredictionV2(
        risk_level=normalized,
        risk_score=blended,
        top_factors=explanations[:3],
        probabilities={
            "high": round(p_high, 4),
            "medium": round(p_medium, 4),
            "low": round(p_low, 4),
        },
        model_used=enhanced.modelUsed,
    )
1117
+
1118
+
1119
async def train_risk_model(force_retrain: bool = False) -> RiskTrainResponse:
    """
    Train a risk classification model on historical student data.
    Tries XGBoost first, falls back to Random Forest.

    Pulls up to 500 student records from Firestore; when fewer than 50 usable
    rows are found, tops up with synthetic data so training always proceeds.
    Saves the fitted model to RISK_MODEL_PATH and clears the in-process
    model cache so the next prediction picks up the new model.

    Raises:
        ValueError: if joblib is not installed (model cannot be persisted).
    """
    if not HAS_JOBLIB:
        raise ValueError("joblib not installed; cannot save model")

    # Check if model exists and skip unless forced
    if os.path.exists(RISK_MODEL_PATH) and not force_retrain:
        return RiskTrainResponse(
            status="model_exists",
            accuracy=0.0,
            precision=0.0,
            recall=0.0,
            f1Score=0.0,
            samplesUsed=0,
            modelPath=RISK_MODEL_PATH,
        )

    # Fetch historical data from Firestore
    db = _get_firestore_db()
    X_data = []
    y_data = []

    if db is not None:
        try:
            users_ref = db.collection("users").where("role", "==", "student").limit(500)
            user_docs = users_ref.stream()

            for doc in user_docs:
                data = doc.to_dict()
                if not data:
                    continue

                # Feature order must match RISK_FEATURE_NAMES /
                # _build_risk_features; trend and variance are not stored
                # per-user, so they are zero-filled here.
                features = [
                    data.get("engagementScore", 50),
                    data.get("avgQuizScore", 50),
                    data.get("attendance", 80),
                    data.get("assignmentCompletion", 60),
                    data.get("streak", 0),
                    data.get("xpGrowthRate", 0),
                    data.get("timeOnPlatform", 0),
                    0.0,  # engagementTrend7d
                    0.0,  # quizScoreVariance
                    data.get("consecutiveAbsences", 0),
                    data.get("daysSinceLastActivity", 0),
                ]
                X_data.append(features)

                # Determine label from existing riskLevel or compute it
                # (encoding: 0=High, 1=Medium, 2=Low; unknown defaults to Low).
                risk = data.get("riskLevel", "")
                if risk == "High":
                    y_data.append(0)
                elif risk == "Medium":
                    y_data.append(1)
                else:
                    y_data.append(2)

        except Exception as e:
            logger.error(f"Error fetching training data: {e}")

    # If insufficient real data, generate synthetic training data
    if len(X_data) < 50:
        logger.info("Insufficient Firestore data; generating synthetic training data")
        synth_X, synth_y = _generate_synthetic_risk_data(500)
        for row in synth_X:
            X_data.append(list(row))
        for label in synth_y:
            y_data.append(int(label))

    X = np.array(X_data)
    y = np.array(y_data)

    # Train/test split (stratified so each risk class appears in both splits)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Train model
    if HAS_XGBOOST:
        # NOTE(review): use_label_encoder was removed in xgboost >= 2.0 —
        # confirm the pinned xgboost version accepts this kwarg.
        model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=6,
            learning_rate=0.1,
            objective="multi:softprob",
            num_class=3,
            eval_metric="mlogloss",
            random_state=42,
            use_label_encoder=False,
        )
        logger.info("Training XGBoost risk classifier")
    else:
        model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42,
            class_weight="balanced",
        )
        logger.info("Training Random Forest risk classifier")

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Weighted metrics tolerate the (intentional) class imbalance.
    acc = float(accuracy_score(y_test, y_pred))
    prec = float(precision_score(y_test, y_pred, average="weighted", zero_division=0))
    rec = float(recall_score(y_test, y_pred, average="weighted", zero_division=0))
    f1 = float(f1_score(y_test, y_pred, average="weighted", zero_division=0))

    logger.info(f"Risk model trained: accuracy={acc:.3f}, F1={f1:.3f}")
    logger.info(f"Classification report:\n{classification_report(y_test, y_pred, zero_division=0)}")

    # Save model
    os.makedirs(os.path.dirname(RISK_MODEL_PATH), exist_ok=True)
    joblib.dump(model, RISK_MODEL_PATH)
    logger.info(f"Risk model saved to {RISK_MODEL_PATH}")

    # Clear model cache so next prediction loads new model
    _risk_model_cache.clear()

    return RiskTrainResponse(
        status="trained",
        accuracy=round(acc, 4),
        precision=round(prec, 4),
        recall=round(rec, 4),
        f1Score=round(f1, 4),
        samplesUsed=len(X_data),
        modelPath=RISK_MODEL_PATH,
    )
1246
+
1247
+
1248
+ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
1249
+ """Generate synthetic student data for model training."""
1250
+ np.random.seed(42)
1251
+
1252
+ X = []
1253
+ y = []
1254
+
1255
+ for _ in range(n):
1256
+ risk_class = np.random.choice([0, 1, 2], p=[0.2, 0.3, 0.5])
1257
+
1258
+ if risk_class == 0: # High risk
1259
+ engagement = np.random.normal(30, 15)
1260
+ quiz = np.random.normal(35, 12)
1261
+ attendance = np.random.normal(50, 15)
1262
+ completion = np.random.normal(35, 15)
1263
+ streak = max(0, int(np.random.normal(1, 2)))
1264
+ xp_growth = np.random.normal(-0.5, 0.3)
1265
+ time_platform = np.random.normal(2, 1)
1266
+ trend = np.random.normal(-10, 5)
1267
+ variance = np.random.normal(25, 8)
1268
+ absences = max(0, int(np.random.normal(4, 2)))
1269
+ days_inactive = max(0, int(np.random.normal(10, 5)))
1270
+ elif risk_class == 1: # Medium risk
1271
+ engagement = np.random.normal(55, 12)
1272
+ quiz = np.random.normal(60, 10)
1273
+ attendance = np.random.normal(72, 10)
1274
+ completion = np.random.normal(60, 12)
1275
+ streak = max(0, int(np.random.normal(3, 3)))
1276
+ xp_growth = np.random.normal(0.2, 0.3)
1277
+ time_platform = np.random.normal(5, 2)
1278
+ trend = np.random.normal(0, 8)
1279
+ variance = np.random.normal(15, 5)
1280
+ absences = max(0, int(np.random.normal(2, 1)))
1281
+ days_inactive = max(0, int(np.random.normal(3, 3)))
1282
+ else: # Low risk
1283
+ engagement = np.random.normal(82, 10)
1284
+ quiz = np.random.normal(85, 8)
1285
+ attendance = np.random.normal(93, 5)
1286
+ completion = np.random.normal(88, 8)
1287
+ streak = max(0, int(np.random.normal(10, 5)))
1288
+ xp_growth = np.random.normal(1.0, 0.4)
1289
+ time_platform = np.random.normal(10, 3)
1290
+ trend = np.random.normal(5, 5)
1291
+ variance = np.random.normal(8, 3)
1292
+ absences = 0
1293
+ days_inactive = max(0, int(np.random.normal(1, 1)))
1294
+
1295
+ features = [
1296
+ max(0, min(100, engagement)),
1297
+ max(0, min(100, quiz)),
1298
+ max(0, min(100, attendance)),
1299
+ max(0, min(100, completion)),
1300
+ streak,
1301
+ xp_growth,
1302
+ max(0, time_platform),
1303
+ trend,
1304
+ max(0, variance),
1305
+ absences,
1306
+ days_inactive,
1307
+ ]
1308
+
1309
+ X.append(features)
1310
+ y.append(risk_class)
1311
+
1312
+ return np.array(X), np.array(y)
1313
+
1314
+
1315
+ # ─── Quiz Difficulty Calibration ───────────────────────────────
1316
+
1317
+
1318
async def calibrate_question_difficulty(request: CalibrateDifficultyRequest) -> CalibrateDifficultyResponse:
    """
    Calculate IRT difficulty parameters for a question based on student responses.

    Derives the 3PL parameters (b = difficulty, a = discrimination,
    c = guessing) from observed success rates, persists them via
    store_question_difficulty, and caches them locally.

    Raises:
        ValueError: if the request contains no student responses.
    """
    responses = request.studentResponses
    if not responses:
        raise ValueError("No student responses provided")

    correct_count = sum(1 for r in responses if r.get("correct", False))
    total = len(responses)
    success_rate = correct_count / total

    # Difficulty parameter b = logit(1 - p_correct)
    p = max(0.01, min(0.99, success_rate))  # clamp to avoid infinity
    b = round(math.log((1 - p) / p), 3)

    # Discrimination parameter a
    # Split students into high and low performers by time; a question
    # discriminates well if fast answerers succeed more than slow ones.
    if len(responses) >= 4:
        times = [r.get("timeSpent", 60) for r in responses]
        median_time = sorted(times)[len(times) // 2]

        fast_correct = sum(1 for r in responses if r.get("correct") and r.get("timeSpent", 60) <= median_time)
        fast_total = sum(1 for r in responses if r.get("timeSpent", 60) <= median_time)
        slow_correct = sum(1 for r in responses if r.get("correct") and r.get("timeSpent", 60) > median_time)
        slow_total = sum(1 for r in responses if r.get("timeSpent", 60) > median_time)

        p_fast = fast_correct / max(fast_total, 1)
        p_slow = slow_correct / max(slow_total, 1)

        # Higher discrimination if fast students do much better (clamped
        # to the conventional [0.3, 3.0] range).
        a = round(max(0.3, min(3.0, (p_fast - p_slow) * 3 + 1.0)), 3)
    else:
        a = 1.0

    # Guessing parameter c (based on question type; default 0.25 for 4-choice)
    c = 0.25

    # Difficulty label from b: easy < -1.0 <= medium < 1.0 <= hard.
    if b < -1.0:
        diff_label = "easy"
    elif b < 1.0:
        diff_label = "medium"
    else:
        diff_label = "hard"

    # Store in Firestore
    params = {
        "b": b,
        "a": a,
        "c": c,
        "difficultyLabel": diff_label,
        "successRate": round(success_rate, 4),
        "totalResponses": total,
    }
    await store_question_difficulty(request.questionId, params)

    # Cache it so subsequent lookups skip the round trip.
    _cache_set(_difficulty_cache, request.questionId, params)

    return CalibrateDifficultyResponse(
        questionId=request.questionId,
        difficultyParameter=b,
        discriminationParameter=a,
        guessingParameter=c,
        difficultyLabel=diff_label,
        totalResponses=total,
        successRate=round(success_rate, 4),
    )
1387
+
1388
+
1389
async def select_adaptive_quiz(request: AdaptiveQuizRequest) -> AdaptiveQuizResponse:
    """
    Select questions adaptively based on student ability and IRT parameters.

    Estimates the student's ability (theta) from their history on the
    requested topic, chooses a difficulty mix for the competency level,
    then emits numQuestions synthetic selections whose difficulty oscillates
    around theta while respecting the per-difficulty quotas.
    """
    # Get student competency for this topic
    quiz_history = await fetch_student_quiz_history(request.studentId)

    # Estimate student ability from topic-matched history entries.
    canonical_topic_id = _canonicalize_topic_label(request.topicId)
    topic_entries = [
        e for e in quiz_history
        if _canonicalize_topic_label(str(e.get("topicId") or e.get("topic") or "")) == canonical_topic_id
    ]

    if topic_entries:
        responses_for_irt = []
        difficulty_params = {}
        for i, entry in enumerate(topic_entries):
            qid = entry.get("questionId", f"q_{i}")
            correct = entry.get("correct", False)
            # Normalize numeric correctness flags to booleans.
            if isinstance(correct, (int, float)):
                correct = correct > 0.5
            score = entry.get("score", 0)
            total = max(entry.get("total", 1), 1)
            if not isinstance(correct, bool):
                correct = (score / total) >= 0.5

            responses_for_irt.append({"questionId": qid, "correct": correct})
            # Default 3PL parameters per question (no calibration lookup here).
            difficulty_params[qid] = {"a": 1.0, "b": 0.0, "c": 0.25}

        theta = _estimate_theta(responses_for_irt, difficulty_params)
    else:
        theta = 0.0  # Default ability

    competency_level = _get_competency_level((theta + 4) / 8 * 100)  # normalise theta to 0-100

    # Difficulty distribution based on competency
    distributions = {
        "beginner": {"easy": 0.70, "medium": 0.20, "hard": 0.10},
        "developing": {"easy": 0.40, "medium": 0.40, "hard": 0.20},
        "proficient": {"easy": 0.20, "medium": 0.40, "hard": 0.40},
        "advanced": {"easy": 0.10, "medium": 0.30, "hard": 0.60},
    }

    dist = distributions.get(competency_level, distributions["developing"])

    # Generate question selections with adaptive difficulty
    n = request.numQuestions
    selected: List[AdaptiveQuizSelection] = []
    current_theta = theta
    difficulty_counts = {"easy": 0, "medium": 0, "hard": 0}

    # Calculate target counts per difficulty; "hard" absorbs the remainder
    # so the three counts always sum to n.
    target_counts = {
        "easy": max(1, round(n * dist["easy"])),
        "medium": max(1, round(n * dist["medium"])),
        "hard": max(0, n - max(1, round(n * dist["easy"])) - max(1, round(n * dist["medium"]))),
    }

    for i in range(n):
        # Determine difficulty for this question
        if i < 2:
            # Start near student's level
            b = current_theta
        else:
            # Adaptive: alternate based on simulated performance
            if i % 3 == 0:
                b = current_theta - 0.5  # Slightly easier
            elif i % 3 == 1:
                b = current_theta
            else:
                b = current_theta + 0.5  # Slightly harder

        # Classify difficulty on the same b thresholds used in calibration.
        if b < -1.0:
            diff_label = "easy"
        elif b < 1.0:
            diff_label = "medium"
        else:
            diff_label = "hard"

        # Ensure we don't exceed target counts
        if difficulty_counts[diff_label] >= target_counts[diff_label]:
            # Pick the difficulty with most remaining quota
            remaining = {k: target_counts[k] - difficulty_counts[k] for k in target_counts}
            diff_label = max(remaining, key=lambda k: remaining[k])
            # Nudge b into the chosen band (medium keeps its current b).
            if diff_label == "easy":
                b = min(b, -1.0)
            elif diff_label == "hard":
                b = max(b, 1.0)

        difficulty_counts[diff_label] += 1

        # Calculate predicted success probability under the 3PL model.
        predicted_p = _irt_3pl_probability(current_theta, a=1.0, b=b, c=0.25)

        selected.append(AdaptiveQuizSelection(
            questionId=f"{request.topicId}_q{i+1}",
            estimatedDifficulty=round(b, 3),
            predictedSuccessProbability=round(predicted_p, 3),
            difficultyLabel=diff_label,
        ))

    # Expected overall success rate
    avg_success = sum(q.predictedSuccessProbability for q in selected) / max(len(selected), 1)

    return AdaptiveQuizResponse(
        studentId=request.studentId,
        topicId=request.topicId,
        selectedQuestions=selected,
        studentAbilityEstimate=round(theta, 3),
        expectedSuccessRate=round(avg_success, 3),
        difficultyDistribution=difficulty_counts,
    )
1503
+
1504
+
1505
+ # ─── Topic Recommendation Engine ──────────────────────────────
1506
+
1507
+
1508
+ async def recommend_topics(request: TopicRecommendationRequest) -> TopicRecommendationResponse:
1509
+ """
1510
+ Recommend topics based on competency gaps, prerequisites, and peer data.
1511
+ """
1512
+ student_id = request.studentId
1513
+ quiz_history = await fetch_student_quiz_history(student_id)
1514
+
1515
+ if not quiz_history:
1516
+ # Cold start: recommend foundational topics
1517
+ foundational = [
1518
+ TopicRecommendation(
1519
+ topicId="Variables & Expressions",
1520
+ topicName="Variables & Expressions",
1521
+ recommendationScore=95.0,
1522
+ reasoning="Foundational topic essential for all algebra. Start here to build a strong base.",
1523
+ estimatedTimeToMastery=3,
1524
+ prerequisitesMet=True,
1525
+ currentCompetency="not_attempted",
1526
+ ),
1527
+ TopicRecommendation(
1528
+ topicId="Integers",
1529
+ topicName="Integers",
1530
+ recommendationScore=90.0,
1531
+ reasoning="Core number sense topic needed for all math areas.",
1532
+ estimatedTimeToMastery=2,
1533
+ prerequisitesMet=True,
1534
+ currentCompetency="not_attempted",
1535
+ ),
1536
+ TopicRecommendation(
1537
+ topicId="Fractions & Decimals",
1538
+ topicName="Fractions & Decimals",
1539
+ recommendationScore=85.0,
1540
+ reasoning="Understanding fractions is critical for algebra and calculus.",
1541
+ estimatedTimeToMastery=4,
1542
+ prerequisitesMet=True,
1543
+ currentCompetency="not_attempted",
1544
+ ),
1545
+ ]
1546
+ return TopicRecommendationResponse(
1547
+ studentId=student_id,
1548
+ recommendations=foundational[:request.numRecommendations],
1549
+ status="cold_start",
1550
+ )
1551
+
1552
+ # Get competency analysis
1553
+ comp_result = await compute_competency_analysis(student_id, quiz_history)
1554
+ dependencies = fetch_topic_dependencies()
1555
+
1556
+ topic_competencies: Dict[str, CompetencyAnalysis] = {}
1557
+ for a in comp_result.analyses:
1558
+ topic_competencies[a.topicId] = a
1559
+
1560
+ # Score each topic
1561
+ all_topics = set()
1562
+ for a in comp_result.analyses:
1563
+ all_topics.add(a.topicId)
1564
+ for topic, prereqs in dependencies.items():
1565
+ all_topics.add(topic)
1566
+ all_topics.update(prereqs)
1567
+
1568
+ scored_topics: List[TopicRecommendation] = []
1569
+
1570
+ for topic in all_topics:
1571
+ comp = topic_competencies.get(topic)
1572
+ current_level = comp.competencyLevel if comp else "not_attempted"
1573
+ current_score = comp.averageAccuracy if comp else 0
1574
+
1575
+ # Skip topics already mastered
1576
+ if current_level == "advanced":
1577
+ continue
1578
+
1579
+ # 1. Weakness score (higher for weaker topics)
1580
+ if current_level == "not_attempted":
1581
+ weakness_score = 70
1582
+ elif current_level == "beginner":
1583
+ weakness_score = 100 - current_score
1584
+ elif current_level == "developing":
1585
+ weakness_score = 80 - current_score * 0.5
1586
+ else: # proficient
1587
+ weakness_score = 40 - current_score * 0.3
1588
+
1589
+ # 2. Prerequisite score (higher if prerequisites are met)
1590
+ prereqs = dependencies.get(topic, [])
1591
+ if prereqs:
1592
+ prereq_scores = []
1593
+ for p in prereqs:
1594
+ p_comp = topic_competencies.get(p)
1595
+ if p_comp:
1596
+ prereq_scores.append(p_comp.averageAccuracy)
1597
+ else:
1598
+ prereq_scores.append(0)
1599
+ prereq_avg = sum(prereq_scores) / len(prereq_scores) if prereq_scores else 0
1600
+ prereqs_met = all(s >= 50 for s in prereq_scores)
1601
+ else:
1602
+ prereq_avg = 100 # No prereqs needed
1603
+ prereqs_met = True
1604
+
1605
+ # 3. Recency score (boost recently attempted topics)
1606
+ if comp and comp.lastAttemptDate:
1607
+ try:
1608
+ last_dt = datetime.fromisoformat(comp.lastAttemptDate.replace("Z", "+00:00"))
1609
+ days_since = (datetime.utcnow() - last_dt.replace(tzinfo=None)).days
1610
+ except Exception:
1611
+ days_since = 30
1612
+ else:
1613
+ days_since = 30
1614
+
1615
+ recency_score = min(days_since, 60) # cap at 60
1616
+
1617
+ # 4. Combined score
1618
+ total_score = (
1619
+ weakness_score * 0.4
1620
+ + prereq_avg * 0.3
1621
+ + recency_score * 0.2
1622
+ + (10 if prereqs_met else 0) * 0.1
1623
+ )
1624
+
1625
+ # Degrade score if prerequisites not met (but still recommend)
1626
+ if not prereqs_met:
1627
+ total_score *= 0.6
1628
+
1629
+ # Estimate time to mastery (hours)
1630
+ if current_level == "not_attempted":
1631
+ est_hours = 8
1632
+ elif current_level == "beginner":
1633
+ est_hours = 6
1634
+ elif current_level == "developing":
1635
+ est_hours = 4
1636
+ else:
1637
+ est_hours = 2
1638
+
1639
+ # Build reasoning
1640
+ reasons = []
1641
+ if current_level in ("beginner", "not_attempted"):
1642
+ reasons.append(f"Currently at {current_level} level — focused practice will build foundation")
1643
+ elif current_level == "developing":
1644
+ reasons.append(f"Developing competency ({current_score:.0f}% accuracy) — close to proficiency with more practice")
1645
+ else:
1646
+ reasons.append(f"Proficient but not yet mastered ({current_score:.0f}% accuracy)")
1647
+
1648
+ if not prereqs_met and prereqs:
1649
+ reasons.append(f"Note: prerequisites ({', '.join(prereqs)}) not fully met — complete those first")
1650
+ elif prereqs and prereqs_met:
1651
+ reasons.append("All prerequisites are met")
1652
+
1653
+ if comp and comp.learningVelocity > 0:
1654
+ reasons.append(f"Positive learning trend (velocity: {comp.learningVelocity:+.3f})")
1655
+ elif comp and comp.learningVelocity < 0:
1656
+ reasons.append(f"Declining performance detected — review recommended")
1657
+
1658
+ if days_since > 14:
1659
+ reasons.append(f"Not practiced in {days_since} days — review to prevent forgetting")
1660
+
1661
+ scored_topics.append(TopicRecommendation(
1662
+ topicId=topic,
1663
+ topicName=topic.replace("_", " ").title(),
1664
+ recommendationScore=round(total_score, 2),
1665
+ reasoning=". ".join(reasons) + ".",
1666
+ estimatedTimeToMastery=est_hours,
1667
+ prerequisitesMet=prereqs_met,
1668
+ currentCompetency=current_level,
1669
+ ))
1670
+
1671
+ # Sort by score descending
1672
+ scored_topics.sort(key=lambda t: t.recommendationScore, reverse=True)
1673
+
1674
+ return TopicRecommendationResponse(
1675
+ studentId=student_id,
1676
+ recommendations=scored_topics[:request.numRecommendations],
1677
+ status="success",
1678
+ )
1679
+
1680
+
1681
+ # ─── Learning Analytics Aggregation ───────────────────────────
1682
+
1683
+
1684
+ async def get_student_summary(student_id: str) -> StudentSummaryResponse:
1685
+ """Aggregate all ML metrics for a single student."""
1686
+ # Check cache
1687
+ cached = _cache_get(_competency_cache, f"summary_{student_id}", IRT_DIFFICULTY_CACHE_TTL)
1688
+ if cached:
1689
+ return cached
1690
+
1691
+ quiz_history = await fetch_student_quiz_history(student_id)
1692
+ engagement = await fetch_student_engagement_metrics(student_id)
1693
+
1694
+ # Competency analysis
1695
+ comp_result = await compute_competency_analysis(student_id, quiz_history)
1696
+
1697
+ # Competency distribution
1698
+ comp_dist = {"beginner": 0, "developing": 0, "proficient": 0, "advanced": 0}
1699
+ for a in comp_result.analyses:
1700
+ if a.competencyLevel in comp_dist:
1701
+ comp_dist[a.competencyLevel] += 1
1702
+
1703
+ # Efficiency scores per subject
1704
+ eff_scores = {}
1705
+ for a in comp_result.analyses:
1706
+ eff_scores[a.topicName] = a.efficiencyScore
1707
+
1708
+ # Learning velocity trend (chart data)
1709
+ velocity_trend = []
1710
+ for a in comp_result.analyses:
1711
+ velocity_trend.append({
1712
+ "topic": a.topicName,
1713
+ "velocity": a.learningVelocity,
1714
+ "accuracy": a.averageAccuracy,
1715
+ "attempts": a.totalAttempts,
1716
+ })
1717
+
1718
+ # Topic recommendations
1719
+ try:
1720
+ rec_req = TopicRecommendationRequest(studentId=student_id, numRecommendations=5)
1721
+ rec_result = await recommend_topics(rec_req)
1722
+ recommended = [
1723
+ {
1724
+ "topicId": r.topicId,
1725
+ "topicName": r.topicName,
1726
+ "score": r.recommendationScore,
1727
+ "reasoning": r.reasoning,
1728
+ "prerequisitesMet": r.prerequisitesMet,
1729
+ }
1730
+ for r in rec_result.recommendations
1731
+ ]
1732
+ except Exception as e:
1733
+ logger.warning(f"Topic recommendation failed: {e}")
1734
+ recommended = []
1735
+
1736
+ # Predicted next quiz score (simple linear extrapolation)
1737
+ predicted_score = None
1738
+ if quiz_history and len(quiz_history) >= 3:
1739
+ recent_scores = []
1740
+ for entry in quiz_history[-10:]:
1741
+ score = entry.get("score", 0)
1742
+ total = max(entry.get("total", 1), 1)
1743
+ recent_scores.append((score / total) * 100)
1744
+ if len(recent_scores) >= 3:
1745
+ x = np.arange(len(recent_scores)).reshape(-1, 1)
1746
+ y = np.array(recent_scores)
1747
+ model = LinearRegression()
1748
+ model.fit(x, y)
1749
+ next_idx = np.array([[len(recent_scores)]], dtype=float)
1750
+ next_pred = float(model.predict(next_idx)[0])
1751
+ predicted_score = round(float(max(0.0, min(100.0, next_pred))), 1)
1752
+
1753
+ # Engagement patterns
1754
+ engagement_patterns = {
1755
+ "dailyActivity": engagement.get("dailyActivity", {}),
1756
+ "hourlyActivity": engagement.get("hourlyActivity", {}),
1757
+ "activeDays": engagement.get("activeDays", 0),
1758
+ "avgActivitiesPerDay": engagement.get("avgActivitiesPerDay", 0),
1759
+ "totalXP": engagement.get("totalXP", 0),
1760
+ }
1761
+
1762
+ result = StudentSummaryResponse(
1763
+ studentId=student_id,
1764
+ competencyDistribution=comp_dist,
1765
+ riskAssessment=None,
1766
+ recommendedTopics=recommended,
1767
+ learningVelocityTrend=velocity_trend,
1768
+ efficiencyScores=eff_scores,
1769
+ predictedNextQuizScore=predicted_score,
1770
+ engagementPatterns=engagement_patterns,
1771
+ status="success" if comp_result.status == "success" else "limited_data",
1772
+ )
1773
+
1774
+ # Cache the result
1775
+ _cache_set(_competency_cache, f"summary_{student_id}", result)
1776
+
1777
+ return result
1778
+
1779
+
1780
+ async def get_class_insights(request: ClassInsightsRequest) -> ClassInsightsResponse:
1781
+ """Aggregate class-wide ML analytics for teacher dashboards."""
1782
+ cached = _cache_get(_class_stats_cache, f"class_{request.teacherId}_{request.classId}", IRT_DIFFICULTY_CACHE_TTL)
1783
+ if cached:
1784
+ return cached
1785
+
1786
+ db = _get_firestore_db()
1787
+ student_ids: List[str] = []
1788
+
1789
+ if db is not None:
1790
+ try:
1791
+ if request.classId:
1792
+ # Fetch students in specific class
1793
+ class_ref = db.collection("classes").document(request.classId)
1794
+ class_doc = class_ref.get()
1795
+ if class_doc.exists:
1796
+ class_data = class_doc.to_dict()
1797
+ student_ids = class_data.get("studentIds", [])
1798
+ else:
1799
+ # Fetch all students for this teacher
1800
+ user_ref = db.collection("users").where("role", "==", "student").limit(100)
1801
+ for doc in user_ref.stream():
1802
+ student_ids.append(doc.id)
1803
+ except Exception as e:
1804
+ logger.error(f"Error fetching class students: {e}")
1805
+
1806
+ if not student_ids:
1807
+ # Generate sample data for demo
1808
+ return _generate_demo_class_insights(request)
1809
+
1810
+ # Aggregate per-student data
1811
+ risk_dist = {"High": 0, "Medium": 0, "Low": 0}
1812
+ all_velocities: List[float] = []
1813
+ interventions: List[Dict[str, Any]] = []
1814
+ topic_weakness_counts: Dict[str, int] = defaultdict(int)
1815
+ hourly_engagement = defaultdict(int)
1816
+
1817
+ for sid in student_ids[:50]: # Limit for performance
1818
+ try:
1819
+ summary = await get_student_summary(sid)
1820
+
1821
+ # Risk
1822
+ if summary.riskAssessment:
1823
+ level = summary.riskAssessment.get("riskLevel", "Medium")
1824
+ risk_dist[level] = risk_dist.get(level, 0) + 1
1825
+
1826
+ # Competencies
1827
+ for topic, count in summary.competencyDistribution.items():
1828
+ if topic in ("beginner", "developing") and count > 0:
1829
+ # Mark this as a weak area
1830
+ pass
1831
+
1832
+ # Velocities
1833
+ for vt in summary.learningVelocityTrend:
1834
+ all_velocities.append(vt.get("velocity", 0))
1835
+ if vt.get("velocity", 0) < -0.01:
1836
+ topic_weakness_counts[vt.get("topic", "Unknown")] += 1
1837
+
1838
+ # Engagement
1839
+ for hour_str, count in summary.engagementPatterns.get("hourlyActivity", {}).items():
1840
+ hourly_engagement[int(hour_str)] += count
1841
+
1842
+ # Intervention needed?
1843
+ total_beginner = summary.competencyDistribution.get("beginner", 0)
1844
+ if total_beginner >= 2 or (summary.predictedNextQuizScore and summary.predictedNextQuizScore < 50):
1845
+ interventions.append({
1846
+ "studentId": sid,
1847
+ "reason": "Multiple topics at beginner level" if total_beginner >= 2 else "Predicted score below 50%",
1848
+ "predictedScore": summary.predictedNextQuizScore,
1849
+ "recommendedAction": "Schedule one-on-one tutoring session",
1850
+ })
1851
+
1852
+ except Exception as e:
1853
+ logger.warning(f"Error processing student {sid}: {e}")
1854
+
1855
+ # Common weak topics
1856
+ common_weak = sorted(topic_weakness_counts.items(), key=lambda x: x[1], reverse=True)[:10]
1857
+ weak_topics_list = [
1858
+ {"topic": t, "studentsStruggling": c, "percentageOfClass": round(c / max(len(student_ids), 1) * 100, 1)}
1859
+ for t, c in common_weak
1860
+ ]
1861
+
1862
+ # Velocity distribution
1863
+ if all_velocities:
1864
+ vel_dist: Dict[str, float] = {
1865
+ "mean": round(float(np.mean(all_velocities)), 4),
1866
+ "median": round(float(np.median(all_velocities)), 4),
1867
+ "improving": float(sum(1 for v in all_velocities if v > 0.01)),
1868
+ "declining": float(sum(1 for v in all_velocities if v < -0.01)),
1869
+ "plateaued": float(sum(1 for v in all_velocities if -0.01 <= v <= 0.01)),
1870
+ }
1871
+ else:
1872
+ vel_dist = {"mean": 0.0, "median": 0.0, "improving": 0.0, "declining": 0.0, "plateaued": 0.0}
1873
+
1874
+ result = ClassInsightsResponse(
1875
+ teacherId=request.teacherId,
1876
+ riskDistribution=risk_dist,
1877
+ riskTrend=[], # Would require historical data
1878
+ commonWeakTopics=weak_topics_list,
1879
+ learningVelocityDistribution=vel_dist,
1880
+ engagementPatterns={"hourlyDistribution": dict(hourly_engagement)},
1881
+ interventionRecommendations=interventions[:10],
1882
+ successPredictions={
1883
+ "classAverageExpected": round(float(np.mean([s or 60 for s in []])) if not all_velocities else 65.0, 1),
1884
+ "studentsLikelyToStruggle": len(interventions),
1885
+ },
1886
+ totalStudents=len(student_ids),
1887
+ status="success",
1888
+ )
1889
+
1890
+ _cache_set(_class_stats_cache, f"class_{request.teacherId}_{request.classId}", result)
1891
+ return result
1892
+
1893
+
1894
+ def _generate_demo_class_insights(request: ClassInsightsRequest) -> ClassInsightsResponse:
1895
+ """Generate demo class insights when no real data is available."""
1896
+ return ClassInsightsResponse(
1897
+ teacherId=request.teacherId,
1898
+ riskDistribution={"High": 4, "Medium": 8, "Low": 18},
1899
+ riskTrend=[
1900
+ {"date": "2026-02-11", "high": 5, "medium": 9, "low": 16},
1901
+ {"date": "2026-02-18", "high": 4, "medium": 8, "low": 18},
1902
+ ],
1903
+ commonWeakTopics=[
1904
+ {"topic": "Quadratic Equations", "studentsStruggling": 12, "percentageOfClass": 40.0},
1905
+ {"topic": "Trigonometric Ratios", "studentsStruggling": 9, "percentageOfClass": 30.0},
1906
+ {"topic": "Factoring", "studentsStruggling": 7, "percentageOfClass": 23.3},
1907
+ ],
1908
+ learningVelocityDistribution={
1909
+ "mean": 0.015,
1910
+ "median": 0.008,
1911
+ "improving": 18,
1912
+ "declining": 5,
1913
+ "plateaued": 7,
1914
+ },
1915
+ engagementPatterns={
1916
+ "hourlyDistribution": {str(h): random.randint(5, 40) for h in range(8, 22)},
1917
+ "peakHour": 16,
1918
+ "avgDailyActiveStudents": 22,
1919
+ },
1920
+ interventionRecommendations=[
1921
+ {
1922
+ "studentId": "demo_student_1",
1923
+ "reason": "Declining performance in multiple topics",
1924
+ "predictedScore": 42.5,
1925
+ "recommendedAction": "Schedule one-on-one review session for Quadratic Equations",
1926
+ },
1927
+ {
1928
+ "studentId": "demo_student_2",
1929
+ "reason": "3 consecutive absences",
1930
+ "predictedScore": 38.0,
1931
+ "recommendedAction": "Contact parent/guardian and arrange catch-up sessions",
1932
+ },
1933
+ ],
1934
+ successPredictions={
1935
+ "classAverageExpected": 72.3,
1936
+ "studentsLikelyToStruggle": 4,
1937
+ "studentsLikelyToExcel": 8,
1938
+ },
1939
+ totalStudents=30,
1940
+ status="demo_data",
1941
+ )
1942
+
1943
+
1944
+ # ─── Mock Data Generator ──────────────────────────────────────
1945
+
1946
+
1947
+ def generate_mock_student_data(
1948
+ num_students: int = 30,
1949
+ num_quizzes: int = 20,
1950
+ seed: Optional[int] = None,
1951
+ ) -> Dict[str, Any]:
1952
+ """
1953
+ Generate realistic mock student data for testing ML features.
1954
+ Includes edge cases: perfect students, struggling students, inconsistent performers.
1955
+ """
1956
+ if seed is not None:
1957
+ random.seed(seed)
1958
+ np.random.seed(seed)
1959
+
1960
+ topics = [
1961
+ "Linear Equations", "Quadratic Equations", "Polynomials",
1962
+ "Trigonometric Ratios", "Pythagorean Theorem", "Fractions & Decimals",
1963
+ "Integers", "Probability Basics", "Angles", "Area & Perimeter",
1964
+ ]
1965
+
1966
+ students = []
1967
+ all_quiz_data = []
1968
+
1969
+ for i in range(num_students):
1970
+ student_id = f"mock_student_{i+1:03d}"
1971
+
1972
+ # Assign student archetype
1973
+ archetype_roll = random.random()
1974
+ if archetype_roll < 0.1:
1975
+ archetype = "perfect"
1976
+ elif archetype_roll < 0.2:
1977
+ archetype = "struggling"
1978
+ elif archetype_roll < 0.3:
1979
+ archetype = "inconsistent"
1980
+ elif archetype_roll < 0.5:
1981
+ archetype = "improving"
1982
+ elif archetype_roll < 0.65:
1983
+ archetype = "declining"
1984
+ else:
1985
+ archetype = "average"
1986
+
1987
+ # Base metrics per archetype
1988
+ archetypes = {
1989
+ "perfect": {
1990
+ "engagement": (90, 5), "quiz": (92, 4), "attendance": (98, 2),
1991
+ "completion": (95, 3), "streak": (15, 3),
1992
+ },
1993
+ "struggling": {
1994
+ "engagement": (25, 10), "quiz": (30, 12), "attendance": (55, 15),
1995
+ "completion": (30, 12), "streak": (0, 1),
1996
+ },
1997
+ "inconsistent": {
1998
+ "engagement": (60, 25), "quiz": (55, 25), "attendance": (70, 20),
1999
+ "completion": (55, 20), "streak": (3, 5),
2000
+ },
2001
+ "improving": {
2002
+ "engagement": (65, 10), "quiz": (60, 10), "attendance": (80, 8),
2003
+ "completion": (70, 10), "streak": (7, 3),
2004
+ },
2005
+ "declining": {
2006
+ "engagement": (50, 15), "quiz": (55, 15), "attendance": (65, 12),
2007
+ "completion": (50, 15), "streak": (1, 2),
2008
+ },
2009
+ "average": {
2010
+ "engagement": (65, 12), "quiz": (68, 10), "attendance": (82, 8),
2011
+ "completion": (72, 10), "streak": (5, 3),
2012
+ },
2013
+ }
2014
+
2015
+ params = archetypes[archetype]
2016
+ engagement = max(0, min(100, np.random.normal(*params["engagement"])))
2017
+ avg_quiz = max(0, min(100, np.random.normal(*params["quiz"])))
2018
+ attendance = max(0, min(100, np.random.normal(*params["attendance"])))
2019
+ completion = max(0, min(100, np.random.normal(*params["completion"])))
2020
+ streak = max(0, int(np.random.normal(*params["streak"])))
2021
+
2022
+ student = {
2023
+ "studentId": student_id,
2024
+ "name": f"Student {i+1}",
2025
+ "archetype": archetype,
2026
+ "engagementScore": round(engagement, 1),
2027
+ "avgQuizScore": round(avg_quiz, 1),
2028
+ "attendance": round(attendance, 1),
2029
+ "assignmentCompletion": round(completion, 1),
2030
+ "streak": streak,
2031
+ "xpGrowthRate": round(np.random.normal(0.5 if archetype == "improving" else 0, 0.3), 2),
2032
+ "timeOnPlatform": round(max(0, np.random.normal(8, 3)), 1),
2033
+ }
2034
+ students.append(student)
2035
+
2036
+ # Generate quiz history for this student
2037
+ base_time = datetime(2025, 9, 1)
2038
+ for j in range(num_quizzes):
2039
+ topic = random.choice(topics)
2040
+ days_offset = random.randint(0, 150)
2041
+ quiz_date = base_time + timedelta(days=days_offset)
2042
+
2043
+ # Score based on archetype with progression
2044
+ if archetype == "improving":
2045
+ base_score = 40 + (j / num_quizzes) * 40
2046
+ elif archetype == "declining":
2047
+ base_score = 80 - (j / num_quizzes) * 35
2048
+ elif archetype == "perfect":
2049
+ base_score = 90
2050
+ elif archetype == "struggling":
2051
+ base_score = 30
2052
+ elif archetype == "inconsistent":
2053
+ base_score = random.choice([30, 50, 70, 90])
2054
+ else: # average
2055
+ base_score = 65
2056
+
2057
+ score = max(0, min(100, base_score + np.random.normal(0, 8)))
2058
+ total_questions = random.choice([10, 15, 20])
2059
+ correct = round(total_questions * score / 100)
2060
+ time_per_q = max(10, np.random.normal(60 if score > 70 else 90, 20))
2061
+
2062
+ quiz_entry = {
2063
+ "studentId": student_id,
2064
+ "topicId": topic,
2065
+ "topic": topic,
2066
+ "score": correct,
2067
+ "total": total_questions,
2068
+ "correct": correct >= total_questions * 0.5,
2069
+ "timeTaken": round(time_per_q * total_questions),
2070
+ "timeSpent": round(time_per_q),
2071
+ "attempts": random.choice([1, 1, 1, 2, 2, 3]) if score < 60 else 1,
2072
+ "completedAt": quiz_date.isoformat(),
2073
+ "timestamp": quiz_date.isoformat(),
2074
+ "questionId": f"q_{topic.replace(' ', '_').lower()}_{j}",
2075
+ }
2076
+ all_quiz_data.append(quiz_entry)
2077
+
2078
+ return {
2079
+ "students": students,
2080
+ "quizHistory": all_quiz_data,
2081
+ "metadata": {
2082
+ "numStudents": num_students,
2083
+ "numQuizzes": num_quizzes,
2084
+ "archetypeDistribution": {
2085
+ archetype: sum(1 for s in students if s["archetype"] == archetype)
2086
+ for archetype in ["perfect", "struggling", "inconsistent", "improving", "declining", "average"]
2087
+ },
2088
+ "topicsCovered": topics,
2089
+ "generatedAt": datetime.utcnow().isoformat(),
2090
+ },
2091
+ }
2092
+
2093
+
2094
+ # ─── Cache Management ─────────────────────────────────────────
2095
+
2096
+
2097
+ def refresh_all_caches() -> RefreshCacheResponse:
2098
+ """Clear and refresh all in-memory caches."""
2099
+ _competency_cache.clear()
2100
+ _class_stats_cache.clear()
2101
+ _difficulty_cache.clear()
2102
+ _risk_model_cache.clear()
2103
+
2104
+ logger.info("All analytics caches cleared")
2105
+
2106
+ return RefreshCacheResponse(
2107
+ status="caches_cleared",
2108
+ cachedItems=0,
2109
+ timestamp=datetime.utcnow().isoformat(),
2110
+ )
automation_engine.py ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MathPulse AI - Event-Driven Automation Engine
3
+
4
+ Processes educational workflows based on a diagnostic-first, risk-driven
5
+ intervention model. Trigger points:
6
+
7
+ 1. Diagnostic Assessment Completion (highest priority)
8
+ 2. Quiz / Assessment Submission (continuous)
9
+ 3. New Student Enrollment
10
+ 4. External Data Import (teacher action)
11
+ 5. Admin Content Updates
12
+
13
+ Each event is routed to a dedicated handler that orchestrates
14
+ classification, quiz generation, notifications and dashboard updates.
15
+ """
16
+
17
+ import math
18
+ import logging
19
+ from typing import List, Optional, Dict, Any
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+ logger = logging.getLogger("mathpulse.automation")
24
+
25
+ # ─── Constants ──────────────────────────────────────────────────
26
+
27
+ AT_RISK_THRESHOLD = 60 # < 60 % → At Risk
28
+ WEAK_TOPIC_THRESHOLD = 0.50 # < 50 % accuracy → weak topic
29
+ HIGH_RISK_RATIO = 0.75 # 75 %+ subjects at risk
30
+ MEDIUM_RISK_RATIO = 0.50 # 50-75 %
31
+
32
+ REMEDIAL_CONFIG = {
33
+ "High": {"questions": 15, "dist": {"easy": 60, "medium": 30, "hard": 10}},
34
+ "Medium": {"questions": 12, "dist": {"easy": 50, "medium": 35, "hard": 15}},
35
+ "Low": {"questions": 10, "dist": {"easy": 40, "medium": 40, "hard": 20}},
36
+ }
37
+
38
+ # ─── Request / Response Models ──────────────────────────────────
39
+
40
+
41
class DiagnosticResult(BaseModel):
    """Per-subject score from diagnostic assessment."""
    # Subject display name / identifier
    subject: str
    # Percentage score, constrained to 0-100 by pydantic
    score: float = Field(..., ge=0, le=100)
45
+
46
+
47
class DiagnosticCompletionPayload(BaseModel):
    """Payload sent when a student completes the diagnostic."""
    studentId: str
    # One DiagnosticResult per assessed subject
    results: List[DiagnosticResult]
    gradeLevel: str = "Grade 10"
    # Optional per-topic question detail: topic → [{correct: bool, …}]
    questionBreakdown: Optional[Dict[str, list]] = None
53
+
54
+
55
class QuizSubmissionPayload(BaseModel):
    """Payload sent on quiz / assessment submission."""
    studentId: str
    quizId: str
    subject: str
    # Percentage score, 0-100 (validated by pydantic)
    score: float = Field(..., ge=0, le=100)
    totalQuestions: int
    correctAnswers: int
    timeSpentSeconds: int
    # Optional per-question answer records
    answers: Optional[List[Dict[str, Any]]] = None
65
+
66
+
67
class StudentEnrollmentPayload(BaseModel):
    """Payload sent when a new student account is created."""
    studentId: str
    name: str
    email: str
    gradeLevel: str = "Grade 10"
    # Optional assigning teacher; when set, enrollment also notifies them
    teacherId: Optional[str] = None
74
+
75
+
76
class DataImportPayload(BaseModel):
    """Payload sent after a teacher uploads a spreadsheet."""
    teacherId: str
    # Parsed student rows (one dict per spreadsheet row)
    students: List[Dict[str, Any]]
    # Spreadsheet column → canonical field name mapping
    columnMapping: Dict[str, str]
81
+
82
+
83
class ContentUpdatePayload(BaseModel):
    """Payload sent when admin performs CRUD on curriculum."""
    adminId: str
    # One of: create | update | delete
    action: str
    # One of: lesson | quiz | module | subject
    contentType: str
    contentId: str
    subjectId: Optional[str] = None
    # Optional free-text description of the change
    details: Optional[str] = None
91
+
92
+
93
+ # ─── Risk classification helpers ─────────────────────────────────
94
+
95
+
96
class SubjectRiskClassification(BaseModel):
    """Risk verdict for one subject."""
    # "At Risk" | "On Track"
    status: str
    score: float
    # 0-1 distance-from-threshold confidence (see handler logic)
    confidence: float
    needsIntervention: bool
101
+
102
+
103
class AutomationResult(BaseModel):
    """Standardised result returned by every handler."""
    success: bool
    # Event name, e.g. "diagnostic_completed", "quiz_submitted"
    event: str
    studentId: Optional[str] = None
    message: str
    # subject → {status, score, confidence, needsIntervention}
    riskClassifications: Optional[Dict[str, Dict[str, Any]]] = None
    # "High" | "Medium" | "Low" when computed, else None
    overallRisk: Optional[str] = None
    atRiskSubjects: Optional[List[str]] = None
    weakTopics: Optional[List[Dict[str, Any]]] = None
    # AI-generated learning-path text, when produced
    learningPath: Optional[str] = None
    remedialQuizzesCreated: int = 0
    # AI-generated teacher intervention text, when produced
    interventions: Optional[str] = None
    # Human-readable notification messages for student/teacher
    notifications: List[str] = Field(default_factory=list)
117
+
118
+
119
+ # ─── Automation Engine ──────────────────────────────────────────
120
+
121
+
122
+ class MathPulseAutomationEngine:
123
+ """
124
+ Stateless event-driven automation system.
125
+
126
+ Each ``handle_*`` method is an independent, self-contained handler that
127
+ receives a validated Pydantic payload and returns an ``AutomationResult``.
128
+ Firebase / Hugging Face calls are only attempted when available.
129
+ """
130
+
131
+ # ────────────────────────────────────────────────────────────
132
+ # 1. DIAGNOSTIC COMPLETION (highest-priority)
133
+ # ────────────────────────────────────────────────────────────
134
+
135
    async def handle_diagnostic_completion(
        self, payload: DiagnosticCompletionPayload
    ) -> AutomationResult:
        """
        Runs when a student completes the mandatory diagnostic.

        Steps:
            1. Classify per-subject risk
            2. Identify weak topics
            3. Compute overall risk
            4. Generate personalised learning path (AI)
            5. Create remedial quiz assignments
            6. Generate teacher intervention recommendations (AI)
            7. Persist everything & notify

        Steps 4-6 only run when at least one subject is At Risk; the AI
        and quiz-config work is delegated to private helpers (defined
        elsewhere in this class).

        Args:
            payload: Diagnostic scores plus optional per-topic breakdown.

        Returns:
            AutomationResult with risk classifications, weak topics,
            learning path, remedial quiz count and notification messages.
        """
        student_id = payload.studentId
        logger.info(f"📊 DIAGNOSTIC COMPLETED for {student_id}")
        notifications: list[str] = []

        # 1 — subject-level risk
        risk_classifications = self._classify_subject_risks(payload.results)

        # 2 — weak topics (from the optional per-question breakdown)
        weak_topics = self._identify_weak_topics(payload.questionBreakdown)

        # 3 — overall risk
        overall_risk = self._calculate_overall_risk(risk_classifications)

        # Subjects whose classification came back "At Risk"
        at_risk_subjects = [
            subj for subj, data in risk_classifications.items()
            if data["status"] == "At Risk"
        ]

        # 4 — learning path (AI call) — only when something is at risk
        learning_path: Optional[str] = None
        if at_risk_subjects:
            learning_path = await self._generate_learning_path(
                at_risk_subjects, weak_topics, payload.gradeLevel
            )

        # 5 — remedial quizzes, sized by overall risk level
        remedial_count = 0
        remedial_quizzes: list[dict] = []
        if at_risk_subjects:
            remedial_quizzes = self._build_remedial_quiz_configs(
                student_id, at_risk_subjects, overall_risk, payload.gradeLevel
            )
            remedial_count = len(remedial_quizzes)

        # 6 — teacher interventions (AI call)
        interventions: Optional[str] = None
        if at_risk_subjects:
            interventions = await self._generate_teacher_interventions(
                risk_classifications, weak_topics
            )

        # 7 — notification messages
        if at_risk_subjects:
            notifications.append(
                f"Diagnostic complete — {len(at_risk_subjects)} subject(s) flagged At Risk: "
                + ", ".join(at_risk_subjects)
            )
        else:
            notifications.append("Diagnostic complete — all subjects On Track!")

        logger.info(
            f"✅ DIAGNOSTIC PROCESSING COMPLETE for {student_id} | "
            f"Overall={overall_risk} | AtRisk={at_risk_subjects}"
        )

        return AutomationResult(
            success=True,
            event="diagnostic_completed",
            studentId=student_id,
            message=f"Diagnostic processed for {student_id}",
            riskClassifications=risk_classifications,
            overallRisk=overall_risk,
            atRiskSubjects=at_risk_subjects,
            weakTopics=weak_topics,
            learningPath=learning_path,
            remedialQuizzesCreated=remedial_count,
            interventions=interventions,
            notifications=notifications,
        )
219
+
220
+ # ────────────────────────────────────────────────────────────
221
+ # 2. QUIZ SUBMISSION (continuous)
222
+ # ────────────────────────────────────────────────────────────
223
+
224
+ async def handle_quiz_submission(
225
+ self, payload: QuizSubmissionPayload
226
+ ) -> AutomationResult:
227
+ """Recalculate risk for a subject after a quiz is submitted."""
228
+ student_id = payload.studentId
229
+ logger.info(f"📝 QUIZ SUBMITTED by {student_id} — {payload.subject} ({payload.score}%)")
230
+ notifications: list[str] = []
231
+
232
+ # Determine new status for this subject
233
+ new_status = "At Risk" if payload.score < AT_RISK_THRESHOLD else "On Track"
234
+ confidence = (
235
+ (AT_RISK_THRESHOLD - payload.score) / AT_RISK_THRESHOLD
236
+ if new_status == "At Risk"
237
+ else (payload.score - AT_RISK_THRESHOLD) / (100 - AT_RISK_THRESHOLD)
238
+ )
239
+
240
+ risk_classifications = {
241
+ payload.subject: {
242
+ "status": new_status,
243
+ "score": payload.score,
244
+ "confidence": round(abs(confidence), 2),
245
+ "needsIntervention": new_status == "At Risk",
246
+ }
247
+ }
248
+
249
+ at_risk = [payload.subject] if new_status == "At Risk" else []
250
+
251
+ if new_status == "At Risk":
252
+ notifications.append(
253
+ f"Quiz result: {payload.subject} scored {payload.score}% — status changed to At Risk"
254
+ )
255
+ else:
256
+ notifications.append(
257
+ f"Quiz result: {payload.subject} scored {payload.score}% — On Track"
258
+ )
259
+
260
+ return AutomationResult(
261
+ success=True,
262
+ event="quiz_submitted",
263
+ studentId=student_id,
264
+ message=f"Quiz processed for {student_id}",
265
+ riskClassifications=risk_classifications,
266
+ overallRisk=None, # single-subject update — overall recalculated on frontend
267
+ atRiskSubjects=at_risk,
268
+ notifications=notifications,
269
+ )
270
+
271
+ # ────────────────────────────────────────────────────────────
272
+ # 3. STUDENT ENROLLMENT
273
+ # ────────────────────────────────────────────────────────────
274
+
275
+ async def handle_student_enrollment(
276
+ self, payload: StudentEnrollmentPayload
277
+ ) -> AutomationResult:
278
+ """
279
+ Prepare a new student:
280
+ - Create empty progress record skeleton
281
+ - Initialise gamification (XP 0, Level 1, no streaks)
282
+ - Flag as needing diagnostic
283
+ """
284
+ student_id = payload.studentId
285
+ logger.info(f"🆕 NEW STUDENT ENROLLED: {student_id}")
286
+
287
+ notifications: list[str] = [
288
+ f"Welcome {payload.name}! Please complete the diagnostic assessment to personalise your learning path.",
289
+ ]
290
+
291
+ if payload.teacherId:
292
+ notifications.append(
293
+ f"New student {payload.name} enrolled — diagnostic pending."
294
+ )
295
+
296
+ return AutomationResult(
297
+ success=True,
298
+ event="student_enrolled",
299
+ studentId=student_id,
300
+ message=f"Student {payload.name} enrolled and initialised",
301
+ notifications=notifications,
302
+ )
303
+
304
+ # ────────────────────────────────────────────────────────────
305
+ # 4. DATA IMPORT (teacher action)
306
+ # ────────────────────────────────────────────────────────────
307
+
308
+ async def handle_data_import(
309
+ self, payload: DataImportPayload
310
+ ) -> AutomationResult:
311
+ """
312
+ After a teacher uploads a spreadsheet, recalculate risk for every
313
+ imported student and flag any status changes.
314
+ """
315
+ logger.info(f"📂 DATA IMPORT by teacher {payload.teacherId} — {len(payload.students)} students")
316
+ notifications: list[str] = []
317
+ high_risk_students: list[str] = []
318
+ medium_risk_count = 0
319
+ low_risk_count = 0
320
+ weak_topic_counts: Dict[str, int] = {}
321
+
322
+ for student_row in payload.students:
323
+ name = str(student_row.get("name") or "Unknown").strip() or "Unknown"
324
+ avg_score = self._safe_float(student_row.get("avgQuizScore"), 0.0)
325
+ attendance = self._safe_float(student_row.get("attendance"), 0.0)
326
+ engagement = self._safe_float(student_row.get("engagementScore"), 0.0)
327
+ completion_raw = student_row.get("assignmentCompletion")
328
+ completion = (
329
+ self._safe_float(completion_raw, 0.0)
330
+ if completion_raw not in (None, "")
331
+ else None
332
+ )
333
+
334
+ risk_level = self._classify_import_risk(
335
+ avg_score=avg_score,
336
+ attendance=attendance,
337
+ engagement=engagement,
338
+ completion=completion,
339
+ )
340
+ if risk_level == "High":
341
+ high_risk_students.append(name)
342
+ elif risk_level == "Medium":
343
+ medium_risk_count += 1
344
+ else:
345
+ low_risk_count += 1
346
+
347
+ topic_label = self._extract_import_topic(student_row)
348
+ if topic_label:
349
+ weak_topic_counts[topic_label] = weak_topic_counts.get(topic_label, 0) + 1
350
+
351
+ if high_risk_students:
352
+ notifications.append(
353
+ f"Data import flagged {len(high_risk_students)} high-risk student(s): "
354
+ + ", ".join(high_risk_students[:5])
355
+ + ("..." if len(high_risk_students) > 5 else "")
356
+ )
357
+
358
+ notifications.append(
359
+ "Risk interpretation summary — "
360
+ f"High: {len(high_risk_students)}, Medium: {medium_risk_count}, Low: {low_risk_count}."
361
+ )
362
+
363
+ if weak_topic_counts:
364
+ top_topics = sorted(
365
+ weak_topic_counts.items(),
366
+ key=lambda item: (-item[1], item[0]),
367
+ )[:3]
368
+ notifications.append(
369
+ "Most frequent weak-topic signals: "
370
+ + ", ".join(f"{topic} ({count})" for topic, count in top_topics)
371
+ )
372
+
373
+ notifications.append(
374
+ f"Data import complete — {len(payload.students)} student records processed."
375
+ )
376
+
377
+ return AutomationResult(
378
+ success=True,
379
+ event="data_imported",
380
+ studentId=None,
381
+ message=f"Data import processed for {len(payload.students)} students",
382
+ atRiskSubjects=None,
383
+ notifications=notifications,
384
+ )
385
+
386
+ # ────────────────────────────────────────────────────────────
387
+ # 5. CONTENT UPDATE (admin action)
388
+ # ────────────────────────────────────────────────────────────
389
+
390
+ async def handle_content_update(
391
+ self, payload: ContentUpdatePayload
392
+ ) -> AutomationResult:
393
+ """
394
+ After admin CRUD on curriculum, log & notify.
395
+ """
396
+ logger.info(
397
+ f"📚 CONTENT UPDATE by admin {payload.adminId}: "
398
+ f"{payload.action} {payload.contentType} {payload.contentId}"
399
+ )
400
+ notifications: list[str] = [
401
+ f"Curriculum update: {payload.action}d {payload.contentType} "
402
+ f"({payload.contentId}). Teachers may want to review affected quizzes.",
403
+ ]
404
+
405
+ return AutomationResult(
406
+ success=True,
407
+ event="content_updated",
408
+ studentId=None,
409
+ message=f"Content {payload.action} processed for {payload.contentType}",
410
+ notifications=notifications,
411
+ )
412
+
413
+ # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
414
+ # INTERNAL HELPERS
415
+ # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
416
+
417
+ # --- risk classification ---
418
+
419
+ @staticmethod
420
+ def _safe_float(value: Any, default: float = 0.0) -> float:
421
+ try:
422
+ parsed = float(value)
423
+ if math.isnan(parsed) or math.isinf(parsed):
424
+ return default
425
+ return parsed
426
+ except (TypeError, ValueError):
427
+ return default
428
+
429
+ @staticmethod
430
+ def _classify_import_risk(
431
+ *,
432
+ avg_score: float,
433
+ attendance: float,
434
+ engagement: float,
435
+ completion: Optional[float],
436
+ ) -> str:
437
+ high_flags = int(avg_score < 60) + int(attendance < 75) + int(engagement < 55)
438
+ medium_flags = int(avg_score < 75) + int(attendance < 85) + int(engagement < 70)
439
+
440
+ if completion is not None:
441
+ high_flags += int(completion < 60)
442
+ medium_flags += int(completion < 75)
443
+
444
+ if high_flags >= 2 or (avg_score < 55 and (attendance < 80 or engagement < 65)):
445
+ return "High"
446
+ if medium_flags >= 2:
447
+ return "Medium"
448
+ return "Low"
449
+
450
+ @staticmethod
451
+ def _extract_import_topic(student_row: Dict[str, Any]) -> Optional[str]:
452
+ explicit_topic = str(student_row.get("weakestTopic") or "").strip()
453
+ if explicit_topic:
454
+ return explicit_topic
455
+
456
+ assessment_name = str(student_row.get("assessmentName") or "").strip()
457
+ if assessment_name and assessment_name.lower() != "general-assessment":
458
+ return assessment_name
459
+
460
+ return None
461
+
462
+ @staticmethod
463
+ def _classify_subject_risks(
464
+ results: List[DiagnosticResult],
465
+ ) -> Dict[str, Dict[str, Any]]:
466
+ """Classify each subject as 'At Risk' or 'On Track'."""
467
+ classifications: Dict[str, Dict[str, Any]] = {}
468
+ for r in results:
469
+ if r.score < AT_RISK_THRESHOLD:
470
+ status = "At Risk"
471
+ confidence = round((AT_RISK_THRESHOLD - r.score) / AT_RISK_THRESHOLD, 2)
472
+ else:
473
+ status = "On Track"
474
+ confidence = round(
475
+ (r.score - AT_RISK_THRESHOLD) / (100 - AT_RISK_THRESHOLD), 2
476
+ )
477
+ classifications[r.subject] = {
478
+ "status": status,
479
+ "score": r.score,
480
+ "confidence": confidence,
481
+ "needsIntervention": status == "At Risk",
482
+ }
483
+ return classifications
484
+
485
+ @staticmethod
486
+ def _identify_weak_topics(
487
+ question_breakdown: Optional[Dict[str, list]],
488
+ ) -> List[Dict[str, Any]]:
489
+ """
490
+ Drill into per-topic accuracy from diagnostic question-level data.
491
+ Returns topics sorted weakest-first.
492
+ """
493
+ if not question_breakdown:
494
+ return []
495
+
496
+ weak: list[dict] = []
497
+ for topic, questions in question_breakdown.items():
498
+ if not questions:
499
+ continue
500
+ correct_count = sum(1 for q in questions if q.get("correct"))
501
+ accuracy = correct_count / len(questions)
502
+ if accuracy < WEAK_TOPIC_THRESHOLD:
503
+ weak.append({
504
+ "topic": topic,
505
+ "accuracy": round(accuracy, 2),
506
+ "questionsAttempted": len(questions),
507
+ "priority": "high" if accuracy < 0.3 else "medium",
508
+ })
509
+ weak.sort(key=lambda x: x["accuracy"])
510
+ return weak
511
+
512
+ @staticmethod
513
+ def _calculate_overall_risk(
514
+ classifications: Dict[str, Dict[str, Any]],
515
+ ) -> str:
516
+ total = len(classifications)
517
+ if total == 0:
518
+ return "Low"
519
+ at_risk_count = sum(
520
+ 1 for d in classifications.values() if d["status"] == "At Risk"
521
+ )
522
+ ratio = at_risk_count / total
523
+ if ratio >= HIGH_RISK_RATIO:
524
+ return "High"
525
+ elif ratio >= MEDIUM_RISK_RATIO:
526
+ return "Medium"
527
+ return "Low"
528
+
529
+ # --- remedial quiz configs ---
530
+
531
+ @staticmethod
532
+ def _build_remedial_quiz_configs(
533
+ student_id: str,
534
+ at_risk_subjects: List[str],
535
+ overall_risk: str,
536
+ grade_level: str,
537
+ ) -> List[Dict[str, Any]]:
538
+ """Return list of quiz configuration dicts ready for persistence."""
539
+ cfg = REMEDIAL_CONFIG.get(overall_risk, REMEDIAL_CONFIG["Low"])
540
+ quizzes: list[dict] = []
541
+ for subject in at_risk_subjects:
542
+ quizzes.append({
543
+ "studentId": student_id,
544
+ "subject": subject,
545
+ "quizConfig": {
546
+ "topics": [subject],
547
+ "gradeLevel": grade_level,
548
+ "numQuestions": cfg["questions"],
549
+ "questionTypes": [
550
+ "identification",
551
+ "enumeration",
552
+ "multiple_choice",
553
+ "word_problem",
554
+ ],
555
+ "difficultyDistribution": cfg["dist"],
556
+ "bloomLevels": ["remember", "understand", "apply"],
557
+ "includeGraphs": False,
558
+ "excludeTopics": [],
559
+ "purpose": "remedial",
560
+ "targetStudent": student_id,
561
+ },
562
+ "status": "pending",
563
+ "autoGenerated": True,
564
+ "reason": f'Diagnostic identified "{subject}" as At Risk',
565
+ "priority": "high" if overall_risk == "High" else "medium",
566
+ "dueInDays": 7,
567
+ })
568
+ return quizzes
569
+
570
+ # --- AI helpers (Hugging Face) ---
571
+
572
+ async def _generate_learning_path(
573
+ self,
574
+ at_risk_subjects: List[str],
575
+ weak_topics: List[Dict[str, Any]],
576
+ grade_level: str,
577
+ ) -> Optional[str]:
578
+ """Generate a personalised learning path via HF Serverless Inference."""
579
+ try:
580
+ from main import call_hf_chat
581
+
582
+ weakness_lines = ", ".join(at_risk_subjects)
583
+ topic_lines = "\n".join(
584
+ f" - {t['topic']} ({t['accuracy']*100:.0f}% accuracy)"
585
+ for t in weak_topics[:5]
586
+ )
587
+
588
+ prompt = (
589
+ f"Generate a personalised math learning path for a {grade_level} student.\n\n"
590
+ f"Weak subjects: {weakness_lines}\n"
591
+ f"Weak topics:\n{topic_lines}\n\n"
592
+ "Create 5-7 specific activities. For each give:\n"
593
+ "1. Activity title\n"
594
+ "2. Brief description (1-2 sentences)\n"
595
+ "3. Estimated duration\n"
596
+ "4. Type (video, practice, quiz, reading, interactive)\n\n"
597
+ "Format as a numbered list. Be specific."
598
+ )
599
+
600
+ return call_hf_chat(
601
+ messages=[
602
+ {
603
+ "role": "system",
604
+ "content": (
605
+ "You are an educational curriculum expert specialising in "
606
+ "mathematics. Create clear, actionable learning paths."
607
+ ),
608
+ },
609
+ {"role": "user", "content": prompt},
610
+ ],
611
+ max_tokens=1500,
612
+ temperature=0.7,
613
+ )
614
+ except Exception as e:
615
+ logger.warning(f"Learning-path AI call failed: {e}")
616
+ return None
617
+
618
+ async def _generate_teacher_interventions(
619
+ self,
620
+ risk_classifications: Dict[str, Dict[str, Any]],
621
+ weak_topics: List[Dict[str, Any]],
622
+ ) -> Optional[str]:
623
+ """Generate teacher intervention recommendations via HF Serverless Inference."""
624
+ try:
625
+ from main import call_hf_chat
626
+
627
+ at_risk = [
628
+ subj for subj, data in risk_classifications.items()
629
+ if data["status"] == "At Risk"
630
+ ]
631
+ topic_lines = "\n".join(
632
+ f"- {t['topic']} ({t['accuracy']*100:.0f}% accuracy)"
633
+ for t in weak_topics[:5]
634
+ )
635
+
636
+ prompt = (
637
+ "You are an educational intervention specialist. A student has completed "
638
+ "their diagnostic assessment with the following results:\n\n"
639
+ f"At-Risk Subjects: {', '.join(at_risk)}\n\n"
640
+ f"Weak Topics Identified:\n{topic_lines}\n\n"
641
+ "Generate a 'Remedial Path Timeline' with:\n"
642
+ "1. Prioritised list of topics to address (most critical first)\n"
643
+ "2. Suggested teaching strategies for each topic\n"
644
+ "3. Recommended one-on-one intervention activities\n"
645
+ "4. Timeline for reassessment\n"
646
+ "5. Warning signs that student needs additional support\n\n"
647
+ "Keep response under 300 words, structured with clear sections."
648
+ )
649
+
650
+ return call_hf_chat(
651
+ messages=[
652
+ {
653
+ "role": "system",
654
+ "content": (
655
+ "You are an expert educational intervention specialist. "
656
+ "Provide actionable, structured recommendations for teachers."
657
+ ),
658
+ },
659
+ {"role": "user", "content": prompt},
660
+ ],
661
+ max_tokens=1000,
662
+ temperature=0.5,
663
+ )
664
+ except Exception as e:
665
+ logger.warning(f"Teacher-intervention AI call failed: {e}")
666
+ return None
667
+
668
+
669
+ # Module-level singleton
670
+ automation_engine = MathPulseAutomationEngine()
config/env.sample ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepSeek AI API (OpenAI-compatible)
2
+ DEEPSEEK_API_KEY=your_deepseek_api_key_here
3
+ DEEPSEEK_BASE_URL=https://api.deepseek.com
4
+ DEEPSEEK_MODEL=deepseek-chat
5
+ DEEPSEEK_REASONER_MODEL=deepseek-reasoner
6
+
7
+ # Inference provider selection
8
+ # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
9
+ INFERENCE_PROVIDER=deepseek
10
+ INFERENCE_PRO_ENABLED=true
11
+ INFERENCE_PRO_PROVIDER=deepseek
12
+ INFERENCE_GPU_PROVIDER=deepseek
13
+ INFERENCE_CPU_PROVIDER=deepseek
14
+ INFERENCE_ENABLE_PROVIDER_FALLBACK=true
15
+ INFERENCE_PRO_PRIORITY_TASKS=chat,verify_solution
16
+ INFERENCE_PRO_ROUTE_HEADER_NAME=
17
+ INFERENCE_PRO_ROUTE_HEADER_VALUE=true
18
+
19
+ # task policy sets, comma-separated
20
+ INFERENCE_GPU_REQUIRED_TASKS=chat
21
+ INFERENCE_CPU_ONLY_TASKS=risk_classification,analytics_aggregation,file_parsing,auth,default_cpu
22
+ INFERENCE_INTERACTIVE_TASKS=chat,verify_solution,daily_insight
23
+ ENABLE_LLM_RISK_RECOMMENDATIONS=true
24
+
25
+ # local_space provider settings
26
+ # Accepts either runtime host (https://<owner>-<space>.hf.space) or
27
+ # Space page URL (https://huggingface.co/spaces/<owner>/<space>).
28
+ # Example: https://huggingface.co/spaces/Deign86/mathpulse-ai
29
+ INFERENCE_LOCAL_SPACE_URL=http://127.0.0.1:7860
30
+ INFERENCE_LOCAL_SPACE_GENERATE_PATH=/gradio_api/call/generate
31
+ INFERENCE_LOCAL_SPACE_TIMEOUT_SEC=180
32
+
33
+ # HF_TOKEN kept for Hugging Face Space deployment and dataset push only
34
+ # Alternative env names accepted by runtime/startup checks: HUGGING_FACE_API_TOKEN, HUGGINGFACE_API_TOKEN
35
+ HF_TOKEN=your_hf_token
36
+ FIREBASE_AUTH_PROJECT_ID=mathpulse-ai-2026
37
+ # Prefer one of the options below for backend Firestore/Admin access in deployment:
38
+ # FIREBASE_SERVICE_ACCOUNT_JSON={"type":"service_account",...}
39
+ # FIREBASE_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
40
+ # DeepSeek timeout settings
41
+ INFERENCE_HF_TIMEOUT_SEC=90
42
+ INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
43
+ INFERENCE_BACKGROUND_TIMEOUT_SEC=120
44
+
45
+ # Curriculum PDF storage
46
+ # Store the binary curriculum files in a Hugging Face dataset or Space repo,
47
+ # then point the backend at that repo so it downloads them at build/startup time.
48
+ CURRICULUM_SOURCE_REPO_ID=Deign86/mathpulse-curriculum
49
+ CURRICULUM_SOURCE_REPO_TYPE=dataset
50
+ CURRICULUM_SOURCE_REVISION=main
51
+
52
+ # Transactional email settings for admin-created accounts
53
+ # Primary provider: Brevo Transactional API
54
+ BREVO_API_KEY=
55
+ # Optional: Brevo MCP token (base64 JSON containing api_key) if BREVO_API_KEY is not set
56
+ BREVO_MCP_TOKEN=
57
+ # Optional SMTP fallback provider (Brevo SMTP relay)
58
+ BREVO_SMTP_LOGIN=
59
+ BREVO_SMTP_KEY=
60
+ BREVO_SMTP_HOST=smtp-relay.brevo.com
61
+ BREVO_SMTP_PORT=587
62
+ MAIL_FROM_ADDRESS=noreply@mathpulse.ai
63
+ MAIL_FROM_NAME=MathPulse AI
64
+ MAIL_SEND_TIMEOUT_SEC=15
65
+ APP_LOGIN_URL=https://mathpulse.ai
66
+ # Optional: absolute http(s) URL used as the email header avatar image.
67
+ # If unset, backend derives this from APP_LOGIN_URL + /avatar/avatar_icon.png.
68
+ APP_BRAND_AVATAR_URL=
69
+
70
+ # model defaults
71
+ # Global default model for all tasks.
72
+ INFERENCE_MODEL_ID=deepseek-chat
73
+ INFERENCE_ENFORCE_QWEN_ONLY=true
74
+ INFERENCE_QWEN_LOCK_MODEL=deepseek-chat
75
+ INFERENCE_MAX_NEW_TOKENS=8192
76
+ INFERENCE_TEMPERATURE=0.2
77
+ INFERENCE_TOP_P=0.9
78
+ INFERENCE_CHAT_MODEL_ID=deepseek-chat
79
+ # Temporary chat-only override for experiments (clear to roll back instantly).
80
+ # Example: Qwen/Qwen3-32B
81
+ INFERENCE_CHAT_MODEL_TEMP_OVERRIDE=
82
+ INFERENCE_CHAT_STRICT_MODEL_ONLY=true
83
+ INFERENCE_CHAT_HARD_MODEL_ID=meta-llama/Meta-Llama-3-70B-Instruct
84
+ INFERENCE_CHAT_HARD_TRIGGER_ENABLED=false
85
+ INFERENCE_CHAT_HARD_PROMPT_CHARS=650
86
+ INFERENCE_CHAT_HARD_HISTORY_CHARS=1500
87
+ INFERENCE_CHAT_HARD_KEYWORDS=step-by-step,show all steps,explain each step,justify each step,derive,derivation,proof,prove,rigorous,multi-step,word problem
88
+ CHAT_MAX_NEW_TOKENS=8192
89
+ CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC=90
90
+ CHAT_STREAM_TOTAL_TIMEOUT_SEC=900
91
+ CHAT_STREAM_CONTINUATION_ENABLED=true
92
+ CHAT_STREAM_CONTINUATION_MAX_ROUNDS=2
93
+ CHAT_STREAM_CONTINUATION_MIN_NEW_CHARS=24
94
+ CHAT_STREAM_CONTINUATION_TAIL_CHARS=900
95
+ CHAT_STREAM_COMPLETION_MODE_DEFAULT=auto
96
+ # Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
97
+ HF_QUIZ_MODEL_ID=
98
+ HF_QUIZ_JSON_REPAIR_MODEL_ID=deepseek-chat
99
+
100
+ # retry behavior
101
+ INFERENCE_MAX_RETRIES=3
102
+ INFERENCE_BACKOFF_SEC=1.5
103
+ INFERENCE_INTERACTIVE_MAX_RETRIES=1
104
+ INFERENCE_BACKGROUND_MAX_RETRIES=3
105
+ INFERENCE_INTERACTIVE_BACKOFF_SEC=1.0
106
+ INFERENCE_BACKGROUND_BACKOFF_SEC=1.75
107
+ INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH=1
108
+ # Max simultaneous blocking HF calls allowed from async endpoints.
109
+ HF_BLOCKING_CALL_CONCURRENCY=16
110
+ HF_ASYNC_MAX_CONNECTIONS=64
111
+ HF_ASYNC_MAX_KEEPALIVE_CONNECTIONS=32
112
+ HF_ASYNC_CONNECT_TIMEOUT_SEC=10.0
113
+ HF_ASYNC_WRITE_TIMEOUT_SEC=30.0
114
+ HF_ASYNC_POOL_TIMEOUT_SEC=10.0
115
+
116
+ # fallback model ids, comma-separated
117
+ INFERENCE_FALLBACK_MODELS=
118
+
119
+ # async generation controls
120
+ ENABLE_ASYNC_GENERATION=true
121
+ ASYNC_TASK_TTL_SECONDS=3600
122
+ ASYNC_TASK_MAX_ITEMS=400
config/models.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ models:
2
+ primary:
3
+ id: deepseek-chat
4
+ description: Default DeepSeek chat model — all chat tasks, quizzes, lessons, reasoning
5
+ max_new_tokens: 800
6
+ temperature: 0.7
7
+ top_p: 0.9
8
+
9
+ rag_primary:
10
+ id: deepseek-reasoner
11
+ description: DeepSeek reasoner — extended reasoning for complex RAG tasks
12
+ max_new_tokens: 1800
13
+ temperature: 0.2
14
+ top_p: 0.9
15
+ enable_thinking_tasks:
16
+ - rag_lesson
17
+ - verify_solution
18
+ - risk_narrative
19
+ no_thinking_tasks:
20
+ - chat
21
+ - quiz_generation
22
+ - learning_path
23
+ - daily_insight
24
+
25
+ embedding:
26
+ id: BAAI/bge-small-en-v1.5
27
+ description: Embedding model for RAG retrieval — curriculum vectorstore ingestion and semantic search
28
+ note: Not part of the generation pipeline. Read from EMBEDDING_MODEL env var only. Not swappable via admin panel.
29
+
30
+ model_capabilities:
31
+ sequential_only:
32
+ - deepseek-reasoner
33
+ supports_thinking:
34
+ - deepseek-reasoner
35
+
36
+ routing:
37
+ task_model_map:
38
+ chat: deepseek-chat
39
+ verify_solution: deepseek-reasoner
40
+ lesson_generation: deepseek-chat
41
+ quiz_generation: deepseek-chat
42
+ learning_path: deepseek-chat
43
+ daily_insight: deepseek-chat
44
+ risk_classification: deepseek-chat
45
+ risk_narrative: deepseek-reasoner
46
+ rag_lesson: deepseek-reasoner
47
+ rag_problem: deepseek-chat
48
+ rag_analysis_context: deepseek-chat
49
+
50
+ task_fallback_model_map:
51
+ chat:
52
+ - deepseek-chat
53
+ verify_solution:
54
+ - deepseek-chat
55
+ lesson_generation:
56
+ - deepseek-chat
57
+ quiz_generation:
58
+ - deepseek-chat
59
+ learning_path:
60
+ - deepseek-chat
61
+ daily_insight:
62
+ - deepseek-chat
63
+ risk_classification:
64
+ - deepseek-chat
65
+ risk_narrative:
66
+ - deepseek-chat
67
+ rag_lesson:
68
+ - deepseek-chat
69
+ rag_problem:
70
+ - deepseek-chat
71
+ rag_analysis_context:
72
+ - deepseek-chat
73
+
74
+ task_provider_map:
75
+ chat: deepseek
76
+ verify_solution: deepseek
77
+ lesson_generation: deepseek
78
+ quiz_generation: deepseek
79
+ learning_path: deepseek
80
+ daily_insight: deepseek
81
+ risk_classification: deepseek
82
+ risk_narrative: deepseek
83
+ rag_lesson: deepseek
84
+ rag_problem: deepseek
85
+ rag_analysis_context: deepseek
datasets/curriculum/.gitkeep ADDED
File without changes
datasets/sample_curriculum.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "content": "The learner demonstrates understanding of key concepts of functions. Functions can be represented as ordered pairs, tables of values, graphs, and equations. A function is a relation where each element in the domain corresponds to exactly one element in the range. Key types include linear functions (f(x)=mx+b), quadratic functions (f(x)=ax^2+bx+c), and polynomial functions of higher degrees.",
4
+ "subject": "General Mathematics",
5
+ "quarter": 1,
6
+ "content_domain": "Functions and Their Graphs",
7
+ "chunk_type": "content_explanation",
8
+ "source_file": "sample_curriculum.json",
9
+ "page": 1
10
+ },
11
+ {
12
+ "content": "Learning Competency (M11GM-Ia-1): Represents real-life situations using functions, including piece-wise functions. Example: A taxi fare is computed as P40 for the first 500 meters plus P3.50 for every additional 300 meters or fraction thereof. This is a piecewise function where f(d)=40 for d<=500 and f(d)=40+3.5*ceil((d-500)/300) for d>500.",
13
+ "subject": "General Mathematics",
14
+ "quarter": 1,
15
+ "content_domain": "Functions and Their Graphs",
16
+ "chunk_type": "learning_competency",
17
+ "source_file": "sample_curriculum.json",
18
+ "page": 1
19
+ },
20
+ {
21
+ "content": "Learning Competency (M11GM-Ia-2): Evaluates a function. To evaluate f(x) at x=a, substitute a for every occurrence of x in the expression and simplify. Example: Given f(x)=2x^2-3x+5, evaluate f(2): f(2)=2(4)-3(2)+5=8-6+5=7.",
22
+ "subject": "General Mathematics",
23
+ "quarter": 1,
24
+ "content_domain": "Functions and Their Graphs",
25
+ "chunk_type": "content_explanation",
26
+ "source_file": "sample_curriculum.json",
27
+ "page": 2
28
+ },
29
+ {
30
+ "content": "Rational Functions have the form f(x)=P(x)/Q(x) where P(x) and Q(x) are polynomials and Q(x)!=0. Key features: vertical asymptotes occur where Q(x)=0 but P(x)!=0; horizontal asymptotes depend on the degrees of P and Q. The domain of f(x) excludes all x-values that make the denominator zero. Solving rational equations and inequalities requires careful handling of the denominator signs.",
31
+ "subject": "General Mathematics",
32
+ "quarter": 1,
33
+ "content_domain": "Rational Functions",
34
+ "chunk_type": "content_explanation",
35
+ "source_file": "sample_curriculum.json",
36
+ "page": 3
37
+ },
38
+ {
39
+ "content": "Learning Competency (M11GM-Ib-3): Solves problems involving rational functions, rational equations, and rational inequalities. Example: A jeepney operator's average revenue per trip is modeled by R(n)=(5000+300n)/n where n is the number of trips per day. Find how many trips are needed for average revenue to reach P450.",
40
+ "subject": "General Mathematics",
41
+ "quarter": 1,
42
+ "content_domain": "Rational Functions",
43
+ "chunk_type": "learning_competency",
44
+ "source_file": "sample_curriculum.json",
45
+ "page": 3
46
+ },
47
+ {
48
+ "content": "Exponential Functions f(x)=a*b^x (a!=0, b>0, b!=1) model growth and decay. Key properties: domain is all real numbers; range is (0,infinity) for a>0; horizontal asymptote at y=0; y-intercept at (0,a). Solving exponential equations involves expressing both sides with the same base and equating exponents. Philippine applications include bacterial growth and radioactive decay in medical contexts.",
49
+ "subject": "General Mathematics",
50
+ "quarter": 2,
51
+ "content_domain": "Exponential Functions",
52
+ "chunk_type": "content_explanation",
53
+ "source_file": "sample_curriculum.json",
54
+ "page": 4
55
+ },
56
+ {
57
+ "content": "Compound Interest is calculated using A=P(1+r/n)^(nt) where A is the final amount, P is the principal, r is the annual interest rate (decimal), n is the number of compounding periods per year, and t is the time in years. Philippine banks offer savings and loan products with various compounding frequencies: annually (n=1), semi-annually (n=2), quarterly (n=4), monthly (n=12).",
58
+ "subject": "General Mathematics",
59
+ "quarter": 3,
60
+ "content_domain": "Business Mathematics",
61
+ "chunk_type": "content_explanation",
62
+ "source_file": "sample_curriculum.json",
63
+ "page": 5
64
+ },
65
+ {
66
+ "content": "Learning Competency (M11GM-IIc-1): Illustrates simple and compound interests. Simple interest I=Prt where P is principal, r is rate, t is time. Compound interest uses compounding formula. Example: Juana deposits P50,000 in a bank offering 3.5% interest compounded quarterly. After 3 years, her balance will be A=50000(1+0.035/4)^(4*3)=55543.19 using the compound interest formula.",
67
+ "subject": "General Mathematics",
68
+ "quarter": 3,
69
+ "content_domain": "Business Mathematics",
70
+ "chunk_type": "learning_competency",
71
+ "source_file": "sample_curriculum.json",
72
+ "page": 5
73
+ },
74
+ {
75
+ "content": "Annuities are sequences of equal payments made at equal time intervals. The future value of an ordinary annuity (payment at end of period) is FV=PMT*[(1+r)^n-1]/r and present value is PV=PMT*[1-(1+r)^(-n)]/r. Applications include Pag-IBIG housing loans, SSS contributions, and insurance premiums. Philippine context problems often involve 15-year and 25-year housing loans.",
76
+ "subject": "General Mathematics",
77
+ "quarter": 3,
78
+ "content_domain": "Business Mathematics",
79
+ "chunk_type": "content_explanation",
80
+ "source_file": "sample_curriculum.json",
81
+ "page": 6
82
+ },
83
+ {
84
+ "content": "Stocks and Bonds represent two types of investments. Stocks represent ownership shares in a corporation with dividends as earnings — prices are quoted per share in the Philippine Stock Exchange (PSE). Bonds are debt instruments where the issuing entity borrows money and pays periodic interest then repays principal at maturity. Key computations: stock yield = annual dividend per share / market price; bond yield = annual interest payment / market price.",
85
+ "subject": "General Mathematics",
86
+ "quarter": 3,
87
+ "content_domain": "Business Mathematics",
88
+ "chunk_type": "content_explanation",
89
+ "source_file": "sample_curriculum.json",
90
+ "page": 6
91
+ },
92
+ {
93
+ "content": "A Random Variable is a function that assigns a real number to each outcome in the sample space of a random experiment. A Discrete Random Variable has a countable number of possible values. The probability mass function (PMF) gives the probability P(X=x) for each value x. Key properties: sum of all P(X=x)=1 and P(X=x)>=0 for all x. Common discrete distributions include Binomial for success/failure and Poisson for rare events.",
94
+ "subject": "Statistics and Probability",
95
+ "quarter": 1,
96
+ "content_domain": "Random Variables and Probability Distributions",
97
+ "chunk_type": "content_explanation",
98
+ "source_file": "sample_curriculum.json",
99
+ "page": 7
100
+ },
101
+ {
102
+ "content": "Learning Competency (M11/12SP-IIIa-1): Illustrates a random variable (discrete and continuous). A discrete random variable takes countable values like the number of defective items in a batch of 50 bulbs. A continuous random variable takes infinite uncountable values in an interval, such as the height of Grade 11 students in centimeters. The learner distinguishes between discrete and continuous random variables for real Philippine data.",
103
+ "subject": "Statistics and Probability",
104
+ "quarter": 1,
105
+ "content_domain": "Random Variables and Probability Distributions",
106
+ "chunk_type": "learning_competency",
107
+ "source_file": "sample_curriculum.json",
108
+ "page": 7
109
+ },
110
+ {
111
+ "content": "The Normal Distribution (Gaussian) is a continuous probability distribution with a bell-shaped curve symmetric about the mean mu. Standard normal distribution has mu=0 and sigma=1; converting to standard normal z=(x-mu)/sigma allows probability calculation using z-tables. Properties: 68% of data within 1 sigma of mu, 95% within 2 sigma, 99.7% within 3 sigma. Philippine applications include standardized test scores (NAT, college entrance exams) and quality control in manufacturing.",
112
+ "subject": "Statistics and Probability",
113
+ "quarter": 1,
114
+ "content_domain": "Random Variables and Probability Distributions",
115
+ "chunk_type": "content_explanation",
116
+ "source_file": "sample_curriculum.json",
117
+ "page": 8
118
+ },
119
+ {
120
+ "content": "Conic Sections are curves formed by the intersection of a plane and a double-napped cone. The four types are: Circle (all points equidistant from a center), Parabola (all points equidistant from a focus and directrix), Ellipse (sum of distances to two foci is constant), and Hyperbola (absolute difference of distances to two foci is constant). Standard forms: Circle (x-h)^2+(y-k)^2=r^2; Parabola (x-h)^2=4p(y-k) or (y-k)^2=4p(x-h).",
121
+ "subject": "Pre-Calculus",
122
+ "quarter": 1,
123
+ "content_domain": "Analytic Geometry",
124
+ "chunk_type": "content_explanation",
125
+ "source_file": "sample_curriculum.json",
126
+ "page": 9
127
+ },
128
+ {
129
+ "content": "Learning Competency (STEM_PC11AG-Ia-1): Illustrates the different types of conic sections: circle, parabola, ellipse, and hyperbola. The learner identifies conic sections from their standard equations and determines their key properties including center, radius (for circles), vertex, focus, directrix (for parabolas), and asymptotes (for hyperbolas). Real-world applications include satellite dishes, telescope mirrors, and bridge arch designs.",
130
+ "subject": "Pre-Calculus",
131
+ "quarter": 1,
132
+ "content_domain": "Analytic Geometry",
133
+ "chunk_type": "learning_competency",
134
+ "source_file": "sample_curriculum.json",
135
+ "page": 9
136
+ }
137
+ ]
main.py ADDED
The diff for this file is too large to render. See raw diff
 
models/.gitkeep ADDED
File without changes
pre_deploy_check.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Pre-deployment validation script for MathPulse AI backend.
4
+
5
+ This script runs BEFORE deployment to catch issues early and prevent
6
+ restart loops on HF Spaces.
7
+
8
+ Usage:
9
+ python backend/pre_deploy_check.py
10
+
11
+ Exit codes:
12
+ 0: All checks passed, safe to deploy
13
+ 1: Critical issue found, deployment should be blocked
14
+ """
15
+
16
import sys
import os

# Add repo root to path (for services/ delegation) AND backend to path
# NOTE(review): this script appears to live at the backend root, so _repo_root
# is the backend directory and _parent is the repository root above it —
# confirm against the actual checkout layout.
_repo_root = os.path.dirname(os.path.abspath(__file__))
_parent = os.path.dirname(_repo_root)
_backend = _repo_root

# Add in order: parent first (so services/ can delegate), then backend (for when services/__init__.py tries to import)
# Because both use insert(0, ...), the backend dir ends up AHEAD of the parent
# on sys.path.
if _parent not in sys.path:
    sys.path.insert(0, _parent)
if _backend not in sys.path:
    sys.path.insert(0, _backend)
29
+
30
def main() -> int:
    """Run all pre-deployment validation checks.

    Returns 0 when every check passes (safe to deploy) and 1 when any check
    raises, signalling CI to block the push to HF Spaces.
    """
    banner = "=" * 70
    print(banner)
    print("🔍 PRE-DEPLOYMENT VALIDATION - Backend will run these checks")
    print(banner)
    print()

    try:
        # Import the validation module
        from backend.startup_validation import (
            validate_imports,
            validate_environment,
            validate_config_files,
            validate_file_structure,
            validate_inference_client_config,
        )

        print("Running pre-deployment checks...\n")

        # Structural checks first, then imports, environment, config files,
        # and finally the inference client configuration.
        checks = (
            validate_file_structure,
            validate_imports,
            validate_environment,
            validate_config_files,
            validate_inference_client_config,
        )
        for check in checks:
            check()
            print()

        print(banner)
        print("✅ PRE-DEPLOYMENT VALIDATION PASSED")
        print(banner)
        print()
        print("Backend is ready for deployment to HF Spaces.")
        print()

        return 0

    except Exception as e:
        print()
        print(banner)
        print("❌ PRE-DEPLOYMENT VALIDATION FAILED")
        print(banner)
        print()
        print(f"Error: {e}")
        print()
        print("🛑 BLOCK DEPLOYMENT - Fix errors above before pushing to main branch")
        print()

        return 1


if __name__ == "__main__":
    sys.exit(main())
rag/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Curriculum RAG package for DepEd-grounded retrieval utilities."""
2
+
3
+ from .curriculum_rag import (
4
+ retrieve_curriculum_context,
5
+ build_lesson_prompt,
6
+ build_problem_generation_prompt,
7
+ build_analysis_curriculum_context,
8
+ build_lesson_query,
9
+ format_retrieved_chunks,
10
+ summarize_retrieval_confidence,
11
+ )
12
+ from .vectorstore_loader import reset_vectorstore_singleton
13
+
14
+ __all__ = [
15
+ "retrieve_curriculum_context",
16
+ "build_lesson_prompt",
17
+ "build_problem_generation_prompt",
18
+ "build_analysis_curriculum_context",
19
+ "build_lesson_query",
20
+ "format_retrieved_chunks",
21
+ "summarize_retrieval_confidence",
22
+ "reset_vectorstore_singleton",
23
+ ]
rag/curriculum_rag.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Updated curriculum RAG with exact match retrieval and 7-section notebook output.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+
10
def _to_where(
    subject: Optional[str] = None,
    quarter: Optional[int] = None,
    content_domain: Optional[str] = None,
    chunk_type: Optional[str] = None,
    module_id: Optional[str] = None,
    lesson_id: Optional[str] = None,
    competency_code: Optional[str] = None,
    storage_path: Optional[str] = None,
) -> Optional[Dict[str, object]]:
    """Build a Chroma ``where`` filter from the supplied metadata fields.

    Returns None when nothing is set, a single equality clause when exactly
    one field is set, and an ``$and`` of equality clauses otherwise.
    """
    clauses: List[Dict[str, object]] = []
    for field, value in (
        ("subject", subject),
        ("quarter", quarter),
        ("content_domain", content_domain),
        ("chunk_type", chunk_type),
        ("module_id", module_id),
        ("lesson_id", lesson_id),
        ("competency_code", competency_code),
        ("storage_path", storage_path),
    ):
        if field == "quarter":
            # quarter is numeric: 0 is falsy but still a valid filter value,
            # so test against None and coerce to int.
            if value is not None:
                clauses.append({"quarter": {"$eq": int(value)}})
        elif value:
            clauses.append({field: {"$eq": value}})

    if not clauses:
        return None
    return clauses[0] if len(clauses) == 1 else {"$and": clauses}
42
+
43
+
44
def _distance_to_score(distance: float) -> float:
    """Map a non-negative vector distance onto a (0, 1] relevance score."""
    # Clamp negatives to zero so a distance of 0 maps to a perfect 1.0.
    clamped = distance if distance > 0.0 else 0.0
    return round(1.0 / (1.0 + clamped), 4)
46
+
47
+
48
def retrieve_curriculum_context(
    query: str,
    subject: str | None = None,
    quarter: int | None = None,
    content_domain: str | None = None,
    chunk_type: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> list[dict]:
    """Semantic search over the curriculum vectorstore, with metadata filters.

    Returns up to ``top_k`` rows, each a flat dict of chunk content plus its
    metadata and a similarity ``score`` derived from the query distance.
    """
    from backend.rag.vectorstore_loader import get_vectorstore_components

    _, collection, embedder = get_vectorstore_components()
    where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)

    # BGE-style models expect this instruction prefix on the query side.
    prefixed_query = f"Represent this sentence for searching relevant passages: {query}"
    query_embedding = embedder.encode(
        prefixed_query,
        normalize_embeddings=True,
    ).tolist()

    result = collection.query(
        query_embeddings=[query_embedding],
        n_results=max(1, top_k),
        where=where,
        include=["documents", "metadatas", "distances"],
    )

    # Chroma returns one inner list per query embedding; we sent exactly one.
    documents = (result.get("documents") or [[]])[0]
    metadatas = (result.get("metadatas") or [[]])[0]
    distances = (result.get("distances") or [[]])[0]

    rows: List[dict] = []
    for position, content in enumerate(documents):
        md = metadatas[position] if position < len(metadatas) and isinstance(metadatas[position], dict) else {}
        # Missing distance defaults to 1.0 (a middling 0.5 score).
        distance = float(distances[position]) if position < len(distances) else 1.0
        rows.append({
            "content": str(content or ""),
            "subject": str(md.get("subject") or "unknown"),
            "quarter": int(md.get("quarter") or 0),
            "content_domain": str(md.get("content_domain") or "general"),
            "chunk_type": str(md.get("chunk_type") or "concept"),
            "source_file": str(md.get("source_file") or ""),
            "storage_path": str(md.get("storage_path") or ""),
            "module_id": str(md.get("module_id") or ""),
            "lesson_id": str(md.get("lesson_id") or ""),
            "competency_code": str(md.get("competency_code") or ""),
            "page": int(md.get("page") or 0),
            "score": _distance_to_score(distance),
        })
    return rows
101
+
102
+
103
def build_exact_lesson_query(
    topic: str,
    subject: str,
    quarter: int,
    lesson_title: str | None = None,
    competency: str | None = None,
    module_unit: str | None = None,
    learner_level: str | None = None,
    competency_code: str | None = None,
) -> str:
    """Compose a pipe-delimited retrieval query from the lesson descriptors.

    Topic, subject, and quarter are always present; the optional descriptors
    are appended in order, skipping blanks.
    """
    tokens = [topic, subject, f"Quarter {quarter}"]
    for optional in (lesson_title, competency, module_unit, learner_level, competency_code):
        trimmed = str(optional or "").strip()
        if trimmed:
            tokens.append(trimmed)
    return " | ".join(tokens)
119
+
120
+
121
def build_lesson_query(
    topic: str,
    subject: str,
    quarter: int,
    *,
    lesson_title: Optional[str] = None,
    competency: Optional[str] = None,
    module_unit: Optional[str] = None,
    learner_level: Optional[str] = None,
) -> str:
    """Build a pipe-delimited lesson retrieval query.

    Like build_exact_lesson_query but without a competency code, and with
    the optional descriptors keyword-only.
    """
    tokens = [topic, subject, f"Quarter {quarter}"]
    tokens.extend(
        trimmed
        for trimmed in (str(v or "").strip() for v in (lesson_title, competency, module_unit, learner_level))
        if trimmed
    )
    return " | ".join(tokens)
137
+
138
+
139
def retrieve_lesson_pdf_context(
    topic: str,
    subject: str,
    quarter: int,
    lesson_title: str | None = None,
    competency: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> Tuple[list[dict], str]:
    """Retrieve chunks by storage_path exact match + semantic ranking; fallback to general query.

    Returns (chunks, mode) where mode is "exact" (confident storage_path hit),
    "hybrid" (storage_path hits merged with general hits), or "general".
    """
    exact_hits: list[dict] = []
    if storage_path:
        exact_hits = retrieve_curriculum_context(
            query=topic,
            subject=subject,
            quarter=quarter,
            storage_path=storage_path,
            top_k=top_k,
        )
        # A single confident hit (score >= 0.65) is enough to trust the
        # storage_path-scoped results on their own.
        if any(hit["score"] >= 0.65 for hit in exact_hits):
            return exact_hits, "exact"

    broad_hits = retrieve_curriculum_context(
        query=topic,
        subject=subject,
        quarter=quarter,
        top_k=top_k,
    )

    if storage_path and exact_hits:
        # Merge both result sets, dedupe on (file, page, content prefix),
        # and keep the top_k highest-scoring chunks.
        seen: set = set()
        merged: list[dict] = []
        for hit in exact_hits + broad_hits:
            fingerprint = f"{hit.get('source_file')}:{hit.get('page')}:{hit.get('content', '')[:60]}"
            if fingerprint in seen:
                continue
            seen.add(fingerprint)
            merged.append(hit)
        merged.sort(key=lambda h: h.get("score", 0), reverse=True)
        return merged[:top_k], "hybrid"

    return broad_hits, "general"
183
+
184
+
185
def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
    """Render retrieved chunks as a numbered evidence list for LLM prompts."""
    if not curriculum_chunks:
        return "No curriculum context retrieved."
    entries = [
        f"{index}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
        f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) score={chunk.get('score')}\n"
        f" Excerpt: {chunk.get('content', '')}"
        for index, chunk in enumerate(curriculum_chunks, start=1)
    ]
    return "\n".join(entries)
194
+
195
+
196
def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, object]:
    """Aggregate the top-5 chunk scores into a confidence value and band.

    Fix: the return annotation previously used the builtin ``any`` function
    (``Dict[str, any]``) instead of a real type.

    Returns a dict with ``confidence`` (mean of the strongest five scores,
    rounded to 3 places) and ``band``: "high" >= 0.72, "medium" >= 0.5,
    otherwise "low". An empty input yields confidence 0.0 / "low".
    """
    if not curriculum_chunks:
        return {"confidence": 0.0, "band": "low"}

    # Only the five strongest hits contribute; missing scores count as 0.
    top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
    score = sum(top_scores) / max(1, len(top_scores))
    band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
    return {"confidence": round(score, 3), "band": band}
204
+
205
+
206
def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
    """Organize retrieved chunks into lesson section categories.

    Buckets each chunk by its ``content_domain`` metadata; unknown domains
    (and chunks without one) land in "general". Every known section key is
    always present in the result, even when empty.

    Fix: removed the ``domain_priority`` dict, which was dead code (defined
    but never used).
    """
    sections: Dict[str, List[dict]] = {
        "introduction": [],
        "key_concepts": [],
        "worked_examples": [],
        "important_notes": [],
        "practice": [],
        "summary": [],
        "assessment": [],
        "general": [],
    }
    for chunk in chunks:
        domain = chunk.get("content_domain", "general")
        sections[domain if domain in sections else "general"].append(chunk)
    return sections
230
+
231
+
232
def build_lesson_prompt(
    *,
    lesson_title: str,
    competency: str,
    grade_level: str,
    subject: str,
    quarter: int,
    learner_level: Optional[str],
    module_unit: Optional[str],
    curriculum_chunks: list[dict],
    competency_code: Optional[str] = None,
) -> str:
    """Build the LLM prompt for a 7-section, curriculum-grounded lesson.

    Fixes: removed the unused ``organized`` variable (the result of
    organize_chunks_by_section was computed and discarded), and repaired the
    malformed key_concepts line of the JSON template (a stray ``\\n`` before
    the closing brace made the example JSON invalid).
    """
    refs_text = format_retrieved_chunks(curriculum_chunks)

    return (
        "You are a DepEd-aligned Grade 11-12 mathematics instructional designer.\n"
        "Generate a lesson in JSON format. Use ONLY the retrieved curriculum evidence below.\n"
        "Do NOT invent content. Do NOT add generic motivational text. All content must be grounded in the retrieved excerpts.\n\n"
        f"Lesson title: {lesson_title}\n"
        f"Competency code: {competency_code or 'n/a'}\n"
        f"Curriculum competency: {competency}\n"
        f"Grade level: {grade_level}\n"
        f"Subject: {subject}\n"
        f"Quarter: Q{quarter}\n"
        f"Learner level: {learner_level or 'Grade 11-12'}\n"
        f"Module/unit: {module_unit or 'n/a'}\n\n"
        "[CURRICULUM CONTEXT]\n"
        f"{refs_text}\n\n"
        "Return ONLY valid JSON with this exact structure. All 7 sections are required:\n"
        "{\n"
        ' "sections": [\n'
        ' {"type": "introduction", "title": "Introduction", "content": "..."},\n'
        ' {"type": "key_concepts", "title": "Key Concepts", "content": "...", "callouts": [{"type":"important|tip|warning","text":"..."}]},\n'
        ' {"type": "video", "title": "Video Lesson", "content": "...", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},\n'
        ' {"type": "worked_examples", "title": "Worked Examples", "examples": [{"problem":"...","steps":["Step 1: ...","Step 2: ..."],"answer":"..."}]},\n'
        ' {"type": "important_notes", "title": "Important Notes", "bulletPoints": ["...","..."]},\n'
        ' {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": [{"question":"...","solution":"..."}]},\n'
        ' {"type": "summary", "title": "Summary", "content": "..."}\n'
        " ],\n"
        ' "needsReview": false\n'
        "}\n\n"
        "Rules:\n"
        "- content in introduction, key_concepts, important_notes, summary: use paragraph/bullet text grounded in retrieved chunks\n"
        "- examples must reflect actual content from the retrieved curriculum (real formulas, real contexts)\n"
        "- practiceProblems should be derivable from worked examples\n"
        "- callouts: type is 'important', 'tip', or 'warning'\n"
        "- video section: content is a brief sentence, leave videoId empty (will be filled by backend)\n"
        "- Do not use placeholder text like 'placeholder' or 'example text'\n"
        "- Do not fabricate worked examples - use actual curriculum content\n"
    )
283
+
284
+
285
def build_problem_generation_prompt(topic: str, difficulty: str, curriculum_chunks: list[dict]) -> str:
    """Build a prompt asking for one practice problem grounded in the retrieved curriculum."""
    numbered = [
        f"{index}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
        f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) {chunk.get('content', '')}"
        for index, chunk in enumerate(curriculum_chunks, start=1)
    ]
    evidence = "\n".join(numbered) if numbered else "No curriculum context retrieved."

    return (
        "Generate one practice problem strictly aligned to the retrieved DepEd competency scope.\n"
        "Do not include topics outside the competency context.\n\n"
        f"Topic: {topic}\n"
        f"Difficulty: {difficulty}\n\n"
        "[CURRICULUM CONTEXT]\n"
        f"{evidence}\n\n"
        "Return JSON with keys: problem, solution, competencyReference"
    )
303
+
304
+
305
def build_analysis_curriculum_context(weak_topics: list[str], subject: str) -> list[dict]:
    """Fetch deduplicated learning-competency chunks for each weak topic.

    Queries the vectorstore once per topic (top 2 hits each) and dedupes on
    (source file, page, content prefix), keeping first occurrences.
    """
    unique: Dict[str, dict] = {}
    for weak_topic in weak_topics:
        hits = retrieve_curriculum_context(
            query=f"DepEd learning competency for {weak_topic}",
            subject=subject,
            chunk_type="learning_competency",
            top_k=2,
        )
        for hit in hits:
            fingerprint = f"{hit.get('source_file')}::{hit.get('page')}::{hit.get('content', '')[:80]}"
            unique.setdefault(fingerprint, hit)
    return list(unique.values())
rag/firebase_storage_loader.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Firebase Storage PDF loader for curriculum ingestion.
3
+ Downloads PDFs from Firebase Storage and extracts text for ChromaDB indexing.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Tuple
12
+
13
logger = logging.getLogger("mathpulse.fb_storage_loader")

# Process-wide flag: True once a Firebase app has been initialized (or an
# already-initialized app was detected), so later calls skip re-init.
_FIREBASE_INITIALIZED = False


def _init_firebase_storage() -> Tuple[object, object]:
    """Lazily initialize Firebase Admin and return (storage module, bucket).

    Fix: the return annotation previously used the builtin ``any`` function
    (``Tuple[any, any]``) instead of a real type.

    Returns (None, None) when firebase_admin is not installed, credentials
    are unavailable, or initialization fails — callers treat that as
    "storage disabled" rather than an error.
    """
    global _FIREBASE_INITIALIZED
    if _FIREBASE_INITIALIZED:
        try:
            from firebase_admin import storage as fb_storage
            bucket = fb_storage.bucket()
            return fb_storage, bucket
        except Exception as e:
            logger.warning("Firebase storage unavailable: %s", e)
            return None, None

    try:
        import firebase_admin
        from firebase_admin import credentials, storage
    except ImportError:
        logger.warning("firebase_admin not installed")
        return None, None

    # Another component may already have initialized the default app.
    if firebase_admin._apps:
        _FIREBASE_INITIALIZED = True
        bucket = storage.bucket()
        return storage, bucket

    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")

    try:
        # Credential precedence: inline JSON env var, then a file path,
        # then Application Default Credentials.
        if sa_json:
            import json as _json
            creds = credentials.Certificate(_json.loads(sa_json))
        elif sa_file and Path(sa_file).exists():
            creds = credentials.Certificate(sa_file)
        else:
            creds = credentials.ApplicationDefault()

        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
        _FIREBASE_INITIALIZED = True
        bucket = storage.bucket()
        return storage, bucket
    except Exception as e:
        logger.warning("Firebase init failed: %s", e)
        return None, None
61
+
62
+
63
def download_pdf_from_storage(storage_path: str, dest_path: Optional[str] = None) -> Optional[bytes]:
    """Download a PDF from Firebase Storage and return its bytes.

    Optionally persists the bytes to ``dest_path`` (parent dirs created).
    Returns None when storage is unavailable, the blob is missing, or the
    download/write fails.
    """
    _, bucket = _init_firebase_storage()
    if bucket is None:
        logger.warning("Firebase Storage not available, skipping download")
        return None

    try:
        blob = bucket.blob(storage_path)
        if not blob.exists():
            logger.warning("Blob does not exist: %s", storage_path)
            return None
        payload = blob.download_as_bytes()
        logger.info("Downloaded %s (%d bytes)", storage_path, len(payload))

        if dest_path:
            target = Path(dest_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(payload)
            logger.info("Saved to %s", dest_path)

        return payload
    except Exception as e:
        logger.error("Failed to download %s: %s", storage_path, e)
        return None
88
+
89
+
90
def list_curriculum_blobs(prefix: str = "curriculum/") -> List[Dict[str, object]]:
    """List all PDF blobs under a prefix in Firebase Storage.

    Fix: the return annotation claimed ``List[Dict[str, str]]`` but the
    entries contain an int ``size`` and a possibly-None ``updated``.

    Returns an empty list when Firebase Storage is unavailable.
    """
    _, bucket = _init_firebase_storage()
    if bucket is None:
        return []

    blobs = bucket.list_blobs(prefix=prefix)
    result = []
    for blob in blobs:
        # Only PDFs are curriculum sources; skip everything else.
        if blob.name.endswith(".pdf"):
            result.append({
                "name": blob.name,
                "size": blob.size,
                "updated": str(blob.updated) if blob.updated else None,
                "download_url": f"https://storage.googleapis.com/{bucket.name}/{blob.name}",
            })
    return result
107
+
108
+
109
# Static metadata for each curriculum PDF tracked in Firebase Storage,
# keyed by its storage path. Used to tag extracted chunks with subject,
# quarter, and content domain before indexing.
# NOTE(review): "type" distinguishes DepEd curriculum guides from SDO
# (Schools Division Office) modules — confirm downstream consumers rely
# only on these two values.
PDF_METADATA: Dict[str, dict] = {
    "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "curriculum_guide",
        "content_domain": "general",
        "quarter": 1,
        "storage_path": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
    },
    "curriculum/finite_math/Finite-Mathematics-1-1.pdf": {
        "subject": "Finite Mathematics 1",
        "subjectId": "finite-math-1",
        "type": "curriculum_guide",
        "content_domain": "finite_math",
        "quarter": 1,
        "storage_path": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
    },
    "curriculum/finite_math/Finite-Mathematics-2-1.pdf": {
        "subject": "Finite Mathematics 2",
        "subjectId": "finite-math-2",
        "type": "curriculum_guide",
        "content_domain": "finite_math",
        "quarter": 1,
        "storage_path": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
    },
    "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "sdo_module",
        "content_domain": "general",
        "quarter": 1,
        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
    },
    "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": {
        "subject": "Business Mathematics",
        "subjectId": "business-math",
        "type": "sdo_module",
        "content_domain": "business",
        "quarter": 1,
        "storage_path": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
    },
    "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": {
        "subject": "Organization and Management",
        "subjectId": "org-mgmt",
        "type": "sdo_module",
        "content_domain": "org_management",
        "quarter": 1,
        "storage_path": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
    },
    "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf": {
        "subject": "Statistics and Probability",
        "subjectId": "stats-prob",
        "type": "sdo_module",
        "content_domain": "statistics",
        "quarter": 1,
        "storage_path": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf",
    },
}
rag/vectorstore_loader.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from threading import Lock
6
+ from typing import Any, Dict, Tuple
7
+
8
+ import chromadb
9
+ from sentence_transformers import SentenceTransformer
10
+
11
# Guards lazy construction and teardown of the shared vectorstore handle.
_VECTORSTORE_LOCK = Lock()
# Cached (chroma client, collection, embedder); None until first use.
_VECTORSTORE_SINGLETON: Tuple[Any, Any, SentenceTransformer] | None = None


def reset_vectorstore_singleton() -> None:
    """Drop the cached vectorstore handle so the next access rebuilds it."""
    global _VECTORSTORE_SINGLETON
    with _VECTORSTORE_LOCK:
        _VECTORSTORE_SINGLETON = None
19
+
20
+
21
def _resolve_vectorstore_dir() -> Path:
    """Locate the ChromaDB directory from CURRICULUM_VECTORSTORE_DIR.

    Absolute paths are used as-is. Relative paths are tried against the
    current working directory first; otherwise they are anchored two levels
    above this file.
    """
    configured = Path(os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore"))
    if configured.is_absolute():
        return configured

    # Prefer the CWD-relative location when it already exists, or when we
    # appear to be running from the repo root (dir named MATHPULSE-AI).
    from_cwd = Path.cwd() / configured
    if from_cwd.exists() or str(Path.cwd()).endswith("MATHPULSE-AI"):
        return from_cwd

    return Path(__file__).resolve().parents[2] / configured
33
+
34
+
35
def get_vectorstore_components(
    collection_name: str = "curriculum_chunks",
    model_name: str | None = None,
):
    """Return the process-wide (chroma client, collection, embedder) triple.

    Built lazily under a lock on first use. When ``model_name`` is not given,
    the embedding model is taken from the EMBEDDING_MODEL env var (matching
    the documented configuration) with "BAAI/bge-base-en-v1.5" as fallback —
    previously the model was hardcoded and the env var was ignored.
    Changing the model requires re-ingesting all curriculum data.
    """
    global _VECTORSTORE_SINGLETON
    if _VECTORSTORE_SINGLETON is None:
        with _VECTORSTORE_LOCK:
            # Double-check inside the lock: another thread may have built it.
            if _VECTORSTORE_SINGLETON is None:
                resolved_model = model_name or os.getenv("EMBEDDING_MODEL") or "BAAI/bge-base-en-v1.5"
                vectorstore_dir = _resolve_vectorstore_dir()
                vectorstore_dir.mkdir(parents=True, exist_ok=True)
                client = chromadb.PersistentClient(path=str(vectorstore_dir))
                # Cosine space must match how the chunks were embedded at
                # ingestion time.
                collection = client.get_or_create_collection(
                    name=collection_name,
                    metadata={"hnsw:space": "cosine"},
                )
                embedder = SentenceTransformer(resolved_model)
                _VECTORSTORE_SINGLETON = (client, collection, embedder)
    return _VECTORSTORE_SINGLETON
53
+
54
+
55
def get_vectorstore_health() -> Dict[str, Any]:
    """Report chunk count, per-subject distribution, and the store directory."""
    _, collection, _ = get_vectorstore_components()
    snapshot = collection.get(include=["metadatas"])

    per_subject: Dict[str, int] = {}
    for meta in snapshot.get("metadatas") or []:
        if not isinstance(meta, dict):
            continue
        label = str(meta.get("subject") or "unknown")
        per_subject[label] = per_subject.get(label, 0) + 1

    return {
        "chunkCount": len(snapshot.get("ids") or []),
        "subjects": per_subject,
        "vectorstoreDir": str(_resolve_vectorstore_dir()),
    }
requirements-dev.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ -r requirements.txt
2
+ mypy>=1.11.0
3
+ pytest>=8.3.0
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.104.0
2
+ uvicorn[standard]>=0.24.0
3
+ openai>=1.0.0
4
+ huggingface-hub>=0.31.0
5
+ requests>=2.31.0
6
+ pandas==2.2.3
7
+ openpyxl==3.1.5
8
+ pdfplumber==0.11.5
9
+ chromadb>=0.5.0
10
+ sentence-transformers>=3.0.0
11
+ langchain-text-splitters>=0.3.0
12
+ python-docx==1.1.2
13
+ python-multipart>=0.0.6
14
+ sympy==1.13.3
15
+ matplotlib==3.10.0
16
+ scikit-learn==1.6.1
17
+ joblib==1.4.2
18
+ scipy==1.15.1
19
+ numpy==2.2.1
20
+ firebase-admin>=6.2.0
21
+ redis[hiredis]>=5.0.0
22
+ PyYAML>=6.0.0
23
+ mypy>=1.20.0
24
+ pytest>=9.0.0
routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Backend route modules."""
routes/admin_model_routes.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, Request
2
+ from pydantic import BaseModel
3
+ from services.inference_client import (
4
+ set_runtime_model_profile, set_runtime_model_override,
5
+ reset_runtime_overrides, get_current_runtime_config, _MODEL_PROFILES,
6
+ )
7
+
8
router = APIRouter(prefix="/api/admin/model-config", tags=["admin"])

# Runtime-overridable model-selection keys; any other key is rejected with
# HTTP 400 by the /override endpoint.
ALLOWED_OVERRIDE_KEYS = {
    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
}
14
+
15
+
16
def require_admin(request: Request):
    """FastAPI dependency: return the authenticated admin user or abort.

    Raises 401 when no user is attached to the request state, 403 when the
    user is authenticated but not an admin.
    """
    current = getattr(request.state, "user", None)
    if current is None:
        raise HTTPException(status_code=401, detail="Authentication required")
    if current.role != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")
    return current
23
+
24
+
25
class ProfileSwitchRequest(BaseModel):
    """Body for POST /profile: select a named model profile."""
    # Profile name; validated downstream by set_runtime_model_profile.
    profile: str


class OverrideRequest(BaseModel):
    """Body for POST /override: set one runtime model override."""
    # Env-style key; must be in ALLOWED_OVERRIDE_KEYS.
    key: str
    # Value to apply for that key.
    value: str
32
+
33
+
34
+ @router.get("")
35
+ def get_model_config(_admin=Depends(require_admin)):
36
+ return {
37
+ **get_current_runtime_config(),
38
+ "availableProfiles": list(_MODEL_PROFILES.keys()),
39
+ "profileDescriptions": {
40
+ "dev": "deepseek-chat everywhere - fast, $0.14/M input",
41
+ "budget": "deepseek-chat for all tasks - minimal cost",
42
+ "prod": "deepseek-reasoner for RAG, deepseek-chat for chat - best quality",
43
+ },
44
+ }
45
+
46
+
47
+ @router.post("/profile")
48
+ def switch_profile(req: ProfileSwitchRequest, _admin=Depends(require_admin)):
49
+ try:
50
+ set_runtime_model_profile(req.profile)
51
+ return {"success": True, "applied": get_current_runtime_config()}
52
+ except ValueError as e:
53
+ raise HTTPException(status_code=400, detail=str(e))
54
+
55
+
56
+ @router.post("/override")
57
+ def set_override(req: OverrideRequest, _admin=Depends(require_admin)):
58
+ if req.key not in ALLOWED_OVERRIDE_KEYS:
59
+ raise HTTPException(status_code=400, detail=f"Key '{req.key}' is not overridable.")
60
+ set_runtime_model_override(req.key, req.value)
61
+ return {"success": True, "applied": get_current_runtime_config()}
62
+
63
+
64
+ @router.delete("/reset")
65
+ def reset_to_env(_admin=Depends(require_admin)):
66
+ reset_runtime_overrides()
67
+ return {"success": True, "current": get_current_runtime_config()}
routes/diagnostic.py ADDED
@@ -0,0 +1,710 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MathPulse AI - Diagnostic Assessment Router
3
+ POST /api/diagnostic/generate - Generate 15-item diagnostic test grounded in RAG curriculum
4
+ POST /api/diagnostic/submit - Score responses, run risk analysis, save to Firestore
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import traceback
12
+ import uuid
13
+ from datetime import datetime, timezone
14
+ from typing import Any, Dict, List, Optional
15
+
16
+ from fastapi import APIRouter, HTTPException, Request
17
+ from pydantic import BaseModel, Field
18
+
19
+ from services.ai_client import CHAT_MODEL, get_deepseek_client
20
+ from rag.curriculum_rag import retrieve_curriculum_context
21
+
22
+ logger = logging.getLogger("mathpulse.diagnostic")
23
+
24
+ router = APIRouter(prefix="/api/diagnostic", tags=["diagnostic"])
25
+
26
+
27
+ # ─── Pydantic Models ───────────────────────────────────────────────
28
+
29
class DiagnosticGenerateRequest(BaseModel):
    """Body for POST /api/diagnostic/generate."""
    strand: str = Field(..., description="Student strand: ABM, STEM, HUMSS, GAS, TVL")
    grade_level: str = Field(..., description="Grade level: Grade 11 or Grade 12")


class DiagnosticOption(BaseModel):
    """The four multiple-choice options, keyed by letter."""
    A: str
    B: str
    C: str
    D: str


class DiagnosticQuestionStripped(BaseModel):
    """A diagnostic question as sent to the client (answer key stripped)."""
    question_id: str
    competency_code: str
    domain: str
    topic: str
    difficulty: str
    bloom_level: str
    question_text: str
    options: DiagnosticOption
    curriculum_reference: str


class DiagnosticGenerateResponse(BaseModel):
    """Response for /generate: the test id plus its stripped questions."""
    test_id: str
    questions: List[DiagnosticQuestionStripped]
    total_items: int
    estimated_minutes: float


class DiagnosticResponseItem(BaseModel):
    """One student answer in a diagnostic submission."""
    question_id: str
    student_answer: str
    time_spent_seconds: int


class DiagnosticSubmitRequest(BaseModel):
    """Body for POST /api/diagnostic/submit."""
    test_id: str
    responses: List[DiagnosticResponseItem]


class MasterySummary(BaseModel):
    """Competency codes grouped by demonstrated mastery level."""
    mastered: List[str]
    developing: List[str]
    beginning: List[str]


class DiagnosticSubmitResponse(BaseModel):
    """Scoring result returned after a diagnostic submission."""
    success: bool
    overall_risk: str
    overall_score_percent: float
    mastery_summary: MasterySummary
    recommended_intervention: str
    xp_earned: int
    badge_unlocked: str
    redirect_to: str
86
+
87
+
88
+ # ─── Competency Code Registry ───────────────────────────────────────
89
+
90
# Canonical registry of competency codes the LLM may assign to questions.
# Code prefixes encode the subject area: NA-/MG- = General Mathematics,
# BM- = Business Mathematics, SP- = Statistics & Probability,
# FM1-/FM2- = Finite Mathematics.
COMPETENCY_REGISTRY = {
    "NA-WAGE-01": {"subject": "General Mathematics", "title": "Wages, Salaries, Overtime, Commissions, VAT"},
    "NA-SEQ-01": {"subject": "General Mathematics", "title": "Arithmetic Sequences and Series"},
    "NA-SEQ-02": {"subject": "General Mathematics", "title": "Geometric Sequences and Series"},
    "NA-FUNC-01": {"subject": "General Mathematics", "title": "Functions, Relations, Vertical Line Test"},
    "NA-FUNC-02": {"subject": "General Mathematics", "title": "Evaluating Functions, Operations, Composition"},
    "NA-FUNC-03": {"subject": "General Mathematics", "title": "One-to-One Functions, Inverse Functions"},
    "NA-EXP-01": {"subject": "General Mathematics", "title": "Exponential Functions, Equations, Inequalities"},
    "NA-LOG-01": {"subject": "General Mathematics", "title": "Logarithmic Functions"},
    "MG-TRIG-01": {"subject": "General Mathematics", "title": "Trigonometric Ratios, Right Triangles"},
    "NA-FIN-01": {"subject": "General Mathematics", "title": "Compound Interest, Maturity Value"},
    "NA-FIN-02": {"subject": "General Mathematics", "title": "Simple and General Annuities"},
    "NA-FIN-04": {"subject": "General Mathematics", "title": "Business and Consumer Loans, Amortization"},
    "NA-LOGIC-01": {"subject": "General Mathematics", "title": "Logical Propositions, Connectives, Truth Tables"},
    "BM-FDP-01": {"subject": "Business Mathematics", "title": "Fractions, Decimals, Percent Conversions"},
    "BM-FDP-02": {"subject": "Business Mathematics", "title": "Proportion: Direct, Inverse, Partitive"},
    "BM-BUS-01": {"subject": "Business Mathematics", "title": "Markup, Margin, Trade Discounts, VAT"},
    "BM-BUS-02": {"subject": "Business Mathematics", "title": "Profit, Loss, Break-even Point"},
    "BM-COMM-01": {"subject": "Business Mathematics", "title": "Straight Commission, Salary Plus Commission"},
    "BM-COMM-02": {"subject": "Business Mathematics", "title": "Commission on Cash and Installment Basis"},
    "BM-SW-01": {"subject": "Business Mathematics", "title": "Salary vs. Wage, Income"},
    "BM-SW-03": {"subject": "Business Mathematics", "title": "Mandatory Deductions: SSS, PhilHealth, Pag-IBIG"},
    "BM-SW-04": {"subject": "Business Mathematics", "title": "Overtime Pay Computation (Labor Code)"},
    "SP-RV-01": {"subject": "Statistics & Probability", "title": "Random Variables, Discrete vs. Continuous"},
    "SP-RV-02": {"subject": "Statistics & Probability", "title": "Probability Distribution, Mean, Variance, SD"},
    "SP-NORM-01": {"subject": "Statistics & Probability", "title": "Normal Curve Properties"},
    "SP-NORM-02": {"subject": "Statistics & Probability", "title": "Z-Scores, Standard Normal Table"},
    "SP-SAMP-01": {"subject": "Statistics & Probability", "title": "Types of Random Sampling"},
    "SP-SAMP-03": {"subject": "Statistics & Probability", "title": "Central Limit Theorem"},
    "SP-HYP-01": {"subject": "Statistics & Probability", "title": "Hypothesis Testing: H0 and Ha"},
    "FM1-MAT-01": {"subject": "Finite Mathematics", "title": "Matrices and Matrix Operations"},
    "FM2-PROB-01": {"subject": "Finite Mathematics", "title": "Counting Principles and Permutations"},
    "FM2-PROB-02": {"subject": "Finite Mathematics", "title": "Combinations and Probability"},
}
124
+
125
# Recommended study order per competency-code prefix, used to build the
# suggested learning path after a diagnostic is scored.
# NOTE(review): there is no "FM" entry, so Finite Mathematics gaps produce
# no learning-path items — confirm this is intended.
LEARNING_PATH_ORDER: Dict[str, List[str]] = {
    "BM": ["BM-FDP-01", "BM-FDP-02", "BM-BUS-01", "BM-BUS-02", "BM-COMM-01",
           "BM-COMM-02", "BM-SW-01", "BM-SW-03", "BM-SW-04"],
    "NA": ["NA-WAGE-01", "NA-SEQ-01", "NA-SEQ-02", "NA-FUNC-01", "NA-FUNC-02",
           "NA-FUNC-03", "NA-EXP-01", "NA-LOG-01", "NA-FIN-01", "NA-FIN-02",
           "NA-FIN-04", "NA-LOGIC-01"],
    "SP": ["SP-RV-01", "SP-RV-02", "SP-NORM-01", "SP-NORM-02", "SP-SAMP-01",
           "SP-SAMP-03", "SP-HYP-01"],
}
134
+
135
+
136
# Subjects whose curriculum chunks are retrieved (RAG) when generating a
# diagnostic for each strand. Must cover every subject listed in
# STRAND_COVERAGE_TEXT / FULL_QUESTION_SCHEMA so the generated questions
# are grounded in curriculum material.
# FIX: ABM diagnostics include 5 Statistics & Probability items (see
# STRAND_COVERAGE_TEXT["ABM"]), but the subject was missing here, so ABM
# tests were generated without any S&P curriculum context.
STRAND_SUBJECTS: Dict[str, List[str]] = {
    "ABM": ["General Mathematics", "Business Mathematics", "Statistics and Probability"],
    "STEM": ["General Mathematics", "Statistics and Probability"],
    "HUMSS": ["General Mathematics"],
    "GAS": ["General Mathematics"],
    "TVL": ["General Mathematics"],
}
143
+
144
+
145
# Human-readable summary of the per-strand item distribution (15 items total
# per strand). NOTE(review): not referenced anywhere in this module's visible
# code — presumably informational / consumed elsewhere; confirm before removal.
FULL_QUESTION_SCHEMA: Dict[str, List[str]] = {
    "ABM": [
        "General Mathematics: 5 items",
        "Business Mathematics: 5 items",
        "Statistics & Probability: 5 items",
    ],
    "STEM": [
        "General Mathematics: 7 items",
        "Statistics & Probability: 5 items",
        "Finite Mathematics: 3 items",
    ],
    "HUMSS": ["General Mathematics: 15 items"],
    "GAS": ["General Mathematics: 15 items"],
    "TVL": ["General Mathematics: 15 items"],
}
160
+
161
# Verbatim prompt fragments inserted into the system prompt to tell the LLM
# how to distribute the 15 items for each strand. These strings are part of
# the prompt contract — edit with care.
STRAND_COVERAGE_TEXT: Dict[str, str] = {
    "ABM": """FOR ABM STRAND:
- 5 questions: General Mathematics (NA-WAGE, NA-SEQ, NA-FIN topics -- wages, sequences, interest)
- 5 questions: Business Mathematics (BM-FDP, BM-BUS, BM-COMM, BM-SW topics -- percent, markup, commission, salaries, deductions using SSS/PhilHealth/Pag-IBIG rates)
- 5 questions: Statistics & Probability (SP-RV, SP-NORM topics -- random variables, normal distribution, z-scores)""",
    "STEM": """FOR STEM STRAND:
- 7 questions: General Mathematics (NA-FUNC, NA-EXP, NA-LOG, MG-TRIG, NA-FIN -- functions, exponentials, trigonometry, financial math)
- 5 questions: Statistics & Probability (SP-RV, SP-NORM, SP-SAMP, SP-HYP -- distributions, sampling, hypothesis)
- 3 questions: Finite Mathematics (FM1-MAT or FM2-PROB -- matrices or counting/probability)""",
    "HUMSS": """FOR HUMSS STRAND:
- 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
    "GAS": """FOR GAS STRAND:
- 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
    "TVL": """FOR TVL STRAND:
- 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
}
177
+
178
+
179
def _get_strand_coverage(strand: str) -> str:
    """Return the coverage prompt text for *strand*, falling back to STEM."""
    key = strand.upper()
    if key in STRAND_COVERAGE_TEXT:
        return STRAND_COVERAGE_TEXT[key]
    return STRAND_COVERAGE_TEXT["STEM"]
181
+
182
+
183
def _build_rag_context(strand: str) -> str:
    """Assemble curriculum reference text for every subject in the strand.

    Retrieval failures for a single subject are logged and skipped rather
    than aborting generation; returns "" when nothing could be retrieved.
    """
    query = f"SHS {strand} diagnostic assessment competency questions Grade 11"
    sections: List[str] = []

    for subject in STRAND_SUBJECTS.get(strand.upper(), ["General Mathematics"]):
        try:
            chunks = retrieve_curriculum_context(
                query=query,
                subject=subject,
                top_k=3,
            )
        except Exception as e:
            logger.warning(f"[WARN] RAG unavailable for {subject}: {e}")
            continue

        if not chunks:
            continue

        # Cap each chunk at 600 chars to keep the prompt size bounded.
        formatted_chunks: List[str] = []
        for chunk in chunks:
            source = chunk.get("source_file", "unknown")
            body = str(chunk.get("content", ""))[:600]
            formatted_chunks.append(f"[Source: {source}]\n{body}")

        header = f"\n=== {subject.upper()} CURRICULUM REFERENCE ===\n"
        sections.append(header + "\n---\n".join(formatted_chunks))

    if not sections:
        logger.warning("[WARN] RAG unavailable for diagnostic generation -- proceeding without curriculum context")
        return ""

    return "\n".join(sections)
217
+
218
+
219
def _build_system_prompt(strand: str, grade_level: str, rag_context: str) -> str:
    """Build the system prompt for the diagnostic-generation LLM call.

    The prompt text below IS this function's behavior: it fixes the item
    count (15), strand coverage, the allowed competency codes, the
    difficulty mix, and the strict JSON output contract that
    _parse_questions_response() depends on.
    """
    strand_upper = strand.upper()
    coverage_text = _get_strand_coverage(strand_upper)

    # Only emit the curriculum-reference section when RAG retrieval
    # actually returned content.
    rag_block = ""
    if rag_context:
        rag_block = f"""
OFFICIAL CURRICULUM REFERENCE (from indexed DepEd modules via RAG):
{rag_context}

IMPORTANT: Base ALL questions strictly on the curriculum content above.
Do not invent formulas, definitions, or problem types not found in the
reference material. If the reference material is insufficient for a topic,
use only standard DepEd SHS competencies for that strand.
"""

    return f"""SYSTEM ROLE:
You are MathPulse AI's Diagnostic Test Generator. Your job is to create a
15-item multiple-choice diagnostic assessment for a Filipino SHS student,
strictly grounded in the DepEd Strengthened SHS Curriculum (SDO Navotas
modules and DepEd K-12 Curriculum Guides).

STUDENT CONTEXT:
- Strand: {strand_upper}
- Grade Level: {grade_level}
- Test Purpose: DIAGNOSTIC (pre-learning, not summative -- assess current
  knowledge to build a personalized learning path)
{rag_block}
STRAND-SUBJECT COVERAGE:
Generate 15 questions distributed across these subjects and domains:

{coverage_text}

COMPETENCY CODE FORMAT:
Assign each question exactly one competency_code from this registry:
  General Math:  NA-WAGE-01, NA-SEQ-01, NA-SEQ-02, NA-FUNC-01,
                 NA-FUNC-02, NA-FUNC-03, NA-EXP-01, NA-LOG-01,
                 MG-TRIG-01, NA-FIN-01, NA-FIN-02, NA-FIN-04,
                 NA-LOGIC-01
  Business Math: BM-FDP-01, BM-FDP-02, BM-BUS-01, BM-BUS-02,
                 BM-COMM-01, BM-COMM-02, BM-SW-01, BM-SW-03, BM-SW-04
  Statistics:    SP-RV-01, SP-RV-02, SP-NORM-01, SP-NORM-02,
                 SP-SAMP-01, SP-SAMP-03, SP-HYP-01
  Finite Math:   FM1-MAT-01, FM2-PROB-01, FM2-PROB-02

DIFFICULTY DISTRIBUTION (across all 15 questions):
- Easy (Bloom: remembering / understanding): 6 questions (40%)
- Medium (Bloom: applying / analyzing): 6 questions (40%)
- Hard (Bloom: evaluating / creating): 3 questions (20%)

QUESTION RULES:
1. All questions are 4-option multiple choice (A, B, C, D).
2. Use Filipino real-life context: peso amounts, Filipino names
   (Juan, Maria, Jose), Philippine institutions (SSS, PhilHealth,
   Pag-IBIG, BIR, BDO, local schools, SM malls).
3. Never use trick questions. Wrong options must be plausible but clearly
   incorrect to a student who knows the concept.
4. Include a solution_hint (1-2 sentences) -- this is for the backend
   scoring engine ONLY. NEVER include it in the client response.
5. Cover as many different competency codes as possible across 15 items.
   Do not repeat the same competency code more than twice.

OUTPUT FORMAT (strict JSON array, no extra text, no markdown):
[
  {{
    "question_id": "DX-<uuid>",
    "competency_code": "BM-SW-03",
    "domain": "Business Mathematics",
    "topic": "Mandatory Deductions",
    "difficulty": "medium",
    "bloom_level": "applying",
    "question_text": "...",
    "options": {{"A": "...", "B": "...", "C": "...", "D": "..."}},
    "correct_answer": "C",
    "solution_hint": "Compute SSS contribution using the prescribed table...",
    "curriculum_reference": "SDO Navotas Bus. Math SHS 1st Sem - Salaries and Wages"
  }}
]
"""
298
+
299
+
300
async def _call_deepseek(system_prompt: str, user_message: str, temperature: float = 0.7) -> str:
    """Run one DeepSeek chat completion and return the raw content string.

    Raises:
        HTTPException(500) on any client/API failure.
    """
    import asyncio  # local import: not guaranteed at module level

    try:
        client = get_deepseek_client()
        # FIX: the OpenAI-compatible SDK client is synchronous; calling it
        # directly inside this coroutine blocked the event loop for the
        # whole (multi-second) generation. Run it in a worker thread.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            temperature=temperature,
            response_format={"type": "json_object"},
        )
        return response.choices[0].message.content or ""
    except Exception as e:
        logger.error(f"DeepSeek API error: {e}")
        raise HTTPException(status_code=500, detail="AI model unavailable. Please try again later.")
316
+
317
+
318
+ def _parse_questions_response(raw_response: str) -> List[Dict[str, Any]]:
319
+ try:
320
+ data = json.loads(raw_response)
321
+ if isinstance(data, dict):
322
+ for key in ("questions", "items", "data", "results"):
323
+ if key in data and isinstance(data[key], list):
324
+ return data[key]
325
+ for key, value in data.items():
326
+ if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
327
+ if "question_text" in value[0]:
328
+ return value
329
+ if isinstance(data, list):
330
+ return data
331
+ except json.JSONDecodeError:
332
+ pass
333
+
334
+ import re
335
+ match = re.search(r'\[.*\]', raw_response, re.DOTALL)
336
+ if match:
337
+ try:
338
+ return json.loads(match.group())
339
+ except json.JSONDecodeError:
340
+ pass
341
+
342
+ raise ValueError("Could not parse questions from AI response")
343
+
344
+
345
async def _generate_questions(strand: str, grade_level: str) -> tuple[str, List[Dict[str, Any]]]:
    """Generate up to 15 diagnostic questions for (strand, grade_level).

    Retries once with a lower temperature when the model returns
    unparseable JSON.

    Returns:
        (test_id, questions) where test_id is "DX-<hex12>".

    Raises:
        ValueError: when the retry also returns malformed JSON.
        HTTPException(500): when both attempts yield no questions.
    """
    test_id = f"DX-{uuid.uuid4().hex[:12]}"
    rag_context = _build_rag_context(strand)
    system_prompt = _build_system_prompt(strand, grade_level, rag_context)
    # FIX: the user message previously hard-coded "Grade 11", silently
    # ignoring the grade_level argument for Grade 12 students.
    user_message = f"Generate 15 diagnostic questions for a {grade_level} {strand} student."

    for attempt in range(2):
        # First attempt is creative; the retry is more deterministic so the
        # model is likelier to emit strictly valid JSON.
        temperature = 0.7 if attempt == 0 else 0.3
        try:
            raw_response = await _call_deepseek(system_prompt, user_message, temperature)
            questions = _parse_questions_response(raw_response)
            if questions:
                return test_id, questions[:15]
        except ValueError:
            if attempt == 0:
                logger.warning("Malformed JSON from DeepSeek, retrying with temperature=0.3")
                continue
            raise

    raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")
365
+
366
+
367
async def _store_diagnostic_session(
    firestore_client: Any,
    user_id: str,
    test_id: str,
    strand: str,
    grade_level: str,
    questions: List[Dict[str, Any]],
) -> bool:
    """Persist the full question set (including answers) for later scoring.

    Returns:
        True on success, False on any Firestore error — callers treat
        storage as best-effort.
    """
    try:
        # FIX: SERVER_TIMESTAMP is a sentinel on the firestore *module*,
        # not an attribute of the Client instance; the previous
        # `firestore_client.SERVER_TIMESTAMP` raised AttributeError on
        # every call, so sessions were never stored.
        from firebase_admin import firestore as fs

        doc_ref = (
            firestore_client.collection("diagnosticSessions")
            .document(test_id)
        )
        doc_ref.set({
            "testId": test_id,
            "userId": user_id,
            "generatedAt": fs.SERVER_TIMESTAMP,
            "strand": strand,
            "gradeLevel": grade_level,
            "questions": questions,
            "status": "in_progress",
        })
        return True
    except Exception as e:
        logger.error(f"Failed to store diagnostic session: {e}")
        return False
393
+
394
+
395
+ def _strip_answers(questions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
396
+ stripped = []
397
+ for q in questions:
398
+ stripped.append({
399
+ "question_id": q.get("question_id", ""),
400
+ "competency_code": q.get("competency_code", ""),
401
+ "domain": q.get("domain", ""),
402
+ "topic": q.get("topic", ""),
403
+ "difficulty": q.get("difficulty", ""),
404
+ "bloom_level": q.get("bloom_level", ""),
405
+ "question_text": q.get("question_text", ""),
406
+ "options": q.get("options", {}),
407
+ "curriculum_reference": q.get("curriculum_reference", ""),
408
+ })
409
+ return stripped
410
+
411
+
412
+ # ─── ENDPOINT 1: Generate Diagnostic ────────────────────────────────
413
+
414
@router.post("/generate", response_model=DiagnosticGenerateResponse)
async def generate_diagnostic(request: DiagnosticGenerateRequest, req: Request):
    """Generate a 15-item diagnostic test for the authenticated student.

    The full question set (answers + hints included) is stored server-side
    in diagnosticSessions; the response only carries stripped questions.
    """
    # Auth middleware is expected to have attached a user to request state.
    user = getattr(req.state, "user", None)
    if not user or not getattr(user, "uid", None):
        raise HTTPException(status_code=401, detail="Authentication required")

    try:
        test_id, questions = await _generate_questions(
            request.strand,
            request.grade_level,
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Generation error: {e}\n{traceback.format_exc()}")
        raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")

    # Session storage is best-effort: a Firestore outage should not block
    # the student from taking the test (scoring will later 404, though).
    try:
        # FIX: removed unused `import firebase_admin` — the from-import
        # below is sufficient.
        from firebase_admin import firestore as fs
        firestore_client = fs.client()
        await _store_diagnostic_session(
            firestore_client,
            user.uid,
            test_id,
            request.strand,
            request.grade_level,
            questions,
        )
    except Exception as e:
        logger.warning(f"Could not store diagnostic session: {e}")

    client_questions = _strip_answers(questions)

    return DiagnosticGenerateResponse(
        test_id=test_id,
        questions=client_questions,
        total_items=len(client_questions),
        # NOTE(review): fixed estimate regardless of item mix — confirm.
        estimated_minutes=11.6,
    )
454
+
455
+
456
+ # ─── ENDPOINT 2: Submit and Evaluate ─────────────────────────────────
457
+
458
def _score_responses(stored_questions: List[Dict[str, Any]], responses: List[DiagnosticResponseItem]) -> tuple:
    """Grade student responses against the stored answer key.

    Returns:
        (scored, total_correct, domain_correct, domain_total, comp_attempts)
        where scored is a per-response record list and comp_attempts maps
        competency code -> list of correctness booleans.
    """
    question_map: Dict[str, Dict[str, Any]] = {
        q.get("question_id", ""): q for q in stored_questions
    }

    scored: List[Dict[str, Any]] = []
    total_correct = 0
    domain_correct: Dict[str, int] = {}
    domain_total: Dict[str, int] = {}
    comp_attempts: Dict[str, List[bool]] = {}

    for answer in responses:
        question = question_map.get(answer.question_id, {})
        expected = question.get("correct_answer", "")
        # Case/whitespace-insensitive comparison of the answer letter.
        is_correct = answer.student_answer.strip().upper() == expected.strip().upper()

        domain = question.get("domain", "Unknown")
        code = question.get("competency_code", "")

        domain_correct.setdefault(domain, 0)
        domain_total[domain] = domain_total.get(domain, 0) + 1
        if is_correct:
            domain_correct[domain] += 1
            total_correct += 1

        comp_attempts.setdefault(code, []).append(is_correct)

        scored.append({
            "question_id": answer.question_id,
            "competency_code": code,
            "domain": domain,
            "topic": question.get("topic", ""),
            "difficulty": question.get("difficulty", ""),
            "bloom_level": question.get("bloom_level", ""),
            "student_answer": answer.student_answer,
            "correct_answer": expected,
            "is_correct": is_correct,
            "time_spent_seconds": answer.time_spent_seconds,
        })

    return scored, total_correct, domain_correct, domain_total, comp_attempts
503
+
504
+
505
+ def _compute_domain_scores(domain_correct: Dict[str, int], domain_total: Dict[str, int]) -> Dict[str, Dict[str, Any]]:
506
+ domain_scores = {}
507
+ for domain in domain_total:
508
+ correct = domain_correct.get(domain, 0)
509
+ total = domain_total[domain]
510
+ pct = (correct / total * 100) if total > 0 else 0
511
+ mastery = "mastered" if pct >= 80 else "developing" if pct >= 60 else "beginning"
512
+ domain_scores[domain] = {
513
+ "correct": correct,
514
+ "total": total,
515
+ "percentage": round(pct, 1),
516
+ "mastery_level": mastery,
517
+ }
518
+ return domain_scores
519
+
520
+
521
def _compute_risk_profile(
    total_correct: int,
    total_items: int,
    scored_responses: List[Dict[str, Any]],
    domain_scores: Dict[str, Dict[str, Any]],
) -> Dict[str, Any]:
    """Derive the overall risk band, mastery summary, and suggested path.

    Args:
        total_correct: number of correctly answered items.
        total_items: number of items in the test.
        scored_responses: per-response records from _score_responses().
        domain_scores: output of _compute_domain_scores().

    Returns:
        Dict consumed by _save_results() and the /submit response.
    """
    overall_pct = (total_correct / total_items * 100) if total_items > 0 else 0

    mastered = [d for d, s in domain_scores.items() if s["mastery_level"] == "mastered"]
    developing = [d for d, s in domain_scores.items() if s["mastery_level"] == "developing"]
    beginning = [d for d, s in domain_scores.items() if s["mastery_level"] == "beginning"]

    # Competency codes attempted at least twice and never answered right.
    critical_gaps: List[str] = []
    for resp in scored_responses:
        code = resp.get("competency_code", "")
        if not code or code in critical_gaps:
            continue
        attempts = [r for r in scored_responses if r.get("competency_code") == code]
        if len(attempts) >= 2 and not any(r.get("is_correct") for r in attempts):
            critical_gaps.append(code)

    if overall_pct >= 75 and len(beginning) == 0:
        overall_risk = "low"
    elif overall_pct >= 55 or len(beginning) <= 2:
        overall_risk = "moderate"
    elif overall_pct >= 40 or len(beginning) <= 4:
        overall_risk = "high"
    else:
        overall_risk = "critical"

    # FIX: the previous code compared domain display names (e.g.
    # "Business Mathematics") against code prefixes ("BM") with
    # startswith(), which never matched — beginning domains contributed
    # nothing, while the developing loop appended EVERY learning path for
    # every domain. Map display names to prefixes explicitly instead.
    domain_prefixes = {
        "General Mathematics": "NA",
        "Business Mathematics": "BM",
        "Statistics & Probability": "SP",
        "Statistics and Probability": "SP",
        "Finite Mathematics": "FM",
    }

    suggested_path: List[str] = []

    def _extend_path(codes: List[str]) -> None:
        # Append while preserving order and de-duplicating.
        for comp_code in codes:
            if comp_code not in suggested_path:
                suggested_path.append(comp_code)

    _extend_path(critical_gaps)
    # Weakest domains first, then developing ones.
    for domain in beginning + developing:
        prefix = domain_prefixes.get(domain)
        if prefix:
            _extend_path(LEARNING_PATH_ORDER.get(prefix, []))

    interventions = {
        "low": "Great job! You have a solid foundation. Keep practicing to maintain your skills!",
        "moderate": "You're making good progress. Focus on the topics where you need more practice. Kaya mo yan!",
        "high": "Don't worry! With focused practice on your weak areas, you'll improve quickly.",
        "critical": "Let's work on this together. Start with the basics and build up your confidence step by step.",
    }

    return {
        "overall_risk": overall_risk,
        "overall_score_percent": round(overall_pct, 1),
        "mastery_summary": {
            "mastered": mastered,
            "developing": developing,
            "beginning": beginning,
        },
        "weak_domains": beginning,
        "critical_gaps": critical_gaps,
        "recommended_intervention": interventions.get(overall_risk, interventions["moderate"]),
        "suggested_learning_path": suggested_path[:20],
    }
592
+
593
+
594
async def _save_results(
    firestore_client: Any,
    user_id: str,
    test_id: str,
    strand: str,
    grade_level: str,
    scored_responses: List[Dict[str, Any]],
    domain_scores: Dict[str, Dict[str, Any]],
    risk_profile: Dict[str, Any],
    total_correct: int,
    total_items: int,
) -> None:
    """Write the diagnostic result, update student progress, close the session.

    Raises:
        Exception: re-raised after logging, so the caller surfaces the error.
    """
    try:
        # FIX: SERVER_TIMESTAMP, Increment and ArrayUnion live on the
        # firestore *module*, not on the Client instance; the previous
        # `firestore_client.SERVER_TIMESTAMP` etc. raised AttributeError,
        # so results were never saved.
        from firebase_admin import firestore as fs

        overall_pct = round(total_correct / total_items * 100, 1) if total_items > 0 else 0

        firestore_client.collection("diagnosticResults").document(user_id).set({
            "userId": user_id,
            "testId": test_id,
            "takenAt": fs.SERVER_TIMESTAMP,
            "strand": strand,
            "gradeLevel": grade_level,
            "status": "completed",
            "totalItems": total_items,
            "totalScore": total_correct,
            "percentageScore": overall_pct,
            "responses": scored_responses,
            "domainScores": domain_scores,
            "riskProfile": risk_profile,
        })

        mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))

        # 50 XP base for finishing the diagnostic, +10 per mastered domain
        # (mirrors xp_earned in the /submit response).
        firestore_client.collection("studentProgress").document(user_id).collection("stats").document("main").set({
            "learning_path": risk_profile.get("suggested_learning_path", []),
            "current_topic_index": 0,
            "total_xp": fs.Increment(50 + mastered_count * 10),
            "current_streak_days": 1,
            "badges": fs.ArrayUnion(["first_assessment"]),
            "topics_mastered": mastered_count,
            "diagnostic_completed": True,
            "overall_risk": risk_profile.get("overall_risk", "moderate"),
        }, merge=True)

        firestore_client.collection("diagnosticSessions").document(test_id).update({
            "status": "completed",
            "completedAt": fs.SERVER_TIMESTAMP,
        })

    except Exception as e:
        logger.error(f"Firestore save error: {e}")
        raise
645
+
646
+
647
@router.post("/submit", response_model=DiagnosticSubmitResponse)
async def submit_diagnostic(request: DiagnosticSubmitRequest, req: Request):
    """Score a completed diagnostic, persist results, and return the summary."""
    user = getattr(req.state, "user", None)
    if not user or not getattr(user, "uid", None):
        raise HTTPException(status_code=401, detail="Authentication required")

    try:
        # FIX: removed unused `import firebase_admin` — the from-import is
        # sufficient here.
        from firebase_admin import firestore as fs
        firestore_client = fs.client()
    except Exception as e:
        # FIX: previously the exception was bound but never logged, hiding
        # the root cause of every 503.
        logger.error(f"Firestore client initialization failed: {e}")
        raise HTTPException(status_code=503, detail="Database unavailable")

    try:
        session_doc = firestore_client.collection("diagnosticSessions").document(request.test_id).get()
        if not session_doc.exists:
            raise HTTPException(status_code=404, detail="Diagnostic session not found")

        session_data = session_doc.to_dict() or {}
        stored_questions = session_data.get("questions", [])
        strand = session_data.get("strand", "STEM")
        grade_level = session_data.get("gradeLevel", "Grade 11")

        if not stored_questions:
            raise HTTPException(status_code=400, detail="No questions found for this session")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Session retrieval error: {e}")
        raise HTTPException(status_code=500, detail="Failed to retrieve diagnostic session")

    scored_responses, total_correct, domain_correct, domain_total, _ = _score_responses(
        stored_questions, request.responses
    )

    total_items = len(stored_questions)
    domain_scores = _compute_domain_scores(domain_correct, domain_total)
    risk_profile = _compute_risk_profile(total_correct, total_items, scored_responses, domain_scores)

    await _save_results(
        firestore_client,
        user.uid,
        request.test_id,
        strand,
        grade_level,
        scored_responses,
        domain_scores,
        risk_profile,
        total_correct,
        total_items,
    )

    mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))

    return DiagnosticSubmitResponse(
        success=True,
        overall_risk=risk_profile["overall_risk"],
        overall_score_percent=risk_profile["overall_score_percent"],
        mastery_summary=MasterySummary(**risk_profile["mastery_summary"]),
        recommended_intervention=risk_profile["recommended_intervention"],
        # Keep in sync with the XP increment written by _save_results().
        xp_earned=50 + mastered_count * 10,
        badge_unlocked="first_assessment",
        redirect_to="/dashboard",
    )
routes/rag_routes.py ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import re
7
+ from datetime import datetime, timezone
8
+ from threading import Lock
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from fastapi import APIRouter, HTTPException, Request
12
+ from pydantic import BaseModel, Field
13
+
14
+ from services.inference_client import (
15
+ InferenceRequest,
16
+ create_default_client,
17
+ is_sequential_model,
18
+ get_model_for_task,
19
+ )
20
+ from rag.curriculum_rag import (
21
+ build_analysis_curriculum_context,
22
+ build_lesson_prompt,
23
+ build_lesson_query,
24
+ build_problem_generation_prompt,
25
+ format_retrieved_chunks,
26
+ retrieve_curriculum_context,
27
+ retrieve_lesson_pdf_context,
28
+ summarize_retrieval_confidence,
29
+ )
30
+ from rag.vectorstore_loader import get_vectorstore_health, reset_vectorstore_singleton
31
+
32
# firebase_admin is optional at import time: when it is missing or fails to
# import (e.g. local dev without credentials), firebase_firestore is None
# and RAG usage logging becomes a silent no-op (see _log_rag_usage).
try:
    from firebase_admin import firestore as firebase_firestore
except Exception:
    firebase_firestore = None
36
+
37
logger = logging.getLogger("mathpulse.rag")
router = APIRouter(prefix="/api/rag", tags=["rag"])

# Lazily-created, process-wide inference client; guarded by _inference_lock
# so concurrent first requests create it only once.
_inference_client = None
_inference_lock = Lock()
42
+
43
+
44
def _get_inference_client():
    """Return the shared inference client, creating it on first use.

    Thread-safe: uses double-checked locking around the lazy init.
    """
    global _inference_client
    if _inference_client is not None:
        return _inference_client
    with _inference_lock:
        # Re-check inside the lock: another thread may have won the race.
        if _inference_client is None:
            _inference_client = create_default_client()
    return _inference_client
51
+
52
+
53
async def _generate_text(
    prompt: str,
    task_type: str,
    max_new_tokens: int = 900,
    enable_thinking: bool = False,
) -> str:
    """Run *prompt* through the shared inference client and return the text."""
    chat_messages = [
        {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
        {"role": "user", "content": prompt},
    ]
    inference_request = InferenceRequest(
        messages=chat_messages,
        task_type=task_type,
        max_new_tokens=max_new_tokens,
        temperature=0.2,
        top_p=0.9,
        enable_thinking=enable_thinking,
    )
    client = _get_inference_client()
    return client.generate_from_messages(inference_request)
71
+
72
+
73
def _log_rag_usage(
    request: Request,
    *,
    event_type: str,
    topic: str,
    subject: str,
    quarter: Optional[int],
    chunks: List[Dict[str, Any]],
) -> None:
    """Best-effort write of one RAG usage event to the `rag_usage` collection.

    No-op when firebase_admin is unavailable; never raises (failures are
    logged at warning level only).
    """
    if firebase_firestore is None:
        return
    try:
        user = getattr(request.state, "user", None)
        uid = getattr(user, "uid", None)

        hit_domains = sorted({
            str(chunk.get("content_domain") or "").strip()
            for chunk in chunks
            if chunk.get("content_domain")
        })
        best_score = max((float(chunk.get("score") or 0.0) for chunk in chunks), default=0.0)

        firebase_firestore.client().collection("rag_usage").add({
            "userId": uid,
            "type": event_type,
            "topic": topic,
            "subject": subject,
            "quarter": quarter,
            "retrievedChunks": len(chunks),
            "topScore": best_score,
            "curriculumDomainsHit": hit_domains,
            "timestamp": firebase_firestore.SERVER_TIMESTAMP,
            "createdAtIso": datetime.now(timezone.utc).isoformat(),
        })
    except Exception as exc:
        logger.warning("rag_usage logging skipped: %s", exc)
104
+
105
+
106
+ def _strip_thinking_and_parse(text: str) -> dict:
107
+ cleaned = text.strip()
108
+ cleaned = re.sub(r" </think>", "", cleaned, flags=re.DOTALL).strip()
109
+ if "{" in cleaned and "}" in cleaned:
110
+ try:
111
+ start = cleaned.find("{")
112
+ end = cleaned.rfind("}") + 1
113
+ parsed = json.loads(cleaned[start:end])
114
+ if isinstance(parsed, dict):
115
+ return parsed
116
+ except Exception:
117
+ pass
118
+ return {"explanation": text}
119
+
120
+
121
class RagLessonRequest(BaseModel):
    """Request body for RAG-grounded lesson generation."""

    topic: str
    subject: str
    quarter: int
    lessonTitle: Optional[str] = None
    learningCompetency: Optional[str] = None
    moduleUnit: Optional[str] = None
    learnerLevel: Optional[str] = None
    userId: Optional[str] = None
    moduleId: Optional[str] = None
    lessonId: Optional[str] = None
    competencyCode: Optional[str] = None
    # Optional storage location of the source lesson PDF.
    storagePath: Optional[str] = None
134
+
135
+
136
class RagProblemRequest(BaseModel):
    """Request body for RAG-grounded practice-problem generation."""

    topic: str
    subject: str
    quarter: int
    difficulty: str = Field(default="medium")
    userId: Optional[str] = None
142
+
143
+
144
class RagAnalysisContextRequest(BaseModel):
    """Request body for building curriculum context around weak topics."""

    weakTopics: List[str]
    subject: str
    userId: Optional[str] = None
148
+
149
+
150
@router.get("/health")
async def rag_health():
    """Health probe for the RAG stack.

    Reports vectorstore stats plus the active inference model. On
    vectorstore failure it returns status "degraded" with a warning rather
    than an HTTP error, so monitors can distinguish soft failures.
    """
    active_model = get_model_for_task("rag_lesson")
    is_seq = is_sequential_model(active_model)
    try:
        health = get_vectorstore_health()
        return {
            "status": "ok",
            "chunkCount": health["chunkCount"],
            "subjects": health["subjects"],
            # NOTE(review): this is the probe time, not an actual ingestion
            # timestamp — confirm whether consumers rely on it.
            "lastIngested": datetime.now(timezone.utc).isoformat(),
            "activeModel": active_model,
            "isSequentialModel": is_seq,
        }
    except Exception as exc:
        return {
            "status": "degraded",
            "chunkCount": 0,
            "subjects": {},
            "lastIngested": None,
            "activeModel": active_model,
            "isSequentialModel": is_seq,
            "warning": str(exc),
        }
174
+
175
+
176
+ def _fetch_youtube_video(lesson_title: str, subject: str, competency: str, quarter: int) -> dict:
177
+ try:
178
+ from backend.services.youtube_service import get_video_for_lesson
179
+ except ImportError:
180
+ return {}
181
+ try:
182
+ video = get_video_for_lesson(lesson_title, subject, competency, quarter)
183
+ return video or {}
184
+ except Exception as e:
185
+ logger.warning("YouTube search failed: %s", e)
186
+ return {}
187
+
188
+
189
+ def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
190
+ sections = lesson_data.get("sections", [])
191
+ section_types = {s.get("type") for s in sections}
192
+ required = ["introduction", "key_concepts", "video", "worked_examples", "important_notes", "try_it_yourself", "summary"]
193
+
194
+ default_content = {
195
+ "introduction": {"type": "introduction", "title": "Introduction", "content": f"Welcome to the lesson on {lesson_title}."},
196
+ "key_concepts": {"type": "key_concepts", "title": "Key Concepts", "content": "Below are the key concepts covered in this lesson.", "callouts": []},
197
+ "video": {"type": "video", "title": "Video Lesson", "content": "Watch this explanation to understand the concepts visually.", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},
198
+ "worked_examples": {"type": "worked_examples", "title": "Worked Examples", "examples": []},
199
+ "important_notes": {"type": "important_notes", "title": "Important Notes", "bulletPoints": []},
200
+ "try_it_yourself": {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": []},
201
+ "summary": {"type": "summary", "title": "Summary", "content": f"Great job completing the lesson on {lesson_title}!"},
202
+ }
203
+
204
+ filled = {}
205
+ for req_type in required:
206
+ for existing in sections:
207
+ if existing.get("type") == req_type:
208
+ filled[req_type] = existing
209
+ break
210
+ else:
211
+ filled[req_type] = default_content[req_type]
212
+
213
+ ordered = [filled[t] for t in required]
214
+
215
+ for i, section in enumerate(ordered):
216
+ s_type = section.get("type")
217
+ if s_type == "key_concepts" and not section.get("callouts"):
218
+ section["callouts"] = []
219
+ if s_type == "worked_examples" and not section.get("examples"):
220
+ section["examples"] = []
221
+ if s_type == "important_notes" and not section.get("bulletPoints"):
222
+ section["bulletPoints"] = []
223
+ if s_type == "try_it_yourself" and not section.get("practiceProblems"):
224
+ section["practiceProblems"] = []
225
+ ordered[i] = section
226
+
227
+ return {**lesson_data, "sections": ordered}
228
+
229
+
230
+ @router.post("/lesson")
231
+ async def rag_lesson(request: Request, payload: RagLessonRequest):
232
+ chunks, retrieval_mode = retrieve_lesson_pdf_context(
233
+ query=build_lesson_query(
234
+ payload.topic,
235
+ payload.subject,
236
+ payload.quarter,
237
+ lesson_title=payload.lessonTitle,
238
+ competency=payload.learningCompetency,
239
+ module_unit=payload.moduleUnit,
240
+ learner_level=payload.learnerLevel,
241
+ ),
242
+ subject=payload.subject,
243
+ quarter=payload.quarter,
244
+ lesson_title=payload.lessonTitle,
245
+ competency=payload.learningCompetency,
246
+ module_id=payload.moduleId,
247
+ lesson_id=payload.lessonId,
248
+ competency_code=payload.competencyCode,
249
+ storage_path=payload.storagePath,
250
+ top_k=8,
251
+ )
252
+
253
+ if not chunks:
254
+ raise HTTPException(
255
+ status_code=404,
256
+ detail={
257
+ "error": "no_curriculum_context",
258
+ "message": f"No curriculum content found for lesson '{payload.lessonTitle}' ({payload.subject} Q{payload.quarter}). Please ensure the PDF has been ingested.",
259
+ "retrievalBand": "low",
260
+ "sources": [],
261
+ },
262
+ )
263
+
264
+ prompt = build_lesson_prompt(
265
+ lesson_title=payload.lessonTitle or payload.topic,
266
+ competency=payload.learningCompetency or payload.topic,
267
+ grade_level="Grade 11-12",
268
+ subject=payload.subject,
269
+ quarter=payload.quarter,
270
+ learner_level=payload.learnerLevel,
271
+ module_unit=payload.moduleUnit,
272
+ curriculum_chunks=chunks,
273
+ competency_code=payload.competencyCode,
274
+ )
275
+
276
+ raw_explanation = await _generate_text(
277
+ prompt,
278
+ task_type="lesson_generation",
279
+ max_new_tokens=1800,
280
+ enable_thinking=True,
281
+ )
282
+
283
+ parsed_lesson = _strip_thinking_and_parse(raw_explanation)
284
+ parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)
285
+
286
+ if parsed_lesson.get("sections"):
287
+ video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
288
+ if video_section:
289
+ video_data = _fetch_youtube_video(
290
+ payload.lessonTitle or payload.topic,
291
+ payload.subject,
292
+ payload.learningCompetency or "",
293
+ payload.quarter,
294
+ )
295
+ if video_data:
296
+ video_section["videoId"] = video_data.get("videoId", "")
297
+ video_section["videoTitle"] = video_data.get("videoTitle", "")
298
+ video_section["videoChannel"] = video_data.get("videoChannel", "")
299
+ video_section["embedUrl"] = video_data.get("embedUrl", "")
300
+ video_section["thumbnailUrl"] = video_data.get("thumbnailUrl", "")
301
+
302
+ retrieval_summary = summarize_retrieval_confidence(chunks)
303
+
304
+ _log_rag_usage(
305
+ request,
306
+ event_type="lesson",
307
+ topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
308
+ subject=payload.subject,
309
+ quarter=payload.quarter,
310
+ chunks=chunks,
311
+ )
312
+
313
+ needs_review = parsed_lesson.get("needsReview", False)
314
+ if retrieval_summary.get("band") == "low":
315
+ needs_review = True
316
+
317
+ return {
318
+ **parsed_lesson,
319
+ "retrievalConfidence": retrieval_summary.get("confidence", 0.0),
320
+ "retrievalBand": retrieval_summary.get("band", "low"),
321
+ "retrievalMode": retrieval_mode,
322
+ "needsReview": needs_review,
323
+ "sources": [
324
+ {
325
+ "subject": row.get("subject"),
326
+ "quarter": row.get("quarter"),
327
+ "source_file": row.get("source_file"),
328
+ "storage_path": row.get("storage_path"),
329
+ "page": row.get("page"),
330
+ "score": row.get("score"),
331
+ "content_domain": row.get("content_domain"),
332
+ "chunk_type": row.get("chunk_type"),
333
+ "content": row.get("content"),
334
+ }
335
+ for row in chunks
336
+ ],
337
+ "activeModel": get_model_for_task("rag_lesson"),
338
+ }
339
+
340
+
341
+ @router.post("/generate-problem")
342
+ async def rag_generate_problem(request: Request, payload: RagProblemRequest):
343
+ chunks = retrieve_curriculum_context(
344
+ query=payload.topic,
345
+ subject=payload.subject,
346
+ quarter=payload.quarter,
347
+ top_k=5,
348
+ )
349
+ prompt = build_problem_generation_prompt(payload.topic, payload.difficulty, chunks)
350
+ raw = await _generate_text(
351
+ prompt,
352
+ task_type="quiz_generation",
353
+ max_new_tokens=600,
354
+ enable_thinking=False,
355
+ )
356
+
357
+ parsed = _strip_thinking_and_parse(raw)
358
+
359
+ problem = str(parsed.get("problem") or raw)
360
+ if not problem or problem.startswith("{"):
361
+ problem = str(parsed.get("content") or str(parsed))
362
+ if len(problem) < 3 or problem.startswith("{"):
363
+ problem = raw
364
+ solution = str(parsed.get("solution") or "")
365
+ competency_ref = str(parsed.get("competencyReference") or "DepEd competency-aligned")
366
+
367
+ _log_rag_usage(
368
+ request,
369
+ event_type="problem_generation",
370
+ topic=payload.topic,
371
+ subject=payload.subject,
372
+ quarter=payload.quarter,
373
+ chunks=chunks,
374
+ )
375
+
376
+ return {
377
+ "problem": problem,
378
+ "solution": solution,
379
+ "competencyReference": competency_ref,
380
+ "sources": [
381
+ {
382
+ "subject": row.get("subject"),
383
+ "quarter": row.get("quarter"),
384
+ "source_file": row.get("source_file"),
385
+ "page": row.get("page"),
386
+ "score": row.get("score"),
387
+ }
388
+ for row in chunks
389
+ ],
390
+ }
391
+
392
+
393
+ @router.post("/analysis-context")
394
+ async def rag_analysis_context(request: Request, payload: RagAnalysisContextRequest):
395
+ if not payload.weakTopics:
396
+ raise HTTPException(status_code=400, detail="weakTopics must be a non-empty list")
397
+
398
+ chunks = build_analysis_curriculum_context(payload.weakTopics, payload.subject)
399
+ lines = ["LEARNING COMPETENCIES:"]
400
+ for index, row in enumerate(chunks, start=1):
401
+ lines.append(
402
+ f"{index}. {row.get('content')} (Source: {row.get('source_file')} p.{row.get('page')}, "
403
+ f"Q{row.get('quarter')}, {row.get('content_domain')})"
404
+ )
405
+
406
+ _log_rag_usage(
407
+ request,
408
+ event_type="analysis_context",
409
+ topic=", ".join(payload.weakTopics),
410
+ subject=payload.subject,
411
+ quarter=None,
412
+ chunks=chunks,
413
+ )
414
+
415
+ return {"curriculumContext": "\n".join(lines)}
scripts/ingest_curriculum.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import hashlib
5
+ import json
6
+ import logging
7
+ import os
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List
11
+
12
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
13
+
14
+ from rag.vectorstore_loader import (
15
+ get_vectorstore_components,
16
+ reset_vectorstore_singleton,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ def _resolve_data_dir(raw: str | None) -> Path:
23
+ if raw:
24
+ p = Path(raw)
25
+ if p.is_absolute():
26
+ return p
27
+ p = Path.cwd() / raw
28
+ if p.exists():
29
+ return p
30
+ default = Path(__file__).resolve().parents[1] / "datasets"
31
+ return default
32
+
33
+
34
+ def _iter_json_files(data_dir: Path):
35
+ for file in sorted(data_dir.rglob("*")):
36
+ if file.suffix not in {".json", ".jsonl"}:
37
+ continue
38
+ yield file
39
+
40
+
41
+ def _load_records(file_path: Path) -> List[Dict[str, Any]]:
42
+ records: List[Dict[str, Any]] = []
43
+ try:
44
+ raw = file_path.read_text(encoding="utf-8").strip()
45
+ if file_path.suffix == ".jsonl":
46
+ for lineno, line in enumerate(raw.splitlines(), start=1):
47
+ line = line.strip()
48
+ if not line:
49
+ continue
50
+ try:
51
+ records.append(json.loads(line))
52
+ except json.JSONDecodeError:
53
+ logger.warning("Skipping malformed JSONL line %s:%d", file_path.name, lineno)
54
+ else:
55
+ parsed = json.loads(raw)
56
+ if isinstance(parsed, list):
57
+ records.extend(parsed)
58
+ elif isinstance(parsed, dict):
59
+ records.append(parsed)
60
+ except Exception as exc:
61
+ logger.warning("Failed to parse %s: %s", file_path.name, exc)
62
+ return records
63
+
64
+
65
+ def _build_id(source_file: str, page: int, content: str) -> str:
66
+ key = f"{source_file}::{page}::{content[:120]}"
67
+ return hashlib.sha256(key.encode()).hexdigest()[:40]
68
+
69
+
70
def main() -> None:
    """CLI entry point: ingest curriculum .json/.jsonl records into ChromaDB.

    Each record needs a non-empty "content" field; the remaining fields
    (subject, quarter, page, content_domain, chunk_type, source_file) are
    normalized with safe defaults. Chunks are embedded individually and
    upserted in one batch per source file, then a summary is printed.
    """
    parser = argparse.ArgumentParser(description="Ingest DepEd SHS curriculum JSON/JSONL into ChromaDB")
    parser.add_argument("--data-dir", default=None, help="Directory containing .json/.jsonl files")
    parser.add_argument("--reset", action="store_true", help="Reset the vectorstore singleton before ingestion")
    args = parser.parse_args()

    data_dir = _resolve_data_dir(args.data_dir)
    logger.info("Ingesting from: %s", data_dir)

    if args.reset:
        # Drop the cached components, wipe the collection, then drop the
        # cache again so ingestion below starts from a fresh handle.
        reset_vectorstore_singleton()
        _, collection, _ = get_vectorstore_components()
        try:
            # get(include=[]) fetches ids only (no documents/embeddings).
            collection.delete(ids=collection.get(include=[])["ids"])
        except Exception:
            pass  # best-effort wipe; deleting from an empty store may raise
        reset_vectorstore_singleton()

    total_processed = 0
    total_upserted = 0
    total_errors = 0

    _, collection, embedder = get_vectorstore_components()

    for file_path in _iter_json_files(data_dir):
        records = _load_records(file_path)
        # Per-file batch buffers; one upsert call per source file.
        documents: List[str] = []
        metadatas: List[Dict[str, Any]] = []
        ids: List[str] = []
        embeddings_list: List[List[float]] = []

        for record in records:
            total_processed += 1
            content = str(record.get("content") or "").strip()
            if not content:
                logger.debug("Skipping empty content in %s", file_path.name)
                continue

            try:
                # Normalize metadata with defaults so a missing field never
                # aborts the whole batch.
                subject = str(record.get("subject") or "unknown")
                quarter = int(record.get("quarter") or 0)
                page = int(record.get("page") or 0)
                content_domain = str(record.get("content_domain") or "unknown")
                chunk_type = str(record.get("chunk_type") or "unknown")
                source_file = str(record.get("source_file") or file_path.name)

                embedding = embedder.encode(content).tolist()
                # Deterministic id: re-running ingestion updates in place.
                chunk_id = _build_id(source_file, page, content)

                metadata = {
                    "subject": subject,
                    "quarter": quarter,
                    "content_domain": content_domain,
                    "chunk_type": chunk_type,
                    "source_file": source_file,
                    "page": page,
                }

                documents.append(content)
                metadatas.append(metadata)
                ids.append(chunk_id)
                embeddings_list.append(embedding)

            except Exception as exc:
                total_errors += 1
                logger.warning("Error processing record in %s: %s", file_path.name, exc)

        if documents:
            try:
                collection.upsert(
                    ids=ids,
                    documents=documents,
                    metadatas=metadatas,
                    embeddings=embeddings_list,
                )
                total_upserted += len(documents)
                logger.info("Upserted %d chunks from %s", len(documents), file_path.name)
            except Exception as exc:
                # The whole batch counts as failed when the upsert fails.
                total_errors += len(documents)
                logger.warning("Failed to upsert batch from %s: %s", file_path.name, exc)

    print(f"=== Ingestion Summary ===")
    print(f"Total records processed: {total_processed}")
    print(f"Total chunks upserted: {total_upserted}")
    print(f"Total errors: {total_errors}")
155
+
156
+
157
+ if __name__ == "__main__":
158
+ logging.basicConfig(level=logging.INFO)
159
+ main()
scripts/ingest_from_storage.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ingest curriculum PDFs from Firebase Storage into ChromaDB.
3
+ Run: python -m backend.scripts.ingest_from_storage
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ logger = logging.getLogger("mathpulse.ingest")
15
+
16
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
17
+
18
+ from backend.rag.firebase_storage_loader import (
19
+ PDF_METADATA,
20
+ download_pdf_from_storage,
21
+ list_curriculum_blobs,
22
+ )
23
+
24
# Ordered keyword classifiers mapping chunk text to a content domain.
# The first entry whose keyword list matches wins (see _classify_chunk /
# _classify_lesson_section), so earlier entries take precedence.
_CONTENT_DOMAIN_CLASSIFIERS = [
    ("introduction", ["introduction", "welcome", "overview", "objectives", "learning objectives"]),
    ("key_concepts", ["key concepts", "key ideas", "main concepts", "definitions", "key terms"]),
    ("worked_examples", ["example", "worked example", "illustrative example", "sample problem", "solution"]),
    ("important_notes", ["important", "note", "remember", "tip", "caution", "warning", "key point"]),
    ("practice", ["practice", "exercise", "try it", "your turn", "activity", "problem set"]),
    ("summary", ["summary", "recap", "key takeaways", "wrap-up", "conclusion"]),
    ("assessment", ["assessment", "quiz", "test", "evaluation", "exam"]),
]

# Ordered keyword classifiers mapping chunk text to a chunk type
# (definition / formula / procedure / concept / application); first match wins.
_CONTENT_TYPE_CLASSIFIERS = [
    ("definition", ["definition", "define", "means", "is defined as"]),
    ("formula", ["formula", "equation", "expression", "rule"]),
    ("procedure", ["step", "method", "how to", "procedure", "process"]),
    ("concept", ["concept", "idea", "principle", "theory"]),
    ("application", ["application", "use", "example", "solve", "problem"]),
]
41
+
42
+
43
def _classify_chunk(content: str) -> tuple[str, str]:
    """Keyword-classify a chunk into ``(content_domain, chunk_type)``.

    For each classifier table the first label whose keyword appears anywhere
    in the lowercased text wins; defaults are ("general", "concept").
    """
    haystack = content.lower()

    def first_match(classifiers, fallback: str) -> str:
        for label, keywords in classifiers:
            if any(keyword in haystack for keyword in keywords):
                return label
        return fallback

    return (
        first_match(_CONTENT_DOMAIN_CLASSIFIERS, "general"),
        first_match(_CONTENT_TYPE_CLASSIFIERS, "concept"),
    )
59
+
60
+
61
def _classify_lesson_section(content: str) -> str:
    """Classify a lesson section by keywords in its first ~200 characters."""
    lead = content.lower().strip()[:200]
    matches = (
        domain
        for domain, keywords in _CONTENT_DOMAIN_CLASSIFIERS
        if any(keyword in lead for keyword in keywords)
    )
    return next(matches, "general")
69
+
70
+
71
def chunk_text_preserve_pages(text: str, page_starts: List[int], chunk_size: int = 500, overlap: int = 80) -> List[Dict[str, Any]]:
    """Split text into overlapping chunks, preserving page traceability."""
    # NOTE(review): page_starts is accepted but never read — estimated_page
    # below is derived from the word index, not from the real page offsets.
    # Confirm whether true page mapping was intended here.
    words = text.split()
    chunks = []
    i = 0
    chunk_idx = 0
    while i < len(words):
        chunk_words = words[i : i + chunk_size]
        chunk_text = " ".join(chunk_words)
        # Rough page estimate: one "page" per chunk_size words, minimum 1.
        estimated_page = max(1, (i // chunk_size) + 1)
        content_domain, chunk_type = _classify_chunk(chunk_text)

        chunks.append({
            "text": chunk_text,
            "chunk_index": chunk_idx,
            "estimated_page": estimated_page,
            "content_domain": content_domain,
            "chunk_type": chunk_type,
        })
        # Advance by chunk_size - overlap so consecutive chunks share context.
        i += chunk_size - overlap
        chunk_idx += 1
    return chunks
93
+
94
+
95
def extract_pdf_text_and_pages(pdf_bytes: bytes) -> tuple[str, List[int]]:
    """Extract text from PDF bytes.

    Returns the newline-joined page text plus the character offset at which
    each page starts within that text. Returns ("", []) when neither pypdf
    nor PyPDF2 is installed.
    """
    try:
        from pypdf import PdfReader
    except ImportError:
        try:
            # Legacy fallback for environments that still ship PyPDF2.
            import PyPDF2 as PdfReaderModule
            from PyPDF2 import PdfReader
        except ImportError:
            logger.error("No PDF library available. Install: pip install pypdf")
            return "", []

    import io

    reader = PdfReader(io.BytesIO(pdf_bytes))
    page_texts = [page.extract_text() or "" for page in reader.pages]

    page_starts: List[int] = []
    offset = 0
    for page_text in page_texts:
        page_starts.append(offset)
        offset += len(page_text) + 1  # +1 for the joining newline

    return "\n".join(page_texts), page_starts
122
+
123
+
124
def get_firestore_client():
    """Return a Firestore client, initializing firebase_admin on first use.

    Credentials are resolved from FIREBASE_SERVICE_ACCOUNT_JSON (inline
    JSON), then FIREBASE_SERVICE_ACCOUNT_FILE (path), then application
    default credentials. Returns None when Firestore is unavailable.
    """
    try:
        import firebase_admin
        from firebase_admin import firestore

        if not firebase_admin._apps:
            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
            app_options = {"storageBucket": bucket_name}
            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
            if sa_json:
                import json as _json
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(_json.loads(sa_json)), app_options)
            elif sa_file and Path(sa_file).exists():
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(sa_file), app_options)
            else:
                firebase_admin.initialize_app(options=app_options)
        return firestore.client()
    except Exception as e:
        logger.warning("Firestore unavailable: %s", e)
        return None
147
+
148
+
149
def ingest_from_firebase_storage(force_reindex: bool = False):
    """Download PDFs from Firebase Storage and ingest into ChromaDB."""
    try:
        from sentence_transformers import SentenceTransformer
        import chromadb
    except ImportError:
        logger.error("Missing dependencies. Install: pip install chromadb sentence-transformers pypdf")
        return

    chroma_path = os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore")
    chroma_client = chromadb.PersistentClient(path=chroma_path)
    collection = chroma_client.get_or_create_collection(
        name="curriculum_chunks",
        metadata={"hnsw:space": "cosine"},
    )
    # NOTE(review): hard-coded bge-base here, while .env.example declares
    # EMBEDDING_MODEL=BAAI/bge-small-en-v1.5 — a mismatch with the
    # query-side embedder would break retrieval; confirm which model the
    # vectorstore actually expects.
    embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")

    db = get_firestore_client()  # may be None; Firestore bookkeeping is optional

    logger.info("Starting ingestion from Firebase Storage...")
    ingested_count = 0
    skipped_count = 0
    error_count = 0

    for storage_path, metadata in PDF_METADATA.items():
        doc_id = storage_path.replace("/", "_").replace(".pdf", "")

        # Skip PDFs already marked ingested in Firestore (unless forced).
        if db:
            try:
                doc_ref = db.collection("curriculumDocuments").document(doc_id)
                existing = doc_ref.get()
                if existing.exists:
                    if not force_reindex and existing.to_dict().get("status") == "ingested":
                        logger.info("[SKIP] %s already ingested", storage_path)
                        skipped_count += 1
                        continue
            except Exception as e:
                logger.warning("Firestore check failed for %s: %s", storage_path, e)

        logger.info("Downloading: %s", storage_path)
        pdf_bytes = download_pdf_from_storage(storage_path)
        if pdf_bytes is None:
            logger.error("[ERROR] Failed to download: %s", storage_path)
            if db:
                try:
                    # NOTE(review): doc_ref may be unbound when the Firestore
                    # check above raised before assigning it; the bare except
                    # below would mask the resulting NameError — verify.
                    doc_ref.set({
                        "storagePath": storage_path,
                        "status": "failed",
                        "error": "download_failed",
                        **metadata,
                    }, merge=True)
                except:
                    pass
            error_count += 1
            continue

        logger.info("Extracting text from: %s (%d bytes)", storage_path, len(pdf_bytes))
        full_text, page_starts = extract_pdf_text_and_pages(pdf_bytes)
        if not full_text.strip():
            logger.warning("[WARN] No text extracted from: %s", storage_path)
            error_count += 1
            continue

        chunks = chunk_text_preserve_pages(full_text, page_starts)
        logger.info(" -> %d chunks created", len(chunks))

        # Remove any previous chunks for this document before re-adding.
        existing_ids = [cid for cid in collection.get()["ids"] if cid.startswith(f"{doc_id}_chunk_")]
        if existing_ids:
            collection.delete(ids=existing_ids)
            logger.info(" Removed %d existing chunks", len(existing_ids))

        # Embed and add chunks one at a time (normalized for cosine space).
        for chunk in chunks:
            chunk_id = f"{doc_id}_chunk_{chunk['chunk_index']}"
            embedding = embedder.encode(chunk["text"], normalize_embeddings=True).tolist()

            collection.add(
                embeddings=[embedding],
                documents=[chunk["text"]],
                metadatas=[{
                    "document_id": doc_id,
                    "module_id": metadata.get("subjectId", ""),
                    "lesson_id": f"lesson-{doc_id}",
                    "title": metadata.get("subject", ""),
                    "subject": metadata.get("subject", ""),
                    "subjectId": metadata.get("subjectId", ""),
                    "quarter": metadata.get("quarter", 1),
                    "competency_code": metadata.get("competency_code", ""),
                    "content_domain": chunk["content_domain"],
                    "chunk_type": chunk["chunk_type"],
                    "source_file": storage_path.split("/")[-1],
                    "storage_path": storage_path,
                    "page": chunk["estimated_page"],
                    "chunk_index": chunk["chunk_index"],
                    "type": metadata.get("type", ""),
                }],
                ids=[chunk_id],
            )

        # Record the successful ingestion in Firestore (best-effort).
        if db:
            try:
                doc_ref.set({
                    "id": doc_id,
                    "storagePath": storage_path,
                    "status": "ingested",
                    "ingestedAt": __import__("firebase_admin").firestore.SERVER_TIMESTAMP,
                    "chunkCount": len(chunks),
                    **metadata,
                }, merge=True)
            except Exception as e:
                logger.warning("Firestore update failed: %s", e)

        logger.info("[OK] Ingested %s (%d chunks)", storage_path, len(chunks))
        ingested_count += 1

    logger.info("=" * 50)
    logger.info("Ingestion complete: %d ingested, %d skipped, %d errors", ingested_count, skipped_count, error_count)
    logger.info("Total chunks in ChromaDB: %d", collection.count())
266
+
267
+
268
+ if __name__ == "__main__":
269
+ import argparse
270
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
271
+
272
+ parser = argparse.ArgumentParser(description="Ingest curriculum PDFs from Firebase Storage into ChromaDB")
273
+ parser.add_argument("--force", action="store_true", help="Re-ingest even if already ingested")
274
+ args = parser.parse_args()
275
+
276
+ ingest_from_firebase_storage(force_reindex=args.force)
scripts/register_firestore_metadata.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Register curriculum document metadata in Firestore.
3
+ Populates the curriculumDocuments collection so the app can display
4
+ lessons mapped to their source PDFs before ingestion.
5
+
6
+ Run: python backend/scripts/register_firestore_metadata.py
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
16
+
17
+
18
def _get_firestore_client():
    """Return a Firestore client, initializing firebase_admin on first use.

    Credentials are resolved from FIREBASE_SERVICE_ACCOUNT_JSON (inline
    JSON), then FIREBASE_SERVICE_ACCOUNT_FILE (path), then application
    default credentials. Prints the failure and returns None on any error.
    """
    try:
        import firebase_admin
        from firebase_admin import firestore

        if not firebase_admin._apps:
            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
            app_options = {"storageBucket": bucket_name}
            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
            if sa_json:
                import json as _json
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(_json.loads(sa_json)), app_options)
            elif sa_file and Path(sa_file).exists():
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(sa_file), app_options)
            else:
                firebase_admin.initialize_app(options=app_options)
        return firestore.client()
    except Exception as e:
        print(f"Firestore init failed: {e}")
        return None
41
+
42
+
43
# Static registry of curriculum PDFs to pre-register in Firestore's
# curriculumDocuments collection. Each entry maps a lesson/module to its
# source PDF in Firebase Storage; status starts as "uploaded" and is later
# flipped to "ingested" by the ingestion script.
CURRICULUM_DOCUMENTS = [
    {
        "id": "gm_lesson_1",
        "moduleId": "gm-q1-business-finance",
        "lessonId": "gm-q1-bf-1",
        "title": "Represent business transactions and financial goals using variables and equations.",
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "quarter": 1,
        "competencyCode": "GM11-BF-1",
        "learningCompetency": "Represent business transactions and financial goals using variables and equations.",
        "storagePath": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
        "status": "uploaded",
    },
    {
        "id": "gm_navotas_lesson_1",
        "moduleId": "gm-q1-patterns-sequences-series",
        "lessonId": "gm-q1-pss-1",
        "title": "Identify and describe arithmetic and geometric patterns in data.",
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "quarter": 1,
        "competencyCode": "GM11-PSS-1",
        "learningCompetency": "Identify and describe arithmetic and geometric patterns in data.",
        "storagePath": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
        "status": "uploaded",
    },
    {
        "id": "bm_lesson_1",
        "moduleId": "bm-q1-business-math",
        "lessonId": "bm-q1-1",
        "title": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
        "subject": "Business Mathematics",
        "subjectId": "business-math",
        "quarter": 1,
        "competencyCode": "ABM_BM11BS-Ia-b-1",
        "learningCompetency": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
        "storagePath": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
        "status": "uploaded",
    },
    {
        "id": "stat_lesson_1",
        "moduleId": "stat-q1-probability",
        "lessonId": "stat-q1-1",
        "title": "Define and describe random variables and their types.",
        "subject": "Statistics and Probability",
        "subjectId": "stats-prob",
        "quarter": 1,
        "competencyCode": "SP_SHS11-Ia-1",
        "learningCompetency": "Define and describe random variables and their types.",
        "storagePath": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf",
        "status": "uploaded",
    },
    {
        "id": "fm1_lesson_1",
        "moduleId": "fm1-q1-counting",
        "lessonId": "fm1-q1-fpc-1",
        "title": "Apply the fundamental counting principle in contextual problems.",
        "subject": "Finite Mathematics 1",
        "subjectId": "finite-math-1",
        "quarter": 1,
        "competencyCode": "FM1-SHS11-Ia-1",
        "learningCompetency": "Apply the fundamental counting principle in contextual problems.",
        "storagePath": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
        "status": "uploaded",
    },
    {
        "id": "fm2_lesson_1",
        "moduleId": "fm2-q1-matrices",
        "lessonId": "fm2-q1-matrices-1",
        "title": "Represent contextual data using matrix notation.",
        "subject": "Finite Mathematics 2",
        "subjectId": "finite-math-2",
        "quarter": 1,
        "competencyCode": "FM2-SHS11-Ia-1",
        "learningCompetency": "Represent contextual data using matrix notation.",
        "storagePath": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
        "status": "uploaded",
    },
    {
        "id": "org_mgmt_lesson_1",
        "moduleId": "org-mgmt-q1",
        "lessonId": "org-mgmt-q1-1",
        "title": "Understand the fundamental concepts of organization and management.",
        "subject": "Organization and Management",
        "subjectId": "org-mgmt",
        "quarter": 1,
        "competencyCode": "ABM_OM11-Ia-1",
        "learningCompetency": "Understand the fundamental concepts of organization and management.",
        "storagePath": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
        "status": "uploaded",
    },
]
136
+
137
+
138
def register_metadata(force: bool = False):
    """Upsert CURRICULUM_DOCUMENTS into Firestore's curriculumDocuments collection.

    Existing documents are skipped unless *force* is set, in which case they
    are overwritten (merge semantics). Prints a per-document status line and
    a final summary.
    """
    db = _get_firestore_client()
    if db is None:
        print("ERROR: Cannot connect to Firestore. Check credentials.")
        print("Set FIREBASE_SERVICE_ACCOUNT_JSON or place mathpulse-sa.json in backend/ directory.")
        return

    print("Connected to Firestore.")
    print("-" * 50)

    counts = {"registered": 0, "skipped": 0, "updated": 0}

    for doc in CURRICULUM_DOCUMENTS:
        doc_id = doc["id"]
        doc_ref = db.collection("curriculumDocuments").document(doc_id)
        snapshot = doc_ref.get()

        if snapshot.exists and not force:
            print(f"[SKIP] {doc_id} already registered")
            counts["skipped"] += 1
            continue

        # Reaching here with an existing doc implies force=True.
        counts["updated" if snapshot.exists else "registered"] += 1

        # NOTE(review): uploadedAt is explicitly written as None even with
        # merge=True, which overwrites any existing timestamp on a forced
        # re-register — confirm this is intended.
        doc_ref.set({
            **doc,
            "uploadedAt": None,
        }, merge=True)
        print(f"[OK] {'Updated' if force and snapshot.exists else 'Registered'} {doc_id} -> {doc.get('storagePath')}")

    print("-" * 50)
    print(f"Done: {counts['registered']} registered, {counts['skipped']} skipped, {counts['updated']} updated.")
176
+
177
+
178
+ if __name__ == "__main__":
179
+ import argparse
180
+ parser = argparse.ArgumentParser(description="Register curriculum document metadata in Firestore")
181
+ parser.add_argument("--force", action="store_true", help="Overwrite existing records")
182
+ args = parser.parse_args()
183
+ register_metadata(force=args.force)
scripts/upload_curriculum_pdfs.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Upload DepEd curriculum PDFs to Firebase Storage.
3
+ Run once during initial setup: python scripts/upload_curriculum_pdfs.py
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import Dict, List
12
+
13
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
14
+
15
# Local folder that holds the source PDFs to upload.
# NOTE(review): machine-specific developer path — adjust before running elsewhere.
LOCAL_PDF_DIR = r"C:\Users\Deign\Downloads\Documents"

# Per-file metadata driving both the Storage upload and the Firestore record.
# Keyed by local filename; "storage_path" is the destination blob path in the
# Firebase Storage bucket, "strand"/"quarters" become Firestore fields.
PDF_METADATA: Dict[str, Dict[str, object]] = {
    "GENERAL-MATHEMATICS-1.pdf": {
        "subject": "General Mathematics",
        "type": "curriculum_guide",
        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
        "quarters": ["Q1", "Q2", "Q3", "Q4"],
        "storage_path": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
    },
    "Finite-Mathematics-1-1.pdf": {
        "subject": "Finite Mathematics 1",
        "type": "curriculum_guide",
        "strand": ["STEM", "ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
    },
    "Finite-Mathematics-2-1.pdf": {
        "subject": "Finite Mathematics 2",
        "type": "curriculum_guide",
        "strand": ["STEM", "ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
    },
    # SDO Navotas self-learning modules below use type "sdo_module".
    "SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
        "subject": "General Mathematics",
        "type": "sdo_module",
        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
    },
    "SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": {
        "subject": "Business Mathematics",
        "type": "sdo_module",
        "strand": ["ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
    },
    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": {
        "subject": "Organization and Management",
        "type": "sdo_module",
        "strand": ["ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
    },
    "SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf": {
        "subject": "Statistics and Probability",
        "type": "sdo_module",
        "strand": ["STEM", "ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf",
    },
}
68
+
69
+
70
def chunk_text(text: str, chunk_size: int = 600, overlap: int = 100) -> List[str]:
    """Split text into overlapping, word-based chunks.

    Args:
        text: Source text; split on whitespace. Blank text yields ``[]``.
        chunk_size: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks.

    Returns:
        Space-joined chunk strings covering every word in order; the final
        chunk may be shorter than ``chunk_size``.
    """
    words = text.split()
    # FIX: the original advanced by (chunk_size - overlap), which is <= 0 when
    # overlap >= chunk_size and looped forever. Clamp the stride to >= 1 word.
    step = max(1, chunk_size - overlap)
    chunks: List[str] = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start : start + chunk_size]))
    return chunks
80
+
81
+
82
def upload_pdfs():
    """Upload PDFs from the local directory to Firebase Storage.

    For each entry in PDF_METADATA: skip files missing locally or already
    recorded in Firestore, otherwise upload the blob and write a metadata
    document to the ``curriculumDocs`` collection (keyed by filename).
    Best-effort: individual failures are printed and do not abort the run.
    """
    try:
        import firebase_admin
        from firebase_admin import credentials, storage, firestore
    except ImportError:
        print("ERROR: firebase-admin not installed. Run: pip install firebase-admin")
        return

    # Service account key is expected one level above this scripts/ directory.
    service_account_path = Path(__file__).resolve().parents[1] / "serviceAccountKey.json"
    if not service_account_path.exists():
        print(f"ERROR: Service account key not found at {service_account_path}")
        return

    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "").strip()
    if not bucket_name:
        print("ERROR: FIREBASE_STORAGE_BUCKET not set in environment")
        return

    cred = credentials.Certificate(str(service_account_path))
    firebase_admin.initialize_app(cred, {"storageBucket": bucket_name})

    bucket = storage.bucket()
    db = firestore.client()

    print(f"Scanning: {LOCAL_PDF_DIR}")
    print("-" * 50)

    uploaded = 0
    skipped = 0

    for filename, meta in PDF_METADATA.items():
        local_path = Path(LOCAL_PDF_DIR) / filename

        if not local_path.exists():
            # FIX: these log lines previously printed the literal "(unknown)"
            # instead of the file being processed.
            print(f"[SKIP] {filename} not found in {LOCAL_PDF_DIR}")
            skipped += 1
            continue

        # Idempotency guard: an existing Firestore doc means a prior upload.
        doc_ref = db.collection("curriculumDocs").document(filename)
        if doc_ref.get().exists:
            print(f"[SKIP] {filename} already uploaded")
            skipped += 1
            continue

        try:
            blob = bucket.blob(meta["storage_path"])
            blob.upload_from_filename(str(local_path), content_type="application/pdf")

            doc_ref.set(
                {
                    "filename": filename,
                    "subject": meta["subject"],
                    "type": meta["type"],
                    "strand": meta["strand"],
                    "quarters": meta["quarters"],
                    "storage_path": meta["storage_path"],
                    "uploaded_at": firestore.SERVER_TIMESTAMP,
                    # Flipped to True later by index_pdfs().
                    "indexed": False,
                }
            )

            print(f"[OK] Uploaded {filename}")
            uploaded += 1
        except Exception as e:
            print(f"[ERROR] {filename}: {e}")

    print("-" * 50)
    print(f"Upload complete: {uploaded} uploaded, {skipped} skipped")
151
+
152
+
153
def index_pdfs():
    """Extract text from PDFs, chunk, embed, and store in ChromaDB.

    Firestore is optional here: when reachable, documents already marked
    ``indexed`` are skipped and are flagged ``indexed: True`` on success.
    Chunk IDs are deterministic (``<filename>_chunk_<i>``) so re-runs are
    idempotent at the chunk level as well.
    """
    try:
        from pypdf import PdfReader
        import chromadb
        from sentence_transformers import SentenceTransformer
        from firebase_admin import firestore
    except ImportError:
        print("ERROR: Missing dependencies. Run: pip install pypdf chromadb sentence-transformers firebase-admin")
        return

    chroma_path = os.getenv("CHROMA_PERSIST_PATH", "./datasets/vectorstore")

    chroma_client = chromadb.PersistentClient(path=chroma_path)
    collection = chroma_client.get_or_create_collection(
        name="curriculum_chunks",
        metadata={"hnsw:space": "cosine"},
    )
    # FIX: honor EMBEDDING_MODEL from the environment (see .env.example) so the
    # indexing and query sides agree; the previous hard-coded default is kept.
    # NOTE(review): .env.example ships bge-small while this default is bge-base
    # — changing either requires full re-ingestion; confirm which is intended.
    embedder = SentenceTransformer(os.getenv("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5"))

    # Firestore is best-effort: without it we simply index everything.
    try:
        import firebase_admin
        from firebase_admin import firestore as FS
        db = FS.client()
    except Exception:
        db = None

    print(f"Indexing PDFs from: {LOCAL_PDF_DIR}")
    print("-" * 50)

    indexed = 0
    skipped = 0

    for filename, meta in PDF_METADATA.items():
        doc_ref = None
        if db:
            doc_ref = db.collection("curriculumDocs").document(filename)
            doc = doc_ref.get()
            # FIX: the original called doc.to_dict() without checking
            # doc.exists, which raises AttributeError for missing documents.
            if doc.exists and (doc.to_dict() or {}).get("indexed", False):
                print(f"[SKIP] {filename} already indexed")
                skipped += 1
                continue

        local_path = Path(LOCAL_PDF_DIR) / filename
        if not local_path.exists():
            # FIX: log lines previously printed the literal "(unknown)".
            print(f"[SKIP] {filename} not found")
            skipped += 1
            continue

        try:
            reader = PdfReader(str(local_path))
            full_text = "\n".join(page.extract_text() or "" for page in reader.pages)

            if not full_text.strip():
                print(f"[WARN] {filename} has no extractable text")
                continue

            chunks = chunk_text(full_text)
            print(f"[INFO] {filename} -> {len(chunks)} chunks")

            for i, chunk in enumerate(chunks):
                # FIX: chunk IDs previously used the literal "(unknown)",
                # colliding across files; key them by source filename.
                chunk_id = f"{filename}_chunk_{i}"

                existing = collection.get(ids=[chunk_id])
                if existing and existing.get("ids"):
                    continue

                chunk_embedding = embedder.encode(
                    chunk,
                    normalize_embeddings=True,
                ).tolist()

                collection.add(
                    embeddings=[chunk_embedding],
                    documents=[chunk],
                    metadatas=[
                        {
                            "source_file": filename,
                            "subject": meta["subject"],
                            "strand": ",".join(meta["strand"]),
                            "quarter": ",".join(meta["quarters"]),
                            "chunk_index": i,
                            "type": meta["type"],
                        }
                    ],
                    ids=[chunk_id],
                )

            if db and doc_ref is not None:
                # FIX: set(..., merge=True) instead of update() so indexing a
                # file that was never uploaded does not raise NotFound.
                doc_ref.set({"indexed": True}, merge=True)

            print(f"[OK] Indexed {filename}")
            indexed += 1
        except Exception as e:
            print(f"[ERROR] {filename}: {e}")

    print("-" * 50)
    print(f"Indexing complete: {indexed} indexed, {skipped} skipped")
    print(f"Total chunks in ChromaDB: {collection.count()}")
251
+
252
+
253
if __name__ == "__main__":
    import argparse

    # One positional argument decides which phase(s) of the pipeline run.
    cli = argparse.ArgumentParser(description="Upload and index DepEd curriculum PDFs")
    cli.add_argument("action", choices=["upload", "index", "both"], help="Action to perform")
    options = cli.parse_args()

    wants_upload = options.action in ("upload", "both")
    wants_index = options.action in ("index", "both")

    if wants_upload:
        upload_pdfs()
    if wants_index:
        index_pdfs()
services/__init__.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backend service helpers for inference, logging, and integrations."""
2
+
3
+ from .inference_client import (
4
+ create_default_client,
5
+ InferenceRequest,
6
+ InferenceClient,
7
+ is_sequential_model,
8
+ get_current_runtime_config,
9
+ get_model_for_task,
10
+ set_runtime_model_profile,
11
+ set_runtime_model_override,
12
+ reset_runtime_overrides,
13
+ model_supports_thinking,
14
+ _MODEL_PROFILES,
15
+ )
16
+
17
+ from .ai_client import (
18
+ get_deepseek_client,
19
+ CHAT_MODEL,
20
+ REASONER_MODEL,
21
+ APIError,
22
+ RateLimitError,
23
+ APITimeoutError,
24
+ )
25
+
26
# Explicit public API of the services package: re-exports from
# inference_client (runtime model selection) and ai_client (DeepSeek access).
# NOTE(review): _MODEL_PROFILES is underscore-private yet exported — confirm
# external callers really need it.
__all__ = [
    "create_default_client",
    "InferenceRequest",
    "InferenceClient",
    "is_sequential_model",
    "get_current_runtime_config",
    "get_model_for_task",
    "set_runtime_model_profile",
    "set_runtime_model_override",
    "reset_runtime_overrides",
    "model_supports_thinking",
    "_MODEL_PROFILES",
    "get_deepseek_client",
    "CHAT_MODEL",
    "REASONER_MODEL",
    "APIError",
    "RateLimitError",
    "APITimeoutError",
]
services/ai_client.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from openai import OpenAI, APIError, RateLimitError, APITimeoutError
3
+ from functools import lru_cache
4
+
5
+ __all__ = [
6
+ "get_deepseek_client",
7
+ "CHAT_MODEL",
8
+ "REASONER_MODEL",
9
+ "DEEPSEEK_BASE_URL",
10
+ "APIError",
11
+ "RateLimitError",
12
+ "APITimeoutError",
13
+ ]
14
+
15
# DeepSeek endpoint and model names; all overridable via environment variables.
DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
# Fast general-purpose chat model.
CHAT_MODEL = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
# Step-by-step reasoning model for worked solutions.
REASONER_MODEL = os.getenv("DEEPSEEK_REASONER_MODEL", "deepseek-reasoner")
18
+
19
+
20
@lru_cache(maxsize=1)
def get_deepseek_client() -> OpenAI:
    """Return a process-wide, memoized OpenAI-compatible DeepSeek client.

    Raises:
        ValueError: if DEEPSEEK_API_KEY is not set in the environment.
    """
    key = os.getenv("DEEPSEEK_API_KEY")
    if key:
        return OpenAI(api_key=key, base_url=DEEPSEEK_BASE_URL)
    raise ValueError("DEEPSEEK_API_KEY environment variable not set")
services/deterministic_cache.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+ from collections import OrderedDict
4
+ from dataclasses import dataclass
5
+ from hashlib import sha256
6
+ from threading import Lock
7
+ from typing import Any, Dict, Optional
8
+
9
+ try:
10
+ import redis.asyncio as redis_async # type: ignore[import-not-found]
11
+ except Exception: # pragma: no cover - optional dependency
12
+ redis_async = None # type: ignore[assignment]
13
+
14
+
15
@dataclass
class _CacheRecord:
    """One cached value plus its absolute expiry timestamp (epoch seconds)."""

    value: Any
    expires_at: float


class DeterministicResponseCache:
    """TTL + LRU response cache with optional Redis backing.

    - Local cache is always used for fast lookups.
    - Redis is optional and fail-open.
    - Values are normalized through JSON roundtrip to keep payloads serializable.
    """

    def __init__(
        self,
        *,
        enabled: bool,
        max_entries: int,
        redis_url: Optional[str] = None,
        redis_prefix: str = "mathpulse:det-cache:",
        logger: Any = None,
    ) -> None:
        self.enabled = bool(enabled)
        self.max_entries = max(1, int(max_entries))
        self.redis_prefix = redis_prefix
        self.logger = logger

        self._lock = Lock()
        self._local: OrderedDict[str, _CacheRecord] = OrderedDict()

        # Redis is strictly optional: any initialization failure downgrades the
        # cache to local-only operation instead of raising.
        self._redis = None
        if self.enabled and redis_url and redis_async is not None:
            try:
                self._redis = redis_async.from_url(redis_url, encoding="utf-8", decode_responses=True)
            except Exception as err:
                self._warn(f"Redis cache disabled: failed to initialize client: {err}")
                self._redis = None

    def build_cache_key(self, namespace: str, payload: Dict[str, Any]) -> str:
        """Derive a stable key from a namespace plus a canonical-JSON payload hash."""
        serialized = json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str, ensure_ascii=True)
        digest = sha256(serialized.encode("utf-8")).hexdigest()
        return f"{namespace}:{digest}"

    async def get(self, key: str) -> Optional[Any]:
        """Return the cached value, or None on miss / expiry / disabled cache."""
        if not self.enabled:
            return None

        cached = self._get_local(key)
        if cached is not None:
            return cached

        if self._redis is None:
            return None

        redis_key = self._redis_key(key)
        try:
            raw = await self._redis.get(redis_key)
            if raw is None:
                return None
            value = json.loads(raw)

            # Warm the local cache using Redis's remaining TTL so both layers
            # expire together.
            remaining = await self._redis.ttl(redis_key)
            if isinstance(remaining, int) and remaining > 0:
                self._set_local(key, value, remaining)
            return value
        except Exception as err:
            # Fail-open: a Redis outage degrades to a miss, never an error.
            self._warn(f"Redis cache get failed for {key}: {err}")
            return None

    async def set(self, key: str, value: Any, ttl_seconds: int) -> None:
        """Store *value* under *key* in both layers; no-op if disabled or TTL <= 0."""
        if not self.enabled:
            return

        ttl = int(ttl_seconds)
        if ttl <= 0:
            return

        safe_value = self._normalize(value)
        self._set_local(key, safe_value, ttl)

        if self._redis is None:
            return

        redis_key = self._redis_key(key)
        try:
            await self._redis.set(redis_key, json.dumps(safe_value, separators=(",", ":"), default=str), ex=ttl)
        except Exception as err:
            self._warn(f"Redis cache set failed for {key}: {err}")

    async def clear(self) -> None:
        """Drop every locally cached entry (Redis entries simply age out)."""
        with self._lock:
            self._local.clear()

    def _normalize(self, value: Any) -> Any:
        # Keep payloads immutable enough for cache semantics and JSON-safe for Redis.
        return json.loads(json.dumps(value, default=str))

    def _redis_key(self, key: str) -> str:
        return f"{self.redis_prefix}{key}"

    def _get_local(self, key: str) -> Optional[Any]:
        """Expire-aware local lookup that refreshes LRU recency on a hit."""
        current = time.time()
        with self._lock:
            self._prune_locked(current)
            entry = self._local.get(key)
            if entry is None:
                return None
            if entry.expires_at <= current:
                self._local.pop(key, None)
                return None
            self._local.move_to_end(key, last=True)
            return entry.value

    def _set_local(self, key: str, value: Any, ttl_seconds: int) -> None:
        """Insert/refresh a local entry, then evict LRU entries over capacity."""
        deadline = time.time() + ttl_seconds
        with self._lock:
            self._prune_locked(time.time())
            self._local[key] = _CacheRecord(value=value, expires_at=deadline)
            self._local.move_to_end(key, last=True)
            while len(self._local) > self.max_entries:
                self._local.popitem(last=False)

    def _prune_locked(self, now: float) -> None:
        # Caller must hold self._lock.
        stale_keys = [cache_key for cache_key, entry in self._local.items() if entry.expires_at <= now]
        for cache_key in stale_keys:
            self._local.pop(cache_key, None)

    def _warn(self, message: str) -> None:
        if self.logger is not None:
            self.logger.warning(message)
services/email_service.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import logging
3
+ import os
4
+ import smtplib
5
+ import json
6
+ from dataclasses import dataclass
7
+ from email.message import EmailMessage
8
+ from typing import Optional, Protocol
9
+
10
+ import requests
11
+
12
+
13
+ logger = logging.getLogger("mathpulse")
14
+
15
+
16
+ def _first_nonempty_env(*names: str) -> str:
17
+ for name in names:
18
+ value = os.getenv(name, "")
19
+ if value and value.strip():
20
+ return value.strip()
21
+ return ""
22
+
23
+
24
+ def _parse_int_env(value: str, default: int, *, env_name: str) -> int:
25
+ try:
26
+ parsed = int(value)
27
+ except (TypeError, ValueError):
28
+ logger.warning("Invalid %s value '%s'; using default %s", env_name, value, default)
29
+ return default
30
+
31
+ if parsed <= 0:
32
+ logger.warning("Invalid %s value '%s'; using default %s", env_name, value, default)
33
+ return default
34
+
35
+ return parsed
36
+
37
+
38
+ def _extract_brevo_api_key(raw_value: str) -> str:
39
+ value = (raw_value or "").strip()
40
+ if not value:
41
+ return ""
42
+
43
+ # Standard Brevo transactional API key format.
44
+ if value.startswith("xkeysib-"):
45
+ return value
46
+
47
+ parse_candidates = [value]
48
+
49
+ # Brevo MCP token is often base64-encoded JSON containing {"api_key": "xkeysib-..."}.
50
+ try:
51
+ padded = value + ("=" * (-len(value) % 4))
52
+ decoded = base64.urlsafe_b64decode(padded.encode("utf-8"))
53
+ decoded_text = decoded.decode("utf-8").strip()
54
+ if decoded_text:
55
+ parse_candidates.append(decoded_text)
56
+ except (ValueError, UnicodeDecodeError):
57
+ pass
58
+
59
+ for candidate in parse_candidates:
60
+ try:
61
+ payload = json.loads(candidate)
62
+ except json.JSONDecodeError:
63
+ continue
64
+
65
+ if isinstance(payload, dict):
66
+ api_key = str(
67
+ payload.get("api_key") or payload.get("apiKey") or payload.get("api-key") or ""
68
+ ).strip()
69
+ if api_key:
70
+ return api_key
71
+
72
+ return ""
73
+
74
+
75
def _resolve_brevo_api_key_from_env() -> str:
    """Resolve the Brevo API key from the environment.

    Preference order: BREVO_API_KEY / BREVO_API_TOKEN, then BREVO_MCP_TOKEN.
    Returns "" when no variable yields a usable key.
    """
    direct_value = _first_nonempty_env("BREVO_API_KEY", "BREVO_API_TOKEN")
    direct_key = _extract_brevo_api_key(direct_value)
    if direct_key:
        # The env var held a wrapper payload rather than the bare key itself.
        if direct_value and direct_value != direct_key:
            logger.info("Resolved Brevo API key from BREVO_API_KEY/BREVO_API_TOKEN payload.")
        return direct_key

    mcp_value = _first_nonempty_env("BREVO_MCP_TOKEN")
    mcp_key = _extract_brevo_api_key(mcp_value)
    if mcp_key:
        logger.info("Resolved Brevo API key from BREVO_MCP_TOKEN.")
        return mcp_key

    if mcp_value:
        logger.warning("BREVO_MCP_TOKEN is set but did not contain a usable API key payload.")

    return ""
93
+
94
+
95
@dataclass
class EmailMessagePayload:
    """Provider-agnostic description of one transactional email."""

    to_name: str       # recipient display name (may be empty)
    to_email: str      # recipient address
    subject: str
    html_content: str  # HTML body
    text_content: str  # plain-text alternative body
102
+
103
+
104
@dataclass
class EmailSendResult:
    """Outcome of a send attempt; providers return this instead of raising."""

    success: bool
    provider: str                         # e.g. "brevo_api", "brevo_smtp", "none"
    message_id: Optional[str] = None      # provider message ID when available
    error_code: Optional[str] = None      # machine-readable failure code
    error_message: Optional[str] = None   # human-readable details (may be truncated)
    retryable: bool = False               # True for transient failures worth retrying
112
+
113
+
114
class EmailProvider(Protocol):
    """Structural interface every email provider implements (duck-typed)."""

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """Send one email and report the outcome; implementations do not raise."""
        ...
117
+
118
+
119
class BrevoApiEmailProvider:
    """Sends transactional email via the Brevo REST API (POST /v3/smtp/email)."""

    def __init__(self, api_key: str, from_address: str, from_name: str, timeout_sec: int = 15) -> None:
        self._api_key = api_key
        self._from_address = from_address
        self._from_name = from_name
        self._timeout_sec = timeout_sec

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """POST the message to Brevo and map the HTTP outcome to EmailSendResult.

        Never raises: HTTP errors and request exceptions are converted into
        failure results (transient ones flagged ``retryable``).
        """
        try:
            response = requests.post(
                "https://api.brevo.com/v3/smtp/email",
                headers={
                    "accept": "application/json",
                    "content-type": "application/json",
                    "api-key": self._api_key,
                },
                json={
                    "sender": {
                        "name": self._from_name,
                        "email": self._from_address,
                    },
                    "to": [
                        {
                            "name": message.to_name,
                            "email": message.to_email,
                        }
                    ],
                    "subject": message.subject,
                    "htmlContent": message.html_content,
                    "textContent": message.text_content,
                },
                timeout=self._timeout_sec,
            )

            if 200 <= response.status_code < 300:
                # Brevo normally returns {"messageId": ...}; tolerate both spellings
                # and an empty body.
                payload = response.json() if response.content else {}
                message_id = str(payload.get("messageId") or payload.get("message_id") or "").strip() or None
                return EmailSendResult(success=True, provider="brevo_api", message_id=message_id)

            # Non-2xx: keep a truncated response body for diagnostics; classify
            # timeout/rate-limit/server statuses as retryable.
            error_message = response.text[:400]
            retryable = response.status_code in {408, 429, 500, 502, 503, 504}
            logger.warning(
                "Brevo API email send failed (status=%s, retryable=%s)",
                response.status_code,
                retryable,
            )
            return EmailSendResult(
                success=False,
                provider="brevo_api",
                error_code=f"http_{response.status_code}",
                error_message=error_message,
                retryable=retryable,
            )
        except requests.RequestException as exc:
            # Network-level failures (DNS, timeouts, resets) are always retryable.
            logger.warning("Brevo API email send request exception: %s", exc)
            return EmailSendResult(
                success=False,
                provider="brevo_api",
                error_code="request_exception",
                error_message=str(exc),
                retryable=True,
            )
181
+
182
+
183
class BrevoSmtpEmailProvider:
    """Sends transactional email over Brevo's SMTP relay using STARTTLS."""

    def __init__(
        self,
        smtp_host: str,
        smtp_port: int,
        smtp_login: str,
        smtp_key: str,
        from_address: str,
        from_name: str,
        timeout_sec: int = 15,
    ) -> None:
        self._smtp_host = smtp_host
        self._smtp_port = smtp_port
        self._smtp_login = smtp_login
        self._smtp_key = smtp_key
        self._from_address = from_address
        self._from_name = from_name
        self._timeout_sec = timeout_sec

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """Build a multipart (text + HTML alternative) message and send it via SMTP.

        Never raises: SMTP/OS errors are converted into a retryable failure result.
        """
        mime = EmailMessage()
        mime["Subject"] = message.subject
        mime["From"] = f"{self._from_name} <{self._from_address}>"
        mime["To"] = f"{message.to_name} <{message.to_email}>" if message.to_name else message.to_email
        mime.set_content(message.text_content)
        mime.add_alternative(message.html_content, subtype="html")

        try:
            with smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=self._timeout_sec) as smtp:
                smtp.ehlo()
                smtp.starttls()
                smtp.login(self._smtp_login, self._smtp_key)
                smtp.send_message(mime)
            return EmailSendResult(success=True, provider="brevo_smtp")
        except (smtplib.SMTPException, OSError) as exc:
            logger.warning("Brevo SMTP email send failed: %s", exc)
            return EmailSendResult(
                success=False,
                provider="brevo_smtp",
                error_code="smtp_error",
                error_message=str(exc),
                retryable=True,
            )
226
+
227
+
228
class EmailService:
    """Delivers transactional email through a primary provider with optional fallback."""

    def __init__(self, primary_provider: Optional[EmailProvider], fallback_provider: Optional[EmailProvider] = None) -> None:
        self._primary_provider = primary_provider
        self._fallback_provider = fallback_provider

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """Try the primary provider, then the fallback.

        When both fail, a merged failure result is returned that keeps the
        primary's error details (falling back to the fallback's) and is
        retryable if either attempt was.
        """
        primary = self._primary_provider
        fallback = self._fallback_provider

        if not primary and not fallback:
            return EmailSendResult(
                success=False,
                provider="none",
                error_code="email_not_configured",
                error_message="Email sending is not configured in this environment.",
                retryable=False,
            )

        first_attempt: Optional[EmailSendResult] = None
        if primary:
            first_attempt = primary.send_transactional_email(message)
            if first_attempt.success:
                return first_attempt

        if fallback:
            second_attempt = fallback.send_transactional_email(message)
            if second_attempt.success:
                return second_attempt
            if first_attempt:
                # Both providers failed — report a combined view of the two attempts.
                return EmailSendResult(
                    success=False,
                    provider=f"{first_attempt.provider}+{second_attempt.provider}",
                    error_code=first_attempt.error_code or second_attempt.error_code,
                    error_message=first_attempt.error_message or second_attempt.error_message,
                    retryable=bool(first_attempt.retryable or second_attempt.retryable),
                )
            return second_attempt

        # Primary existed, failed, and there was no fallback.
        return first_attempt or EmailSendResult(
            success=False,
            provider="none",
            error_code="unknown_email_error",
            error_message="Email provider failed with unknown error.",
            retryable=False,
        )
270
+
271
+
272
def create_email_service_from_env() -> EmailService:
    """Build an EmailService from environment variables.

    Provider selection:
      - Brevo REST API becomes primary when an API key can be resolved.
      - Brevo SMTP becomes primary if only SMTP credentials exist, otherwise
        it is wired in as the fallback.
    Always returns a service — when nothing is configured it will report
    ``email_not_configured`` on send.
    """
    from_address = _first_nonempty_env("MAIL_FROM_ADDRESS", "MAIL_FROM", "BREVO_FROM_ADDRESS") or "noreply@mathpulse.ai"
    from_name = _first_nonempty_env("MAIL_FROM_NAME", "BREVO_FROM_NAME") or "MathPulse AI"
    timeout_raw = _first_nonempty_env("MAIL_SEND_TIMEOUT_SEC") or "15"
    # Floor of 5 seconds so a misconfigured tiny timeout cannot break sends.
    timeout_sec = max(5, _parse_int_env(timeout_raw, 15, env_name="MAIL_SEND_TIMEOUT_SEC"))

    brevo_api_key = _resolve_brevo_api_key_from_env()
    smtp_login = _first_nonempty_env("BREVO_SMTP_LOGIN", "BREVO_SMTP_USERNAME", "BREVO_SMTP_USER")
    smtp_key = _first_nonempty_env("BREVO_SMTP_KEY", "BREVO_SMTP_PASSWORD", "BREVO_SMTP_PASS")
    smtp_host = _first_nonempty_env("BREVO_SMTP_HOST") or "smtp-relay.brevo.com"
    smtp_port_raw = _first_nonempty_env("BREVO_SMTP_PORT") or "587"
    smtp_port = _parse_int_env(smtp_port_raw, 587, env_name="BREVO_SMTP_PORT")

    primary_provider: Optional[EmailProvider] = None
    fallback_provider: Optional[EmailProvider] = None

    if brevo_api_key:
        primary_provider = BrevoApiEmailProvider(
            api_key=brevo_api_key,
            from_address=from_address,
            from_name=from_name,
            timeout_sec=timeout_sec,
        )

    if smtp_login and smtp_key:
        smtp_provider = BrevoSmtpEmailProvider(
            smtp_host=smtp_host,
            smtp_port=smtp_port,
            smtp_login=smtp_login,
            smtp_key=smtp_key,
            from_address=from_address,
            from_name=from_name,
            timeout_sec=timeout_sec,
        )
        if primary_provider is None:
            primary_provider = smtp_provider
        else:
            fallback_provider = smtp_provider

    # Surface half-configured SMTP credentials so the gap is visible in logs.
    if smtp_login and not smtp_key:
        logger.warning("BREVO_SMTP_LOGIN is set but SMTP key/password is missing.")
    if smtp_key and not smtp_login:
        logger.warning("SMTP key/password is set but BREVO_SMTP_LOGIN is missing.")

    mode_parts = []
    if brevo_api_key:
        mode_parts.append("brevo_api")
    if smtp_login and smtp_key:
        mode_parts.append("brevo_smtp")

    if mode_parts:
        logger.info(
            "Email service configured (%s) from=%s smtp=%s:%s",
            "+".join(mode_parts),
            from_address,
            smtp_host,
            smtp_port,
        )
    else:
        logger.warning(
            "Email service is not configured. Set BREVO_API_KEY/BREVO_API_TOKEN, BREVO_MCP_TOKEN, or BREVO_SMTP_LOGIN + BREVO_SMTP_KEY/BREVO_SMTP_PASSWORD."
        )

    return EmailService(primary_provider=primary_provider, fallback_provider=fallback_provider)
services/email_templates.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import html
2
+ from dataclasses import dataclass
3
+ from urllib.parse import urlparse
4
+
5
+
6
# Subject line used for all welcome/credentials emails.
WELCOME_SUBJECT = "Welcome to MathPulse AI - Your Account Details"
# Brand purple used for the email header band and CTA button background.
ACCENT_COLOR = "#9956DE"
8
+
9
+
10
@dataclass
class WelcomeCredentialsEmailContext:
    """Inputs for rendering the welcome-credentials email."""

    recipient_name: str            # display name; blank falls back to "Learner"
    login_email: str               # account login address shown in the email
    temporary_password: str        # initial password the user must change
    role: str                      # e.g. student/teacher/admin; blank -> "User"
    login_url: str                 # must be http(s); invalid -> default site URL
    brand_avatar_url: str = ""     # optional http(s) logo image; else "MP" badge
    recipient_avatar_url: str = "" # optional http(s) avatar; else initial badge
+
20
+
21
+ def _normalize_display_name(name: str) -> str:
22
+ cleaned = (name or "").strip()
23
+ return cleaned or "Learner"
24
+
25
+
26
+ def _normalize_http_url(url: str) -> str:
27
+ candidate = (url or "").strip()
28
+ if not candidate:
29
+ return ""
30
+
31
+ parsed = urlparse(candidate)
32
+ if parsed.scheme.lower() not in {"http", "https"}:
33
+ return ""
34
+ if not parsed.netloc:
35
+ return ""
36
+ return candidate
37
+
38
+
39
def build_welcome_credentials_email(context: WelcomeCredentialsEmailContext) -> dict:
    """Render the welcome/credentials email from *context*.

    Returns a dict with keys ``subject``, ``html`` (full inline-styled HTML
    document), and ``text`` (plain-text alternative). All user-supplied
    values are HTML-escaped and URLs are restricted to http(s) before being
    embedded in markup.
    """
    # Normalize/sanitize inputs; invalid login URL falls back to the site root.
    recipient_name = _normalize_display_name(context.recipient_name)
    login_email = (context.login_email or "").strip()
    temporary_password = (context.temporary_password or "").strip()
    role = (context.role or "").strip() or "User"
    login_url = _normalize_http_url(context.login_url) or "https://mathpulse.ai"
    brand_avatar_url = _normalize_http_url(context.brand_avatar_url)
    recipient_avatar_url = _normalize_http_url(context.recipient_avatar_url)

    # Escape everything that lands inside HTML text or attributes.
    escaped_name = html.escape(recipient_name)
    escaped_email = html.escape(login_email)
    escaped_password = html.escape(temporary_password)
    escaped_role = html.escape(role)
    escaped_url = html.escape(login_url, quote=True)
    escaped_brand_avatar_url = html.escape(brand_avatar_url, quote=True)
    escaped_recipient_avatar_url = html.escape(recipient_avatar_url, quote=True)
    recipient_initial = html.escape((recipient_name[:1] or "U").upper())

    # Brand avatar: real image when a URL is configured, else an "MP" badge.
    if escaped_brand_avatar_url:
        brand_avatar_markup = (
            f'<img src="{escaped_brand_avatar_url}" width="46" height="46" alt="MathPulse avatar" '
            'style="display:block;width:46px;height:46px;border-radius:50%;background:#ffffff;border:2px solid rgba(255,255,255,0.65);" />'
        )
    else:
        brand_avatar_markup = (
            '<div style="width:46px;height:46px;border-radius:50%;background:#1b1331;color:#f5ebff;'
            'font-size:16px;font-weight:800;line-height:46px;text-align:center;border:2px solid rgba(255,255,255,0.4);">MP</div>'
        )

    # Recipient avatar: image when configured, else a badge with their initial.
    if escaped_recipient_avatar_url:
        recipient_avatar_markup = (
            f'<img src="{escaped_recipient_avatar_url}" width="54" height="54" alt="Learner avatar" '
            'style="display:block;width:54px;height:54px;border-radius:50%;background:#1f2937;border:1px solid #49537a;" />'
        )
    else:
        recipient_avatar_markup = (
            '<div style="width:54px;height:54px;border-radius:50%;background:#233e74;color:#f8fafc;'
            f'font-size:22px;font-weight:700;line-height:54px;text-align:center;">{recipient_initial}</div>'
        )

    # Table-based layout with inline styles for broad email-client support.
    html_content = f"""
<!DOCTYPE html>
<html lang=\"en\">
<head>
<meta charset=\"UTF-8\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />
<title>{WELCOME_SUBJECT}</title>
</head>
<body style=\"margin:0;padding:0;background:#0f1220;font-family:Segoe UI,Arial,sans-serif;color:#e5e7eb;\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"padding:24px 12px;background:#0f1220;\">
<tr>
<td align=\"center\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"max-width:640px;background:#181d2f;border-radius:18px;overflow:hidden;border:1px solid #343e62;\">
<tr>
<td style=\"background:{ACCENT_COLOR};padding:14px 22px;\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\">
<tr>
<td width=\"52\" valign=\"middle\" style=\"width:52px;\">{brand_avatar_markup}</td>
<td valign=\"middle\" style=\"padding-left:10px;\">
<p style=\"margin:0;color:#1f1238;font-size:20px;font-weight:800;line-height:1.15;\">MathPulse AI</p>
<p style=\"margin:2px 0 0 0;color:#2f1d50;font-size:12px;font-weight:600;line-height:1.4;\">Learning Platform Account Access</p>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td style=\"padding:24px;\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"margin:0 0 14px 0;\">
<tr>
<td width=\"62\" valign=\"top\" style=\"width:62px;padding-right:12px;\">{recipient_avatar_markup}</td>
<td valign=\"top\">
<p style=\"margin:0 0 8px 0;font-size:16px;color:#f3f4f6;\">Hello {escaped_name},</p>
<p style=\"margin:0;line-height:1.6;color:#d6daeb;\">Welcome to MathPulse AI. Your account has been created by your administrator. Use the credentials below to sign in and begin your learning journey.</p>
</td>
</tr>
</table>

<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"background:#20263b;border:1px solid #445077;border-radius:12px;padding:16px;\">
<tr><td style=\"padding:4px 0;font-size:14px;color:#e5e7eb;\"><strong>Email:</strong> <span style=\"color:#93c5fd;\">{escaped_email}</span></td></tr>
<tr><td style=\"padding:4px 0;font-size:14px;color:#e5e7eb;\"><strong>Temporary Password:</strong> {escaped_password}</td></tr>
<tr><td style=\"padding:4px 0;font-size:14px;color:#e5e7eb;\"><strong>Role:</strong> {escaped_role}</td></tr>
</table>

<table role=\"presentation\" cellspacing=\"0\" cellpadding=\"0\" style=\"margin:20px 0 14px 0;\">
<tr>
<td align=\"center\" bgcolor=\"{ACCENT_COLOR}\" style=\"border-radius:10px;\">
<a href=\"{escaped_url}\" style=\"display:inline-block;padding:12px 20px;color:#1f1238;text-decoration:none;font-weight:700;font-size:14px;\">Log in to MathPulse</a>
</td>
</tr>
</table>

<p style=\"margin:0 0 8px 0;font-size:13px;line-height:1.5;color:#c7d2fe;\">Security note: Please change your password after your first login.</p>
<p style=\"margin:0;font-size:12px;line-height:1.5;color:#a8b3d1;\">If you did not expect this email, please contact your administrator.</p>
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>
""".strip()

    # Plain-text alternative mirrors the HTML content.
    text_content = (
        "MathPulse AI\n\n"
        f"Hello {recipient_name},\n\n"
        "Welcome to MathPulse AI. Your account has been created by your administrator.\n\n"
        "Account details:\n"
        f"- Email: {login_email}\n"
        f"- Temporary Password: {temporary_password}\n"
        f"- Role: {role}\n\n"
        f"Log in here: {login_url}\n\n"
        "Security note: Please change your password after your first login.\n\n"
        "If you did not expect this email, please contact your administrator.\n"
    )

    return {
        "subject": WELCOME_SUBJECT,
        "html": html_content,
        "text": text_content,
    }
services/inference_client.py ADDED
@@ -0,0 +1,1048 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import re
5
+ import random
6
+ from threading import Lock
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import requests
12
+ import yaml
13
+ from openai import OpenAI, APIError, RateLimitError, APITimeoutError
14
+
15
+ from .ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL, DEEPSEEK_BASE_URL
16
+ from .logging_utils import configure_structured_logging, log_model_call
17
+
18
+ LOGGER = configure_structured_logging("mathpulse.inference")
19
+ TEMP_CHAT_MODEL_OVERRIDE_ENV = "INFERENCE_CHAT_MODEL_TEMP_OVERRIDE"
20
+
21
+ # ── Model Profiles ────────────────────────────────────────────────────────────
22
+ # A profile sets multiple env defaults in one shot.
23
+ # Individual env vars (DEEPSEEK_MODEL, DEEPSEEK_REASONER_MODEL, etc.) still override.
24
+ # Usage: MODEL_PROFILE=dev or MODEL_PROFILE=prod or MODEL_PROFILE=budget
25
+ # Profiles can also be applied at runtime via the admin panel without restart.
26
+
27
# Named bundles of model-routing env defaults. A profile is applied either at
# startup via MODEL_PROFILE (_apply_model_profile) or at runtime via the admin
# panel (set_runtime_model_profile). Individual env vars still win over these.
# NOTE(review): "dev" and "budget" are currently identical (all CHAT_MODEL);
# "prod" differs only by routing RAG tasks to REASONER_MODEL — confirm intended.
_MODEL_PROFILES: dict[str, dict[str, str]] = {
    "dev": {
        "INFERENCE_MODEL_ID": CHAT_MODEL,
        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
        "HF_RAG_MODEL_ID": CHAT_MODEL,
        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
    },
    "prod": {
        "INFERENCE_MODEL_ID": CHAT_MODEL,
        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
        # RAG tasks get the reasoner model in production for step-by-step output.
        "HF_RAG_MODEL_ID": REASONER_MODEL,
        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
    },
    "budget": {
        "INFERENCE_MODEL_ID": CHAT_MODEL,
        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
        "HF_RAG_MODEL_ID": CHAT_MODEL,
        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
    },
}
+ }
50
+
51
+ # ── Runtime Override Store ────────────────────────────────────────────────────
52
+ # Mutated at runtime by the admin panel via /api/admin/model-config.
53
+ # Priority: above env vars, below INFERENCE_ENFORCE_LOCK_MODEL.
54
+ # Persisted to Firestore so backend cold-restarts restore the last admin-set config.
55
+
56
# Per-key model overrides set at runtime (admin panel); mutated in place by
# set_runtime_model_profile / set_runtime_model_override and mirrored to
# Firestore so a cold restart restores the last admin-set configuration.
_RUNTIME_OVERRIDES: dict[str, str] = {}
# Name of the currently active runtime profile ("" when none is applied).
_RUNTIME_PROFILE: str = ""

# Firestore location used to persist/restore the runtime model config.
_FS_COLLECTION = "system_config"
_FS_DOC = "active_model_config"
61
+
62
+
63
+ def _save_runtime_config_to_firestore() -> None:
64
+ try:
65
+ from firebase_admin import firestore as fs
66
+
67
+ db = fs.client()
68
+ db.collection(_FS_COLLECTION).document(_FS_DOC).set(
69
+ {
70
+ "profile": _RUNTIME_PROFILE,
71
+ "overrides": _RUNTIME_OVERRIDES,
72
+ "updatedAt": fs.SERVER_TIMESTAMP,
73
+ }
74
+ )
75
+ except Exception as e:
76
+ LOGGER.warning("Could not persist model config to Firestore: %s", e)
77
+
78
+
79
+ def _load_runtime_config_from_firestore() -> None:
80
+ try:
81
+ from firebase_admin import firestore as fs
82
+
83
+ db = fs.client()
84
+ doc = db.collection(_FS_COLLECTION).document(_FS_DOC).get()
85
+ if not doc.exists:
86
+ return
87
+ data = doc.to_dict() or {}
88
+ profile = str(data.get("profile", "")).strip().lower()
89
+ overrides = data.get("overrides", {})
90
+ if profile and profile in _MODEL_PROFILES:
91
+ global _RUNTIME_PROFILE
92
+ _RUNTIME_PROFILE = profile
93
+ _RUNTIME_OVERRIDES.clear()
94
+ _RUNTIME_OVERRIDES.update(_MODEL_PROFILES[profile])
95
+ if isinstance(overrides, dict):
96
+ for key, value in overrides.items():
97
+ _RUNTIME_OVERRIDES[str(key)] = str(value)
98
+ LOGGER.info("Restored runtime model config from Firestore: profile=%s", profile)
99
+ except ImportError:
100
+ LOGGER.debug("Firebase not available (optional for DeepSeek-only)")
101
+ except Exception as e:
102
+ LOGGER.warning("Could not restore model config from Firestore: %s", e)
103
+
104
+
105
+ def _apply_model_profile() -> None:
106
+ profile_name = os.getenv("MODEL_PROFILE", "").strip().lower()
107
+ if not profile_name:
108
+ return
109
+ profile = _MODEL_PROFILES.get(profile_name)
110
+ if profile is None:
111
+ LOGGER.warning("MODEL_PROFILE='%s' is not a known profile.", profile_name)
112
+ return
113
+ for key, value in profile.items():
114
+ if not os.environ.get(key):
115
+ os.environ[key] = value
116
+ LOGGER.info("Startup model profile applied: %s", profile_name)
117
+
118
+
119
+ _apply_model_profile()
120
+ _load_runtime_config_from_firestore()
121
+
122
+
123
+ def set_runtime_model_profile(profile_name: str) -> None:
124
+ """Apply a named profile at runtime without restarting the process."""
125
+ global _RUNTIME_PROFILE, _RUNTIME_OVERRIDES
126
+ normalized = profile_name.strip().lower()
127
+ profile = _MODEL_PROFILES.get(normalized)
128
+ if not profile:
129
+ raise ValueError(
130
+ f"Unknown profile: '{profile_name}'. Valid values: {list(_MODEL_PROFILES.keys())}"
131
+ )
132
+ _RUNTIME_PROFILE = normalized
133
+ _RUNTIME_OVERRIDES.clear()
134
+ _RUNTIME_OVERRIDES.update(profile)
135
+ LOGGER.info("Runtime model profile switched to: %s", profile_name)
136
+ _save_runtime_config_to_firestore()
137
+
138
+
139
+ def set_runtime_model_override(key: str, value: str) -> None:
140
+ """Set a single model env key at runtime."""
141
+ _RUNTIME_OVERRIDES[key] = value
142
+ LOGGER.info("Runtime model override set: %s = %s", key, value)
143
+ _save_runtime_config_to_firestore()
144
+
145
+
146
+ def reset_runtime_overrides() -> None:
147
+ """Clear all runtime overrides."""
148
+ global _RUNTIME_PROFILE
149
+ _RUNTIME_OVERRIDES.clear()
150
+ _RUNTIME_PROFILE = ""
151
+ LOGGER.info("Runtime model overrides cleared.")
152
+ _save_runtime_config_to_firestore()
153
+
154
+
155
+ def get_current_runtime_config() -> dict:
156
+ resolved: dict[str, str] = {}
157
+ for key in {
158
+ "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
159
+ "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
160
+ }:
161
+ resolved[key] = _resolve_key(key)
162
+ return {
163
+ "profile": _RUNTIME_PROFILE,
164
+ "overrides": dict(_RUNTIME_OVERRIDES),
165
+ "resolved": resolved,
166
+ }
167
+
168
+
169
+ def _resolve_key(key: str) -> str:
170
+ if value := _RUNTIME_OVERRIDES.get(key):
171
+ return value
172
+ if _RUNTIME_PROFILE and _RUNTIME_PROFILE in _MODEL_PROFILES:
173
+ if value := _MODEL_PROFILES[_RUNTIME_PROFILE].get(key):
174
+ return value
175
+ return os.getenv(key, "")
176
+
177
+
178
+ def get_model_for_task(task_type: str) -> str:
179
+ task = (task_type or "default").strip().lower()
180
+ enforce_lock = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
181
+ if enforce_lock:
182
+ override = (
183
+ _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID")
184
+ or os.getenv("INFERENCE_LOCK_MODEL_ID")
185
+ or CHAT_MODEL
186
+ )
187
+ return override
188
+ task_key_map = {
189
+ "chat": "INFERENCE_CHAT_MODEL_ID",
190
+ "quiz_generation": "HF_QUIZ_MODEL_ID",
191
+ "rag_lesson": "HF_RAG_MODEL_ID",
192
+ "rag_problem": "HF_RAG_MODEL_ID",
193
+ "rag_analysis_context": "HF_RAG_MODEL_ID",
194
+ }
195
+ if env_key := task_key_map.get(task):
196
+ if resolved := _resolve_key(env_key):
197
+ return resolved
198
+ return _resolve_key("INFERENCE_MODEL_ID") or CHAT_MODEL
199
+
200
+
201
+ def model_supports_thinking(model_id: str = "") -> bool:
202
+ mid = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
203
+ return mid == REASONER_MODEL
204
+
205
+
206
+ def _normalize_local_space_url(raw_url: str) -> str:
207
+ """Accept either hf.space host or huggingface.co/spaces URL for local_space provider."""
208
+ cleaned = (raw_url or "").strip().rstrip("/")
209
+ if not cleaned:
210
+ return "http://127.0.0.1:7860"
211
+
212
+ match = re.match(r"^https?://huggingface\.co/spaces/([^/]+)/([^/]+)$", cleaned, re.IGNORECASE)
213
+ if match:
214
+ owner = match.group(1).strip().lower()
215
+ space = match.group(2).strip().lower()
216
+ return f"https://{owner}-{space}.hf.space"
217
+
218
+ return cleaned
219
+
220
+
221
+ @dataclass
222
+ class InferenceRequest:
223
+ messages: List[Dict[str, str]]
224
+ model: Optional[str] = None
225
+ task_type: str = "default"
226
+ request_tag: str = ""
227
+ max_new_tokens: int = 900
228
+ temperature: float = 0.2
229
+ top_p: float = 0.9
230
+ repetition_penalty: float = 1.15
231
+ timeout_sec: Optional[int] = None
232
+ enable_thinking: bool = False
233
+
234
+
235
+ class InferenceClient:
236
    def __init__(self, firestore_client: Optional[Any] = None) -> None:
        """Build the DeepSeek-backed inference client.

        Configuration priority: env vars > config/models.yaml > hardcoded
        defaults. When INFERENCE_ENFORCE_LOCK_MODEL is truthy, every task is
        forced onto the lock model and all fallbacks are cleared.

        firestore_client: optional Firestore handle used only to persist and
        restore the metrics counters; None disables persistence.
        """
        self.firestore = firestore_client
        self._last_persist_time = 0.0
        # Minimum seconds between Firestore metric writes (see _persist_metrics).
        self._persist_throttle_sec = 30.0

        # Candidate config locations: relative, container-absolute, and repo-root.
        config_paths = [
            Path("./config/models.yaml"),
            Path("/config/models.yaml"),
            Path("/app/config/models.yaml"),
            Path.cwd() / "config" / "models.yaml",
            Path(__file__).resolve().parents[2] / "config" / "models.yaml",
        ]

        config: Dict[str, object] = {}
        config_path = None

        # First existing file wins; a missing/empty file leaves config as {}.
        for path in config_paths:
            if path.exists():
                config_path = path
                with path.open("r", encoding="utf-8") as fh:
                    config = yaml.safe_load(fh) or {}
                LOGGER.info(f"??? Loaded config from {config_path}")
                break

        if not config_path:
            LOGGER.warning(f"?????? Config file not found. Checked: {[str(p) for p in config_paths]}")
            LOGGER.warning(f" CWD: {Path.cwd()}")
            LOGGER.warning(f" Using hardcoded defaults")

        # Extract models.primary from the YAML, defensively type-checking each level.
        primary: Dict[str, object] = {}
        if isinstance(config, dict):
            models_cfg = config.get("models", {})
            if isinstance(models_cfg, dict):
                primary_cfg = models_cfg.get("primary", {})
                if isinstance(primary_cfg, dict):
                    primary = primary_cfg

        # DeepSeek connection settings (OpenAI-compatible API).
        self.provider = "deepseek"
        self.ds_api_key = os.getenv("DEEPSEEK_API_KEY", "")
        self.ds_base_url = os.getenv("DEEPSEEK_BASE_URL", DEEPSEEK_BASE_URL)
        self.ds_chat_model = os.getenv("DEEPSEEK_MODEL", CHAT_MODEL)
        self.ds_reasoner_model = os.getenv("DEEPSEEK_REASONER_MODEL", REASONER_MODEL)

        # Optional local/HF-Space Gradio endpoint (secondary provider).
        self.local_space_url = _normalize_local_space_url(
            os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
        )
        self.local_generate_path = os.getenv("INFERENCE_LOCAL_SPACE_GENERATE_PATH", "/gradio_api/call/generate")

        # Global model lock — defaults to ON.
        self.enforce_lock_model = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
        self.lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", CHAT_MODEL).strip() or CHAT_MODEL

        # Default model: env var beats the YAML primary id.
        default_model_fallback = str(primary.get("id") or CHAT_MODEL)
        env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
        self.default_model = env_model_id or default_model_fallback

        # Sampling defaults: env var beats YAML beats hardcoded value.
        default_max_tokens = str(primary.get("max_new_tokens") or 512)
        self.default_max_new_tokens = int(os.getenv("INFERENCE_MAX_NEW_TOKENS", default_max_tokens))

        default_temp = str(primary.get("temperature") or 0.2)
        self.default_temperature = float(os.getenv("INFERENCE_TEMPERATURE", default_temp))

        default_top_p = str(primary.get("top_p") or 0.9)
        self.default_top_p = float(os.getenv("INFERENCE_TOP_P", default_top_p))

        # Chat-specific model overrides and the strict single-model chat flag.
        self.chat_model_override = os.getenv("INFERENCE_CHAT_MODEL_ID", "").strip()
        self.chat_model_temp_override = os.getenv(TEMP_CHAT_MODEL_OVERRIDE_ENV, "").strip()
        self.chat_strict_model_only = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}

        # Timeout/retry/backoff knobs, with separate interactive vs background
        # profiles that default to the shared values.
        self.ds_timeout_sec = int(os.getenv("INFERENCE_HF_TIMEOUT_SEC", "90"))
        self.local_timeout_sec = int(os.getenv("INFERENCE_LOCAL_SPACE_TIMEOUT_SEC", "90"))
        self.max_retries = int(os.getenv("INFERENCE_MAX_RETRIES", "3"))
        self.backoff_sec = float(os.getenv("INFERENCE_BACKOFF_SEC", "1.5"))
        self.interactive_timeout_sec = int(os.getenv("INFERENCE_INTERACTIVE_TIMEOUT_SEC", str(self.ds_timeout_sec)))
        self.background_timeout_sec = int(os.getenv("INFERENCE_BACKGROUND_TIMEOUT_SEC", str(self.ds_timeout_sec)))
        self.interactive_max_retries = int(os.getenv("INFERENCE_INTERACTIVE_MAX_RETRIES", str(self.max_retries)))
        self.background_max_retries = int(os.getenv("INFERENCE_BACKGROUND_MAX_RETRIES", str(self.max_retries)))
        self.interactive_backoff_sec = float(os.getenv("INFERENCE_INTERACTIVE_BACKOFF_SEC", str(self.backoff_sec)))
        self.background_backoff_sec = float(os.getenv("INFERENCE_BACKGROUND_BACKOFF_SEC", str(self.backoff_sec)))

        # Global fallback model chain (comma-separated env list).
        fallback_raw = os.getenv("INFERENCE_FALLBACK_MODELS", "")
        self.fallback_models = [v.strip() for v in fallback_raw.split(",") if v.strip()]

        # Task classification sets (comma-separated, lowercased).
        gpu_tasks_raw = os.getenv(
            "INFERENCE_GPU_REQUIRED_TASKS",
            "chat,quiz_generation,lesson_generation,learning_path,verify_solution,variant_generation,eval_generation",
        )
        self.gpu_required_tasks = {v.strip().lower() for v in gpu_tasks_raw.split(",") if v.strip()}

        cpu_tasks_raw = os.getenv(
            "INFERENCE_CPU_ONLY_TASKS",
            "risk_classification,analytics_aggregation,file_parsing,auth,default_cpu",
        )
        self.cpu_only_tasks = {v.strip().lower() for v in cpu_tasks_raw.split(",") if v.strip()}

        interactive_tasks_raw = os.getenv(
            "INFERENCE_INTERACTIVE_TASKS",
            "chat,verify_solution,daily_insight",
        )
        self.interactive_tasks = {v.strip().lower() for v in interactive_tasks_raw.split(",") if v.strip()}
        # Interactive tasks try at most 1 + this many fallback models (never negative).
        self.interactive_max_fallback_depth = max(
            0,
            int(os.getenv("INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH", "1")),
        )

        # Default task-to-model routing.
        self.task_model_map: Dict[str, str] = {
            "chat": CHAT_MODEL,
            "verify_solution": CHAT_MODEL,
            "lesson_generation": CHAT_MODEL,
            "quiz_generation": CHAT_MODEL,
            "learning_path": CHAT_MODEL,
            "daily_insight": CHAT_MODEL,
            "risk_classification": CHAT_MODEL,
            "risk_narrative": CHAT_MODEL,
        }
        self.task_fallback_model_map: Dict[str, List[str]] = {
            "chat": [CHAT_MODEL],
            "verify_solution": [CHAT_MODEL],
        }
        self.model_provider_map: Dict[str, str] = {}
        self.task_provider_map: Dict[str, str] = {}
        # Merge routing overrides from the YAML, normalizing task keys to lowercase.
        if isinstance(config, dict):
            routing_cfg = config.get("routing", {})
            if isinstance(routing_cfg, dict):
                task_models = routing_cfg.get("task_model_map", {})
                if isinstance(task_models, dict):
                    config_task_models = {
                        str(task).strip().lower(): str(model).strip()
                        for task, model in task_models.items()
                        if str(task).strip() and str(model).strip()
                    }
                    self.task_model_map.update(config_task_models)
                task_fallback_models = routing_cfg.get("task_fallback_model_map", {})
                if isinstance(task_fallback_models, dict):
                    # Accept list-valued or comma-separated string-valued entries.
                    # NOTE: a valid config REPLACES the default fallback map.
                    parsed: Dict[str, List[str]] = {}
                    for task, models in task_fallback_models.items():
                        task_key = str(task).strip().lower()
                        if not task_key:
                            continue
                        if isinstance(models, list):
                            cleaned = [str(m).strip() for m in models if str(m).strip()]
                            if cleaned:
                                parsed[task_key] = cleaned
                        elif isinstance(models, str):
                            cleaned = [v.strip() for v in models.split(",") if v.strip()]
                            if cleaned:
                                parsed[task_key] = cleaned
                    self.task_fallback_model_map = parsed
                task_providers = routing_cfg.get("task_provider_map", {})
                if isinstance(task_providers, dict):
                    self.task_provider_map = {
                        str(task).strip().lower(): str(provider).strip().lower()
                        for task, provider in task_providers.items()
                        if str(task).strip() and str(provider).strip()
                    }

        # Override all task model mappings with INFERENCE_MODEL_ID env var if set.
        if env_model_id:
            original_map = dict(self.task_model_map)
            for task_key in list(self.task_model_map.keys()):
                self.task_model_map[task_key] = env_model_id
            LOGGER.info(
                f"???? INFERENCE_MODEL_ID env var override applied: {env_model_id}"
            )
            LOGGER.info(
                f" Task model mappings changed from: {original_map}"
            )
            env_override_note = " (env override active)"
        else:
            env_override_note = ""

        # Lock mode wins over everything above: one model, no fallbacks.
        if self.enforce_lock_model:
            lock_map_before = dict(self.task_model_map)
            self.default_model = self.lock_model_id
            for task_key in list(self.task_model_map.keys()):
                self.task_model_map[task_key] = self.lock_model_id
            self.fallback_models = []
            self.task_fallback_model_map = {
                task_key: [] for task_key in self.task_model_map.keys()
            }
            LOGGER.info(f"???? INFERENCE_ENFORCE_LOCK_MODEL enabled: locking all inference tasks to {self.lock_model_id}")
            LOGGER.info(f" Cleared fallback models")
            LOGGER.info(f" Task model mappings forced from: {lock_map_before}")

        config_status = "from file" if config_path else "hardcoded defaults (no config file found)"
        effective_chat_model_for_logs = self.chat_model_override or self.task_model_map.get("chat", self.default_model)
        LOGGER.info(f"??? InferenceClient initialized {config_status}{env_override_note}")
        LOGGER.info(f" Default model: {self.default_model}")
        LOGGER.info(f" Chat model: {effective_chat_model_for_logs}")
        LOGGER.info(f" Chat temp override ({TEMP_CHAT_MODEL_OVERRIDE_ENV}): {self.chat_model_temp_override or 'disabled'}")
        LOGGER.info(f" Chat strict model lock: {self.chat_strict_model_only}")
        LOGGER.info(f" Global model lock: {self.enforce_lock_model}")
        LOGGER.info(f" Verify solution model: {self.task_model_map.get('verify_solution', self.default_model)}")
        LOGGER.info(f" Full task_model_map: {self.task_model_map}")

        # In-memory metrics, guarded by _metrics_lock; optionally seeded from
        # and throttle-persisted to Firestore.
        self._metrics_started_at = time.time()
        self._metrics_lock = Lock()
        self._metrics: Dict[str, Any] = {
            "requests_total": 0,
            "requests_ok": 0,
            "requests_error": 0,
            "retries_total": 0,
            "fallback_attempts": 0,
            "latency_sum_ms": 0.0,
            "latency_count": 0,
            "route_counts": {},
            "task_counts": {},
            "provider_counts": {},
            "status_code_counts": {},
        }

        self._load_persistent_metrics()
448
+
449
    def _bump_metric(self, key: str, inc: int = 1) -> None:
        """Increment the scalar counter *key* by *inc* under the metrics lock."""
        with self._metrics_lock:
            current = self._metrics.get(key) or 0
            # Guard against non-int residue (e.g. bad values restored from Firestore).
            if not isinstance(current, int):
                current = 0
            self._metrics[key] = current + inc
        # NOTE(review): assumed to run AFTER the lock is released —
        # _persist_metrics re-acquires the same non-reentrant threading.Lock,
        # so calling it inside the `with` block would deadlock. Confirm the
        # original indentation placed this call outside the lock.
        self._persist_metrics()
456
+
457
    def _bump_bucket(self, key: str, bucket: str, inc: int = 1) -> None:
        """Increment sub-counter *bucket* inside the dict-valued metric *key*."""
        with self._metrics_lock:
            mapping = self._metrics.get(key)
            # Recreate the bucket dict if missing or corrupted.
            if not isinstance(mapping, dict):
                mapping = {}
                self._metrics[key] = mapping
            current = mapping.get(bucket) or 0
            if not isinstance(current, int):
                current = 0
            mapping[bucket] = current + inc
        # NOTE(review): assumed outside the `with` block — _persist_metrics
        # re-acquires the non-reentrant metrics lock; confirm original indent.
        self._persist_metrics()
468
+
469
    def _record_completion(self, *, latency_ms: float) -> None:
        """Fold one successful call's latency into the running-average inputs."""
        with self._metrics_lock:
            self._metrics["latency_sum_ms"] = (self._metrics.get("latency_sum_ms") or 0.0) + latency_ms
            self._metrics["latency_count"] = (self._metrics.get("latency_count") or 0) + 1
        # NOTE(review): assumed outside the `with` block — _persist_metrics
        # re-acquires the non-reentrant metrics lock; confirm original indent.
        self._persist_metrics()
474
+
475
+ def _load_persistent_metrics(self) -> None:
476
+ if not self.firestore:
477
+ return
478
+ try:
479
+ doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
480
+ doc = doc_ref.get()
481
+ if doc.exists:
482
+ data = doc.to_dict() or {}
483
+ with self._metrics_lock:
484
+ for k, v in data.items():
485
+ if k in self._metrics:
486
+ if isinstance(v, (int, float)):
487
+ self._metrics[k] = v
488
+ elif isinstance(v, dict) and isinstance(self._metrics[k], dict):
489
+ self._metrics[k].update(v)
490
+ LOGGER.info("??? Persistent inference metrics loaded from Firestore")
491
+ except Exception as e:
492
+ LOGGER.warning(f"?????? Failed to load persistent metrics: {e}")
493
+
494
+ def _persist_metrics(self, force: bool = False) -> None:
495
+ if not self.firestore:
496
+ return
497
+
498
+ now = time.time()
499
+ if not force and (now - self._last_persist_time < self._persist_throttle_sec):
500
+ return
501
+
502
+ try:
503
+ self._last_persist_time = now
504
+ doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
505
+ with self._metrics_lock:
506
+ snapshot = dict(self._metrics)
507
+
508
+ doc_ref.set(snapshot, merge=True)
509
+ except Exception as e:
510
+ LOGGER.warning(f"?????? Failed to persist metrics: {e}")
511
+
512
+ def _record_attempt(self, *, task_type: str, provider: str, route: str, fallback_depth: int) -> None:
513
+ self._bump_metric("requests_total", 1)
514
+ self._bump_bucket("task_counts", (task_type or "default").strip().lower(), 1)
515
+ self._bump_bucket("provider_counts", provider, 1)
516
+ self._bump_bucket("route_counts", route, 1)
517
+ if fallback_depth > 0:
518
+ self._bump_metric("fallback_attempts", 1)
519
+
520
+ def snapshot_metrics(self) -> Dict[str, Any]:
521
+ with self._metrics_lock:
522
+ l_sum = self._metrics.get("latency_sum_ms") or 0.0
523
+ l_count = self._metrics.get("latency_count") or 0
524
+ avg_latency = round(l_sum / l_count, 2) if l_count > 0 else 0.0
525
+
526
+ snapshot = {
527
+ "uptime_sec": round(max(0.0, time.time() - self._metrics_started_at), 2),
528
+ "requests_total": self._metrics.get("requests_total") or 0,
529
+ "requests_ok": self._metrics.get("requests_ok") or 0,
530
+ "requests_error": self._metrics.get("requests_error") or 0,
531
+ "retries_total": self._metrics.get("retries_total") or 0,
532
+ "fallback_attempts": self._metrics.get("fallback_attempts") or 0,
533
+ "avg_latency_ms": avg_latency,
534
+ "active_model": self.default_model,
535
+ "primary_provider": self.provider,
536
+ "route_counts": dict(self._metrics.get("route_counts") or {}),
537
+ "task_counts": dict(self._metrics.get("task_counts") or {}),
538
+ "provider_counts": dict(self._metrics.get("provider_counts") or {}),
539
+ "status_code_counts": dict(self._metrics.get("status_code_counts") or {}),
540
+ }
541
+ return snapshot
542
+
543
    def generate_from_messages(self, req: InferenceRequest) -> str:
        """Run a chat request through the resolved model chain and return text.

        Resolves the primary model plus fallbacks, then attempts each model in
        order via _call_deepseek; the first success is returned. If every
        model fails, the LAST error is re-raised; an empty chain raises
        RuntimeError.
        """
        effective_task = (req.task_type or "default").strip().lower()
        # Auto-tag requests for log correlation when the caller didn't supply one.
        request_tag = req.request_tag.strip() or f"{effective_task}-{int(time.time() * 1000)}"
        selected_model, model_selection_source = self._resolve_primary_model(req)

        model_chain = self._model_chain_for_task(effective_task, selected_model)
        last_error: Optional[Exception] = None

        model_base = selected_model

        LOGGER.info(
            f"???? request_tag={request_tag} task={effective_task} source={model_selection_source} "
            f"selected_model={model_base} (primary)"
        )
        LOGGER.info(f" fallback_chain={model_chain[1:] if len(model_chain) > 1 else 'none'}")

        for fallback_depth, model_name in enumerate(model_chain):
            # Rebuild the request per model, filling sampling defaults from the
            # client config when the caller left them unset.
            # NOTE(review): req.enable_thinking is not copied here — confirm
            # whether dropping it for retried/fallback models is intended.
            request_for_model = InferenceRequest(
                messages=req.messages,
                model=model_name,
                task_type=req.task_type,
                request_tag=request_tag,
                max_new_tokens=req.max_new_tokens or self.default_max_new_tokens,
                temperature=req.temperature if req.temperature is not None else self.default_temperature,
                top_p=req.top_p if req.top_p is not None else self.default_top_p,
                repetition_penalty=req.repetition_penalty,
                timeout_sec=req.timeout_sec,
            )

            try:
                result = self._call_deepseek(request_for_model, fallback_depth)
                if fallback_depth > 0:
                    LOGGER.info(f"??? Fallback succeeded at depth={fallback_depth} model={model_name}")
                return result
            except Exception as exc:
                # Remember the failure and move on to the next model in the chain.
                last_error = exc
                fallback_hint = f" (depth {fallback_depth})" if fallback_depth > 0 else ""
                LOGGER.warning(
                    f"?????? Attempt failed{fallback_hint}: task={request_for_model.task_type} "
                    f"model={model_name} error={exc.__class__.__name__}: {str(exc)[:100]}"
                )

        if last_error:
            raise last_error
        raise RuntimeError("Inference failed with empty model chain")
588
+
589
+ def _runtime_chat_model_override(self) -> str:
590
+ return os.getenv(TEMP_CHAT_MODEL_OVERRIDE_ENV, "").strip()
591
+
592
    def _resolve_primary_model(self, req: InferenceRequest) -> Tuple[str, str]:
        """Pick the primary model for a request and label where it came from.

        Priority (highest first): live chat temp-override env var (chat only)
        > explicit req.model > INFERENCE_CHAT_MODEL_ID (chat only) > the
        task-to-model map. The global model lock then replaces any selection
        whose base id (text before ':') differs from the lock model's.

        Returns (model_id, selection_source) where selection_source is a
        diagnostic tag such as "task_map" or "explicit_request:model_lock".
        """
        effective_task = (req.task_type or "default").strip().lower()
        runtime_chat_override = self._runtime_chat_model_override()

        if effective_task == "chat" and runtime_chat_override:
            selected_model = runtime_chat_override
            model_selection_source = "chat_temp_override_env"
        elif req.model:
            selected_model = req.model
            model_selection_source = "explicit_request"
        elif effective_task == "chat" and self.chat_model_override:
            selected_model = self.chat_model_override
            model_selection_source = "chat_override_env"
        else:
            selected_model = self.task_model_map.get(effective_task, self.default_model)
            model_selection_source = "task_map"

        if self.enforce_lock_model:
            # Chat may lock onto its own override instead of the global lock model.
            effective_lock_model_id = self.lock_model_id
            if effective_task == "chat":
                effective_lock_model_id = runtime_chat_override or self.chat_model_override or self.lock_model_id

            # Compare base ids only, so "model:variant" still matches "model".
            selected_base = (selected_model or "").split(":", 1)[0].strip()
            lock_base = (effective_lock_model_id or "").split(":", 1)[0].strip()
            if selected_base != lock_base:
                LOGGER.warning(
                    f"?????? Model lock replaced requested model {selected_model} with {effective_lock_model_id}"
                )
                selected_model = effective_lock_model_id
                model_selection_source = f"{model_selection_source}:model_lock"

        # Tag strict-chat mode in the source label for diagnostics.
        if effective_task == "chat" and self.chat_strict_model_only:
            return selected_model, f"{model_selection_source}:chat_strict_model_only"

        return selected_model, model_selection_source
627
+
628
+ def _model_chain_for_task(self, task_type: str, selected_model: str) -> List[str]:
629
+ normalized = (task_type or "default").strip().lower()
630
+ runtime_chat_override = self._runtime_chat_model_override() if normalized == "chat" else ""
631
+ chat_lock_model_id = runtime_chat_override or (self.chat_model_override if normalized == "chat" else "")
632
+
633
+ if self.enforce_lock_model:
634
+ if normalized == "chat":
635
+ locked_model = (chat_lock_model_id or self.lock_model_id or "").strip()
636
+ else:
637
+ locked_model = (self.lock_model_id or "").strip()
638
+ return [locked_model] if locked_model else []
639
+
640
+ if normalized == "chat" and self.chat_strict_model_only:
641
+ chat_model = (chat_lock_model_id or selected_model or "").strip()
642
+ return [chat_model] if chat_model else []
643
+
644
+ per_task_candidates = self.task_fallback_model_map.get(task_type, [])
645
+ combined = [selected_model] + per_task_candidates + self.fallback_models
646
+
647
+ deduped: List[str] = []
648
+ seen = set()
649
+ for model_id in combined:
650
+ model_name = (model_id or "").strip()
651
+ if not model_name or model_name in seen:
652
+ continue
653
+ seen.add(model_name)
654
+ deduped.append(model_name)
655
+
656
+ if normalized in self.interactive_tasks:
657
+ max_models = 1 + self.interactive_max_fallback_depth
658
+ return deduped[:max_models]
659
+ return deduped
660
+
661
+ def _retry_profile(self, task_type: str) -> Tuple[int, float]:
662
+ normalized = (task_type or "default").strip().lower()
663
+ if normalized in self.interactive_tasks:
664
+ return self.interactive_max_retries, self.interactive_backoff_sec
665
+ return self.background_max_retries, self.background_backoff_sec
666
+
667
+ def _timeout_for(self, req: InferenceRequest, provider: str) -> int:
668
+ if req.timeout_sec:
669
+ return req.timeout_sec
670
+ if provider == "local_space":
671
+ return self.local_timeout_sec
672
+ normalized = (req.task_type or "default").strip().lower()
673
+ if normalized in self.interactive_tasks:
674
+ return self.interactive_timeout_sec
675
+ return self.background_timeout_sec
676
+
677
+ def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
678
+ parts: List[str] = []
679
+ for msg in messages:
680
+ role = (msg.get("role") or "user").strip().lower()
681
+ content = (msg.get("content") or "").strip()
682
+ if not content or role in {"tool", "function"}:
683
+ continue
684
+ prefix = "USER"
685
+ if role == "system":
686
+ prefix = "SYSTEM"
687
+ elif role == "assistant":
688
+ prefix = "ASSISTANT"
689
+ parts.append(f"{prefix}:\n{content}")
690
+ parts.append("ASSISTANT:")
691
+ return "\n\n".join(parts)
692
+
693
+ def _latest_user_message(self, messages: List[Dict[str, str]]) -> str:
694
+ for msg in reversed(messages):
695
+ role = (msg.get("role") or "").strip().lower()
696
+ content = (msg.get("content") or "").strip()
697
+ if role == "user" and content:
698
+ return content
699
+ return self._messages_to_prompt(messages)
700
+
701
+ def _call_deepseek(self, req: InferenceRequest, fallback_depth: int) -> str:
702
+ """Call DeepSeek API with OpenAI-compatible chat completions."""
703
+ if not self.ds_api_key:
704
+ raise RuntimeError("DEEPSEEK_API_KEY is not set")
705
+
706
+ target_model = req.model or self.default_model
707
+ route = "deepseek"
708
+ task_type = req.task_type or "default"
709
+
710
+ LOGGER.debug(
711
+ f"???? Calling DeepSeek: task={task_type} model={target_model} "
712
+ f"route={route} depth={fallback_depth}"
713
+ )
714
+
715
+ timeout = self._timeout_for(req, "deepseek")
716
+ max_retries, backoff_sec = self._retry_profile(task_type)
717
+
718
+ client = get_deepseek_client()
719
+
720
+ # Build chat completions params
721
+ params: Dict[str, Any] = {
722
+ "model": target_model,
723
+ "messages": req.messages,
724
+ "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
725
+ }
726
+
727
+ if target_model == REASONER_MODEL:
728
+ params["max_tokens"] = req.max_new_tokens or 1024
729
+ else:
730
+ params["temperature"] = req.temperature
731
+ params["top_p"] = req.top_p
732
+
733
+ # Use JSON mode for quiz generation
734
+ if task_type == "quiz_generation" and target_model != REASONER_MODEL:
735
+ params["response_format"] = {"type": "json_object"}
736
+
737
+ for attempt in range(max_retries):
738
+ self._record_attempt(
739
+ task_type=task_type,
740
+ provider="deepseek",
741
+ route=route,
742
+ fallback_depth=fallback_depth,
743
+ )
744
+ start = time.perf_counter()
745
+ try:
746
+ response = client.chat.completions.create(**params, timeout=timeout)
747
+ latency_ms = (time.perf_counter() - start) * 1000
748
+
749
+ content = response.choices[0].message.content or ""
750
+ reasoning = getattr(response.choices[0].message, "reasoning_content", None)
751
+
752
+ text = content.strip()
753
+ if reasoning:
754
+ text = f"{reasoning}\n{text}"
755
+
756
+ log_model_call(
757
+ LOGGER,
758
+ provider="deepseek",
759
+ model=target_model,
760
+ endpoint=self.ds_base_url,
761
+ latency_ms=latency_ms,
762
+ input_tokens=None,
763
+ output_tokens=None,
764
+ status="ok",
765
+ task_type=task_type,
766
+ request_tag=req.request_tag,
767
+ retry_attempt=attempt + 1,
768
+ fallback_depth=fallback_depth,
769
+ route=route,
770
+ )
771
+ self._record_attempt(
772
+ task_type=task_type,
773
+ provider="deepseek",
774
+ route=route,
775
+ fallback_depth=fallback_depth,
776
+ )
777
+ self._record_completion(latency_ms=latency_ms)
778
+ self._bump_metric("requests_ok", 1)
779
+ return text
780
+
781
+ except RateLimitError:
782
+ latency_ms = (time.perf_counter() - start) * 1000
783
+ if attempt < max_retries - 1:
784
+ log_model_call(
785
+ LOGGER,
786
+ provider="deepseek",
787
+ model=target_model,
788
+ endpoint=self.ds_base_url,
789
+ latency_ms=latency_ms,
790
+ input_tokens=None,
791
+ output_tokens=None,
792
+ status="error",
793
+ error_class="RateLimitError",
794
+ error_message="rate limited",
795
+ task_type=task_type,
796
+ request_tag=req.request_tag,
797
+ retry_attempt=attempt + 1,
798
+ fallback_depth=fallback_depth,
799
+ route=route,
800
+ )
801
+ self._bump_metric("retries_total", 1)
802
+ time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
803
+ continue
804
+ self._bump_metric("requests_error", 1)
805
+ raise RuntimeError("DeepSeek API rate limit reached. Please try again shortly.")
806
+
807
+ except APITimeoutError:
808
+ latency_ms = (time.perf_counter() - start) * 1000
809
+ if attempt < max_retries - 1:
810
+ log_model_call(
811
+ LOGGER,
812
+ provider="deepseek",
813
+ model=target_model,
814
+ endpoint=self.ds_base_url,
815
+ latency_ms=latency_ms,
816
+ input_tokens=None,
817
+ output_tokens=None,
818
+ status="error",
819
+ error_class="APITimeoutError",
820
+ error_message="timeout",
821
+ task_type=task_type,
822
+ request_tag=req.request_tag,
823
+ retry_attempt=attempt + 1,
824
+ fallback_depth=fallback_depth,
825
+ route=route,
826
+ )
827
+ self._bump_metric("retries_total", 1)
828
+ time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
829
+ continue
830
+ self._bump_metric("requests_error", 1)
831
+ raise RuntimeError("DeepSeek API timed out. Please retry.")
832
+
833
+ except APIError as e:
834
+ latency_ms = (time.perf_counter() - start) * 1000
835
+ if attempt < max_retries - 1:
836
+ log_model_call(
837
+ LOGGER,
838
+ provider="deepseek",
839
+ model=target_model,
840
+ endpoint=self.ds_base_url,
841
+ latency_ms=latency_ms,
842
+ input_tokens=None,
843
+ output_tokens=None,
844
+ status="error",
845
+ error_class="APIError",
846
+ error_message=str(e)[:200],
847
+ task_type=task_type,
848
+ request_tag=req.request_tag,
849
+ retry_attempt=attempt + 1,
850
+ fallback_depth=fallback_depth,
851
+ route=route,
852
+ )
853
+ self._bump_metric("retries_total", 1)
854
+ time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
855
+ continue
856
+ self._bump_metric("requests_error", 1)
857
+ raise RuntimeError(f"DeepSeek API error: {str(e)}")
858
+
859
+ except Exception as exc:
860
+ latency_ms = (time.perf_counter() - start) * 1000
861
+ self._bump_metric("requests_error", 1)
862
+ log_model_call(
863
+ LOGGER,
864
+ provider="deepseek",
865
+ model=target_model,
866
+ endpoint=self.ds_base_url,
867
+ latency_ms=latency_ms,
868
+ input_tokens=None,
869
+ output_tokens=None,
870
+ status="error",
871
+ error_class=exc.__class__.__name__,
872
+ error_message=str(exc)[:200],
873
+ task_type=task_type,
874
+ request_tag=req.request_tag,
875
+ retry_attempt=attempt + 1,
876
+ fallback_depth=fallback_depth,
877
+ route=route,
878
+ )
879
+ raise
880
+
881
+ raise RuntimeError(f"DeepSeek call failed after {max_retries} attempts")
882
+
883
    def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
        """Call a local/remote Gradio Space ``generate`` endpoint and return the text.

        Flattens the chat messages into a single prompt, POSTs a Gradio-style
        ``{"data": [...]}`` payload, and — when the Space replies with an
        ``event_id`` — performs a second GET against the SSE result endpoint
        and parses the last ``data:`` line.

        Raises on any non-200 response or on a malformed result stream.
        """
        target_model = req.model or self.default_model
        url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"

        # Gradio positional args: prompt, history, temperature, top_p, max_new_tokens.
        prompt = self._messages_to_prompt(req.messages)
        payload: Dict[str, object] = {
            "data": [
                prompt,
                [],
                req.temperature,
                req.top_p,
                req.max_new_tokens,
            ]
        }
        headers = {"Content-Type": "application/json"}

        timeout = self._timeout_for(req, provider)

        self._record_attempt(
            task_type=req.task_type,
            provider=provider,
            route=route,
            fallback_depth=fallback_depth,
        )
        start = time.perf_counter()

        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
        except Exception as exc:
            # Transport-level failure (connection, timeout, DNS): log and re-raise.
            latency_ms = (time.perf_counter() - start) * 1000
            log_model_call(
                LOGGER,
                provider=provider,
                model=target_model,
                endpoint=url,
                latency_ms=latency_ms,
                input_tokens=None,
                output_tokens=None,
                status="error",
                error_class=exc.__class__.__name__,
                error_message=str(exc),
                task_type=req.task_type,
                request_tag=req.request_tag,
                retry_attempt=1,
                fallback_depth=fallback_depth,
                route=route,
            )
            self._bump_metric("requests_error", 1)
            raise

        # NOTE(review): latency is measured up to the POST only; the follow-up
        # result GET below is not included in latency_ms — confirm intended.
        latency_ms = (time.perf_counter() - start) * 1000
        self._bump_bucket("status_code_counts", str(resp.status_code), 1)

        if resp.status_code != 200:
            self._bump_metric("requests_error", 1)
            raise RuntimeError(f"Local Space error {resp.status_code}: {resp.text}")

        data = resp.json()
        event_id = data.get("event_id")
        if not event_id:
            # Synchronous response: the text is already in the payload.
            return self._extract_text(data)

        # Asynchronous Gradio flow: fetch the SSE result for the queued event.
        result_url = f"{self.local_space_url.rstrip('/')}/gradio_api/call/generate/{event_id}"
        result_resp = requests.get(result_url, timeout=req.timeout_sec or self.local_timeout_sec)
        if result_resp.status_code != 200:
            # NOTE(review): this path does not bump "requests_error" — confirm intended.
            raise RuntimeError(f"Local Space result error {result_resp.status_code}: {result_resp.text}")

        # Keep the LAST "data:" line of the event stream (final payload).
        line_data = None
        for line in result_resp.text.splitlines():
            if line.startswith("data:"):
                line_data = line.split("data:", 1)[1].strip()

        if not line_data:
            raise RuntimeError("Local Space result stream missing data")

        parsed = json.loads(line_data)
        # Non-dict payloads (e.g. a bare list) are wrapped for _extract_text.
        output_payload = parsed if isinstance(parsed, dict) else {"data": parsed}
        text = self._extract_text(output_payload)
        log_model_call(
            LOGGER,
            provider=provider,
            model=target_model,
            endpoint=url,
            latency_ms=latency_ms,
            input_tokens=None,
            output_tokens=None,
            status="ok",
            task_type=req.task_type,
            request_tag=req.request_tag,
            retry_attempt=1,
            fallback_depth=fallback_depth,
            route=route,
        )
        self._bump_metric("requests_ok", 1)
        return text
978
+
979
+ def _extract_text(self, data: object) -> str:
980
+ """Extract clean text from inference response, stripping JSON artifacts."""
981
+ if isinstance(data, list) and data:
982
+ first = data[0]
983
+ if isinstance(first, dict):
984
+ val = (first.get("generated_text") or "").strip()
985
+ if val:
986
+ return self._clean_response_text(val)
987
+
988
+ if isinstance(data, dict):
989
+ direct = (data.get("generated_text") or "").strip()
990
+ if direct:
991
+ return self._clean_response_text(direct)
992
+
993
+ choices = data.get("choices", [])
994
+ if choices:
995
+ message = choices[0].get("message", {})
996
+ msg = (message.get("content") or "").strip()
997
+ if msg:
998
+ return self._clean_response_text(msg)
999
+ reasoning = (message.get("reasoning") or "").strip()
1000
+ if reasoning:
1001
+ return self._clean_response_text(reasoning)
1002
+
1003
+ generic_data = data.get("data")
1004
+ if isinstance(generic_data, list) and generic_data:
1005
+ first = generic_data[0]
1006
+ if isinstance(first, str) and first.strip():
1007
+ return self._clean_response_text(first.strip())
1008
+
1009
+ raise RuntimeError(f"Unexpected inference response format: {data}")
1010
+
1011
+ def _clean_response_text(self, text: str) -> str:
1012
+ """Strip JSON braces, template artifacts, and whitespace from response text."""
1013
+ text = text.strip()
1014
+
1015
+ if text.startswith("{") and text.endswith("}"):
1016
+ try:
1017
+ parsed = json.loads(text)
1018
+ if isinstance(parsed, dict):
1019
+ if "content" in parsed:
1020
+ text = str(parsed["content"]).strip()
1021
+ elif "text" in parsed:
1022
+ text = str(parsed["text"]).strip()
1023
+ except json.JSONDecodeError:
1024
+ text = text.strip("{}")
1025
+
1026
+ if text.startswith("```json") or text.startswith("```"):
1027
+ text = re.sub(r"^```(?:json)?", "", text).strip()
1028
+ if text.endswith("```"):
1029
+ text = text[:-3].strip()
1030
+
1031
+ return text.strip()
1032
+
1033
+
1034
def create_default_client(firestore_client: Optional[Any] = None) -> InferenceClient:
    """Build an InferenceClient with default settings.

    Args:
        firestore_client: Optional pre-built Firestore client to share; when
            None the InferenceClient uses its own defaults.
    """
    return InferenceClient(firestore_client=firestore_client)
1036
+
1037
+
1038
def is_sequential_model(model_id: str = "") -> bool:
    """Return True when the effective model is the (sequential) reasoner model.

    The model id is taken from the argument, falling back to the
    INFERENCE_MODEL_ID environment variable. A runtime override locking the
    reasoner model also counts.
    """
    resolved = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
    if not resolved:
        return False
    if resolved == REASONER_MODEL:
        return True
    # A runtime lock pinning the reasoner model makes inference sequential too.
    if _RUNTIME_OVERRIDES and _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID", "") == REASONER_MODEL:
        return True
    return False
services/logging_utils.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from datetime import datetime, timezone
4
+ from typing import Any, Dict, Optional
5
+
6
+
7
def configure_structured_logging(name: str) -> logging.Logger:
    """Return a named logger writing plain-text records to a stream handler.

    Idempotent: a logger that already has handlers is returned unchanged, so
    repeated calls never stack duplicate handlers.
    """
    log = logging.getLogger(name)
    if log.handlers:
        return log

    log.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")
    )
    log.addHandler(stream_handler)
    # Keep records out of the root logger to avoid double emission.
    log.propagate = False
    return log
19
+
20
+
21
+ def _safe_json(payload: Dict[str, Any]) -> str:
22
+ return json.dumps(payload, ensure_ascii=True, default=str)
23
+
24
+
25
def log_model_call(
    logger: logging.Logger,
    *,
    provider: str,
    model: str,
    endpoint: str,
    latency_ms: float,
    input_tokens: Optional[int],
    output_tokens: Optional[int],
    status: str,
    error_class: Optional[str] = None,
    error_message: Optional[str] = None,
    task_type: Optional[str] = None,
    request_tag: Optional[str] = None,
    retry_attempt: Optional[int] = None,
    fallback_depth: Optional[int] = None,
    route: Optional[str] = None,
) -> None:
    """Emit one structured JSON log line describing a single model invocation.

    Successful calls (``status == "ok"``) log at INFO; everything else at ERROR.
    """
    record = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "model_call",
        "provider": provider,
        "model": model,
        "endpoint": endpoint,
        "latency_ms": round(latency_ms, 2),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "status": status,
        "error_class": error_class,
        "error_message": error_message,
        "task_type": task_type,
        "request_tag": request_tag,
        "retry_attempt": retry_attempt,
        "fallback_depth": fallback_depth,
        "route": route,
    }
    emit = logger.info if status == "ok" else logger.error
    emit(_safe_json(record))
65
+
66
+
67
def log_job_metric(
    logger: logging.Logger,
    *,
    job_name: str,
    run_id: str,
    metric_name: str,
    metric_value: Any,
    extras: Optional[Dict[str, Any]] = None,
) -> None:
    """Emit one structured JSON log line for a batch-job metric.

    Keys in *extras*, when provided, are merged into (and may override)
    the base payload.
    """
    record: Dict[str, Any] = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "job_metric",
        "job_name": job_name,
        "run_id": run_id,
        "metric_name": metric_name,
        "metric_value": metric_value,
        **(extras or {}),
    }
    logger.info(_safe_json(record))
services/user_provisioning_service.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import re
4
+ from urllib.parse import quote_plus, urlparse
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from .email_service import EmailSendResult, EmailService, EmailMessagePayload
9
+ from .email_templates import WelcomeCredentialsEmailContext, build_welcome_credentials_email
10
+
11
+
12
# Shared application logger for provisioning events.
logger = logging.getLogger("mathpulse")


# Accepted role/status tokens after lowercasing admin input.
VALID_ROLES = {"student", "teacher", "admin"}
VALID_STATUSES = {"active", "inactive"}
# Lightweight email shape check: non-space local part, "@", domain containing a dot.
EMAIL_REGEX = re.compile(r"^[^\s@]+@[^\s@]+\.[^\s@]+$")
# Password complexity classes; each must match at least once (see _validate_password).
PASSWORD_UPPER_REGEX = re.compile(r"[A-Z]")
PASSWORD_LOWER_REGEX = re.compile(r"[a-z]")
PASSWORD_DIGIT_REGEX = re.compile(r"\d")
PASSWORD_SPECIAL_REGEX = re.compile(r"[^A-Za-z0-9]")
22
+
23
+
24
@dataclass
class AdminCreateUserInput:
    """Raw admin-submitted payload for creating a user; validated on use."""

    name: str
    email: str
    password: str
    confirm_password: str
    role: str  # expected to normalize into VALID_ROLES (case-insensitive)
    status: str  # expected to normalize into VALID_STATUSES (case-insensitive)
    grade: str
    section: str
    lrn: Optional[str] = None  # Learner Reference Number; required for students
35
+
36
+
37
@dataclass
class CreateUserAndNotifyResult:
    """Outcome of create_user_and_notify: account creation plus email delivery."""

    uid: str  # Firebase Auth UID of the created user
    user_created: bool
    email_sent: bool
    result_code: str  # "created_and_emailed" or "created_email_failed"
    message: str  # human-readable summary
    warnings: List[str] = field(default_factory=list)
    email_result: Optional[EmailSendResult] = None  # raw send result, if attempted
46
+
47
+
48
class UserProvisioningError(Exception):
    """Provisioning failure carrying a machine-readable code and HTTP status hint."""

    def __init__(self, code: str, message: str, status_code: int = 400) -> None:
        super().__init__(message)
        self.code = code  # stable machine-readable error code
        self.message = message  # human-readable description
        self.status_code = status_code  # suggested HTTP response status
54
+
55
+
56
class UserProvisioningService:
    """Creates user accounts (Firebase Auth + Firestore profile) and sends welcome emails.

    Dependencies are injected so the service can run with stubs in tests;
    availability is checked via ``_ensure_dependencies`` before any work.
    """

    def __init__(
        self,
        *,
        firebase_auth_module: Any,
        firestore_module: Any,
        firestore_server_timestamp: Any,
        email_service: EmailService,
    ) -> None:
        """Store the injected Firebase/Firestore modules and email service."""
        self._firebase_auth_module = firebase_auth_module
        self._firestore_module = firestore_module
        # Sentinel (e.g. SERVER_TIMESTAMP) written verbatim into documents.
        self._firestore_server_timestamp = firestore_server_timestamp
        self._email_service = email_service

    def _ensure_dependencies(self) -> None:
        """Raise 503-coded errors when Auth or Firestore modules were not injected."""
        if self._firebase_auth_module is None:
            raise UserProvisioningError("auth_unavailable", "Firebase Auth service is unavailable.", 503)
        if self._firestore_module is None:
            raise UserProvisioningError("firestore_unavailable", "Firestore service is unavailable.", 503)

    def _normalize_role(self, role: str) -> str:
        """Lowercase and validate the role against VALID_ROLES."""
        normalized = (role or "").strip().lower()
        if normalized not in VALID_ROLES:
            raise UserProvisioningError("invalid_role", "Role must be Student, Teacher, or Admin.", 400)
        return normalized

    def _normalize_status(self, status: str) -> str:
        """Validate the status and return it title-cased ("Active"/"Inactive")."""
        normalized = (status or "").strip().lower()
        if normalized not in VALID_STATUSES:
            raise UserProvisioningError("invalid_status", "Status must be Active or Inactive.", 400)
        return "Active" if normalized == "active" else "Inactive"

    def _validate_email(self, email: str) -> str:
        """Lowercase the email and check it against EMAIL_REGEX."""
        normalized = (email or "").strip().lower()
        if not normalized or not EMAIL_REGEX.match(normalized):
            raise UserProvisioningError("invalid_email", "Invalid email format.", 400)
        return normalized

    def _validate_password(self, password: str, confirm_password: str) -> str:
        """Enforce length + character-class policy and confirm-password match."""
        value = password or ""
        if len(value) < 8:
            raise UserProvisioningError("weak_password", "Password must be at least 8 characters.", 400)
        if not PASSWORD_UPPER_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one uppercase letter.", 400)
        if not PASSWORD_LOWER_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one lowercase letter.", 400)
        if not PASSWORD_DIGIT_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one number.", 400)
        if not PASSWORD_SPECIAL_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one special character.", 400)
        if value != (confirm_password or ""):
            raise UserProvisioningError("password_mismatch", "Password and confirm password do not match.", 400)
        return value

    @staticmethod
    def _auth_user_not_found(error: Exception) -> bool:
        """Heuristic: does this Auth exception mean "no such user"? (string match)."""
        message = str(error).lower()
        return "not found" in message or "no user record" in message

    @staticmethod
    def _slugify(value: str) -> str:
        """Lowercase *value* and collapse non-alphanumeric runs to single underscores."""
        token = re.sub(r"[^a-z0-9]+", "_", (value or "").strip().lower())
        return re.sub(r"_+", "_", token).strip("_")

    @staticmethod
    def _build_default_avatar_url(display_name: str) -> str:
        """Return a ui-avatars.com URL rendering the user's initials."""
        return f"https://ui-avatars.com/api/?name={quote_plus(display_name or 'User')}&background=0d9488&color=fff"

    @staticmethod
    def _derive_brand_avatar_url(login_url: str) -> str:
        """Return the brand avatar URL: env override, login-URL origin, or fallback."""
        configured = (os.getenv("APP_BRAND_AVATAR_URL", "") or "").strip()
        if configured:
            return configured

        # Derive from the login URL's origin when it is a valid http(s) URL.
        parsed = urlparse(login_url or "")
        if parsed.scheme in {"http", "https"} and parsed.netloc:
            return f"{parsed.scheme}://{parsed.netloc}/avatar/avatar_icon.png"

        return "https://mathpulse.ai/avatar/avatar_icon.png"

    def _ensure_no_duplicate_email(self, email: str, firestore_client: Any) -> None:
        """Raise 409 if the email exists in Auth or Firestore; 503 if lookup fails."""
        try:
            # If the lookup succeeds the user already exists.
            self._firebase_auth_module.get_user_by_email(email)
            raise UserProvisioningError("duplicate_email", "A user with this email already exists.", 409)
        except UserProvisioningError:
            raise
        except Exception as auth_lookup_error:
            # "User not found" is the expected/desired outcome here.
            if not self._auth_user_not_found(auth_lookup_error):
                logger.warning("Auth duplicate lookup failed for %s: %s", email, auth_lookup_error)
                raise UserProvisioningError("auth_lookup_failed", "Unable to verify duplicate email in Auth.", 503)

        try:
            existing_docs = list(
                firestore_client.collection("users").where("email", "==", email).limit(1).stream()
            )
            if existing_docs:
                raise UserProvisioningError("duplicate_email", "A user profile with this email already exists.", 409)
        except UserProvisioningError:
            raise
        except Exception as firestore_lookup_error:
            logger.warning("Firestore duplicate lookup failed for %s: %s", email, firestore_lookup_error)
            raise UserProvisioningError("firestore_lookup_failed", "Unable to verify duplicate email in Firestore.", 503)

    def _build_profile_payload(self, user_input: AdminCreateUserInput, role_lower: str, normalized_status: str) -> Dict[str, Any]:
        """Build the Firestore profile document, with role-specific fields.

        Raises UserProvisioningError("missing_lrn") for students without an LRN.
        """
        display_name = (user_input.name or "").strip()
        grade = (user_input.grade or "").strip() or "Grade 11"
        section = (user_input.section or "").strip() or "Section A"
        class_section_id = self._slugify(f"{grade}_{section}") or "grade_11_section_a"

        payload: Dict[str, Any] = {
            "name": display_name,
            "email": (user_input.email or "").strip().lower(),
            "role": role_lower,
            "status": normalized_status,
            "grade": grade,
            "section": section,
            "classSectionId": class_section_id,
            # New accounts must rotate the admin-assigned password on first login.
            "forcePasswordChange": True,
            "photo": self._build_default_avatar_url(display_name),
            "updatedAt": self._firestore_server_timestamp,
        }

        if role_lower == "student":
            lrn = (user_input.lrn or "").strip()
            if not lrn:
                raise UserProvisioningError("missing_lrn", "LRN is required for student accounts.", 400)
            # Gamification/progress fields start at zero for new students.
            payload.update(
                {
                    "lrn": lrn,
                    "level": 1,
                    "currentXP": 0,
                    "totalXP": 0,
                    "streak": 0,
                    "atRiskSubjects": [],
                    "hasTakenDiagnostic": False,
                }
            )
        elif role_lower == "teacher":
            payload.update(
                {
                    "department": f"{grade} - {section}",
                    # Deterministic ID derived from the email slug.
                    "teacherId": f"TCH-{self._slugify(payload['email'])}",
                    "subject": "Mathematics",
                    "yearsOfExperience": "0",
                    "qualification": "",
                    "students": [],
                }
            )
        else:
            # Remaining valid role is "admin".
            payload.update(
                {
                    "department": f"{grade} - {section}",
                    "adminId": f"ADM-{self._slugify(payload['email'])}",
                    "position": "Administrator",
                }
            )

        return payload

    def create_user(self, user_input: AdminCreateUserInput) -> str:
        """Validate input, create the Auth user, write the Firestore profile.

        Returns the new Auth UID. On a Firestore write failure the Auth user
        is rolled back (best effort) before raising.
        """
        self._ensure_dependencies()

        if not (user_input.name or "").strip():
            raise UserProvisioningError("missing_name", "Name is required.", 400)

        normalized_email = self._validate_email(user_input.email)
        validated_password = self._validate_password(user_input.password, user_input.confirm_password)
        role_lower = self._normalize_role(user_input.role)
        normalized_status = self._normalize_status(user_input.status)

        firestore_client = self._firestore_module.client()
        self._ensure_no_duplicate_email(normalized_email, firestore_client)

        try:
            created_auth_user = self._firebase_auth_module.create_user(
                email=normalized_email,
                password=validated_password,
                display_name=(user_input.name or "").strip(),
                # Inactive accounts are created disabled in Auth.
                disabled=(normalized_status == "Inactive"),
            )
        except Exception as auth_create_error:
            logger.error("Auth user creation failed for %s: %s", normalized_email, auth_create_error)
            auth_error_text = str(auth_create_error)
            auth_error_text_lower = auth_error_text.lower()

            # Map known Auth error strings onto domain error codes.
            if "password_does_not_meet_requirements" in auth_error_text_lower or "password requirements" in auth_error_text_lower:
                raise UserProvisioningError(
                    "weak_password",
                    "Password does not meet authentication policy requirements.",
                    400,
                )

            if "email already exists" in auth_error_text_lower or "email_exists" in auth_error_text_lower:
                raise UserProvisioningError("duplicate_email", "A user with this email already exists.", 409)

            raise UserProvisioningError("auth_create_failed", "Failed to create authentication account.", 500)

        uid = str(getattr(created_auth_user, "uid", "") or "").strip()
        if not uid:
            raise UserProvisioningError("missing_uid", "Authentication account created without UID.", 500)

        profile_payload = self._build_profile_payload(user_input, role_lower, normalized_status)
        profile_payload["createdAt"] = self._firestore_server_timestamp

        try:
            firestore_client.collection("users").document(uid).set(profile_payload, merge=True)
        except Exception as firestore_write_error:
            logger.error("Firestore profile write failed for %s: %s", uid, firestore_write_error)
            # Best-effort rollback so no orphan Auth account is left behind.
            try:
                self._firebase_auth_module.delete_user(uid)
                logger.info("Rolled back Auth user creation for %s after Firestore write failure.", uid)
            except Exception as rollback_error:
                logger.warning(
                    "Failed to roll back Auth user %s after Firestore write failure: %s",
                    uid,
                    rollback_error,
                )
            raise UserProvisioningError("profile_write_failed", "Failed to create user profile in Firestore.", 500)

        return uid

    def send_welcome_credentials_email(self, user_input: AdminCreateUserInput) -> EmailSendResult:
        """Send the welcome email containing the login email and temporary password."""
        display_name = (user_input.name or "").strip()
        login_url = (os.getenv("APP_LOGIN_URL", "") or "").strip() or "https://mathpulse.ai"
        brand_avatar_url = self._derive_brand_avatar_url(login_url)
        recipient_avatar_url = self._build_default_avatar_url(display_name)

        template = build_welcome_credentials_email(
            WelcomeCredentialsEmailContext(
                recipient_name=display_name,
                login_email=(user_input.email or "").strip().lower(),
                # The admin-assigned password is shared once; forcePasswordChange
                # on the profile requires rotation at first login.
                temporary_password=user_input.password,
                role=(user_input.role or "").strip().title(),
                login_url=login_url,
                brand_avatar_url=brand_avatar_url,
                recipient_avatar_url=recipient_avatar_url,
            )
        )

        message = EmailMessagePayload(
            to_name=display_name,
            to_email=(user_input.email or "").strip().lower(),
            subject=template["subject"],
            html_content=template["html"],
            text_content=template["text"],
        )
        return self._email_service.send_transactional_email(message)

    def create_user_and_notify(self, user_input: AdminCreateUserInput) -> CreateUserAndNotifyResult:
        """Create the user, then attempt the welcome email.

        Email failure is non-fatal: the account stays created and the failure
        is reported via result_code/warnings.
        """
        uid = self.create_user(user_input)
        warnings: List[str] = []

        email_result = self.send_welcome_credentials_email(user_input)
        if email_result.success:
            return CreateUserAndNotifyResult(
                uid=uid,
                user_created=True,
                email_sent=True,
                result_code="created_and_emailed",
                message="User account was created and welcome email was sent.",
                warnings=warnings,
                email_result=email_result,
            )

        warnings.append("User was created but welcome email delivery failed.")
        if email_result.error_message:
            warnings.append(email_result.error_message)

        return CreateUserAndNotifyResult(
            uid=uid,
            user_created=True,
            email_sent=False,
            result_code="created_email_failed",
            message="User account was created, but welcome email failed to send.",
            warnings=warnings,
            email_result=email_result,
        )
+ )
services/youtube_service.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ YouTube search service for lesson video embeddings.
3
+ Uses YouTube Data API v3 to find relevant educational videos.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import logging
10
+ from typing import Optional
11
+
12
+ logger = logging.getLogger("mathpulse.youtube")
13
+
14
+ YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "")
15
+
16
+
17
+ def _parse_iso8601_duration(duration: str) -> int:
18
+ """Parse ISO 8601 duration string like 'PT5M30S' to seconds."""
19
+ import re
20
+ if not duration:
21
+ return 0
22
+ hours_match = re.search(r'(\d+)H', duration)
23
+ minutes_match = re.search(r'(\d+)M', duration)
24
+ seconds_match = re.search(r'(\d+)S', duration)
25
+ hours = int(hours_match.group(1)) if hours_match else 0
26
+ minutes = int(minutes_match.group(1)) if minutes_match else 0
27
+ seconds = int(seconds_match.group(1)) if seconds_match else 0
28
+ return hours * 3600 + minutes * 60 + seconds
29
+
30
+
31
def search_youtube_video(
    query: str,
    max_results: int = 5,
    min_duration_seconds: int = 180,
    language: str = "en",
) -> Optional[dict]:
    """
    Search YouTube Data API v3 for relevant educational videos.

    Args:
        query: Search query combining lesson title, subject, and competency
        max_results: Maximum number of results to return
        min_duration_seconds: Minimum video duration (filter out shorts)
        language: Preferred video language

    Returns:
        Best video match with videoId, title, channel, embedUrl, thumbnailUrl,
        or None when the API key is missing, the request fails, or nothing
        passes the duration filter.
    """
    if not YOUTUBE_API_KEY:
        logger.warning("YOUTUBE_API_KEY not set. Video search disabled.")
        return None

    import urllib.parse
    import json

    # Bias results toward the Philippine senior-high math curriculum.
    search_query = f"{query} DepEd Philippines Grade 11 Grade 12 mathematics"
    encoded_query = urllib.parse.quote(search_query)

    search_url = (
        f"https://www.googleapis.com/youtube/v3/search"
        f"?part=snippet&type=video&q={encoded_query}"
        f"&maxResults={max_results}&relevanceLanguage={language}"
        f"&key={YOUTUBE_API_KEY}"
    )

    try:
        import urllib.request
        with urllib.request.urlopen(search_url, timeout=10) as response:
            data = json.loads(response.read().decode())

        video_results = []
        for item in data.get("items", []):
            video_id = item.get("id", {}).get("videoId", "")
            if not video_id:
                continue

            title = item.get("snippet", {}).get("title", "")
            channel = item.get("snippet", {}).get("channelTitle", "")
            description = item.get("snippet", {}).get("description", "")

            # NOTE(review): one details request per result (N+1); the videos
            # endpoint accepts comma-separated ids and could be batched.
            video_details_url = (
                f"https://www.googleapis.com/youtube/v3/videos"
                f"?part=contentDetails,statistics&id={video_id}&key={YOUTUBE_API_KEY}"
            )

            try:
                with urllib.request.urlopen(video_details_url, timeout=10) as vd_response:
                    vd_data = json.loads(vd_response.read().decode())
                    vd_item = vd_data.get("items", [{}])[0]
                    content_details = vd_item.get("contentDetails", {})
                    duration = content_details.get("duration", "")

                    duration_secs = _parse_iso8601_duration(duration)
            except Exception:
                # Best effort: assume 10 minutes so the video is not discarded.
                duration_secs = 600

            # Skip shorts / clips below the minimum duration.
            if duration_secs < min_duration_seconds:
                continue

            embed_url = f"https://www.youtube.com/embed/{video_id}"
            thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"

            video_results.append({
                "videoId": video_id,
                "videoTitle": title,
                "videoChannel": channel,
                "embedUrl": embed_url,
                "thumbnailUrl": thumbnail_url,
                "durationSeconds": duration_secs,
                "description": description[:200],
            })

        if not video_results:
            return None

        # Prefer results whose title/description look instructional.
        for vr in video_results:
            if any(term in vr["videoTitle"].lower() or term in vr["description"].lower()
                   for term in ["tutorial", "lesson", "explain", "math", "solution"]):
                return vr

        return video_results[0] if video_results else None

    except Exception as e:
        # Network/API failure is non-fatal: callers treat None as "no video".
        logger.error("YouTube search failed: %s", e)
        return None
126
+
127
+
128
def get_video_for_lesson(
    lesson_title: str,
    subject: str,
    competency: str = "",
    quarter: int = 1,
) -> Optional[dict]:
    """Get the best YouTube video for a lesson.

    Builds a search query from the non-empty fields (capped at 200 chars)
    and delegates to search_youtube_video.
    """
    # NOTE(review): `quarter` is currently unused; kept for interface stability.
    pieces = (lesson_title, subject, competency)
    query = " ".join(piece for piece in pieces if piece)[:200]
    return search_youtube_video(query)
137
+
138
+
139
def store_video_in_firestore(lesson_id: str, video_data: dict):
    """Persist chosen video to Firestore for caching.

    Best effort: silently returns when the Firebase app is not initialized,
    and only logs a warning on any failure (callers never see an exception).
    """
    try:
        # Imported lazily so the module works without firebase installed/configured.
        import firebase_admin
        from firebase_admin import firestore
        if not firebase_admin._apps:
            # No initialized Firebase app: nothing to write to.
            return
        db = firestore.client()
        doc_ref = db.collection("curriculumDocuments").document(lesson_id)
        # Cached under <lesson>/videoEmbed/primary; read back by get_cached_video.
        doc_ref.collection("videoEmbed").document("primary").set({
            **video_data,
            "storedAt": firestore.SERVER_TIMESTAMP,
        })
    except Exception as e:
        logger.warning("Could not store video in Firestore: %s", e)
154
+
155
+
156
def get_cached_video(lesson_id: str) -> Optional[dict]:
    """Retrieve cached video from Firestore.

    Returns the document written by store_video_in_firestore, or None when
    Firebase is unavailable, the document is missing, or any error occurs.
    """
    try:
        # Imported lazily so the module works without firebase installed/configured.
        import firebase_admin
        from firebase_admin import firestore
        if not firebase_admin._apps:
            return None
        db = firestore.client()
        doc = db.collection("curriculumDocuments").document(lesson_id)
        video_doc = doc.collection("videoEmbed").document("primary").get()
        if video_doc.exists:
            return video_doc.to_dict()
    except Exception:
        # Cache lookup is best effort; treat any failure as a miss.
        pass
    return None
startup.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh
# Container entrypoint: select writable data directories, optionally run
# curriculum ingestion, then hand the process over to uvicorn.
set -eu

# Prefer the persistent /data volume when it is mounted; otherwise fall
# back to the paths baked into the image. ":=" only sets unset variables,
# so explicit env overrides always win.
if [ -d "/data" ]; then
    : "${CURRICULUM_DIR:=/data/curriculum}"
    : "${VECTORSTORE_DIR:=/data/vectorstore}"
else
    : "${CURRICULUM_DIR:=/app/datasets/curriculum}"
    : "${VECTORSTORE_DIR:=/app/datasets/vectorstore}"
fi

export CURRICULUM_DIR VECTORSTORE_DIR

mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"

# Ingestion is optional: run only when the script exists AND either a
# source repo is configured or at least one PDF is already present.
ingest_script="/app/scripts/ingest_curriculum.py"
if [ -f "${ingest_script}" ]; then
    if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ] || find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
        echo "INFO: Running curriculum ingestion (optional)..."
        python "${ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
    else
        echo "INFO: No curriculum PDFs present and CURRICULUM_SOURCE_REPO_ID unset; skipping ingest"
    fi
else
    echo "INFO: Curriculum ingestion script not found at ${ingest_script}; skipping (curriculum is optional)"
fi

# exec replaces the shell so signals reach the server process directly.
exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1
startup_validation.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Startup validation for MathPulse AI backend.
3
+
4
+ This module validates all critical dependencies and configurations BEFORE
5
+ the FastAPI app starts, preventing indefinite restart loops.
6
+
7
+ If any critical check fails, the process exits with a clear error message
8
+ that's visible in HF Space logs.
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import logging
14
+ from pathlib import Path
15
+
16
+ logger = logging.getLogger("mathpulse.startup")
17
+
18
+
19
class StartupError(Exception):
    """Critical error during startup validation."""
22
+
23
+
24
def validate_imports() -> None:
    """Verify all critical imports work. Use absolute imports.

    Import order matters: the first failing group determines the error, so
    the groups below are kept in the original dependency-ish order.

    Raises:
        StartupError: when any required module cannot be imported.
    """
    logger.info("🔍 Validating Python imports...")
    try:
        # Core FastAPI stack
        import fastapi  # noqa
        import uvicorn  # noqa
        import pydantic  # noqa
        logger.info(" ✓ FastAPI, Uvicorn, Pydantic OK")

        # Backend services (use ABSOLUTE imports like deployed code)
        from services.inference_client import (  # noqa
            InferenceClient, create_default_client, is_sequential_model,
            get_current_runtime_config, get_model_for_task, model_supports_thinking,
            set_runtime_model_profile, set_runtime_model_override, reset_runtime_overrides,
            _MODEL_PROFILES,
        )
        logger.info(" ✓ InferenceClient imports OK")

        from automation_engine import automation_engine  # noqa
        logger.info(" ✓ automation_engine imports OK")

        from analytics import compute_competency_analysis  # noqa
        logger.info(" ✓ analytics imports OK")

        # Firebase is optional — its absence is only a warning.
        try:
            import firebase_admin  # noqa
            logger.info(" ✓ firebase_admin imports OK")
        except ImportError:
            logger.warning(" ⚠ firebase_admin not available (OK if Firebase not needed)")

        # ML & inference
        from services.ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL  # noqa
        logger.info(" ✓ DeepSeek AI client imports OK")

        logger.info("✅ All critical imports validated")
    except ImportError as e:
        detail = (
            f"❌ IMPORT ERROR - Cannot start backend:\n"
            f" {e}\n"
            f"\n"
            f"This usually means:\n"
            f" - A Python package is missing (check requirements.txt)\n"
            f" - A relative import was used (must be absolute in container)\n"
            f" - A circular import exists\n"
            f"\n"
            f"Deploy will FAIL and backend will restart indefinitely.\n"
        )
        raise StartupError(detail) from e
    except Exception as e:
        raise StartupError(f"❌ Unexpected import error: {e}") from e
75
+
76
+
77
def validate_environment() -> None:
    """Verify required environment variables are set.

    Missing values are logged as warnings (never fatal); the embedding
    model gets its own dedicated sanity check.
    """
    logger.info("🔍 Validating environment variables...")

    truthy = {"1", "true", "yes", "on"}

    def _flag(name: str, default: str) -> bool:
        # Interpret common boolean-ish env values.
        return os.getenv(name, default).strip().lower() in truthy

    # CRITICAL: DEEPSEEK_API_KEY for inference
    if not os.environ.get("DEEPSEEK_API_KEY"):
        logger.warning(
            "⚠ WARNING: DEEPSEEK_API_KEY is not set as an environment variable.\n"
            " AI inference will fail without this token.\n"
            " Use: Set DEEPSEEK_API_KEY in your .env or space secrets."
        )
    else:
        logger.info(" ✓ DEEPSEEK_API_KEY is set")

    # Check inference provider config
    inference_provider = os.getenv("INFERENCE_PROVIDER", "deepseek")
    logger.info(f" ✓ INFERENCE_PROVIDER: {inference_provider}")

    # Check model IDs
    chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "deepseek-chat"
    logger.info(f" ✓ Chat model configured: {chat_model}")

    chat_strict = _flag("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
    chat_hard_trigger = _flag("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false")
    enforce_lock_model = _flag("INFERENCE_ENFORCE_LOCK_MODEL", "true")
    lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", "deepseek-chat").strip() or "deepseek-chat"
    logger.info(f" ✓ INFERENCE_ENFORCE_LOCK_MODEL: {enforce_lock_model}")
    logger.info(f" ✓ INFERENCE_LOCK_MODEL_ID: {lock_model_id}")

    model_profile = os.getenv("MODEL_PROFILE", "").strip().lower()
    quiz_model = os.getenv("HF_QUIZ_MODEL_ID", "").strip()
    rag_model = os.getenv("HF_RAG_MODEL_ID", "").strip()
    logger.info(f" ✓ MODEL_PROFILE: {model_profile or 'not set (using individual env vars)'}")
    logger.info(f" ✓ HF_QUIZ_MODEL_ID: {quiz_model or 'not set (using defaults)'}")
    logger.info(f" ✓ HF_RAG_MODEL_ID: {rag_model or 'not set (using defaults)'}")

    if not chat_strict:
        logger.warning(" ⚠ Chat strict model lock is disabled; chat may fallback to alternate models")
    if chat_strict and chat_hard_trigger:
        logger.warning(
            " ⚠ Chat hard trigger is enabled while strict chat lock is on; hard escalation will be bypassed"
        )

    _validate_embedding_model()

    logger.info("✅ Environment variables OK")
122
+
123
+
124
EXPECTED_EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"


def _validate_embedding_model() -> None:
    """Sanity-check the EMBEDDING_MODEL env var.

    Emits warnings (never raises) when the variable is unset, differs from
    the expected BGE embedding model, or is mistakenly set to a generation
    model id (which would break RAG retrieval).
    """
    embedding_model = os.getenv("EMBEDDING_MODEL", "").strip()
    if not embedding_model:
        logger.warning(
            "WARNING: EMBEDDING_MODEL env var is not set. "
            f"Expected: {EXPECTED_EMBEDDING_MODEL}. "
            "RAG retrieval will fail without an embedding model."
        )
    elif embedding_model != EXPECTED_EMBEDDING_MODEL:
        logger.warning(
            f"WARNING: EMBEDDING_MODEL is set to '{embedding_model}' — "
            f"expected '{EXPECTED_EMBEDDING_MODEL}'. "
            "Confirm this is intentional before deploying."
        )
    # Bug fix: guard this import. This check is purely advisory, so a
    # broken/missing services.ai_client module must not crash validation.
    try:
        from services.ai_client import CHAT_MODEL, REASONER_MODEL  # noqa
        generation_model_ids = [CHAT_MODEL, REASONER_MODEL]
    except Exception:
        generation_model_ids = []
    if embedding_model in generation_model_ids:
        logger.warning(
            f"CRITICAL: EMBEDDING_MODEL is set to a generation model ('{embedding_model}'). "
            "This will break RAG retrieval. Set it to 'BAAI/bge-small-en-v1.5'."
        )
    else:
        logger.info(f" EMBEDDING_MODEL: {embedding_model or 'not set'}")
151
+
152
+
153
def validate_config_files() -> None:
    """Verify config files exist and are readable.

    Raises:
        StartupError: when no candidate models.yaml is readable, or the
            first readable one is empty/unreadable.
    """
    logger.info("🔍 Validating configuration files...")

    # Accept either deployment/runtime path without warning when one valid path exists.
    candidates = [
        "config/models.yaml",
        "backend/config/models.yaml",
    ]

    chosen = None
    for candidate in candidates:
        path = Path(candidate)
        if not path.exists():
            continue
        try:
            content = path.read_text(encoding='utf-8')
        except Exception as e:
            raise StartupError(
                f"❌ CONFIG ERROR: Cannot read {candidate}:\n"
                f" {e}\n"
            ) from e
        if not content.strip():
            raise StartupError(
                f"❌ CONFIG ERROR: {candidate} is empty!\n"
                f" This will cause model routing to fail.\n"
            )
        chosen = candidate
        break

    if not chosen:
        joined_paths = ", ".join(candidates)
        raise StartupError(
            f"❌ CONFIG ERROR: No readable model config found.\n"
            f" Checked: {joined_paths}\n"
        )

    logger.info(f" ✓ Using model config: {chosen}")

    _validate_model_config_fields(chosen)

    logger.info("✅ Configuration files OK")
198
+
199
+
200
def validate_file_structure() -> None:
    """Verify critical backend files exist.

    Each entry is a list of acceptable locations (flat layout vs. backend/
    subdirectory); the first that exists wins.

    Raises:
        StartupError: when none of a required entry's candidates exist.
    """
    logger.info("🔍 Validating file structure...")

    def _first_existing(paths):
        # Return the first candidate path that exists, else None.
        for p in paths:
            if Path(p).exists():
                return p
        return None

    required_path_sets = [
        ["main.py", "backend/main.py"],
        ["services/inference_client.py", "backend/services/inference_client.py"],
        ["analytics.py", "backend/analytics.py"],
        ["automation_engine.py", "backend/automation_engine.py"],
    ]
    optional_path_sets = [
        ["Dockerfile", "backend/Dockerfile"],
    ]

    for candidates in required_path_sets:
        found = _first_existing(candidates)
        if not found:
            joined = " or ".join(candidates)
            raise StartupError(
                f"❌ FILE MISSING: {joined}\n"
                f" Backend structure is broken for this deployment layout.\n"
            )
        logger.info(f" ✓ Found {found}")

    for candidates in optional_path_sets:
        found = _first_existing(candidates)
        if found:
            logger.info(f" ✓ Found optional build file {found}")
        else:
            joined = " or ".join(candidates)
            logger.info(
                f" ℹ Optional build file not present at runtime: {joined}"
            )

    logger.info("✅ File structure OK")
246
+
247
+
248
def validate_inference_client_config() -> None:
    """Validate InferenceClient can load its config.

    Instantiates the default client (which loads the YAML model config),
    checks the task→model and task→provider maps exist, that the four core
    tasks are routed, and — when the strict chat lock is on — that the
    effective chat fallback chain contains exactly one model.

    Raises:
        StartupError: when the client cannot be built or is misconfigured.
    """
    logger.info("🔍 Validating InferenceClient configuration...")

    try:
        # Try to create the client (this will load config from YAML)
        from services.inference_client import create_default_client
        client = create_default_client()

        # Verify critical attributes
        if not hasattr(client, 'task_model_map'):
            raise StartupError("❌ InferenceClient missing task_model_map attribute")

        if not hasattr(client, 'task_provider_map'):
            raise StartupError("❌ InferenceClient missing task_provider_map attribute")

        # Check that required tasks are mapped
        required_tasks = ['chat', 'verify_solution', 'lesson_generation', 'quiz_generation']
        for task in required_tasks:
            if task not in client.task_model_map:
                raise StartupError(
                    f"❌ Task '{task}' not in task_model_map.\n"
                    f" Check config/models.yaml\n"
                )
            model = client.task_model_map[task]
            provider = client.task_provider_map.get(task, 'unknown')
            logger.info(f" ✓ {task}: {model} ({provider})")

        # NOTE(review): this relies on the private _model_chain_for_task
        # helper — confirm it stays stable across inference_client refactors.
        chat_model = client.task_model_map.get("chat", client.default_model)
        chat_chain = client._model_chain_for_task("chat", chat_model)
        logger.info(
            f" ✓ chat strict lock: {client.chat_strict_model_only}; "
            f"effective chat chain length={len(chat_chain)}"
        )
        # With the strict lock on, any fallback model in the chain would let
        # chat silently escalate to another model — treat that as fatal.
        if client.chat_strict_model_only and len(chat_chain) != 1:
            raise StartupError(
                "❌ Chat strict model lock is enabled but effective chat model chain is not singular.\n"
                " Check INFERENCE_CHAT_STRICT_MODEL_ONLY and routing.task_fallback_model_map.chat\n"
            )

        logger.info("✅ InferenceClient configuration OK")

    except StartupError:
        # Already a well-formed startup failure; re-raise unchanged.
        raise
    except Exception as e:
        raise StartupError(
            f"❌ InferenceClient validation failed:\n"
            f" {e}\n"
            f" Check config/models.yaml and backend/config/models.yaml\n"
        ) from e
298
+
299
+
300
def _validate_model_config_fields(config_path: str) -> None:
    """Parse *config_path* as YAML and check the model-routing schema.

    Verifies the 'models' section, the 'rag_primary' and
    'model_capabilities' entries, and that all required RAG task mappings
    are present under routing.task_model_map.

    Raises:
        StartupError: on unparseable YAML or any missing required field.
    """
    try:
        import yaml
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}
    except Exception as e:
        raise StartupError(f"❌ Cannot parse {config_path} as YAML: {e}") from e

    models = config.get("models", {})
    if not isinstance(models, dict):
        raise StartupError(f"❌ {config_path}: 'models' section missing or invalid")

    if "rag_primary" not in models:
        raise StartupError(f"❌ {config_path}: missing 'models.rag_primary' field")
    rag_primary = models["rag_primary"]
    if isinstance(rag_primary, dict):
        logger.info(f" ✓ rag_primary model: {rag_primary.get('id', 'UNSET')}")
    else:
        # Fix: these constant messages were placeholder-less f-strings (F541).
        logger.warning(" ⚠ rag_primary is not a dict, may cause issues")

    capabilities = models.get("model_capabilities")
    if not isinstance(capabilities, dict):
        raise StartupError(f"❌ {config_path}: missing 'models.model_capabilities' section")
    logger.info(f" ✓ model_capabilities: sequential_only={capabilities.get('sequential_only')}, supports_thinking={capabilities.get('supports_thinking')}")

    tasks = config.get("routing", {}).get("task_model_map", {})
    rag_tasks = {"rag_lesson", "rag_problem", "rag_analysis_context"}
    # Set comprehension instead of set(generator) for the normalized keys.
    missing_rag = rag_tasks - {str(t).strip().lower() for t in tasks.keys()}
    if missing_rag:
        raise StartupError(f"❌ {config_path}: missing RAG task mappings: {missing_rag}")

    logger.info(" ✓ All RAG task mappings present")
332
+
333
+
334
def run_all_validations() -> None:
    """Run comprehensive startup validation.

    If any check fails, exits with clear error message visible in logs.
    When STARTUP_VALIDATION_STRICT is falsy, failures are logged but the
    process continues starting up.
    """
    banner = "=" * 70
    logger.info(banner)
    logger.info("🚀 STARTUP VALIDATION - Checking all critical dependencies")
    logger.info(banner)

    strict_mode = os.getenv("STARTUP_VALIDATION_STRICT", "false").strip().lower() in {"1", "true", "yes", "on"}

    try:
        # Order matters: cheap structural checks run before the heavier
        # import/config/client checks.
        for check in (
            validate_file_structure,
            validate_imports,
            validate_environment,
            validate_config_files,
            validate_inference_client_config,
        ):
            check()

        logger.info(banner)
        logger.info("✅ ALL STARTUP VALIDATIONS PASSED")
        logger.info(banner)

    except StartupError as e:
        logger.error(banner)
        logger.error(str(e))
        logger.error(banner)
        if strict_mode:
            logger.error("\n🛑 DEPLOYMENT WILL FAIL - Fix errors above and redeploy")
            sys.exit(1)
        logger.warning(
            "\n⚠️ Continuing startup because STARTUP_VALIDATION_STRICT is disabled. "
            "Set STARTUP_VALIDATION_STRICT=true to fail fast."
        )
    except Exception as e:
        logger.exception(f"Unexpected validation error: {e}")
        if strict_mode:
            sys.exit(1)
        logger.warning(
            "⚠️ Continuing startup after unexpected validation error because "
            "STARTUP_VALIDATION_STRICT is disabled."
        )
tests/test_admin_model_routes.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Route-level tests for the /api/admin/model-config endpoints.
3
+
4
+ Follows the auth mock pattern from test_api.py.
5
+ """
6
+
7
+ import os
8
+ from unittest.mock import MagicMock, patch
9
+
10
+ import pytest
11
+ from fastapi.testclient import TestClient
12
+
13
+ import main as main_module
14
+ from main import app
15
+ from services.inference_client import reset_runtime_overrides
16
+
17
# Pretend Firebase is initialised so the auth middleware runs without
# real credentials, and stub token verification to always resolve to an
# admin identity.
main_module._firebase_ready = True
main_module._init_firebase_admin = lambda: None
main_module.firebase_firestore = None
# Install a stub auth module only if main never imported a real one.
if getattr(main_module, "firebase_auth", None) is None:
    main_module.firebase_auth = MagicMock()
main_module.firebase_auth.verify_id_token = MagicMock(return_value={
    "uid": "admin-uid",
    "email": "admin@example.com",
    "role": "admin",
})

# Client whose requests always carry an admin bearer token.
admin_client = TestClient(app, headers={"Authorization": "Bearer admin-token"})

# Keys the "resolved" section of the config response must contain.
_RESOLVED_KEYS = {
    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
}
# Profiles the API is expected to advertise.
_KNOWN_PROFILES = {"dev", "budget", "prod"}
# Top-level keys of the GET /api/admin/model-config response body.
_BASE_CONFIG_KEYS = {"profile", "overrides", "resolved"}
36
+
37
+
38
@pytest.fixture(autouse=True)
def _mock_firestore():
    """Prevent every test from persisting runtime config to Firestore."""
    with patch("services.inference_client._save_runtime_config_to_firestore", side_effect=None):
        yield
42
+
43
+
44
@pytest.fixture(autouse=True)
def _reset_overrides():
    """Start each test from pristine runtime overrides and clean up after."""
    reset_runtime_overrides()
    yield
    reset_runtime_overrides()
49
+
50
+
51
+ # ─── Auth Enforcement ────────────────────────────────────────
52
+
53
+
54
class TestAuth:
    """Auth enforcement on GET /api/admin/model-config."""

    @staticmethod
    def _restore_admin_token():
        # Re-install the default admin verification used by other tests.
        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
            "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
        })

    def test_get_rejects_bad_token(self):
        main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
        bad_client = TestClient(app, headers={"Authorization": "Bearer bad-token"})
        resp = bad_client.get("/api/admin/model-config")
        self._restore_admin_token()
        assert resp.status_code in {401, 403}

    def test_get_rejects_student_role(self):
        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
            "uid": "student-uid", "email": "s@example.com", "role": "student",
        })
        student_client = TestClient(app, headers={"Authorization": "Bearer student-token"})
        resp = student_client.get("/api/admin/model-config")
        self._restore_admin_token()
        assert resp.status_code == 403
74
+
75
+
76
+ # ─── GET Model Config ─────────────────────────────────────────
77
+
78
+
79
class TestGetModelConfig:
    """Shape of GET /api/admin/model-config responses."""

    @staticmethod
    def _fetch():
        # All tests in this class read the same endpoint.
        return admin_client.get("/api/admin/model-config")

    def test_returns_base_keys(self):
        resp = self._fetch()
        assert resp.status_code == 200
        body = resp.json()
        assert all(key in body for key in _BASE_CONFIG_KEYS)

    def test_resolved_contains_expected_keys(self):
        resolved = self._fetch().json().get("resolved", {})
        assert all(key in resolved for key in _RESOLVED_KEYS)

    def test_available_profiles_present(self):
        profiles = self._fetch().json().get("availableProfiles", [])
        assert all(profile in profiles for profile in _KNOWN_PROFILES)

    def test_profile_descriptions_present(self):
        descriptions = self._fetch().json().get("profileDescriptions", {})
        assert all(profile in descriptions for profile in _KNOWN_PROFILES)

    def test_resolved_models_are_non_empty_strings(self):
        # Pin a known profile first so resolution is deterministic.
        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
        resolved = self._fetch().json().get("resolved", {})
        for key, value in resolved.items():
            assert isinstance(value, str), f"{key} is not a string: {value}"
            assert len(value) > 0, f"Resolved key {key} is empty"
116
+
117
+
118
+ # ─── POST Profile Switch ─────────────────────────────────────
119
+
120
+
121
class TestPostProfileSwitch:
    """POST /api/admin/model-config/profile behaviour."""

    @staticmethod
    def _switch(payload):
        return admin_client.post("/api/admin/model-config/profile", json=payload)

    def test_switch_to_dev_succeeds(self):
        resp = self._switch({"profile": "dev"})
        assert resp.status_code == 200
        assert resp.json()["success"] is True

    def test_switch_to_budget_succeeds(self):
        resp = self._switch({"profile": "budget"})
        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["applied"]["profile"] == "budget"

    def test_switch_to_prod_succeeds(self):
        resp = self._switch({"profile": "prod"})
        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["applied"]["profile"] == "prod"

    def test_switch_to_invalid_profile_returns_400(self):
        assert self._switch({"profile": "nonexistent"}).status_code == 400

    def test_switch_missing_profile_field(self):
        # Pydantic validation rejects the body before the handler runs.
        assert self._switch({}).status_code == 422
148
+
149
+
150
+ # ─── POST Override ───────────────────────────────────────────
151
+
152
+
153
class TestPostOverride:
    """POST /api/admin/model-config/override behaviour."""

    @staticmethod
    def _override(key, value):
        return admin_client.post(
            "/api/admin/model-config/override",
            json={"key": key, "value": value},
        )

    def test_set_valid_override_key_succeeds(self):
        resp = self._override("INFERENCE_MODEL_ID", "test/override-model")
        assert resp.status_code == 200
        assert resp.json()["success"] is True

    def test_set_invalid_override_key_returns_400(self):
        # EMBEDDING_MODEL is deliberately not overridable at runtime.
        assert self._override("EMBEDDING_MODEL", "test/emb").status_code == 400

    def test_override_is_visible_in_subsequent_get(self):
        self._override("INFERENCE_MODEL_ID", "custom/model-v2")
        config = admin_client.get("/api/admin/model-config").json()
        overrides = config.get("overrides", {})
        assert "INFERENCE_MODEL_ID" in overrides
        assert overrides["INFERENCE_MODEL_ID"] == "custom/model-v2"
179
+
180
+
181
+ # ─── DELETE Reset ───────────────────────────────────────────
182
+
183
+
184
class TestDeleteReset:
    """DELETE /api/admin/model-config/reset behaviour."""

    @staticmethod
    def _reset():
        return admin_client.delete("/api/admin/model-config/reset")

    def test_reset_returns_success(self):
        resp = self._reset()
        assert resp.status_code == 200
        assert resp.json()["success"] is True

    def test_reset_clears_override(self):
        admin_client.post(
            "/api/admin/model-config/override",
            json={"key": "INFERENCE_MODEL_ID", "value": "temp/model"},
        )
        resp = self._reset()
        assert resp.status_code == 200
        assert resp.json()["current"]["overrides"] == {}

    def test_reset_clears_profile(self):
        admin_client.post("/api/admin/model-config/profile", json={"profile": "budget"})
        resp = self._reset()
        assert resp.status_code == 200
        assert resp.json()["current"]["profile"] == ""
205
+
206
+
207
+ # ─── Profile after switch ────────────────────────────────────
208
+
209
+
210
class TestProfileAfterSwitch:
    """Profile switches must be reflected by subsequent GETs."""

    def test_switched_profile_visible_in_get(self):
        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
        current = admin_client.get("/api/admin/model-config").json()
        assert current["profile"] == "dev"
tests/test_api.py ADDED
@@ -0,0 +1,2053 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ backend/tests/test_api.py
3
+ Comprehensive tests for all FastAPI endpoints.
4
+
5
+ Tests cover:
6
+ - Successful requests with valid data
7
+ - AI inference API failures (502 fallback)
8
+ - Timeout handling
9
+ - Malformed response data
10
+ - Error status-code mapping
11
+
12
+ Run with: pytest backend/tests/test_api.py -v
13
+ """
14
+
15
+ import asyncio
16
+ import json
17
+ import os
18
+ import sys
19
+ import time
20
+ from typing import Any, Dict, List
21
+ from unittest.mock import AsyncMock, MagicMock, patch
22
+
23
+ import pytest # type: ignore[import-not-found]
24
+ from fastapi.testclient import TestClient
25
+
26
+ # Add backend directory to path
27
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
28
+ from services.inference_client import InferenceClient, InferenceRequest
29
+
30
+ # automation_engine has Firebase dependencies - mock its heavy parts
31
+ # but keep the Pydantic model classes
32
# automation_engine has Firebase dependencies - mock its heavy parts
# but keep the Pydantic model classes
mock_ae = MagicMock()

# Define minimal Pydantic-like classes for payloads automation_engine exports
from pydantic import BaseModel as _BM
# NOTE(review): EmailSendResult is not used in this setup block —
# presumably referenced by tests later in the file; confirm before removing.
from services.email_service import EmailSendResult

class _DiagnosticCompletionPayload(_BM):
    # Mirrors automation_engine.DiagnosticCompletionPayload.
    studentId: str
    results: list
    gradeLevel: str | None = None
    questionBreakdown: dict | None = None

class _QuizSubmissionPayload(_BM):
    # Mirrors automation_engine.QuizSubmissionPayload.
    studentId: str
    quizId: str
    subject: str
    score: float
    totalQuestions: int
    correctAnswers: int
    timeSpentSeconds: int

class _StudentEnrollmentPayload(_BM):
    # Mirrors automation_engine.StudentEnrollmentPayload.
    studentId: str
    name: str
    email: str
    gradeLevel: str | None = None
    teacherId: str | None = None

class _DataImportPayload(_BM):
    # Mirrors automation_engine.DataImportPayload.
    teacherId: str
    students: list
    columnMapping: dict

class _ContentUpdatePayload(_BM):
    # Mirrors automation_engine.ContentUpdatePayload.
    adminId: str
    action: str
    contentType: str
    contentId: str
    subjectId: str | None = None
    details: str | None = None

class _AutomationResult(_BM):
    # Default-success result object returned by mocked automation calls.
    success: bool = True
    message: str = ""
    actions: list = []

# Attach the stand-in classes to the mocked module, then register it so
# `import automation_engine` inside main resolves to this mock.
mock_ae.automation_engine = MagicMock()
mock_ae.DiagnosticCompletionPayload = _DiagnosticCompletionPayload
mock_ae.QuizSubmissionPayload = _QuizSubmissionPayload
mock_ae.StudentEnrollmentPayload = _StudentEnrollmentPayload
mock_ae.DataImportPayload = _DataImportPayload
mock_ae.ContentUpdatePayload = _ContentUpdatePayload
mock_ae.AutomationResult = _AutomationResult
sys.modules["automation_engine"] = mock_ae

# Override tokens so client init doesn't fail
os.environ["HF_TOKEN"] = "test-token-for-testing"
os.environ["DEEPSEEK_API_KEY"] = "test-ds-key-for-testing"

# analytics.py is importable directly (its heavy deps are guarded)
# Must happen AFTER the automation_engine mock is registered above.
import main as main_module  # noqa: E402

app = main_module.app

# Mock auth verification so protected endpoints can run in tests without Firebase credentials.
main_module._firebase_ready = True
main_module._init_firebase_admin = lambda: None
main_module.firebase_firestore = None
if getattr(main_module, "firebase_auth", None) is None:
    main_module.firebase_auth = MagicMock()
# Every request in this file authenticates as the same teacher account.
main_module.firebase_auth.verify_id_token = MagicMock(
    return_value={
        "uid": "test-teacher-uid",
        "email": "teacher@example.com",
        "role": "teacher",
    }
)

client = TestClient(app, headers={"Authorization": "Bearer test-auth-token"})
111
+
112
+
113
+ # ─── Fixtures ──────────────────────────────────────────────────
114
+
115
+
116
def make_deepseek_risk_mock(
    risk_label: str = "low risk academically stable",
    confidence: float = 0.85,
):
    """Create a mock DeepSeek client for risk prediction tests.

    The returned MagicMock mimics the OpenAI-compatible shape:
    ``client.chat.completions.create(...).choices[0].message.content``
    yields a JSON string with ``risk_label``, ``confidence`` and ``reasoning``.
    """
    payload = json.dumps({
        "risk_label": risk_label,
        "confidence": confidence,
        "reasoning": "Mock risk assessment."
    })

    choice = MagicMock()
    choice.message.content = payload

    ds_client = MagicMock()
    ds_client.chat.completions.create.return_value = MagicMock(choices=[choice])
    return ds_client
132
+
133
+
134
+ # ─── Health & Root ─────────────────────────────────────────────
135
+
136
+
137
class TestHealthEndpoints:
    """Smoke tests for the unauthenticated health and root endpoints."""

    def test_health_returns_200(self):
        resp = client.get("/health")
        assert resp.status_code == 200
        body = resp.json()
        assert body["status"] == "healthy"
        assert "models" in body

    def test_root_returns_api_info(self):
        resp = client.get("/")
        assert resp.status_code == 200
        body = resp.json()
        assert body["name"] == "MathPulse AI API"
        assert "version" in body

    def test_health_includes_request_id_header(self):
        # Every response should carry the correlation id header.
        assert "x-request-id" in client.get("/health").headers
155
+
156
+
157
class TestAuthMiddleware:
    """Auth middleware should accept tokens carrying `user_id` when `uid` is absent."""

    def test_accepts_user_id_claim_when_uid_missing(self):
        now = int(time.time())
        material = {
            "materialId": "mat-auth-1",
            "teacherId": "test-teacher-uid",
            "fileName": "auth-check.pdf",
            "fileType": "pdf",
            "classSectionId": "grade11_a",
            "topics": [{"title": "Linear Equations"}],
            "extractedTextLength": 300,
            "retentionDays": 180,
            "expiresAtEpoch": now + 3600,
        }
        firestore = _FakeFirestoreModule({"courseMaterials": [material]})

        # Token has `user_id` instead of `uid` — middleware must still resolve it.
        claims = {
            "user_id": "test-teacher-uid",
            "email": "teacher@example.com",
            "role": "teacher",
        }
        with patch.object(main_module.firebase_auth, "verify_id_token", return_value=claims), \
                patch.object(main_module, "firebase_firestore", firestore), \
                patch.object(main_module, "_firebase_ready", True):
            resp = client.get("/api/upload/course-materials/recent?classSectionId=grade11_a&limit=10")

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert len(body["materials"]) == 1
        assert body["materials"][0]["materialId"] == "mat-auth-1"
190
+
191
+
192
+ # ─── Chat Endpoint ─────────────────────────────────────────────
193
+
194
+
195
class TestChatEndpoint:
    """Tests for /api/chat and /api/chat/stream.

    Covers the math-only guardrails (greeting/thanks/non-math short-circuits,
    contextual follow-up tokens), inference error mapping, and the SSE stream
    protocol (chunk/error/end events, timeouts, marker-based continuation).
    """

    # ── Non-streaming chat ──────────────────────────────────────
    @patch("main.call_hf_chat")
    def test_chat_success(self, mock_chat):
        mock_chat.return_value = "Hello! 2+2=4."
        response = client.post("/api/chat", json={
            "message": "What is 2+2?",
            "history": [],
        })
        assert response.status_code == 200
        assert "4" in response.json()["response"]

    @patch("main.call_hf_chat")
    def test_chat_non_math_returns_refusal_and_skips_inference(self, mock_chat):
        # Off-topic question must be answered by a canned redirect, no model call.
        response = client.post("/api/chat", json={
            "message": "Who is Elon Musk?",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._NON_MATH_REDIRECT_RESPONSES
        mock_chat.assert_not_called()

    @patch("main.call_hf_chat")
    def test_chat_greeting_returns_friendly_response_and_skips_inference(self, mock_chat):
        response = client.post("/api/chat", json={
            "message": "hello",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._GREETING_RESPONSES
        mock_chat.assert_not_called()

    @patch("main.call_hf_chat")
    def test_chat_thanks_returns_friendly_response_and_skips_inference(self, mock_chat):
        response = client.post("/api/chat", json={
            "message": "thanks",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._THANKS_RESPONSES
        mock_chat.assert_not_called()

    # ── Follow-up token handling ("go"/"more"/"continue") ───────
    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_allows_contextual_followup_token_and_calls_inference(self, mock_chat_async):
        # A bare "go" after an assistant continuation prompt should reach the model.
        mock_chat_async.return_value = "Sure. Next step: isolate x on one side."
        response = client.post("/api/chat", json={
            "message": "go",
            "history": [
                {"role": "assistant", "content": "Nice work. Shall we continue?"},
            ],
        })

        assert response.status_code == 200
        assert response.json()["response"] == "Sure. Next step: isolate x on one side."
        mock_chat_async.assert_called_once()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_followup_token_reconstructs_latest_math_intent_and_calls_inference(self, mock_chat_async):
        mock_chat_async.return_value = "Continuing: subtract 3 from both sides first."
        response = client.post("/api/chat", json={
            "message": "more",
            "history": [
                {"role": "user", "content": "Solve for x in 2x + 3 = 7"},
                {"role": "assistant", "content": "Start by isolating x."},
            ],
        })

        assert response.status_code == 200
        assert response.json()["response"] == "Continuing: subtract 3 from both sides first."
        mock_chat_async.assert_called_once()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_followup_token_without_context_requests_clarification(self, mock_chat_async):
        # "go" with no prior context cannot be continued — ask for clarification.
        response = client.post("/api/chat", json={
            "message": "go",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] == main_module._CONTINUATION_CONTEXT_CLARIFY_RESPONSE
        mock_chat_async.assert_not_called()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_punctuated_followup_token_without_context_requests_clarification(self, mock_chat_async):
        # Trailing punctuation ("go!") must not defeat the follow-up detection.
        response = client.post("/api/chat", json={
            "message": "go!",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] == main_module._CONTINUATION_CONTEXT_CLARIFY_RESPONSE
        mock_chat_async.assert_not_called()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_followup_token_after_refused_request_remains_blocked(self, mock_chat_async):
        # "continue" after a refused off-topic question stays refused.
        response = client.post("/api/chat", json={
            "message": "continue",
            "history": [
                {"role": "user", "content": "Who is Elon Musk?"},
                {
                    "role": "assistant",
                    "content": main_module._NON_MATH_REDIRECT_RESPONSES[0],
                },
            ],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._NON_MATH_REDIRECT_RESPONSES
        mock_chat_async.assert_not_called()

    @patch("main.call_hf_chat")
    def test_chat_with_history(self, mock_chat):
        mock_chat.return_value = "Yes, that's right."
        response = client.post("/api/chat", json={
            "message": "Is x = 4 correct for 2 + 2 = x?",
            "history": [
                {"role": "user", "content": "What is 2+2?"},
                {"role": "assistant", "content": "4"},
            ],
        })
        assert response.status_code == 200
        # Verify history was included in messages
        call_args = mock_chat.call_args
        messages = call_args.args[0] if call_args.args else call_args.kwargs.get("messages", [])
        assert len(messages) >= 3  # system + 2 history + 1 current

    def test_chat_missing_message_returns_422(self):
        response = client.post("/api/chat", json={"history": []})
        assert response.status_code == 422

    @patch("main.call_hf_chat")
    def test_chat_hf_failure_returns_502(self, mock_chat):
        # Upstream inference failures map to 502 Bad Gateway.
        mock_chat.side_effect = Exception("HF API down")
        response = client.post("/api/chat", json={
            "message": "Solve 3x + 1 = 10",
            "history": [],
        })
        assert response.status_code == 502

    @patch("main.call_hf_chat")
    def test_chat_quadratic_prompt_smoke(self, mock_chat):
        mock_chat.return_value = (
            "Given x^2 - 5x + 6 = 0, factor to (x-2)(x-3)=0. "
            "So x = 2 or x = 3. Final answer: x = 2, x = 3."
        )
        response = client.post("/api/chat", json={
            "message": "Solve quadratic equation x² - 5x + 6 = 0 step-by-step.",
            "history": [],
        })
        assert response.status_code == 200
        data = response.json()["response"]
        assert "x = 2" in data
        assert "x = 3" in data

    # ── Streaming (SSE) chat ────────────────────────────────────
    @patch("main.call_hf_chat_stream")
    def test_chat_stream_success(self, mock_stream):
        mock_stream.return_value = iter(["Hello", " world"])

        with client.stream("POST", "/api/chat/stream", json={
            "message": "What is 2 + 2?",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "event: chunk" in content
        assert '"chunk": "Hello"' in content
        assert "event: end" in content

    @patch("main.call_hf_chat_stream")
    def test_chat_stream_emits_error_event(self, mock_stream):
        # Stream failures are reported in-band as an error event, then end.
        mock_stream.side_effect = Exception("HF stream down")

        with client.stream("POST", "/api/chat/stream", json={
            "message": "Solve x + 2 = 5",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "event: error" in content
        assert "event: end" in content

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_timeout_emits_error_and_end_events(self, mock_stream_async):
        # Generator that is slower than the (patched-down) stream timeouts.
        async def _slow_stream(*args, **kwargs):
            await asyncio.sleep(0.05)
            yield "late chunk"

        mock_stream_async.return_value = _slow_stream()

        with patch.object(main_module, "CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC", 0.01), patch.object(main_module, "CHAT_STREAM_TOTAL_TIMEOUT_SEC", 0.03):
            with client.stream("POST", "/api/chat/stream", json={
                "message": "Solve x + 2 = 5",
                "history": [],
            }) as response:
                assert response.status_code == 200
                content = "".join(response.iter_text())

        assert "event: error" in content
        assert "timed out" in content.lower()
        assert "event: end" in content

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_marker_mode_continues_until_marker(self, mock_stream_async):
        # First stream ends without the marker; the endpoint must issue a
        # continuation round until END_MARKER is observed.
        async def _first_stream(*args, **kwargs):
            yield "n=1: x=1\n"
            yield "n=2: x=2"

        async def _second_stream(*args, **kwargs):
            yield "\nn=3: x=3\nEND_MARKER"

        mock_stream_async.side_effect = [_first_stream(), _second_stream()]

        with patch.object(main_module, "CHAT_STREAM_CONTINUATION_MAX_ROUNDS", 1):
            with client.stream("POST", "/api/chat/stream", json={
                "message": "Solve x+n=2n for n=1..3 and end with END_MARKER",
                "history": [],
                "completionMode": "marker",
                "expectedEndMarker": "END_MARKER",
            }) as response:
                assert response.status_code == 200
                content = "".join(response.iter_text())

        assert "END_MARKER" in content
        assert "event: end" in content
        assert mock_stream_async.call_count == 2

    @patch("main.call_hf_chat_stream")
    def test_chat_stream_non_math_returns_refusal_and_skips_inference(self, mock_stream):
        with client.stream("POST", "/api/chat/stream", json={
            "message": "Who is Elon Musk?",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "event: chunk" in content
        assert any(candidate in content for candidate in main_module._NON_MATH_REDIRECT_RESPONSES)
        assert "event: end" in content
        mock_stream.assert_not_called()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_allows_contextual_followup_token_and_calls_inference(self, mock_stream_async):
        async def _stream(*args, **kwargs):
            yield "Sure, continuing with the next step."

        mock_stream_async.return_value = _stream()

        with client.stream("POST", "/api/chat/stream", json={
            "message": "go",
            "history": [
                {"role": "assistant", "content": "Would you like to continue?"},
            ],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "Sure, continuing with the next step." in content
        assert "event: end" in content
        mock_stream_async.assert_called_once()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_followup_token_reconstructs_latest_math_intent_and_calls_inference(self, mock_stream_async):
        async def _stream(*args, **kwargs):
            yield "Continuing the same solution from the previous step."

        mock_stream_async.return_value = _stream()

        with client.stream("POST", "/api/chat/stream", json={
            "message": "more",
            "history": [
                {"role": "user", "content": "Solve 2x + 3 = 7"},
                {"role": "assistant", "content": "We can isolate x now."},
            ],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "Continuing the same solution from the previous step." in content
        assert "event: end" in content
        mock_stream_async.assert_called_once()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_followup_token_without_context_requests_clarification(self, mock_stream_async):
        with client.stream("POST", "/api/chat/stream", json={
            "message": "go",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert main_module._CONTINUATION_CONTEXT_CLARIFY_RESPONSE in content
        assert "event: end" in content
        mock_stream_async.assert_not_called()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_followup_token_after_refused_request_remains_blocked(self, mock_stream_async):
        with client.stream("POST", "/api/chat/stream", json={
            "message": "continue",
            "history": [
                {"role": "user", "content": "Who is Elon Musk?"},
                {
                    "role": "assistant",
                    "content": main_module._NON_MATH_REDIRECT_RESPONSES[1],
                },
            ],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert any(candidate in content for candidate in main_module._NON_MATH_REDIRECT_RESPONSES)
        assert "event: end" in content
        mock_stream_async.assert_not_called()
511
+
512
+
513
class TestChatTransport:
    """call_hf_chat should route through the DeepSeek-backed inference client."""

    @patch("services.ai_client.get_deepseek_client")
    def test_call_hf_chat_uses_deepseek_api(self, mock_ds_fn):
        answer = "x = 2 or x = 3"

        choice = MagicMock()
        choice.message.content = answer
        deepseek = MagicMock()
        deepseek.chat.completions.create.return_value = MagicMock(choices=[choice])
        mock_ds_fn.return_value = deepseek

        with patch.object(main_module, "get_inference_client") as mock_get_ic:
            inference = MagicMock()
            inference.generate_from_messages.return_value = answer
            mock_get_ic.return_value = inference

            result = main_module.call_hf_chat(
                [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0"}],
                max_tokens=256,
                temperature=0.2,
                top_p=0.9,
            )

        assert result
537
+
538
+
539
class TestInferenceRouting:
    """Model-selection rules of InferenceClient under env locks and overrides."""

    @staticmethod
    def _route(task_type, content):
        # Resolve the primary model and its fallback chain for one request.
        ic = InferenceClient()
        req = InferenceRequest(
            messages=[{"role": "user", "content": content}],
            task_type=task_type,
        )
        model, source = ic._resolve_primary_model(req)
        chain = ic._model_chain_for_task(task_type, model)
        return model, source, chain

    def test_chat_strict_model_lock_keeps_single_model_chain(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")

        model, source, chain = self._route(
            "chat", "Show all steps and prove the result rigorously."
        )

        assert model == "deepseek-chat"
        assert "chat_strict_model_only" in source
        assert chain == ["deepseek-chat"]

    def test_chat_env_override_wins_under_model_lock(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")

        model, source, chain = self._route(
            "chat", "Find the roots and explain why."
        )

        assert model == "deepseek-chat"
        assert "chat_override_env" in source
        assert chain == ["deepseek-chat"]

    def test_chat_temp_override_wins_under_model_lock(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-reasoner")
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")

        model, source, chain = self._route(
            "chat", "Find the roots and explain why."
        )

        assert model == "deepseek-chat"
        assert "chat_temp_override_env" in source
        assert chain == ["deepseek-chat"]

    def test_chat_temp_override_does_not_change_non_chat_task_under_lock(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")

        model, source, chain = self._route(
            "verify_solution", "Check if my solution is correct."
        )

        assert model == "deepseek-reasoner"
        assert "chat_temp_override_env" not in source
        assert chain == ["deepseek-reasoner"]
613
+
614
+
615
+ # ─── Risk Prediction ──────────────────────────────────────────
616
+
617
+
618
class TestRiskPrediction:
    """/api/predict-risk: validation, DeepSeek-backed scoring, and batch mode."""

    @staticmethod
    def _metrics(engagement=80, quiz=75, attendance=90, completion=85):
        # Canonical student-metrics payload with overridable fields.
        return {
            "engagementScore": engagement,
            "avgQuizScore": quiz,
            "attendance": attendance,
            "assignmentCompletion": completion,
        }

    @patch("main.get_deepseek_client")
    def test_predict_risk_success(self, mock_ds_fn):
        mock_ds_fn.return_value = make_deepseek_risk_mock()
        resp = client.post("/api/predict-risk", json=self._metrics())
        assert resp.status_code == 200
        body = resp.json()
        assert body["riskLevel"] in ("High", "Medium", "Low")
        assert 0 <= body["confidence"] <= 1

    def test_predict_risk_invalid_score_range(self):
        resp = client.post("/api/predict-risk", json=self._metrics(engagement=150))
        assert resp.status_code == 422

    def test_predict_risk_negative_score(self):
        resp = client.post("/api/predict-risk", json=self._metrics(engagement=-5))
        assert resp.status_code == 422

    def test_predict_risk_missing_fields(self):
        resp = client.post("/api/predict-risk", json={"engagementScore": 80})
        assert resp.status_code == 422

    @patch("main.get_deepseek_client")
    def test_predict_risk_ai_failure(self, mock_ds_fn):
        failing = MagicMock()
        failing.chat.completions.create.side_effect = Exception("AI down")
        mock_ds_fn.return_value = failing
        resp = client.post("/api/predict-risk", json=self._metrics())
        assert resp.status_code == 502

    @patch("main.get_deepseek_client")
    def test_batch_risk_prediction(self, mock_ds_fn):
        mock_ds_fn.return_value = make_deepseek_risk_mock()
        roster = [
            self._metrics(),
            self._metrics(engagement=30, quiz=40, attendance=50, completion=35),
        ]
        resp = client.post("/api/predict-risk/batch", json={"students": roster})
        assert resp.status_code == 200
        assert len(resp.json()) == 2
681
+
682
+
683
+ # ─── Learning Path ────────────────────────────────────────────
684
+
685
+
686
class TestLearningPath:
    """/api/learning-path: happy path, request validation, upstream failure."""

    @patch("main.call_hf_chat")
    def test_learning_path_success(self, mock_chat):
        mock_chat.return_value = "1. Review fractions\n2. Practice decimals"
        resp = client.post("/api/learning-path", json={
            "weaknesses": ["fractions", "decimals"],
            "gradeLevel": "Grade 11",
        })
        assert resp.status_code == 200
        assert "fractions" in resp.json()["learningPath"].lower()

    def test_learning_path_missing_weaknesses(self):
        resp = client.post("/api/learning-path", json={"gradeLevel": "Grade 11"})
        assert resp.status_code == 422

    def test_learning_path_missing_grade(self):
        resp = client.post("/api/learning-path", json={"weaknesses": ["fractions"]})
        assert resp.status_code == 422

    @patch("main.call_hf_chat")
    def test_learning_path_ai_failure(self, mock_chat):
        mock_chat.side_effect = Exception("AI service down")
        resp = client.post("/api/learning-path", json={
            "weaknesses": ["algebra"],
            "gradeLevel": "Grade 11",
        })
        assert resp.status_code == 502
717
+
718
+
719
+ # ─── Daily Insight ─────────────────────────────────────────────
720
+
721
+
722
class TestDailyInsight:
    """/api/analytics/daily-insight generation from a class roster."""

    @patch("main.call_hf_chat")
    def test_daily_insight_success(self, mock_chat):
        mock_chat.return_value = "Class is doing well."
        roster = [
            {"name": "Alice", "engagementScore": 80, "avgQuizScore": 75, "attendance": 90, "riskLevel": "Low"},
        ]
        resp = client.post("/api/analytics/daily-insight", json={"students": roster})
        assert resp.status_code == 200
        assert resp.json()["insight"]

    def test_daily_insight_empty_students(self):
        # Empty roster short-circuits with a canned message, no model call needed.
        resp = client.post("/api/analytics/daily-insight", json={"students": []})
        assert resp.status_code == 200
        assert "No student data" in resp.json()["insight"]
740
+
741
+
742
+ # ─── Quiz Topics ───────────────────────────────────────────────
743
+
744
+
745
class TestQuizTopics:
    """Read-only quiz topic catalogue endpoints."""

    def test_get_all_topics(self):
        resp = client.get("/api/quiz/topics")
        assert resp.status_code == 200
        assert "allTopics" in resp.json()

    def test_get_topics_by_grade(self):
        resp = client.get("/api/quiz/topics?gradeLevel=Grade%2011")
        assert resp.status_code == 200
        body = resp.json()
        assert body["gradeLevel"] == "Grade 11"
        assert "topics" in body

    def test_get_topics_invalid_grade(self):
        resp = client.get("/api/quiz/topics?gradeLevel=Grade%2099")
        assert resp.status_code == 404
761
+
762
+
763
+ # ─── Quiz Generation ──────────────────────────────────────────
764
+
765
+
766
class TestQuizGeneration:
    """/api/quiz/generate: happy path and request validation."""

    @patch("main.call_hf_chat")
    def test_generate_quiz_success(self, mock_chat):
        question = {
            "questionType": "multiple_choice",
            "question": "What is 2+2?",
            "correctAnswer": "4",
            "options": ["A) 3", "B) 4", "C) 5", "D) 6"],
            "bloomLevel": "remember",
            "difficulty": "easy",
            "topic": "Arithmetic",
            "points": 1,
            "explanation": "2+2=4",
        }
        mock_chat.return_value = json.dumps([question])

        resp = client.post("/api/quiz/generate", json={
            "topics": ["Arithmetic"],
            "gradeLevel": "Grade 11",
            "numQuestions": 1,
        })
        assert resp.status_code == 200
        body = resp.json()
        assert len(body["questions"]) >= 1
        assert body["totalPoints"] > 0

    def test_generate_quiz_missing_topics(self):
        resp = client.post("/api/quiz/generate", json={"gradeLevel": "Grade 11"})
        assert resp.status_code == 422
797
+
798
+
799
class TestClassRecordImportMapping:
    """Column-mapping sanitization plus quiz generation limit checks.

    NOTE(review): the quiz-generation tests below look misplaced in this
    class (they belong with TestQuizGeneration); kept here so test IDs and
    collection order are unchanged.
    """

    @staticmethod
    def _question(text, **overrides):
        # Minimal valid question payload; overrides customize individual fields.
        base = {
            "questionType": "identification",
            "question": text,
            "correctAnswer": "Answer",
            "bloomLevel": "remember",
            "difficulty": "easy",
            "topic": "Algebra",
            "points": 1,
            "explanation": "Because.",
        }
        base.update(overrides)
        return base

    def test_sanitize_column_mapping_drops_none_and_unknown_fields(self):
        source_mapping = {
            "Student Name": "name",
            "Grade Level": None,
            "Section": "",
            "General Mathematics": None,
            "Custom": "not_a_supported_field",
            "Average": "avgQuizScore",
        }

        cleaned = main_module._sanitize_column_mapping(source_mapping)

        expected = {
            "Student Name": "name",
            "Average": "avgQuizScore",
        }
        assert cleaned == expected

    @patch("main.call_hf_chat")
    def test_generate_quiz_bad_llm_output(self, mock_chat):
        mock_chat.return_value = "This is not valid JSON at all."
        resp = client.post("/api/quiz/generate", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
            "numQuestions": 1,
        })
        assert resp.status_code == 500

    @patch("main.call_hf_chat")
    def test_preview_quiz(self, mock_chat):
        question = self._question(
            "Define slope.",
            correctAnswer="Rise over run",
            explanation="Slope = rise/run.",
        )
        mock_chat.return_value = json.dumps([question])
        resp = client.post("/api/quiz/preview", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
        })
        assert resp.status_code == 200

    @patch("main.call_hf_chat")
    def test_generate_quiz_accepts_new_max_limits(self, mock_chat):
        max_questions = main_module.MAX_QUESTIONS_LIMIT
        questions = [self._question(f"Question {i + 1}") for i in range(max_questions)]
        mock_chat.return_value = json.dumps(questions)

        resp = client.post("/api/quiz/generate", json={
            "topics": [f"Topic {i + 1}" for i in range(main_module.MAX_TOPICS_LIMIT)],
            "gradeLevel": "Grade 11",
            "numQuestions": max_questions,
        })

        assert resp.status_code == 200
        assert len(resp.json()["questions"]) == max_questions

    def test_generate_quiz_rejects_over_max_questions(self):
        resp = client.post("/api/quiz/generate", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
            "numQuestions": main_module.MAX_QUESTIONS_LIMIT + 1,
        })

        assert resp.status_code == 422
882
+
883
+
884
+ class TestUploadClassRecordsGuardrails:
885
+ @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
886
+ def test_upload_class_records_rejects_unsupported_dataset_intent(self, _mock_chat):
887
+ files = {
888
+ "files": ("records.csv", b"name,lrn,email,avgQuizScore,attendance,engagementScore,assignmentCompletion\nAna,1001,ana@example.com,80,90,85,88\n", "text/csv"),
889
+ }
890
+ response = client.post(
891
+ "/api/upload/class-records",
892
+ files=files,
893
+ data={"datasetIntent": "unsupported_intent"},
894
+ )
895
+
896
+ assert response.status_code == 400
897
+ assert "Unsupported datasetIntent" in response.json()["detail"]
898
+
899
+ @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
900
+ def test_upload_class_records_warns_when_preferred_core_fields_missing(self, _mock_chat):
901
+ files = {
902
+ "files": (
903
+ "records.csv",
904
+ b"name,lrn,email,attendance\nAna,1001,ana@example.com,90\n",
905
+ "text/csv",
906
+ ),
907
+ }
908
+ response = client.post(
909
+ "/api/upload/class-records",
910
+ files=files,
911
+ data={"datasetIntent": "synthetic_student_records"},
912
+ )
913
+
914
+ assert response.status_code == 200
915
+ payload = response.json()
916
+ assert payload["success"] is True
917
+ assert payload["summary"]["failedFiles"] == 0
918
+ assert payload["summary"]["partialSuccessFiles"] == 1
919
+ combined_warnings = " ".join(payload.get("warnings", []))
920
+ assert "Missing preferred educational columns" in combined_warnings
921
+
922
+ @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
923
+ def test_upload_class_records_returns_interpretation_metadata(self, _mock_chat):
924
+ files = {
925
+ "files": (
926
+ "records.csv",
927
+ (
928
+ b"name,lrn,email,avgQuizScore,attendance,engagementScore,assignmentCompletion,patient_diagnosis\n"
929
+ b"Ana,1001,ana@example.com,80,90,85,88,none\n"
930
+ ),
931
+ "text/csv",
932
+ ),
933
+ }
934
+ response = client.post(
935
+ "/api/upload/class-records",
936
+ files=files,
937
+ data={"datasetIntent": "synthetic_student_records"},
938
+ )
939
+
940
+ assert response.status_code == 200
941
+ payload = response.json()
942
+ assert payload["success"] is True
943
+ assert payload["datasetIntent"] == "synthetic_student_records"
944
+ assert isinstance(payload.get("columnInterpretations"), list)
945
+ summary = payload.get("interpretationSummary") or {}
946
+ assert summary.get("storageOnlyColumns", 0) >= 1
947
+ assert summary.get("domainMismatchWarnings", 0) >= 1
948
+ class_metadata = payload.get("classMetadata") or {}
949
+ assert class_metadata.get("classSectionId")
950
+ assert class_metadata.get("className")
951
+ assert class_metadata.get("grade")
952
+ assert class_metadata.get("section")
953
+ assert class_metadata.get("gradeLevel")
954
+ assert class_metadata.get("classification")
955
+
956
+ patient_column = next(
957
+ (item for item in payload["columnInterpretations"] if item.get("columnName") == "patient_diagnosis"),
958
+ None,
959
+ )
960
+ assert patient_column is not None
961
+ assert patient_column["usagePolicy"] == "storage_only"
962
+ assert patient_column["confidenceBand"] == "low"
963
+
964
    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_upload_class_records_accepts_minimal_teacher_schema(self, _mock_chat):
        """A bare-minimum CSV (name, lrn, three score columns) fully imports.

        Expectations: zero rejected rows, 100% inferred-state coverage, and
        class metadata filled in (Grade 11 / Section A — presumably backend
        defaults, since the CSV has no class columns).
        """
        files = {
            "files": (
                "records.csv",
                (
                    b"name,lrn,avgQuizScore,attendance,engagementScore\n"
                    b"Ana Cruz,1001,81,92,88\n"
                    b"Ben Dela,1002,58,70,52\n"
                ),
                "text/csv",
            ),
        }

        response = client.post(
            "/api/upload/class-records",
            files=files,
            data={"datasetIntent": "synthetic_student_records"},
        )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert payload["interpretedRows"] == 2
        assert payload["rejectedRows"] == 0
        # Every accepted row must carry an inferred learning state.
        assert payload["inferredStateCoverage"]["inferredRows"] == 2
        assert payload["inferredStateCoverage"]["coveragePct"] == 100.0
        assert all("inferredState" in row for row in payload["students"])
        class_metadata = payload.get("classMetadata") or {}
        assert class_metadata.get("classSectionId")
        assert class_metadata.get("className")
        assert class_metadata.get("grade") == "Grade 11"
        assert class_metadata.get("section") == "Section A"
        assert class_metadata.get("gradeLevel") == "Grade 11"
        assert class_metadata.get("classification") == "Senior High School"
999
+
1000
    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_upload_class_records_reports_explicit_row_rejections(self, _mock_chat):
        """Rows missing a name or both identity values (lrn/email) are rejected
        with explicit per-row reasons, while valid rows still import.
        """
        # Row 1: empty name. Row 2: neither lrn nor email. Row 3: valid.
        files = {
            "files": (
                "records.csv",
                (
                    b"name,lrn,email,avgQuizScore,attendance,engagementScore\n"
                    b",1001,ana@example.com,81,92,88\n"
                    b"Ben Dela,,,58,70,52\n"
                    b"Cara Lim,1003,,77,83,75\n"
                ),
                "text/csv",
            ),
        }

        response = client.post(
            "/api/upload/class-records",
            files=files,
            data={"datasetIntent": "synthetic_student_records"},
        )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert payload["interpretedRows"] == 1
        assert payload["rejectedRows"] == 2
        # Both rejection categories must be surfaced via reason strings.
        reasons = payload.get("rejectedReasons") or {}
        assert any("missing required field: name" in key for key in reasons.keys())
        assert any("missing required identity value: lrn_or_email" in key for key in reasons.keys())
        assert len(payload.get("rejectedRowDetails") or []) == 2
1030
+
1031
    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_upload_class_records_degrades_gracefully_when_firestore_adc_missing(self, _mock_chat):
        """If Firestore client creation fails with a missing-ADC error, the
        upload still returns 200 but reports persisted=False, no dashboard
        sync, and an ADC warning — interpretation must not depend on storage.
        """
        class _FailingFirestoreModule:
            # firestore.client() raising here simulates an environment without
            # Application Default Credentials configured.
            def client(self):
                raise Exception(
                    "Your default credentials were not found. "
                    "To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc"
                )

        files = {
            "files": (
                "records.csv",
                (
                    b"name,lrn,avgQuizScore,attendance,engagementScore\n"
                    b"Ana Cruz,1001,81,92,88\n"
                ),
                "text/csv",
            ),
        }

        with patch.object(main_module, "firebase_firestore", _FailingFirestoreModule()), patch.object(main_module, "_firebase_ready", True):
            response = client.post(
                "/api/upload/class-records",
                files=files,
                data={"datasetIntent": "synthetic_student_records"},
            )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        # Interpretation succeeded; persistence and dashboard sync did not.
        assert payload["persisted"] is False
        assert (payload.get("dashboardSync") or {}).get("synced") is False
        warnings_blob = " ".join(payload.get("warnings", []))
        assert "adc is not configured" in warnings_blob.lower()
1065
+
1066
+
1067
class TestImportedOverviewAndTopicMastery:
    """Analytics endpoints over imported (normalized) class records."""

    def test_imported_class_overview_returns_inferred_state_for_realistic_minimal_records(self):
        """Three students with spread scores yield all three risk levels, full
        inferred-state coverage, and class metadata on both student rows and
        classroom aggregates.
        """
        # Seed one class section with high/medium/low performing students.
        firestore = _FakeFirestoreModule(
            {
                "normalizedClassRecords": [
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Ana Cruz",
                        "lrn": "1001",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 92,
                        "attendance": 96,
                        "engagementScore": 91,
                        "unknownFields": {},
                    },
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Ben Dela",
                        "lrn": "1002",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 68,
                        "attendance": 82,
                        "engagementScore": 66,
                        "unknownFields": {},
                    },
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Cara Lim",
                        "lrn": "1003",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 49,
                        "attendance": 71,
                        "engagementScore": 50,
                        "unknownFields": {},
                    },
                ]
            }
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/analytics/imported-class-overview?classSectionId=grade11_a&limit=100")

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert len(payload["students"]) == 3
        coverage = payload.get("inferredStateCoverage") or {}
        assert coverage.get("inferredRows") == 3
        assert coverage.get("coveragePct") == 100.0

        # The score spread above is expected to map onto one of each risk tier.
        risk_levels = {student["riskLevel"] for student in payload["students"]}
        assert risk_levels == {"Low", "Medium", "High"}
        assert all(student.get("inferredState") for student in payload["students"])
        assert all("stateConfidence" in student for student in payload["students"])
        # Class metadata must be attached to every student row…
        assert all(student.get("classMetadata") for student in payload["students"])
        assert all(student.get("classMetadata", {}).get("classSectionId") == "grade11_a" for student in payload["students"])
        assert all(student.get("classMetadata", {}).get("gradeLevel") for student in payload["students"])
        assert all(student.get("classMetadata", {}).get("classification") for student in payload["students"])
        # …and to every classroom aggregate.
        assert all(classroom.get("classMetadata") for classroom in payload["classrooms"])
        assert all(classroom.get("classMetadata", {}).get("classSectionId") == "grade11_a" for classroom in payload["classrooms"])
        assert all(classroom.get("classMetadata", {}).get("gradeLevel") for classroom in payload["classrooms"])
        assert all(classroom.get("classMetadata", {}).get("classification") for classroom in payload["classrooms"])

    def test_imported_class_overview_returns_503_when_firestore_adc_missing(self):
        """Unlike upload, the read-only overview endpoint surfaces a 503 with
        actionable detail when Firestore streaming fails for missing ADC.
        """
        firestore = _FakeFirestoreModule(
            {"normalizedClassRecords": []},
            stream_error=(
                "Your default credentials were not found. "
                "To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc"
            ),
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/analytics/imported-class-overview?classSectionId=grade11_a&limit=100")

        assert response.status_code == 503
        detail = str((response.json() or {}).get("detail") or "").lower()
        assert "firestore adc is not configured" in detail
        assert "google_application_credentials" in detail

    def test_topic_mastery_reports_fallback_warning_without_topic_columns(self):
        """Records without per-topic columns still produce mastery rows via a
        fallback topic context, and the fallback is flagged in warnings.
        """
        firestore = _FakeFirestoreModule(
            {
                "normalizedClassRecords": [
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Ana Cruz",
                        "lrn": "1001",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 84,
                        "attendance": 92,
                        "engagementScore": 88,
                        "assessmentName": "general-assessment",
                        "unknownFields": {},
                    }
                ],
                # No course materials, so no topic structure is available.
                "courseMaterials": [],
            }
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/analytics/topic-mastery?teacherId=test-teacher-uid&classSectionId=grade11_a")

        assert response.status_code == 200
        payload = response.json()
        assert payload["summary"]["totalTopicsTracked"] >= 1
        assert payload["summary"].get("fallbackTopicRows") == 1
        assert any("fallback topic context" in warning.lower() for warning in payload.get("warnings") or [])
1179
+
1180
+
1181
class TestAsyncGenerationTasks:
    """Async quiz-generation task lifecycle plus ops-metrics access control."""

    @patch("main.asyncio.create_task")
    def test_quiz_generate_async_submit_status_list_cancel(self, mock_create_task):
        """Submit an async generation job, then poll status, list tasks, and
        cancel it through the public task endpoints.
        """
        # Start from a clean in-memory task registry.
        main_module._async_tasks.clear()
        # Close the coroutine instead of scheduling it so no background work
        # (or "never awaited" warning) occurs during the test.
        mock_create_task.side_effect = lambda coro: coro.close()
        response = client.post("/api/quiz/generate-async", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
            "numQuestions": 1,
        })

        assert response.status_code == 200
        payload = response.json()
        task_id = payload["taskId"]
        assert payload["status"] == "queued"
        assert mock_create_task.called

        # Status endpoint: any lifecycle state is acceptable since the worker
        # coroutine was never actually run.
        status_response = client.get(f"/api/tasks/{task_id}")
        assert status_response.status_code == 200
        status_payload = status_response.json()
        assert status_payload["taskId"] == task_id
        assert status_payload["status"] in {"queued", "running", "cancelling", "cancelled", "completed", "failed"}

        # Listing must include the task we just queued.
        list_response = client.get("/api/tasks?limit=20")
        assert list_response.status_code == 200
        list_payload = list_response.json()
        assert list_payload["count"] >= 1
        assert any(item["taskId"] == task_id for item in list_payload["tasks"])

        # Cancellation may complete immediately or be in flight.
        cancel_response = client.post(f"/api/tasks/{task_id}/cancel")
        assert cancel_response.status_code == 200
        cancel_payload = cancel_response.json()
        assert cancel_payload["taskId"] == task_id
        assert cancel_payload["status"] in {"cancelled", "cancelling"}

    def test_inference_metrics_requires_admin(self):
        """Unauthenticated callers are rejected from the ops metrics endpoint."""
        response = client.get("/api/ops/inference-metrics")
        assert response.status_code == 403

    @patch.object(main_module.firebase_auth, "verify_id_token", return_value={
        "uid": "admin-uid",
        "email": "admin@example.com",
        "role": "admin",
    })
    def test_inference_metrics_admin_success(self, _mock_verify):
        """An admin token unlocks the metrics payload with request counters."""
        response = client.get("/api/ops/inference-metrics")
        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert "metrics" in payload
        assert "requests_total" in payload["metrics"]
1232
+
1233
+
1234
+ # ─── Calculator ────────────────────────────────────────────────
1235
+
1236
+
1237
class TestCalculator:
    """Exercises /api/calculator/evaluate; sympy may be absent in the env."""

    @staticmethod
    def _post(expression: str):
        # Every calculator test hits the same endpoint with a single field.
        return client.post("/api/calculator/evaluate", json={"expression": expression})

    def test_evaluate_simple_expression(self):
        response = self._post("2 + 3")
        # sympy may not be installed in test env — accept 200 or 500.
        assert response.status_code in (200, 500)
        if response.status_code == 200:
            assert response.json()["result"] == "5"

    def test_evaluate_with_variables(self):
        response = self._post("x**2 + 2*x + 1")
        # Accept 200 (sympy available) or 500 (sympy missing).
        assert response.status_code in (200, 500)

    def test_evaluate_dangerous_expression(self):
        response = self._post("__import__('os').system('rm -rf /')")
        # 400 if validation catches it, 500 if sympy missing or general error.
        assert response.status_code in (400, 500)

    def test_evaluate_empty_expression(self):
        assert self._post("").status_code == 422

    def test_evaluate_too_long_expression(self):
        response = self._post("x + " * 200)
        # 400 length guard, 422 pydantic validation, or 500 when sympy missing.
        assert response.status_code in (400, 422, 500)
1274
+
1275
+
1276
+ # ─── Error Handling ────────────────────────────────────────────
1277
+
1278
+
1279
class TestErrorHandling:
    """Generic HTTP error-path behavior shared by all endpoints."""

    def test_404_for_unknown_endpoint(self):
        assert client.get("/api/nonexistent").status_code == 404

    def test_method_not_allowed(self):
        # /api/chat is POST-only, so GET must be rejected.
        assert client.get("/api/chat").status_code == 405

    def test_request_id_in_error_response(self):
        # Even error responses must carry the correlation header.
        headers = client.get("/api/nonexistent").headers
        assert "x-request-id" in headers

    def test_invalid_json_body(self):
        response = client.post(
            "/api/chat",
            content="this is not json",
            headers={"Content-Type": "application/json"},
        )
        assert response.status_code == 422
1299
+
1300
+
1301
+ # ─── Student Competency ───────────────────────────────────────
1302
+
1303
+
1304
class TestStudentCompetency:
    """Competency aggregation from quiz history via /api/quiz/student-competency."""

    @patch("main.call_hf_chat")
    def test_competency_no_history(self, mock_chat):
        # With an empty history the endpoint returns an empty competency list.
        mock_chat.return_value = ""
        response = client.post("/api/quiz/student-competency", json={
            "studentId": "student123",
            "quizHistory": [],
        })
        assert response.status_code == 200
        data = response.json()
        assert data["studentId"] == "student123"
        assert data["competencies"] == []

    @patch("main.call_hf_chat")
    def test_competency_with_history(self, mock_chat):
        mock_chat.return_value = "Good progress overall."
        history = [
            {"topic": "Algebra", "score": 8, "total": 10, "timeTaken": 300},
            {"topic": "Algebra", "score": 9, "total": 10, "timeTaken": 250},
            {"topic": "Geometry", "score": 4, "total": 10, "timeTaken": 500},
        ]
        response = client.post(
            "/api/quiz/student-competency",
            json={"studentId": "student123", "quizHistory": history},
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["competencies"]) > 0
        # Strong, fast Algebra runs should outrank the weak Geometry run.
        algebra = next((c for c in data["competencies"] if c["topic"] == "Algebra"), None)
        geometry = next((c for c in data["competencies"] if c["topic"] == "Geometry"), None)
        if algebra and geometry:
            assert algebra["efficiencyScore"] > geometry["efficiencyScore"]
1336
+
1337
+
1338
+ # ─── Course Materials Recent Retrieval ───────────────────────
1339
+
1340
+
1341
+ class _FakeDocSnapshot:
1342
+ def __init__(self, doc_id: str, data: Dict[str, Any]):
1343
+ self.id = doc_id
1344
+ self._data = data
1345
+
1346
+ def to_dict(self) -> Dict[str, Any]:
1347
+ return self._data
1348
+
1349
+
1350
+ class _FakeQuery:
1351
+ def __init__(self, docs: List[Dict[str, Any]], fail_order: bool = False, stream_error: str | None = None):
1352
+ self._docs = docs
1353
+ self._filters: List[tuple[str, str, Any]] = []
1354
+ self._limit: int | None = None
1355
+ self._fail_order = fail_order
1356
+ self._stream_error = stream_error
1357
+
1358
+ def where(self, field: str, op: str, value: Any):
1359
+ self._filters.append((field, op, value))
1360
+ return self
1361
+
1362
+ def order_by(self, *args, **kwargs):
1363
+ if self._fail_order:
1364
+ raise Exception("missing composite index")
1365
+ return self
1366
+
1367
+ def limit(self, value: int):
1368
+ self._limit = value
1369
+ return self
1370
+
1371
+ def stream(self):
1372
+ if self._stream_error:
1373
+ raise Exception(self._stream_error)
1374
+
1375
+ filtered: List[Dict[str, Any]] = []
1376
+ for doc in self._docs:
1377
+ include = True
1378
+ for field, op, expected in self._filters:
1379
+ if op != "==":
1380
+ continue
1381
+ if doc.get(field) != expected:
1382
+ include = False
1383
+ break
1384
+ if include:
1385
+ filtered.append(doc)
1386
+
1387
+ if self._limit is not None:
1388
+ filtered = filtered[: self._limit]
1389
+
1390
+ return [_FakeDocSnapshot(str(doc.get("materialId") or "doc"), doc) for doc in filtered]
1391
+
1392
+
1393
+ class _FakeCollection:
1394
+ def __init__(
1395
+ self,
1396
+ name: str,
1397
+ store: Dict[str, List[Dict[str, Any]]],
1398
+ audit_logs: List[Dict[str, Any]],
1399
+ fail_order: bool = False,
1400
+ stream_error: str | None = None,
1401
+ ):
1402
+ self._name = name
1403
+ self._store = store
1404
+ self._audit_logs = audit_logs
1405
+ self._fail_order = fail_order
1406
+ self._stream_error = stream_error
1407
+
1408
+ def where(self, field: str, op: str, value: Any):
1409
+ docs = list(self._store.get(self._name, []))
1410
+ query = _FakeQuery(docs, fail_order=self._fail_order, stream_error=self._stream_error)
1411
+ return query.where(field, op, value)
1412
+
1413
+ def add(self, payload: Dict[str, Any]):
1414
+ self._audit_logs.append(payload)
1415
+ return (None, None)
1416
+
1417
+
1418
+ class _FakeFirestoreClient:
1419
+ def __init__(self, store: Dict[str, List[Dict[str, Any]]], fail_order: bool = False, stream_error: str | None = None):
1420
+ self._store = store
1421
+ self.audit_logs: List[Dict[str, Any]] = []
1422
+ self._fail_order = fail_order
1423
+ self._stream_error = stream_error
1424
+
1425
+ def collection(self, name: str):
1426
+ return _FakeCollection(
1427
+ name,
1428
+ self._store,
1429
+ self.audit_logs,
1430
+ fail_order=self._fail_order,
1431
+ stream_error=self._stream_error,
1432
+ )
1433
+
1434
+
1435
+ class _FakeFirestoreModule:
1436
+ class Query:
1437
+ DESCENDING = "DESCENDING"
1438
+
1439
+ SERVER_TIMESTAMP = object()
1440
+
1441
+ def __init__(
1442
+ self,
1443
+ store: Dict[str, List[Dict[str, Any]]],
1444
+ fail_order: bool = False,
1445
+ stream_error: str | None = None,
1446
+ ):
1447
+ self._client = _FakeFirestoreClient(store, fail_order=fail_order, stream_error=stream_error)
1448
+
1449
+ def client(self):
1450
+ return self._client
1451
+
1452
+
1453
class TestRecentCourseMaterials:
    """/api/upload/course-materials/recent filtering and retention handling."""

    def test_recent_course_materials_respects_class_section_filter(self):
        """Only materials belonging to the requested class section come back."""
        now = int(time.time())
        # Two materials in different sections; only grade11_a should match.
        firestore = _FakeFirestoreModule(
            {
                "courseMaterials": [
                    {
                        "materialId": "mat-a",
                        "teacherId": "test-teacher-uid",
                        "fileName": "algebra-a.pdf",
                        "fileType": "pdf",
                        "classSectionId": "grade11_a",
                        "topics": [{"title": "Linear Equations"}],
                        "extractedTextLength": 1200,
                        "retentionDays": 180,
                        "expiresAtEpoch": now + 3600,
                    },
                    {
                        "materialId": "mat-b",
                        "teacherId": "test-teacher-uid",
                        "fileName": "algebra-b.pdf",
                        "fileType": "pdf",
                        "classSectionId": "grade11_b",
                        "topics": [{"title": "Quadratics"}],
                        "extractedTextLength": 1600,
                        "retentionDays": 180,
                        "expiresAtEpoch": now + 3600,
                    },
                ]
            }
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/upload/course-materials/recent?classSectionId=grade11_a&limit=10")

        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert data["classSectionId"] == "grade11_a"
        assert len(data["materials"]) == 1
        assert data["materials"][0]["materialId"] == "mat-a"
        assert all(item["classSectionId"] == "grade11_a" for item in data["materials"])

    def test_recent_course_materials_reports_retention_exclusions(self):
        """Expired artifacts are dropped and surfaced as warnings, and the
        fallback (no order_by) query path is exercised via fail_order=True.
        """
        now = int(time.time())
        firestore = _FakeFirestoreModule(
            {
                "courseMaterials": [
                    {
                        "materialId": "mat-valid",
                        "teacherId": "test-teacher-uid",
                        "fileName": "active.txt",
                        "fileType": "txt",
                        "classSectionId": "grade11_a",
                        "topics": [{"title": "Functions"}],
                        "extractedTextLength": 900,
                        "retentionDays": 180,
                        "expiresAtEpoch": now + 7200,
                    },
                    {
                        # Expired one minute ago — must be excluded with a warning.
                        "materialId": "mat-expired",
                        "teacherId": "test-teacher-uid",
                        "fileName": "expired.txt",
                        "fileType": "txt",
                        "classSectionId": "grade11_a",
                        "topics": [{"title": "Inequalities"}],
                        "extractedTextLength": 700,
                        "retentionDays": 30,
                        "expiresAtEpoch": now - 60,
                    },
                ]
            },
            # order_by raises, forcing the endpoint onto its fallback query path.
            fail_order=True,
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/upload/course-materials/recent?classSectionId=grade11_a&limit=10")

        assert response.status_code == 200
        data = response.json()
        assert len(data["materials"]) == 1
        assert data["materials"][0]["materialId"] == "mat-valid"
        warning_text = " ".join(data.get("warnings", []))
        assert "expired course-material artifact" in warning_text.lower()
        assert "fallback query path" in warning_text.lower()
1538
+
1539
+
1540
+ # ─── Student Account Provisioning ───────────────────────────
1541
+
1542
+
1543
+ class _ProvisionDocSnapshot:
1544
+ def __init__(self, doc_id: str, data: Dict[str, Any] | None):
1545
+ self.id = doc_id
1546
+ self._data = data
1547
+
1548
+ @property
1549
+ def exists(self) -> bool:
1550
+ return self._data is not None
1551
+
1552
+ def to_dict(self) -> Dict[str, Any]:
1553
+ return dict(self._data or {})
1554
+
1555
+
1556
+ class _ProvisionDocumentRef:
1557
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]], collection_name: str, doc_id: str):
1558
+ self._store = store
1559
+ self._collection_name = collection_name
1560
+ self._doc_id = doc_id
1561
+
1562
+ def get(self):
1563
+ data = self._store.get(self._collection_name, {}).get(self._doc_id)
1564
+ return _ProvisionDocSnapshot(self._doc_id, data)
1565
+
1566
+ def set(self, payload: Dict[str, Any], merge: bool = False):
1567
+ collection = self._store.setdefault(self._collection_name, {})
1568
+ existing = dict(collection.get(self._doc_id, {})) if merge else {}
1569
+ existing.update(payload)
1570
+ collection[self._doc_id] = existing
1571
+
1572
+ def delete(self):
1573
+ collection = self._store.setdefault(self._collection_name, {})
1574
+ collection.pop(self._doc_id, None)
1575
+
1576
+
1577
+ class _ProvisionQuery:
1578
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]], collection_name: str):
1579
+ self._store = store
1580
+ self._collection_name = collection_name
1581
+ self._filters: List[tuple[str, str, Any]] = []
1582
+ self._limit: int | None = None
1583
+
1584
+ def where(self, field: str, op: str, value: Any):
1585
+ self._filters.append((field, op, value))
1586
+ return self
1587
+
1588
+ def limit(self, value: int):
1589
+ self._limit = value
1590
+ return self
1591
+
1592
+ def stream(self):
1593
+ collection = self._store.get(self._collection_name, {})
1594
+ docs: List[_ProvisionDocSnapshot] = []
1595
+ for doc_id, data in collection.items():
1596
+ include = True
1597
+ for field, op, expected in self._filters:
1598
+ if op != "==":
1599
+ continue
1600
+ if data.get(field) != expected:
1601
+ include = False
1602
+ break
1603
+ if include:
1604
+ docs.append(_ProvisionDocSnapshot(doc_id, data))
1605
+
1606
+ if self._limit is not None:
1607
+ docs = docs[: self._limit]
1608
+ return docs
1609
+
1610
+
1611
+ class _ProvisionCollectionRef:
1612
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]], collection_name: str):
1613
+ self._store = store
1614
+ self._collection_name = collection_name
1615
+
1616
+ def where(self, field: str, op: str, value: Any):
1617
+ return _ProvisionQuery(self._store, self._collection_name).where(field, op, value)
1618
+
1619
+ def limit(self, value: int):
1620
+ return _ProvisionQuery(self._store, self._collection_name).limit(value)
1621
+
1622
+ def stream(self):
1623
+ collection = self._store.get(self._collection_name, {})
1624
+ return [_ProvisionDocSnapshot(doc_id, data) for doc_id, data in collection.items()]
1625
+
1626
+ def document(self, doc_id: str):
1627
+ return _ProvisionDocumentRef(self._store, self._collection_name, doc_id)
1628
+
1629
+ def add(self, payload: Dict[str, Any]):
1630
+ collection = self._store.setdefault(self._collection_name, {})
1631
+ doc_id = f"auto-{len(collection) + 1}"
1632
+ collection[doc_id] = dict(payload)
1633
+ return (None, None)
1634
+
1635
+
1636
+ class _ProvisionFirestoreClient:
1637
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]]):
1638
+ self.store = store
1639
+
1640
+ def collection(self, name: str):
1641
+ return _ProvisionCollectionRef(self.store, name)
1642
+
1643
+
1644
+ class _ProvisionFirestoreModule:
1645
+ class Query:
1646
+ DESCENDING = "DESCENDING"
1647
+
1648
+ SERVER_TIMESTAMP = object()
1649
+
1650
+ def __init__(self, seed: Dict[str, Dict[str, Dict[str, Any]]] | None = None):
1651
+ self._client = _ProvisionFirestoreClient(seed or {})
1652
+
1653
+ def client(self):
1654
+ return self._client
1655
+
1656
+
1657
class TestStudentAccountProvisioningImport:
    """CSV preview/commit flow for bulk student-account provisioning."""

    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_preview_student_account_import_returns_validation_summary(self, _mock_chat):
        """Preview classifies rows as valid, duplicate (email already in
        auth/users), or invalid (missing first name), and returns a token
        for the later commit step.
        """
        # Seed one existing student so row 2 registers as a duplicate.
        firestore = _ProvisionFirestoreModule(
            {
                "users": {
                    "existing-student": {
                        "email": "existing@student.com",
                        "lrn": "1002",
                        "role": "student",
                    }
                }
            }
        )

        def _lookup_user(email: str):
            # Only the seeded email resolves to an auth user; everyone else
            # raises like firebase_admin's UserNotFoundError path.
            if email == "existing@student.com":
                return type("AuthUser", (), {"uid": "auth-existing"})()
            raise Exception("user not found")

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=_lookup_user):
            response = client.post(
                "/api/import/student-accounts/preview",
                files={
                    "file": (
                        "accounts.csv",
                        (
                            b"First Name,Last Name,Student ID,Email,Grade,Section\n"
                            b"Ana,Cruz,1001,ana@student.com,Grade 11,STEM-A\n"
                            b"Ben,Dela,1002,existing@student.com,Grade 11,STEM-A\n"
                            b",Lim,1003,cara@student.com,Grade 11,STEM-A\n"
                        ),
                        "text/csv",
                    )
                },
            )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert payload.get("previewToken")
        assert payload["summary"]["totalRows"] == 3
        assert payload["summary"]["validRows"] == 1
        assert payload["summary"]["duplicateRows"] >= 1
        assert payload["summary"]["invalidRows"] >= 1

    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_commit_student_account_import_provisions_profiles(self, _mock_chat):
        """Committing a valid preview creates the auth user and writes a
        Firestore student profile with forcePasswordChange set.
        """
        firestore = _ProvisionFirestoreModule({"users": {}, "managedStudents": {}, "classSectionOwnership": {}, "accessAuditLogs": {}})

        # Admin caller; no pre-existing auth user; create_user returns a
        # deterministic uid for assertion.
        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
            "uid": "admin-uid",
            "email": "admin@example.com",
            "role": "admin",
        }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "auth-created-1"})()):
            preview_response = client.post(
                "/api/import/student-accounts/preview",
                files={
                    "file": (
                        "accounts.csv",
                        b"First Name,Last Name,Student ID,Email,Grade,Section\nAna,Cruz,1001,ana@student.com,Grade 11,STEM-A\n",
                        "text/csv",
                    )
                },
            )

            assert preview_response.status_code == 200
            preview_payload = preview_response.json()
            assert preview_payload["summary"]["validRows"] == 1

            # Commit inside the same patch context so the same fakes apply.
            commit_response = client.post(
                "/api/import/student-accounts/commit",
                json={
                    "previewToken": preview_payload["previewToken"],
                    "forcePasswordChange": True,
                    "createAuthUsers": True,
                },
            )

        assert commit_response.status_code == 200
        commit_payload = commit_response.json()
        assert commit_payload["summary"]["createdRows"] == 1
        assert commit_payload["summary"]["failedRows"] == 0
        assert len(commit_payload["rows"]) == 1
        assert commit_payload["rows"][0]["status"] in {"created", "updated"}
        assert commit_payload["rows"][0]["uid"]

        # The provisioned Firestore profile must be a student with a forced
        # password change.
        users_store = firestore.client().store.get("users", {})
        assert len(users_store) == 1
        provisioned_profile = next(iter(users_store.values()))
        assert provisioned_profile.get("role") == "student"
        assert provisioned_profile.get("forcePasswordChange") is True
1749
+
1750
+
1751
class _FakeEmailServiceSuccess:
    """Email-service stub whose sends always succeed with a fixed message id."""

    def send_transactional_email(self, _message):
        # The message content is irrelevant; only the result shape matters.
        result = EmailSendResult(success=True, provider="test_email", message_id="msg-1")
        return result
1754
+
1755
+
1756
class _FakeEmailServiceFailure:
    """Email-service stub that always reports a retryable provider outage."""

    def send_transactional_email(self, _message):
        # retryable=True lets callers distinguish transient from permanent
        # delivery failures.
        failure = EmailSendResult(
            success=False,
            provider="test_email",
            error_code="provider_down",
            error_message="Provider unreachable",
            retryable=True,
        )
        return failure
1765
+
1766
+
1767
+ class TestAdminCreateUserEndpoint:
1768
+ def test_create_admin_user_returns_success_when_email_delivered(self):
1769
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1770
+
1771
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1772
+ "uid": "admin-uid",
1773
+ "email": "admin@example.com",
1774
+ "role": "admin",
1775
+ }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "new-user-uid"})()), patch.object(main_module, "create_email_service_from_env", return_value=_FakeEmailServiceSuccess()):
1776
+ response = client.post(
1777
+ "/api/admin/users",
1778
+ json={
1779
+ "name": "Ana & José/Lee",
1780
+ "email": "ana@student.com",
1781
+ "password": "StrongPass1!",
1782
+ "confirmPassword": "StrongPass1!",
1783
+ "role": "Student",
1784
+ "status": "Active",
1785
+ "grade": "Grade 11",
1786
+ "section": "STEM A",
1787
+ "lrn": "123456789012",
1788
+ },
1789
+ )
1790
+
1791
+ assert response.status_code == 200
1792
+ payload = response.json()
1793
+ assert payload["success"] is True
1794
+ assert payload["userCreated"] is True
1795
+ assert payload["emailSent"] is True
1796
+ assert payload["resultCode"] == "created_and_emailed"
1797
+ assert payload["uid"] == "new-user-uid"
1798
+
1799
+ users_store = firestore.client().store.get("users", {})
1800
+ assert "new-user-uid" in users_store
1801
+ assert users_store["new-user-uid"].get("role") == "student"
1802
+ assert "Ana+%26+Jos%C3%A9%2FLee" in users_store["new-user-uid"].get("photo", "")
1803
+
1804
+ def test_create_admin_user_returns_partial_success_when_email_fails(self):
1805
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1806
+
1807
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1808
+ "uid": "admin-uid",
1809
+ "email": "admin@example.com",
1810
+ "role": "admin",
1811
+ }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "new-user-uid-2"})()), patch.object(main_module, "create_email_service_from_env", return_value=_FakeEmailServiceFailure()):
1812
+ response = client.post(
1813
+ "/api/admin/users",
1814
+ json={
1815
+ "name": "Ben Dela",
1816
+ "email": "ben@student.com",
1817
+ "password": "StrongPass1!",
1818
+ "confirmPassword": "StrongPass1!",
1819
+ "role": "Student",
1820
+ "status": "Active",
1821
+ "grade": "Grade 11",
1822
+ "section": "STEM B",
1823
+ "lrn": "123456789013",
1824
+ },
1825
+ )
1826
+
1827
+ assert response.status_code == 200
1828
+ payload = response.json()
1829
+ assert payload["success"] is True
1830
+ assert payload["userCreated"] is True
1831
+ assert payload["emailSent"] is False
1832
+ assert payload["resultCode"] == "created_email_failed"
1833
+ assert payload["uid"] == "new-user-uid-2"
1834
+ assert isinstance(payload.get("warnings"), list)
1835
+ assert payload.get("emailError", {}).get("code") == "provider_down"
1836
+
1837
+ def test_create_admin_user_rejects_password_without_special_character(self):
1838
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1839
+
1840
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1841
+ "uid": "admin-uid",
1842
+ "email": "admin@example.com",
1843
+ "role": "admin",
1844
+ }):
1845
+ response = client.post(
1846
+ "/api/admin/users",
1847
+ json={
1848
+ "name": "Cara Diaz",
1849
+ "email": "cara@student.com",
1850
+ "password": "StrongPass1",
1851
+ "confirmPassword": "StrongPass1",
1852
+ "role": "Student",
1853
+ "status": "Active",
1854
+ "grade": "Grade 11",
1855
+ "section": "STEM C",
1856
+ "lrn": "123456789014",
1857
+ },
1858
+ )
1859
+
1860
+ assert response.status_code == 400
1861
+ payload = response.json()
1862
+ assert "special character" in payload["detail"].lower()
1863
+
1864
+ def test_create_admin_user_rolls_back_auth_user_when_firestore_write_fails(self):
1865
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1866
+ delete_user_mock = MagicMock()
1867
+
1868
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1869
+ "uid": "admin-uid",
1870
+ "email": "admin@example.com",
1871
+ "role": "admin",
1872
+ }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "new-user-uid-3"})()), patch.object(main_module.firebase_auth, "delete_user", delete_user_mock), patch.object(_ProvisionDocumentRef, "set", side_effect=Exception("firestore unavailable")):
1873
+ response = client.post(
1874
+ "/api/admin/users",
1875
+ json={
1876
+ "name": "Dana Flores",
1877
+ "email": "dana@student.com",
1878
+ "password": "StrongPass1!",
1879
+ "confirmPassword": "StrongPass1!",
1880
+ "role": "Student",
1881
+ "status": "Active",
1882
+ "grade": "Grade 11",
1883
+ "section": "STEM A",
1884
+ "lrn": "123456789015",
1885
+ },
1886
+ )
1887
+
1888
+ assert response.status_code == 500
1889
+ payload = response.json()
1890
+ assert "firestore" in payload["detail"].lower()
1891
+ delete_user_mock.assert_called_once_with("new-user-uid-3")
1892
+
1893
+
1894
+ class TestAdminListUsersEndpoint:
1895
+ def test_get_admin_users_returns_paginated_results(self):
1896
+ firestore = _ProvisionFirestoreModule(
1897
+ {
1898
+ "users": {
1899
+ "student-a": {
1900
+ "name": "Alice Student",
1901
+ "email": "alice@student.com",
1902
+ "role": "student",
1903
+ "status": "Active",
1904
+ "grade": "Grade 11",
1905
+ "section": "STEM A",
1906
+ "lrn": "100000000001",
1907
+ "createdAt": 1710000000,
1908
+ },
1909
+ "student-b": {
1910
+ "name": "Ben Student",
1911
+ "email": "ben@student.com",
1912
+ "role": "student",
1913
+ "status": "Active",
1914
+ "grade": "Grade 11",
1915
+ "section": "STEM B",
1916
+ "lrn": "100000000002",
1917
+ "createdAt": 1710000100,
1918
+ },
1919
+ "teacher-a": {
1920
+ "name": "Tina Teacher",
1921
+ "email": "tina@school.com",
1922
+ "role": "teacher",
1923
+ "status": "Active",
1924
+ "department": "Mathematics",
1925
+ "createdAt": 1710000200,
1926
+ },
1927
+ },
1928
+ "accessAuditLogs": {},
1929
+ }
1930
+ )
1931
+
1932
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1933
+ "uid": "admin-uid",
1934
+ "email": "admin@example.com",
1935
+ "role": "admin",
1936
+ }):
1937
+ response = client.get("/api/admin/users?page=1&pageSize=1&role=student")
1938
+
1939
+ assert response.status_code == 200
1940
+ payload = response.json()
1941
+ assert payload["success"] is True
1942
+ assert payload["page"] == 1
1943
+ assert payload["pageSize"] == 1
1944
+ assert len(payload["users"]) == 1
1945
+ assert payload["users"][0]["role"] == "Student"
1946
+ assert payload["hasMore"] is True
1947
+
1948
+ def test_get_admin_users_rejects_invalid_role_filter(self):
1949
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1950
+
1951
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1952
+ "uid": "admin-uid",
1953
+ "email": "admin@example.com",
1954
+ "role": "admin",
1955
+ }):
1956
+ response = client.get("/api/admin/users?role=guest")
1957
+
1958
+ assert response.status_code == 400
1959
+ assert "role must be one of" in response.json()["detail"].lower()
1960
+
1961
+ def test_get_admin_users_rejects_non_admin_role(self):
1962
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1963
+
1964
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1965
+ "uid": "teacher-uid",
1966
+ "email": "teacher@example.com",
1967
+ "role": "teacher",
1968
+ }):
1969
+ response = client.get("/api/admin/users?page=1&pageSize=25")
1970
+
1971
+ assert response.status_code == 403
1972
+ assert "forbidden" in response.json()["detail"].lower()
1973
+
1974
+
1975
+ class TestAdminDeleteUserEndpoint:
1976
+ def test_delete_admin_user_removes_auth_and_profile(self):
1977
+ firestore = _ProvisionFirestoreModule(
1978
+ {
1979
+ "users": {
1980
+ "target-uid": {
1981
+ "email": "target@student.com",
1982
+ "role": "student",
1983
+ }
1984
+ },
1985
+ "accessAuditLogs": {},
1986
+ }
1987
+ )
1988
+ delete_user_mock = MagicMock()
1989
+
1990
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1991
+ "uid": "admin-uid",
1992
+ "email": "admin@example.com",
1993
+ "role": "admin",
1994
+ }), patch.object(main_module.firebase_auth, "delete_user", delete_user_mock):
1995
+ response = client.delete("/api/admin/users?uid=target-uid")
1996
+
1997
+ assert response.status_code == 200
1998
+ payload = response.json()
1999
+ assert payload["success"] is True
2000
+ assert payload["uid"] == "target-uid"
2001
+ assert payload["authDeleted"] is True
2002
+ assert payload["profileDeleted"] is True
2003
+
2004
+ delete_user_mock.assert_called_once_with("target-uid")
2005
+ assert "target-uid" not in firestore.client().store.get("users", {})
2006
+
2007
+ def test_delete_admin_user_handles_missing_auth_record(self):
2008
+ firestore = _ProvisionFirestoreModule(
2009
+ {
2010
+ "users": {
2011
+ "target-uid-2": {
2012
+ "email": "missing-auth@student.com",
2013
+ "role": "student",
2014
+ }
2015
+ },
2016
+ "accessAuditLogs": {},
2017
+ }
2018
+ )
2019
+
2020
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
2021
+ "uid": "admin-uid",
2022
+ "email": "admin@example.com",
2023
+ "role": "admin",
2024
+ }), patch.object(main_module.firebase_auth, "delete_user", side_effect=Exception("No user record found for the provided uid")):
2025
+ response = client.delete("/api/admin/users?uid=target-uid-2")
2026
+
2027
+ assert response.status_code == 200
2028
+ payload = response.json()
2029
+ assert payload["success"] is True
2030
+ assert payload["uid"] == "target-uid-2"
2031
+ assert payload["authDeleted"] is False
2032
+ assert payload["profileDeleted"] is True
2033
+ assert any("already missing" in warning.lower() for warning in payload.get("warnings", []))
2034
+ assert "target-uid-2" not in firestore.client().store.get("users", {})
2035
+
2036
+ def test_delete_admin_user_rejects_self_delete(self):
2037
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
2038
+
2039
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
2040
+ "uid": "admin-uid",
2041
+ "email": "admin@example.com",
2042
+ "role": "admin",
2043
+ }):
2044
+ response = client.delete("/api/admin/users?uid=admin-uid")
2045
+
2046
+ assert response.status_code == 400
2047
+ assert "cannot delete their own account" in response.json()["detail"].lower()
2048
+
2049
+
2050
+ # ─── Run ───────────────────────────────────────────────────────
2051
+
2052
+ if __name__ == "__main__":
2053
+ pytest.main([__file__, "-v"])
tests/test_email_service.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import os
4
+ import sys
5
+
6
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
7
+
8
+ from services.email_service import ( # noqa: E402
9
+ BrevoApiEmailProvider,
10
+ EmailMessagePayload,
11
+ create_email_service_from_env,
12
+ )
13
+
14
+
15
+ _EMAIL_ENV_KEYS = [
16
+ "BREVO_API_KEY",
17
+ "BREVO_API_TOKEN",
18
+ "BREVO_MCP_TOKEN",
19
+ "BREVO_SMTP_LOGIN",
20
+ "BREVO_SMTP_USERNAME",
21
+ "BREVO_SMTP_USER",
22
+ "BREVO_SMTP_KEY",
23
+ "BREVO_SMTP_PASSWORD",
24
+ "BREVO_SMTP_PASS",
25
+ "BREVO_SMTP_HOST",
26
+ "BREVO_SMTP_PORT",
27
+ "MAIL_FROM_ADDRESS",
28
+ "MAIL_FROM",
29
+ "BREVO_FROM_ADDRESS",
30
+ "MAIL_FROM_NAME",
31
+ "BREVO_FROM_NAME",
32
+ "MAIL_SEND_TIMEOUT_SEC",
33
+ ]
34
+
35
+
36
+ def _clear_email_env(monkeypatch) -> None:
37
+ for key in _EMAIL_ENV_KEYS:
38
+ monkeypatch.delenv(key, raising=False)
39
+
40
+
41
+ def _encode_mcp_payload(payload: dict) -> str:
42
+ encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8")
43
+ return encoded.rstrip("=")
44
+
45
+
46
+ def test_create_email_service_uses_mcp_token_when_api_key_missing(monkeypatch) -> None:
47
+ _clear_email_env(monkeypatch)
48
+ monkeypatch.setenv("BREVO_MCP_TOKEN", _encode_mcp_payload({"api_key": "xkeysib-test-from-mcp"}))
49
+
50
+ service = create_email_service_from_env()
51
+
52
+ assert isinstance(service._primary_provider, BrevoApiEmailProvider)
53
+ assert service._fallback_provider is None
54
+
55
+
56
+ def test_create_email_service_prefers_direct_api_key_when_present(monkeypatch) -> None:
57
+ _clear_email_env(monkeypatch)
58
+ monkeypatch.setenv("BREVO_API_KEY", "xkeysib-direct")
59
+ monkeypatch.setenv("BREVO_MCP_TOKEN", _encode_mcp_payload({"api_key": "xkeysib-from-mcp"}))
60
+
61
+ service = create_email_service_from_env()
62
+
63
+ assert isinstance(service._primary_provider, BrevoApiEmailProvider)
64
+ assert getattr(service._primary_provider, "_api_key") == "xkeysib-direct"
65
+
66
+
67
+ def test_create_email_service_returns_not_configured_for_invalid_mcp_token(monkeypatch) -> None:
68
+ _clear_email_env(monkeypatch)
69
+ monkeypatch.setenv("BREVO_MCP_TOKEN", "not-a-valid-token")
70
+
71
+ service = create_email_service_from_env()
72
+ result = service.send_transactional_email(
73
+ EmailMessagePayload(
74
+ to_name="Test User",
75
+ to_email="test@example.com",
76
+ subject="subject",
77
+ html_content="<p>hello</p>",
78
+ text_content="hello",
79
+ )
80
+ )
81
+
82
+ assert result.success is False
83
+ assert result.provider == "none"
84
+ assert result.error_code == "email_not_configured"
tests/test_email_templates.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
5
+
6
+ from services.email_templates import ( # noqa: E402
7
+ WelcomeCredentialsEmailContext,
8
+ build_welcome_credentials_email,
9
+ )
10
+
11
+
12
+ def test_build_welcome_email_includes_brand_and_recipient_avatar_images() -> None:
13
+ result = build_welcome_credentials_email(
14
+ WelcomeCredentialsEmailContext(
15
+ recipient_name="Ana Cruz",
16
+ login_email="ana@student.com",
17
+ temporary_password="StrongPass1!",
18
+ role="Student",
19
+ login_url="https://mathpulse.ai/login",
20
+ brand_avatar_url="https://cdn.mathpulse.ai/assets/avatar_icon.png",
21
+ recipient_avatar_url="https://ui-avatars.com/api/?name=Ana+Cruz",
22
+ )
23
+ )
24
+
25
+ html_content = result["html"]
26
+
27
+ assert "MathPulse AI" in html_content
28
+ assert "Learning Platform Account Access" in html_content
29
+ assert "https://cdn.mathpulse.ai/assets/avatar_icon.png" in html_content
30
+ assert "https://ui-avatars.com/api/?name=Ana+Cruz" in html_content
31
+ assert "Temporary Password" in html_content
32
+
33
+
34
+ def test_build_welcome_email_sanitizes_invalid_avatar_urls_and_falls_back() -> None:
35
+ result = build_welcome_credentials_email(
36
+ WelcomeCredentialsEmailContext(
37
+ recipient_name="Ben Dela",
38
+ login_email="ben@student.com",
39
+ temporary_password="StrongPass1!",
40
+ role="Student",
41
+ login_url="javascript:alert(1)",
42
+ brand_avatar_url="ftp://invalid-avatar",
43
+ recipient_avatar_url="data:text/html,unsafe",
44
+ )
45
+ )
46
+
47
+ html_content = result["html"]
48
+
49
+ assert "javascript:alert(1)" not in html_content
50
+ assert "ftp://invalid-avatar" not in html_content
51
+ assert "data:text/html,unsafe" not in html_content
52
+ assert "https://mathpulse.ai" in html_content
53
+ assert ">MP</div>" in html_content
tests/test_hf_monitoring_routes.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Route-level tests for /api/hf/monitoring endpoint.
3
+ Updated for DeepSeek AI monitoring.
4
+ """
5
+
6
+ import os
7
+ from unittest.mock import MagicMock, Mock, patch
8
+
9
+ import pytest
10
+ from fastapi.testclient import TestClient
11
+
12
+ import main as main_module
13
+ from main import app
14
+
15
+ main_module._firebase_ready = True
16
+ main_module._init_firebase_admin = lambda: None
17
+ main_module.firebase_firestore = None
18
+ if getattr(main_module, "firebase_auth", None) is None:
19
+ main_module.firebase_auth = MagicMock()
20
+ main_module.firebase_auth.verify_id_token = MagicMock(return_value={
21
+ "uid": "admin-uid",
22
+ "email": "admin@example.com",
23
+ "role": "admin",
24
+ })
25
+
26
+ admin_client = TestClient(app, headers={"Authorization": "Bearer admin-token"})
27
+
28
+ EXPECTED_MONITORING_FIELDS = {
29
+ "modelId", "modelStatus", "avgResponseTimeMs",
30
+ "embeddingModelId", "embeddingModelStatus",
31
+ "inferenceBalance", "totalPeriodCost",
32
+ "hubApiCallsUsed", "hubApiCallsLimit",
33
+ "zeroGpuMinutesUsed", "zeroGpuMinutesLimit",
34
+ "publicStorageUsedTB", "publicStorageLimitTB",
35
+ "lastChecked", "periodStart", "periodEnd",
36
+ "activeProfile", "runtimeOverridesActive", "resolvedModels",
37
+ "provider", "apiBaseUrl",
38
+ }
39
+
40
+ EXPECTED_FIELDS_AFTER_DS_REPLACEMENT = EXPECTED_MONITORING_FIELDS
41
+
42
+
43
+ @pytest.fixture(autouse=True)
44
+ def _mock_env():
45
+ with patch.dict(os.environ, {"DEEPSEEK_API_KEY": "test-ds-monitoring-key"}):
46
+ yield
47
+
48
+
49
+ # ─── Auth Enforcement ────────────────────────────────────────
50
+
51
+
52
+ class TestMonitoringAuth:
53
+ def test_rejects_bad_token(self):
54
+ main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
55
+ c = TestClient(app, headers={"Authorization": "Bearer bad-token"})
56
+ response = c.get("/api/hf/monitoring")
57
+ main_module.firebase_auth.verify_id_token = MagicMock(return_value={
58
+ "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
59
+ })
60
+ assert response.status_code in {401, 403}
61
+
62
+
63
+ # ─── Response Shape ───────────────────────────────────────────
64
+
65
+
66
+ class TestMonitoringResponseShape:
67
+ @patch("main.time.time")
68
+ def test_success_response_contains_all_expected_fields(self, mock_time):
69
+ mock_time.return_value = 1000.0
70
+
71
+ response = admin_client.get("/api/hf/monitoring")
72
+ assert response.status_code == 200
73
+ data = response.json()
74
+ assert data["success"] is True
75
+ payload = data["data"]
76
+ for field in EXPECTED_FIELDS_AFTER_DS_REPLACEMENT:
77
+ assert field in payload, f"Missing field: {field}"
78
+
79
+ @patch("main.time.time")
80
+ @patch("services.ai_client.get_deepseek_client")
81
+ def test_all_probes_fail_gracefully(self, mock_ds_client_fn, mock_time):
82
+ mock_time.return_value = 1000.0
83
+ mock_client = MagicMock()
84
+ mock_client.chat.completions.create.side_effect = Exception("network down")
85
+ mock_ds_client_fn.return_value = mock_client
86
+
87
+ response = admin_client.get("/api/hf/monitoring")
88
+ assert response.status_code == 200
89
+ data = response.json()
90
+ assert data["success"] is True
91
+
92
+
93
+ # ─── Response Values ──────────────────────────────────────────
94
+
95
+
96
+ class TestMonitoringResponseValues:
97
+ @patch("services.ai_client.get_deepseek_client")
98
+ @patch("main.time.time")
99
+ def test_model_status_is_degraded_when_probe_fails(self, mock_time, mock_ds_client_fn):
100
+ mock_time.return_value = 1000.0
101
+ mock_client = MagicMock()
102
+ mock_client.chat.completions.create.side_effect = Exception("probe down")
103
+ mock_ds_client_fn.return_value = mock_client
104
+
105
+ response = admin_client.get("/api/hf/monitoring")
106
+ data = response.json()
107
+ assert data["success"] is True
108
+ assert data["data"]["modelStatus"] == "Degraded"
109
+
110
+ @patch("main.time.time")
111
+ def test_embedding_model_id_is_returned(self, mock_time):
112
+ mock_time.return_value = 1000.0
113
+
114
+ response = admin_client.get("/api/hf/monitoring")
115
+ data = response.json()
116
+ assert data["success"] is True
117
+ assert "bge-small" in data["data"]["embeddingModelId"].lower()
118
+
119
+ @patch("main.time.time")
120
+ def test_resolved_models_contains_task_keys(self, mock_time):
121
+ mock_time.return_value = 1000.0
122
+
123
+ response = admin_client.get("/api/hf/monitoring")
124
+ data = response.json()
125
+ resolved = data["data"].get("resolvedModels", {})
126
+ expected_tasks = {"chat", "rag_lesson", "rag_problem", "quiz_generation"}
127
+ for task in expected_tasks:
128
+ assert task in resolved, f"Missing task: {task}"
129
+ assert isinstance(resolved[task], str) and len(resolved[task]) > 0
130
+
131
+ @patch("main.time.time")
132
+ def test_active_profile_returned(self, mock_time):
133
+ mock_time.return_value = 1000.0
134
+
135
+ response = admin_client.get("/api/hf/monitoring")
136
+ data = response.json()
137
+ assert data["success"] is True
138
+ assert data["data"]["activeProfile"] in {"dev", "budget", "prod", ""}
139
+
140
+ @patch("main.time.time")
141
+ def test_provider_and_api_base_url_present(self, mock_time):
142
+ mock_time.return_value = 1000.0
143
+
144
+ response = admin_client.get("/api/hf/monitoring")
145
+ data = response.json()
146
+ assert data["success"] is True
147
+ assert data["data"]["provider"] == "deepseek"
148
+ assert "api.deepseek.com" in data["data"]["apiBaseUrl"]
tests/test_model_profiles.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from unittest.mock import patch
6
+
7
+ import pytest
8
+
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
10
+ from services import inference_client as inf_client
11
+ from services.inference_client import (
12
+ _MODEL_PROFILES,
13
+ get_current_runtime_config,
14
+ get_model_for_task,
15
+ is_sequential_model,
16
+ model_supports_thinking,
17
+ reset_runtime_overrides,
18
+ set_runtime_model_override,
19
+ set_runtime_model_profile,
20
+ )
21
+
22
+
23
+ REQUIRED_PROFILE_KEYS = {
24
+ "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
25
+ "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
26
+ }
27
+
28
+
29
+ class TestModelProfiles:
30
+ def test_profiles_have_all_keys(self):
31
+ for name, profile in _MODEL_PROFILES.items():
32
+ assert REQUIRED_PROFILE_KEYS == set(profile.keys()), \
33
+ f"Profile '{name}' missing or extra keys"
34
+
35
+ def test_dev_uses_chat_model(self):
36
+ dev = _MODEL_PROFILES["dev"]
37
+ for key, value in dev.items():
38
+ assert "deepseek-chat" in value, f"dev/{key} = {value}, expected deepseek-chat"
39
+
40
+ def test_prod_chat_is_chat_model(self):
41
+ assert "deepseek-chat" in _MODEL_PROFILES["prod"]["INFERENCE_CHAT_MODEL_ID"]
42
+
43
+ def test_prod_rag_is_reasoner(self):
44
+ assert "deepseek-reasoner" in _MODEL_PROFILES["prod"]["HF_RAG_MODEL_ID"]
45
+
46
+ def test_budget_uses_chat_model_everywhere(self):
47
+ budget = _MODEL_PROFILES["budget"]
48
+ for key, value in budget.items():
49
+ assert "deepseek-chat" in value, f"budget/{key} = {value}"
50
+
51
+
52
+ class TestRuntimeOverrides:
53
+
54
+ def setup_method(self):
55
+ reset_runtime_overrides()
56
+
57
+ def teardown_method(self):
58
+ reset_runtime_overrides()
59
+
60
+ def test_set_profile_populates_overrides(self):
61
+ set_runtime_model_profile("dev")
62
+ assert inf_client._RUNTIME_PROFILE == "dev"
63
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_MODEL_ID"] == "deepseek-chat"
64
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_CHAT_MODEL_ID"] == "deepseek-chat"
65
+
66
+ def test_set_profile_replaces_all_overrides(self):
67
+ set_runtime_model_profile("dev")
68
+ set_runtime_model_profile("prod")
69
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_CHAT_MODEL_ID"] == "deepseek-chat"
70
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_LOCK_MODEL_ID"] == "deepseek-chat"
71
+
72
+ def test_set_profile_unknown_raises(self):
73
+ with pytest.raises(ValueError, match="Unknown profile"):
74
+ set_runtime_model_profile("nonexistent")
75
+
76
+ def test_single_override_sets_key(self):
77
+ set_runtime_model_override("HF_RAG_MODEL_ID", "custom/model")
78
+ assert inf_client._RUNTIME_OVERRIDES["HF_RAG_MODEL_ID"] == "custom/model"
79
+
80
+ def test_reset_clears_overrides(self):
81
+ set_runtime_model_profile("dev")
82
+ reset_runtime_overrides()
83
+ assert inf_client._RUNTIME_PROFILE == ""
84
+ assert inf_client._RUNTIME_OVERRIDES == {}
85
+
86
+ def test_override_layers_on_profile(self):
87
+ set_runtime_model_profile("dev")
88
+ set_runtime_model_override("HF_RAG_MODEL_ID", "custom/model")
89
+ assert inf_client._RUNTIME_OVERRIDES["HF_RAG_MODEL_ID"] == "custom/model"
90
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_MODEL_ID"] == "deepseek-chat"
91
+
92
+
93
+ class TestGetCurrentRuntimeConfig:
94
+
95
+ def setup_method(self):
96
+ reset_runtime_overrides()
97
+
98
+ def teardown_method(self):
99
+ reset_runtime_overrides()
100
+
101
+ def test_returns_resolved_dict_with_all_keys(self):
102
+ set_runtime_model_profile("dev")
103
+ config = get_current_runtime_config()
104
+ assert config["profile"] == "dev"
105
+ for key in REQUIRED_PROFILE_KEYS:
106
+ assert key in config["resolved"], f"Missing {key}"
107
+
108
+ def test_override_takes_priority_over_profile(self):
109
+ set_runtime_model_profile("dev")
110
+ set_runtime_model_override("INFERENCE_CHAT_MODEL_ID", "custom/chat")
111
+ config = get_current_runtime_config()
112
+ assert config["resolved"]["INFERENCE_CHAT_MODEL_ID"] == "custom/chat"
113
+
114
+
115
+ class TestGetModelForTask:
116
+
117
+ def setup_method(self):
118
+ reset_runtime_overrides()
119
+
120
+ def teardown_method(self):
121
+ reset_runtime_overrides()
122
+
123
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
124
+ def test_returns_profile_default_for_rag(self):
125
+ set_runtime_model_profile("prod")
126
+ model = get_model_for_task("rag_lesson")
127
+ assert "deepseek-reasoner" in model
128
+
129
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
130
+ def test_returns_profile_default_for_chat(self):
131
+ set_runtime_model_profile("prod")
132
+ model = get_model_for_task("chat")
133
+ assert "deepseek-chat" in model
134
+
135
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
136
+ def test_returns_runtime_override_for_chat(self):
137
+ set_runtime_model_override("INFERENCE_CHAT_MODEL_ID", "custom/chat")
138
+ model = get_model_for_task("chat")
139
+ assert model == "custom/chat"
140
+
141
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "true"})
142
+ def test_enforce_qwen_overrides_task(self):
143
+ set_runtime_model_profile("prod")
144
+ model = get_model_for_task("rag_lesson")
145
+ assert "deepseek-chat" in model
146
+
147
+
148
+ class TestIsSequentialModel:
149
+
150
+ def setup_method(self):
151
+ reset_runtime_overrides()
152
+
153
+ def teardown_method(self):
154
+ reset_runtime_overrides()
155
+
156
+ def test_reasoner_is_sequential(self):
157
+ assert is_sequential_model("deepseek-reasoner") is True
158
+
159
+ def test_chat_is_not_sequential(self):
160
+ assert is_sequential_model("deepseek-chat") is False
161
+
162
+ def test_empty_string_checks_env(self):
163
+ result = is_sequential_model("")
164
+ assert result is True or result is False
165
+
166
+ @patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-reasoner"})
167
+ def test_env_model_reasoner_is_sequential(self):
168
+ assert is_sequential_model("") is True
169
+
170
+ @patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-chat"})
171
+ def test_env_model_chat_is_not_sequential(self):
172
+ assert is_sequential_model("") is False
173
+
174
+
175
+ class TestModelSupportsThinking:
176
+
177
+ def test_reasoner_supports_thinking(self):
178
+ assert model_supports_thinking("deepseek-reasoner") is True
179
+
180
+ def test_chat_does_not_support_thinking(self):
181
+ assert model_supports_thinking("deepseek-chat") is False
182
+
183
+ def test_unknown_does_not_support_thinking(self):
184
+ assert model_supports_thinking("meta-llama/Llama-3.1-8B-Instruct") is False
tests/test_rag_pipeline.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ import pytest
7
+
8
+ from rag.curriculum_rag import (
9
+ _distance_to_score,
10
+ build_lesson_prompt,
11
+ build_lesson_query,
12
+ retrieve_curriculum_context,
13
+ summarize_retrieval_confidence,
14
+ )
15
+
16
+
17
+ def _mock_vectorstore_components(collection_mock, embedder_mock):
18
+ def _factory():
19
+ return (MagicMock(), collection_mock, embedder_mock)
20
+ return _factory
21
+
22
+
23
class TestRetrieveCurriculumContext:
    """Retrieval should degrade gracefully when the store holds nothing."""

    def test_empty_collection_returns_empty_list(self):
        empty_collection = MagicMock()
        # Any subscript of collection.get(...)'s result yields an empty list,
        # simulating a vectorstore with zero ingested documents.
        empty_collection.get.return_value.__getitem__.return_value = []
        fake_embedder = MagicMock()

        patched_components = patch(
            "rag.curriculum_rag.get_vectorstore_components",
            return_value=(MagicMock(), empty_collection, fake_embedder),
        )
        with patched_components:
            chunks = retrieve_curriculum_context(
                query="test query",
                subject="General Mathematics",
                top_k=5,
            )
        assert chunks == []
41
+
42
+
43
class TestDistanceToScore:
    """Invariants of the distance-to-similarity conversion."""

    def test_zero_distance_returns_one(self):
        # A perfect match (distance 0) maps to the maximum score.
        assert _distance_to_score(0.0) == 1.0

    def test_never_returns_zero_or_negative(self):
        # Scores stay strictly positive and capped at 1 across the range.
        for distance in (0.0, 0.5, 1.0, 5.0, 100.0):
            score = _distance_to_score(distance)
            assert 0.0 < score <= 1.0
52
+
53
+
54
class TestBuildLessonPrompt:
    """The generated lesson prompt must carry structure and PH context."""

    def test_contains_json_and_required_keys(self):
        sample_chunk = {
            "content": "Compound interest formula A=P(1+r/n)^(nt)",
            "source_file": "sample_curriculum.json",
            "page": 5,
            "content_domain": "Business Mathematics",
            "chunk_type": "content_explanation",
            "score": 0.85,
        }
        prompt = build_lesson_prompt(
            lesson_title="Compound Interest",
            competency="M11GM-IIc-1",
            grade_level="Grade 11-12",
            subject="General Mathematics",
            quarter=3,
            learner_level="mixed",
            module_unit="Business Math",
            curriculum_chunks=[sample_chunk],
        )
        assert "JSON" in prompt
        assert "lessonTitle" in prompt
        assert "needsReview" in prompt
        # At least one Philippine real-world anchor should appear.
        ph_context_terms = (
            "payroll", "VAT", "discounts", "loans", "Pag-IBIG", "school",
        )
        assert any(term in prompt for term in ph_context_terms)

    def test_contains_thinking_hint(self):
        prompt = build_lesson_prompt(
            lesson_title="Functions",
            competency="M11GM-Ia-1",
            grade_level="Grade 11-12",
            subject="General Mathematics",
            quarter=1,
            learner_level=None,
            module_unit=None,
            curriculum_chunks=[],
        )
        assert "Think step by step" in prompt
95
+
96
+
97
class TestSummarizeRetrievalConfidence:
    """Confidence banding computed over retrieved chunk scores."""

    def test_empty_chunks_returns_low(self):
        summary = summarize_retrieval_confidence([])
        assert summary["band"] == "low"
        assert summary["confidence"] == 0.0

    def test_high_confidence(self):
        strong = [{"score": s} for s in (0.85, 0.80, 0.75)]
        assert summarize_retrieval_confidence(strong)["band"] == "high"

    def test_medium_confidence(self):
        middling = [{"score": s} for s in (0.65, 0.60)]
        assert summarize_retrieval_confidence(middling)["band"] == "medium"

    def test_low_confidence(self):
        weak = [{"score": s} for s in (0.35, 0.30)]
        assert summarize_retrieval_confidence(weak)["band"] == "low"

    def test_chunk_count_included(self):
        chunks = [{"score": s} for s in (0.8, 0.7, 0.6)]
        assert summarize_retrieval_confidence(chunks)["chunkCount"] == 3
122
+
123
+
124
class TestBuildLessonQuery:
    """The retrieval query should weave in every provided lesson field."""

    def test_includes_all_fields(self):
        query = build_lesson_query(
            "Compound Interest",
            "General Mathematics",
            3,
            lesson_title="Compound Interest Basics",
            competency="M11GM-IIc-1",
            module_unit="Business Math",
            learner_level="mixed",
        )
        for fragment in (
            "Compound Interest",
            "General Mathematics",
            "Quarter 3",
            "Compound Interest Basics",
        ):
            assert fragment in query
139
+
140
+
141
class TestIsSequentialModel:
    """Env-driven sequential detection when no explicit model id is passed."""

    def test_sequential_for_reasoner(self):
        with patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-reasoner"}):
            # Imported inside the patch so the call reads the patched env.
            from services.inference_client import is_sequential_model
            sequential = is_sequential_model()
            assert sequential is True

    def test_not_sequential_for_chat(self):
        with patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-chat"}):
            from services.inference_client import is_sequential_model
            sequential = is_sequential_model()
            assert sequential is False