github-actions[bot] commited on
Commit
b5cb5bb
·
1 Parent(s): e2217c1

🚀 Auto-deploy backend from GitHub (41571e6)

Browse files
.deploy-trigger ADDED
@@ -0,0 +1 @@
 
 
1
+ 2026-04-29 21:37:27
.dockerignore ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .pytest_cache/
6
+ .mypy_cache/
7
+ .ruff_cache/
8
+
9
+ .git/
10
+ .github/
11
+ .vscode/
12
+ .idea/
13
+
14
+ .env
15
+ .env.*
16
+ *.log
17
+
18
+ tests/
19
+ docs/
20
+ datasets/
21
+ jobs/
22
+ build/
23
+ dist/
24
+ node_modules/
25
+
26
+ Dockerfile*
27
+ docker-compose.yml
28
+ *.md
29
+ !README.md
.env.example ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ── Vector Store ──────────────────────────────────────────────────
2
+ # Path to ChromaDB vectorstore directory
3
+ CURRICULUM_VECTORSTORE_DIR=datasets/vectorstore
4
+
5
+ # Sentence transformer for embeddings
6
+ # WARNING: changing this requires full re-ingestion of all curriculum data
7
+ EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
8
+
9
+ # ── DeepSeek AI Inference ─────────────────────────────────────────
10
+ # DeepSeek API key (OpenAI-compatible), required for all AI features
11
+ DEEPSEEK_API_KEY=your_deepseek_api_key_here
12
+ DEEPSEEK_BASE_URL=https://api.deepseek.com
13
+ DEEPSEEK_MODEL=deepseek-chat
14
+ DEEPSEEK_REASONER_MODEL=deepseek-reasoner
15
+
16
+ # ── HuggingFace (dataset push / HF Space deployment only) ─────────
17
+ # HF API token — kept only for HF Space deployment and dataset push
18
+ HF_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
19
+
20
+ # HF Model ID for AI monitoring proxy
21
+ VITE_HF_MODEL_ID=Qwen/QwQ-32B
22
+
23
+ # ── Model Selection ───────────────────────────────────────────────
24
+ # LOCAL DEVELOPMENT — deepseek-chat (fast, $0.14/M input)
25
+ HF_MODEL_ID=deepseek-chat
26
+
27
+ # PRODUCTION — deepseek-reasoner for step-by-step solutions
28
+ # HF_MODEL_ID=deepseek-reasoner
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1 \
5
+ PIP_DISABLE_PIP_VERSION_CHECK=1 \
6
+ PIP_NO_CACHE_DIR=1 \
7
+ HF_HOME=/data/.huggingface \
8
+ HUGGINGFACE_HUB_CACHE=/data/.huggingface/hub \
9
+ TRANSFORMERS_CACHE=/data/.huggingface/transformers \
10
+ MPLCONFIGDIR=/tmp/matplotlib
11
+
12
+ WORKDIR /app
13
+
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ ca-certificates \
16
+ && rm -rf /var/lib/apt/lists/*
17
+
18
+ COPY requirements.txt /app/requirements.txt
19
+ RUN python -m pip install --upgrade pip setuptools wheel && \
20
+ python -m pip install --prefer-binary --retries 5 -r /app/requirements.txt
21
+
22
+ COPY . /app
23
+
24
+ EXPOSE 7860
25
+
26
+ CMD ["/bin/sh", "/app/startup.sh"]
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Mathpulse Api V3test
3
- emoji: 📊
4
- colorFrom: purple
5
- colorTo: gray
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: MathPulse AI API
3
+ emoji: "🧮"
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ # MathPulse AI Backend
12
+
13
+ FastAPI backend for the MathPulse AI educational platform.
analytics.py ADDED
@@ -0,0 +1,2110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MathPulse AI - ML-Powered Student Analytics & Adaptive Learning Module
3
+
4
+ Provides:
5
+ - Student competency assessment via IRT (Item Response Theory)
6
+ - Enhanced risk prediction with trained ML models (Random Forest / XGBoost)
7
+ - Quiz difficulty calibration engine
8
+ - Topic recommendation engine
9
+ - Learning analytics aggregation
10
+ - Mock data generation for development/testing
11
+ """
12
+
13
+ import os
14
+ import math
15
+ import time
16
+ import random
17
+ import logging
18
+ import traceback
19
+ import re
20
+ from typing import List, Optional, Dict, Any, Tuple, Literal
21
+ from datetime import datetime, timedelta
22
+ from collections import defaultdict
23
+
24
+ import numpy as np # type: ignore[import-not-found]
25
+ from scipy.optimize import minimize_scalar # type: ignore[import-not-found]
26
+ from sklearn.linear_model import LinearRegression # type: ignore[import-not-found]
27
+ from sklearn.ensemble import RandomForestClassifier # type: ignore[import-not-found]
28
+ from sklearn.model_selection import train_test_split # type: ignore[import-not-found]
29
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report # type: ignore[import-not-found]
30
+ from pydantic import BaseModel, Field
31
+
32
+ # Optional heavy dependencies — guarded imports
33
+ xgb: Any = None
34
+ shap: Any = None
35
+ joblib: Any = None
36
+ firebase_admin: Any = None
37
+ credentials: Any = None
38
+ firestore: Any = None
39
+
40
+ try:
41
+ import xgboost as xgb # type: ignore[import-not-found,no-redef]
42
+ HAS_XGBOOST = True
43
+ except ImportError:
44
+ HAS_XGBOOST = False
45
+
46
+ try:
47
+ import shap # type: ignore[import-not-found,no-redef]
48
+ HAS_SHAP = True
49
+ except ImportError:
50
+ HAS_SHAP = False
51
+
52
+ try:
53
+ import joblib # type: ignore[import-not-found,no-redef]
54
+ HAS_JOBLIB = True
55
+ except ImportError:
56
+ HAS_JOBLIB = False
57
+
58
+ try:
59
+ import firebase_admin # type: ignore[import-not-found,no-redef]
60
+ from firebase_admin import credentials, firestore # type: ignore[import-not-found,no-redef,assignment]
61
+ HAS_FIREBASE = True
62
+ except ImportError:
63
+ HAS_FIREBASE = False
64
+
65
+ logger = logging.getLogger("mathpulse.analytics")
66
+
67
+
68
+ def _normalize_topic_key(value: str) -> str:
69
+ key = re.sub(r"[^a-z0-9\s]+", " ", (value or "").lower())
70
+ key = re.sub(r"\s+", " ", key).strip()
71
+ return key
72
+
73
+
74
+ TOPIC_LABEL_ALIASES: Dict[str, str] = {
75
+ _normalize_topic_key("Functions and Relations"): "Functions as Mathematical Models",
76
+ _normalize_topic_key("Evaluating Functions"): "Function Notation and Evaluation",
77
+ _normalize_topic_key("Rational Functions"): "Graphs of Rational Functions",
78
+ _normalize_topic_key("Exponential Functions"): "Graphs of Exponential Functions",
79
+ _normalize_topic_key("Logarithmic Functions"): "Graphs of Logarithmic Functions",
80
+ _normalize_topic_key("Simple Interest"): "Simple and Compound Interest",
81
+ _normalize_topic_key("Compound Interest"): "Simple and Compound Interest",
82
+ _normalize_topic_key("Annuities"): "Simple and General Annuities",
83
+ _normalize_topic_key("Loans and Amortization"): "Loans, Amortization, and Sinking Funds",
84
+ _normalize_topic_key("Stocks and Bonds"): "Stocks, Bonds, and Market Indices",
85
+ _normalize_topic_key("Propositions and Connectives"): "Propositions and Logical Connectives",
86
+ _normalize_topic_key("Truth Tables"): "Truth Values and Truth Tables",
87
+ _normalize_topic_key("Logical Equivalence"): "Logical Equivalence and Implication",
88
+ _normalize_topic_key("Valid Arguments and Fallacies"): "Validity of Arguments",
89
+ }
90
+
91
+
92
def _canonicalize_topic_label(value: str) -> str:
    """Map a raw topic label onto its canonical curriculum name.

    Blank/None input yields "Unknown"; labels with no registered alias pass
    through unchanged.
    """
    label = str(value or "").strip()
    if label:
        return TOPIC_LABEL_ALIASES.get(_normalize_topic_key(label), label)
    return "Unknown"
97
+
98
+ # ─── Configuration ─────────────────────────────────────────────
99
+
100
+ RISK_MODEL_PATH = "models/risk_classifier.joblib"
101
+ IRT_DIFFICULTY_CACHE_TTL = 3600 # 1 hour
102
+ MIN_QUIZ_ATTEMPTS_FOR_COMPETENCY = 3
103
+ LEARNING_VELOCITY_WINDOW_DAYS = 30
104
+ COMPETENCY_THRESHOLDS = {
105
+ "beginner": (0, 40),
106
+ "developing": (40, 65),
107
+ "proficient": (65, 85),
108
+ "advanced": (85, 100),
109
+ }
110
+
111
+ # Topic dependency / prerequisite graph
112
+ TOPIC_PREREQUISITES: Dict[str, List[str]] = {
113
+ "Quadratic Equations": ["Linear Equations", "Variables & Expressions"],
114
+ "Systems of Equations": ["Linear Equations", "Slope & Rate of Change"],
115
+ "Polynomials": ["Variables & Expressions", "Exponents & Powers"],
116
+ "Factoring": ["Polynomials", "Variables & Expressions"],
117
+ "Quadratic Functions": ["Quadratic Equations", "Functions"],
118
+ "Exponential Functions": ["Exponents & Powers", "Functions"],
119
+ "Trigonometric Ratios": ["Pythagorean Theorem", "Angles", "Triangles"],
120
+ "Trigonometric Functions": ["Trigonometric Ratios", "Functions"],
121
+ "Derivatives": ["Limits", "Functions"],
122
+ "Integration": ["Derivatives", "Area Under a Curve"],
123
+ "Limits": ["Functions", "Rational Expressions"],
124
+ "Coordinate Geometry": ["Linear Equations", "Slope & Rate of Change"],
125
+ "Circle Theorems": ["Circles", "Angles"],
126
+ "Logarithmic Functions": ["Exponential Functions"],
127
+ "Rational Functions": ["Polynomials", "Factoring"],
128
+ "Complex Numbers": ["Quadratic Equations", "Radicals & Exponents"],
129
+ "Matrices (Introduction)": ["Systems of Equations"],
130
+ "Conic Sections": ["Coordinate Geometry", "Quadratic Functions"],
131
+ "Probability of Compound Events": ["Probability Basics"],
132
+ "Permutations & Combinations": ["Probability Basics", "Factorial"],
133
+ "Hypothesis Testing Basics": ["Normal Distribution Basics", "Sampling Methods"],
134
+ "Confidence Intervals": ["Normal Distribution Basics", "Sampling Methods"],
135
+ "Regression Analysis": ["Scatter Plots", "Linear Functions"],
136
+ "Statistical Inference": ["Hypothesis Testing Basics", "Confidence Intervals"],
137
+ "Multivariable Calculus": ["Derivatives", "Integration"],
138
+ "Differential Equations": ["Derivatives", "Integration"],
139
+ "Vector Calculus": ["Multivariable Calculus", "Vectors"],
140
+ "Linear Transformations": ["Matrices & Determinants", "Vector Spaces"],
141
+ "Eigenvalues & Eigenvectors": ["Matrices & Determinants"],
142
+ }
143
+
144
+
145
+ # ─── Pydantic Models ──────────────────────────────────────────
146
+
147
+ class CompetencyAnalysisRequest(BaseModel):
148
+ studentId: str
149
+ topicId: Optional[str] = None
150
+
151
+
152
+ class CompetencyAnalysis(BaseModel):
153
+ topicId: str
154
+ topicName: str
155
+ efficiencyScore: float = Field(..., ge=0, le=100)
156
+ competencyLevel: str
157
+ masteryPercentage: float
158
+ learningVelocity: float
159
+ totalAttempts: int
160
+ averageAccuracy: float
161
+ lastAttemptDate: Optional[str] = None
162
+
163
+
164
+ class CompetencyAnalysisResponse(BaseModel):
165
+ studentId: str
166
+ status: str # "success" | "insufficient_data"
167
+ analyses: List[CompetencyAnalysis]
168
+ overallCompetency: Optional[str] = None
169
+ thetaEstimate: Optional[float] = None
170
+
171
+
172
+ class TopicRecommendation(BaseModel):
173
+ topicId: str
174
+ topicName: str
175
+ recommendationScore: float
176
+ reasoning: str
177
+ estimatedTimeToMastery: int # hours
178
+ prerequisitesMet: bool
179
+ currentCompetency: str
180
+
181
+
182
+ class TopicRecommendationRequest(BaseModel):
183
+ studentId: str
184
+ numRecommendations: int = Field(default=5, ge=1, le=20)
185
+
186
+
187
+ class TopicRecommendationResponse(BaseModel):
188
+ studentId: str
189
+ recommendations: List[TopicRecommendation]
190
+ status: str
191
+
192
+
193
class EnhancedRiskPrediction(BaseModel):
    """Risk prediction payload returned to API consumers.

    NOTE(review): carries both camelCase fields (riskLevel, modelUsed, ...)
    and snake_case duplicates (risk_level, risk_score, top_factors) —
    presumably to serve two consumer naming conventions; confirm producers
    keep the mirrored fields in sync.
    """
    riskLevel: str
    confidence: float
    probabilities: Dict[str, float]
    contributingFactors: List[Dict[str, Any]]
    recommendations: List[str]
    modelUsed: str  # "ml_model" | "rule_based" | "zero_shot"
    # snake_case duplicates of the fields above
    risk_level: str
    risk_score: float
    top_factors: List[str]
203
+
204
+
205
+ def _to_strict_risk_level(level: str) -> str:
206
+ normalized = (level or "").strip().lower()
207
+ if normalized in {"high", "medium", "low"}:
208
+ return normalized
209
+ return "medium"
210
+
211
+
212
+ def _extract_top_factor_texts(factors: List[Dict[str, Any]]) -> List[str]:
213
+ texts: List[str] = []
214
+ for factor in factors[:3]:
215
+ detail = str(factor.get("detail") or "").strip()
216
+ feature = str(factor.get("feature") or "").strip()
217
+ value = factor.get("value")
218
+ if detail:
219
+ texts.append(detail)
220
+ elif feature and value is not None:
221
+ texts.append(f"{feature}={value}")
222
+ elif feature:
223
+ texts.append(feature)
224
+ if not texts:
225
+ texts.append("No major risk indicators detected")
226
+ return texts
227
+
228
+
229
+ class EnhancedRiskRequest(BaseModel):
230
+ studentId: str
231
+ engagementScore: float = Field(..., ge=0, le=100)
232
+ avgQuizScore: float = Field(..., ge=0, le=100)
233
+ attendance: float = Field(..., ge=0, le=100)
234
+ assignmentCompletion: float = Field(..., ge=0, le=100)
235
+ streak: Optional[int] = 0
236
+ xpGrowthRate: Optional[float] = 0.0
237
+ timeOnPlatform: Optional[float] = 0.0 # hours
238
+ # Optional trend data
239
+ engagementTrend7d: Optional[float] = None
240
+ quizScoreVariance: Optional[float] = None
241
+ consecutiveAbsences: Optional[int] = 0
242
+ daysSinceLastActivity: Optional[int] = 0
243
+
244
+
245
+ class StudentRiskPredictionV2(BaseModel):
246
+ risk_level: Literal["low", "medium", "high"]
247
+ risk_score: float = Field(..., ge=0.0, le=1.0)
248
+ top_factors: List[str]
249
+ probabilities: Dict[str, float]
250
+ model_used: str
251
+
252
+
253
+ class RiskTrainRequest(BaseModel):
254
+ forceRetrain: bool = False
255
+
256
+
257
+ class RiskTrainResponse(BaseModel):
258
+ status: str
259
+ accuracy: float
260
+ precision: float
261
+ recall: float
262
+ f1Score: float
263
+ samplesUsed: int
264
+ modelPath: str
265
+
266
+
267
+ class CalibrateDifficultyRequest(BaseModel):
268
+ questionId: str
269
+ studentResponses: List[Dict[str, Any]] # [{studentId, correct, timeSpent, attempts}]
270
+
271
+
272
+ class CalibrateDifficultyResponse(BaseModel):
273
+ questionId: str
274
+ difficultyParameter: float # b parameter
275
+ discriminationParameter: float # a parameter
276
+ guessingParameter: float # c parameter
277
+ difficultyLabel: str # "easy" | "medium" | "hard"
278
+ totalResponses: int
279
+ successRate: float
280
+
281
+
282
+ class AdaptiveQuizRequest(BaseModel):
283
+ studentId: str
284
+ topicId: str
285
+ numQuestions: int = Field(default=10, ge=1, le=50)
286
+ targetSuccessRate: float = Field(default=0.70, ge=0.3, le=0.95)
287
+
288
+
289
+ class AdaptiveQuizSelection(BaseModel):
290
+ questionId: str
291
+ estimatedDifficulty: float
292
+ predictedSuccessProbability: float
293
+ difficultyLabel: str
294
+
295
+
296
+ class AdaptiveQuizResponse(BaseModel):
297
+ studentId: str
298
+ topicId: str
299
+ selectedQuestions: List[AdaptiveQuizSelection]
300
+ studentAbilityEstimate: float
301
+ expectedSuccessRate: float
302
+ difficultyDistribution: Dict[str, int]
303
+
304
+
305
+ class StudentSummaryResponse(BaseModel):
306
+ studentId: str
307
+ competencyDistribution: Dict[str, int]
308
+ riskAssessment: Optional[Dict[str, Any]] = None
309
+ recommendedTopics: List[Dict[str, Any]]
310
+ learningVelocityTrend: List[Dict[str, Any]]
311
+ efficiencyScores: Dict[str, float]
312
+ predictedNextQuizScore: Optional[float] = None
313
+ engagementPatterns: Dict[str, Any]
314
+ status: str
315
+
316
+
317
+ class ClassInsightsRequest(BaseModel):
318
+ teacherId: str
319
+ classId: Optional[str] = None
320
+
321
+
322
+ class ClassInsightsResponse(BaseModel):
323
+ teacherId: str
324
+ riskDistribution: Dict[str, int]
325
+ riskTrend: List[Dict[str, Any]]
326
+ commonWeakTopics: List[Dict[str, Any]]
327
+ learningVelocityDistribution: Dict[str, float]
328
+ engagementPatterns: Dict[str, Any]
329
+ interventionRecommendations: List[Dict[str, Any]]
330
+ successPredictions: Dict[str, Any]
331
+ totalStudents: int
332
+ status: str
333
+
334
+
335
+ class MockDataRequest(BaseModel):
336
+ numStudents: int = Field(default=30, ge=1, le=200)
337
+ numQuizzes: int = Field(default=20, ge=1, le=100)
338
+ seed: Optional[int] = None
339
+
340
+
341
+ class RefreshCacheResponse(BaseModel):
342
+ status: str
343
+ cachedItems: int
344
+ timestamp: str
345
+
346
+
347
+ # ─── In-Memory Caches ─────────────────────────────────────────
348
+
349
+ _competency_cache: Dict[str, Tuple[float, Any]] = {}
350
+ _class_stats_cache: Dict[str, Tuple[float, Any]] = {}
351
+ _difficulty_cache: Dict[str, Tuple[float, Any]] = {}
352
+ _risk_model_cache: Dict[str, Any] = {}
353
+
354
+
355
+ def _cache_get(cache: Dict[str, Tuple[float, Any]], key: str, ttl: int) -> Optional[Any]:
356
+ """Get from cache if not expired."""
357
+ if key in cache:
358
+ ts, val = cache[key]
359
+ if time.time() - ts < ttl:
360
+ return val
361
+ del cache[key]
362
+ return None
363
+
364
+
365
+ def _cache_set(cache: Dict[str, Tuple[float, Any]], key: str, value: Any):
366
+ """Set a cache entry with current timestamp."""
367
+ cache[key] = (time.time(), value)
368
+
369
+
370
+ # ─── Firebase Helpers ──────────────────────────────────────────
371
+
372
+ _firestore_db = None
373
+
374
+
375
def _get_firestore_db():
    """Get or initialise the module-level Firestore client.

    Returns None (and logs a warning) when firebase-admin is missing or
    initialisation fails, so callers can degrade to mock/empty data.
    """
    # Memoised: reuse the module-level handle once it has been created.
    global _firestore_db
    if _firestore_db is not None:
        return _firestore_db

    if not HAS_FIREBASE:
        logger.warning("firebase-admin not installed; Firestore operations will use mock data")
        return None

    try:
        # Check if already initialised
        firebase_admin.get_app()
    except ValueError:
        # Initialise with default credentials or service account
        cred_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
        if cred_path and os.path.exists(cred_path):
            cred = credentials.Certificate(cred_path)
            firebase_admin.initialize_app(cred)
        else:
            # Try default credentials (e.g., GCP environment)
            try:
                firebase_admin.initialize_app()
            except Exception as e:
                # Best-effort: log and signal "no Firestore" rather than crash.
                logger.warning(f"Could not initialise Firebase: {e}")
                return None

    _firestore_db = firestore.client()
    return _firestore_db
404
+
405
+
406
async def fetch_student_quiz_history(student_id: str) -> List[Dict[str, Any]]:
    """Fetch quiz attempt history for a student from Firestore.

    Combines documents from the "progress" collection (matched on userId)
    with documents from the "quizAttempts" collection (matched on studentId,
    newest first, tagged source="quizAttempts"). Each record gains an "id"
    field holding its Firestore document id. Returns an empty list when
    Firestore is unavailable or any query fails.
    """
    db = _get_firestore_db()
    if db is None:
        logger.info(f"No Firestore connection; returning empty quiz history for {student_id}")
        return []

    try:
        # Query progress collection for the student
        progress_ref = db.collection("progress").where("userId", "==", student_id)
        docs = progress_ref.stream()
        history = []
        for doc in docs:
            data = doc.to_dict()
            if data:
                data["id"] = doc.id
                history.append(data)

        # Also check quizAttempts subcollection if it exists
        quiz_ref = db.collection("quizAttempts").where("studentId", "==", student_id).order_by(
            "completedAt", direction=firestore.Query.DESCENDING
        )
        quiz_docs = quiz_ref.stream()
        for doc in quiz_docs:
            data = doc.to_dict()
            if data:
                data["id"] = doc.id
                # Mark origin so downstream code can tell the two sources apart.
                data["source"] = "quizAttempts"
                history.append(data)

        logger.info(f"Fetched {len(history)} quiz history records for student {student_id}")
        return history

    except Exception as e:
        # Best-effort: a failed query degrades to "no history" rather than raising.
        logger.error(f"Error fetching quiz history for {student_id}: {e}")
        return []
442
+
443
+
444
async def fetch_student_engagement_metrics(student_id: str, days: int = 30) -> Dict[str, Any]:
    """Fetch engagement metrics for a student over the past N days.

    Aggregates "xpActivities" documents into total XP, an activity count,
    and per-day / per-hour activity histograms. Always returns a dict that
    contains at least totalXP, activityCount, dailyActivity, hourlyActivity,
    activeDays and avgActivitiesPerDay, so callers can rely on a stable
    shape even when Firestore is unavailable or the query fails.
    """
    empty_metrics: Dict[str, Any] = {
        "totalXP": 0,
        "activityCount": 0,
        "dailyActivity": {},
        "hourlyActivity": {},
        "activeDays": 0,
        "avgActivitiesPerDay": 0,
    }

    db = _get_firestore_db()
    if db is None:
        # Fix: this branch used to return a different key set
        # (totalTimeOnPlatform / sessionsCount / avgSessionDuration) than the
        # success and error paths; return the union of both shapes so callers
        # keyed on either keep working.
        return {
            **empty_metrics,
            "totalTimeOnPlatform": 0,
            "sessionsCount": 0,
            "avgSessionDuration": 0,
        }

    try:
        # NOTE(review): naive UTC timestamp — assumes stored document
        # timestamps compare correctly against naive UTC; confirm.
        cutoff = datetime.utcnow() - timedelta(days=days)

        # Fetch XP activities as engagement proxy
        xp_ref = db.collection("xpActivities").where(
            "userId", "==", student_id
        ).where("timestamp", ">=", cutoff)
        xp_docs = xp_ref.stream()

        daily_activity: Dict[str, int] = {}
        hourly_activity: Dict[int, int] = defaultdict(int)
        total_xp = 0
        activity_count = 0

        for doc in xp_docs:
            data = doc.to_dict()
            if data:
                activity_count += 1
                total_xp += data.get("xpAmount", 0)
                ts = data.get("timestamp")
                if ts:
                    # Firestore timestamps expose .seconds; plain datetimes pass through.
                    if hasattr(ts, "seconds"):
                        dt = datetime.utcfromtimestamp(ts.seconds)
                    elif isinstance(ts, datetime):
                        dt = ts
                    else:
                        continue
                    day_key = dt.strftime("%Y-%m-%d")
                    daily_activity[day_key] = daily_activity.get(day_key, 0) + 1
                    hourly_activity[dt.hour] += 1

        return {
            "totalXP": total_xp,
            "activityCount": activity_count,
            "dailyActivity": daily_activity,
            "hourlyActivity": dict(hourly_activity),
            "activeDays": len(daily_activity),
            "avgActivitiesPerDay": round(activity_count / max(len(daily_activity), 1), 2),
        }

    except Exception as e:
        # Degrade to the stable empty shape on any query failure.
        logger.error(f"Error fetching engagement metrics for {student_id}: {e}")
        return dict(empty_metrics)
499
+
500
+
501
def fetch_topic_dependencies() -> Dict[str, List[str]]:
    """Return a shallow defensive copy of the topic prerequisite graph."""
    return dict(TOPIC_PREREQUISITES)
504
+
505
+
506
async def store_competency_analysis(student_id: str, analysis: Dict[str, Any]):
    """Store competency analysis results in Firestore.

    Writes to competencyAnalyses/<student_id> with merge semantics and a
    server-side updatedAt timestamp. No-op (logged) when Firestore is
    unavailable; write errors are logged, never raised.
    """
    db = _get_firestore_db()
    if db is None:
        logger.info(f"No Firestore; skipping competency storage for {student_id}")
        return

    try:
        doc_ref = db.collection("competencyAnalyses").document(student_id)
        # NOTE: mutates the caller's dict by adding updatedAt.
        analysis["updatedAt"] = firestore.SERVER_TIMESTAMP
        doc_ref.set(analysis, merge=True)
        logger.info(f"Stored competency analysis for {student_id}")
    except Exception as e:
        logger.error(f"Error storing competency analysis: {e}")
520
+
521
+
522
async def store_question_difficulty(question_id: str, params: Dict[str, Any]):
    """Store question IRT difficulty parameters in Firestore.

    Writes to questions/<question_id>/difficulty_params/irt with merge
    semantics and a server-side updatedAt timestamp. No-op (logged) when
    Firestore is unavailable; write errors are logged, never raised.
    """
    db = _get_firestore_db()
    if db is None:
        logger.info(f"No Firestore; skipping difficulty storage for {question_id}")
        return

    try:
        doc_ref = db.collection("questions").document(question_id).collection(
            "difficulty_params"
        ).document("irt")
        # NOTE: mutates the caller's dict by adding updatedAt.
        params["updatedAt"] = firestore.SERVER_TIMESTAMP
        doc_ref.set(params, merge=True)
        logger.info(f"Stored difficulty params for question {question_id}")
    except Exception as e:
        logger.error(f"Error storing question difficulty: {e}")
538
+
539
+
540
+ # ─── IRT & Statistical Helpers ─────────────────────────────────
541
+
542
+
543
+ def _irt_3pl_probability(theta: float, a: float, b: float, c: float = 0.25) -> float:
544
+ """
545
+ 3-Parameter Logistic IRT model.
546
+ P(correct) = c + (1 - c) / (1 + exp(-a * (theta - b)))
547
+ theta: student ability
548
+ a: discrimination
549
+ b: difficulty
550
+ c: guessing parameter
551
+ """
552
+ exponent = -a * (theta - b)
553
+ exponent = max(-20, min(20, exponent)) # numerical stability
554
+ return c + (1 - c) / (1 + math.exp(exponent))
555
+
556
+
557
def _estimate_theta(responses: List[Dict[str, Any]], difficulty_params: Dict[str, Dict[str, float]]) -> float:
    """Estimate student ability (theta) by maximum likelihood under the 3PL model.

    responses: list of {questionId, correct: bool}
    difficulty_params: {questionId: {a, b, c}}; unknown questions default to
    a=1.0, b=0.0, c=0.25. Returns 0.0 for an empty response list, otherwise
    the bounded MLE over theta in [-4, 4], rounded to 3 decimals.
    """
    if not responses:
        return 0.0

    def neg_log_likelihood(theta: float) -> float:
        total = 0.0
        for response in responses:
            question_id = response.get("questionId", "")
            params = difficulty_params.get(question_id, {"a": 1.0, "b": 0.0, "c": 0.25})
            prob = _irt_3pl_probability(theta, params["a"], params["b"], params.get("c", 0.25))
            prob = min(1 - 1e-10, max(1e-10, prob))  # keep log() finite
            total += math.log(prob) if response.get("correct", False) else math.log(1 - prob)
        return -total

    fit = minimize_scalar(neg_log_likelihood, bounds=(-4, 4), method="bounded")
    return round(fit.x, 3)
581
+
582
+
583
+ def _calculate_learning_velocity(scores_over_time: List[Tuple[float, float]]) -> float:
584
+ """
585
+ Calculate learning velocity using weighted linear regression.
586
+ scores_over_time: list of (timestamp_as_days, score)
587
+ Returns slope (positive = improving, negative = declining).
588
+ """
589
+ if len(scores_over_time) < 2:
590
+ return 0.0
591
+
592
+ times = np.array([t for t, _ in scores_over_time]).reshape(-1, 1)
593
+ scores = np.array([s for _, s in scores_over_time])
594
+
595
+ # Exponential decay weights (more recent = higher weight)
596
+ max_time = times.max()
597
+ decay_rate = 0.05
598
+ weights = np.exp(-decay_rate * (max_time - times.flatten()))
599
+ weights = weights / weights.sum()
600
+
601
+ # Weighted linear regression
602
+ model = LinearRegression()
603
+ model.fit(times, scores, sample_weight=weights)
604
+
605
+ return round(float(model.coef_[0]), 4)
606
+
607
+
608
+ def _calculate_efficiency_score(
609
+ student_times: List[float],
610
+ student_accuracies: List[bool],
611
+ class_avg_time: float,
612
+ attempt_counts: List[int],
613
+ ) -> float:
614
+ """
615
+ Efficiency = (class_avg_time / student_time) * accuracy_multiplier * 100
616
+ Penalise multiple attempts.
617
+ """
618
+ if not student_times or class_avg_time <= 0:
619
+ return 50.0
620
+
621
+ efficiencies = []
622
+ for t, correct, attempts in zip(student_times, student_accuracies, attempt_counts):
623
+ if t <= 0:
624
+ t = 1.0
625
+ time_ratio = class_avg_time / t
626
+ accuracy_mult = 1.0 if correct else 0.3
627
+ attempt_penalty = 1.0 / max(attempts, 1)
628
+ eff = time_ratio * accuracy_mult * attempt_penalty * 100
629
+ efficiencies.append(min(eff, 150)) # cap at 150 to avoid outliers
630
+
631
+ raw = sum(efficiencies) / len(efficiencies)
632
+ return round(min(max(raw, 0), 100), 2)
633
+
634
+
635
def _get_competency_level(score: float) -> str:
    """Map a score (0-100) to a competency level label."""
    # Find the band whose [low, high) range contains the score.
    matched = next(
        (
            level
            for level, (low, high) in COMPETENCY_THRESHOLDS.items()
            if low <= score < high
        ),
        None,
    )
    if matched is not None:
        return matched
    # Scores falling outside every configured band (e.g. at the open top
    # boundary): 85 and above counts as advanced, otherwise beginner.
    return "beginner" if score < 85 else "advanced"
641
+
642
+
643
+ # ─── Competency Assessment System ─────────────────────────────
644
+
645
+
646
async def compute_competency_analysis(
    student_id: str,
    quiz_history: List[Dict[str, Any]],
    topic_filter: Optional[str] = None,
) -> CompetencyAnalysisResponse:
    """
    Full competency analysis using IRT approach.

    Groups the student's quiz history by topic, estimates a global ability
    (theta) from all responses, then computes per-topic efficiency, mastery,
    learning velocity, and a competency level. Topics are returned weakest
    first. Returns status "insufficient_data" when history is too short or
    the topic filter matches nothing.
    """
    if not quiz_history or len(quiz_history) < MIN_QUIZ_ATTEMPTS_FOR_COMPETENCY:
        return CompetencyAnalysisResponse(
            studentId=student_id,
            status="insufficient_data",
            analyses=[],
            overallCompetency=None,
            thetaEstimate=None,
        )

    # Group by topic (canonicalized so label variants collapse together).
    topic_data: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for entry in quiz_history:
        topic = _canonicalize_topic_label(str(entry.get("topicId") or entry.get("topic") or "Unknown"))
        if topic_filter and topic != _canonicalize_topic_label(topic_filter):
            continue
        topic_data[topic].append(entry)

    if not topic_data:
        return CompetencyAnalysisResponse(
            studentId=student_id,
            status="insufficient_data",
            analyses=[],
        )

    # Build difficulty params from class-wide success rates.
    # NOTE(review): every question currently gets the same default 3PL
    # parameters (a=1, b=0, c=0.25) — per-question calibration is not
    # applied here; confirm whether calibrated params should be loaded.
    difficulty_params: Dict[str, Dict[str, float]] = {}
    all_responses_for_irt: List[Dict[str, Any]] = []

    for topic, entries in topic_data.items():
        for entry in entries:
            qid = entry.get("questionId", entry.get("id", f"{topic}_{len(all_responses_for_irt)}"))
            correct = entry.get("correct", False)
            # Normalize numeric "correct" flags (e.g. 0/1 or fractions) to bool.
            if isinstance(correct, (int, float)):
                correct = correct > 0.5
            score = entry.get("score", 0)
            total = entry.get("total", 1)
            # Non-bool, non-numeric values fall back to score ratio >= 50%.
            if not isinstance(correct, bool) and total > 0:
                correct = (score / total) >= 0.5

            all_responses_for_irt.append({"questionId": qid, "correct": correct})

            # Estimate difficulty from success rate across the dataset
            if qid not in difficulty_params:
                difficulty_params[qid] = {"a": 1.0, "b": 0.0, "c": 0.25}

    # Estimate theta (student ability) over the full response set.
    theta = _estimate_theta(all_responses_for_irt, difficulty_params)

    # Per-topic analysis
    analyses: List[CompetencyAnalysis] = []

    for topic, entries in topic_data.items():
        topic_name = topic.replace("_", " ").title()

        # Accumulators for accuracy, timing, attempts, and trend data.
        correct_count = 0
        total_count = 0
        first_attempt_correct = 0
        first_attempt_total = 0
        times: List[float] = []
        accuracies: List[bool] = []
        attempt_counts: List[int] = []
        scores_over_time: List[Tuple[float, float]] = []

        for entry in entries:
            total_count += 1
            score = entry.get("score", 0)
            total = max(entry.get("total", 1), 1)
            pct = (score / total) * 100
            # An attempt counts as "correct" at >= 50% of available marks.
            correct = pct >= 50
            if correct:
                correct_count += 1

            attempts = entry.get("attempts", 1)
            if attempts <= 1 and correct:
                first_attempt_correct += 1
            first_attempt_total += 1

            time_spent = entry.get("timeTaken") or entry.get("timeSpent") or 60
            times.append(float(time_spent))
            accuracies.append(correct)
            attempt_counts.append(max(attempts, 1))

            # Timestamp for velocity — accept epoch numbers, objects with a
            # .seconds attribute (presumably Firestore timestamps — confirm),
            # datetime, or ISO strings; everything is converted to days.
            ts = entry.get("completedAt") or entry.get("timestamp") or entry.get("date")
            if ts:
                if isinstance(ts, (int, float)):
                    day_val = ts / 86400
                elif hasattr(ts, "seconds"):
                    day_val = ts.seconds / 86400
                elif isinstance(ts, datetime):
                    day_val = ts.timestamp() / 86400
                elif isinstance(ts, str):
                    try:
                        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                        day_val = dt.timestamp() / 86400
                    except Exception:
                        day_val = time.time() / 86400
                else:
                    day_val = time.time() / 86400
                scores_over_time.append((day_val, pct))

        avg_accuracy = (correct_count / max(total_count, 1)) * 100
        mastery_pct = (first_attempt_correct / max(first_attempt_total, 1)) * 100

        # Class average time (use all entries as proxy)
        class_avg_time = float(np.mean(times)) if times else 60.0

        efficiency = _calculate_efficiency_score(times, accuracies, class_avg_time, attempt_counts)
        velocity = _calculate_learning_velocity(scores_over_time)
        competency_level = _get_competency_level(avg_accuracy)

        # Last attempt date (convert most recent day value back to ISO).
        last_date = None
        if scores_over_time:
            last_ts = max(t for t, _ in scores_over_time)
            last_date = datetime.utcfromtimestamp(last_ts * 86400).isoformat()

        analyses.append(CompetencyAnalysis(
            topicId=topic,
            topicName=topic_name,
            efficiencyScore=efficiency,
            competencyLevel=competency_level,
            masteryPercentage=round(mastery_pct, 2),
            learningVelocity=velocity,
            totalAttempts=total_count,
            averageAccuracy=round(avg_accuracy, 2),
            lastAttemptDate=last_date,
        ))

    # Sort by efficiency score ascending (weakest first)
    analyses.sort(key=lambda a: a.efficiencyScore)

    # Overall competency = level of the mean efficiency across topics.
    if analyses:
        avg_eff = sum(a.efficiencyScore for a in analyses) / len(analyses)
        overall = _get_competency_level(avg_eff)
    else:
        overall = None

    return CompetencyAnalysisResponse(
        studentId=student_id,
        status="success",
        analyses=analyses,
        overallCompetency=overall,
        thetaEstimate=theta,
    )
801
+
802
+
803
+ # ─── Enhanced Risk Prediction ─────────────────────────────────
804
+
805
+
806
+ def _build_risk_features(data: EnhancedRiskRequest) -> np.ndarray:
807
+ """Build feature vector for risk prediction."""
808
+ features = [
809
+ data.engagementScore,
810
+ data.avgQuizScore,
811
+ data.attendance,
812
+ data.assignmentCompletion,
813
+ data.streak or 0,
814
+ data.xpGrowthRate or 0.0,
815
+ data.timeOnPlatform or 0.0,
816
+ data.engagementTrend7d or 0.0,
817
+ data.quizScoreVariance or 0.0,
818
+ data.consecutiveAbsences or 0,
819
+ data.daysSinceLastActivity or 0,
820
+ ]
821
+ return np.array(features).reshape(1, -1)
822
+
823
+
824
# Column names for the risk-feature vector. Order MUST stay in sync with
# _build_risk_features(): the trained model, SHAP explanations, and the
# feature-importance fallback all index into this list positionally.
RISK_FEATURE_NAMES = [
    "engagementScore",
    "avgQuizScore",
    "attendance",
    "assignmentCompletion",
    "streak",
    "xpGrowthRate",
    "timeOnPlatform",
    "engagementTrend7d",
    "quizScoreVariance",
    "consecutiveAbsences",
    "daysSinceLastActivity",
]
837
+
838
+
839
def _load_risk_model():
    """Return the trained risk classifier, loading and caching it on first use.

    Returns None when joblib is unavailable, no model file exists at
    RISK_MODEL_PATH, or deserialization fails (failure is logged).
    """
    if not HAS_JOBLIB:
        return None

    cache_key = "risk_model"
    cached = _risk_model_cache.get(cache_key)
    if cached is not None:
        # Warm path: model already deserialized this process.
        return cached

    if not os.path.exists(RISK_MODEL_PATH):
        return None

    try:
        model = joblib.load(RISK_MODEL_PATH)
    except Exception as e:
        logger.error(f"Error loading risk model: {e}")
        return None

    _risk_model_cache[cache_key] = model
    logger.info("Loaded trained risk model from disk")
    return model
858
+
859
+
860
def _rule_based_risk(data: EnhancedRiskRequest) -> EnhancedRiskPrediction:
    """Fallback rule-based risk prediction when no ML model is available.

    Blends four core indicators into a 0-100 score, applies fixed
    penalties/bonuses for disengagement and momentum signals, then maps
    the score to a risk level with canned probabilities, factors, and
    recommendations.
    """
    # Weighted blend of the four core indicators.
    score = (
        data.engagementScore * 0.25
        + data.avgQuizScore * 0.30
        + data.attendance * 0.25
        + data.assignmentCompletion * 0.20
    )

    streak = data.streak or 0

    # Penalties for disengagement signals.
    if (data.consecutiveAbsences or 0) >= 3:
        score -= 10
    if (data.daysSinceLastActivity or 0) >= 7:
        score -= 10
    if streak == 0:
        score -= 5

    # Bonuses for sustained momentum.
    if streak >= 7:
        score += 5
    if (data.engagementTrend7d or 0) > 0:
        score += 5

    score = min(100, max(0, score))

    # Map the blended score to a level and a fixed probability profile.
    if score >= 70:
        risk_level, probs = "Low", {"High": 0.05, "Medium": 0.15, "Low": 0.80}
    elif score >= 45:
        risk_level, probs = "Medium", {"High": 0.15, "Medium": 0.55, "Low": 0.30}
    else:
        risk_level, probs = "High", {"High": 0.70, "Medium": 0.20, "Low": 0.10}

    # Collect triggered explanations in fixed priority order.
    factor_rules = [
        (data.avgQuizScore < 50,
         {"feature": "avgQuizScore", "impact": -0.3, "detail": "Low quiz scores"}),
        (data.attendance < 60,
         {"feature": "attendance", "impact": -0.25, "detail": "Poor attendance"}),
        (data.engagementScore < 40,
         {"feature": "engagementScore", "impact": -0.2, "detail": "Low engagement"}),
        ((data.consecutiveAbsences or 0) >= 3,
         {"feature": "consecutiveAbsences", "impact": -0.15, "detail": "Multiple consecutive absences"}),
        (data.assignmentCompletion < 50,
         {"feature": "assignmentCompletion", "impact": -0.2, "detail": "Low assignment completion"}),
    ]
    factors = [payload for triggered, payload in factor_rules if triggered]
    if not factors:
        factors = [{"feature": "overall", "impact": 0.0, "detail": "No major risk factors identified"}]

    # Canned guidance per risk level.
    recommendations_by_level = {
        "High": [
            "Schedule immediate one-on-one check-in with student",
            "Set up tutoring sessions for weak subjects",
            "Contact parent/guardian about academic concerns",
            "Create a structured study plan with daily goals",
        ],
        "Medium": [
            "Monitor progress closely over next 2 weeks",
            "Encourage participation in study groups",
            "Assign additional practice exercises for weak areas",
        ],
        "Low": [
            "Continue current learning approach",
            "Challenge with advanced material when ready",
        ],
    }
    recommendations = recommendations_by_level[risk_level]

    return EnhancedRiskPrediction(
        riskLevel=risk_level,
        confidence=round(max(probs.values()), 3),
        probabilities=probs,
        contributingFactors=factors[:3],
        recommendations=recommendations,
        modelUsed="rule_based",
        risk_level=_to_strict_risk_level(risk_level),
        risk_score=round(float(probs.get("High", 0.0)), 4),
        top_factors=_extract_top_factor_texts(factors),
    )
940
+
941
+
942
async def predict_risk_enhanced(data: EnhancedRiskRequest) -> EnhancedRiskPrediction:
    """Enhanced risk prediction using trained ML model with SHAP explanations.

    Falls back to the rule-based predictor when no model is on disk or when
    inference fails for any reason, so this coroutine never raises to callers.
    """
    model = _load_risk_model()

    if model is None:
        logger.info("No trained ML model found; using rule-based risk prediction")
        return _rule_based_risk(data)

    try:
        features = _build_risk_features(data)
        # Class encoding used at training time: 0=High, 1=Medium, 2=Low.
        label_map = {0: "High", 1: "Medium", 2: "Low"}

        # Predict
        prediction = model.predict(features)[0]
        probabilities_raw = model.predict_proba(features)[0]
        risk_level = label_map.get(int(prediction), "Medium")

        # Map per-class probabilities to labels; pad with 0.0 if the model
        # emits fewer classes than expected.
        probs = {}
        for i, label in label_map.items():
            if i < len(probabilities_raw):
                probs[label] = round(float(probabilities_raw[i]), 4)
            else:
                probs[label] = 0.0

        confidence = round(float(max(probabilities_raw)), 4)

        # SHAP explanations (best effort — failure degrades to a stub factor).
        factors = []
        if HAS_SHAP:
            try:
                explainer = shap.TreeExplainer(model)
                shap_values = explainer.shap_values(features)

                if isinstance(shap_values, list):
                    # Multi-class: use SHAP values for predicted class
                    sv = shap_values[int(prediction)][0]
                else:
                    sv = shap_values[0]

                # Get top 3 contributing features by absolute SHAP impact.
                feature_impacts = list(zip(RISK_FEATURE_NAMES, sv))
                feature_impacts.sort(key=lambda x: abs(x[1]), reverse=True)

                for fname, impact in feature_impacts[:3]:
                    idx = RISK_FEATURE_NAMES.index(fname)
                    fval = features[0][idx]
                    factors.append({
                        "feature": fname,
                        "impact": round(float(impact), 4),
                        "value": round(float(fval), 2),
                        "detail": f"{fname} = {fval:.1f} (SHAP impact: {impact:.3f})",
                    })
            except Exception as e:
                logger.warning(f"SHAP explanation failed: {e}")
                factors = [{"feature": "model_prediction", "impact": 0.0, "detail": "SHAP unavailable"}]
        else:
            # Feature importance fallback (tree models only; factors stays
            # empty when the model exposes no feature_importances_).
            if hasattr(model, "feature_importances_"):
                importances = model.feature_importances_
                fi = list(zip(RISK_FEATURE_NAMES, importances))
                fi.sort(key=lambda x: x[1], reverse=True)
                for fname, imp in fi[:3]:
                    idx = RISK_FEATURE_NAMES.index(fname)
                    fval = features[0][idx]
                    factors.append({
                        "feature": fname,
                        "impact": round(float(imp), 4),
                        "value": round(float(fval), 2),
                        "detail": f"{fname} = {fval:.1f} (importance: {imp:.3f})",
                    })

        # Recommendations based on prediction
        if risk_level == "High":
            recommendations = [
                "Immediate intervention recommended — schedule one-on-one session",
                "Review recent quiz performance for specific skill gaps",
                "Contact parent/guardian about academic concerns",
                "Create personalised remediation plan",
            ]
        elif risk_level == "Medium":
            recommendations = [
                "Monitor student progress more frequently",
                "Assign targeted practice for weak areas",
                "Encourage peer study groups",
            ]
        else:
            recommendations = [
                "Student is performing well — maintain current pace",
                "Consider enrichment activities for advanced topics",
            ]

        return EnhancedRiskPrediction(
            riskLevel=risk_level,
            confidence=confidence,
            probabilities=probs,
            contributingFactors=factors,
            recommendations=recommendations,
            modelUsed="ml_model",
            risk_level=_to_strict_risk_level(risk_level),
            risk_score=round(float(probs.get("High", 0.0)), 4),
            top_factors=_extract_top_factor_texts(factors),
        )

    except Exception as e:
        # Any inference failure degrades gracefully to the rule-based path.
        logger.error(f"ML risk prediction failed: {e}\n{traceback.format_exc()}")
        logger.info("Falling back to rule-based prediction")
        return _rule_based_risk(data)
1049
+
1050
+
1051
+ def _humanize_risk_factor(factor: Dict[str, Any]) -> str:
1052
+ """Convert raw factor payloads into teacher-friendly explanations."""
1053
+ feature = str(factor.get("feature", "overall"))
1054
+ value = factor.get("value", None)
1055
+
1056
+ if feature == "attendance":
1057
+ if value is not None:
1058
+ return f"Low attendance rate ({value:.0f}%) over recent sessions."
1059
+ return "Low attendance rate over recent sessions."
1060
+ if feature == "avgQuizScore":
1061
+ if value is not None:
1062
+ return f"Consistently low quiz performance (average {value:.0f}%)."
1063
+ return "Consistently low quiz performance."
1064
+ if feature == "assignmentCompletion":
1065
+ if value is not None:
1066
+ return f"Missing or incomplete assignments ({value:.0f}% completion)."
1067
+ return "Missing or incomplete assignments."
1068
+ if feature == "engagementScore":
1069
+ if value is not None:
1070
+ return f"Low learning engagement indicators ({value:.0f}%)."
1071
+ return "Low learning engagement indicators."
1072
+ if feature == "consecutiveAbsences":
1073
+ if value is not None:
1074
+ return f"Frequent recent absences ({int(value)} in a row)."
1075
+ return "Frequent recent absences."
1076
+ if feature == "daysSinceLastActivity":
1077
+ if value is not None:
1078
+ return f"Long inactivity window ({int(value)} days since last activity)."
1079
+ return "Long inactivity window since last activity."
1080
+
1081
+ detail = str(factor.get("detail", "")).strip()
1082
+ if detail:
1083
+ return detail
1084
+ return "Multiple performance indicators suggest elevated support needs."
1085
+
1086
+
1087
async def predict_risk_v2(data: EnhancedRiskRequest) -> StudentRiskPredictionV2:
    """Return normalized risk payload while reusing the existing enhanced ML model."""
    enhanced = await predict_risk_enhanced(data)

    # Normalize the legacy capitalized levels to the strict lowercase enum.
    level_lookup: Dict[str, Literal["low", "medium", "high"]] = {
        "High": "high",
        "Medium": "medium",
        "Low": "low",
    }
    normalized: Literal["low", "medium", "high"] = level_lookup.get(enhanced.riskLevel, "medium")

    p_high = float(enhanced.probabilities.get("High", 0.0))
    p_medium = float(enhanced.probabilities.get("Medium", 0.0))
    p_low = float(enhanced.probabilities.get("Low", 0.0))

    # Blended scalar score: full weight on High, half on Medium, clamped [0, 1].
    blended = max(0.0, min(1.0, round(p_high + (0.5 * p_medium), 4)))

    explanations = [_humanize_risk_factor(f) for f in enhanced.contributingFactors]
    if not explanations:
        explanations = ["Limited data available; monitor student trends after next assessments."]

    return StudentRiskPredictionV2(
        risk_level=normalized,
        risk_score=blended,
        top_factors=explanations[:3],
        probabilities={
            "high": round(p_high, 4),
            "medium": round(p_medium, 4),
            "low": round(p_low, 4),
        },
        model_used=enhanced.modelUsed,
    )
1117
+
1118
+
1119
async def train_risk_model(force_retrain: bool = False) -> RiskTrainResponse:
    """
    Train a risk classification model on historical student data.
    Tries XGBoost first, falls back to Random Forest.

    Pulls up to 500 student records from Firestore; when fewer than 50 usable
    rows are found, tops up with synthetic data so training always proceeds.
    Saves the fitted model to RISK_MODEL_PATH and clears the in-process
    model cache so the next prediction picks up the new model.

    Raises:
        ValueError: if joblib is not installed (model cannot be persisted).
    """
    if not HAS_JOBLIB:
        raise ValueError("joblib not installed; cannot save model")

    # Check if model exists and skip unless forced
    if os.path.exists(RISK_MODEL_PATH) and not force_retrain:
        return RiskTrainResponse(
            status="model_exists",
            accuracy=0.0,
            precision=0.0,
            recall=0.0,
            f1Score=0.0,
            samplesUsed=0,
            modelPath=RISK_MODEL_PATH,
        )

    # Fetch historical data from Firestore
    db = _get_firestore_db()
    X_data = []
    y_data = []

    if db is not None:
        try:
            users_ref = db.collection("users").where("role", "==", "student").limit(500)
            user_docs = users_ref.stream()

            for doc in user_docs:
                data = doc.to_dict()
                if not data:
                    continue

                # Feature order must match RISK_FEATURE_NAMES /
                # _build_risk_features; trend and variance are not stored
                # per-user, so they are zero-filled here.
                features = [
                    data.get("engagementScore", 50),
                    data.get("avgQuizScore", 50),
                    data.get("attendance", 80),
                    data.get("assignmentCompletion", 60),
                    data.get("streak", 0),
                    data.get("xpGrowthRate", 0),
                    data.get("timeOnPlatform", 0),
                    0.0,  # engagementTrend7d
                    0.0,  # quizScoreVariance
                    data.get("consecutiveAbsences", 0),
                    data.get("daysSinceLastActivity", 0),
                ]
                X_data.append(features)

                # Determine label from existing riskLevel or compute it
                # (encoding: 0=High, 1=Medium, 2=Low; unknown defaults to Low).
                risk = data.get("riskLevel", "")
                if risk == "High":
                    y_data.append(0)
                elif risk == "Medium":
                    y_data.append(1)
                else:
                    y_data.append(2)

        except Exception as e:
            logger.error(f"Error fetching training data: {e}")

    # If insufficient real data, generate synthetic training data
    if len(X_data) < 50:
        logger.info("Insufficient Firestore data; generating synthetic training data")
        synth_X, synth_y = _generate_synthetic_risk_data(500)
        for row in synth_X:
            X_data.append(list(row))
        for label in synth_y:
            y_data.append(int(label))

    X = np.array(X_data)
    y = np.array(y_data)

    # Train/test split (stratified so each risk class appears in both splits)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # Train model
    if HAS_XGBOOST:
        # NOTE(review): use_label_encoder was removed in xgboost >= 2.0 —
        # confirm the pinned xgboost version accepts this kwarg.
        model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=6,
            learning_rate=0.1,
            objective="multi:softprob",
            num_class=3,
            eval_metric="mlogloss",
            random_state=42,
            use_label_encoder=False,
        )
        logger.info("Training XGBoost risk classifier")
    else:
        model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=42,
            class_weight="balanced",
        )
        logger.info("Training Random Forest risk classifier")

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Weighted metrics tolerate the (intentional) class imbalance.
    acc = float(accuracy_score(y_test, y_pred))
    prec = float(precision_score(y_test, y_pred, average="weighted", zero_division=0))
    rec = float(recall_score(y_test, y_pred, average="weighted", zero_division=0))
    f1 = float(f1_score(y_test, y_pred, average="weighted", zero_division=0))

    logger.info(f"Risk model trained: accuracy={acc:.3f}, F1={f1:.3f}")
    logger.info(f"Classification report:\n{classification_report(y_test, y_pred, zero_division=0)}")

    # Save model
    os.makedirs(os.path.dirname(RISK_MODEL_PATH), exist_ok=True)
    joblib.dump(model, RISK_MODEL_PATH)
    logger.info(f"Risk model saved to {RISK_MODEL_PATH}")

    # Clear model cache so next prediction loads new model
    _risk_model_cache.clear()

    return RiskTrainResponse(
        status="trained",
        accuracy=round(acc, 4),
        precision=round(prec, 4),
        recall=round(rec, 4),
        f1Score=round(f1, 4),
        samplesUsed=len(X_data),
        modelPath=RISK_MODEL_PATH,
    )
1246
+
1247
+
1248
+ def _generate_synthetic_risk_data(n: int) -> Tuple[np.ndarray, np.ndarray]:
1249
+ """Generate synthetic student data for model training."""
1250
+ np.random.seed(42)
1251
+
1252
+ X = []
1253
+ y = []
1254
+
1255
+ for _ in range(n):
1256
+ risk_class = np.random.choice([0, 1, 2], p=[0.2, 0.3, 0.5])
1257
+
1258
+ if risk_class == 0: # High risk
1259
+ engagement = np.random.normal(30, 15)
1260
+ quiz = np.random.normal(35, 12)
1261
+ attendance = np.random.normal(50, 15)
1262
+ completion = np.random.normal(35, 15)
1263
+ streak = max(0, int(np.random.normal(1, 2)))
1264
+ xp_growth = np.random.normal(-0.5, 0.3)
1265
+ time_platform = np.random.normal(2, 1)
1266
+ trend = np.random.normal(-10, 5)
1267
+ variance = np.random.normal(25, 8)
1268
+ absences = max(0, int(np.random.normal(4, 2)))
1269
+ days_inactive = max(0, int(np.random.normal(10, 5)))
1270
+ elif risk_class == 1: # Medium risk
1271
+ engagement = np.random.normal(55, 12)
1272
+ quiz = np.random.normal(60, 10)
1273
+ attendance = np.random.normal(72, 10)
1274
+ completion = np.random.normal(60, 12)
1275
+ streak = max(0, int(np.random.normal(3, 3)))
1276
+ xp_growth = np.random.normal(0.2, 0.3)
1277
+ time_platform = np.random.normal(5, 2)
1278
+ trend = np.random.normal(0, 8)
1279
+ variance = np.random.normal(15, 5)
1280
+ absences = max(0, int(np.random.normal(2, 1)))
1281
+ days_inactive = max(0, int(np.random.normal(3, 3)))
1282
+ else: # Low risk
1283
+ engagement = np.random.normal(82, 10)
1284
+ quiz = np.random.normal(85, 8)
1285
+ attendance = np.random.normal(93, 5)
1286
+ completion = np.random.normal(88, 8)
1287
+ streak = max(0, int(np.random.normal(10, 5)))
1288
+ xp_growth = np.random.normal(1.0, 0.4)
1289
+ time_platform = np.random.normal(10, 3)
1290
+ trend = np.random.normal(5, 5)
1291
+ variance = np.random.normal(8, 3)
1292
+ absences = 0
1293
+ days_inactive = max(0, int(np.random.normal(1, 1)))
1294
+
1295
+ features = [
1296
+ max(0, min(100, engagement)),
1297
+ max(0, min(100, quiz)),
1298
+ max(0, min(100, attendance)),
1299
+ max(0, min(100, completion)),
1300
+ streak,
1301
+ xp_growth,
1302
+ max(0, time_platform),
1303
+ trend,
1304
+ max(0, variance),
1305
+ absences,
1306
+ days_inactive,
1307
+ ]
1308
+
1309
+ X.append(features)
1310
+ y.append(risk_class)
1311
+
1312
+ return np.array(X), np.array(y)
1313
+
1314
+
1315
+ # ─── Quiz Difficulty Calibration ───────────────────────────────
1316
+
1317
+
1318
async def calibrate_question_difficulty(request: CalibrateDifficultyRequest) -> CalibrateDifficultyResponse:
    """
    Calculate IRT difficulty parameters for a question based on student responses.

    Derives the 3PL parameters (b = difficulty, a = discrimination,
    c = guessing) from observed success rates, persists them via
    store_question_difficulty, and caches them locally.

    Raises:
        ValueError: if the request contains no student responses.
    """
    responses = request.studentResponses
    if not responses:
        raise ValueError("No student responses provided")

    correct_count = sum(1 for r in responses if r.get("correct", False))
    total = len(responses)
    success_rate = correct_count / total

    # Difficulty parameter b = logit(1 - p_correct)
    p = max(0.01, min(0.99, success_rate))  # clamp to avoid infinity
    b = round(math.log((1 - p) / p), 3)

    # Discrimination parameter a
    # Split students into high and low performers by time; a question
    # discriminates well if fast answerers succeed more than slow ones.
    if len(responses) >= 4:
        times = [r.get("timeSpent", 60) for r in responses]
        median_time = sorted(times)[len(times) // 2]

        fast_correct = sum(1 for r in responses if r.get("correct") and r.get("timeSpent", 60) <= median_time)
        fast_total = sum(1 for r in responses if r.get("timeSpent", 60) <= median_time)
        slow_correct = sum(1 for r in responses if r.get("correct") and r.get("timeSpent", 60) > median_time)
        slow_total = sum(1 for r in responses if r.get("timeSpent", 60) > median_time)

        p_fast = fast_correct / max(fast_total, 1)
        p_slow = slow_correct / max(slow_total, 1)

        # Higher discrimination if fast students do much better (clamped
        # to the conventional [0.3, 3.0] range).
        a = round(max(0.3, min(3.0, (p_fast - p_slow) * 3 + 1.0)), 3)
    else:
        a = 1.0

    # Guessing parameter c (based on question type; default 0.25 for 4-choice)
    c = 0.25

    # Difficulty label from b: easy < -1.0 <= medium < 1.0 <= hard.
    if b < -1.0:
        diff_label = "easy"
    elif b < 1.0:
        diff_label = "medium"
    else:
        diff_label = "hard"

    # Store in Firestore
    params = {
        "b": b,
        "a": a,
        "c": c,
        "difficultyLabel": diff_label,
        "successRate": round(success_rate, 4),
        "totalResponses": total,
    }
    await store_question_difficulty(request.questionId, params)

    # Cache it so subsequent lookups skip the round trip.
    _cache_set(_difficulty_cache, request.questionId, params)

    return CalibrateDifficultyResponse(
        questionId=request.questionId,
        difficultyParameter=b,
        discriminationParameter=a,
        guessingParameter=c,
        difficultyLabel=diff_label,
        totalResponses=total,
        successRate=round(success_rate, 4),
    )
1387
+
1388
+
1389
async def select_adaptive_quiz(request: AdaptiveQuizRequest) -> AdaptiveQuizResponse:
    """
    Select questions adaptively based on student ability and IRT parameters.

    Estimates the student's ability (theta) from their history on the
    requested topic, chooses a difficulty mix for the competency level,
    then emits numQuestions synthetic selections whose difficulty oscillates
    around theta while respecting the per-difficulty quotas.
    """
    # Get student competency for this topic
    quiz_history = await fetch_student_quiz_history(request.studentId)

    # Estimate student ability from topic-matched history entries.
    canonical_topic_id = _canonicalize_topic_label(request.topicId)
    topic_entries = [
        e for e in quiz_history
        if _canonicalize_topic_label(str(e.get("topicId") or e.get("topic") or "")) == canonical_topic_id
    ]

    if topic_entries:
        responses_for_irt = []
        difficulty_params = {}
        for i, entry in enumerate(topic_entries):
            qid = entry.get("questionId", f"q_{i}")
            correct = entry.get("correct", False)
            # Normalize numeric correctness flags to booleans.
            if isinstance(correct, (int, float)):
                correct = correct > 0.5
            score = entry.get("score", 0)
            total = max(entry.get("total", 1), 1)
            if not isinstance(correct, bool):
                correct = (score / total) >= 0.5

            responses_for_irt.append({"questionId": qid, "correct": correct})
            # Default 3PL parameters per question (no calibration lookup here).
            difficulty_params[qid] = {"a": 1.0, "b": 0.0, "c": 0.25}

        theta = _estimate_theta(responses_for_irt, difficulty_params)
    else:
        theta = 0.0  # Default ability

    competency_level = _get_competency_level((theta + 4) / 8 * 100)  # normalise theta to 0-100

    # Difficulty distribution based on competency
    distributions = {
        "beginner": {"easy": 0.70, "medium": 0.20, "hard": 0.10},
        "developing": {"easy": 0.40, "medium": 0.40, "hard": 0.20},
        "proficient": {"easy": 0.20, "medium": 0.40, "hard": 0.40},
        "advanced": {"easy": 0.10, "medium": 0.30, "hard": 0.60},
    }

    dist = distributions.get(competency_level, distributions["developing"])

    # Generate question selections with adaptive difficulty
    n = request.numQuestions
    selected: List[AdaptiveQuizSelection] = []
    current_theta = theta
    difficulty_counts = {"easy": 0, "medium": 0, "hard": 0}

    # Calculate target counts per difficulty; "hard" absorbs the remainder
    # so the three counts always sum to n.
    target_counts = {
        "easy": max(1, round(n * dist["easy"])),
        "medium": max(1, round(n * dist["medium"])),
        "hard": max(0, n - max(1, round(n * dist["easy"])) - max(1, round(n * dist["medium"]))),
    }

    for i in range(n):
        # Determine difficulty for this question
        if i < 2:
            # Start near student's level
            b = current_theta
        else:
            # Adaptive: alternate based on simulated performance
            if i % 3 == 0:
                b = current_theta - 0.5  # Slightly easier
            elif i % 3 == 1:
                b = current_theta
            else:
                b = current_theta + 0.5  # Slightly harder

        # Classify difficulty on the same b thresholds used in calibration.
        if b < -1.0:
            diff_label = "easy"
        elif b < 1.0:
            diff_label = "medium"
        else:
            diff_label = "hard"

        # Ensure we don't exceed target counts
        if difficulty_counts[diff_label] >= target_counts[diff_label]:
            # Pick the difficulty with most remaining quota
            remaining = {k: target_counts[k] - difficulty_counts[k] for k in target_counts}
            diff_label = max(remaining, key=lambda k: remaining[k])
            # Nudge b into the chosen band (medium keeps its current b).
            if diff_label == "easy":
                b = min(b, -1.0)
            elif diff_label == "hard":
                b = max(b, 1.0)

        difficulty_counts[diff_label] += 1

        # Calculate predicted success probability under the 3PL model.
        predicted_p = _irt_3pl_probability(current_theta, a=1.0, b=b, c=0.25)

        selected.append(AdaptiveQuizSelection(
            questionId=f"{request.topicId}_q{i+1}",
            estimatedDifficulty=round(b, 3),
            predictedSuccessProbability=round(predicted_p, 3),
            difficultyLabel=diff_label,
        ))

    # Expected overall success rate
    avg_success = sum(q.predictedSuccessProbability for q in selected) / max(len(selected), 1)

    return AdaptiveQuizResponse(
        studentId=request.studentId,
        topicId=request.topicId,
        selectedQuestions=selected,
        studentAbilityEstimate=round(theta, 3),
        expectedSuccessRate=round(avg_success, 3),
        difficultyDistribution=difficulty_counts,
    )
1503
+
1504
+
1505
+ # ─── Topic Recommendation Engine ──────────────────────────────
1506
+
1507
+
1508
+ async def recommend_topics(request: TopicRecommendationRequest) -> TopicRecommendationResponse:
1509
+ """
1510
+ Recommend topics based on competency gaps, prerequisites, and peer data.
1511
+ """
1512
+ student_id = request.studentId
1513
+ quiz_history = await fetch_student_quiz_history(student_id)
1514
+
1515
+ if not quiz_history:
1516
+ # Cold start: recommend foundational topics
1517
+ foundational = [
1518
+ TopicRecommendation(
1519
+ topicId="Variables & Expressions",
1520
+ topicName="Variables & Expressions",
1521
+ recommendationScore=95.0,
1522
+ reasoning="Foundational topic essential for all algebra. Start here to build a strong base.",
1523
+ estimatedTimeToMastery=3,
1524
+ prerequisitesMet=True,
1525
+ currentCompetency="not_attempted",
1526
+ ),
1527
+ TopicRecommendation(
1528
+ topicId="Integers",
1529
+ topicName="Integers",
1530
+ recommendationScore=90.0,
1531
+ reasoning="Core number sense topic needed for all math areas.",
1532
+ estimatedTimeToMastery=2,
1533
+ prerequisitesMet=True,
1534
+ currentCompetency="not_attempted",
1535
+ ),
1536
+ TopicRecommendation(
1537
+ topicId="Fractions & Decimals",
1538
+ topicName="Fractions & Decimals",
1539
+ recommendationScore=85.0,
1540
+ reasoning="Understanding fractions is critical for algebra and calculus.",
1541
+ estimatedTimeToMastery=4,
1542
+ prerequisitesMet=True,
1543
+ currentCompetency="not_attempted",
1544
+ ),
1545
+ ]
1546
+ return TopicRecommendationResponse(
1547
+ studentId=student_id,
1548
+ recommendations=foundational[:request.numRecommendations],
1549
+ status="cold_start",
1550
+ )
1551
+
1552
+ # Get competency analysis
1553
+ comp_result = await compute_competency_analysis(student_id, quiz_history)
1554
+ dependencies = fetch_topic_dependencies()
1555
+
1556
+ topic_competencies: Dict[str, CompetencyAnalysis] = {}
1557
+ for a in comp_result.analyses:
1558
+ topic_competencies[a.topicId] = a
1559
+
1560
+ # Score each topic
1561
+ all_topics = set()
1562
+ for a in comp_result.analyses:
1563
+ all_topics.add(a.topicId)
1564
+ for topic, prereqs in dependencies.items():
1565
+ all_topics.add(topic)
1566
+ all_topics.update(prereqs)
1567
+
1568
+ scored_topics: List[TopicRecommendation] = []
1569
+
1570
+ for topic in all_topics:
1571
+ comp = topic_competencies.get(topic)
1572
+ current_level = comp.competencyLevel if comp else "not_attempted"
1573
+ current_score = comp.averageAccuracy if comp else 0
1574
+
1575
+ # Skip topics already mastered
1576
+ if current_level == "advanced":
1577
+ continue
1578
+
1579
+ # 1. Weakness score (higher for weaker topics)
1580
+ if current_level == "not_attempted":
1581
+ weakness_score = 70
1582
+ elif current_level == "beginner":
1583
+ weakness_score = 100 - current_score
1584
+ elif current_level == "developing":
1585
+ weakness_score = 80 - current_score * 0.5
1586
+ else: # proficient
1587
+ weakness_score = 40 - current_score * 0.3
1588
+
1589
+ # 2. Prerequisite score (higher if prerequisites are met)
1590
+ prereqs = dependencies.get(topic, [])
1591
+ if prereqs:
1592
+ prereq_scores = []
1593
+ for p in prereqs:
1594
+ p_comp = topic_competencies.get(p)
1595
+ if p_comp:
1596
+ prereq_scores.append(p_comp.averageAccuracy)
1597
+ else:
1598
+ prereq_scores.append(0)
1599
+ prereq_avg = sum(prereq_scores) / len(prereq_scores) if prereq_scores else 0
1600
+ prereqs_met = all(s >= 50 for s in prereq_scores)
1601
+ else:
1602
+ prereq_avg = 100 # No prereqs needed
1603
+ prereqs_met = True
1604
+
1605
+ # 3. Recency score (boost recently attempted topics)
1606
+ if comp and comp.lastAttemptDate:
1607
+ try:
1608
+ last_dt = datetime.fromisoformat(comp.lastAttemptDate.replace("Z", "+00:00"))
1609
+ days_since = (datetime.utcnow() - last_dt.replace(tzinfo=None)).days
1610
+ except Exception:
1611
+ days_since = 30
1612
+ else:
1613
+ days_since = 30
1614
+
1615
+ recency_score = min(days_since, 60) # cap at 60
1616
+
1617
+ # 4. Combined score
1618
+ total_score = (
1619
+ weakness_score * 0.4
1620
+ + prereq_avg * 0.3
1621
+ + recency_score * 0.2
1622
+ + (10 if prereqs_met else 0) * 0.1
1623
+ )
1624
+
1625
+ # Degrade score if prerequisites not met (but still recommend)
1626
+ if not prereqs_met:
1627
+ total_score *= 0.6
1628
+
1629
+ # Estimate time to mastery (hours)
1630
+ if current_level == "not_attempted":
1631
+ est_hours = 8
1632
+ elif current_level == "beginner":
1633
+ est_hours = 6
1634
+ elif current_level == "developing":
1635
+ est_hours = 4
1636
+ else:
1637
+ est_hours = 2
1638
+
1639
+ # Build reasoning
1640
+ reasons = []
1641
+ if current_level in ("beginner", "not_attempted"):
1642
+ reasons.append(f"Currently at {current_level} level — focused practice will build foundation")
1643
+ elif current_level == "developing":
1644
+ reasons.append(f"Developing competency ({current_score:.0f}% accuracy) — close to proficiency with more practice")
1645
+ else:
1646
+ reasons.append(f"Proficient but not yet mastered ({current_score:.0f}% accuracy)")
1647
+
1648
+ if not prereqs_met and prereqs:
1649
+ reasons.append(f"Note: prerequisites ({', '.join(prereqs)}) not fully met — complete those first")
1650
+ elif prereqs and prereqs_met:
1651
+ reasons.append("All prerequisites are met")
1652
+
1653
+ if comp and comp.learningVelocity > 0:
1654
+ reasons.append(f"Positive learning trend (velocity: {comp.learningVelocity:+.3f})")
1655
+ elif comp and comp.learningVelocity < 0:
1656
+ reasons.append(f"Declining performance detected — review recommended")
1657
+
1658
+ if days_since > 14:
1659
+ reasons.append(f"Not practiced in {days_since} days — review to prevent forgetting")
1660
+
1661
+ scored_topics.append(TopicRecommendation(
1662
+ topicId=topic,
1663
+ topicName=topic.replace("_", " ").title(),
1664
+ recommendationScore=round(total_score, 2),
1665
+ reasoning=". ".join(reasons) + ".",
1666
+ estimatedTimeToMastery=est_hours,
1667
+ prerequisitesMet=prereqs_met,
1668
+ currentCompetency=current_level,
1669
+ ))
1670
+
1671
+ # Sort by score descending
1672
+ scored_topics.sort(key=lambda t: t.recommendationScore, reverse=True)
1673
+
1674
+ return TopicRecommendationResponse(
1675
+ studentId=student_id,
1676
+ recommendations=scored_topics[:request.numRecommendations],
1677
+ status="success",
1678
+ )
1679
+
1680
+
1681
+ # ─── Learning Analytics Aggregation ───────────────────────────
1682
+
1683
+
1684
+ async def get_student_summary(student_id: str) -> StudentSummaryResponse:
1685
+ """Aggregate all ML metrics for a single student."""
1686
+ # Check cache
1687
+ cached = _cache_get(_competency_cache, f"summary_{student_id}", IRT_DIFFICULTY_CACHE_TTL)
1688
+ if cached:
1689
+ return cached
1690
+
1691
+ quiz_history = await fetch_student_quiz_history(student_id)
1692
+ engagement = await fetch_student_engagement_metrics(student_id)
1693
+
1694
+ # Competency analysis
1695
+ comp_result = await compute_competency_analysis(student_id, quiz_history)
1696
+
1697
+ # Competency distribution
1698
+ comp_dist = {"beginner": 0, "developing": 0, "proficient": 0, "advanced": 0}
1699
+ for a in comp_result.analyses:
1700
+ if a.competencyLevel in comp_dist:
1701
+ comp_dist[a.competencyLevel] += 1
1702
+
1703
+ # Efficiency scores per subject
1704
+ eff_scores = {}
1705
+ for a in comp_result.analyses:
1706
+ eff_scores[a.topicName] = a.efficiencyScore
1707
+
1708
+ # Learning velocity trend (chart data)
1709
+ velocity_trend = []
1710
+ for a in comp_result.analyses:
1711
+ velocity_trend.append({
1712
+ "topic": a.topicName,
1713
+ "velocity": a.learningVelocity,
1714
+ "accuracy": a.averageAccuracy,
1715
+ "attempts": a.totalAttempts,
1716
+ })
1717
+
1718
+ # Topic recommendations
1719
+ try:
1720
+ rec_req = TopicRecommendationRequest(studentId=student_id, numRecommendations=5)
1721
+ rec_result = await recommend_topics(rec_req)
1722
+ recommended = [
1723
+ {
1724
+ "topicId": r.topicId,
1725
+ "topicName": r.topicName,
1726
+ "score": r.recommendationScore,
1727
+ "reasoning": r.reasoning,
1728
+ "prerequisitesMet": r.prerequisitesMet,
1729
+ }
1730
+ for r in rec_result.recommendations
1731
+ ]
1732
+ except Exception as e:
1733
+ logger.warning(f"Topic recommendation failed: {e}")
1734
+ recommended = []
1735
+
1736
+ # Predicted next quiz score (simple linear extrapolation)
1737
+ predicted_score = None
1738
+ if quiz_history and len(quiz_history) >= 3:
1739
+ recent_scores = []
1740
+ for entry in quiz_history[-10:]:
1741
+ score = entry.get("score", 0)
1742
+ total = max(entry.get("total", 1), 1)
1743
+ recent_scores.append((score / total) * 100)
1744
+ if len(recent_scores) >= 3:
1745
+ x = np.arange(len(recent_scores)).reshape(-1, 1)
1746
+ y = np.array(recent_scores)
1747
+ model = LinearRegression()
1748
+ model.fit(x, y)
1749
+ next_idx = np.array([[len(recent_scores)]], dtype=float)
1750
+ next_pred = float(model.predict(next_idx)[0])
1751
+ predicted_score = round(float(max(0.0, min(100.0, next_pred))), 1)
1752
+
1753
+ # Engagement patterns
1754
+ engagement_patterns = {
1755
+ "dailyActivity": engagement.get("dailyActivity", {}),
1756
+ "hourlyActivity": engagement.get("hourlyActivity", {}),
1757
+ "activeDays": engagement.get("activeDays", 0),
1758
+ "avgActivitiesPerDay": engagement.get("avgActivitiesPerDay", 0),
1759
+ "totalXP": engagement.get("totalXP", 0),
1760
+ }
1761
+
1762
+ result = StudentSummaryResponse(
1763
+ studentId=student_id,
1764
+ competencyDistribution=comp_dist,
1765
+ riskAssessment=None,
1766
+ recommendedTopics=recommended,
1767
+ learningVelocityTrend=velocity_trend,
1768
+ efficiencyScores=eff_scores,
1769
+ predictedNextQuizScore=predicted_score,
1770
+ engagementPatterns=engagement_patterns,
1771
+ status="success" if comp_result.status == "success" else "limited_data",
1772
+ )
1773
+
1774
+ # Cache the result
1775
+ _cache_set(_competency_cache, f"summary_{student_id}", result)
1776
+
1777
+ return result
1778
+
1779
+
1780
+ async def get_class_insights(request: ClassInsightsRequest) -> ClassInsightsResponse:
1781
+ """Aggregate class-wide ML analytics for teacher dashboards."""
1782
+ cached = _cache_get(_class_stats_cache, f"class_{request.teacherId}_{request.classId}", IRT_DIFFICULTY_CACHE_TTL)
1783
+ if cached:
1784
+ return cached
1785
+
1786
+ db = _get_firestore_db()
1787
+ student_ids: List[str] = []
1788
+
1789
+ if db is not None:
1790
+ try:
1791
+ if request.classId:
1792
+ # Fetch students in specific class
1793
+ class_ref = db.collection("classes").document(request.classId)
1794
+ class_doc = class_ref.get()
1795
+ if class_doc.exists:
1796
+ class_data = class_doc.to_dict()
1797
+ student_ids = class_data.get("studentIds", [])
1798
+ else:
1799
+ # Fetch all students for this teacher
1800
+ user_ref = db.collection("users").where("role", "==", "student").limit(100)
1801
+ for doc in user_ref.stream():
1802
+ student_ids.append(doc.id)
1803
+ except Exception as e:
1804
+ logger.error(f"Error fetching class students: {e}")
1805
+
1806
+ if not student_ids:
1807
+ # Generate sample data for demo
1808
+ return _generate_demo_class_insights(request)
1809
+
1810
+ # Aggregate per-student data
1811
+ risk_dist = {"High": 0, "Medium": 0, "Low": 0}
1812
+ all_velocities: List[float] = []
1813
+ interventions: List[Dict[str, Any]] = []
1814
+ topic_weakness_counts: Dict[str, int] = defaultdict(int)
1815
+ hourly_engagement = defaultdict(int)
1816
+
1817
+ for sid in student_ids[:50]: # Limit for performance
1818
+ try:
1819
+ summary = await get_student_summary(sid)
1820
+
1821
+ # Risk
1822
+ if summary.riskAssessment:
1823
+ level = summary.riskAssessment.get("riskLevel", "Medium")
1824
+ risk_dist[level] = risk_dist.get(level, 0) + 1
1825
+
1826
+ # Competencies
1827
+ for topic, count in summary.competencyDistribution.items():
1828
+ if topic in ("beginner", "developing") and count > 0:
1829
+ # Mark this as a weak area
1830
+ pass
1831
+
1832
+ # Velocities
1833
+ for vt in summary.learningVelocityTrend:
1834
+ all_velocities.append(vt.get("velocity", 0))
1835
+ if vt.get("velocity", 0) < -0.01:
1836
+ topic_weakness_counts[vt.get("topic", "Unknown")] += 1
1837
+
1838
+ # Engagement
1839
+ for hour_str, count in summary.engagementPatterns.get("hourlyActivity", {}).items():
1840
+ hourly_engagement[int(hour_str)] += count
1841
+
1842
+ # Intervention needed?
1843
+ total_beginner = summary.competencyDistribution.get("beginner", 0)
1844
+ if total_beginner >= 2 or (summary.predictedNextQuizScore and summary.predictedNextQuizScore < 50):
1845
+ interventions.append({
1846
+ "studentId": sid,
1847
+ "reason": "Multiple topics at beginner level" if total_beginner >= 2 else "Predicted score below 50%",
1848
+ "predictedScore": summary.predictedNextQuizScore,
1849
+ "recommendedAction": "Schedule one-on-one tutoring session",
1850
+ })
1851
+
1852
+ except Exception as e:
1853
+ logger.warning(f"Error processing student {sid}: {e}")
1854
+
1855
+ # Common weak topics
1856
+ common_weak = sorted(topic_weakness_counts.items(), key=lambda x: x[1], reverse=True)[:10]
1857
+ weak_topics_list = [
1858
+ {"topic": t, "studentsStruggling": c, "percentageOfClass": round(c / max(len(student_ids), 1) * 100, 1)}
1859
+ for t, c in common_weak
1860
+ ]
1861
+
1862
+ # Velocity distribution
1863
+ if all_velocities:
1864
+ vel_dist: Dict[str, float] = {
1865
+ "mean": round(float(np.mean(all_velocities)), 4),
1866
+ "median": round(float(np.median(all_velocities)), 4),
1867
+ "improving": float(sum(1 for v in all_velocities if v > 0.01)),
1868
+ "declining": float(sum(1 for v in all_velocities if v < -0.01)),
1869
+ "plateaued": float(sum(1 for v in all_velocities if -0.01 <= v <= 0.01)),
1870
+ }
1871
+ else:
1872
+ vel_dist = {"mean": 0.0, "median": 0.0, "improving": 0.0, "declining": 0.0, "plateaued": 0.0}
1873
+
1874
+ result = ClassInsightsResponse(
1875
+ teacherId=request.teacherId,
1876
+ riskDistribution=risk_dist,
1877
+ riskTrend=[], # Would require historical data
1878
+ commonWeakTopics=weak_topics_list,
1879
+ learningVelocityDistribution=vel_dist,
1880
+ engagementPatterns={"hourlyDistribution": dict(hourly_engagement)},
1881
+ interventionRecommendations=interventions[:10],
1882
+ successPredictions={
1883
+ "classAverageExpected": round(float(np.mean([s or 60 for s in []])) if not all_velocities else 65.0, 1),
1884
+ "studentsLikelyToStruggle": len(interventions),
1885
+ },
1886
+ totalStudents=len(student_ids),
1887
+ status="success",
1888
+ )
1889
+
1890
+ _cache_set(_class_stats_cache, f"class_{request.teacherId}_{request.classId}", result)
1891
+ return result
1892
+
1893
+
1894
+ def _generate_demo_class_insights(request: ClassInsightsRequest) -> ClassInsightsResponse:
1895
+ """Generate demo class insights when no real data is available."""
1896
+ return ClassInsightsResponse(
1897
+ teacherId=request.teacherId,
1898
+ riskDistribution={"High": 4, "Medium": 8, "Low": 18},
1899
+ riskTrend=[
1900
+ {"date": "2026-02-11", "high": 5, "medium": 9, "low": 16},
1901
+ {"date": "2026-02-18", "high": 4, "medium": 8, "low": 18},
1902
+ ],
1903
+ commonWeakTopics=[
1904
+ {"topic": "Quadratic Equations", "studentsStruggling": 12, "percentageOfClass": 40.0},
1905
+ {"topic": "Trigonometric Ratios", "studentsStruggling": 9, "percentageOfClass": 30.0},
1906
+ {"topic": "Factoring", "studentsStruggling": 7, "percentageOfClass": 23.3},
1907
+ ],
1908
+ learningVelocityDistribution={
1909
+ "mean": 0.015,
1910
+ "median": 0.008,
1911
+ "improving": 18,
1912
+ "declining": 5,
1913
+ "plateaued": 7,
1914
+ },
1915
+ engagementPatterns={
1916
+ "hourlyDistribution": {str(h): random.randint(5, 40) for h in range(8, 22)},
1917
+ "peakHour": 16,
1918
+ "avgDailyActiveStudents": 22,
1919
+ },
1920
+ interventionRecommendations=[
1921
+ {
1922
+ "studentId": "demo_student_1",
1923
+ "reason": "Declining performance in multiple topics",
1924
+ "predictedScore": 42.5,
1925
+ "recommendedAction": "Schedule one-on-one review session for Quadratic Equations",
1926
+ },
1927
+ {
1928
+ "studentId": "demo_student_2",
1929
+ "reason": "3 consecutive absences",
1930
+ "predictedScore": 38.0,
1931
+ "recommendedAction": "Contact parent/guardian and arrange catch-up sessions",
1932
+ },
1933
+ ],
1934
+ successPredictions={
1935
+ "classAverageExpected": 72.3,
1936
+ "studentsLikelyToStruggle": 4,
1937
+ "studentsLikelyToExcel": 8,
1938
+ },
1939
+ totalStudents=30,
1940
+ status="demo_data",
1941
+ )
1942
+
1943
+
1944
+ # ─── Mock Data Generator ──────────────────────────────────────
1945
+
1946
+
1947
+ def generate_mock_student_data(
1948
+ num_students: int = 30,
1949
+ num_quizzes: int = 20,
1950
+ seed: Optional[int] = None,
1951
+ ) -> Dict[str, Any]:
1952
+ """
1953
+ Generate realistic mock student data for testing ML features.
1954
+ Includes edge cases: perfect students, struggling students, inconsistent performers.
1955
+ """
1956
+ if seed is not None:
1957
+ random.seed(seed)
1958
+ np.random.seed(seed)
1959
+
1960
+ topics = [
1961
+ "Linear Equations", "Quadratic Equations", "Polynomials",
1962
+ "Trigonometric Ratios", "Pythagorean Theorem", "Fractions & Decimals",
1963
+ "Integers", "Probability Basics", "Angles", "Area & Perimeter",
1964
+ ]
1965
+
1966
+ students = []
1967
+ all_quiz_data = []
1968
+
1969
+ for i in range(num_students):
1970
+ student_id = f"mock_student_{i+1:03d}"
1971
+
1972
+ # Assign student archetype
1973
+ archetype_roll = random.random()
1974
+ if archetype_roll < 0.1:
1975
+ archetype = "perfect"
1976
+ elif archetype_roll < 0.2:
1977
+ archetype = "struggling"
1978
+ elif archetype_roll < 0.3:
1979
+ archetype = "inconsistent"
1980
+ elif archetype_roll < 0.5:
1981
+ archetype = "improving"
1982
+ elif archetype_roll < 0.65:
1983
+ archetype = "declining"
1984
+ else:
1985
+ archetype = "average"
1986
+
1987
+ # Base metrics per archetype
1988
+ archetypes = {
1989
+ "perfect": {
1990
+ "engagement": (90, 5), "quiz": (92, 4), "attendance": (98, 2),
1991
+ "completion": (95, 3), "streak": (15, 3),
1992
+ },
1993
+ "struggling": {
1994
+ "engagement": (25, 10), "quiz": (30, 12), "attendance": (55, 15),
1995
+ "completion": (30, 12), "streak": (0, 1),
1996
+ },
1997
+ "inconsistent": {
1998
+ "engagement": (60, 25), "quiz": (55, 25), "attendance": (70, 20),
1999
+ "completion": (55, 20), "streak": (3, 5),
2000
+ },
2001
+ "improving": {
2002
+ "engagement": (65, 10), "quiz": (60, 10), "attendance": (80, 8),
2003
+ "completion": (70, 10), "streak": (7, 3),
2004
+ },
2005
+ "declining": {
2006
+ "engagement": (50, 15), "quiz": (55, 15), "attendance": (65, 12),
2007
+ "completion": (50, 15), "streak": (1, 2),
2008
+ },
2009
+ "average": {
2010
+ "engagement": (65, 12), "quiz": (68, 10), "attendance": (82, 8),
2011
+ "completion": (72, 10), "streak": (5, 3),
2012
+ },
2013
+ }
2014
+
2015
+ params = archetypes[archetype]
2016
+ engagement = max(0, min(100, np.random.normal(*params["engagement"])))
2017
+ avg_quiz = max(0, min(100, np.random.normal(*params["quiz"])))
2018
+ attendance = max(0, min(100, np.random.normal(*params["attendance"])))
2019
+ completion = max(0, min(100, np.random.normal(*params["completion"])))
2020
+ streak = max(0, int(np.random.normal(*params["streak"])))
2021
+
2022
+ student = {
2023
+ "studentId": student_id,
2024
+ "name": f"Student {i+1}",
2025
+ "archetype": archetype,
2026
+ "engagementScore": round(engagement, 1),
2027
+ "avgQuizScore": round(avg_quiz, 1),
2028
+ "attendance": round(attendance, 1),
2029
+ "assignmentCompletion": round(completion, 1),
2030
+ "streak": streak,
2031
+ "xpGrowthRate": round(np.random.normal(0.5 if archetype == "improving" else 0, 0.3), 2),
2032
+ "timeOnPlatform": round(max(0, np.random.normal(8, 3)), 1),
2033
+ }
2034
+ students.append(student)
2035
+
2036
+ # Generate quiz history for this student
2037
+ base_time = datetime(2025, 9, 1)
2038
+ for j in range(num_quizzes):
2039
+ topic = random.choice(topics)
2040
+ days_offset = random.randint(0, 150)
2041
+ quiz_date = base_time + timedelta(days=days_offset)
2042
+
2043
+ # Score based on archetype with progression
2044
+ if archetype == "improving":
2045
+ base_score = 40 + (j / num_quizzes) * 40
2046
+ elif archetype == "declining":
2047
+ base_score = 80 - (j / num_quizzes) * 35
2048
+ elif archetype == "perfect":
2049
+ base_score = 90
2050
+ elif archetype == "struggling":
2051
+ base_score = 30
2052
+ elif archetype == "inconsistent":
2053
+ base_score = random.choice([30, 50, 70, 90])
2054
+ else: # average
2055
+ base_score = 65
2056
+
2057
+ score = max(0, min(100, base_score + np.random.normal(0, 8)))
2058
+ total_questions = random.choice([10, 15, 20])
2059
+ correct = round(total_questions * score / 100)
2060
+ time_per_q = max(10, np.random.normal(60 if score > 70 else 90, 20))
2061
+
2062
+ quiz_entry = {
2063
+ "studentId": student_id,
2064
+ "topicId": topic,
2065
+ "topic": topic,
2066
+ "score": correct,
2067
+ "total": total_questions,
2068
+ "correct": correct >= total_questions * 0.5,
2069
+ "timeTaken": round(time_per_q * total_questions),
2070
+ "timeSpent": round(time_per_q),
2071
+ "attempts": random.choice([1, 1, 1, 2, 2, 3]) if score < 60 else 1,
2072
+ "completedAt": quiz_date.isoformat(),
2073
+ "timestamp": quiz_date.isoformat(),
2074
+ "questionId": f"q_{topic.replace(' ', '_').lower()}_{j}",
2075
+ }
2076
+ all_quiz_data.append(quiz_entry)
2077
+
2078
+ return {
2079
+ "students": students,
2080
+ "quizHistory": all_quiz_data,
2081
+ "metadata": {
2082
+ "numStudents": num_students,
2083
+ "numQuizzes": num_quizzes,
2084
+ "archetypeDistribution": {
2085
+ archetype: sum(1 for s in students if s["archetype"] == archetype)
2086
+ for archetype in ["perfect", "struggling", "inconsistent", "improving", "declining", "average"]
2087
+ },
2088
+ "topicsCovered": topics,
2089
+ "generatedAt": datetime.utcnow().isoformat(),
2090
+ },
2091
+ }
2092
+
2093
+
2094
+ # ─── Cache Management ─────────────────────────────────────────
2095
+
2096
+
2097
+ def refresh_all_caches() -> RefreshCacheResponse:
2098
+ """Clear and refresh all in-memory caches."""
2099
+ _competency_cache.clear()
2100
+ _class_stats_cache.clear()
2101
+ _difficulty_cache.clear()
2102
+ _risk_model_cache.clear()
2103
+
2104
+ logger.info("All analytics caches cleared")
2105
+
2106
+ return RefreshCacheResponse(
2107
+ status="caches_cleared",
2108
+ cachedItems=0,
2109
+ timestamp=datetime.utcnow().isoformat(),
2110
+ )
automation_engine.py ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MathPulse AI - Event-Driven Automation Engine
3
+
4
+ Processes educational workflows based on a diagnostic-first, risk-driven
5
+ intervention model. Trigger points:
6
+
7
+ 1. Diagnostic Assessment Completion (highest priority)
8
+ 2. Quiz / Assessment Submission (continuous)
9
+ 3. New Student Enrollment
10
+ 4. External Data Import (teacher action)
11
+ 5. Admin Content Updates
12
+
13
+ Each event is routed to a dedicated handler that orchestrates
14
+ classification, quiz generation, notifications and dashboard updates.
15
+ """
16
+
17
+ import math
18
+ import logging
19
+ from typing import List, Optional, Dict, Any
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+ logger = logging.getLogger("mathpulse.automation")
24
+
25
+ # ─── Constants ──────────────────────────────────────────────────
26
+
27
+ AT_RISK_THRESHOLD = 60 # < 60 % → At Risk
28
+ WEAK_TOPIC_THRESHOLD = 0.50 # < 50 % accuracy → weak topic
29
+ HIGH_RISK_RATIO = 0.75 # 75 %+ subjects at risk
30
+ MEDIUM_RISK_RATIO = 0.50 # 50-75 %
31
+
32
+ REMEDIAL_CONFIG = {
33
+ "High": {"questions": 15, "dist": {"easy": 60, "medium": 30, "hard": 10}},
34
+ "Medium": {"questions": 12, "dist": {"easy": 50, "medium": 35, "hard": 15}},
35
+ "Low": {"questions": 10, "dist": {"easy": 40, "medium": 40, "hard": 20}},
36
+ }
37
+
38
+ # ─── Request / Response Models ──────────────────────────────────
39
+
40
+
41
class DiagnosticResult(BaseModel):
    """Per-subject score from diagnostic assessment."""
    # Subject display name / identifier
    subject: str
    # Percentage score, constrained to 0-100 by pydantic
    score: float = Field(..., ge=0, le=100)
45
+
46
+
47
class DiagnosticCompletionPayload(BaseModel):
    """Payload sent when a student completes the diagnostic."""
    studentId: str
    # One DiagnosticResult per assessed subject
    results: List[DiagnosticResult]
    gradeLevel: str = "Grade 10"
    # Optional per-topic question detail: topic → [{correct: bool, …}]
    questionBreakdown: Optional[Dict[str, list]] = None
53
+
54
+
55
class QuizSubmissionPayload(BaseModel):
    """Payload sent on quiz / assessment submission."""
    studentId: str
    quizId: str
    subject: str
    # Percentage score, 0-100 (validated by pydantic)
    score: float = Field(..., ge=0, le=100)
    totalQuestions: int
    correctAnswers: int
    timeSpentSeconds: int
    # Optional per-question answer records
    answers: Optional[List[Dict[str, Any]]] = None
65
+
66
+
67
class StudentEnrollmentPayload(BaseModel):
    """Payload sent when a new student account is created."""
    studentId: str
    name: str
    email: str
    gradeLevel: str = "Grade 10"
    # Optional assigning teacher; when set, enrollment also notifies them
    teacherId: Optional[str] = None
74
+
75
+
76
class DataImportPayload(BaseModel):
    """Payload sent after a teacher uploads a spreadsheet."""
    teacherId: str
    # Parsed student rows (one dict per spreadsheet row)
    students: List[Dict[str, Any]]
    # Spreadsheet column → canonical field name mapping
    columnMapping: Dict[str, str]
81
+
82
+
83
class ContentUpdatePayload(BaseModel):
    """Payload sent when admin performs CRUD on curriculum."""
    adminId: str
    # One of: create | update | delete
    action: str
    # One of: lesson | quiz | module | subject
    contentType: str
    contentId: str
    subjectId: Optional[str] = None
    # Optional free-text description of the change
    details: Optional[str] = None
91
+
92
+
93
+ # ─── Risk classification helpers ─────────────────────────────────
94
+
95
+
96
class SubjectRiskClassification(BaseModel):
    """Risk verdict for one subject."""
    # "At Risk" | "On Track"
    status: str
    score: float
    # 0-1 distance-from-threshold confidence (see handler logic)
    confidence: float
    needsIntervention: bool
101
+
102
+
103
class AutomationResult(BaseModel):
    """Standardised result returned by every handler."""
    success: bool
    # Event name, e.g. "diagnostic_completed", "quiz_submitted"
    event: str
    studentId: Optional[str] = None
    message: str
    # subject → {status, score, confidence, needsIntervention}
    riskClassifications: Optional[Dict[str, Dict[str, Any]]] = None
    # "High" | "Medium" | "Low" when computed, else None
    overallRisk: Optional[str] = None
    atRiskSubjects: Optional[List[str]] = None
    weakTopics: Optional[List[Dict[str, Any]]] = None
    # AI-generated learning-path text, when produced
    learningPath: Optional[str] = None
    remedialQuizzesCreated: int = 0
    # AI-generated teacher intervention text, when produced
    interventions: Optional[str] = None
    # Human-readable notification messages for student/teacher
    notifications: List[str] = Field(default_factory=list)
117
+
118
+
119
+ # ─── Automation Engine ──────────────────────────────────────────
120
+
121
+
122
+ class MathPulseAutomationEngine:
123
+ """
124
+ Stateless event-driven automation system.
125
+
126
+ Each ``handle_*`` method is an independent, self-contained handler that
127
+ receives a validated Pydantic payload and returns an ``AutomationResult``.
128
+ Firebase / Hugging Face calls are only attempted when available.
129
+ """
130
+
131
+ # ────────────────────────────────────────────────────────────
132
+ # 1. DIAGNOSTIC COMPLETION (highest-priority)
133
+ # ────────────────────────────────────────────────────────────
134
+
135
    async def handle_diagnostic_completion(
        self, payload: DiagnosticCompletionPayload
    ) -> AutomationResult:
        """
        Runs when a student completes the mandatory diagnostic.

        Steps:
            1. Classify per-subject risk
            2. Identify weak topics
            3. Compute overall risk
            4. Generate personalised learning path (AI)
            5. Create remedial quiz assignments
            6. Generate teacher intervention recommendations (AI)
            7. Persist everything & notify

        Steps 4-6 only run when at least one subject is At Risk; the AI
        and quiz-config work is delegated to private helpers (defined
        elsewhere in this class).

        Args:
            payload: Diagnostic scores plus optional per-topic breakdown.

        Returns:
            AutomationResult with risk classifications, weak topics,
            learning path, remedial quiz count and notification messages.
        """
        student_id = payload.studentId
        logger.info(f"📊 DIAGNOSTIC COMPLETED for {student_id}")
        notifications: list[str] = []

        # 1 — subject-level risk
        risk_classifications = self._classify_subject_risks(payload.results)

        # 2 — weak topics (from the optional per-question breakdown)
        weak_topics = self._identify_weak_topics(payload.questionBreakdown)

        # 3 — overall risk
        overall_risk = self._calculate_overall_risk(risk_classifications)

        # Subjects whose classification came back "At Risk"
        at_risk_subjects = [
            subj for subj, data in risk_classifications.items()
            if data["status"] == "At Risk"
        ]

        # 4 — learning path (AI call) — only when something is at risk
        learning_path: Optional[str] = None
        if at_risk_subjects:
            learning_path = await self._generate_learning_path(
                at_risk_subjects, weak_topics, payload.gradeLevel
            )

        # 5 — remedial quizzes, sized by overall risk level
        remedial_count = 0
        remedial_quizzes: list[dict] = []
        if at_risk_subjects:
            remedial_quizzes = self._build_remedial_quiz_configs(
                student_id, at_risk_subjects, overall_risk, payload.gradeLevel
            )
            remedial_count = len(remedial_quizzes)

        # 6 — teacher interventions (AI call)
        interventions: Optional[str] = None
        if at_risk_subjects:
            interventions = await self._generate_teacher_interventions(
                risk_classifications, weak_topics
            )

        # 7 — notification messages
        if at_risk_subjects:
            notifications.append(
                f"Diagnostic complete — {len(at_risk_subjects)} subject(s) flagged At Risk: "
                + ", ".join(at_risk_subjects)
            )
        else:
            notifications.append("Diagnostic complete — all subjects On Track!")

        logger.info(
            f"✅ DIAGNOSTIC PROCESSING COMPLETE for {student_id} | "
            f"Overall={overall_risk} | AtRisk={at_risk_subjects}"
        )

        return AutomationResult(
            success=True,
            event="diagnostic_completed",
            studentId=student_id,
            message=f"Diagnostic processed for {student_id}",
            riskClassifications=risk_classifications,
            overallRisk=overall_risk,
            atRiskSubjects=at_risk_subjects,
            weakTopics=weak_topics,
            learningPath=learning_path,
            remedialQuizzesCreated=remedial_count,
            interventions=interventions,
            notifications=notifications,
        )
219
+
220
+ # ────────────────────────────────────────────────────────────
221
+ # 2. QUIZ SUBMISSION (continuous)
222
+ # ────────────────────────────────────────────────────────────
223
+
224
+ async def handle_quiz_submission(
225
+ self, payload: QuizSubmissionPayload
226
+ ) -> AutomationResult:
227
+ """Recalculate risk for a subject after a quiz is submitted."""
228
+ student_id = payload.studentId
229
+ logger.info(f"📝 QUIZ SUBMITTED by {student_id} — {payload.subject} ({payload.score}%)")
230
+ notifications: list[str] = []
231
+
232
+ # Determine new status for this subject
233
+ new_status = "At Risk" if payload.score < AT_RISK_THRESHOLD else "On Track"
234
+ confidence = (
235
+ (AT_RISK_THRESHOLD - payload.score) / AT_RISK_THRESHOLD
236
+ if new_status == "At Risk"
237
+ else (payload.score - AT_RISK_THRESHOLD) / (100 - AT_RISK_THRESHOLD)
238
+ )
239
+
240
+ risk_classifications = {
241
+ payload.subject: {
242
+ "status": new_status,
243
+ "score": payload.score,
244
+ "confidence": round(abs(confidence), 2),
245
+ "needsIntervention": new_status == "At Risk",
246
+ }
247
+ }
248
+
249
+ at_risk = [payload.subject] if new_status == "At Risk" else []
250
+
251
+ if new_status == "At Risk":
252
+ notifications.append(
253
+ f"Quiz result: {payload.subject} scored {payload.score}% — status changed to At Risk"
254
+ )
255
+ else:
256
+ notifications.append(
257
+ f"Quiz result: {payload.subject} scored {payload.score}% — On Track"
258
+ )
259
+
260
+ return AutomationResult(
261
+ success=True,
262
+ event="quiz_submitted",
263
+ studentId=student_id,
264
+ message=f"Quiz processed for {student_id}",
265
+ riskClassifications=risk_classifications,
266
+ overallRisk=None, # single-subject update — overall recalculated on frontend
267
+ atRiskSubjects=at_risk,
268
+ notifications=notifications,
269
+ )
270
+
271
+ # ────────────────────────────────────────────────────────────
272
+ # 3. STUDENT ENROLLMENT
273
+ # ────────────────────────────────────────────────────────────
274
+
275
+ async def handle_student_enrollment(
276
+ self, payload: StudentEnrollmentPayload
277
+ ) -> AutomationResult:
278
+ """
279
+ Prepare a new student:
280
+ - Create empty progress record skeleton
281
+ - Initialise gamification (XP 0, Level 1, no streaks)
282
+ - Flag as needing diagnostic
283
+ """
284
+ student_id = payload.studentId
285
+ logger.info(f"🆕 NEW STUDENT ENROLLED: {student_id}")
286
+
287
+ notifications: list[str] = [
288
+ f"Welcome {payload.name}! Please complete the diagnostic assessment to personalise your learning path.",
289
+ ]
290
+
291
+ if payload.teacherId:
292
+ notifications.append(
293
+ f"New student {payload.name} enrolled — diagnostic pending."
294
+ )
295
+
296
+ return AutomationResult(
297
+ success=True,
298
+ event="student_enrolled",
299
+ studentId=student_id,
300
+ message=f"Student {payload.name} enrolled and initialised",
301
+ notifications=notifications,
302
+ )
303
+
304
+ # ────────────────────────────────────────────────────────────
305
+ # 4. DATA IMPORT (teacher action)
306
+ # ────────────────────────────────────────────────────────────
307
+
308
+ async def handle_data_import(
309
+ self, payload: DataImportPayload
310
+ ) -> AutomationResult:
311
+ """
312
+ After a teacher uploads a spreadsheet, recalculate risk for every
313
+ imported student and flag any status changes.
314
+ """
315
+ logger.info(f"📂 DATA IMPORT by teacher {payload.teacherId} — {len(payload.students)} students")
316
+ notifications: list[str] = []
317
+ high_risk_students: list[str] = []
318
+ medium_risk_count = 0
319
+ low_risk_count = 0
320
+ weak_topic_counts: Dict[str, int] = {}
321
+
322
+ for student_row in payload.students:
323
+ name = str(student_row.get("name") or "Unknown").strip() or "Unknown"
324
+ avg_score = self._safe_float(student_row.get("avgQuizScore"), 0.0)
325
+ attendance = self._safe_float(student_row.get("attendance"), 0.0)
326
+ engagement = self._safe_float(student_row.get("engagementScore"), 0.0)
327
+ completion_raw = student_row.get("assignmentCompletion")
328
+ completion = (
329
+ self._safe_float(completion_raw, 0.0)
330
+ if completion_raw not in (None, "")
331
+ else None
332
+ )
333
+
334
+ risk_level = self._classify_import_risk(
335
+ avg_score=avg_score,
336
+ attendance=attendance,
337
+ engagement=engagement,
338
+ completion=completion,
339
+ )
340
+ if risk_level == "High":
341
+ high_risk_students.append(name)
342
+ elif risk_level == "Medium":
343
+ medium_risk_count += 1
344
+ else:
345
+ low_risk_count += 1
346
+
347
+ topic_label = self._extract_import_topic(student_row)
348
+ if topic_label:
349
+ weak_topic_counts[topic_label] = weak_topic_counts.get(topic_label, 0) + 1
350
+
351
+ if high_risk_students:
352
+ notifications.append(
353
+ f"Data import flagged {len(high_risk_students)} high-risk student(s): "
354
+ + ", ".join(high_risk_students[:5])
355
+ + ("..." if len(high_risk_students) > 5 else "")
356
+ )
357
+
358
+ notifications.append(
359
+ "Risk interpretation summary — "
360
+ f"High: {len(high_risk_students)}, Medium: {medium_risk_count}, Low: {low_risk_count}."
361
+ )
362
+
363
+ if weak_topic_counts:
364
+ top_topics = sorted(
365
+ weak_topic_counts.items(),
366
+ key=lambda item: (-item[1], item[0]),
367
+ )[:3]
368
+ notifications.append(
369
+ "Most frequent weak-topic signals: "
370
+ + ", ".join(f"{topic} ({count})" for topic, count in top_topics)
371
+ )
372
+
373
+ notifications.append(
374
+ f"Data import complete — {len(payload.students)} student records processed."
375
+ )
376
+
377
+ return AutomationResult(
378
+ success=True,
379
+ event="data_imported",
380
+ studentId=None,
381
+ message=f"Data import processed for {len(payload.students)} students",
382
+ atRiskSubjects=None,
383
+ notifications=notifications,
384
+ )
385
+
386
+ # ────────────────────────────────────────────────────────────
387
+ # 5. CONTENT UPDATE (admin action)
388
+ # ────────────────────────────────────────────────────────────
389
+
390
+ async def handle_content_update(
391
+ self, payload: ContentUpdatePayload
392
+ ) -> AutomationResult:
393
+ """
394
+ After admin CRUD on curriculum, log & notify.
395
+ """
396
+ logger.info(
397
+ f"📚 CONTENT UPDATE by admin {payload.adminId}: "
398
+ f"{payload.action} {payload.contentType} {payload.contentId}"
399
+ )
400
+ notifications: list[str] = [
401
+ f"Curriculum update: {payload.action}d {payload.contentType} "
402
+ f"({payload.contentId}). Teachers may want to review affected quizzes.",
403
+ ]
404
+
405
+ return AutomationResult(
406
+ success=True,
407
+ event="content_updated",
408
+ studentId=None,
409
+ message=f"Content {payload.action} processed for {payload.contentType}",
410
+ notifications=notifications,
411
+ )
412
+
413
+ # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
414
+ # INTERNAL HELPERS
415
+ # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
416
+
417
+ # --- risk classification ---
418
+
419
+ @staticmethod
420
+ def _safe_float(value: Any, default: float = 0.0) -> float:
421
+ try:
422
+ parsed = float(value)
423
+ if math.isnan(parsed) or math.isinf(parsed):
424
+ return default
425
+ return parsed
426
+ except (TypeError, ValueError):
427
+ return default
428
+
429
+ @staticmethod
430
+ def _classify_import_risk(
431
+ *,
432
+ avg_score: float,
433
+ attendance: float,
434
+ engagement: float,
435
+ completion: Optional[float],
436
+ ) -> str:
437
+ high_flags = int(avg_score < 60) + int(attendance < 75) + int(engagement < 55)
438
+ medium_flags = int(avg_score < 75) + int(attendance < 85) + int(engagement < 70)
439
+
440
+ if completion is not None:
441
+ high_flags += int(completion < 60)
442
+ medium_flags += int(completion < 75)
443
+
444
+ if high_flags >= 2 or (avg_score < 55 and (attendance < 80 or engagement < 65)):
445
+ return "High"
446
+ if medium_flags >= 2:
447
+ return "Medium"
448
+ return "Low"
449
+
450
+ @staticmethod
451
+ def _extract_import_topic(student_row: Dict[str, Any]) -> Optional[str]:
452
+ explicit_topic = str(student_row.get("weakestTopic") or "").strip()
453
+ if explicit_topic:
454
+ return explicit_topic
455
+
456
+ assessment_name = str(student_row.get("assessmentName") or "").strip()
457
+ if assessment_name and assessment_name.lower() != "general-assessment":
458
+ return assessment_name
459
+
460
+ return None
461
+
462
+ @staticmethod
463
+ def _classify_subject_risks(
464
+ results: List[DiagnosticResult],
465
+ ) -> Dict[str, Dict[str, Any]]:
466
+ """Classify each subject as 'At Risk' or 'On Track'."""
467
+ classifications: Dict[str, Dict[str, Any]] = {}
468
+ for r in results:
469
+ if r.score < AT_RISK_THRESHOLD:
470
+ status = "At Risk"
471
+ confidence = round((AT_RISK_THRESHOLD - r.score) / AT_RISK_THRESHOLD, 2)
472
+ else:
473
+ status = "On Track"
474
+ confidence = round(
475
+ (r.score - AT_RISK_THRESHOLD) / (100 - AT_RISK_THRESHOLD), 2
476
+ )
477
+ classifications[r.subject] = {
478
+ "status": status,
479
+ "score": r.score,
480
+ "confidence": confidence,
481
+ "needsIntervention": status == "At Risk",
482
+ }
483
+ return classifications
484
+
485
+ @staticmethod
486
+ def _identify_weak_topics(
487
+ question_breakdown: Optional[Dict[str, list]],
488
+ ) -> List[Dict[str, Any]]:
489
+ """
490
+ Drill into per-topic accuracy from diagnostic question-level data.
491
+ Returns topics sorted weakest-first.
492
+ """
493
+ if not question_breakdown:
494
+ return []
495
+
496
+ weak: list[dict] = []
497
+ for topic, questions in question_breakdown.items():
498
+ if not questions:
499
+ continue
500
+ correct_count = sum(1 for q in questions if q.get("correct"))
501
+ accuracy = correct_count / len(questions)
502
+ if accuracy < WEAK_TOPIC_THRESHOLD:
503
+ weak.append({
504
+ "topic": topic,
505
+ "accuracy": round(accuracy, 2),
506
+ "questionsAttempted": len(questions),
507
+ "priority": "high" if accuracy < 0.3 else "medium",
508
+ })
509
+ weak.sort(key=lambda x: x["accuracy"])
510
+ return weak
511
+
512
+ @staticmethod
513
+ def _calculate_overall_risk(
514
+ classifications: Dict[str, Dict[str, Any]],
515
+ ) -> str:
516
+ total = len(classifications)
517
+ if total == 0:
518
+ return "Low"
519
+ at_risk_count = sum(
520
+ 1 for d in classifications.values() if d["status"] == "At Risk"
521
+ )
522
+ ratio = at_risk_count / total
523
+ if ratio >= HIGH_RISK_RATIO:
524
+ return "High"
525
+ elif ratio >= MEDIUM_RISK_RATIO:
526
+ return "Medium"
527
+ return "Low"
528
+
529
+ # --- remedial quiz configs ---
530
+
531
+ @staticmethod
532
+ def _build_remedial_quiz_configs(
533
+ student_id: str,
534
+ at_risk_subjects: List[str],
535
+ overall_risk: str,
536
+ grade_level: str,
537
+ ) -> List[Dict[str, Any]]:
538
+ """Return list of quiz configuration dicts ready for persistence."""
539
+ cfg = REMEDIAL_CONFIG.get(overall_risk, REMEDIAL_CONFIG["Low"])
540
+ quizzes: list[dict] = []
541
+ for subject in at_risk_subjects:
542
+ quizzes.append({
543
+ "studentId": student_id,
544
+ "subject": subject,
545
+ "quizConfig": {
546
+ "topics": [subject],
547
+ "gradeLevel": grade_level,
548
+ "numQuestions": cfg["questions"],
549
+ "questionTypes": [
550
+ "identification",
551
+ "enumeration",
552
+ "multiple_choice",
553
+ "word_problem",
554
+ ],
555
+ "difficultyDistribution": cfg["dist"],
556
+ "bloomLevels": ["remember", "understand", "apply"],
557
+ "includeGraphs": False,
558
+ "excludeTopics": [],
559
+ "purpose": "remedial",
560
+ "targetStudent": student_id,
561
+ },
562
+ "status": "pending",
563
+ "autoGenerated": True,
564
+ "reason": f'Diagnostic identified "{subject}" as At Risk',
565
+ "priority": "high" if overall_risk == "High" else "medium",
566
+ "dueInDays": 7,
567
+ })
568
+ return quizzes
569
+
570
+ # --- AI helpers (Hugging Face) ---
571
+
572
+ async def _generate_learning_path(
573
+ self,
574
+ at_risk_subjects: List[str],
575
+ weak_topics: List[Dict[str, Any]],
576
+ grade_level: str,
577
+ ) -> Optional[str]:
578
+ """Generate a personalised learning path via HF Serverless Inference."""
579
+ try:
580
+ from main import call_hf_chat
581
+
582
+ weakness_lines = ", ".join(at_risk_subjects)
583
+ topic_lines = "\n".join(
584
+ f" - {t['topic']} ({t['accuracy']*100:.0f}% accuracy)"
585
+ for t in weak_topics[:5]
586
+ )
587
+
588
+ prompt = (
589
+ f"Generate a personalised math learning path for a {grade_level} student.\n\n"
590
+ f"Weak subjects: {weakness_lines}\n"
591
+ f"Weak topics:\n{topic_lines}\n\n"
592
+ "Create 5-7 specific activities. For each give:\n"
593
+ "1. Activity title\n"
594
+ "2. Brief description (1-2 sentences)\n"
595
+ "3. Estimated duration\n"
596
+ "4. Type (video, practice, quiz, reading, interactive)\n\n"
597
+ "Format as a numbered list. Be specific."
598
+ )
599
+
600
+ return call_hf_chat(
601
+ messages=[
602
+ {
603
+ "role": "system",
604
+ "content": (
605
+ "You are an educational curriculum expert specialising in "
606
+ "mathematics. Create clear, actionable learning paths."
607
+ ),
608
+ },
609
+ {"role": "user", "content": prompt},
610
+ ],
611
+ max_tokens=1500,
612
+ temperature=0.7,
613
+ )
614
+ except Exception as e:
615
+ logger.warning(f"Learning-path AI call failed: {e}")
616
+ return None
617
+
618
+ async def _generate_teacher_interventions(
619
+ self,
620
+ risk_classifications: Dict[str, Dict[str, Any]],
621
+ weak_topics: List[Dict[str, Any]],
622
+ ) -> Optional[str]:
623
+ """Generate teacher intervention recommendations via HF Serverless Inference."""
624
+ try:
625
+ from main import call_hf_chat
626
+
627
+ at_risk = [
628
+ subj for subj, data in risk_classifications.items()
629
+ if data["status"] == "At Risk"
630
+ ]
631
+ topic_lines = "\n".join(
632
+ f"- {t['topic']} ({t['accuracy']*100:.0f}% accuracy)"
633
+ for t in weak_topics[:5]
634
+ )
635
+
636
+ prompt = (
637
+ "You are an educational intervention specialist. A student has completed "
638
+ "their diagnostic assessment with the following results:\n\n"
639
+ f"At-Risk Subjects: {', '.join(at_risk)}\n\n"
640
+ f"Weak Topics Identified:\n{topic_lines}\n\n"
641
+ "Generate a 'Remedial Path Timeline' with:\n"
642
+ "1. Prioritised list of topics to address (most critical first)\n"
643
+ "2. Suggested teaching strategies for each topic\n"
644
+ "3. Recommended one-on-one intervention activities\n"
645
+ "4. Timeline for reassessment\n"
646
+ "5. Warning signs that student needs additional support\n\n"
647
+ "Keep response under 300 words, structured with clear sections."
648
+ )
649
+
650
+ return call_hf_chat(
651
+ messages=[
652
+ {
653
+ "role": "system",
654
+ "content": (
655
+ "You are an expert educational intervention specialist. "
656
+ "Provide actionable, structured recommendations for teachers."
657
+ ),
658
+ },
659
+ {"role": "user", "content": prompt},
660
+ ],
661
+ max_tokens=1000,
662
+ temperature=0.5,
663
+ )
664
+ except Exception as e:
665
+ logger.warning(f"Teacher-intervention AI call failed: {e}")
666
+ return None
667
+
668
+
669
+ # Module-level singleton
670
+ automation_engine = MathPulseAutomationEngine()
config/env.sample ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepSeek AI API (OpenAI-compatible)
2
+ DEEPSEEK_API_KEY=your_deepseek_api_key_here
3
+ DEEPSEEK_BASE_URL=https://api.deepseek.com
4
+ DEEPSEEK_MODEL=deepseek-chat
5
+ DEEPSEEK_REASONER_MODEL=deepseek-reasoner
6
+
7
+ # Inference provider selection
8
+ # CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
9
+ INFERENCE_PROVIDER=deepseek
10
+ INFERENCE_PRO_ENABLED=true
11
+ INFERENCE_PRO_PROVIDER=deepseek
12
+ INFERENCE_GPU_PROVIDER=deepseek
13
+ INFERENCE_CPU_PROVIDER=deepseek
14
+ INFERENCE_ENABLE_PROVIDER_FALLBACK=true
15
+ INFERENCE_PRO_PRIORITY_TASKS=chat,verify_solution
16
+ INFERENCE_PRO_ROUTE_HEADER_NAME=
17
+ INFERENCE_PRO_ROUTE_HEADER_VALUE=true
18
+
19
+ # task policy sets, comma-separated
20
+ INFERENCE_GPU_REQUIRED_TASKS=chat
21
+ INFERENCE_CPU_ONLY_TASKS=risk_classification,analytics_aggregation,file_parsing,auth,default_cpu
22
+ INFERENCE_INTERACTIVE_TASKS=chat,verify_solution,daily_insight
23
+ ENABLE_LLM_RISK_RECOMMENDATIONS=true
24
+
25
+ # local_space provider settings
26
+ # Accepts either runtime host (https://<owner>-<space>.hf.space) or
27
+ # Space page URL (https://huggingface.co/spaces/<owner>/<space>).
28
+ # Example: https://huggingface.co/spaces/Deign86/mathpulse-ai
29
+ INFERENCE_LOCAL_SPACE_URL=http://127.0.0.1:7860
30
+ INFERENCE_LOCAL_SPACE_GENERATE_PATH=/gradio_api/call/generate
31
+ INFERENCE_LOCAL_SPACE_TIMEOUT_SEC=180
32
+
33
+ # HF_TOKEN kept for Hugging Face Space deployment and dataset push only
34
+ # Alternative env names accepted by runtime/startup checks: HUGGING_FACE_API_TOKEN, HUGGINGFACE_API_TOKEN
35
+ HF_TOKEN=your_hf_token
36
+ FIREBASE_AUTH_PROJECT_ID=mathpulse-ai-2026
37
+ # Prefer one of the options below for backend Firestore/Admin access in deployment:
38
+ # FIREBASE_SERVICE_ACCOUNT_JSON={"type":"service_account",...}
39
+ # FIREBASE_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
40
+ # DeepSeek timeout settings
41
+ INFERENCE_HF_TIMEOUT_SEC=90
42
+ INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
43
+ INFERENCE_BACKGROUND_TIMEOUT_SEC=120
44
+
45
+ # Curriculum PDF storage
46
+ # Store the binary curriculum files in a Hugging Face dataset or Space repo,
47
+ # then point the backend at that repo so it downloads them at build/startup time.
48
+ CURRICULUM_SOURCE_REPO_ID=Deign86/mathpulse-curriculum
49
+ CURRICULUM_SOURCE_REPO_TYPE=dataset
50
+ CURRICULUM_SOURCE_REVISION=main
51
+
52
+ # Transactional email settings for admin-created accounts
53
+ # Primary provider: Brevo Transactional API
54
+ BREVO_API_KEY=
55
+ # Optional: Brevo MCP token (base64 JSON containing api_key) if BREVO_API_KEY is not set
56
+ BREVO_MCP_TOKEN=
57
+ # Optional SMTP fallback provider (Brevo SMTP relay)
58
+ BREVO_SMTP_LOGIN=
59
+ BREVO_SMTP_KEY=
60
+ BREVO_SMTP_HOST=smtp-relay.brevo.com
61
+ BREVO_SMTP_PORT=587
62
+ MAIL_FROM_ADDRESS=noreply@mathpulse.ai
63
+ MAIL_FROM_NAME=MathPulse AI
64
+ MAIL_SEND_TIMEOUT_SEC=15
65
+ APP_LOGIN_URL=https://mathpulse.ai
66
+ # Optional: absolute http(s) URL used as the email header avatar image.
67
+ # If unset, backend derives this from APP_LOGIN_URL + /avatar/avatar_icon.png.
68
+ APP_BRAND_AVATAR_URL=
69
+
70
+ # model defaults
71
+ # Global default model for all tasks.
72
+ INFERENCE_MODEL_ID=deepseek-chat
73
+ INFERENCE_ENFORCE_QWEN_ONLY=true
74
+ INFERENCE_QWEN_LOCK_MODEL=deepseek-chat
75
+ INFERENCE_MAX_NEW_TOKENS=8192
76
+ INFERENCE_TEMPERATURE=0.2
77
+ INFERENCE_TOP_P=0.9
78
+ INFERENCE_CHAT_MODEL_ID=deepseek-chat
79
+ # Temporary chat-only override for experiments (clear to roll back instantly).
80
+ # Example: Qwen/Qwen3-32B
81
+ INFERENCE_CHAT_MODEL_TEMP_OVERRIDE=
82
+ INFERENCE_CHAT_STRICT_MODEL_ONLY=true
83
+ INFERENCE_CHAT_HARD_MODEL_ID=meta-llama/Meta-Llama-3-70B-Instruct
84
+ INFERENCE_CHAT_HARD_TRIGGER_ENABLED=false
85
+ INFERENCE_CHAT_HARD_PROMPT_CHARS=650
86
+ INFERENCE_CHAT_HARD_HISTORY_CHARS=1500
87
+ INFERENCE_CHAT_HARD_KEYWORDS=step-by-step,show all steps,explain each step,justify each step,derive,derivation,proof,prove,rigorous,multi-step,word problem
88
+ CHAT_MAX_NEW_TOKENS=8192
89
+ CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC=90
90
+ CHAT_STREAM_TOTAL_TIMEOUT_SEC=900
91
+ CHAT_STREAM_CONTINUATION_ENABLED=true
92
+ CHAT_STREAM_CONTINUATION_MAX_ROUNDS=2
93
+ CHAT_STREAM_CONTINUATION_MIN_NEW_CHARS=24
94
+ CHAT_STREAM_CONTINUATION_TAIL_CHARS=900
95
+ CHAT_STREAM_COMPLETION_MODE_DEFAULT=auto
96
+ # Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
97
+ HF_QUIZ_MODEL_ID=
98
+ HF_QUIZ_JSON_REPAIR_MODEL_ID=deepseek-chat
99
+
100
+ # retry behavior
101
+ INFERENCE_MAX_RETRIES=3
102
+ INFERENCE_BACKOFF_SEC=1.5
103
+ INFERENCE_INTERACTIVE_MAX_RETRIES=1
104
+ INFERENCE_BACKGROUND_MAX_RETRIES=3
105
+ INFERENCE_INTERACTIVE_BACKOFF_SEC=1.0
106
+ INFERENCE_BACKGROUND_BACKOFF_SEC=1.75
107
+ INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH=1
108
+ # Max simultaneous blocking HF calls allowed from async endpoints.
109
+ HF_BLOCKING_CALL_CONCURRENCY=16
110
+ HF_ASYNC_MAX_CONNECTIONS=64
111
+ HF_ASYNC_MAX_KEEPALIVE_CONNECTIONS=32
112
+ HF_ASYNC_CONNECT_TIMEOUT_SEC=10.0
113
+ HF_ASYNC_WRITE_TIMEOUT_SEC=30.0
114
+ HF_ASYNC_POOL_TIMEOUT_SEC=10.0
115
+
116
+ # fallback model ids, comma-separated
117
+ INFERENCE_FALLBACK_MODELS=
118
+
119
+ # async generation controls
120
+ ENABLE_ASYNC_GENERATION=true
121
+ ASYNC_TASK_TTL_SECONDS=3600
122
+ ASYNC_TASK_MAX_ITEMS=400
config/models.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ models:
2
+ primary:
3
+ id: deepseek-chat
4
+ description: Default DeepSeek chat model — all chat tasks, quizzes, lessons, reasoning
5
+ max_new_tokens: 800
6
+ temperature: 0.7
7
+ top_p: 0.9
8
+
9
+ rag_primary:
10
+ id: deepseek-reasoner
11
+ description: DeepSeek reasoner — extended reasoning for complex RAG tasks
12
+ max_new_tokens: 1800
13
+ temperature: 0.2
14
+ top_p: 0.9
15
+ enable_thinking_tasks:
16
+ - rag_lesson
17
+ - verify_solution
18
+ - risk_narrative
19
+ no_thinking_tasks:
20
+ - chat
21
+ - quiz_generation
22
+ - learning_path
23
+ - daily_insight
24
+
25
+ embedding:
26
+ id: BAAI/bge-small-en-v1.5
27
+ description: Embedding model for RAG retrieval — curriculum vectorstore ingestion and semantic search
28
+ note: Not part of the generation pipeline. Read from EMBEDDING_MODEL env var only. Not swappable via admin panel.
29
+
30
+ model_capabilities:
31
+ sequential_only:
32
+ - deepseek-reasoner
33
+ supports_thinking:
34
+ - deepseek-reasoner
35
+
36
+ routing:
37
+ task_model_map:
38
+ chat: deepseek-chat
39
+ verify_solution: deepseek-reasoner
40
+ lesson_generation: deepseek-chat
41
+ quiz_generation: deepseek-chat
42
+ learning_path: deepseek-chat
43
+ daily_insight: deepseek-chat
44
+ risk_classification: deepseek-chat
45
+ risk_narrative: deepseek-reasoner
46
+ rag_lesson: deepseek-reasoner
47
+ rag_problem: deepseek-chat
48
+ rag_analysis_context: deepseek-chat
49
+
50
+ task_fallback_model_map:
51
+ chat:
52
+ - deepseek-chat
53
+ verify_solution:
54
+ - deepseek-chat
55
+ lesson_generation:
56
+ - deepseek-chat
57
+ quiz_generation:
58
+ - deepseek-chat
59
+ learning_path:
60
+ - deepseek-chat
61
+ daily_insight:
62
+ - deepseek-chat
63
+ risk_classification:
64
+ - deepseek-chat
65
+ risk_narrative:
66
+ - deepseek-chat
67
+ rag_lesson:
68
+ - deepseek-chat
69
+ rag_problem:
70
+ - deepseek-chat
71
+ rag_analysis_context:
72
+ - deepseek-chat
73
+
74
+ task_provider_map:
75
+ chat: deepseek
76
+ verify_solution: deepseek
77
+ lesson_generation: deepseek
78
+ quiz_generation: deepseek
79
+ learning_path: deepseek
80
+ daily_insight: deepseek
81
+ risk_classification: deepseek
82
+ risk_narrative: deepseek
83
+ rag_lesson: deepseek
84
+ rag_problem: deepseek
85
+ rag_analysis_context: deepseek
datasets/curriculum/.gitkeep ADDED
File without changes
datasets/sample_curriculum.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "content": "The learner demonstrates understanding of key concepts of functions. Functions can be represented as ordered pairs, tables of values, graphs, and equations. A function is a relation where each element in the domain corresponds to exactly one element in the range. Key types include linear functions (f(x)=mx+b), quadratic functions (f(x)=ax^2+bx+c), and polynomial functions of higher degrees.",
4
+ "subject": "General Mathematics",
5
+ "quarter": 1,
6
+ "content_domain": "Functions and Their Graphs",
7
+ "chunk_type": "content_explanation",
8
+ "source_file": "sample_curriculum.json",
9
+ "page": 1
10
+ },
11
+ {
12
+ "content": "Learning Competency (M11GM-Ia-1): Represents real-life situations using functions, including piece-wise functions. Example: A taxi fare is computed as P40 for the first 500 meters plus P3.50 for every additional 300 meters or fraction thereof. This is a piecewise function where f(d)=40 for d<=500 and f(d)=40+3.5*ceil((d-500)/300) for d>500.",
13
+ "subject": "General Mathematics",
14
+ "quarter": 1,
15
+ "content_domain": "Functions and Their Graphs",
16
+ "chunk_type": "learning_competency",
17
+ "source_file": "sample_curriculum.json",
18
+ "page": 1
19
+ },
20
+ {
21
+ "content": "Learning Competency (M11GM-Ia-2): Evaluates a function. To evaluate f(x) at x=a, substitute a for every occurrence of x in the expression and simplify. Example: Given f(x)=2x^2-3x+5, evaluate f(2): f(2)=2(4)-3(2)+5=8-6+5=7.",
22
+ "subject": "General Mathematics",
23
+ "quarter": 1,
24
+ "content_domain": "Functions and Their Graphs",
25
+ "chunk_type": "content_explanation",
26
+ "source_file": "sample_curriculum.json",
27
+ "page": 2
28
+ },
29
+ {
30
+ "content": "Rational Functions have the form f(x)=P(x)/Q(x) where P(x) and Q(x) are polynomials and Q(x)!=0. Key features: vertical asymptotes occur where Q(x)=0 but P(x)!=0; horizontal asymptotes depend on the degrees of P and Q. The domain of f(x) excludes all x-values that make the denominator zero. Solving rational equations and inequalities requires careful handling of the denominator signs.",
31
+ "subject": "General Mathematics",
32
+ "quarter": 1,
33
+ "content_domain": "Rational Functions",
34
+ "chunk_type": "content_explanation",
35
+ "source_file": "sample_curriculum.json",
36
+ "page": 3
37
+ },
38
+ {
39
+ "content": "Learning Competency (M11GM-Ib-3): Solves problems involving rational functions, rational equations, and rational inequalities. Example: A jeepney operator's average revenue per trip is modeled by R(n)=(5000+300n)/n where n is the number of trips per day. Find how many trips are needed for average revenue to reach P450.",
40
+ "subject": "General Mathematics",
41
+ "quarter": 1,
42
+ "content_domain": "Rational Functions",
43
+ "chunk_type": "learning_competency",
44
+ "source_file": "sample_curriculum.json",
45
+ "page": 3
46
+ },
47
+ {
48
+ "content": "Exponential Functions f(x)=a*b^x (a!=0, b>0, b!=1) model growth and decay. Key properties: domain is all real numbers; range is (0,infinity) for a>0; horizontal asymptote at y=0; y-intercept at (0,a). Solving exponential equations involves expressing both sides with the same base and equating exponents. Philippine applications include bacterial growth and radioactive decay in medical contexts.",
49
+ "subject": "General Mathematics",
50
+ "quarter": 2,
51
+ "content_domain": "Exponential Functions",
52
+ "chunk_type": "content_explanation",
53
+ "source_file": "sample_curriculum.json",
54
+ "page": 4
55
+ },
56
+ {
57
+ "content": "Compound Interest is calculated using A=P(1+r/n)^(nt) where A is the final amount, P is the principal, r is the annual interest rate (decimal), n is the number of compounding periods per year, and t is the time in years. Philippine banks offer savings and loan products with various compounding frequencies: annually (n=1), semi-annually (n=2), quarterly (n=4), monthly (n=12).",
58
+ "subject": "General Mathematics",
59
+ "quarter": 3,
60
+ "content_domain": "Business Mathematics",
61
+ "chunk_type": "content_explanation",
62
+ "source_file": "sample_curriculum.json",
63
+ "page": 5
64
+ },
65
+ {
66
+ "content": "Learning Competency (M11GM-IIc-1): Illustrates simple and compound interests. Simple interest I=Prt where P is principal, r is rate, t is time. Compound interest uses compounding formula. Example: Juana deposits P50,000 in a bank offering 3.5% interest compounded quarterly. After 3 years, her balance will be A=50000(1+0.035/4)^(4*3)=55543.19 using the compound interest formula.",
67
+ "subject": "General Mathematics",
68
+ "quarter": 3,
69
+ "content_domain": "Business Mathematics",
70
+ "chunk_type": "learning_competency",
71
+ "source_file": "sample_curriculum.json",
72
+ "page": 5
73
+ },
74
+ {
75
+ "content": "Annuities are sequences of equal payments made at equal time intervals. The future value of an ordinary annuity (payment at end of period) is FV=PMT*[(1+r)^n-1]/r and present value is PV=PMT*[1-(1+r)^(-n)]/r. Applications include Pag-IBIG housing loans, SSS contributions, and insurance premiums. Philippine context problems often involve 15-year and 25-year housing loans.",
76
+ "subject": "General Mathematics",
77
+ "quarter": 3,
78
+ "content_domain": "Business Mathematics",
79
+ "chunk_type": "content_explanation",
80
+ "source_file": "sample_curriculum.json",
81
+ "page": 6
82
+ },
83
+ {
84
+ "content": "Stocks and Bonds represent two types of investments. Stocks represent ownership shares in a corporation with dividends as earnings — prices are quoted per share in the Philippine Stock Exchange (PSE). Bonds are debt instruments where the issuing entity borrows money and pays periodic interest then repays principal at maturity. Key computations: stock yield = annual dividend per share / market price; bond yield = annual interest payment / market price.",
85
+ "subject": "General Mathematics",
86
+ "quarter": 3,
87
+ "content_domain": "Business Mathematics",
88
+ "chunk_type": "content_explanation",
89
+ "source_file": "sample_curriculum.json",
90
+ "page": 6
91
+ },
92
+ {
93
+ "content": "A Random Variable is a function that assigns a real number to each outcome in the sample space of a random experiment. A Discrete Random Variable has a countable number of possible values. The probability mass function (PMF) gives the probability P(X=x) for each value x. Key properties: sum of all P(X=x)=1 and P(X=x)>=0 for all x. Common discrete distributions include Binomial for success/failure and Poisson for rare events.",
94
+ "subject": "Statistics and Probability",
95
+ "quarter": 1,
96
+ "content_domain": "Random Variables and Probability Distributions",
97
+ "chunk_type": "content_explanation",
98
+ "source_file": "sample_curriculum.json",
99
+ "page": 7
100
+ },
101
+ {
102
+ "content": "Learning Competency (M11/12SP-IIIa-1): Illustrates a random variable (discrete and continuous). A discrete random variable takes countable values like the number of defective items in a batch of 50 bulbs. A continuous random variable takes infinite uncountable values in an interval, such as the height of Grade 11 students in centimeters. The learner distinguishes between discrete and continuous random variables for real Philippine data.",
103
+ "subject": "Statistics and Probability",
104
+ "quarter": 1,
105
+ "content_domain": "Random Variables and Probability Distributions",
106
+ "chunk_type": "learning_competency",
107
+ "source_file": "sample_curriculum.json",
108
+ "page": 7
109
+ },
110
+ {
111
+ "content": "The Normal Distribution (Gaussian) is a continuous probability distribution with a bell-shaped curve symmetric about the mean mu. Standard normal distribution has mu=0 and sigma=1; converting to standard normal z=(x-mu)/sigma allows probability calculation using z-tables. Properties: 68% of data within 1 sigma of mu, 95% within 2 sigma, 99.7% within 3 sigma. Philippine applications include standardized test scores (NAT, college entrance exams) and quality control in manufacturing.",
112
+ "subject": "Statistics and Probability",
113
+ "quarter": 1,
114
+ "content_domain": "Random Variables and Probability Distributions",
115
+ "chunk_type": "content_explanation",
116
+ "source_file": "sample_curriculum.json",
117
+ "page": 8
118
+ },
119
+ {
120
+ "content": "Conic Sections are curves formed by the intersection of a plane and a double-napped cone. The four types are: Circle (all points equidistant from a center), Parabola (all points equidistant from a focus and directrix), Ellipse (sum of distances to two foci is constant), and Hyperbola (absolute difference of distances to two foci is constant). Standard forms: Circle (x-h)^2+(y-k)^2=r^2; Parabola (x-h)^2=4p(y-k) or (y-k)^2=4p(x-h).",
121
+ "subject": "Pre-Calculus",
122
+ "quarter": 1,
123
+ "content_domain": "Analytic Geometry",
124
+ "chunk_type": "content_explanation",
125
+ "source_file": "sample_curriculum.json",
126
+ "page": 9
127
+ },
128
+ {
129
+ "content": "Learning Competency (STEM_PC11AG-Ia-1): Illustrates the different types of conic sections: circle, parabola, ellipse, and hyperbola. The learner identifies conic sections from their standard equations and determines their key properties including center, radius (for circles), vertex, focus, directrix (for parabolas), and asymptotes (for hyperbolas). Real-world applications include satellite dishes, telescope mirrors, and bridge arch designs.",
130
+ "subject": "Pre-Calculus",
131
+ "quarter": 1,
132
+ "content_domain": "Analytic Geometry",
133
+ "chunk_type": "learning_competency",
134
+ "source_file": "sample_curriculum.json",
135
+ "page": 9
136
+ }
137
+ ]
main.py ADDED
The diff for this file is too large to render. See raw diff
 
models/.gitkeep ADDED
File without changes
pre_deploy_check.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Pre-deployment validation script for MathPulse AI backend.
4
+
5
+ This script runs BEFORE deployment to catch issues early and prevent
6
+ restart loops on HF Spaces.
7
+
8
+ Usage:
9
+ python backend/pre_deploy_check.py
10
+
11
+ Exit codes:
12
+ 0: All checks passed, safe to deploy
13
+ 1: Critical issue found, deployment should be blocked
14
+ """
15
+
16
import sys
import os

# Add repo root to path (for services/ delegation) AND backend to path
# NOTE(review): this script appears to live at the backend root, so _repo_root
# is the backend directory and _parent is the repository root above it —
# confirm against the actual checkout layout.
_repo_root = os.path.dirname(os.path.abspath(__file__))
_parent = os.path.dirname(_repo_root)
_backend = _repo_root

# Add in order: parent first (so services/ can delegate), then backend (for when services/__init__.py tries to import)
# Because both use insert(0, ...), the backend dir ends up AHEAD of the parent
# on sys.path.
if _parent not in sys.path:
    sys.path.insert(0, _parent)
if _backend not in sys.path:
    sys.path.insert(0, _backend)
29
+
30
def main() -> int:
    """Run all pre-deployment validation checks.

    Returns 0 when every check passes (safe to deploy) and 1 when any check
    raises, signalling CI to block the push to HF Spaces.
    """
    banner = "=" * 70
    print(banner)
    print("🔍 PRE-DEPLOYMENT VALIDATION - Backend will run these checks")
    print(banner)
    print()

    try:
        # Import the validation module
        from backend.startup_validation import (
            validate_imports,
            validate_environment,
            validate_config_files,
            validate_file_structure,
            validate_inference_client_config,
        )

        print("Running pre-deployment checks...\n")

        # Structural checks first, then imports, environment, config files,
        # and finally the inference client configuration.
        checks = (
            validate_file_structure,
            validate_imports,
            validate_environment,
            validate_config_files,
            validate_inference_client_config,
        )
        for check in checks:
            check()
            print()

        print(banner)
        print("✅ PRE-DEPLOYMENT VALIDATION PASSED")
        print(banner)
        print()
        print("Backend is ready for deployment to HF Spaces.")
        print()

        return 0

    except Exception as e:
        print()
        print(banner)
        print("❌ PRE-DEPLOYMENT VALIDATION FAILED")
        print(banner)
        print()
        print(f"Error: {e}")
        print()
        print("🛑 BLOCK DEPLOYMENT - Fix errors above before pushing to main branch")
        print()

        return 1


if __name__ == "__main__":
    sys.exit(main())
rag/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Curriculum RAG package for DepEd-grounded retrieval utilities."""
2
+
3
+ from .curriculum_rag import (
4
+ retrieve_curriculum_context,
5
+ build_lesson_prompt,
6
+ build_problem_generation_prompt,
7
+ build_analysis_curriculum_context,
8
+ build_lesson_query,
9
+ format_retrieved_chunks,
10
+ summarize_retrieval_confidence,
11
+ )
12
+ from .vectorstore_loader import reset_vectorstore_singleton
13
+
14
+ __all__ = [
15
+ "retrieve_curriculum_context",
16
+ "build_lesson_prompt",
17
+ "build_problem_generation_prompt",
18
+ "build_analysis_curriculum_context",
19
+ "build_lesson_query",
20
+ "format_retrieved_chunks",
21
+ "summarize_retrieval_confidence",
22
+ "reset_vectorstore_singleton",
23
+ ]
rag/curriculum_rag.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Updated curriculum RAG with exact match retrieval and 7-section notebook output.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Dict, List, Optional, Tuple
8
+
9
+
10
def _to_where(
    subject: Optional[str] = None,
    quarter: Optional[int] = None,
    content_domain: Optional[str] = None,
    chunk_type: Optional[str] = None,
    module_id: Optional[str] = None,
    lesson_id: Optional[str] = None,
    competency_code: Optional[str] = None,
    storage_path: Optional[str] = None,
) -> Optional[Dict[str, object]]:
    """Build a Chroma ``where`` filter from the supplied metadata fields.

    Returns None when nothing is set, a single equality clause when exactly
    one field is set, and an ``$and`` of equality clauses otherwise.
    """
    clauses: List[Dict[str, object]] = []
    for field, value in (
        ("subject", subject),
        ("quarter", quarter),
        ("content_domain", content_domain),
        ("chunk_type", chunk_type),
        ("module_id", module_id),
        ("lesson_id", lesson_id),
        ("competency_code", competency_code),
        ("storage_path", storage_path),
    ):
        if field == "quarter":
            # quarter is numeric: 0 is falsy but still a valid filter value,
            # so test against None and coerce to int.
            if value is not None:
                clauses.append({"quarter": {"$eq": int(value)}})
        elif value:
            clauses.append({field: {"$eq": value}})

    if not clauses:
        return None
    return clauses[0] if len(clauses) == 1 else {"$and": clauses}
42
+
43
+
44
def _distance_to_score(distance: float) -> float:
    """Map a non-negative vector distance onto a (0, 1] relevance score."""
    # Clamp negatives to zero so a distance of 0 maps to a perfect 1.0.
    clamped = distance if distance > 0.0 else 0.0
    return round(1.0 / (1.0 + clamped), 4)
46
+
47
+
48
def retrieve_curriculum_context(
    query: str,
    subject: str | None = None,
    quarter: int | None = None,
    content_domain: str | None = None,
    chunk_type: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> list[dict]:
    """Semantic search over the curriculum vectorstore, with metadata filters.

    Returns up to ``top_k`` rows, each a flat dict of chunk content plus its
    metadata and a similarity ``score`` derived from the query distance.
    """
    from backend.rag.vectorstore_loader import get_vectorstore_components

    _, collection, embedder = get_vectorstore_components()
    where = _to_where(subject, quarter, content_domain, chunk_type, module_id, lesson_id, competency_code, storage_path)

    # BGE-style models expect this instruction prefix on the query side.
    prefixed_query = f"Represent this sentence for searching relevant passages: {query}"
    query_embedding = embedder.encode(
        prefixed_query,
        normalize_embeddings=True,
    ).tolist()

    result = collection.query(
        query_embeddings=[query_embedding],
        n_results=max(1, top_k),
        where=where,
        include=["documents", "metadatas", "distances"],
    )

    # Chroma returns one inner list per query embedding; we sent exactly one.
    documents = (result.get("documents") or [[]])[0]
    metadatas = (result.get("metadatas") or [[]])[0]
    distances = (result.get("distances") or [[]])[0]

    rows: List[dict] = []
    for position, content in enumerate(documents):
        md = metadatas[position] if position < len(metadatas) and isinstance(metadatas[position], dict) else {}
        # Missing distance defaults to 1.0 (a middling 0.5 score).
        distance = float(distances[position]) if position < len(distances) else 1.0
        rows.append({
            "content": str(content or ""),
            "subject": str(md.get("subject") or "unknown"),
            "quarter": int(md.get("quarter") or 0),
            "content_domain": str(md.get("content_domain") or "general"),
            "chunk_type": str(md.get("chunk_type") or "concept"),
            "source_file": str(md.get("source_file") or ""),
            "storage_path": str(md.get("storage_path") or ""),
            "module_id": str(md.get("module_id") or ""),
            "lesson_id": str(md.get("lesson_id") or ""),
            "competency_code": str(md.get("competency_code") or ""),
            "page": int(md.get("page") or 0),
            "score": _distance_to_score(distance),
        })
    return rows
101
+
102
+
103
def build_exact_lesson_query(
    topic: str,
    subject: str,
    quarter: int,
    lesson_title: str | None = None,
    competency: str | None = None,
    module_unit: str | None = None,
    learner_level: str | None = None,
    competency_code: str | None = None,
) -> str:
    """Compose a pipe-delimited retrieval query from the lesson descriptors.

    Topic, subject, and quarter are always present; the optional descriptors
    are appended in order, skipping blanks.
    """
    tokens = [topic, subject, f"Quarter {quarter}"]
    for optional in (lesson_title, competency, module_unit, learner_level, competency_code):
        trimmed = str(optional or "").strip()
        if trimmed:
            tokens.append(trimmed)
    return " | ".join(tokens)
119
+
120
+
121
def build_lesson_query(
    topic: str,
    subject: str,
    quarter: int,
    *,
    lesson_title: Optional[str] = None,
    competency: Optional[str] = None,
    module_unit: Optional[str] = None,
    learner_level: Optional[str] = None,
) -> str:
    """Build a pipe-delimited lesson retrieval query.

    Like build_exact_lesson_query but without a competency code, and with
    the optional descriptors keyword-only.
    """
    tokens = [topic, subject, f"Quarter {quarter}"]
    tokens.extend(
        trimmed
        for trimmed in (str(v or "").strip() for v in (lesson_title, competency, module_unit, learner_level))
        if trimmed
    )
    return " | ".join(tokens)
137
+
138
+
139
def retrieve_lesson_pdf_context(
    topic: str,
    subject: str,
    quarter: int,
    lesson_title: str | None = None,
    competency: str | None = None,
    module_id: str | None = None,
    lesson_id: str | None = None,
    competency_code: str | None = None,
    storage_path: str | None = None,
    top_k: int = 8,
) -> Tuple[list[dict], str]:
    """Retrieve chunks by storage_path exact match + semantic ranking; fallback to general query.

    Returns (chunks, mode) where mode is "exact" (confident storage_path hit),
    "hybrid" (storage_path hits merged with general hits), or "general".
    """
    exact_hits: list[dict] = []
    if storage_path:
        exact_hits = retrieve_curriculum_context(
            query=topic,
            subject=subject,
            quarter=quarter,
            storage_path=storage_path,
            top_k=top_k,
        )
        # A single confident hit (score >= 0.65) is enough to trust the
        # storage_path-scoped results on their own.
        if any(hit["score"] >= 0.65 for hit in exact_hits):
            return exact_hits, "exact"

    broad_hits = retrieve_curriculum_context(
        query=topic,
        subject=subject,
        quarter=quarter,
        top_k=top_k,
    )

    if storage_path and exact_hits:
        # Merge both result sets, dedupe on (file, page, content prefix),
        # and keep the top_k highest-scoring chunks.
        seen: set = set()
        merged: list[dict] = []
        for hit in exact_hits + broad_hits:
            fingerprint = f"{hit.get('source_file')}:{hit.get('page')}:{hit.get('content', '')[:60]}"
            if fingerprint in seen:
                continue
            seen.add(fingerprint)
            merged.append(hit)
        merged.sort(key=lambda h: h.get("score", 0), reverse=True)
        return merged[:top_k], "hybrid"

    return broad_hits, "general"
183
+
184
+
185
def format_retrieved_chunks(curriculum_chunks: list[dict]) -> str:
    """Render retrieved chunks as a numbered evidence list for LLM prompts."""
    if not curriculum_chunks:
        return "No curriculum context retrieved."
    entries = [
        f"{index}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
        f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) score={chunk.get('score')}\n"
        f" Excerpt: {chunk.get('content', '')}"
        for index, chunk in enumerate(curriculum_chunks, start=1)
    ]
    return "\n".join(entries)
194
+
195
+
196
def summarize_retrieval_confidence(curriculum_chunks: list[dict]) -> Dict[str, object]:
    """Aggregate the top-5 chunk scores into a confidence value and band.

    Fix: the return annotation previously used the builtin ``any`` function
    (``Dict[str, any]``) instead of a real type.

    Returns a dict with ``confidence`` (mean of the strongest five scores,
    rounded to 3 places) and ``band``: "high" >= 0.72, "medium" >= 0.5,
    otherwise "low". An empty input yields confidence 0.0 / "low".
    """
    if not curriculum_chunks:
        return {"confidence": 0.0, "band": "low"}

    # Only the five strongest hits contribute; missing scores count as 0.
    top_scores = [float(c.get("score") or 0.0) for c in curriculum_chunks[:5]]
    score = sum(top_scores) / max(1, len(top_scores))
    band = "high" if score >= 0.72 else "medium" if score >= 0.5 else "low"
    return {"confidence": round(score, 3), "band": band}
204
+
205
+
206
def organize_chunks_by_section(chunks: list[dict]) -> Dict[str, List[dict]]:
    """Organize retrieved chunks into lesson section categories.

    Buckets each chunk by its ``content_domain`` metadata; unknown domains
    (and chunks without one) land in "general". Every known section key is
    always present in the result, even when empty.

    Fix: removed the ``domain_priority`` dict, which was dead code (defined
    but never used).
    """
    sections: Dict[str, List[dict]] = {
        "introduction": [],
        "key_concepts": [],
        "worked_examples": [],
        "important_notes": [],
        "practice": [],
        "summary": [],
        "assessment": [],
        "general": [],
    }
    for chunk in chunks:
        domain = chunk.get("content_domain", "general")
        sections[domain if domain in sections else "general"].append(chunk)
    return sections
230
+
231
+
232
def build_lesson_prompt(
    *,
    lesson_title: str,
    competency: str,
    grade_level: str,
    subject: str,
    quarter: int,
    learner_level: Optional[str],
    module_unit: Optional[str],
    curriculum_chunks: list[dict],
    competency_code: Optional[str] = None,
) -> str:
    """Build the LLM prompt for a 7-section, curriculum-grounded lesson.

    Fixes: removed the unused ``organized`` variable (the result of
    organize_chunks_by_section was computed and discarded), and repaired the
    malformed key_concepts line of the JSON template (a stray ``\\n`` before
    the closing brace made the example JSON invalid).
    """
    refs_text = format_retrieved_chunks(curriculum_chunks)

    return (
        "You are a DepEd-aligned Grade 11-12 mathematics instructional designer.\n"
        "Generate a lesson in JSON format. Use ONLY the retrieved curriculum evidence below.\n"
        "Do NOT invent content. Do NOT add generic motivational text. All content must be grounded in the retrieved excerpts.\n\n"
        f"Lesson title: {lesson_title}\n"
        f"Competency code: {competency_code or 'n/a'}\n"
        f"Curriculum competency: {competency}\n"
        f"Grade level: {grade_level}\n"
        f"Subject: {subject}\n"
        f"Quarter: Q{quarter}\n"
        f"Learner level: {learner_level or 'Grade 11-12'}\n"
        f"Module/unit: {module_unit or 'n/a'}\n\n"
        "[CURRICULUM CONTEXT]\n"
        f"{refs_text}\n\n"
        "Return ONLY valid JSON with this exact structure. All 7 sections are required:\n"
        "{\n"
        ' "sections": [\n'
        ' {"type": "introduction", "title": "Introduction", "content": "..."},\n'
        ' {"type": "key_concepts", "title": "Key Concepts", "content": "...", "callouts": [{"type":"important|tip|warning","text":"..."}]},\n'
        ' {"type": "video", "title": "Video Lesson", "content": "...", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},\n'
        ' {"type": "worked_examples", "title": "Worked Examples", "examples": [{"problem":"...","steps":["Step 1: ...","Step 2: ..."],"answer":"..."}]},\n'
        ' {"type": "important_notes", "title": "Important Notes", "bulletPoints": ["...","..."]},\n'
        ' {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": [{"question":"...","solution":"..."}]},\n'
        ' {"type": "summary", "title": "Summary", "content": "..."}\n'
        " ],\n"
        ' "needsReview": false\n'
        "}\n\n"
        "Rules:\n"
        "- content in introduction, key_concepts, important_notes, summary: use paragraph/bullet text grounded in retrieved chunks\n"
        "- examples must reflect actual content from the retrieved curriculum (real formulas, real contexts)\n"
        "- practiceProblems should be derivable from worked examples\n"
        "- callouts: type is 'important', 'tip', or 'warning'\n"
        "- video section: content is a brief sentence, leave videoId empty (will be filled by backend)\n"
        "- Do not use placeholder text like 'placeholder' or 'example text'\n"
        "- Do not fabricate worked examples - use actual curriculum content\n"
    )
283
+
284
+
285
def build_problem_generation_prompt(topic: str, difficulty: str, curriculum_chunks: list[dict]) -> str:
    """Build a prompt asking for one practice problem grounded in the retrieved curriculum."""
    numbered = [
        f"{index}. [{chunk.get('source_file')} p.{chunk.get('page')}] "
        f"({chunk.get('content_domain')}/{chunk.get('chunk_type')}) {chunk.get('content', '')}"
        for index, chunk in enumerate(curriculum_chunks, start=1)
    ]
    evidence = "\n".join(numbered) if numbered else "No curriculum context retrieved."

    return (
        "Generate one practice problem strictly aligned to the retrieved DepEd competency scope.\n"
        "Do not include topics outside the competency context.\n\n"
        f"Topic: {topic}\n"
        f"Difficulty: {difficulty}\n\n"
        "[CURRICULUM CONTEXT]\n"
        f"{evidence}\n\n"
        "Return JSON with keys: problem, solution, competencyReference"
    )
303
+
304
+
305
def build_analysis_curriculum_context(weak_topics: list[str], subject: str) -> list[dict]:
    """Fetch deduplicated learning-competency chunks for each weak topic.

    Queries the vectorstore once per topic (top 2 hits each) and dedupes on
    (source file, page, content prefix), keeping first occurrences.
    """
    unique: Dict[str, dict] = {}
    for weak_topic in weak_topics:
        hits = retrieve_curriculum_context(
            query=f"DepEd learning competency for {weak_topic}",
            subject=subject,
            chunk_type="learning_competency",
            top_k=2,
        )
        for hit in hits:
            fingerprint = f"{hit.get('source_file')}::{hit.get('page')}::{hit.get('content', '')[:80]}"
            unique.setdefault(fingerprint, hit)
    return list(unique.values())
rag/firebase_storage_loader.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Firebase Storage PDF loader for curriculum ingestion.
3
+ Downloads PDFs from Firebase Storage and extracts text for ChromaDB indexing.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Tuple
12
+
13
logger = logging.getLogger("mathpulse.fb_storage_loader")

# Process-wide flag: True once a Firebase app has been initialized (or an
# already-initialized app was detected), so later calls skip re-init.
_FIREBASE_INITIALIZED = False


def _init_firebase_storage() -> Tuple[object, object]:
    """Lazily initialize Firebase Admin and return (storage module, bucket).

    Fix: the return annotation previously used the builtin ``any`` function
    (``Tuple[any, any]``) instead of a real type.

    Returns (None, None) when firebase_admin is not installed, credentials
    are unavailable, or initialization fails — callers treat that as
    "storage disabled" rather than an error.
    """
    global _FIREBASE_INITIALIZED
    if _FIREBASE_INITIALIZED:
        try:
            from firebase_admin import storage as fb_storage
            bucket = fb_storage.bucket()
            return fb_storage, bucket
        except Exception as e:
            logger.warning("Firebase storage unavailable: %s", e)
            return None, None

    try:
        import firebase_admin
        from firebase_admin import credentials, storage
    except ImportError:
        logger.warning("firebase_admin not installed")
        return None, None

    # Another component may already have initialized the default app.
    if firebase_admin._apps:
        _FIREBASE_INITIALIZED = True
        bucket = storage.bucket()
        return storage, bucket

    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")

    try:
        # Credential precedence: inline JSON env var, then a file path,
        # then Application Default Credentials.
        if sa_json:
            import json as _json
            creds = credentials.Certificate(_json.loads(sa_json))
        elif sa_file and Path(sa_file).exists():
            creds = credentials.Certificate(sa_file)
        else:
            creds = credentials.ApplicationDefault()

        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
        _FIREBASE_INITIALIZED = True
        bucket = storage.bucket()
        return storage, bucket
    except Exception as e:
        logger.warning("Firebase init failed: %s", e)
        return None, None
61
+
62
+
63
def download_pdf_from_storage(storage_path: str, dest_path: Optional[str] = None) -> Optional[bytes]:
    """Download a PDF from Firebase Storage and return its bytes.

    Optionally persists the bytes to ``dest_path`` (parent dirs created).
    Returns None when storage is unavailable, the blob is missing, or the
    download/write fails.
    """
    _, bucket = _init_firebase_storage()
    if bucket is None:
        logger.warning("Firebase Storage not available, skipping download")
        return None

    try:
        blob = bucket.blob(storage_path)
        if not blob.exists():
            logger.warning("Blob does not exist: %s", storage_path)
            return None
        payload = blob.download_as_bytes()
        logger.info("Downloaded %s (%d bytes)", storage_path, len(payload))

        if dest_path:
            target = Path(dest_path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(payload)
            logger.info("Saved to %s", dest_path)

        return payload
    except Exception as e:
        logger.error("Failed to download %s: %s", storage_path, e)
        return None
88
+
89
+
90
def list_curriculum_blobs(prefix: str = "curriculum/") -> List[Dict[str, object]]:
    """List all PDF blobs under a prefix in Firebase Storage.

    Fix: the return annotation claimed ``List[Dict[str, str]]`` but the
    entries contain an int ``size`` and a possibly-None ``updated``.

    Returns an empty list when Firebase Storage is unavailable.
    """
    _, bucket = _init_firebase_storage()
    if bucket is None:
        return []

    blobs = bucket.list_blobs(prefix=prefix)
    result = []
    for blob in blobs:
        # Only PDFs are curriculum sources; skip everything else.
        if blob.name.endswith(".pdf"):
            result.append({
                "name": blob.name,
                "size": blob.size,
                "updated": str(blob.updated) if blob.updated else None,
                "download_url": f"https://storage.googleapis.com/{bucket.name}/{blob.name}",
            })
    return result
107
+
108
+
109
# Static metadata for each curriculum PDF tracked in Firebase Storage,
# keyed by its storage path. Used to tag extracted chunks with subject,
# quarter, and content domain before indexing.
# NOTE(review): "type" distinguishes DepEd curriculum guides from SDO
# (Schools Division Office) modules — confirm downstream consumers rely
# only on these two values.
PDF_METADATA: Dict[str, dict] = {
    "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "curriculum_guide",
        "content_domain": "general",
        "quarter": 1,
        "storage_path": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
    },
    "curriculum/finite_math/Finite-Mathematics-1-1.pdf": {
        "subject": "Finite Mathematics 1",
        "subjectId": "finite-math-1",
        "type": "curriculum_guide",
        "content_domain": "finite_math",
        "quarter": 1,
        "storage_path": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
    },
    "curriculum/finite_math/Finite-Mathematics-2-1.pdf": {
        "subject": "Finite Mathematics 2",
        "subjectId": "finite-math-2",
        "type": "curriculum_guide",
        "content_domain": "finite_math",
        "quarter": 1,
        "storage_path": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
    },
    "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "type": "sdo_module",
        "content_domain": "general",
        "quarter": 1,
        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
    },
    "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": {
        "subject": "Business Mathematics",
        "subjectId": "business-math",
        "type": "sdo_module",
        "content_domain": "business",
        "quarter": 1,
        "storage_path": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
    },
    "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": {
        "subject": "Organization and Management",
        "subjectId": "org-mgmt",
        "type": "sdo_module",
        "content_domain": "org_management",
        "quarter": 1,
        "storage_path": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
    },
    "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf": {
        "subject": "Statistics and Probability",
        "subjectId": "stats-prob",
        "type": "sdo_module",
        "content_domain": "statistics",
        "quarter": 1,
        "storage_path": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf",
    },
}
rag/vectorstore_loader.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from threading import Lock
6
+ from typing import Any, Dict, Tuple
7
+
8
+ import chromadb
9
+ from sentence_transformers import SentenceTransformer
10
+
11
# Guards lazy construction and teardown of the shared vectorstore handle.
_VECTORSTORE_LOCK = Lock()
# Cached (chroma client, collection, embedder); None until first use.
_VECTORSTORE_SINGLETON: Tuple[Any, Any, SentenceTransformer] | None = None


def reset_vectorstore_singleton() -> None:
    """Drop the cached vectorstore handle so the next access rebuilds it."""
    global _VECTORSTORE_SINGLETON
    with _VECTORSTORE_LOCK:
        _VECTORSTORE_SINGLETON = None
19
+
20
+
21
def _resolve_vectorstore_dir() -> Path:
    """Locate the ChromaDB directory from CURRICULUM_VECTORSTORE_DIR.

    Absolute paths are used as-is. Relative paths are tried against the
    current working directory first; otherwise they are anchored two levels
    above this file.
    """
    configured = Path(os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore"))
    if configured.is_absolute():
        return configured

    # Prefer the CWD-relative location when it already exists, or when we
    # appear to be running from the repo root (dir named MATHPULSE-AI).
    from_cwd = Path.cwd() / configured
    if from_cwd.exists() or str(Path.cwd()).endswith("MATHPULSE-AI"):
        return from_cwd

    return Path(__file__).resolve().parents[2] / configured
33
+
34
+
35
def get_vectorstore_components(
    collection_name: str = "curriculum_chunks",
    model_name: str | None = None,
):
    """Return the process-wide (chroma client, collection, embedder) triple.

    Built lazily under a lock on first use. When ``model_name`` is not given,
    the embedding model is taken from the EMBEDDING_MODEL env var (matching
    the documented configuration) with "BAAI/bge-base-en-v1.5" as fallback —
    previously the model was hardcoded and the env var was ignored.
    Changing the model requires re-ingesting all curriculum data.
    """
    global _VECTORSTORE_SINGLETON
    if _VECTORSTORE_SINGLETON is None:
        with _VECTORSTORE_LOCK:
            # Double-check inside the lock: another thread may have built it.
            if _VECTORSTORE_SINGLETON is None:
                resolved_model = model_name or os.getenv("EMBEDDING_MODEL") or "BAAI/bge-base-en-v1.5"
                vectorstore_dir = _resolve_vectorstore_dir()
                vectorstore_dir.mkdir(parents=True, exist_ok=True)
                client = chromadb.PersistentClient(path=str(vectorstore_dir))
                # Cosine space must match how the chunks were embedded at
                # ingestion time.
                collection = client.get_or_create_collection(
                    name=collection_name,
                    metadata={"hnsw:space": "cosine"},
                )
                embedder = SentenceTransformer(resolved_model)
                _VECTORSTORE_SINGLETON = (client, collection, embedder)
    return _VECTORSTORE_SINGLETON
53
+
54
+
55
def get_vectorstore_health() -> Dict[str, Any]:
    """Report chunk count, per-subject distribution, and the store directory."""
    _, collection, _ = get_vectorstore_components()
    snapshot = collection.get(include=["metadatas"])

    per_subject: Dict[str, int] = {}
    for meta in snapshot.get("metadatas") or []:
        if not isinstance(meta, dict):
            continue
        label = str(meta.get("subject") or "unknown")
        per_subject[label] = per_subject.get(label, 0) + 1

    return {
        "chunkCount": len(snapshot.get("ids") or []),
        "subjects": per_subject,
        "vectorstoreDir": str(_resolve_vectorstore_dir()),
    }
requirements-dev.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ -r requirements.txt
2
+ mypy>=1.11.0
3
+ pytest>=8.3.0
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.104.0
2
+ uvicorn[standard]>=0.24.0
3
+ openai>=1.0.0
4
+ huggingface-hub>=0.31.0
5
+ requests>=2.31.0
6
+ pandas==2.2.3
7
+ openpyxl==3.1.5
8
+ pdfplumber==0.11.5
9
+ chromadb>=0.5.0
10
+ sentence-transformers>=3.0.0
11
+ langchain-text-splitters>=0.3.0
12
+ python-docx==1.1.2
13
+ python-multipart>=0.0.6
14
+ sympy==1.13.3
15
+ matplotlib==3.10.0
16
+ scikit-learn==1.6.1
17
+ joblib==1.4.2
18
+ scipy==1.15.1
19
+ numpy==2.2.1
20
+ firebase-admin>=6.2.0
21
+ redis[hiredis]>=5.0.0
22
+ PyYAML>=6.0.0
23
+ mypy>=1.20.0
24
+ pytest>=9.0.0
routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Backend route modules."""
routes/admin_model_routes.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, Request
2
+ from pydantic import BaseModel
3
+ from services.inference_client import (
4
+ set_runtime_model_profile, set_runtime_model_override,
5
+ reset_runtime_overrides, get_current_runtime_config, _MODEL_PROFILES,
6
+ )
7
+
8
router = APIRouter(prefix="/api/admin/model-config", tags=["admin"])

# Runtime-overridable model-selection keys; any other key is rejected with
# HTTP 400 by the /override endpoint.
ALLOWED_OVERRIDE_KEYS = {
    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
}
14
+
15
+
16
def require_admin(request: Request):
    """FastAPI dependency: return the authenticated admin user or abort.

    Raises 401 when no user is attached to the request state, 403 when the
    user is authenticated but not an admin.
    """
    current = getattr(request.state, "user", None)
    if current is None:
        raise HTTPException(status_code=401, detail="Authentication required")
    if current.role != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")
    return current
23
+
24
+
25
class ProfileSwitchRequest(BaseModel):
    """Body for POST /profile: select a named model profile."""
    # Profile name; validated downstream by set_runtime_model_profile.
    profile: str


class OverrideRequest(BaseModel):
    """Body for POST /override: set one runtime model override."""
    # Env-style key; must be in ALLOWED_OVERRIDE_KEYS.
    key: str
    # Value to apply for that key.
    value: str
32
+
33
+
34
+ @router.get("")
35
+ def get_model_config(_admin=Depends(require_admin)):
36
+ return {
37
+ **get_current_runtime_config(),
38
+ "availableProfiles": list(_MODEL_PROFILES.keys()),
39
+ "profileDescriptions": {
40
+ "dev": "deepseek-chat everywhere - fast, $0.14/M input",
41
+ "budget": "deepseek-chat for all tasks - minimal cost",
42
+ "prod": "deepseek-reasoner for RAG, deepseek-chat for chat - best quality",
43
+ },
44
+ }
45
+
46
+
47
+ @router.post("/profile")
48
+ def switch_profile(req: ProfileSwitchRequest, _admin=Depends(require_admin)):
49
+ try:
50
+ set_runtime_model_profile(req.profile)
51
+ return {"success": True, "applied": get_current_runtime_config()}
52
+ except ValueError as e:
53
+ raise HTTPException(status_code=400, detail=str(e))
54
+
55
+
56
+ @router.post("/override")
57
+ def set_override(req: OverrideRequest, _admin=Depends(require_admin)):
58
+ if req.key not in ALLOWED_OVERRIDE_KEYS:
59
+ raise HTTPException(status_code=400, detail=f"Key '{req.key}' is not overridable.")
60
+ set_runtime_model_override(req.key, req.value)
61
+ return {"success": True, "applied": get_current_runtime_config()}
62
+
63
+
64
+ @router.delete("/reset")
65
+ def reset_to_env(_admin=Depends(require_admin)):
66
+ reset_runtime_overrides()
67
+ return {"success": True, "current": get_current_runtime_config()}
routes/diagnostic.py ADDED
@@ -0,0 +1,710 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MathPulse AI - Diagnostic Assessment Router
3
+ POST /api/diagnostic/generate - Generate 15-item diagnostic test grounded in RAG curriculum
4
+ POST /api/diagnostic/submit - Score responses, run risk analysis, save to Firestore
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import traceback
12
+ import uuid
13
+ from datetime import datetime, timezone
14
+ from typing import Any, Dict, List, Optional
15
+
16
+ from fastapi import APIRouter, HTTPException, Request
17
+ from pydantic import BaseModel, Field
18
+
19
+ from services.ai_client import CHAT_MODEL, get_deepseek_client
20
+ from rag.curriculum_rag import retrieve_curriculum_context
21
+
22
+ logger = logging.getLogger("mathpulse.diagnostic")
23
+
24
+ router = APIRouter(prefix="/api/diagnostic", tags=["diagnostic"])
25
+
26
+
27
+ # ─── Pydantic Models ───────────────────────────────────────────────
28
+
29
class DiagnosticGenerateRequest(BaseModel):
    """Body for POST /api/diagnostic/generate."""
    strand: str = Field(..., description="Student strand: ABM, STEM, HUMSS, GAS, TVL")
    grade_level: str = Field(..., description="Grade level: Grade 11 or Grade 12")


class DiagnosticOption(BaseModel):
    """The four multiple-choice options, keyed by letter."""
    A: str
    B: str
    C: str
    D: str


class DiagnosticQuestionStripped(BaseModel):
    """A diagnostic question as sent to the client (answer key stripped)."""
    question_id: str
    competency_code: str
    domain: str
    topic: str
    difficulty: str
    bloom_level: str
    question_text: str
    options: DiagnosticOption
    curriculum_reference: str


class DiagnosticGenerateResponse(BaseModel):
    """Response for /generate: the test id plus its stripped questions."""
    test_id: str
    questions: List[DiagnosticQuestionStripped]
    total_items: int
    estimated_minutes: float


class DiagnosticResponseItem(BaseModel):
    """One student answer in a diagnostic submission."""
    question_id: str
    student_answer: str
    time_spent_seconds: int


class DiagnosticSubmitRequest(BaseModel):
    """Body for POST /api/diagnostic/submit."""
    test_id: str
    responses: List[DiagnosticResponseItem]


class MasterySummary(BaseModel):
    """Competency codes grouped by demonstrated mastery level."""
    mastered: List[str]
    developing: List[str]
    beginning: List[str]


class DiagnosticSubmitResponse(BaseModel):
    """Scoring result returned after a diagnostic submission."""
    success: bool
    overall_risk: str
    overall_score_percent: float
    mastery_summary: MasterySummary
    recommended_intervention: str
    xp_earned: int
    badge_unlocked: str
    redirect_to: str
86
+
87
+
88
+ # ─── Competency Code Registry ───────────────────────────────────────
89
+
90
# Canonical registry of competency codes the LLM may assign to questions.
# Code prefixes encode the subject area: NA-/MG- = General Mathematics,
# BM- = Business Mathematics, SP- = Statistics & Probability,
# FM1-/FM2- = Finite Mathematics.
COMPETENCY_REGISTRY = {
    "NA-WAGE-01": {"subject": "General Mathematics", "title": "Wages, Salaries, Overtime, Commissions, VAT"},
    "NA-SEQ-01": {"subject": "General Mathematics", "title": "Arithmetic Sequences and Series"},
    "NA-SEQ-02": {"subject": "General Mathematics", "title": "Geometric Sequences and Series"},
    "NA-FUNC-01": {"subject": "General Mathematics", "title": "Functions, Relations, Vertical Line Test"},
    "NA-FUNC-02": {"subject": "General Mathematics", "title": "Evaluating Functions, Operations, Composition"},
    "NA-FUNC-03": {"subject": "General Mathematics", "title": "One-to-One Functions, Inverse Functions"},
    "NA-EXP-01": {"subject": "General Mathematics", "title": "Exponential Functions, Equations, Inequalities"},
    "NA-LOG-01": {"subject": "General Mathematics", "title": "Logarithmic Functions"},
    "MG-TRIG-01": {"subject": "General Mathematics", "title": "Trigonometric Ratios, Right Triangles"},
    "NA-FIN-01": {"subject": "General Mathematics", "title": "Compound Interest, Maturity Value"},
    "NA-FIN-02": {"subject": "General Mathematics", "title": "Simple and General Annuities"},
    "NA-FIN-04": {"subject": "General Mathematics", "title": "Business and Consumer Loans, Amortization"},
    "NA-LOGIC-01": {"subject": "General Mathematics", "title": "Logical Propositions, Connectives, Truth Tables"},
    "BM-FDP-01": {"subject": "Business Mathematics", "title": "Fractions, Decimals, Percent Conversions"},
    "BM-FDP-02": {"subject": "Business Mathematics", "title": "Proportion: Direct, Inverse, Partitive"},
    "BM-BUS-01": {"subject": "Business Mathematics", "title": "Markup, Margin, Trade Discounts, VAT"},
    "BM-BUS-02": {"subject": "Business Mathematics", "title": "Profit, Loss, Break-even Point"},
    "BM-COMM-01": {"subject": "Business Mathematics", "title": "Straight Commission, Salary Plus Commission"},
    "BM-COMM-02": {"subject": "Business Mathematics", "title": "Commission on Cash and Installment Basis"},
    "BM-SW-01": {"subject": "Business Mathematics", "title": "Salary vs. Wage, Income"},
    "BM-SW-03": {"subject": "Business Mathematics", "title": "Mandatory Deductions: SSS, PhilHealth, Pag-IBIG"},
    "BM-SW-04": {"subject": "Business Mathematics", "title": "Overtime Pay Computation (Labor Code)"},
    "SP-RV-01": {"subject": "Statistics & Probability", "title": "Random Variables, Discrete vs. Continuous"},
    "SP-RV-02": {"subject": "Statistics & Probability", "title": "Probability Distribution, Mean, Variance, SD"},
    "SP-NORM-01": {"subject": "Statistics & Probability", "title": "Normal Curve Properties"},
    "SP-NORM-02": {"subject": "Statistics & Probability", "title": "Z-Scores, Standard Normal Table"},
    "SP-SAMP-01": {"subject": "Statistics & Probability", "title": "Types of Random Sampling"},
    "SP-SAMP-03": {"subject": "Statistics & Probability", "title": "Central Limit Theorem"},
    "SP-HYP-01": {"subject": "Statistics & Probability", "title": "Hypothesis Testing: H0 and Ha"},
    "FM1-MAT-01": {"subject": "Finite Mathematics", "title": "Matrices and Matrix Operations"},
    "FM2-PROB-01": {"subject": "Finite Mathematics", "title": "Counting Principles and Permutations"},
    "FM2-PROB-02": {"subject": "Finite Mathematics", "title": "Combinations and Probability"},
}
124
+
125
# Recommended study order per competency-code prefix, used to build the
# suggested learning path after a diagnostic is scored.
# NOTE(review): there is no "FM" entry, so Finite Mathematics gaps produce
# no learning-path items — confirm this is intended.
LEARNING_PATH_ORDER: Dict[str, List[str]] = {
    "BM": ["BM-FDP-01", "BM-FDP-02", "BM-BUS-01", "BM-BUS-02", "BM-COMM-01",
           "BM-COMM-02", "BM-SW-01", "BM-SW-03", "BM-SW-04"],
    "NA": ["NA-WAGE-01", "NA-SEQ-01", "NA-SEQ-02", "NA-FUNC-01", "NA-FUNC-02",
           "NA-FUNC-03", "NA-EXP-01", "NA-LOG-01", "NA-FIN-01", "NA-FIN-02",
           "NA-FIN-04", "NA-LOGIC-01"],
    "SP": ["SP-RV-01", "SP-RV-02", "SP-NORM-01", "SP-NORM-02", "SP-SAMP-01",
           "SP-SAMP-03", "SP-HYP-01"],
}
134
+
135
+
136
# Subjects whose curriculum chunks are retrieved (RAG) when generating a
# diagnostic for each strand. Must cover every subject listed in
# STRAND_COVERAGE_TEXT / FULL_QUESTION_SCHEMA so the generated questions
# are grounded in curriculum material.
# FIX: ABM diagnostics include 5 Statistics & Probability items (see
# STRAND_COVERAGE_TEXT["ABM"]), but the subject was missing here, so ABM
# tests were generated without any S&P curriculum context.
STRAND_SUBJECTS: Dict[str, List[str]] = {
    "ABM": ["General Mathematics", "Business Mathematics", "Statistics and Probability"],
    "STEM": ["General Mathematics", "Statistics and Probability"],
    "HUMSS": ["General Mathematics"],
    "GAS": ["General Mathematics"],
    "TVL": ["General Mathematics"],
}
143
+
144
+
145
# Human-readable summary of the per-strand item distribution (15 items total
# per strand). NOTE(review): not referenced anywhere in this module's visible
# code — presumably informational / consumed elsewhere; confirm before removal.
FULL_QUESTION_SCHEMA: Dict[str, List[str]] = {
    "ABM": [
        "General Mathematics: 5 items",
        "Business Mathematics: 5 items",
        "Statistics & Probability: 5 items",
    ],
    "STEM": [
        "General Mathematics: 7 items",
        "Statistics & Probability: 5 items",
        "Finite Mathematics: 3 items",
    ],
    "HUMSS": ["General Mathematics: 15 items"],
    "GAS": ["General Mathematics: 15 items"],
    "TVL": ["General Mathematics: 15 items"],
}
160
+
161
# Verbatim prompt fragments inserted into the system prompt to tell the LLM
# how to distribute the 15 items for each strand. These strings are part of
# the prompt contract — edit with care.
STRAND_COVERAGE_TEXT: Dict[str, str] = {
    "ABM": """FOR ABM STRAND:
- 5 questions: General Mathematics (NA-WAGE, NA-SEQ, NA-FIN topics -- wages, sequences, interest)
- 5 questions: Business Mathematics (BM-FDP, BM-BUS, BM-COMM, BM-SW topics -- percent, markup, commission, salaries, deductions using SSS/PhilHealth/Pag-IBIG rates)
- 5 questions: Statistics & Probability (SP-RV, SP-NORM topics -- random variables, normal distribution, z-scores)""",
    "STEM": """FOR STEM STRAND:
- 7 questions: General Mathematics (NA-FUNC, NA-EXP, NA-LOG, MG-TRIG, NA-FIN -- functions, exponentials, trigonometry, financial math)
- 5 questions: Statistics & Probability (SP-RV, SP-NORM, SP-SAMP, SP-HYP -- distributions, sampling, hypothesis)
- 3 questions: Finite Mathematics (FM1-MAT or FM2-PROB -- matrices or counting/probability)""",
    "HUMSS": """FOR HUMSS STRAND:
- 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
    "GAS": """FOR GAS STRAND:
- 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
    "TVL": """FOR TVL STRAND:
- 15 questions: General Mathematics only (spread across NA-WAGE, NA-SEQ, NA-FUNC, NA-FIN, NA-LOGIC -- wages, sequences, functions, interest, logic)""",
}
177
+
178
+
179
def _get_strand_coverage(strand: str) -> str:
    """Return the coverage prompt text for *strand*, falling back to STEM."""
    key = strand.upper()
    if key in STRAND_COVERAGE_TEXT:
        return STRAND_COVERAGE_TEXT[key]
    return STRAND_COVERAGE_TEXT["STEM"]
181
+
182
+
183
def _build_rag_context(strand: str) -> str:
    """Assemble curriculum reference text for every subject in the strand.

    Retrieval failures for a single subject are logged and skipped rather
    than aborting generation; returns "" when nothing could be retrieved.
    """
    query = f"SHS {strand} diagnostic assessment competency questions Grade 11"
    sections: List[str] = []

    for subject in STRAND_SUBJECTS.get(strand.upper(), ["General Mathematics"]):
        try:
            chunks = retrieve_curriculum_context(
                query=query,
                subject=subject,
                top_k=3,
            )
        except Exception as e:
            logger.warning(f"[WARN] RAG unavailable for {subject}: {e}")
            continue

        if not chunks:
            continue

        # Cap each chunk at 600 chars to keep the prompt size bounded.
        formatted_chunks: List[str] = []
        for chunk in chunks:
            source = chunk.get("source_file", "unknown")
            body = str(chunk.get("content", ""))[:600]
            formatted_chunks.append(f"[Source: {source}]\n{body}")

        header = f"\n=== {subject.upper()} CURRICULUM REFERENCE ===\n"
        sections.append(header + "\n---\n".join(formatted_chunks))

    if not sections:
        logger.warning("[WARN] RAG unavailable for diagnostic generation -- proceeding without curriculum context")
        return ""

    return "\n".join(sections)
217
+
218
+
219
def _build_system_prompt(strand: str, grade_level: str, rag_context: str) -> str:
    """Build the system prompt for the diagnostic-generation LLM call.

    The prompt text below IS this function's behavior: it fixes the item
    count (15), strand coverage, the allowed competency codes, the
    difficulty mix, and the strict JSON output contract that
    _parse_questions_response() depends on.
    """
    strand_upper = strand.upper()
    coverage_text = _get_strand_coverage(strand_upper)

    # Only emit the curriculum-reference section when RAG retrieval
    # actually returned content.
    rag_block = ""
    if rag_context:
        rag_block = f"""
OFFICIAL CURRICULUM REFERENCE (from indexed DepEd modules via RAG):
{rag_context}

IMPORTANT: Base ALL questions strictly on the curriculum content above.
Do not invent formulas, definitions, or problem types not found in the
reference material. If the reference material is insufficient for a topic,
use only standard DepEd SHS competencies for that strand.
"""

    return f"""SYSTEM ROLE:
You are MathPulse AI's Diagnostic Test Generator. Your job is to create a
15-item multiple-choice diagnostic assessment for a Filipino SHS student,
strictly grounded in the DepEd Strengthened SHS Curriculum (SDO Navotas
modules and DepEd K-12 Curriculum Guides).

STUDENT CONTEXT:
- Strand: {strand_upper}
- Grade Level: {grade_level}
- Test Purpose: DIAGNOSTIC (pre-learning, not summative -- assess current
  knowledge to build a personalized learning path)
{rag_block}
STRAND-SUBJECT COVERAGE:
Generate 15 questions distributed across these subjects and domains:

{coverage_text}

COMPETENCY CODE FORMAT:
Assign each question exactly one competency_code from this registry:
  General Math:  NA-WAGE-01, NA-SEQ-01, NA-SEQ-02, NA-FUNC-01,
                 NA-FUNC-02, NA-FUNC-03, NA-EXP-01, NA-LOG-01,
                 MG-TRIG-01, NA-FIN-01, NA-FIN-02, NA-FIN-04,
                 NA-LOGIC-01
  Business Math: BM-FDP-01, BM-FDP-02, BM-BUS-01, BM-BUS-02,
                 BM-COMM-01, BM-COMM-02, BM-SW-01, BM-SW-03, BM-SW-04
  Statistics:    SP-RV-01, SP-RV-02, SP-NORM-01, SP-NORM-02,
                 SP-SAMP-01, SP-SAMP-03, SP-HYP-01
  Finite Math:   FM1-MAT-01, FM2-PROB-01, FM2-PROB-02

DIFFICULTY DISTRIBUTION (across all 15 questions):
- Easy (Bloom: remembering / understanding): 6 questions (40%)
- Medium (Bloom: applying / analyzing): 6 questions (40%)
- Hard (Bloom: evaluating / creating): 3 questions (20%)

QUESTION RULES:
1. All questions are 4-option multiple choice (A, B, C, D).
2. Use Filipino real-life context: peso amounts, Filipino names
   (Juan, Maria, Jose), Philippine institutions (SSS, PhilHealth,
   Pag-IBIG, BIR, BDO, local schools, SM malls).
3. Never use trick questions. Wrong options must be plausible but clearly
   incorrect to a student who knows the concept.
4. Include a solution_hint (1-2 sentences) -- this is for the backend
   scoring engine ONLY. NEVER include it in the client response.
5. Cover as many different competency codes as possible across 15 items.
   Do not repeat the same competency code more than twice.

OUTPUT FORMAT (strict JSON array, no extra text, no markdown):
[
  {{
    "question_id": "DX-<uuid>",
    "competency_code": "BM-SW-03",
    "domain": "Business Mathematics",
    "topic": "Mandatory Deductions",
    "difficulty": "medium",
    "bloom_level": "applying",
    "question_text": "...",
    "options": {{"A": "...", "B": "...", "C": "...", "D": "..."}},
    "correct_answer": "C",
    "solution_hint": "Compute SSS contribution using the prescribed table...",
    "curriculum_reference": "SDO Navotas Bus. Math SHS 1st Sem - Salaries and Wages"
  }}
]
"""
298
+
299
+
300
async def _call_deepseek(system_prompt: str, user_message: str, temperature: float = 0.7) -> str:
    """Run one DeepSeek chat completion and return the raw content string.

    Raises:
        HTTPException(500) on any client/API failure.
    """
    import asyncio  # local import: not guaranteed at module level

    try:
        client = get_deepseek_client()
        # FIX: the OpenAI-compatible SDK client is synchronous; calling it
        # directly inside this coroutine blocked the event loop for the
        # whole (multi-second) generation. Run it in a worker thread.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
            temperature=temperature,
            response_format={"type": "json_object"},
        )
        return response.choices[0].message.content or ""
    except Exception as e:
        logger.error(f"DeepSeek API error: {e}")
        raise HTTPException(status_code=500, detail="AI model unavailable. Please try again later.")
316
+
317
+
318
+ def _parse_questions_response(raw_response: str) -> List[Dict[str, Any]]:
319
+ try:
320
+ data = json.loads(raw_response)
321
+ if isinstance(data, dict):
322
+ for key in ("questions", "items", "data", "results"):
323
+ if key in data and isinstance(data[key], list):
324
+ return data[key]
325
+ for key, value in data.items():
326
+ if isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
327
+ if "question_text" in value[0]:
328
+ return value
329
+ if isinstance(data, list):
330
+ return data
331
+ except json.JSONDecodeError:
332
+ pass
333
+
334
+ import re
335
+ match = re.search(r'\[.*\]', raw_response, re.DOTALL)
336
+ if match:
337
+ try:
338
+ return json.loads(match.group())
339
+ except json.JSONDecodeError:
340
+ pass
341
+
342
+ raise ValueError("Could not parse questions from AI response")
343
+
344
+
345
async def _generate_questions(strand: str, grade_level: str) -> tuple[str, List[Dict[str, Any]]]:
    """Generate up to 15 diagnostic questions for (strand, grade_level).

    Retries once with a lower temperature when the model returns
    unparseable JSON.

    Returns:
        (test_id, questions) where test_id is "DX-<hex12>".

    Raises:
        ValueError: when the retry also returns malformed JSON.
        HTTPException(500): when both attempts yield no questions.
    """
    test_id = f"DX-{uuid.uuid4().hex[:12]}"
    rag_context = _build_rag_context(strand)
    system_prompt = _build_system_prompt(strand, grade_level, rag_context)
    # FIX: the user message previously hard-coded "Grade 11", silently
    # ignoring the grade_level argument for Grade 12 students.
    user_message = f"Generate 15 diagnostic questions for a {grade_level} {strand} student."

    for attempt in range(2):
        # First attempt is creative; the retry is more deterministic so the
        # model is likelier to emit strictly valid JSON.
        temperature = 0.7 if attempt == 0 else 0.3
        try:
            raw_response = await _call_deepseek(system_prompt, user_message, temperature)
            questions = _parse_questions_response(raw_response)
            if questions:
                return test_id, questions[:15]
        except ValueError:
            if attempt == 0:
                logger.warning("Malformed JSON from DeepSeek, retrying with temperature=0.3")
                continue
            raise

    raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")
365
+
366
+
367
async def _store_diagnostic_session(
    firestore_client: Any,
    user_id: str,
    test_id: str,
    strand: str,
    grade_level: str,
    questions: List[Dict[str, Any]],
) -> bool:
    """Persist the full question set (including answers) for later scoring.

    Returns:
        True on success, False on any Firestore error — callers treat
        storage as best-effort.
    """
    try:
        # FIX: SERVER_TIMESTAMP is a sentinel on the firestore *module*,
        # not an attribute of the Client instance; the previous
        # `firestore_client.SERVER_TIMESTAMP` raised AttributeError on
        # every call, so sessions were never stored.
        from firebase_admin import firestore as fs

        doc_ref = (
            firestore_client.collection("diagnosticSessions")
            .document(test_id)
        )
        doc_ref.set({
            "testId": test_id,
            "userId": user_id,
            "generatedAt": fs.SERVER_TIMESTAMP,
            "strand": strand,
            "gradeLevel": grade_level,
            "questions": questions,
            "status": "in_progress",
        })
        return True
    except Exception as e:
        logger.error(f"Failed to store diagnostic session: {e}")
        return False
393
+
394
+
395
+ def _strip_answers(questions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
396
+ stripped = []
397
+ for q in questions:
398
+ stripped.append({
399
+ "question_id": q.get("question_id", ""),
400
+ "competency_code": q.get("competency_code", ""),
401
+ "domain": q.get("domain", ""),
402
+ "topic": q.get("topic", ""),
403
+ "difficulty": q.get("difficulty", ""),
404
+ "bloom_level": q.get("bloom_level", ""),
405
+ "question_text": q.get("question_text", ""),
406
+ "options": q.get("options", {}),
407
+ "curriculum_reference": q.get("curriculum_reference", ""),
408
+ })
409
+ return stripped
410
+
411
+
412
+ # ─── ENDPOINT 1: Generate Diagnostic ────────────────────────────────
413
+
414
@router.post("/generate", response_model=DiagnosticGenerateResponse)
async def generate_diagnostic(request: DiagnosticGenerateRequest, req: Request):
    """Generate a 15-item diagnostic test for the authenticated student.

    The full question set (answers + hints included) is stored server-side
    in diagnosticSessions; the response only carries stripped questions.
    """
    # Auth middleware is expected to have attached a user to request state.
    user = getattr(req.state, "user", None)
    if not user or not getattr(user, "uid", None):
        raise HTTPException(status_code=401, detail="Authentication required")

    try:
        test_id, questions = await _generate_questions(
            request.strand,
            request.grade_level,
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Generation error: {e}\n{traceback.format_exc()}")
        raise HTTPException(status_code=500, detail="Assessment generation failed. Please try again.")

    # Session storage is best-effort: a Firestore outage should not block
    # the student from taking the test (scoring will later 404, though).
    try:
        # FIX: removed unused `import firebase_admin` — the from-import
        # below is sufficient.
        from firebase_admin import firestore as fs
        firestore_client = fs.client()
        await _store_diagnostic_session(
            firestore_client,
            user.uid,
            test_id,
            request.strand,
            request.grade_level,
            questions,
        )
    except Exception as e:
        logger.warning(f"Could not store diagnostic session: {e}")

    client_questions = _strip_answers(questions)

    return DiagnosticGenerateResponse(
        test_id=test_id,
        questions=client_questions,
        total_items=len(client_questions),
        # NOTE(review): fixed estimate regardless of item mix — confirm.
        estimated_minutes=11.6,
    )
454
+
455
+
456
+ # ─── ENDPOINT 2: Submit and Evaluate ─────────────────────────────────
457
+
458
def _score_responses(stored_questions: List[Dict[str, Any]], responses: List[DiagnosticResponseItem]) -> tuple:
    """Grade student responses against the stored answer key.

    Returns:
        (scored, total_correct, domain_correct, domain_total, comp_attempts)
        where scored is a per-response record list and comp_attempts maps
        competency code -> list of correctness booleans.
    """
    question_map: Dict[str, Dict[str, Any]] = {
        q.get("question_id", ""): q for q in stored_questions
    }

    scored: List[Dict[str, Any]] = []
    total_correct = 0
    domain_correct: Dict[str, int] = {}
    domain_total: Dict[str, int] = {}
    comp_attempts: Dict[str, List[bool]] = {}

    for answer in responses:
        question = question_map.get(answer.question_id, {})
        expected = question.get("correct_answer", "")
        # Case/whitespace-insensitive comparison of the answer letter.
        is_correct = answer.student_answer.strip().upper() == expected.strip().upper()

        domain = question.get("domain", "Unknown")
        code = question.get("competency_code", "")

        domain_correct.setdefault(domain, 0)
        domain_total[domain] = domain_total.get(domain, 0) + 1
        if is_correct:
            domain_correct[domain] += 1
            total_correct += 1

        comp_attempts.setdefault(code, []).append(is_correct)

        scored.append({
            "question_id": answer.question_id,
            "competency_code": code,
            "domain": domain,
            "topic": question.get("topic", ""),
            "difficulty": question.get("difficulty", ""),
            "bloom_level": question.get("bloom_level", ""),
            "student_answer": answer.student_answer,
            "correct_answer": expected,
            "is_correct": is_correct,
            "time_spent_seconds": answer.time_spent_seconds,
        })

    return scored, total_correct, domain_correct, domain_total, comp_attempts
503
+
504
+
505
+ def _compute_domain_scores(domain_correct: Dict[str, int], domain_total: Dict[str, int]) -> Dict[str, Dict[str, Any]]:
506
+ domain_scores = {}
507
+ for domain in domain_total:
508
+ correct = domain_correct.get(domain, 0)
509
+ total = domain_total[domain]
510
+ pct = (correct / total * 100) if total > 0 else 0
511
+ mastery = "mastered" if pct >= 80 else "developing" if pct >= 60 else "beginning"
512
+ domain_scores[domain] = {
513
+ "correct": correct,
514
+ "total": total,
515
+ "percentage": round(pct, 1),
516
+ "mastery_level": mastery,
517
+ }
518
+ return domain_scores
519
+
520
+
521
def _compute_risk_profile(
    total_correct: int,
    total_items: int,
    scored_responses: List[Dict[str, Any]],
    domain_scores: Dict[str, Dict[str, Any]],
) -> Dict[str, Any]:
    """Derive the overall risk band, mastery summary, and suggested path.

    Args:
        total_correct: number of correctly answered items.
        total_items: number of items in the test.
        scored_responses: per-response records from _score_responses().
        domain_scores: output of _compute_domain_scores().

    Returns:
        Dict consumed by _save_results() and the /submit response.
    """
    overall_pct = (total_correct / total_items * 100) if total_items > 0 else 0

    mastered = [d for d, s in domain_scores.items() if s["mastery_level"] == "mastered"]
    developing = [d for d, s in domain_scores.items() if s["mastery_level"] == "developing"]
    beginning = [d for d, s in domain_scores.items() if s["mastery_level"] == "beginning"]

    # Competency codes attempted at least twice and never answered right.
    critical_gaps: List[str] = []
    for resp in scored_responses:
        code = resp.get("competency_code", "")
        if not code or code in critical_gaps:
            continue
        attempts = [r for r in scored_responses if r.get("competency_code") == code]
        if len(attempts) >= 2 and not any(r.get("is_correct") for r in attempts):
            critical_gaps.append(code)

    if overall_pct >= 75 and len(beginning) == 0:
        overall_risk = "low"
    elif overall_pct >= 55 or len(beginning) <= 2:
        overall_risk = "moderate"
    elif overall_pct >= 40 or len(beginning) <= 4:
        overall_risk = "high"
    else:
        overall_risk = "critical"

    # FIX: the previous code compared domain display names (e.g.
    # "Business Mathematics") against code prefixes ("BM") with
    # startswith(), which never matched — beginning domains contributed
    # nothing, while the developing loop appended EVERY learning path for
    # every domain. Map display names to prefixes explicitly instead.
    domain_prefixes = {
        "General Mathematics": "NA",
        "Business Mathematics": "BM",
        "Statistics & Probability": "SP",
        "Statistics and Probability": "SP",
        "Finite Mathematics": "FM",
    }

    suggested_path: List[str] = []

    def _extend_path(codes: List[str]) -> None:
        # Append while preserving order and de-duplicating.
        for comp_code in codes:
            if comp_code not in suggested_path:
                suggested_path.append(comp_code)

    _extend_path(critical_gaps)
    # Weakest domains first, then developing ones.
    for domain in beginning + developing:
        prefix = domain_prefixes.get(domain)
        if prefix:
            _extend_path(LEARNING_PATH_ORDER.get(prefix, []))

    interventions = {
        "low": "Great job! You have a solid foundation. Keep practicing to maintain your skills!",
        "moderate": "You're making good progress. Focus on the topics where you need more practice. Kaya mo yan!",
        "high": "Don't worry! With focused practice on your weak areas, you'll improve quickly.",
        "critical": "Let's work on this together. Start with the basics and build up your confidence step by step.",
    }

    return {
        "overall_risk": overall_risk,
        "overall_score_percent": round(overall_pct, 1),
        "mastery_summary": {
            "mastered": mastered,
            "developing": developing,
            "beginning": beginning,
        },
        "weak_domains": beginning,
        "critical_gaps": critical_gaps,
        "recommended_intervention": interventions.get(overall_risk, interventions["moderate"]),
        "suggested_learning_path": suggested_path[:20],
    }
592
+
593
+
594
async def _save_results(
    firestore_client: Any,
    user_id: str,
    test_id: str,
    strand: str,
    grade_level: str,
    scored_responses: List[Dict[str, Any]],
    domain_scores: Dict[str, Dict[str, Any]],
    risk_profile: Dict[str, Any],
    total_correct: int,
    total_items: int,
) -> None:
    """Write the diagnostic result, update student progress, close the session.

    Raises:
        Exception: re-raised after logging, so the caller surfaces the error.
    """
    try:
        # FIX: SERVER_TIMESTAMP, Increment and ArrayUnion live on the
        # firestore *module*, not on the Client instance; the previous
        # `firestore_client.SERVER_TIMESTAMP` etc. raised AttributeError,
        # so results were never saved.
        from firebase_admin import firestore as fs

        overall_pct = round(total_correct / total_items * 100, 1) if total_items > 0 else 0

        firestore_client.collection("diagnosticResults").document(user_id).set({
            "userId": user_id,
            "testId": test_id,
            "takenAt": fs.SERVER_TIMESTAMP,
            "strand": strand,
            "gradeLevel": grade_level,
            "status": "completed",
            "totalItems": total_items,
            "totalScore": total_correct,
            "percentageScore": overall_pct,
            "responses": scored_responses,
            "domainScores": domain_scores,
            "riskProfile": risk_profile,
        })

        mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))

        # 50 XP base for finishing the diagnostic, +10 per mastered domain
        # (mirrors xp_earned in the /submit response).
        firestore_client.collection("studentProgress").document(user_id).collection("stats").document("main").set({
            "learning_path": risk_profile.get("suggested_learning_path", []),
            "current_topic_index": 0,
            "total_xp": fs.Increment(50 + mastered_count * 10),
            "current_streak_days": 1,
            "badges": fs.ArrayUnion(["first_assessment"]),
            "topics_mastered": mastered_count,
            "diagnostic_completed": True,
            "overall_risk": risk_profile.get("overall_risk", "moderate"),
        }, merge=True)

        firestore_client.collection("diagnosticSessions").document(test_id).update({
            "status": "completed",
            "completedAt": fs.SERVER_TIMESTAMP,
        })

    except Exception as e:
        logger.error(f"Firestore save error: {e}")
        raise
645
+
646
+
647
@router.post("/submit", response_model=DiagnosticSubmitResponse)
async def submit_diagnostic(request: DiagnosticSubmitRequest, req: Request):
    """Score a completed diagnostic, persist results, and return the summary."""
    user = getattr(req.state, "user", None)
    if not user or not getattr(user, "uid", None):
        raise HTTPException(status_code=401, detail="Authentication required")

    try:
        # FIX: removed unused `import firebase_admin` — the from-import is
        # sufficient here.
        from firebase_admin import firestore as fs
        firestore_client = fs.client()
    except Exception as e:
        # FIX: previously the exception was bound but never logged, hiding
        # the root cause of every 503.
        logger.error(f"Firestore client initialization failed: {e}")
        raise HTTPException(status_code=503, detail="Database unavailable")

    try:
        session_doc = firestore_client.collection("diagnosticSessions").document(request.test_id).get()
        if not session_doc.exists:
            raise HTTPException(status_code=404, detail="Diagnostic session not found")

        session_data = session_doc.to_dict() or {}
        stored_questions = session_data.get("questions", [])
        strand = session_data.get("strand", "STEM")
        grade_level = session_data.get("gradeLevel", "Grade 11")

        if not stored_questions:
            raise HTTPException(status_code=400, detail="No questions found for this session")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Session retrieval error: {e}")
        raise HTTPException(status_code=500, detail="Failed to retrieve diagnostic session")

    scored_responses, total_correct, domain_correct, domain_total, _ = _score_responses(
        stored_questions, request.responses
    )

    total_items = len(stored_questions)
    domain_scores = _compute_domain_scores(domain_correct, domain_total)
    risk_profile = _compute_risk_profile(total_correct, total_items, scored_responses, domain_scores)

    await _save_results(
        firestore_client,
        user.uid,
        request.test_id,
        strand,
        grade_level,
        scored_responses,
        domain_scores,
        risk_profile,
        total_correct,
        total_items,
    )

    mastered_count = len(risk_profile.get("mastery_summary", {}).get("mastered", []))

    return DiagnosticSubmitResponse(
        success=True,
        overall_risk=risk_profile["overall_risk"],
        overall_score_percent=risk_profile["overall_score_percent"],
        mastery_summary=MasterySummary(**risk_profile["mastery_summary"]),
        recommended_intervention=risk_profile["recommended_intervention"],
        # Keep in sync with the XP increment written by _save_results().
        xp_earned=50 + mastered_count * 10,
        badge_unlocked="first_assessment",
        redirect_to="/dashboard",
    )
routes/rag_routes.py ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import re
7
+ from datetime import datetime, timezone
8
+ from threading import Lock
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from fastapi import APIRouter, HTTPException, Request
12
+ from pydantic import BaseModel, Field
13
+
14
+ from services.inference_client import (
15
+ InferenceRequest,
16
+ create_default_client,
17
+ is_sequential_model,
18
+ get_model_for_task,
19
+ )
20
+ from rag.curriculum_rag import (
21
+ build_analysis_curriculum_context,
22
+ build_lesson_prompt,
23
+ build_lesson_query,
24
+ build_problem_generation_prompt,
25
+ format_retrieved_chunks,
26
+ retrieve_curriculum_context,
27
+ retrieve_lesson_pdf_context,
28
+ summarize_retrieval_confidence,
29
+ )
30
+ from rag.vectorstore_loader import get_vectorstore_health, reset_vectorstore_singleton
31
+
32
# firebase_admin is optional at import time: when it is missing or fails to
# import (e.g. local dev without credentials), firebase_firestore is None
# and RAG usage logging becomes a silent no-op (see _log_rag_usage).
try:
    from firebase_admin import firestore as firebase_firestore
except Exception:
    firebase_firestore = None
36
+
37
logger = logging.getLogger("mathpulse.rag")
router = APIRouter(prefix="/api/rag", tags=["rag"])

# Lazily-created, process-wide inference client; guarded by _inference_lock
# so concurrent first requests create it only once.
_inference_client = None
_inference_lock = Lock()
42
+
43
+
44
def _get_inference_client():
    """Return the shared inference client, creating it on first use.

    Thread-safe: uses double-checked locking around the lazy init.
    """
    global _inference_client
    if _inference_client is not None:
        return _inference_client
    with _inference_lock:
        # Re-check inside the lock: another thread may have won the race.
        if _inference_client is None:
            _inference_client = create_default_client()
    return _inference_client
51
+
52
+
53
async def _generate_text(
    prompt: str,
    task_type: str,
    max_new_tokens: int = 900,
    enable_thinking: bool = False,
) -> str:
    """Run *prompt* through the shared inference client and return the text."""
    chat_messages = [
        {"role": "system", "content": "You are a precise DepEd-aligned curriculum assistant."},
        {"role": "user", "content": prompt},
    ]
    inference_request = InferenceRequest(
        messages=chat_messages,
        task_type=task_type,
        max_new_tokens=max_new_tokens,
        temperature=0.2,
        top_p=0.9,
        enable_thinking=enable_thinking,
    )
    client = _get_inference_client()
    return client.generate_from_messages(inference_request)
71
+
72
+
73
def _log_rag_usage(
    request: Request,
    *,
    event_type: str,
    topic: str,
    subject: str,
    quarter: Optional[int],
    chunks: List[Dict[str, Any]],
) -> None:
    """Best-effort write of one RAG usage event to the `rag_usage` collection.

    No-op when firebase_admin is unavailable; never raises (failures are
    logged at warning level only).
    """
    if firebase_firestore is None:
        return
    try:
        user = getattr(request.state, "user", None)
        uid = getattr(user, "uid", None)

        hit_domains = sorted({
            str(chunk.get("content_domain") or "").strip()
            for chunk in chunks
            if chunk.get("content_domain")
        })
        best_score = max((float(chunk.get("score") or 0.0) for chunk in chunks), default=0.0)

        firebase_firestore.client().collection("rag_usage").add({
            "userId": uid,
            "type": event_type,
            "topic": topic,
            "subject": subject,
            "quarter": quarter,
            "retrievedChunks": len(chunks),
            "topScore": best_score,
            "curriculumDomainsHit": hit_domains,
            "timestamp": firebase_firestore.SERVER_TIMESTAMP,
            "createdAtIso": datetime.now(timezone.utc).isoformat(),
        })
    except Exception as exc:
        logger.warning("rag_usage logging skipped: %s", exc)
104
+
105
+
106
+ def _strip_thinking_and_parse(text: str) -> dict:
107
+ cleaned = text.strip()
108
+ cleaned = re.sub(r" </think>", "", cleaned, flags=re.DOTALL).strip()
109
+ if "{" in cleaned and "}" in cleaned:
110
+ try:
111
+ start = cleaned.find("{")
112
+ end = cleaned.rfind("}") + 1
113
+ parsed = json.loads(cleaned[start:end])
114
+ if isinstance(parsed, dict):
115
+ return parsed
116
+ except Exception:
117
+ pass
118
+ return {"explanation": text}
119
+
120
+
121
class RagLessonRequest(BaseModel):
    """Request body for RAG-grounded lesson generation."""

    topic: str
    subject: str
    quarter: int
    lessonTitle: Optional[str] = None
    learningCompetency: Optional[str] = None
    moduleUnit: Optional[str] = None
    learnerLevel: Optional[str] = None
    userId: Optional[str] = None
    moduleId: Optional[str] = None
    lessonId: Optional[str] = None
    competencyCode: Optional[str] = None
    # Optional storage location of the source lesson PDF.
    storagePath: Optional[str] = None
134
+
135
+
136
class RagProblemRequest(BaseModel):
    """Request body for RAG-grounded practice-problem generation."""

    topic: str
    subject: str
    quarter: int
    difficulty: str = Field(default="medium")
    userId: Optional[str] = None
142
+
143
+
144
class RagAnalysisContextRequest(BaseModel):
    """Request body for building curriculum context around weak topics."""

    weakTopics: List[str]
    subject: str
    userId: Optional[str] = None
148
+
149
+
150
@router.get("/health")
async def rag_health():
    """Health probe for the RAG stack.

    Reports vectorstore stats plus the active inference model. On
    vectorstore failure it returns status "degraded" with a warning rather
    than an HTTP error, so monitors can distinguish soft failures.
    """
    active_model = get_model_for_task("rag_lesson")
    is_seq = is_sequential_model(active_model)
    try:
        health = get_vectorstore_health()
        return {
            "status": "ok",
            "chunkCount": health["chunkCount"],
            "subjects": health["subjects"],
            # NOTE(review): this is the probe time, not an actual ingestion
            # timestamp — confirm whether consumers rely on it.
            "lastIngested": datetime.now(timezone.utc).isoformat(),
            "activeModel": active_model,
            "isSequentialModel": is_seq,
        }
    except Exception as exc:
        return {
            "status": "degraded",
            "chunkCount": 0,
            "subjects": {},
            "lastIngested": None,
            "activeModel": active_model,
            "isSequentialModel": is_seq,
            "warning": str(exc),
        }
174
+
175
+
176
+ def _fetch_youtube_video(lesson_title: str, subject: str, competency: str, quarter: int) -> dict:
177
+ try:
178
+ from backend.services.youtube_service import get_video_for_lesson
179
+ except ImportError:
180
+ return {}
181
+ try:
182
+ video = get_video_for_lesson(lesson_title, subject, competency, quarter)
183
+ return video or {}
184
+ except Exception as e:
185
+ logger.warning("YouTube search failed: %s", e)
186
+ return {}
187
+
188
+
189
+ def _ensure_7_sections(lesson_data: dict, lesson_title: str) -> dict:
190
+ sections = lesson_data.get("sections", [])
191
+ section_types = {s.get("type") for s in sections}
192
+ required = ["introduction", "key_concepts", "video", "worked_examples", "important_notes", "try_it_yourself", "summary"]
193
+
194
+ default_content = {
195
+ "introduction": {"type": "introduction", "title": "Introduction", "content": f"Welcome to the lesson on {lesson_title}."},
196
+ "key_concepts": {"type": "key_concepts", "title": "Key Concepts", "content": "Below are the key concepts covered in this lesson.", "callouts": []},
197
+ "video": {"type": "video", "title": "Video Lesson", "content": "Watch this explanation to understand the concepts visually.", "videoId": "", "videoTitle": "", "videoChannel": "", "embedUrl": "", "thumbnailUrl": ""},
198
+ "worked_examples": {"type": "worked_examples", "title": "Worked Examples", "examples": []},
199
+ "important_notes": {"type": "important_notes", "title": "Important Notes", "bulletPoints": []},
200
+ "try_it_yourself": {"type": "try_it_yourself", "title": "Try It Yourself", "practiceProblems": []},
201
+ "summary": {"type": "summary", "title": "Summary", "content": f"Great job completing the lesson on {lesson_title}!"},
202
+ }
203
+
204
+ filled = {}
205
+ for req_type in required:
206
+ for existing in sections:
207
+ if existing.get("type") == req_type:
208
+ filled[req_type] = existing
209
+ break
210
+ else:
211
+ filled[req_type] = default_content[req_type]
212
+
213
+ ordered = [filled[t] for t in required]
214
+
215
+ for i, section in enumerate(ordered):
216
+ s_type = section.get("type")
217
+ if s_type == "key_concepts" and not section.get("callouts"):
218
+ section["callouts"] = []
219
+ if s_type == "worked_examples" and not section.get("examples"):
220
+ section["examples"] = []
221
+ if s_type == "important_notes" and not section.get("bulletPoints"):
222
+ section["bulletPoints"] = []
223
+ if s_type == "try_it_yourself" and not section.get("practiceProblems"):
224
+ section["practiceProblems"] = []
225
+ ordered[i] = section
226
+
227
+ return {**lesson_data, "sections": ordered}
228
+
229
+
230
+ @router.post("/lesson")
231
+ async def rag_lesson(request: Request, payload: RagLessonRequest):
232
+ chunks, retrieval_mode = retrieve_lesson_pdf_context(
233
+ query=build_lesson_query(
234
+ payload.topic,
235
+ payload.subject,
236
+ payload.quarter,
237
+ lesson_title=payload.lessonTitle,
238
+ competency=payload.learningCompetency,
239
+ module_unit=payload.moduleUnit,
240
+ learner_level=payload.learnerLevel,
241
+ ),
242
+ subject=payload.subject,
243
+ quarter=payload.quarter,
244
+ lesson_title=payload.lessonTitle,
245
+ competency=payload.learningCompetency,
246
+ module_id=payload.moduleId,
247
+ lesson_id=payload.lessonId,
248
+ competency_code=payload.competencyCode,
249
+ storage_path=payload.storagePath,
250
+ top_k=8,
251
+ )
252
+
253
+ if not chunks:
254
+ raise HTTPException(
255
+ status_code=404,
256
+ detail={
257
+ "error": "no_curriculum_context",
258
+ "message": f"No curriculum content found for lesson '{payload.lessonTitle}' ({payload.subject} Q{payload.quarter}). Please ensure the PDF has been ingested.",
259
+ "retrievalBand": "low",
260
+ "sources": [],
261
+ },
262
+ )
263
+
264
+ prompt = build_lesson_prompt(
265
+ lesson_title=payload.lessonTitle or payload.topic,
266
+ competency=payload.learningCompetency or payload.topic,
267
+ grade_level="Grade 11-12",
268
+ subject=payload.subject,
269
+ quarter=payload.quarter,
270
+ learner_level=payload.learnerLevel,
271
+ module_unit=payload.moduleUnit,
272
+ curriculum_chunks=chunks,
273
+ competency_code=payload.competencyCode,
274
+ )
275
+
276
+ raw_explanation = await _generate_text(
277
+ prompt,
278
+ task_type="lesson_generation",
279
+ max_new_tokens=1800,
280
+ enable_thinking=True,
281
+ )
282
+
283
+ parsed_lesson = _strip_thinking_and_parse(raw_explanation)
284
+ parsed_lesson = _ensure_7_sections(parsed_lesson, payload.lessonTitle or payload.topic)
285
+
286
+ if parsed_lesson.get("sections"):
287
+ video_section = next((s for s in parsed_lesson["sections"] if s.get("type") == "video"), None)
288
+ if video_section:
289
+ video_data = _fetch_youtube_video(
290
+ payload.lessonTitle or payload.topic,
291
+ payload.subject,
292
+ payload.learningCompetency or "",
293
+ payload.quarter,
294
+ )
295
+ if video_data:
296
+ video_section["videoId"] = video_data.get("videoId", "")
297
+ video_section["videoTitle"] = video_data.get("videoTitle", "")
298
+ video_section["videoChannel"] = video_data.get("videoChannel", "")
299
+ video_section["embedUrl"] = video_data.get("embedUrl", "")
300
+ video_section["thumbnailUrl"] = video_data.get("thumbnailUrl", "")
301
+
302
+ retrieval_summary = summarize_retrieval_confidence(chunks)
303
+
304
+ _log_rag_usage(
305
+ request,
306
+ event_type="lesson",
307
+ topic=build_lesson_query(payload.topic, payload.subject, payload.quarter, lesson_title=payload.lessonTitle),
308
+ subject=payload.subject,
309
+ quarter=payload.quarter,
310
+ chunks=chunks,
311
+ )
312
+
313
+ needs_review = parsed_lesson.get("needsReview", False)
314
+ if retrieval_summary.get("band") == "low":
315
+ needs_review = True
316
+
317
+ return {
318
+ **parsed_lesson,
319
+ "retrievalConfidence": retrieval_summary.get("confidence", 0.0),
320
+ "retrievalBand": retrieval_summary.get("band", "low"),
321
+ "retrievalMode": retrieval_mode,
322
+ "needsReview": needs_review,
323
+ "sources": [
324
+ {
325
+ "subject": row.get("subject"),
326
+ "quarter": row.get("quarter"),
327
+ "source_file": row.get("source_file"),
328
+ "storage_path": row.get("storage_path"),
329
+ "page": row.get("page"),
330
+ "score": row.get("score"),
331
+ "content_domain": row.get("content_domain"),
332
+ "chunk_type": row.get("chunk_type"),
333
+ "content": row.get("content"),
334
+ }
335
+ for row in chunks
336
+ ],
337
+ "activeModel": get_model_for_task("rag_lesson"),
338
+ }
339
+
340
+
341
+ @router.post("/generate-problem")
342
+ async def rag_generate_problem(request: Request, payload: RagProblemRequest):
343
+ chunks = retrieve_curriculum_context(
344
+ query=payload.topic,
345
+ subject=payload.subject,
346
+ quarter=payload.quarter,
347
+ top_k=5,
348
+ )
349
+ prompt = build_problem_generation_prompt(payload.topic, payload.difficulty, chunks)
350
+ raw = await _generate_text(
351
+ prompt,
352
+ task_type="quiz_generation",
353
+ max_new_tokens=600,
354
+ enable_thinking=False,
355
+ )
356
+
357
+ parsed = _strip_thinking_and_parse(raw)
358
+
359
+ problem = str(parsed.get("problem") or raw)
360
+ if not problem or problem.startswith("{"):
361
+ problem = str(parsed.get("content") or str(parsed))
362
+ if len(problem) < 3 or problem.startswith("{"):
363
+ problem = raw
364
+ solution = str(parsed.get("solution") or "")
365
+ competency_ref = str(parsed.get("competencyReference") or "DepEd competency-aligned")
366
+
367
+ _log_rag_usage(
368
+ request,
369
+ event_type="problem_generation",
370
+ topic=payload.topic,
371
+ subject=payload.subject,
372
+ quarter=payload.quarter,
373
+ chunks=chunks,
374
+ )
375
+
376
+ return {
377
+ "problem": problem,
378
+ "solution": solution,
379
+ "competencyReference": competency_ref,
380
+ "sources": [
381
+ {
382
+ "subject": row.get("subject"),
383
+ "quarter": row.get("quarter"),
384
+ "source_file": row.get("source_file"),
385
+ "page": row.get("page"),
386
+ "score": row.get("score"),
387
+ }
388
+ for row in chunks
389
+ ],
390
+ }
391
+
392
+
393
+ @router.post("/analysis-context")
394
+ async def rag_analysis_context(request: Request, payload: RagAnalysisContextRequest):
395
+ if not payload.weakTopics:
396
+ raise HTTPException(status_code=400, detail="weakTopics must be a non-empty list")
397
+
398
+ chunks = build_analysis_curriculum_context(payload.weakTopics, payload.subject)
399
+ lines = ["LEARNING COMPETENCIES:"]
400
+ for index, row in enumerate(chunks, start=1):
401
+ lines.append(
402
+ f"{index}. {row.get('content')} (Source: {row.get('source_file')} p.{row.get('page')}, "
403
+ f"Q{row.get('quarter')}, {row.get('content_domain')})"
404
+ )
405
+
406
+ _log_rag_usage(
407
+ request,
408
+ event_type="analysis_context",
409
+ topic=", ".join(payload.weakTopics),
410
+ subject=payload.subject,
411
+ quarter=None,
412
+ chunks=chunks,
413
+ )
414
+
415
+ return {"curriculumContext": "\n".join(lines)}
scripts/ingest_curriculum.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import hashlib
5
+ import json
6
+ import logging
7
+ import os
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List
11
+
12
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
13
+
14
+ from rag.vectorstore_loader import (
15
+ get_vectorstore_components,
16
+ reset_vectorstore_singleton,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ def _resolve_data_dir(raw: str | None) -> Path:
23
+ if raw:
24
+ p = Path(raw)
25
+ if p.is_absolute():
26
+ return p
27
+ p = Path.cwd() / raw
28
+ if p.exists():
29
+ return p
30
+ default = Path(__file__).resolve().parents[1] / "datasets"
31
+ return default
32
+
33
+
34
+ def _iter_json_files(data_dir: Path):
35
+ for file in sorted(data_dir.rglob("*")):
36
+ if file.suffix not in {".json", ".jsonl"}:
37
+ continue
38
+ yield file
39
+
40
+
41
+ def _load_records(file_path: Path) -> List[Dict[str, Any]]:
42
+ records: List[Dict[str, Any]] = []
43
+ try:
44
+ raw = file_path.read_text(encoding="utf-8").strip()
45
+ if file_path.suffix == ".jsonl":
46
+ for lineno, line in enumerate(raw.splitlines(), start=1):
47
+ line = line.strip()
48
+ if not line:
49
+ continue
50
+ try:
51
+ records.append(json.loads(line))
52
+ except json.JSONDecodeError:
53
+ logger.warning("Skipping malformed JSONL line %s:%d", file_path.name, lineno)
54
+ else:
55
+ parsed = json.loads(raw)
56
+ if isinstance(parsed, list):
57
+ records.extend(parsed)
58
+ elif isinstance(parsed, dict):
59
+ records.append(parsed)
60
+ except Exception as exc:
61
+ logger.warning("Failed to parse %s: %s", file_path.name, exc)
62
+ return records
63
+
64
+
65
+ def _build_id(source_file: str, page: int, content: str) -> str:
66
+ key = f"{source_file}::{page}::{content[:120]}"
67
+ return hashlib.sha256(key.encode()).hexdigest()[:40]
68
+
69
+
70
def main() -> None:
    """CLI entry point: ingest curriculum .json/.jsonl records into ChromaDB.

    Each record needs a non-empty "content" field; the remaining fields
    (subject, quarter, page, content_domain, chunk_type, source_file) are
    normalized with safe defaults. Chunks are embedded individually and
    upserted in one batch per source file, then a summary is printed.
    """
    parser = argparse.ArgumentParser(description="Ingest DepEd SHS curriculum JSON/JSONL into ChromaDB")
    parser.add_argument("--data-dir", default=None, help="Directory containing .json/.jsonl files")
    parser.add_argument("--reset", action="store_true", help="Reset the vectorstore singleton before ingestion")
    args = parser.parse_args()

    data_dir = _resolve_data_dir(args.data_dir)
    logger.info("Ingesting from: %s", data_dir)

    if args.reset:
        # Drop the cached components, wipe the collection, then drop the
        # cache again so ingestion below starts from a fresh handle.
        reset_vectorstore_singleton()
        _, collection, _ = get_vectorstore_components()
        try:
            # get(include=[]) fetches ids only (no documents/embeddings).
            collection.delete(ids=collection.get(include=[])["ids"])
        except Exception:
            pass  # best-effort wipe; deleting from an empty store may raise
        reset_vectorstore_singleton()

    total_processed = 0
    total_upserted = 0
    total_errors = 0

    _, collection, embedder = get_vectorstore_components()

    for file_path in _iter_json_files(data_dir):
        records = _load_records(file_path)
        # Per-file batch buffers; one upsert call per source file.
        documents: List[str] = []
        metadatas: List[Dict[str, Any]] = []
        ids: List[str] = []
        embeddings_list: List[List[float]] = []

        for record in records:
            total_processed += 1
            content = str(record.get("content") or "").strip()
            if not content:
                logger.debug("Skipping empty content in %s", file_path.name)
                continue

            try:
                # Normalize metadata with defaults so a missing field never
                # aborts the whole batch.
                subject = str(record.get("subject") or "unknown")
                quarter = int(record.get("quarter") or 0)
                page = int(record.get("page") or 0)
                content_domain = str(record.get("content_domain") or "unknown")
                chunk_type = str(record.get("chunk_type") or "unknown")
                source_file = str(record.get("source_file") or file_path.name)

                embedding = embedder.encode(content).tolist()
                # Deterministic id: re-running ingestion updates in place.
                chunk_id = _build_id(source_file, page, content)

                metadata = {
                    "subject": subject,
                    "quarter": quarter,
                    "content_domain": content_domain,
                    "chunk_type": chunk_type,
                    "source_file": source_file,
                    "page": page,
                }

                documents.append(content)
                metadatas.append(metadata)
                ids.append(chunk_id)
                embeddings_list.append(embedding)

            except Exception as exc:
                total_errors += 1
                logger.warning("Error processing record in %s: %s", file_path.name, exc)

        if documents:
            try:
                collection.upsert(
                    ids=ids,
                    documents=documents,
                    metadatas=metadatas,
                    embeddings=embeddings_list,
                )
                total_upserted += len(documents)
                logger.info("Upserted %d chunks from %s", len(documents), file_path.name)
            except Exception as exc:
                # The whole batch counts as failed when the upsert fails.
                total_errors += len(documents)
                logger.warning("Failed to upsert batch from %s: %s", file_path.name, exc)

    print(f"=== Ingestion Summary ===")
    print(f"Total records processed: {total_processed}")
    print(f"Total chunks upserted: {total_upserted}")
    print(f"Total errors: {total_errors}")
155
+
156
+
157
+ if __name__ == "__main__":
158
+ logging.basicConfig(level=logging.INFO)
159
+ main()
scripts/ingest_from_storage.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ingest curriculum PDFs from Firebase Storage into ChromaDB.
3
+ Run: python -m backend.scripts.ingest_from_storage
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ logger = logging.getLogger("mathpulse.ingest")
15
+
16
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
17
+
18
+ from backend.rag.firebase_storage_loader import (
19
+ PDF_METADATA,
20
+ download_pdf_from_storage,
21
+ list_curriculum_blobs,
22
+ )
23
+
24
# Ordered keyword classifiers mapping chunk text to a content domain.
# The first entry whose keyword list matches wins (see _classify_chunk /
# _classify_lesson_section), so earlier entries take precedence.
_CONTENT_DOMAIN_CLASSIFIERS = [
    ("introduction", ["introduction", "welcome", "overview", "objectives", "learning objectives"]),
    ("key_concepts", ["key concepts", "key ideas", "main concepts", "definitions", "key terms"]),
    ("worked_examples", ["example", "worked example", "illustrative example", "sample problem", "solution"]),
    ("important_notes", ["important", "note", "remember", "tip", "caution", "warning", "key point"]),
    ("practice", ["practice", "exercise", "try it", "your turn", "activity", "problem set"]),
    ("summary", ["summary", "recap", "key takeaways", "wrap-up", "conclusion"]),
    ("assessment", ["assessment", "quiz", "test", "evaluation", "exam"]),
]

# Ordered keyword classifiers mapping chunk text to a chunk type
# (definition / formula / procedure / concept / application); first match wins.
_CONTENT_TYPE_CLASSIFIERS = [
    ("definition", ["definition", "define", "means", "is defined as"]),
    ("formula", ["formula", "equation", "expression", "rule"]),
    ("procedure", ["step", "method", "how to", "procedure", "process"]),
    ("concept", ["concept", "idea", "principle", "theory"]),
    ("application", ["application", "use", "example", "solve", "problem"]),
]
41
+
42
+
43
def _classify_chunk(content: str) -> tuple[str, str]:
    """Keyword-classify a chunk into ``(content_domain, chunk_type)``.

    For each classifier table the first label whose keyword appears anywhere
    in the lowercased text wins; defaults are ("general", "concept").
    """
    haystack = content.lower()

    def first_match(classifiers, fallback: str) -> str:
        for label, keywords in classifiers:
            if any(keyword in haystack for keyword in keywords):
                return label
        return fallback

    return (
        first_match(_CONTENT_DOMAIN_CLASSIFIERS, "general"),
        first_match(_CONTENT_TYPE_CLASSIFIERS, "concept"),
    )
59
+
60
+
61
def _classify_lesson_section(content: str) -> str:
    """Classify a lesson section by keywords in its first ~200 characters."""
    lead = content.lower().strip()[:200]
    matches = (
        domain
        for domain, keywords in _CONTENT_DOMAIN_CLASSIFIERS
        if any(keyword in lead for keyword in keywords)
    )
    return next(matches, "general")
69
+
70
+
71
def chunk_text_preserve_pages(text: str, page_starts: List[int], chunk_size: int = 500, overlap: int = 80) -> List[Dict[str, Any]]:
    """Split text into overlapping chunks, preserving page traceability."""
    # NOTE(review): page_starts is accepted but never read — estimated_page
    # below is derived from the word index, not from the real page offsets.
    # Confirm whether true page mapping was intended here.
    words = text.split()
    chunks = []
    i = 0
    chunk_idx = 0
    while i < len(words):
        chunk_words = words[i : i + chunk_size]
        chunk_text = " ".join(chunk_words)
        # Rough page estimate: one "page" per chunk_size words, minimum 1.
        estimated_page = max(1, (i // chunk_size) + 1)
        content_domain, chunk_type = _classify_chunk(chunk_text)

        chunks.append({
            "text": chunk_text,
            "chunk_index": chunk_idx,
            "estimated_page": estimated_page,
            "content_domain": content_domain,
            "chunk_type": chunk_type,
        })
        # Advance by chunk_size - overlap so consecutive chunks share context.
        i += chunk_size - overlap
        chunk_idx += 1
    return chunks
93
+
94
+
95
def extract_pdf_text_and_pages(pdf_bytes: bytes) -> tuple[str, List[int]]:
    """Extract text from PDF bytes.

    Returns the newline-joined page text plus the character offset at which
    each page starts within that text. Returns ("", []) when neither pypdf
    nor PyPDF2 is installed.
    """
    try:
        from pypdf import PdfReader
    except ImportError:
        try:
            # Legacy fallback for environments that still ship PyPDF2.
            import PyPDF2 as PdfReaderModule
            from PyPDF2 import PdfReader
        except ImportError:
            logger.error("No PDF library available. Install: pip install pypdf")
            return "", []

    import io

    reader = PdfReader(io.BytesIO(pdf_bytes))
    page_texts = [page.extract_text() or "" for page in reader.pages]

    page_starts: List[int] = []
    offset = 0
    for page_text in page_texts:
        page_starts.append(offset)
        offset += len(page_text) + 1  # +1 for the joining newline

    return "\n".join(page_texts), page_starts
122
+
123
+
124
def get_firestore_client():
    """Return a Firestore client, initializing firebase_admin on first use.

    Credentials are resolved from FIREBASE_SERVICE_ACCOUNT_JSON (inline
    JSON), then FIREBASE_SERVICE_ACCOUNT_FILE (path), then application
    default credentials. Returns None when Firestore is unavailable.
    """
    try:
        import firebase_admin
        from firebase_admin import firestore

        if not firebase_admin._apps:
            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
            app_options = {"storageBucket": bucket_name}
            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
            if sa_json:
                import json as _json
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(_json.loads(sa_json)), app_options)
            elif sa_file and Path(sa_file).exists():
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(sa_file), app_options)
            else:
                firebase_admin.initialize_app(options=app_options)
        return firestore.client()
    except Exception as e:
        logger.warning("Firestore unavailable: %s", e)
        return None
147
+
148
+
149
def ingest_from_firebase_storage(force_reindex: bool = False):
    """Download PDFs from Firebase Storage and ingest into ChromaDB."""
    try:
        from sentence_transformers import SentenceTransformer
        import chromadb
    except ImportError:
        logger.error("Missing dependencies. Install: pip install chromadb sentence-transformers pypdf")
        return

    chroma_path = os.getenv("CURRICULUM_VECTORSTORE_DIR", "datasets/vectorstore")
    chroma_client = chromadb.PersistentClient(path=chroma_path)
    collection = chroma_client.get_or_create_collection(
        name="curriculum_chunks",
        metadata={"hnsw:space": "cosine"},
    )
    # NOTE(review): hard-coded bge-base here, while .env.example declares
    # EMBEDDING_MODEL=BAAI/bge-small-en-v1.5 — a mismatch with the
    # query-side embedder would break retrieval; confirm which model the
    # vectorstore actually expects.
    embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")

    db = get_firestore_client()  # may be None; Firestore bookkeeping is optional

    logger.info("Starting ingestion from Firebase Storage...")
    ingested_count = 0
    skipped_count = 0
    error_count = 0

    for storage_path, metadata in PDF_METADATA.items():
        doc_id = storage_path.replace("/", "_").replace(".pdf", "")

        # Skip PDFs already marked ingested in Firestore (unless forced).
        if db:
            try:
                doc_ref = db.collection("curriculumDocuments").document(doc_id)
                existing = doc_ref.get()
                if existing.exists:
                    if not force_reindex and existing.to_dict().get("status") == "ingested":
                        logger.info("[SKIP] %s already ingested", storage_path)
                        skipped_count += 1
                        continue
            except Exception as e:
                logger.warning("Firestore check failed for %s: %s", storage_path, e)

        logger.info("Downloading: %s", storage_path)
        pdf_bytes = download_pdf_from_storage(storage_path)
        if pdf_bytes is None:
            logger.error("[ERROR] Failed to download: %s", storage_path)
            if db:
                try:
                    # NOTE(review): doc_ref may be unbound when the Firestore
                    # check above raised before assigning it; the bare except
                    # below would mask the resulting NameError — verify.
                    doc_ref.set({
                        "storagePath": storage_path,
                        "status": "failed",
                        "error": "download_failed",
                        **metadata,
                    }, merge=True)
                except:
                    pass
            error_count += 1
            continue

        logger.info("Extracting text from: %s (%d bytes)", storage_path, len(pdf_bytes))
        full_text, page_starts = extract_pdf_text_and_pages(pdf_bytes)
        if not full_text.strip():
            logger.warning("[WARN] No text extracted from: %s", storage_path)
            error_count += 1
            continue

        chunks = chunk_text_preserve_pages(full_text, page_starts)
        logger.info(" -> %d chunks created", len(chunks))

        # Remove any previous chunks for this document before re-adding.
        existing_ids = [cid for cid in collection.get()["ids"] if cid.startswith(f"{doc_id}_chunk_")]
        if existing_ids:
            collection.delete(ids=existing_ids)
            logger.info(" Removed %d existing chunks", len(existing_ids))

        # Embed and add chunks one at a time (normalized for cosine space).
        for chunk in chunks:
            chunk_id = f"{doc_id}_chunk_{chunk['chunk_index']}"
            embedding = embedder.encode(chunk["text"], normalize_embeddings=True).tolist()

            collection.add(
                embeddings=[embedding],
                documents=[chunk["text"]],
                metadatas=[{
                    "document_id": doc_id,
                    "module_id": metadata.get("subjectId", ""),
                    "lesson_id": f"lesson-{doc_id}",
                    "title": metadata.get("subject", ""),
                    "subject": metadata.get("subject", ""),
                    "subjectId": metadata.get("subjectId", ""),
                    "quarter": metadata.get("quarter", 1),
                    "competency_code": metadata.get("competency_code", ""),
                    "content_domain": chunk["content_domain"],
                    "chunk_type": chunk["chunk_type"],
                    "source_file": storage_path.split("/")[-1],
                    "storage_path": storage_path,
                    "page": chunk["estimated_page"],
                    "chunk_index": chunk["chunk_index"],
                    "type": metadata.get("type", ""),
                }],
                ids=[chunk_id],
            )

        # Record the successful ingestion in Firestore (best-effort).
        if db:
            try:
                doc_ref.set({
                    "id": doc_id,
                    "storagePath": storage_path,
                    "status": "ingested",
                    "ingestedAt": __import__("firebase_admin").firestore.SERVER_TIMESTAMP,
                    "chunkCount": len(chunks),
                    **metadata,
                }, merge=True)
            except Exception as e:
                logger.warning("Firestore update failed: %s", e)

        logger.info("[OK] Ingested %s (%d chunks)", storage_path, len(chunks))
        ingested_count += 1

    logger.info("=" * 50)
    logger.info("Ingestion complete: %d ingested, %d skipped, %d errors", ingested_count, skipped_count, error_count)
    logger.info("Total chunks in ChromaDB: %d", collection.count())
266
+
267
+
268
+ if __name__ == "__main__":
269
+ import argparse
270
+ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
271
+
272
+ parser = argparse.ArgumentParser(description="Ingest curriculum PDFs from Firebase Storage into ChromaDB")
273
+ parser.add_argument("--force", action="store_true", help="Re-ingest even if already ingested")
274
+ args = parser.parse_args()
275
+
276
+ ingest_from_firebase_storage(force_reindex=args.force)
scripts/register_firestore_metadata.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Register curriculum document metadata in Firestore.
3
+ Populates the curriculumDocuments collection so the app can display
4
+ lessons mapped to their source PDFs before ingestion.
5
+
6
+ Run: python backend/scripts/register_firestore_metadata.py
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
16
+
17
+
18
def _get_firestore_client():
    """Return a Firestore client, initializing firebase_admin on first use.

    Credentials are resolved from FIREBASE_SERVICE_ACCOUNT_JSON (inline
    JSON), then FIREBASE_SERVICE_ACCOUNT_FILE (path), then application
    default credentials. Prints the failure and returns None on any error.
    """
    try:
        import firebase_admin
        from firebase_admin import firestore

        if not firebase_admin._apps:
            bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
            app_options = {"storageBucket": bucket_name}
            sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
            sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
            if sa_json:
                import json as _json
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(_json.loads(sa_json)), app_options)
            elif sa_file and Path(sa_file).exists():
                from firebase_admin import credentials

                firebase_admin.initialize_app(credentials.Certificate(sa_file), app_options)
            else:
                firebase_admin.initialize_app(options=app_options)
        return firestore.client()
    except Exception as e:
        print(f"Firestore init failed: {e}")
        return None
41
+
42
+
43
# Static registry of curriculum PDFs to pre-register in Firestore's
# curriculumDocuments collection. Each entry maps a lesson/module to its
# source PDF in Firebase Storage; status starts as "uploaded" and is later
# flipped to "ingested" by the ingestion script.
CURRICULUM_DOCUMENTS = [
    {
        "id": "gm_lesson_1",
        "moduleId": "gm-q1-business-finance",
        "lessonId": "gm-q1-bf-1",
        "title": "Represent business transactions and financial goals using variables and equations.",
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "quarter": 1,
        "competencyCode": "GM11-BF-1",
        "learningCompetency": "Represent business transactions and financial goals using variables and equations.",
        "storagePath": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
        "status": "uploaded",
    },
    {
        "id": "gm_navotas_lesson_1",
        "moduleId": "gm-q1-patterns-sequences-series",
        "lessonId": "gm-q1-pss-1",
        "title": "Identify and describe arithmetic and geometric patterns in data.",
        "subject": "General Mathematics",
        "subjectId": "gen-math",
        "quarter": 1,
        "competencyCode": "GM11-PSS-1",
        "learningCompetency": "Identify and describe arithmetic and geometric patterns in data.",
        "storagePath": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
        "status": "uploaded",
    },
    {
        "id": "bm_lesson_1",
        "moduleId": "bm-q1-business-math",
        "lessonId": "bm-q1-1",
        "title": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
        "subject": "Business Mathematics",
        "subjectId": "business-math",
        "quarter": 1,
        "competencyCode": "ABM_BM11BS-Ia-b-1",
        "learningCompetency": "Translate verbal phrases to mathematical expressions; model business scenarios using linear equations and inequalities.",
        "storagePath": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
        "status": "uploaded",
    },
    {
        "id": "stat_lesson_1",
        "moduleId": "stat-q1-probability",
        "lessonId": "stat-q1-1",
        "title": "Define and describe random variables and their types.",
        "subject": "Statistics and Probability",
        "subjectId": "stats-prob",
        "quarter": 1,
        "competencyCode": "SP_SHS11-Ia-1",
        "learningCompetency": "Define and describe random variables and their types.",
        "storagePath": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem.FV.pdf",
        "status": "uploaded",
    },
    {
        "id": "fm1_lesson_1",
        "moduleId": "fm1-q1-counting",
        "lessonId": "fm1-q1-fpc-1",
        "title": "Apply the fundamental counting principle in contextual problems.",
        "subject": "Finite Mathematics 1",
        "subjectId": "finite-math-1",
        "quarter": 1,
        "competencyCode": "FM1-SHS11-Ia-1",
        "learningCompetency": "Apply the fundamental counting principle in contextual problems.",
        "storagePath": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
        "status": "uploaded",
    },
    {
        "id": "fm2_lesson_1",
        "moduleId": "fm2-q1-matrices",
        "lessonId": "fm2-q1-matrices-1",
        "title": "Represent contextual data using matrix notation.",
        "subject": "Finite Mathematics 2",
        "subjectId": "finite-math-2",
        "quarter": 1,
        "competencyCode": "FM2-SHS11-Ia-1",
        "learningCompetency": "Represent contextual data using matrix notation.",
        "storagePath": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
        "status": "uploaded",
    },
    {
        "id": "org_mgmt_lesson_1",
        "moduleId": "org-mgmt-q1",
        "lessonId": "org-mgmt-q1-1",
        "title": "Understand the fundamental concepts of organization and management.",
        "subject": "Organization and Management",
        "subjectId": "org-mgmt",
        "quarter": 1,
        "competencyCode": "ABM_OM11-Ia-1",
        "learningCompetency": "Understand the fundamental concepts of organization and management.",
        "storagePath": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
        "status": "uploaded",
    },
]
136
+
137
+
138
def register_metadata(force: bool = False):
    """Upsert CURRICULUM_DOCUMENTS into Firestore's curriculumDocuments collection.

    Existing documents are skipped unless *force* is set, in which case they
    are overwritten (merge semantics). Prints a per-document status line and
    a final summary.
    """
    db = _get_firestore_client()
    if db is None:
        print("ERROR: Cannot connect to Firestore. Check credentials.")
        print("Set FIREBASE_SERVICE_ACCOUNT_JSON or place mathpulse-sa.json in backend/ directory.")
        return

    print("Connected to Firestore.")
    print("-" * 50)

    counts = {"registered": 0, "skipped": 0, "updated": 0}

    for doc in CURRICULUM_DOCUMENTS:
        doc_id = doc["id"]
        doc_ref = db.collection("curriculumDocuments").document(doc_id)
        snapshot = doc_ref.get()

        if snapshot.exists and not force:
            print(f"[SKIP] {doc_id} already registered")
            counts["skipped"] += 1
            continue

        # Reaching here with an existing doc implies force=True.
        counts["updated" if snapshot.exists else "registered"] += 1

        # NOTE(review): uploadedAt is explicitly written as None even with
        # merge=True, which overwrites any existing timestamp on a forced
        # re-register — confirm this is intended.
        doc_ref.set({
            **doc,
            "uploadedAt": None,
        }, merge=True)
        print(f"[OK] {'Updated' if force and snapshot.exists else 'Registered'} {doc_id} -> {doc.get('storagePath')}")

    print("-" * 50)
    print(f"Done: {counts['registered']} registered, {counts['skipped']} skipped, {counts['updated']} updated.")
176
+
177
+
178
+ if __name__ == "__main__":
179
+ import argparse
180
+ parser = argparse.ArgumentParser(description="Register curriculum document metadata in Firestore")
181
+ parser.add_argument("--force", action="store_true", help="Overwrite existing records")
182
+ args = parser.parse_args()
183
+ register_metadata(force=args.force)
scripts/upload_curriculum_pdfs.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Upload DepEd curriculum PDFs to Firebase Storage.
3
+ Run once during initial setup: python scripts/upload_curriculum_pdfs.py
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import Dict, List
12
+
13
+ sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
14
+
15
# Local folder that holds the source PDFs to upload.
# NOTE(review): machine-specific developer path — adjust before running elsewhere.
LOCAL_PDF_DIR = r"C:\Users\Deign\Downloads\Documents"

# Per-file metadata driving both the Storage upload and the Firestore record.
# Keyed by local filename; "storage_path" is the destination blob path in the
# Firebase Storage bucket, "strand"/"quarters" become Firestore fields.
PDF_METADATA: Dict[str, Dict[str, object]] = {
    "GENERAL-MATHEMATICS-1.pdf": {
        "subject": "General Mathematics",
        "type": "curriculum_guide",
        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
        "quarters": ["Q1", "Q2", "Q3", "Q4"],
        "storage_path": "curriculum/general_math/GENERAL-MATHEMATICS-1.pdf",
    },
    "Finite-Mathematics-1-1.pdf": {
        "subject": "Finite Mathematics 1",
        "type": "curriculum_guide",
        "strand": ["STEM", "ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/finite_math/Finite-Mathematics-1-1.pdf",
    },
    "Finite-Mathematics-2-1.pdf": {
        "subject": "Finite Mathematics 2",
        "type": "curriculum_guide",
        "strand": ["STEM", "ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/finite_math/Finite-Mathematics-2-1.pdf",
    },
    # SDO Navotas self-learning modules below use type "sdo_module".
    "SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf": {
        "subject": "General Mathematics",
        "type": "sdo_module",
        "strand": ["STEM", "ABM", "HUMSS", "GAS", "TVL"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/gen_math_sdo/SDO_Navotas_Gen.Math_SHS_1stSem.FV.pdf",
    },
    "SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf": {
        "subject": "Business Mathematics",
        "type": "sdo_module",
        "strand": ["ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/business_math/SDO_Navotas_Bus.Math_SHS_1stSem.FV.pdf",
    },
    "SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf": {
        "subject": "Organization and Management",
        "type": "sdo_module",
        "strand": ["ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/org_mgmt/SDO_Navotas_SHS_ABM_OrgAndMngt_FirstSem_FV.pdf",
    },
    "SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf": {
        "subject": "Statistics and Probability",
        "type": "sdo_module",
        "strand": ["STEM", "ABM"],
        "quarters": ["Q1", "Q2"],
        "storage_path": "curriculum/stat_prob/SDO_Navotas_STAT_PROB_SHS_1stSem_FV.pdf",
    },
}
68
+
69
+
70
def chunk_text(text: str, chunk_size: int = 600, overlap: int = 100) -> List[str]:
    """Split text into overlapping, word-based chunks.

    Args:
        text: Source text; split on whitespace. Blank text yields ``[]``.
        chunk_size: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks.

    Returns:
        Space-joined chunk strings covering every word in order; the final
        chunk may be shorter than ``chunk_size``.
    """
    words = text.split()
    # FIX: the original advanced by (chunk_size - overlap), which is <= 0 when
    # overlap >= chunk_size and looped forever. Clamp the stride to >= 1 word.
    step = max(1, chunk_size - overlap)
    chunks: List[str] = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start : start + chunk_size]))
    return chunks
80
+
81
+
82
def upload_pdfs():
    """Upload PDFs from the local directory to Firebase Storage.

    For each entry in PDF_METADATA: skip files missing locally or already
    recorded in Firestore, otherwise upload the blob and write a metadata
    document to the ``curriculumDocs`` collection (keyed by filename).
    Best-effort: individual failures are printed and do not abort the run.
    """
    try:
        import firebase_admin
        from firebase_admin import credentials, storage, firestore
    except ImportError:
        print("ERROR: firebase-admin not installed. Run: pip install firebase-admin")
        return

    # Service account key is expected one level above this scripts/ directory.
    service_account_path = Path(__file__).resolve().parents[1] / "serviceAccountKey.json"
    if not service_account_path.exists():
        print(f"ERROR: Service account key not found at {service_account_path}")
        return

    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "").strip()
    if not bucket_name:
        print("ERROR: FIREBASE_STORAGE_BUCKET not set in environment")
        return

    cred = credentials.Certificate(str(service_account_path))
    firebase_admin.initialize_app(cred, {"storageBucket": bucket_name})

    bucket = storage.bucket()
    db = firestore.client()

    print(f"Scanning: {LOCAL_PDF_DIR}")
    print("-" * 50)

    uploaded = 0
    skipped = 0

    for filename, meta in PDF_METADATA.items():
        local_path = Path(LOCAL_PDF_DIR) / filename

        if not local_path.exists():
            # FIX: these log lines previously printed the literal "(unknown)"
            # instead of the file being processed.
            print(f"[SKIP] {filename} not found in {LOCAL_PDF_DIR}")
            skipped += 1
            continue

        # Idempotency guard: an existing Firestore doc means a prior upload.
        doc_ref = db.collection("curriculumDocs").document(filename)
        if doc_ref.get().exists:
            print(f"[SKIP] {filename} already uploaded")
            skipped += 1
            continue

        try:
            blob = bucket.blob(meta["storage_path"])
            blob.upload_from_filename(str(local_path), content_type="application/pdf")

            doc_ref.set(
                {
                    "filename": filename,
                    "subject": meta["subject"],
                    "type": meta["type"],
                    "strand": meta["strand"],
                    "quarters": meta["quarters"],
                    "storage_path": meta["storage_path"],
                    "uploaded_at": firestore.SERVER_TIMESTAMP,
                    # Flipped to True later by index_pdfs().
                    "indexed": False,
                }
            )

            print(f"[OK] Uploaded {filename}")
            uploaded += 1
        except Exception as e:
            print(f"[ERROR] {filename}: {e}")

    print("-" * 50)
    print(f"Upload complete: {uploaded} uploaded, {skipped} skipped")
151
+
152
+
153
def index_pdfs():
    """Extract text from PDFs, chunk, embed, and store in ChromaDB.

    Firestore is optional here: when reachable, documents already marked
    ``indexed`` are skipped and are flagged ``indexed: True`` on success.
    Chunk IDs are deterministic (``<filename>_chunk_<i>``) so re-runs are
    idempotent at the chunk level as well.
    """
    try:
        from pypdf import PdfReader
        import chromadb
        from sentence_transformers import SentenceTransformer
        from firebase_admin import firestore
    except ImportError:
        print("ERROR: Missing dependencies. Run: pip install pypdf chromadb sentence-transformers firebase-admin")
        return

    chroma_path = os.getenv("CHROMA_PERSIST_PATH", "./datasets/vectorstore")

    chroma_client = chromadb.PersistentClient(path=chroma_path)
    collection = chroma_client.get_or_create_collection(
        name="curriculum_chunks",
        metadata={"hnsw:space": "cosine"},
    )
    # FIX: honor EMBEDDING_MODEL from the environment (see .env.example) so the
    # indexing and query sides agree; the previous hard-coded default is kept.
    # NOTE(review): .env.example ships bge-small while this default is bge-base
    # — changing either requires full re-ingestion; confirm which is intended.
    embedder = SentenceTransformer(os.getenv("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5"))

    # Firestore is best-effort: without it we simply index everything.
    try:
        import firebase_admin
        from firebase_admin import firestore as FS
        db = FS.client()
    except Exception:
        db = None

    print(f"Indexing PDFs from: {LOCAL_PDF_DIR}")
    print("-" * 50)

    indexed = 0
    skipped = 0

    for filename, meta in PDF_METADATA.items():
        doc_ref = None
        if db:
            doc_ref = db.collection("curriculumDocs").document(filename)
            doc = doc_ref.get()
            # FIX: the original called doc.to_dict() without checking
            # doc.exists, which raises AttributeError for missing documents.
            if doc.exists and (doc.to_dict() or {}).get("indexed", False):
                print(f"[SKIP] {filename} already indexed")
                skipped += 1
                continue

        local_path = Path(LOCAL_PDF_DIR) / filename
        if not local_path.exists():
            # FIX: log lines previously printed the literal "(unknown)".
            print(f"[SKIP] {filename} not found")
            skipped += 1
            continue

        try:
            reader = PdfReader(str(local_path))
            full_text = "\n".join(page.extract_text() or "" for page in reader.pages)

            if not full_text.strip():
                print(f"[WARN] {filename} has no extractable text")
                continue

            chunks = chunk_text(full_text)
            print(f"[INFO] {filename} -> {len(chunks)} chunks")

            for i, chunk in enumerate(chunks):
                # FIX: chunk IDs previously used the literal "(unknown)",
                # colliding across files; key them by source filename.
                chunk_id = f"{filename}_chunk_{i}"

                existing = collection.get(ids=[chunk_id])
                if existing and existing.get("ids"):
                    continue

                chunk_embedding = embedder.encode(
                    chunk,
                    normalize_embeddings=True,
                ).tolist()

                collection.add(
                    embeddings=[chunk_embedding],
                    documents=[chunk],
                    metadatas=[
                        {
                            "source_file": filename,
                            "subject": meta["subject"],
                            "strand": ",".join(meta["strand"]),
                            "quarter": ",".join(meta["quarters"]),
                            "chunk_index": i,
                            "type": meta["type"],
                        }
                    ],
                    ids=[chunk_id],
                )

            if db and doc_ref is not None:
                # FIX: set(..., merge=True) instead of update() so indexing a
                # file that was never uploaded does not raise NotFound.
                doc_ref.set({"indexed": True}, merge=True)

            print(f"[OK] Indexed {filename}")
            indexed += 1
        except Exception as e:
            print(f"[ERROR] {filename}: {e}")

    print("-" * 50)
    print(f"Indexing complete: {indexed} indexed, {skipped} skipped")
    print(f"Total chunks in ChromaDB: {collection.count()}")
251
+
252
+
253
if __name__ == "__main__":
    import argparse

    # One positional argument decides which phase(s) of the pipeline run.
    cli = argparse.ArgumentParser(description="Upload and index DepEd curriculum PDFs")
    cli.add_argument("action", choices=["upload", "index", "both"], help="Action to perform")
    options = cli.parse_args()

    wants_upload = options.action in ("upload", "both")
    wants_index = options.action in ("index", "both")

    if wants_upload:
        upload_pdfs()
    if wants_index:
        index_pdfs()
services/__init__.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backend service helpers for inference, logging, and integrations."""
2
+
3
+ from .inference_client import (
4
+ create_default_client,
5
+ InferenceRequest,
6
+ InferenceClient,
7
+ is_sequential_model,
8
+ get_current_runtime_config,
9
+ get_model_for_task,
10
+ set_runtime_model_profile,
11
+ set_runtime_model_override,
12
+ reset_runtime_overrides,
13
+ model_supports_thinking,
14
+ _MODEL_PROFILES,
15
+ )
16
+
17
+ from .ai_client import (
18
+ get_deepseek_client,
19
+ CHAT_MODEL,
20
+ REASONER_MODEL,
21
+ APIError,
22
+ RateLimitError,
23
+ APITimeoutError,
24
+ )
25
+
26
# Explicit public API of the services package: re-exports from
# inference_client (runtime model selection) and ai_client (DeepSeek access).
# NOTE(review): _MODEL_PROFILES is underscore-private yet exported — confirm
# external callers really need it.
__all__ = [
    "create_default_client",
    "InferenceRequest",
    "InferenceClient",
    "is_sequential_model",
    "get_current_runtime_config",
    "get_model_for_task",
    "set_runtime_model_profile",
    "set_runtime_model_override",
    "reset_runtime_overrides",
    "model_supports_thinking",
    "_MODEL_PROFILES",
    "get_deepseek_client",
    "CHAT_MODEL",
    "REASONER_MODEL",
    "APIError",
    "RateLimitError",
    "APITimeoutError",
]
services/ai_client.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from openai import OpenAI, APIError, RateLimitError, APITimeoutError
3
+ from functools import lru_cache
4
+
5
+ __all__ = [
6
+ "get_deepseek_client",
7
+ "CHAT_MODEL",
8
+ "REASONER_MODEL",
9
+ "DEEPSEEK_BASE_URL",
10
+ "APIError",
11
+ "RateLimitError",
12
+ "APITimeoutError",
13
+ ]
14
+
15
# DeepSeek endpoint and model names; all overridable via environment variables.
DEEPSEEK_BASE_URL = os.getenv("DEEPSEEK_BASE_URL", "https://api.deepseek.com")
# Fast general-purpose chat model.
CHAT_MODEL = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
# Step-by-step reasoning model for worked solutions.
REASONER_MODEL = os.getenv("DEEPSEEK_REASONER_MODEL", "deepseek-reasoner")
18
+
19
+
20
@lru_cache(maxsize=1)
def get_deepseek_client() -> OpenAI:
    """Return a process-wide, memoized OpenAI-compatible DeepSeek client.

    Raises:
        ValueError: if DEEPSEEK_API_KEY is not set in the environment.
    """
    key = os.getenv("DEEPSEEK_API_KEY")
    if key:
        return OpenAI(api_key=key, base_url=DEEPSEEK_BASE_URL)
    raise ValueError("DEEPSEEK_API_KEY environment variable not set")
services/deterministic_cache.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+ from collections import OrderedDict
4
+ from dataclasses import dataclass
5
+ from hashlib import sha256
6
+ from threading import Lock
7
+ from typing import Any, Dict, Optional
8
+
9
+ try:
10
+ import redis.asyncio as redis_async # type: ignore[import-not-found]
11
+ except Exception: # pragma: no cover - optional dependency
12
+ redis_async = None # type: ignore[assignment]
13
+
14
+
15
@dataclass
class _CacheRecord:
    """One cached value plus its absolute expiry timestamp (epoch seconds)."""

    value: Any
    expires_at: float


class DeterministicResponseCache:
    """TTL + LRU response cache with optional Redis backing.

    - Local cache is always used for fast lookups.
    - Redis is optional and fail-open.
    - Values are normalized through JSON roundtrip to keep payloads serializable.
    """

    def __init__(
        self,
        *,
        enabled: bool,
        max_entries: int,
        redis_url: Optional[str] = None,
        redis_prefix: str = "mathpulse:det-cache:",
        logger: Any = None,
    ) -> None:
        self.enabled = bool(enabled)
        self.max_entries = max(1, int(max_entries))
        self.redis_prefix = redis_prefix
        self.logger = logger

        self._lock = Lock()
        self._local: OrderedDict[str, _CacheRecord] = OrderedDict()

        # Redis is strictly optional: any initialization failure downgrades the
        # cache to local-only operation instead of raising.
        self._redis = None
        if self.enabled and redis_url and redis_async is not None:
            try:
                self._redis = redis_async.from_url(redis_url, encoding="utf-8", decode_responses=True)
            except Exception as err:
                self._warn(f"Redis cache disabled: failed to initialize client: {err}")
                self._redis = None

    def build_cache_key(self, namespace: str, payload: Dict[str, Any]) -> str:
        """Derive a stable key from a namespace plus a canonical-JSON payload hash."""
        serialized = json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str, ensure_ascii=True)
        digest = sha256(serialized.encode("utf-8")).hexdigest()
        return f"{namespace}:{digest}"

    async def get(self, key: str) -> Optional[Any]:
        """Return the cached value, or None on miss / expiry / disabled cache."""
        if not self.enabled:
            return None

        cached = self._get_local(key)
        if cached is not None:
            return cached

        if self._redis is None:
            return None

        redis_key = self._redis_key(key)
        try:
            raw = await self._redis.get(redis_key)
            if raw is None:
                return None
            value = json.loads(raw)

            # Warm the local cache using Redis's remaining TTL so both layers
            # expire together.
            remaining = await self._redis.ttl(redis_key)
            if isinstance(remaining, int) and remaining > 0:
                self._set_local(key, value, remaining)
            return value
        except Exception as err:
            # Fail-open: a Redis outage degrades to a miss, never an error.
            self._warn(f"Redis cache get failed for {key}: {err}")
            return None

    async def set(self, key: str, value: Any, ttl_seconds: int) -> None:
        """Store *value* under *key* in both layers; no-op if disabled or TTL <= 0."""
        if not self.enabled:
            return

        ttl = int(ttl_seconds)
        if ttl <= 0:
            return

        safe_value = self._normalize(value)
        self._set_local(key, safe_value, ttl)

        if self._redis is None:
            return

        redis_key = self._redis_key(key)
        try:
            await self._redis.set(redis_key, json.dumps(safe_value, separators=(",", ":"), default=str), ex=ttl)
        except Exception as err:
            self._warn(f"Redis cache set failed for {key}: {err}")

    async def clear(self) -> None:
        """Drop every locally cached entry (Redis entries simply age out)."""
        with self._lock:
            self._local.clear()

    def _normalize(self, value: Any) -> Any:
        # Keep payloads immutable enough for cache semantics and JSON-safe for Redis.
        return json.loads(json.dumps(value, default=str))

    def _redis_key(self, key: str) -> str:
        return f"{self.redis_prefix}{key}"

    def _get_local(self, key: str) -> Optional[Any]:
        """Expire-aware local lookup that refreshes LRU recency on a hit."""
        current = time.time()
        with self._lock:
            self._prune_locked(current)
            entry = self._local.get(key)
            if entry is None:
                return None
            if entry.expires_at <= current:
                self._local.pop(key, None)
                return None
            self._local.move_to_end(key, last=True)
            return entry.value

    def _set_local(self, key: str, value: Any, ttl_seconds: int) -> None:
        """Insert/refresh a local entry, then evict LRU entries over capacity."""
        deadline = time.time() + ttl_seconds
        with self._lock:
            self._prune_locked(time.time())
            self._local[key] = _CacheRecord(value=value, expires_at=deadline)
            self._local.move_to_end(key, last=True)
            while len(self._local) > self.max_entries:
                self._local.popitem(last=False)

    def _prune_locked(self, now: float) -> None:
        # Caller must hold self._lock.
        stale_keys = [cache_key for cache_key, entry in self._local.items() if entry.expires_at <= now]
        for cache_key in stale_keys:
            self._local.pop(cache_key, None)

    def _warn(self, message: str) -> None:
        if self.logger is not None:
            self.logger.warning(message)
services/email_service.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import logging
3
+ import os
4
+ import smtplib
5
+ import json
6
+ from dataclasses import dataclass
7
+ from email.message import EmailMessage
8
+ from typing import Optional, Protocol
9
+
10
+ import requests
11
+
12
+
13
+ logger = logging.getLogger("mathpulse")
14
+
15
+
16
+ def _first_nonempty_env(*names: str) -> str:
17
+ for name in names:
18
+ value = os.getenv(name, "")
19
+ if value and value.strip():
20
+ return value.strip()
21
+ return ""
22
+
23
+
24
+ def _parse_int_env(value: str, default: int, *, env_name: str) -> int:
25
+ try:
26
+ parsed = int(value)
27
+ except (TypeError, ValueError):
28
+ logger.warning("Invalid %s value '%s'; using default %s", env_name, value, default)
29
+ return default
30
+
31
+ if parsed <= 0:
32
+ logger.warning("Invalid %s value '%s'; using default %s", env_name, value, default)
33
+ return default
34
+
35
+ return parsed
36
+
37
+
38
+ def _extract_brevo_api_key(raw_value: str) -> str:
39
+ value = (raw_value or "").strip()
40
+ if not value:
41
+ return ""
42
+
43
+ # Standard Brevo transactional API key format.
44
+ if value.startswith("xkeysib-"):
45
+ return value
46
+
47
+ parse_candidates = [value]
48
+
49
+ # Brevo MCP token is often base64-encoded JSON containing {"api_key": "xkeysib-..."}.
50
+ try:
51
+ padded = value + ("=" * (-len(value) % 4))
52
+ decoded = base64.urlsafe_b64decode(padded.encode("utf-8"))
53
+ decoded_text = decoded.decode("utf-8").strip()
54
+ if decoded_text:
55
+ parse_candidates.append(decoded_text)
56
+ except (ValueError, UnicodeDecodeError):
57
+ pass
58
+
59
+ for candidate in parse_candidates:
60
+ try:
61
+ payload = json.loads(candidate)
62
+ except json.JSONDecodeError:
63
+ continue
64
+
65
+ if isinstance(payload, dict):
66
+ api_key = str(
67
+ payload.get("api_key") or payload.get("apiKey") or payload.get("api-key") or ""
68
+ ).strip()
69
+ if api_key:
70
+ return api_key
71
+
72
+ return ""
73
+
74
+
75
def _resolve_brevo_api_key_from_env() -> str:
    """Resolve the Brevo API key from the environment.

    Preference order: BREVO_API_KEY / BREVO_API_TOKEN, then BREVO_MCP_TOKEN.
    Returns "" when no variable yields a usable key.
    """
    direct_value = _first_nonempty_env("BREVO_API_KEY", "BREVO_API_TOKEN")
    direct_key = _extract_brevo_api_key(direct_value)
    if direct_key:
        # The env var held a wrapper payload rather than the bare key itself.
        if direct_value and direct_value != direct_key:
            logger.info("Resolved Brevo API key from BREVO_API_KEY/BREVO_API_TOKEN payload.")
        return direct_key

    mcp_value = _first_nonempty_env("BREVO_MCP_TOKEN")
    mcp_key = _extract_brevo_api_key(mcp_value)
    if mcp_key:
        logger.info("Resolved Brevo API key from BREVO_MCP_TOKEN.")
        return mcp_key

    if mcp_value:
        logger.warning("BREVO_MCP_TOKEN is set but did not contain a usable API key payload.")

    return ""
93
+
94
+
95
@dataclass
class EmailMessagePayload:
    """Provider-agnostic description of one transactional email."""

    to_name: str       # recipient display name (may be empty)
    to_email: str      # recipient address
    subject: str
    html_content: str  # HTML body
    text_content: str  # plain-text alternative body
102
+
103
+
104
@dataclass
class EmailSendResult:
    """Outcome of a send attempt; providers return this instead of raising."""

    success: bool
    provider: str                         # e.g. "brevo_api", "brevo_smtp", "none"
    message_id: Optional[str] = None      # provider message ID when available
    error_code: Optional[str] = None      # machine-readable failure code
    error_message: Optional[str] = None   # human-readable details (may be truncated)
    retryable: bool = False               # True for transient failures worth retrying
112
+
113
+
114
class EmailProvider(Protocol):
    """Structural interface every email provider implements (duck-typed)."""

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """Send one email and report the outcome; implementations do not raise."""
        ...
117
+
118
+
119
class BrevoApiEmailProvider:
    """Sends transactional email via the Brevo REST API (POST /v3/smtp/email)."""

    def __init__(self, api_key: str, from_address: str, from_name: str, timeout_sec: int = 15) -> None:
        self._api_key = api_key
        self._from_address = from_address
        self._from_name = from_name
        self._timeout_sec = timeout_sec

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """POST the message to Brevo and map the HTTP outcome to EmailSendResult.

        Never raises: HTTP errors and request exceptions are converted into
        failure results (transient ones flagged ``retryable``).
        """
        try:
            response = requests.post(
                "https://api.brevo.com/v3/smtp/email",
                headers={
                    "accept": "application/json",
                    "content-type": "application/json",
                    "api-key": self._api_key,
                },
                json={
                    "sender": {
                        "name": self._from_name,
                        "email": self._from_address,
                    },
                    "to": [
                        {
                            "name": message.to_name,
                            "email": message.to_email,
                        }
                    ],
                    "subject": message.subject,
                    "htmlContent": message.html_content,
                    "textContent": message.text_content,
                },
                timeout=self._timeout_sec,
            )

            if 200 <= response.status_code < 300:
                # Brevo normally returns {"messageId": ...}; tolerate both spellings
                # and an empty body.
                payload = response.json() if response.content else {}
                message_id = str(payload.get("messageId") or payload.get("message_id") or "").strip() or None
                return EmailSendResult(success=True, provider="brevo_api", message_id=message_id)

            # Non-2xx: keep a truncated response body for diagnostics; classify
            # timeout/rate-limit/server statuses as retryable.
            error_message = response.text[:400]
            retryable = response.status_code in {408, 429, 500, 502, 503, 504}
            logger.warning(
                "Brevo API email send failed (status=%s, retryable=%s)",
                response.status_code,
                retryable,
            )
            return EmailSendResult(
                success=False,
                provider="brevo_api",
                error_code=f"http_{response.status_code}",
                error_message=error_message,
                retryable=retryable,
            )
        except requests.RequestException as exc:
            # Network-level failures (DNS, timeouts, resets) are always retryable.
            logger.warning("Brevo API email send request exception: %s", exc)
            return EmailSendResult(
                success=False,
                provider="brevo_api",
                error_code="request_exception",
                error_message=str(exc),
                retryable=True,
            )
181
+
182
+
183
class BrevoSmtpEmailProvider:
    """Sends transactional email over Brevo's SMTP relay using STARTTLS."""

    def __init__(
        self,
        smtp_host: str,
        smtp_port: int,
        smtp_login: str,
        smtp_key: str,
        from_address: str,
        from_name: str,
        timeout_sec: int = 15,
    ) -> None:
        self._smtp_host = smtp_host
        self._smtp_port = smtp_port
        self._smtp_login = smtp_login
        self._smtp_key = smtp_key
        self._from_address = from_address
        self._from_name = from_name
        self._timeout_sec = timeout_sec

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """Build a multipart (text + HTML alternative) message and send it via SMTP.

        Never raises: SMTP/OS errors are converted into a retryable failure result.
        """
        mime = EmailMessage()
        mime["Subject"] = message.subject
        mime["From"] = f"{self._from_name} <{self._from_address}>"
        mime["To"] = f"{message.to_name} <{message.to_email}>" if message.to_name else message.to_email
        mime.set_content(message.text_content)
        mime.add_alternative(message.html_content, subtype="html")

        try:
            with smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=self._timeout_sec) as smtp:
                smtp.ehlo()
                smtp.starttls()
                smtp.login(self._smtp_login, self._smtp_key)
                smtp.send_message(mime)
            return EmailSendResult(success=True, provider="brevo_smtp")
        except (smtplib.SMTPException, OSError) as exc:
            logger.warning("Brevo SMTP email send failed: %s", exc)
            return EmailSendResult(
                success=False,
                provider="brevo_smtp",
                error_code="smtp_error",
                error_message=str(exc),
                retryable=True,
            )
226
+
227
+
228
class EmailService:
    """Delivers transactional email through a primary provider with optional fallback."""

    def __init__(self, primary_provider: Optional[EmailProvider], fallback_provider: Optional[EmailProvider] = None) -> None:
        self._primary_provider = primary_provider
        self._fallback_provider = fallback_provider

    def send_transactional_email(self, message: EmailMessagePayload) -> EmailSendResult:
        """Try the primary provider, then the fallback.

        When both fail, a merged failure result is returned that keeps the
        primary's error details (falling back to the fallback's) and is
        retryable if either attempt was.
        """
        primary = self._primary_provider
        fallback = self._fallback_provider

        if not primary and not fallback:
            return EmailSendResult(
                success=False,
                provider="none",
                error_code="email_not_configured",
                error_message="Email sending is not configured in this environment.",
                retryable=False,
            )

        first_attempt: Optional[EmailSendResult] = None
        if primary:
            first_attempt = primary.send_transactional_email(message)
            if first_attempt.success:
                return first_attempt

        if fallback:
            second_attempt = fallback.send_transactional_email(message)
            if second_attempt.success:
                return second_attempt
            if first_attempt:
                # Both providers failed — report a combined view of the two attempts.
                return EmailSendResult(
                    success=False,
                    provider=f"{first_attempt.provider}+{second_attempt.provider}",
                    error_code=first_attempt.error_code or second_attempt.error_code,
                    error_message=first_attempt.error_message or second_attempt.error_message,
                    retryable=bool(first_attempt.retryable or second_attempt.retryable),
                )
            return second_attempt

        # Primary existed, failed, and there was no fallback.
        return first_attempt or EmailSendResult(
            success=False,
            provider="none",
            error_code="unknown_email_error",
            error_message="Email provider failed with unknown error.",
            retryable=False,
        )
270
+
271
+
272
def create_email_service_from_env() -> EmailService:
    """Build an EmailService from environment variables.

    Provider selection:
      - Brevo REST API becomes primary when an API key can be resolved.
      - Brevo SMTP becomes primary if only SMTP credentials exist, otherwise
        it is wired in as the fallback.
    Always returns a service — when nothing is configured it will report
    ``email_not_configured`` on send.
    """
    from_address = _first_nonempty_env("MAIL_FROM_ADDRESS", "MAIL_FROM", "BREVO_FROM_ADDRESS") or "noreply@mathpulse.ai"
    from_name = _first_nonempty_env("MAIL_FROM_NAME", "BREVO_FROM_NAME") or "MathPulse AI"
    timeout_raw = _first_nonempty_env("MAIL_SEND_TIMEOUT_SEC") or "15"
    # Floor of 5 seconds so a misconfigured tiny timeout cannot break sends.
    timeout_sec = max(5, _parse_int_env(timeout_raw, 15, env_name="MAIL_SEND_TIMEOUT_SEC"))

    brevo_api_key = _resolve_brevo_api_key_from_env()
    smtp_login = _first_nonempty_env("BREVO_SMTP_LOGIN", "BREVO_SMTP_USERNAME", "BREVO_SMTP_USER")
    smtp_key = _first_nonempty_env("BREVO_SMTP_KEY", "BREVO_SMTP_PASSWORD", "BREVO_SMTP_PASS")
    smtp_host = _first_nonempty_env("BREVO_SMTP_HOST") or "smtp-relay.brevo.com"
    smtp_port_raw = _first_nonempty_env("BREVO_SMTP_PORT") or "587"
    smtp_port = _parse_int_env(smtp_port_raw, 587, env_name="BREVO_SMTP_PORT")

    primary_provider: Optional[EmailProvider] = None
    fallback_provider: Optional[EmailProvider] = None

    if brevo_api_key:
        primary_provider = BrevoApiEmailProvider(
            api_key=brevo_api_key,
            from_address=from_address,
            from_name=from_name,
            timeout_sec=timeout_sec,
        )

    if smtp_login and smtp_key:
        smtp_provider = BrevoSmtpEmailProvider(
            smtp_host=smtp_host,
            smtp_port=smtp_port,
            smtp_login=smtp_login,
            smtp_key=smtp_key,
            from_address=from_address,
            from_name=from_name,
            timeout_sec=timeout_sec,
        )
        if primary_provider is None:
            primary_provider = smtp_provider
        else:
            fallback_provider = smtp_provider

    # Surface half-configured SMTP credentials so the gap is visible in logs.
    if smtp_login and not smtp_key:
        logger.warning("BREVO_SMTP_LOGIN is set but SMTP key/password is missing.")
    if smtp_key and not smtp_login:
        logger.warning("SMTP key/password is set but BREVO_SMTP_LOGIN is missing.")

    mode_parts = []
    if brevo_api_key:
        mode_parts.append("brevo_api")
    if smtp_login and smtp_key:
        mode_parts.append("brevo_smtp")

    if mode_parts:
        logger.info(
            "Email service configured (%s) from=%s smtp=%s:%s",
            "+".join(mode_parts),
            from_address,
            smtp_host,
            smtp_port,
        )
    else:
        logger.warning(
            "Email service is not configured. Set BREVO_API_KEY/BREVO_API_TOKEN, BREVO_MCP_TOKEN, or BREVO_SMTP_LOGIN + BREVO_SMTP_KEY/BREVO_SMTP_PASSWORD."
        )

    return EmailService(primary_provider=primary_provider, fallback_provider=fallback_provider)
services/email_templates.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import html
2
+ from dataclasses import dataclass
3
+ from urllib.parse import urlparse
4
+
5
+
6
# Subject line used for all welcome/credentials emails.
WELCOME_SUBJECT = "Welcome to MathPulse AI - Your Account Details"
# Brand purple used for the email header band and CTA button background.
ACCENT_COLOR = "#9956DE"
8
+
9
+
10
@dataclass
class WelcomeCredentialsEmailContext:
    """Inputs for rendering the welcome-credentials email."""

    recipient_name: str            # display name; blank falls back to "Learner"
    login_email: str               # account login address shown in the email
    temporary_password: str        # initial password the user must change
    role: str                      # e.g. student/teacher/admin; blank -> "User"
    login_url: str                 # must be http(s); invalid -> default site URL
    brand_avatar_url: str = ""     # optional http(s) logo image; else "MP" badge
    recipient_avatar_url: str = "" # optional http(s) avatar; else initial badge
+
20
+
21
+ def _normalize_display_name(name: str) -> str:
22
+ cleaned = (name or "").strip()
23
+ return cleaned or "Learner"
24
+
25
+
26
+ def _normalize_http_url(url: str) -> str:
27
+ candidate = (url or "").strip()
28
+ if not candidate:
29
+ return ""
30
+
31
+ parsed = urlparse(candidate)
32
+ if parsed.scheme.lower() not in {"http", "https"}:
33
+ return ""
34
+ if not parsed.netloc:
35
+ return ""
36
+ return candidate
37
+
38
+
39
def build_welcome_credentials_email(context: WelcomeCredentialsEmailContext) -> dict:
    """Render the welcome/credentials email from *context*.

    Returns a dict with keys ``subject``, ``html`` (full inline-styled HTML
    document), and ``text`` (plain-text alternative). All user-supplied
    values are HTML-escaped and URLs are restricted to http(s) before being
    embedded in markup.
    """
    # Normalize/sanitize inputs; invalid login URL falls back to the site root.
    recipient_name = _normalize_display_name(context.recipient_name)
    login_email = (context.login_email or "").strip()
    temporary_password = (context.temporary_password or "").strip()
    role = (context.role or "").strip() or "User"
    login_url = _normalize_http_url(context.login_url) or "https://mathpulse.ai"
    brand_avatar_url = _normalize_http_url(context.brand_avatar_url)
    recipient_avatar_url = _normalize_http_url(context.recipient_avatar_url)

    # Escape everything that lands inside HTML text or attributes.
    escaped_name = html.escape(recipient_name)
    escaped_email = html.escape(login_email)
    escaped_password = html.escape(temporary_password)
    escaped_role = html.escape(role)
    escaped_url = html.escape(login_url, quote=True)
    escaped_brand_avatar_url = html.escape(brand_avatar_url, quote=True)
    escaped_recipient_avatar_url = html.escape(recipient_avatar_url, quote=True)
    recipient_initial = html.escape((recipient_name[:1] or "U").upper())

    # Brand avatar: real image when a URL is configured, else an "MP" badge.
    if escaped_brand_avatar_url:
        brand_avatar_markup = (
            f'<img src="{escaped_brand_avatar_url}" width="46" height="46" alt="MathPulse avatar" '
            'style="display:block;width:46px;height:46px;border-radius:50%;background:#ffffff;border:2px solid rgba(255,255,255,0.65);" />'
        )
    else:
        brand_avatar_markup = (
            '<div style="width:46px;height:46px;border-radius:50%;background:#1b1331;color:#f5ebff;'
            'font-size:16px;font-weight:800;line-height:46px;text-align:center;border:2px solid rgba(255,255,255,0.4);">MP</div>'
        )

    # Recipient avatar: image when configured, else a badge with their initial.
    if escaped_recipient_avatar_url:
        recipient_avatar_markup = (
            f'<img src="{escaped_recipient_avatar_url}" width="54" height="54" alt="Learner avatar" '
            'style="display:block;width:54px;height:54px;border-radius:50%;background:#1f2937;border:1px solid #49537a;" />'
        )
    else:
        recipient_avatar_markup = (
            '<div style="width:54px;height:54px;border-radius:50%;background:#233e74;color:#f8fafc;'
            f'font-size:22px;font-weight:700;line-height:54px;text-align:center;">{recipient_initial}</div>'
        )

    # Table-based layout with inline styles for broad email-client support.
    html_content = f"""
<!DOCTYPE html>
<html lang=\"en\">
<head>
<meta charset=\"UTF-8\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\" />
<title>{WELCOME_SUBJECT}</title>
</head>
<body style=\"margin:0;padding:0;background:#0f1220;font-family:Segoe UI,Arial,sans-serif;color:#e5e7eb;\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"padding:24px 12px;background:#0f1220;\">
<tr>
<td align=\"center\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"max-width:640px;background:#181d2f;border-radius:18px;overflow:hidden;border:1px solid #343e62;\">
<tr>
<td style=\"background:{ACCENT_COLOR};padding:14px 22px;\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\">
<tr>
<td width=\"52\" valign=\"middle\" style=\"width:52px;\">{brand_avatar_markup}</td>
<td valign=\"middle\" style=\"padding-left:10px;\">
<p style=\"margin:0;color:#1f1238;font-size:20px;font-weight:800;line-height:1.15;\">MathPulse AI</p>
<p style=\"margin:2px 0 0 0;color:#2f1d50;font-size:12px;font-weight:600;line-height:1.4;\">Learning Platform Account Access</p>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td style=\"padding:24px;\">
<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"margin:0 0 14px 0;\">
<tr>
<td width=\"62\" valign=\"top\" style=\"width:62px;padding-right:12px;\">{recipient_avatar_markup}</td>
<td valign=\"top\">
<p style=\"margin:0 0 8px 0;font-size:16px;color:#f3f4f6;\">Hello {escaped_name},</p>
<p style=\"margin:0;line-height:1.6;color:#d6daeb;\">Welcome to MathPulse AI. Your account has been created by your administrator. Use the credentials below to sign in and begin your learning journey.</p>
</td>
</tr>
</table>

<table role=\"presentation\" width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" style=\"background:#20263b;border:1px solid #445077;border-radius:12px;padding:16px;\">
<tr><td style=\"padding:4px 0;font-size:14px;color:#e5e7eb;\"><strong>Email:</strong> <span style=\"color:#93c5fd;\">{escaped_email}</span></td></tr>
<tr><td style=\"padding:4px 0;font-size:14px;color:#e5e7eb;\"><strong>Temporary Password:</strong> {escaped_password}</td></tr>
<tr><td style=\"padding:4px 0;font-size:14px;color:#e5e7eb;\"><strong>Role:</strong> {escaped_role}</td></tr>
</table>

<table role=\"presentation\" cellspacing=\"0\" cellpadding=\"0\" style=\"margin:20px 0 14px 0;\">
<tr>
<td align=\"center\" bgcolor=\"{ACCENT_COLOR}\" style=\"border-radius:10px;\">
<a href=\"{escaped_url}\" style=\"display:inline-block;padding:12px 20px;color:#1f1238;text-decoration:none;font-weight:700;font-size:14px;\">Log in to MathPulse</a>
</td>
</tr>
</table>

<p style=\"margin:0 0 8px 0;font-size:13px;line-height:1.5;color:#c7d2fe;\">Security note: Please change your password after your first login.</p>
<p style=\"margin:0;font-size:12px;line-height:1.5;color:#a8b3d1;\">If you did not expect this email, please contact your administrator.</p>
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>
""".strip()

    # Plain-text alternative mirrors the HTML content.
    text_content = (
        "MathPulse AI\n\n"
        f"Hello {recipient_name},\n\n"
        "Welcome to MathPulse AI. Your account has been created by your administrator.\n\n"
        "Account details:\n"
        f"- Email: {login_email}\n"
        f"- Temporary Password: {temporary_password}\n"
        f"- Role: {role}\n\n"
        f"Log in here: {login_url}\n\n"
        "Security note: Please change your password after your first login.\n\n"
        "If you did not expect this email, please contact your administrator.\n"
    )

    return {
        "subject": WELCOME_SUBJECT,
        "html": html_content,
        "text": text_content,
    }
services/inference_client.py ADDED
@@ -0,0 +1,1048 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import re
5
+ import random
6
+ from threading import Lock
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any, Dict, List, Optional, Tuple
10
+
11
+ import requests
12
+ import yaml
13
+ from openai import OpenAI, APIError, RateLimitError, APITimeoutError
14
+
15
+ from .ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL, DEEPSEEK_BASE_URL
16
+ from .logging_utils import configure_structured_logging, log_model_call
17
+
18
+ LOGGER = configure_structured_logging("mathpulse.inference")
19
+ TEMP_CHAT_MODEL_OVERRIDE_ENV = "INFERENCE_CHAT_MODEL_TEMP_OVERRIDE"
20
+
21
+ # ── Model Profiles ────────────────────────────────────────────────────────────
22
+ # A profile sets multiple env defaults in one shot.
23
+ # Individual env vars (DEEPSEEK_MODEL, DEEPSEEK_REASONER_MODEL, etc.) still override.
24
+ # Usage: MODEL_PROFILE=dev or MODEL_PROFILE=prod or MODEL_PROFILE=budget
25
+ # Profiles can also be applied at runtime via the admin panel without restart.
26
+
27
# Named bundles of model-routing env defaults. A profile is applied either at
# startup via MODEL_PROFILE (_apply_model_profile) or at runtime via the admin
# panel (set_runtime_model_profile). Individual env vars still win over these.
# NOTE(review): "dev" and "budget" are currently identical (all CHAT_MODEL);
# "prod" differs only by routing RAG tasks to REASONER_MODEL — confirm intended.
_MODEL_PROFILES: dict[str, dict[str, str]] = {
    "dev": {
        "INFERENCE_MODEL_ID": CHAT_MODEL,
        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
        "HF_RAG_MODEL_ID": CHAT_MODEL,
        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
    },
    "prod": {
        "INFERENCE_MODEL_ID": CHAT_MODEL,
        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
        # RAG tasks get the reasoner model in production for step-by-step output.
        "HF_RAG_MODEL_ID": REASONER_MODEL,
        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
    },
    "budget": {
        "INFERENCE_MODEL_ID": CHAT_MODEL,
        "INFERENCE_CHAT_MODEL_ID": CHAT_MODEL,
        "HF_QUIZ_MODEL_ID": CHAT_MODEL,
        "HF_RAG_MODEL_ID": CHAT_MODEL,
        "INFERENCE_LOCK_MODEL_ID": CHAT_MODEL,
    },
}
+ }
50
+
51
+ # ── Runtime Override Store ────────────────────────────────────────────────────
52
+ # Mutated at runtime by the admin panel via /api/admin/model-config.
53
+ # Priority: above env vars, below INFERENCE_ENFORCE_LOCK_MODEL.
54
+ # Persisted to Firestore so backend cold-restarts restore the last admin-set config.
55
+
56
# Per-key model overrides set at runtime (admin panel); mutated in place by
# set_runtime_model_profile / set_runtime_model_override and mirrored to
# Firestore so a cold restart restores the last admin-set configuration.
_RUNTIME_OVERRIDES: dict[str, str] = {}
# Name of the currently active runtime profile ("" when none is applied).
_RUNTIME_PROFILE: str = ""

# Firestore location used to persist/restore the runtime model config.
_FS_COLLECTION = "system_config"
_FS_DOC = "active_model_config"
61
+
62
+
63
+ def _save_runtime_config_to_firestore() -> None:
64
+ try:
65
+ from firebase_admin import firestore as fs
66
+
67
+ db = fs.client()
68
+ db.collection(_FS_COLLECTION).document(_FS_DOC).set(
69
+ {
70
+ "profile": _RUNTIME_PROFILE,
71
+ "overrides": _RUNTIME_OVERRIDES,
72
+ "updatedAt": fs.SERVER_TIMESTAMP,
73
+ }
74
+ )
75
+ except Exception as e:
76
+ LOGGER.warning("Could not persist model config to Firestore: %s", e)
77
+
78
+
79
+ def _load_runtime_config_from_firestore() -> None:
80
+ try:
81
+ from firebase_admin import firestore as fs
82
+
83
+ db = fs.client()
84
+ doc = db.collection(_FS_COLLECTION).document(_FS_DOC).get()
85
+ if not doc.exists:
86
+ return
87
+ data = doc.to_dict() or {}
88
+ profile = str(data.get("profile", "")).strip().lower()
89
+ overrides = data.get("overrides", {})
90
+ if profile and profile in _MODEL_PROFILES:
91
+ global _RUNTIME_PROFILE
92
+ _RUNTIME_PROFILE = profile
93
+ _RUNTIME_OVERRIDES.clear()
94
+ _RUNTIME_OVERRIDES.update(_MODEL_PROFILES[profile])
95
+ if isinstance(overrides, dict):
96
+ for key, value in overrides.items():
97
+ _RUNTIME_OVERRIDES[str(key)] = str(value)
98
+ LOGGER.info("Restored runtime model config from Firestore: profile=%s", profile)
99
+ except ImportError:
100
+ LOGGER.debug("Firebase not available (optional for DeepSeek-only)")
101
+ except Exception as e:
102
+ LOGGER.warning("Could not restore model config from Firestore: %s", e)
103
+
104
+
105
+ def _apply_model_profile() -> None:
106
+ profile_name = os.getenv("MODEL_PROFILE", "").strip().lower()
107
+ if not profile_name:
108
+ return
109
+ profile = _MODEL_PROFILES.get(profile_name)
110
+ if profile is None:
111
+ LOGGER.warning("MODEL_PROFILE='%s' is not a known profile.", profile_name)
112
+ return
113
+ for key, value in profile.items():
114
+ if not os.environ.get(key):
115
+ os.environ[key] = value
116
+ LOGGER.info("Startup model profile applied: %s", profile_name)
117
+
118
+
119
+ _apply_model_profile()
120
+ _load_runtime_config_from_firestore()
121
+
122
+
123
+ def set_runtime_model_profile(profile_name: str) -> None:
124
+ """Apply a named profile at runtime without restarting the process."""
125
+ global _RUNTIME_PROFILE, _RUNTIME_OVERRIDES
126
+ normalized = profile_name.strip().lower()
127
+ profile = _MODEL_PROFILES.get(normalized)
128
+ if not profile:
129
+ raise ValueError(
130
+ f"Unknown profile: '{profile_name}'. Valid values: {list(_MODEL_PROFILES.keys())}"
131
+ )
132
+ _RUNTIME_PROFILE = normalized
133
+ _RUNTIME_OVERRIDES.clear()
134
+ _RUNTIME_OVERRIDES.update(profile)
135
+ LOGGER.info("Runtime model profile switched to: %s", profile_name)
136
+ _save_runtime_config_to_firestore()
137
+
138
+
139
+ def set_runtime_model_override(key: str, value: str) -> None:
140
+ """Set a single model env key at runtime."""
141
+ _RUNTIME_OVERRIDES[key] = value
142
+ LOGGER.info("Runtime model override set: %s = %s", key, value)
143
+ _save_runtime_config_to_firestore()
144
+
145
+
146
+ def reset_runtime_overrides() -> None:
147
+ """Clear all runtime overrides."""
148
+ global _RUNTIME_PROFILE
149
+ _RUNTIME_OVERRIDES.clear()
150
+ _RUNTIME_PROFILE = ""
151
+ LOGGER.info("Runtime model overrides cleared.")
152
+ _save_runtime_config_to_firestore()
153
+
154
+
155
+ def get_current_runtime_config() -> dict:
156
+ resolved: dict[str, str] = {}
157
+ for key in {
158
+ "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
159
+ "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
160
+ }:
161
+ resolved[key] = _resolve_key(key)
162
+ return {
163
+ "profile": _RUNTIME_PROFILE,
164
+ "overrides": dict(_RUNTIME_OVERRIDES),
165
+ "resolved": resolved,
166
+ }
167
+
168
+
169
+ def _resolve_key(key: str) -> str:
170
+ if value := _RUNTIME_OVERRIDES.get(key):
171
+ return value
172
+ if _RUNTIME_PROFILE and _RUNTIME_PROFILE in _MODEL_PROFILES:
173
+ if value := _MODEL_PROFILES[_RUNTIME_PROFILE].get(key):
174
+ return value
175
+ return os.getenv(key, "")
176
+
177
+
178
+ def get_model_for_task(task_type: str) -> str:
179
+ task = (task_type or "default").strip().lower()
180
+ enforce_lock = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
181
+ if enforce_lock:
182
+ override = (
183
+ _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID")
184
+ or os.getenv("INFERENCE_LOCK_MODEL_ID")
185
+ or CHAT_MODEL
186
+ )
187
+ return override
188
+ task_key_map = {
189
+ "chat": "INFERENCE_CHAT_MODEL_ID",
190
+ "quiz_generation": "HF_QUIZ_MODEL_ID",
191
+ "rag_lesson": "HF_RAG_MODEL_ID",
192
+ "rag_problem": "HF_RAG_MODEL_ID",
193
+ "rag_analysis_context": "HF_RAG_MODEL_ID",
194
+ }
195
+ if env_key := task_key_map.get(task):
196
+ if resolved := _resolve_key(env_key):
197
+ return resolved
198
+ return _resolve_key("INFERENCE_MODEL_ID") or CHAT_MODEL
199
+
200
+
201
+ def model_supports_thinking(model_id: str = "") -> bool:
202
+ mid = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
203
+ return mid == REASONER_MODEL
204
+
205
+
206
+ def _normalize_local_space_url(raw_url: str) -> str:
207
+ """Accept either hf.space host or huggingface.co/spaces URL for local_space provider."""
208
+ cleaned = (raw_url or "").strip().rstrip("/")
209
+ if not cleaned:
210
+ return "http://127.0.0.1:7860"
211
+
212
+ match = re.match(r"^https?://huggingface\.co/spaces/([^/]+)/([^/]+)$", cleaned, re.IGNORECASE)
213
+ if match:
214
+ owner = match.group(1).strip().lower()
215
+ space = match.group(2).strip().lower()
216
+ return f"https://{owner}-{space}.hf.space"
217
+
218
+ return cleaned
219
+
220
+
221
+ @dataclass
222
+ class InferenceRequest:
223
+ messages: List[Dict[str, str]]
224
+ model: Optional[str] = None
225
+ task_type: str = "default"
226
+ request_tag: str = ""
227
+ max_new_tokens: int = 900
228
+ temperature: float = 0.2
229
+ top_p: float = 0.9
230
+ repetition_penalty: float = 1.15
231
+ timeout_sec: Optional[int] = None
232
+ enable_thinking: bool = False
233
+
234
+
235
+ class InferenceClient:
236
    def __init__(self, firestore_client: Optional[Any] = None) -> None:
        """Build the DeepSeek-backed inference client.

        Configuration priority: env vars > config/models.yaml > hardcoded
        defaults. When INFERENCE_ENFORCE_LOCK_MODEL is truthy, every task is
        forced onto the lock model and all fallbacks are cleared.

        firestore_client: optional Firestore handle used only to persist and
        restore the metrics counters; None disables persistence.
        """
        self.firestore = firestore_client
        self._last_persist_time = 0.0
        # Minimum seconds between Firestore metric writes (see _persist_metrics).
        self._persist_throttle_sec = 30.0

        # Candidate config locations: relative, container-absolute, and repo-root.
        config_paths = [
            Path("./config/models.yaml"),
            Path("/config/models.yaml"),
            Path("/app/config/models.yaml"),
            Path.cwd() / "config" / "models.yaml",
            Path(__file__).resolve().parents[2] / "config" / "models.yaml",
        ]

        config: Dict[str, object] = {}
        config_path = None

        # First existing file wins; a missing/empty file leaves config as {}.
        for path in config_paths:
            if path.exists():
                config_path = path
                with path.open("r", encoding="utf-8") as fh:
                    config = yaml.safe_load(fh) or {}
                LOGGER.info(f"??? Loaded config from {config_path}")
                break

        if not config_path:
            LOGGER.warning(f"?????? Config file not found. Checked: {[str(p) for p in config_paths]}")
            LOGGER.warning(f" CWD: {Path.cwd()}")
            LOGGER.warning(f" Using hardcoded defaults")

        # Extract models.primary from the YAML, defensively type-checking each level.
        primary: Dict[str, object] = {}
        if isinstance(config, dict):
            models_cfg = config.get("models", {})
            if isinstance(models_cfg, dict):
                primary_cfg = models_cfg.get("primary", {})
                if isinstance(primary_cfg, dict):
                    primary = primary_cfg

        # DeepSeek connection settings (OpenAI-compatible API).
        self.provider = "deepseek"
        self.ds_api_key = os.getenv("DEEPSEEK_API_KEY", "")
        self.ds_base_url = os.getenv("DEEPSEEK_BASE_URL", DEEPSEEK_BASE_URL)
        self.ds_chat_model = os.getenv("DEEPSEEK_MODEL", CHAT_MODEL)
        self.ds_reasoner_model = os.getenv("DEEPSEEK_REASONER_MODEL", REASONER_MODEL)

        # Optional local/HF-Space Gradio endpoint (secondary provider).
        self.local_space_url = _normalize_local_space_url(
            os.getenv("INFERENCE_LOCAL_SPACE_URL", "http://127.0.0.1:7860")
        )
        self.local_generate_path = os.getenv("INFERENCE_LOCAL_SPACE_GENERATE_PATH", "/gradio_api/call/generate")

        # Global model lock — defaults to ON.
        self.enforce_lock_model = os.getenv("INFERENCE_ENFORCE_LOCK_MODEL", "true").strip().lower() in {"1", "true", "yes", "on"}
        self.lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", CHAT_MODEL).strip() or CHAT_MODEL

        # Default model: env var beats the YAML primary id.
        default_model_fallback = str(primary.get("id") or CHAT_MODEL)
        env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
        self.default_model = env_model_id or default_model_fallback

        # Sampling defaults: env var beats YAML beats hardcoded value.
        default_max_tokens = str(primary.get("max_new_tokens") or 512)
        self.default_max_new_tokens = int(os.getenv("INFERENCE_MAX_NEW_TOKENS", default_max_tokens))

        default_temp = str(primary.get("temperature") or 0.2)
        self.default_temperature = float(os.getenv("INFERENCE_TEMPERATURE", default_temp))

        default_top_p = str(primary.get("top_p") or 0.9)
        self.default_top_p = float(os.getenv("INFERENCE_TOP_P", default_top_p))

        # Chat-specific model overrides and the strict single-model chat flag.
        self.chat_model_override = os.getenv("INFERENCE_CHAT_MODEL_ID", "").strip()
        self.chat_model_temp_override = os.getenv(TEMP_CHAT_MODEL_OVERRIDE_ENV, "").strip()
        self.chat_strict_model_only = os.getenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true").strip().lower() in {"1", "true", "yes", "on"}

        # Timeout/retry/backoff knobs, with separate interactive vs background
        # profiles that default to the shared values.
        self.ds_timeout_sec = int(os.getenv("INFERENCE_HF_TIMEOUT_SEC", "90"))
        self.local_timeout_sec = int(os.getenv("INFERENCE_LOCAL_SPACE_TIMEOUT_SEC", "90"))
        self.max_retries = int(os.getenv("INFERENCE_MAX_RETRIES", "3"))
        self.backoff_sec = float(os.getenv("INFERENCE_BACKOFF_SEC", "1.5"))
        self.interactive_timeout_sec = int(os.getenv("INFERENCE_INTERACTIVE_TIMEOUT_SEC", str(self.ds_timeout_sec)))
        self.background_timeout_sec = int(os.getenv("INFERENCE_BACKGROUND_TIMEOUT_SEC", str(self.ds_timeout_sec)))
        self.interactive_max_retries = int(os.getenv("INFERENCE_INTERACTIVE_MAX_RETRIES", str(self.max_retries)))
        self.background_max_retries = int(os.getenv("INFERENCE_BACKGROUND_MAX_RETRIES", str(self.max_retries)))
        self.interactive_backoff_sec = float(os.getenv("INFERENCE_INTERACTIVE_BACKOFF_SEC", str(self.backoff_sec)))
        self.background_backoff_sec = float(os.getenv("INFERENCE_BACKGROUND_BACKOFF_SEC", str(self.backoff_sec)))

        # Global fallback model chain (comma-separated env list).
        fallback_raw = os.getenv("INFERENCE_FALLBACK_MODELS", "")
        self.fallback_models = [v.strip() for v in fallback_raw.split(",") if v.strip()]

        # Task classification sets (comma-separated, lowercased).
        gpu_tasks_raw = os.getenv(
            "INFERENCE_GPU_REQUIRED_TASKS",
            "chat,quiz_generation,lesson_generation,learning_path,verify_solution,variant_generation,eval_generation",
        )
        self.gpu_required_tasks = {v.strip().lower() for v in gpu_tasks_raw.split(",") if v.strip()}

        cpu_tasks_raw = os.getenv(
            "INFERENCE_CPU_ONLY_TASKS",
            "risk_classification,analytics_aggregation,file_parsing,auth,default_cpu",
        )
        self.cpu_only_tasks = {v.strip().lower() for v in cpu_tasks_raw.split(",") if v.strip()}

        interactive_tasks_raw = os.getenv(
            "INFERENCE_INTERACTIVE_TASKS",
            "chat,verify_solution,daily_insight",
        )
        self.interactive_tasks = {v.strip().lower() for v in interactive_tasks_raw.split(",") if v.strip()}
        # Interactive tasks try at most 1 + this many fallback models (never negative).
        self.interactive_max_fallback_depth = max(
            0,
            int(os.getenv("INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH", "1")),
        )

        # Default task-to-model routing.
        self.task_model_map: Dict[str, str] = {
            "chat": CHAT_MODEL,
            "verify_solution": CHAT_MODEL,
            "lesson_generation": CHAT_MODEL,
            "quiz_generation": CHAT_MODEL,
            "learning_path": CHAT_MODEL,
            "daily_insight": CHAT_MODEL,
            "risk_classification": CHAT_MODEL,
            "risk_narrative": CHAT_MODEL,
        }
        self.task_fallback_model_map: Dict[str, List[str]] = {
            "chat": [CHAT_MODEL],
            "verify_solution": [CHAT_MODEL],
        }
        self.model_provider_map: Dict[str, str] = {}
        self.task_provider_map: Dict[str, str] = {}
        # Merge routing overrides from the YAML, normalizing task keys to lowercase.
        if isinstance(config, dict):
            routing_cfg = config.get("routing", {})
            if isinstance(routing_cfg, dict):
                task_models = routing_cfg.get("task_model_map", {})
                if isinstance(task_models, dict):
                    config_task_models = {
                        str(task).strip().lower(): str(model).strip()
                        for task, model in task_models.items()
                        if str(task).strip() and str(model).strip()
                    }
                    self.task_model_map.update(config_task_models)
                task_fallback_models = routing_cfg.get("task_fallback_model_map", {})
                if isinstance(task_fallback_models, dict):
                    # Accept list-valued or comma-separated string-valued entries.
                    # NOTE: a valid config REPLACES the default fallback map.
                    parsed: Dict[str, List[str]] = {}
                    for task, models in task_fallback_models.items():
                        task_key = str(task).strip().lower()
                        if not task_key:
                            continue
                        if isinstance(models, list):
                            cleaned = [str(m).strip() for m in models if str(m).strip()]
                            if cleaned:
                                parsed[task_key] = cleaned
                        elif isinstance(models, str):
                            cleaned = [v.strip() for v in models.split(",") if v.strip()]
                            if cleaned:
                                parsed[task_key] = cleaned
                    self.task_fallback_model_map = parsed
                task_providers = routing_cfg.get("task_provider_map", {})
                if isinstance(task_providers, dict):
                    self.task_provider_map = {
                        str(task).strip().lower(): str(provider).strip().lower()
                        for task, provider in task_providers.items()
                        if str(task).strip() and str(provider).strip()
                    }

        # Override all task model mappings with INFERENCE_MODEL_ID env var if set.
        if env_model_id:
            original_map = dict(self.task_model_map)
            for task_key in list(self.task_model_map.keys()):
                self.task_model_map[task_key] = env_model_id
            LOGGER.info(
                f"???? INFERENCE_MODEL_ID env var override applied: {env_model_id}"
            )
            LOGGER.info(
                f" Task model mappings changed from: {original_map}"
            )
            env_override_note = " (env override active)"
        else:
            env_override_note = ""

        # Lock mode wins over everything above: one model, no fallbacks.
        if self.enforce_lock_model:
            lock_map_before = dict(self.task_model_map)
            self.default_model = self.lock_model_id
            for task_key in list(self.task_model_map.keys()):
                self.task_model_map[task_key] = self.lock_model_id
            self.fallback_models = []
            self.task_fallback_model_map = {
                task_key: [] for task_key in self.task_model_map.keys()
            }
            LOGGER.info(f"???? INFERENCE_ENFORCE_LOCK_MODEL enabled: locking all inference tasks to {self.lock_model_id}")
            LOGGER.info(f" Cleared fallback models")
            LOGGER.info(f" Task model mappings forced from: {lock_map_before}")

        config_status = "from file" if config_path else "hardcoded defaults (no config file found)"
        effective_chat_model_for_logs = self.chat_model_override or self.task_model_map.get("chat", self.default_model)
        LOGGER.info(f"??? InferenceClient initialized {config_status}{env_override_note}")
        LOGGER.info(f" Default model: {self.default_model}")
        LOGGER.info(f" Chat model: {effective_chat_model_for_logs}")
        LOGGER.info(f" Chat temp override ({TEMP_CHAT_MODEL_OVERRIDE_ENV}): {self.chat_model_temp_override or 'disabled'}")
        LOGGER.info(f" Chat strict model lock: {self.chat_strict_model_only}")
        LOGGER.info(f" Global model lock: {self.enforce_lock_model}")
        LOGGER.info(f" Verify solution model: {self.task_model_map.get('verify_solution', self.default_model)}")
        LOGGER.info(f" Full task_model_map: {self.task_model_map}")

        # In-memory metrics, guarded by _metrics_lock; optionally seeded from
        # and throttle-persisted to Firestore.
        self._metrics_started_at = time.time()
        self._metrics_lock = Lock()
        self._metrics: Dict[str, Any] = {
            "requests_total": 0,
            "requests_ok": 0,
            "requests_error": 0,
            "retries_total": 0,
            "fallback_attempts": 0,
            "latency_sum_ms": 0.0,
            "latency_count": 0,
            "route_counts": {},
            "task_counts": {},
            "provider_counts": {},
            "status_code_counts": {},
        }

        self._load_persistent_metrics()
448
+
449
    def _bump_metric(self, key: str, inc: int = 1) -> None:
        """Increment the scalar counter *key* by *inc* under the metrics lock."""
        with self._metrics_lock:
            current = self._metrics.get(key) or 0
            # Guard against non-int residue (e.g. bad values restored from Firestore).
            if not isinstance(current, int):
                current = 0
            self._metrics[key] = current + inc
        # NOTE(review): assumed to run AFTER the lock is released —
        # _persist_metrics re-acquires the same non-reentrant threading.Lock,
        # so calling it inside the `with` block would deadlock. Confirm the
        # original indentation placed this call outside the lock.
        self._persist_metrics()
456
+
457
    def _bump_bucket(self, key: str, bucket: str, inc: int = 1) -> None:
        """Increment sub-counter *bucket* inside the dict-valued metric *key*."""
        with self._metrics_lock:
            mapping = self._metrics.get(key)
            # Recreate the bucket dict if missing or corrupted.
            if not isinstance(mapping, dict):
                mapping = {}
                self._metrics[key] = mapping
            current = mapping.get(bucket) or 0
            if not isinstance(current, int):
                current = 0
            mapping[bucket] = current + inc
        # NOTE(review): assumed outside the `with` block — _persist_metrics
        # re-acquires the non-reentrant metrics lock; confirm original indent.
        self._persist_metrics()
468
+
469
    def _record_completion(self, *, latency_ms: float) -> None:
        """Fold one successful call's latency into the running-average inputs."""
        with self._metrics_lock:
            self._metrics["latency_sum_ms"] = (self._metrics.get("latency_sum_ms") or 0.0) + latency_ms
            self._metrics["latency_count"] = (self._metrics.get("latency_count") or 0) + 1
        # NOTE(review): assumed outside the `with` block — _persist_metrics
        # re-acquires the non-reentrant metrics lock; confirm original indent.
        self._persist_metrics()
474
+
475
+ def _load_persistent_metrics(self) -> None:
476
+ if not self.firestore:
477
+ return
478
+ try:
479
+ doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
480
+ doc = doc_ref.get()
481
+ if doc.exists:
482
+ data = doc.to_dict() or {}
483
+ with self._metrics_lock:
484
+ for k, v in data.items():
485
+ if k in self._metrics:
486
+ if isinstance(v, (int, float)):
487
+ self._metrics[k] = v
488
+ elif isinstance(v, dict) and isinstance(self._metrics[k], dict):
489
+ self._metrics[k].update(v)
490
+ LOGGER.info("??? Persistent inference metrics loaded from Firestore")
491
+ except Exception as e:
492
+ LOGGER.warning(f"?????? Failed to load persistent metrics: {e}")
493
+
494
+ def _persist_metrics(self, force: bool = False) -> None:
495
+ if not self.firestore:
496
+ return
497
+
498
+ now = time.time()
499
+ if not force and (now - self._last_persist_time < self._persist_throttle_sec):
500
+ return
501
+
502
+ try:
503
+ self._last_persist_time = now
504
+ doc_ref = self.firestore.collection("system_metrics").document("inference_stats")
505
+ with self._metrics_lock:
506
+ snapshot = dict(self._metrics)
507
+
508
+ doc_ref.set(snapshot, merge=True)
509
+ except Exception as e:
510
+ LOGGER.warning(f"?????? Failed to persist metrics: {e}")
511
+
512
+ def _record_attempt(self, *, task_type: str, provider: str, route: str, fallback_depth: int) -> None:
513
+ self._bump_metric("requests_total", 1)
514
+ self._bump_bucket("task_counts", (task_type or "default").strip().lower(), 1)
515
+ self._bump_bucket("provider_counts", provider, 1)
516
+ self._bump_bucket("route_counts", route, 1)
517
+ if fallback_depth > 0:
518
+ self._bump_metric("fallback_attempts", 1)
519
+
520
+ def snapshot_metrics(self) -> Dict[str, Any]:
521
+ with self._metrics_lock:
522
+ l_sum = self._metrics.get("latency_sum_ms") or 0.0
523
+ l_count = self._metrics.get("latency_count") or 0
524
+ avg_latency = round(l_sum / l_count, 2) if l_count > 0 else 0.0
525
+
526
+ snapshot = {
527
+ "uptime_sec": round(max(0.0, time.time() - self._metrics_started_at), 2),
528
+ "requests_total": self._metrics.get("requests_total") or 0,
529
+ "requests_ok": self._metrics.get("requests_ok") or 0,
530
+ "requests_error": self._metrics.get("requests_error") or 0,
531
+ "retries_total": self._metrics.get("retries_total") or 0,
532
+ "fallback_attempts": self._metrics.get("fallback_attempts") or 0,
533
+ "avg_latency_ms": avg_latency,
534
+ "active_model": self.default_model,
535
+ "primary_provider": self.provider,
536
+ "route_counts": dict(self._metrics.get("route_counts") or {}),
537
+ "task_counts": dict(self._metrics.get("task_counts") or {}),
538
+ "provider_counts": dict(self._metrics.get("provider_counts") or {}),
539
+ "status_code_counts": dict(self._metrics.get("status_code_counts") or {}),
540
+ }
541
+ return snapshot
542
+
543
    def generate_from_messages(self, req: InferenceRequest) -> str:
        """Run a chat request through the resolved model chain and return text.

        Resolves the primary model plus fallbacks, then attempts each model in
        order via _call_deepseek; the first success is returned. If every
        model fails, the LAST error is re-raised; an empty chain raises
        RuntimeError.
        """
        effective_task = (req.task_type or "default").strip().lower()
        # Auto-tag requests for log correlation when the caller didn't supply one.
        request_tag = req.request_tag.strip() or f"{effective_task}-{int(time.time() * 1000)}"
        selected_model, model_selection_source = self._resolve_primary_model(req)

        model_chain = self._model_chain_for_task(effective_task, selected_model)
        last_error: Optional[Exception] = None

        model_base = selected_model

        LOGGER.info(
            f"???? request_tag={request_tag} task={effective_task} source={model_selection_source} "
            f"selected_model={model_base} (primary)"
        )
        LOGGER.info(f" fallback_chain={model_chain[1:] if len(model_chain) > 1 else 'none'}")

        for fallback_depth, model_name in enumerate(model_chain):
            # Rebuild the request per model, filling sampling defaults from the
            # client config when the caller left them unset.
            # NOTE(review): req.enable_thinking is not copied here — confirm
            # whether dropping it for retried/fallback models is intended.
            request_for_model = InferenceRequest(
                messages=req.messages,
                model=model_name,
                task_type=req.task_type,
                request_tag=request_tag,
                max_new_tokens=req.max_new_tokens or self.default_max_new_tokens,
                temperature=req.temperature if req.temperature is not None else self.default_temperature,
                top_p=req.top_p if req.top_p is not None else self.default_top_p,
                repetition_penalty=req.repetition_penalty,
                timeout_sec=req.timeout_sec,
            )

            try:
                result = self._call_deepseek(request_for_model, fallback_depth)
                if fallback_depth > 0:
                    LOGGER.info(f"??? Fallback succeeded at depth={fallback_depth} model={model_name}")
                return result
            except Exception as exc:
                # Remember the failure and move on to the next model in the chain.
                last_error = exc
                fallback_hint = f" (depth {fallback_depth})" if fallback_depth > 0 else ""
                LOGGER.warning(
                    f"?????? Attempt failed{fallback_hint}: task={request_for_model.task_type} "
                    f"model={model_name} error={exc.__class__.__name__}: {str(exc)[:100]}"
                )

        if last_error:
            raise last_error
        raise RuntimeError("Inference failed with empty model chain")
588
+
589
+ def _runtime_chat_model_override(self) -> str:
590
+ return os.getenv(TEMP_CHAT_MODEL_OVERRIDE_ENV, "").strip()
591
+
592
    def _resolve_primary_model(self, req: InferenceRequest) -> Tuple[str, str]:
        """Pick the primary model for a request and label where it came from.

        Priority (highest first): live chat temp-override env var (chat only)
        > explicit req.model > INFERENCE_CHAT_MODEL_ID (chat only) > the
        task-to-model map. The global model lock then replaces any selection
        whose base id (text before ':') differs from the lock model's.

        Returns (model_id, selection_source) where selection_source is a
        diagnostic tag such as "task_map" or "explicit_request:model_lock".
        """
        effective_task = (req.task_type or "default").strip().lower()
        runtime_chat_override = self._runtime_chat_model_override()

        if effective_task == "chat" and runtime_chat_override:
            selected_model = runtime_chat_override
            model_selection_source = "chat_temp_override_env"
        elif req.model:
            selected_model = req.model
            model_selection_source = "explicit_request"
        elif effective_task == "chat" and self.chat_model_override:
            selected_model = self.chat_model_override
            model_selection_source = "chat_override_env"
        else:
            selected_model = self.task_model_map.get(effective_task, self.default_model)
            model_selection_source = "task_map"

        if self.enforce_lock_model:
            # Chat may lock onto its own override instead of the global lock model.
            effective_lock_model_id = self.lock_model_id
            if effective_task == "chat":
                effective_lock_model_id = runtime_chat_override or self.chat_model_override or self.lock_model_id

            # Compare base ids only, so "model:variant" still matches "model".
            selected_base = (selected_model or "").split(":", 1)[0].strip()
            lock_base = (effective_lock_model_id or "").split(":", 1)[0].strip()
            if selected_base != lock_base:
                LOGGER.warning(
                    f"?????? Model lock replaced requested model {selected_model} with {effective_lock_model_id}"
                )
                selected_model = effective_lock_model_id
                model_selection_source = f"{model_selection_source}:model_lock"

        # Tag strict-chat mode in the source label for diagnostics.
        if effective_task == "chat" and self.chat_strict_model_only:
            return selected_model, f"{model_selection_source}:chat_strict_model_only"

        return selected_model, model_selection_source
627
+
628
+ def _model_chain_for_task(self, task_type: str, selected_model: str) -> List[str]:
629
+ normalized = (task_type or "default").strip().lower()
630
+ runtime_chat_override = self._runtime_chat_model_override() if normalized == "chat" else ""
631
+ chat_lock_model_id = runtime_chat_override or (self.chat_model_override if normalized == "chat" else "")
632
+
633
+ if self.enforce_lock_model:
634
+ if normalized == "chat":
635
+ locked_model = (chat_lock_model_id or self.lock_model_id or "").strip()
636
+ else:
637
+ locked_model = (self.lock_model_id or "").strip()
638
+ return [locked_model] if locked_model else []
639
+
640
+ if normalized == "chat" and self.chat_strict_model_only:
641
+ chat_model = (chat_lock_model_id or selected_model or "").strip()
642
+ return [chat_model] if chat_model else []
643
+
644
+ per_task_candidates = self.task_fallback_model_map.get(task_type, [])
645
+ combined = [selected_model] + per_task_candidates + self.fallback_models
646
+
647
+ deduped: List[str] = []
648
+ seen = set()
649
+ for model_id in combined:
650
+ model_name = (model_id or "").strip()
651
+ if not model_name or model_name in seen:
652
+ continue
653
+ seen.add(model_name)
654
+ deduped.append(model_name)
655
+
656
+ if normalized in self.interactive_tasks:
657
+ max_models = 1 + self.interactive_max_fallback_depth
658
+ return deduped[:max_models]
659
+ return deduped
660
+
661
+ def _retry_profile(self, task_type: str) -> Tuple[int, float]:
662
+ normalized = (task_type or "default").strip().lower()
663
+ if normalized in self.interactive_tasks:
664
+ return self.interactive_max_retries, self.interactive_backoff_sec
665
+ return self.background_max_retries, self.background_backoff_sec
666
+
667
+ def _timeout_for(self, req: InferenceRequest, provider: str) -> int:
668
+ if req.timeout_sec:
669
+ return req.timeout_sec
670
+ if provider == "local_space":
671
+ return self.local_timeout_sec
672
+ normalized = (req.task_type or "default").strip().lower()
673
+ if normalized in self.interactive_tasks:
674
+ return self.interactive_timeout_sec
675
+ return self.background_timeout_sec
676
+
677
+ def _messages_to_prompt(self, messages: List[Dict[str, str]]) -> str:
678
+ parts: List[str] = []
679
+ for msg in messages:
680
+ role = (msg.get("role") or "user").strip().lower()
681
+ content = (msg.get("content") or "").strip()
682
+ if not content or role in {"tool", "function"}:
683
+ continue
684
+ prefix = "USER"
685
+ if role == "system":
686
+ prefix = "SYSTEM"
687
+ elif role == "assistant":
688
+ prefix = "ASSISTANT"
689
+ parts.append(f"{prefix}:\n{content}")
690
+ parts.append("ASSISTANT:")
691
+ return "\n\n".join(parts)
692
+
693
+ def _latest_user_message(self, messages: List[Dict[str, str]]) -> str:
694
+ for msg in reversed(messages):
695
+ role = (msg.get("role") or "").strip().lower()
696
+ content = (msg.get("content") or "").strip()
697
+ if role == "user" and content:
698
+ return content
699
+ return self._messages_to_prompt(messages)
700
+
701
+ def _call_deepseek(self, req: InferenceRequest, fallback_depth: int) -> str:
702
+ """Call DeepSeek API with OpenAI-compatible chat completions."""
703
+ if not self.ds_api_key:
704
+ raise RuntimeError("DEEPSEEK_API_KEY is not set")
705
+
706
+ target_model = req.model or self.default_model
707
+ route = "deepseek"
708
+ task_type = req.task_type or "default"
709
+
710
+ LOGGER.debug(
711
+ f"???? Calling DeepSeek: task={task_type} model={target_model} "
712
+ f"route={route} depth={fallback_depth}"
713
+ )
714
+
715
+ timeout = self._timeout_for(req, "deepseek")
716
+ max_retries, backoff_sec = self._retry_profile(task_type)
717
+
718
+ client = get_deepseek_client()
719
+
720
+ # Build chat completions params
721
+ params: Dict[str, Any] = {
722
+ "model": target_model,
723
+ "messages": req.messages,
724
+ "max_tokens": req.max_new_tokens or self.default_max_new_tokens,
725
+ }
726
+
727
+ if target_model == REASONER_MODEL:
728
+ params["max_tokens"] = req.max_new_tokens or 1024
729
+ else:
730
+ params["temperature"] = req.temperature
731
+ params["top_p"] = req.top_p
732
+
733
+ # Use JSON mode for quiz generation
734
+ if task_type == "quiz_generation" and target_model != REASONER_MODEL:
735
+ params["response_format"] = {"type": "json_object"}
736
+
737
+ for attempt in range(max_retries):
738
+ self._record_attempt(
739
+ task_type=task_type,
740
+ provider="deepseek",
741
+ route=route,
742
+ fallback_depth=fallback_depth,
743
+ )
744
+ start = time.perf_counter()
745
+ try:
746
+ response = client.chat.completions.create(**params, timeout=timeout)
747
+ latency_ms = (time.perf_counter() - start) * 1000
748
+
749
+ content = response.choices[0].message.content or ""
750
+ reasoning = getattr(response.choices[0].message, "reasoning_content", None)
751
+
752
+ text = content.strip()
753
+ if reasoning:
754
+ text = f"{reasoning}\n{text}"
755
+
756
+ log_model_call(
757
+ LOGGER,
758
+ provider="deepseek",
759
+ model=target_model,
760
+ endpoint=self.ds_base_url,
761
+ latency_ms=latency_ms,
762
+ input_tokens=None,
763
+ output_tokens=None,
764
+ status="ok",
765
+ task_type=task_type,
766
+ request_tag=req.request_tag,
767
+ retry_attempt=attempt + 1,
768
+ fallback_depth=fallback_depth,
769
+ route=route,
770
+ )
771
+ self._record_attempt(
772
+ task_type=task_type,
773
+ provider="deepseek",
774
+ route=route,
775
+ fallback_depth=fallback_depth,
776
+ )
777
+ self._record_completion(latency_ms=latency_ms)
778
+ self._bump_metric("requests_ok", 1)
779
+ return text
780
+
781
+ except RateLimitError:
782
+ latency_ms = (time.perf_counter() - start) * 1000
783
+ if attempt < max_retries - 1:
784
+ log_model_call(
785
+ LOGGER,
786
+ provider="deepseek",
787
+ model=target_model,
788
+ endpoint=self.ds_base_url,
789
+ latency_ms=latency_ms,
790
+ input_tokens=None,
791
+ output_tokens=None,
792
+ status="error",
793
+ error_class="RateLimitError",
794
+ error_message="rate limited",
795
+ task_type=task_type,
796
+ request_tag=req.request_tag,
797
+ retry_attempt=attempt + 1,
798
+ fallback_depth=fallback_depth,
799
+ route=route,
800
+ )
801
+ self._bump_metric("retries_total", 1)
802
+ time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
803
+ continue
804
+ self._bump_metric("requests_error", 1)
805
+ raise RuntimeError("DeepSeek API rate limit reached. Please try again shortly.")
806
+
807
+ except APITimeoutError:
808
+ latency_ms = (time.perf_counter() - start) * 1000
809
+ if attempt < max_retries - 1:
810
+ log_model_call(
811
+ LOGGER,
812
+ provider="deepseek",
813
+ model=target_model,
814
+ endpoint=self.ds_base_url,
815
+ latency_ms=latency_ms,
816
+ input_tokens=None,
817
+ output_tokens=None,
818
+ status="error",
819
+ error_class="APITimeoutError",
820
+ error_message="timeout",
821
+ task_type=task_type,
822
+ request_tag=req.request_tag,
823
+ retry_attempt=attempt + 1,
824
+ fallback_depth=fallback_depth,
825
+ route=route,
826
+ )
827
+ self._bump_metric("retries_total", 1)
828
+ time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
829
+ continue
830
+ self._bump_metric("requests_error", 1)
831
+ raise RuntimeError("DeepSeek API timed out. Please retry.")
832
+
833
+ except APIError as e:
834
+ latency_ms = (time.perf_counter() - start) * 1000
835
+ if attempt < max_retries - 1:
836
+ log_model_call(
837
+ LOGGER,
838
+ provider="deepseek",
839
+ model=target_model,
840
+ endpoint=self.ds_base_url,
841
+ latency_ms=latency_ms,
842
+ input_tokens=None,
843
+ output_tokens=None,
844
+ status="error",
845
+ error_class="APIError",
846
+ error_message=str(e)[:200],
847
+ task_type=task_type,
848
+ request_tag=req.request_tag,
849
+ retry_attempt=attempt + 1,
850
+ fallback_depth=fallback_depth,
851
+ route=route,
852
+ )
853
+ self._bump_metric("retries_total", 1)
854
+ time.sleep(backoff_sec * (attempt + 1) * random.uniform(0.9, 1.2))
855
+ continue
856
+ self._bump_metric("requests_error", 1)
857
+ raise RuntimeError(f"DeepSeek API error: {str(e)}")
858
+
859
+ except Exception as exc:
860
+ latency_ms = (time.perf_counter() - start) * 1000
861
+ self._bump_metric("requests_error", 1)
862
+ log_model_call(
863
+ LOGGER,
864
+ provider="deepseek",
865
+ model=target_model,
866
+ endpoint=self.ds_base_url,
867
+ latency_ms=latency_ms,
868
+ input_tokens=None,
869
+ output_tokens=None,
870
+ status="error",
871
+ error_class=exc.__class__.__name__,
872
+ error_message=str(exc)[:200],
873
+ task_type=task_type,
874
+ request_tag=req.request_tag,
875
+ retry_attempt=attempt + 1,
876
+ fallback_depth=fallback_depth,
877
+ route=route,
878
+ )
879
+ raise
880
+
881
+ raise RuntimeError(f"DeepSeek call failed after {max_retries} attempts")
882
+
883
    def _call_local_space(self, req: InferenceRequest, *, provider: str, route: str, fallback_depth: int) -> str:
        """Call a local/remote Gradio Space ``generate`` endpoint and return the text.

        Flattens the chat messages into a single prompt, POSTs a Gradio-style
        ``{"data": [...]}`` payload, and — when the Space replies with an
        ``event_id`` — performs a second GET against the SSE result endpoint
        and parses the last ``data:`` line.

        Raises on any non-200 response or on a malformed result stream.
        """
        target_model = req.model or self.default_model
        url = f"{self.local_space_url.rstrip('/')}{self.local_generate_path}"

        # Gradio positional args: prompt, history, temperature, top_p, max_new_tokens.
        prompt = self._messages_to_prompt(req.messages)
        payload: Dict[str, object] = {
            "data": [
                prompt,
                [],
                req.temperature,
                req.top_p,
                req.max_new_tokens,
            ]
        }
        headers = {"Content-Type": "application/json"}

        timeout = self._timeout_for(req, provider)

        self._record_attempt(
            task_type=req.task_type,
            provider=provider,
            route=route,
            fallback_depth=fallback_depth,
        )
        start = time.perf_counter()

        try:
            resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
        except Exception as exc:
            # Transport-level failure (connection, timeout, DNS): log and re-raise.
            latency_ms = (time.perf_counter() - start) * 1000
            log_model_call(
                LOGGER,
                provider=provider,
                model=target_model,
                endpoint=url,
                latency_ms=latency_ms,
                input_tokens=None,
                output_tokens=None,
                status="error",
                error_class=exc.__class__.__name__,
                error_message=str(exc),
                task_type=req.task_type,
                request_tag=req.request_tag,
                retry_attempt=1,
                fallback_depth=fallback_depth,
                route=route,
            )
            self._bump_metric("requests_error", 1)
            raise

        # NOTE(review): latency is measured up to the POST only; the follow-up
        # result GET below is not included in latency_ms — confirm intended.
        latency_ms = (time.perf_counter() - start) * 1000
        self._bump_bucket("status_code_counts", str(resp.status_code), 1)

        if resp.status_code != 200:
            self._bump_metric("requests_error", 1)
            raise RuntimeError(f"Local Space error {resp.status_code}: {resp.text}")

        data = resp.json()
        event_id = data.get("event_id")
        if not event_id:
            # Synchronous response: the text is already in the payload.
            return self._extract_text(data)

        # Asynchronous Gradio flow: fetch the SSE result for the queued event.
        result_url = f"{self.local_space_url.rstrip('/')}/gradio_api/call/generate/{event_id}"
        result_resp = requests.get(result_url, timeout=req.timeout_sec or self.local_timeout_sec)
        if result_resp.status_code != 200:
            # NOTE(review): this path does not bump "requests_error" — confirm intended.
            raise RuntimeError(f"Local Space result error {result_resp.status_code}: {result_resp.text}")

        # Keep the LAST "data:" line of the event stream (final payload).
        line_data = None
        for line in result_resp.text.splitlines():
            if line.startswith("data:"):
                line_data = line.split("data:", 1)[1].strip()

        if not line_data:
            raise RuntimeError("Local Space result stream missing data")

        parsed = json.loads(line_data)
        # Non-dict payloads (e.g. a bare list) are wrapped for _extract_text.
        output_payload = parsed if isinstance(parsed, dict) else {"data": parsed}
        text = self._extract_text(output_payload)
        log_model_call(
            LOGGER,
            provider=provider,
            model=target_model,
            endpoint=url,
            latency_ms=latency_ms,
            input_tokens=None,
            output_tokens=None,
            status="ok",
            task_type=req.task_type,
            request_tag=req.request_tag,
            retry_attempt=1,
            fallback_depth=fallback_depth,
            route=route,
        )
        self._bump_metric("requests_ok", 1)
        return text
978
+
979
+ def _extract_text(self, data: object) -> str:
980
+ """Extract clean text from inference response, stripping JSON artifacts."""
981
+ if isinstance(data, list) and data:
982
+ first = data[0]
983
+ if isinstance(first, dict):
984
+ val = (first.get("generated_text") or "").strip()
985
+ if val:
986
+ return self._clean_response_text(val)
987
+
988
+ if isinstance(data, dict):
989
+ direct = (data.get("generated_text") or "").strip()
990
+ if direct:
991
+ return self._clean_response_text(direct)
992
+
993
+ choices = data.get("choices", [])
994
+ if choices:
995
+ message = choices[0].get("message", {})
996
+ msg = (message.get("content") or "").strip()
997
+ if msg:
998
+ return self._clean_response_text(msg)
999
+ reasoning = (message.get("reasoning") or "").strip()
1000
+ if reasoning:
1001
+ return self._clean_response_text(reasoning)
1002
+
1003
+ generic_data = data.get("data")
1004
+ if isinstance(generic_data, list) and generic_data:
1005
+ first = generic_data[0]
1006
+ if isinstance(first, str) and first.strip():
1007
+ return self._clean_response_text(first.strip())
1008
+
1009
+ raise RuntimeError(f"Unexpected inference response format: {data}")
1010
+
1011
+ def _clean_response_text(self, text: str) -> str:
1012
+ """Strip JSON braces, template artifacts, and whitespace from response text."""
1013
+ text = text.strip()
1014
+
1015
+ if text.startswith("{") and text.endswith("}"):
1016
+ try:
1017
+ parsed = json.loads(text)
1018
+ if isinstance(parsed, dict):
1019
+ if "content" in parsed:
1020
+ text = str(parsed["content"]).strip()
1021
+ elif "text" in parsed:
1022
+ text = str(parsed["text"]).strip()
1023
+ except json.JSONDecodeError:
1024
+ text = text.strip("{}")
1025
+
1026
+ if text.startswith("```json") or text.startswith("```"):
1027
+ text = re.sub(r"^```(?:json)?", "", text).strip()
1028
+ if text.endswith("```"):
1029
+ text = text[:-3].strip()
1030
+
1031
+ return text.strip()
1032
+
1033
+
1034
def create_default_client(firestore_client: Optional[Any] = None) -> InferenceClient:
    """Build an InferenceClient with default settings.

    Args:
        firestore_client: Optional pre-built Firestore client to share; when
            None the InferenceClient uses its own defaults.
    """
    return InferenceClient(firestore_client=firestore_client)
1036
+
1037
+
1038
def is_sequential_model(model_id: str = "") -> bool:
    """Return True when the effective model is the (sequential) reasoner model.

    The model id is taken from the argument, falling back to the
    INFERENCE_MODEL_ID environment variable. A runtime override locking the
    reasoner model also counts.
    """
    resolved = (model_id or os.getenv("INFERENCE_MODEL_ID") or "").strip()
    if not resolved:
        return False
    if resolved == REASONER_MODEL:
        return True
    # A runtime lock pinning the reasoner model makes inference sequential too.
    if _RUNTIME_OVERRIDES and _RUNTIME_OVERRIDES.get("INFERENCE_LOCK_MODEL_ID", "") == REASONER_MODEL:
        return True
    return False
services/logging_utils.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from datetime import datetime, timezone
4
+ from typing import Any, Dict, Optional
5
+
6
+
7
def configure_structured_logging(name: str) -> logging.Logger:
    """Return a named logger writing plain-text records to a stream handler.

    Idempotent: a logger that already has handlers is returned unchanged, so
    repeated calls never stack duplicate handlers.
    """
    log = logging.getLogger(name)
    if log.handlers:
        return log

    log.setLevel(logging.INFO)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")
    )
    log.addHandler(stream_handler)
    # Keep records out of the root logger to avoid double emission.
    log.propagate = False
    return log
19
+
20
+
21
+ def _safe_json(payload: Dict[str, Any]) -> str:
22
+ return json.dumps(payload, ensure_ascii=True, default=str)
23
+
24
+
25
def log_model_call(
    logger: logging.Logger,
    *,
    provider: str,
    model: str,
    endpoint: str,
    latency_ms: float,
    input_tokens: Optional[int],
    output_tokens: Optional[int],
    status: str,
    error_class: Optional[str] = None,
    error_message: Optional[str] = None,
    task_type: Optional[str] = None,
    request_tag: Optional[str] = None,
    retry_attempt: Optional[int] = None,
    fallback_depth: Optional[int] = None,
    route: Optional[str] = None,
) -> None:
    """Emit one structured JSON log line describing a single model invocation.

    Successful calls (``status == "ok"``) log at INFO; everything else at ERROR.
    """
    record = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "model_call",
        "provider": provider,
        "model": model,
        "endpoint": endpoint,
        "latency_ms": round(latency_ms, 2),
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "status": status,
        "error_class": error_class,
        "error_message": error_message,
        "task_type": task_type,
        "request_tag": request_tag,
        "retry_attempt": retry_attempt,
        "fallback_depth": fallback_depth,
        "route": route,
    }
    emit = logger.info if status == "ok" else logger.error
    emit(_safe_json(record))
65
+
66
+
67
def log_job_metric(
    logger: logging.Logger,
    *,
    job_name: str,
    run_id: str,
    metric_name: str,
    metric_value: Any,
    extras: Optional[Dict[str, Any]] = None,
) -> None:
    """Emit one structured JSON log line for a batch-job metric.

    Keys in *extras*, when provided, are merged into (and may override)
    the base payload.
    """
    record: Dict[str, Any] = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "job_metric",
        "job_name": job_name,
        "run_id": run_id,
        "metric_name": metric_name,
        "metric_value": metric_value,
        **(extras or {}),
    }
    logger.info(_safe_json(record))
services/user_provisioning_service.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import re
4
+ from urllib.parse import quote_plus, urlparse
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from .email_service import EmailSendResult, EmailService, EmailMessagePayload
9
+ from .email_templates import WelcomeCredentialsEmailContext, build_welcome_credentials_email
10
+
11
+
12
# Shared application logger for provisioning events.
logger = logging.getLogger("mathpulse")


# Accepted role/status tokens after lowercasing admin input.
VALID_ROLES = {"student", "teacher", "admin"}
VALID_STATUSES = {"active", "inactive"}
# Lightweight email shape check: non-space local part, "@", domain containing a dot.
EMAIL_REGEX = re.compile(r"^[^\s@]+@[^\s@]+\.[^\s@]+$")
# Password complexity classes; each must match at least once (see _validate_password).
PASSWORD_UPPER_REGEX = re.compile(r"[A-Z]")
PASSWORD_LOWER_REGEX = re.compile(r"[a-z]")
PASSWORD_DIGIT_REGEX = re.compile(r"\d")
PASSWORD_SPECIAL_REGEX = re.compile(r"[^A-Za-z0-9]")
22
+
23
+
24
@dataclass
class AdminCreateUserInput:
    """Raw admin-submitted payload for creating a user; validated on use."""

    name: str
    email: str
    password: str
    confirm_password: str
    role: str  # expected to normalize into VALID_ROLES (case-insensitive)
    status: str  # expected to normalize into VALID_STATUSES (case-insensitive)
    grade: str
    section: str
    lrn: Optional[str] = None  # Learner Reference Number; required for students
35
+
36
+
37
@dataclass
class CreateUserAndNotifyResult:
    """Outcome of create_user_and_notify: account creation plus email delivery."""

    uid: str  # Firebase Auth UID of the created user
    user_created: bool
    email_sent: bool
    result_code: str  # "created_and_emailed" or "created_email_failed"
    message: str  # human-readable summary
    warnings: List[str] = field(default_factory=list)
    email_result: Optional[EmailSendResult] = None  # raw send result, if attempted
46
+
47
+
48
class UserProvisioningError(Exception):
    """Provisioning failure carrying a machine-readable code and HTTP status hint."""

    def __init__(self, code: str, message: str, status_code: int = 400) -> None:
        super().__init__(message)
        self.code = code  # stable machine-readable error code
        self.message = message  # human-readable description
        self.status_code = status_code  # suggested HTTP response status
54
+
55
+
56
class UserProvisioningService:
    """Creates user accounts (Firebase Auth + Firestore profile) and sends welcome emails.

    Dependencies are injected so the service can run with stubs in tests;
    availability is checked via ``_ensure_dependencies`` before any work.
    """

    def __init__(
        self,
        *,
        firebase_auth_module: Any,
        firestore_module: Any,
        firestore_server_timestamp: Any,
        email_service: EmailService,
    ) -> None:
        """Store the injected Firebase/Firestore modules and email service."""
        self._firebase_auth_module = firebase_auth_module
        self._firestore_module = firestore_module
        # Sentinel (e.g. SERVER_TIMESTAMP) written verbatim into documents.
        self._firestore_server_timestamp = firestore_server_timestamp
        self._email_service = email_service

    def _ensure_dependencies(self) -> None:
        """Raise 503-coded errors when Auth or Firestore modules were not injected."""
        if self._firebase_auth_module is None:
            raise UserProvisioningError("auth_unavailable", "Firebase Auth service is unavailable.", 503)
        if self._firestore_module is None:
            raise UserProvisioningError("firestore_unavailable", "Firestore service is unavailable.", 503)

    def _normalize_role(self, role: str) -> str:
        """Lowercase and validate the role against VALID_ROLES."""
        normalized = (role or "").strip().lower()
        if normalized not in VALID_ROLES:
            raise UserProvisioningError("invalid_role", "Role must be Student, Teacher, or Admin.", 400)
        return normalized

    def _normalize_status(self, status: str) -> str:
        """Validate the status and return it title-cased ("Active"/"Inactive")."""
        normalized = (status or "").strip().lower()
        if normalized not in VALID_STATUSES:
            raise UserProvisioningError("invalid_status", "Status must be Active or Inactive.", 400)
        return "Active" if normalized == "active" else "Inactive"

    def _validate_email(self, email: str) -> str:
        """Lowercase the email and check it against EMAIL_REGEX."""
        normalized = (email or "").strip().lower()
        if not normalized or not EMAIL_REGEX.match(normalized):
            raise UserProvisioningError("invalid_email", "Invalid email format.", 400)
        return normalized

    def _validate_password(self, password: str, confirm_password: str) -> str:
        """Enforce length + character-class policy and confirm-password match."""
        value = password or ""
        if len(value) < 8:
            raise UserProvisioningError("weak_password", "Password must be at least 8 characters.", 400)
        if not PASSWORD_UPPER_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one uppercase letter.", 400)
        if not PASSWORD_LOWER_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one lowercase letter.", 400)
        if not PASSWORD_DIGIT_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one number.", 400)
        if not PASSWORD_SPECIAL_REGEX.search(value):
            raise UserProvisioningError("weak_password", "Password must include at least one special character.", 400)
        if value != (confirm_password or ""):
            raise UserProvisioningError("password_mismatch", "Password and confirm password do not match.", 400)
        return value

    @staticmethod
    def _auth_user_not_found(error: Exception) -> bool:
        """Heuristic: does this Auth exception mean "no such user"? (string match)."""
        message = str(error).lower()
        return "not found" in message or "no user record" in message

    @staticmethod
    def _slugify(value: str) -> str:
        """Lowercase *value* and collapse non-alphanumeric runs to single underscores."""
        token = re.sub(r"[^a-z0-9]+", "_", (value or "").strip().lower())
        return re.sub(r"_+", "_", token).strip("_")

    @staticmethod
    def _build_default_avatar_url(display_name: str) -> str:
        """Return a ui-avatars.com URL rendering the user's initials."""
        return f"https://ui-avatars.com/api/?name={quote_plus(display_name or 'User')}&background=0d9488&color=fff"

    @staticmethod
    def _derive_brand_avatar_url(login_url: str) -> str:
        """Return the brand avatar URL: env override, login-URL origin, or fallback."""
        configured = (os.getenv("APP_BRAND_AVATAR_URL", "") or "").strip()
        if configured:
            return configured

        # Derive from the login URL's origin when it is a valid http(s) URL.
        parsed = urlparse(login_url or "")
        if parsed.scheme in {"http", "https"} and parsed.netloc:
            return f"{parsed.scheme}://{parsed.netloc}/avatar/avatar_icon.png"

        return "https://mathpulse.ai/avatar/avatar_icon.png"

    def _ensure_no_duplicate_email(self, email: str, firestore_client: Any) -> None:
        """Raise 409 if the email exists in Auth or Firestore; 503 if lookup fails."""
        try:
            # If the lookup succeeds the user already exists.
            self._firebase_auth_module.get_user_by_email(email)
            raise UserProvisioningError("duplicate_email", "A user with this email already exists.", 409)
        except UserProvisioningError:
            raise
        except Exception as auth_lookup_error:
            # "User not found" is the expected/desired outcome here.
            if not self._auth_user_not_found(auth_lookup_error):
                logger.warning("Auth duplicate lookup failed for %s: %s", email, auth_lookup_error)
                raise UserProvisioningError("auth_lookup_failed", "Unable to verify duplicate email in Auth.", 503)

        try:
            existing_docs = list(
                firestore_client.collection("users").where("email", "==", email).limit(1).stream()
            )
            if existing_docs:
                raise UserProvisioningError("duplicate_email", "A user profile with this email already exists.", 409)
        except UserProvisioningError:
            raise
        except Exception as firestore_lookup_error:
            logger.warning("Firestore duplicate lookup failed for %s: %s", email, firestore_lookup_error)
            raise UserProvisioningError("firestore_lookup_failed", "Unable to verify duplicate email in Firestore.", 503)

    def _build_profile_payload(self, user_input: AdminCreateUserInput, role_lower: str, normalized_status: str) -> Dict[str, Any]:
        """Build the Firestore profile document, with role-specific fields.

        Raises UserProvisioningError("missing_lrn") for students without an LRN.
        """
        display_name = (user_input.name or "").strip()
        grade = (user_input.grade or "").strip() or "Grade 11"
        section = (user_input.section or "").strip() or "Section A"
        class_section_id = self._slugify(f"{grade}_{section}") or "grade_11_section_a"

        payload: Dict[str, Any] = {
            "name": display_name,
            "email": (user_input.email or "").strip().lower(),
            "role": role_lower,
            "status": normalized_status,
            "grade": grade,
            "section": section,
            "classSectionId": class_section_id,
            # New accounts must rotate the admin-assigned password on first login.
            "forcePasswordChange": True,
            "photo": self._build_default_avatar_url(display_name),
            "updatedAt": self._firestore_server_timestamp,
        }

        if role_lower == "student":
            lrn = (user_input.lrn or "").strip()
            if not lrn:
                raise UserProvisioningError("missing_lrn", "LRN is required for student accounts.", 400)
            # Gamification/progress fields start at zero for new students.
            payload.update(
                {
                    "lrn": lrn,
                    "level": 1,
                    "currentXP": 0,
                    "totalXP": 0,
                    "streak": 0,
                    "atRiskSubjects": [],
                    "hasTakenDiagnostic": False,
                }
            )
        elif role_lower == "teacher":
            payload.update(
                {
                    "department": f"{grade} - {section}",
                    # Deterministic ID derived from the email slug.
                    "teacherId": f"TCH-{self._slugify(payload['email'])}",
                    "subject": "Mathematics",
                    "yearsOfExperience": "0",
                    "qualification": "",
                    "students": [],
                }
            )
        else:
            # Remaining valid role is "admin".
            payload.update(
                {
                    "department": f"{grade} - {section}",
                    "adminId": f"ADM-{self._slugify(payload['email'])}",
                    "position": "Administrator",
                }
            )

        return payload

    def create_user(self, user_input: AdminCreateUserInput) -> str:
        """Validate input, create the Auth user, write the Firestore profile.

        Returns the new Auth UID. On a Firestore write failure the Auth user
        is rolled back (best effort) before raising.
        """
        self._ensure_dependencies()

        if not (user_input.name or "").strip():
            raise UserProvisioningError("missing_name", "Name is required.", 400)

        normalized_email = self._validate_email(user_input.email)
        validated_password = self._validate_password(user_input.password, user_input.confirm_password)
        role_lower = self._normalize_role(user_input.role)
        normalized_status = self._normalize_status(user_input.status)

        firestore_client = self._firestore_module.client()
        self._ensure_no_duplicate_email(normalized_email, firestore_client)

        try:
            created_auth_user = self._firebase_auth_module.create_user(
                email=normalized_email,
                password=validated_password,
                display_name=(user_input.name or "").strip(),
                # Inactive accounts are created disabled in Auth.
                disabled=(normalized_status == "Inactive"),
            )
        except Exception as auth_create_error:
            logger.error("Auth user creation failed for %s: %s", normalized_email, auth_create_error)
            auth_error_text = str(auth_create_error)
            auth_error_text_lower = auth_error_text.lower()

            # Map known Auth error strings onto domain error codes.
            if "password_does_not_meet_requirements" in auth_error_text_lower or "password requirements" in auth_error_text_lower:
                raise UserProvisioningError(
                    "weak_password",
                    "Password does not meet authentication policy requirements.",
                    400,
                )

            if "email already exists" in auth_error_text_lower or "email_exists" in auth_error_text_lower:
                raise UserProvisioningError("duplicate_email", "A user with this email already exists.", 409)

            raise UserProvisioningError("auth_create_failed", "Failed to create authentication account.", 500)

        uid = str(getattr(created_auth_user, "uid", "") or "").strip()
        if not uid:
            raise UserProvisioningError("missing_uid", "Authentication account created without UID.", 500)

        profile_payload = self._build_profile_payload(user_input, role_lower, normalized_status)
        profile_payload["createdAt"] = self._firestore_server_timestamp

        try:
            firestore_client.collection("users").document(uid).set(profile_payload, merge=True)
        except Exception as firestore_write_error:
            logger.error("Firestore profile write failed for %s: %s", uid, firestore_write_error)
            # Best-effort rollback so no orphan Auth account is left behind.
            try:
                self._firebase_auth_module.delete_user(uid)
                logger.info("Rolled back Auth user creation for %s after Firestore write failure.", uid)
            except Exception as rollback_error:
                logger.warning(
                    "Failed to roll back Auth user %s after Firestore write failure: %s",
                    uid,
                    rollback_error,
                )
            raise UserProvisioningError("profile_write_failed", "Failed to create user profile in Firestore.", 500)

        return uid

    def send_welcome_credentials_email(self, user_input: AdminCreateUserInput) -> EmailSendResult:
        """Send the welcome email containing the login email and temporary password."""
        display_name = (user_input.name or "").strip()
        login_url = (os.getenv("APP_LOGIN_URL", "") or "").strip() or "https://mathpulse.ai"
        brand_avatar_url = self._derive_brand_avatar_url(login_url)
        recipient_avatar_url = self._build_default_avatar_url(display_name)

        template = build_welcome_credentials_email(
            WelcomeCredentialsEmailContext(
                recipient_name=display_name,
                login_email=(user_input.email or "").strip().lower(),
                # The admin-assigned password is shared once; forcePasswordChange
                # on the profile requires rotation at first login.
                temporary_password=user_input.password,
                role=(user_input.role or "").strip().title(),
                login_url=login_url,
                brand_avatar_url=brand_avatar_url,
                recipient_avatar_url=recipient_avatar_url,
            )
        )

        message = EmailMessagePayload(
            to_name=display_name,
            to_email=(user_input.email or "").strip().lower(),
            subject=template["subject"],
            html_content=template["html"],
            text_content=template["text"],
        )
        return self._email_service.send_transactional_email(message)

    def create_user_and_notify(self, user_input: AdminCreateUserInput) -> CreateUserAndNotifyResult:
        """Create the user, then attempt the welcome email.

        Email failure is non-fatal: the account stays created and the failure
        is reported via result_code/warnings.
        """
        uid = self.create_user(user_input)
        warnings: List[str] = []

        email_result = self.send_welcome_credentials_email(user_input)
        if email_result.success:
            return CreateUserAndNotifyResult(
                uid=uid,
                user_created=True,
                email_sent=True,
                result_code="created_and_emailed",
                message="User account was created and welcome email was sent.",
                warnings=warnings,
                email_result=email_result,
            )

        warnings.append("User was created but welcome email delivery failed.")
        if email_result.error_message:
            warnings.append(email_result.error_message)

        return CreateUserAndNotifyResult(
            uid=uid,
            user_created=True,
            email_sent=False,
            result_code="created_email_failed",
            message="User account was created, but welcome email failed to send.",
            warnings=warnings,
            email_result=email_result,
        )
+ )
services/youtube_service.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ YouTube search service for lesson video embeddings.
3
+ Uses YouTube Data API v3 to find relevant educational videos.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import logging
10
+ from typing import Optional
11
+
12
+ logger = logging.getLogger("mathpulse.youtube")
13
+
14
+ YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY", "")
15
+
16
+
17
+ def _parse_iso8601_duration(duration: str) -> int:
18
+ """Parse ISO 8601 duration string like 'PT5M30S' to seconds."""
19
+ import re
20
+ if not duration:
21
+ return 0
22
+ hours_match = re.search(r'(\d+)H', duration)
23
+ minutes_match = re.search(r'(\d+)M', duration)
24
+ seconds_match = re.search(r'(\d+)S', duration)
25
+ hours = int(hours_match.group(1)) if hours_match else 0
26
+ minutes = int(minutes_match.group(1)) if minutes_match else 0
27
+ seconds = int(seconds_match.group(1)) if seconds_match else 0
28
+ return hours * 3600 + minutes * 60 + seconds
29
+
30
+
31
def search_youtube_video(
    query: str,
    max_results: int = 5,
    min_duration_seconds: int = 180,
    language: str = "en",
) -> Optional[dict]:
    """
    Search YouTube Data API v3 for relevant educational videos.

    Args:
        query: Search query combining lesson title, subject, and competency
        max_results: Maximum number of results to return
        min_duration_seconds: Minimum video duration (filter out shorts)
        language: Preferred video language

    Returns:
        Best video match with videoId, title, channel, embedUrl, thumbnailUrl,
        or None when the API key is missing, the request fails, or nothing
        passes the duration filter.
    """
    if not YOUTUBE_API_KEY:
        logger.warning("YOUTUBE_API_KEY not set. Video search disabled.")
        return None

    import urllib.parse
    import json

    # Bias results toward the Philippine senior-high math curriculum.
    search_query = f"{query} DepEd Philippines Grade 11 Grade 12 mathematics"
    encoded_query = urllib.parse.quote(search_query)

    search_url = (
        f"https://www.googleapis.com/youtube/v3/search"
        f"?part=snippet&type=video&q={encoded_query}"
        f"&maxResults={max_results}&relevanceLanguage={language}"
        f"&key={YOUTUBE_API_KEY}"
    )

    try:
        import urllib.request
        with urllib.request.urlopen(search_url, timeout=10) as response:
            data = json.loads(response.read().decode())

        video_results = []
        for item in data.get("items", []):
            video_id = item.get("id", {}).get("videoId", "")
            if not video_id:
                continue

            title = item.get("snippet", {}).get("title", "")
            channel = item.get("snippet", {}).get("channelTitle", "")
            description = item.get("snippet", {}).get("description", "")

            # NOTE(review): one details request per result (N+1); the videos
            # endpoint accepts comma-separated ids and could be batched.
            video_details_url = (
                f"https://www.googleapis.com/youtube/v3/videos"
                f"?part=contentDetails,statistics&id={video_id}&key={YOUTUBE_API_KEY}"
            )

            try:
                with urllib.request.urlopen(video_details_url, timeout=10) as vd_response:
                    vd_data = json.loads(vd_response.read().decode())
                    vd_item = vd_data.get("items", [{}])[0]
                    content_details = vd_item.get("contentDetails", {})
                    duration = content_details.get("duration", "")

                    duration_secs = _parse_iso8601_duration(duration)
            except Exception:
                # Best effort: assume 10 minutes so the video is not discarded.
                duration_secs = 600

            # Skip shorts / clips below the minimum duration.
            if duration_secs < min_duration_seconds:
                continue

            embed_url = f"https://www.youtube.com/embed/{video_id}"
            thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"

            video_results.append({
                "videoId": video_id,
                "videoTitle": title,
                "videoChannel": channel,
                "embedUrl": embed_url,
                "thumbnailUrl": thumbnail_url,
                "durationSeconds": duration_secs,
                "description": description[:200],
            })

        if not video_results:
            return None

        # Prefer results whose title/description look instructional.
        for vr in video_results:
            if any(term in vr["videoTitle"].lower() or term in vr["description"].lower()
                   for term in ["tutorial", "lesson", "explain", "math", "solution"]):
                return vr

        return video_results[0] if video_results else None

    except Exception as e:
        # Network/API failure is non-fatal: callers treat None as "no video".
        logger.error("YouTube search failed: %s", e)
        return None
126
+
127
+
128
def get_video_for_lesson(
    lesson_title: str,
    subject: str,
    competency: str = "",
    quarter: int = 1,
) -> Optional[dict]:
    """Get the best YouTube video for a lesson.

    Builds a search query from the non-empty fields (capped at 200 chars)
    and delegates to search_youtube_video.
    """
    # NOTE(review): `quarter` is currently unused; kept for interface stability.
    pieces = (lesson_title, subject, competency)
    query = " ".join(piece for piece in pieces if piece)[:200]
    return search_youtube_video(query)
137
+
138
+
139
def store_video_in_firestore(lesson_id: str, video_data: dict):
    """Persist chosen video to Firestore for caching.

    Best effort: silently returns when the Firebase app is not initialized,
    and only logs a warning on any failure (callers never see an exception).
    """
    try:
        # Imported lazily so the module works without firebase installed/configured.
        import firebase_admin
        from firebase_admin import firestore
        if not firebase_admin._apps:
            # No initialized Firebase app: nothing to write to.
            return
        db = firestore.client()
        doc_ref = db.collection("curriculumDocuments").document(lesson_id)
        # Cached under <lesson>/videoEmbed/primary; read back by get_cached_video.
        doc_ref.collection("videoEmbed").document("primary").set({
            **video_data,
            "storedAt": firestore.SERVER_TIMESTAMP,
        })
    except Exception as e:
        logger.warning("Could not store video in Firestore: %s", e)
154
+
155
+
156
def get_cached_video(lesson_id: str) -> Optional[dict]:
    """Retrieve cached video from Firestore.

    Returns the document written by store_video_in_firestore, or None when
    Firebase is unavailable, the document is missing, or any error occurs.
    """
    try:
        # Imported lazily so the module works without firebase installed/configured.
        import firebase_admin
        from firebase_admin import firestore
        if not firebase_admin._apps:
            return None
        db = firestore.client()
        doc = db.collection("curriculumDocuments").document(lesson_id)
        video_doc = doc.collection("videoEmbed").document("primary").get()
        if video_doc.exists:
            return video_doc.to_dict()
    except Exception:
        # Cache lookup is best effort; treat any failure as a miss.
        pass
    return None
startup.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh
# Container entrypoint: select writable data directories, optionally run
# curriculum ingestion, then hand the process over to uvicorn.
set -eu

# Prefer the persistent /data volume when it is mounted; otherwise fall
# back to the paths baked into the image. ":=" only sets unset variables,
# so explicit env overrides always win.
if [ -d "/data" ]; then
    : "${CURRICULUM_DIR:=/data/curriculum}"
    : "${VECTORSTORE_DIR:=/data/vectorstore}"
else
    : "${CURRICULUM_DIR:=/app/datasets/curriculum}"
    : "${VECTORSTORE_DIR:=/app/datasets/vectorstore}"
fi

export CURRICULUM_DIR VECTORSTORE_DIR

mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"

# Ingestion is optional: run only when the script exists AND either a
# source repo is configured or at least one PDF is already present.
ingest_script="/app/scripts/ingest_curriculum.py"
if [ -f "${ingest_script}" ]; then
    if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ] || find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
        echo "INFO: Running curriculum ingestion (optional)..."
        python "${ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
    else
        echo "INFO: No curriculum PDFs present and CURRICULUM_SOURCE_REPO_ID unset; skipping ingest"
    fi
else
    echo "INFO: Curriculum ingestion script not found at ${ingest_script}; skipping (curriculum is optional)"
fi

# exec replaces the shell so signals reach the server process directly.
exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1
startup_validation.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Startup validation for MathPulse AI backend.
3
+
4
+ This module validates all critical dependencies and configurations BEFORE
5
+ the FastAPI app starts, preventing indefinite restart loops.
6
+
7
+ If any critical check fails, the process exits with a clear error message
8
+ that's visible in HF Space logs.
9
+ """
10
+
11
+ import os
12
+ import sys
13
+ import logging
14
+ from pathlib import Path
15
+
16
+ logger = logging.getLogger("mathpulse.startup")
17
+
18
+
19
class StartupError(Exception):
    """Critical error during startup validation."""
22
+
23
+
24
def validate_imports() -> None:
    """Verify all critical imports work. Use absolute imports.

    Import order matters: the first failing group determines the error, so
    the groups below are kept in the original dependency-ish order.

    Raises:
        StartupError: when any required module cannot be imported.
    """
    logger.info("🔍 Validating Python imports...")
    try:
        # Core FastAPI stack
        import fastapi  # noqa
        import uvicorn  # noqa
        import pydantic  # noqa
        logger.info(" ✓ FastAPI, Uvicorn, Pydantic OK")

        # Backend services (use ABSOLUTE imports like deployed code)
        from services.inference_client import (  # noqa
            InferenceClient, create_default_client, is_sequential_model,
            get_current_runtime_config, get_model_for_task, model_supports_thinking,
            set_runtime_model_profile, set_runtime_model_override, reset_runtime_overrides,
            _MODEL_PROFILES,
        )
        logger.info(" ✓ InferenceClient imports OK")

        from automation_engine import automation_engine  # noqa
        logger.info(" ✓ automation_engine imports OK")

        from analytics import compute_competency_analysis  # noqa
        logger.info(" ✓ analytics imports OK")

        # Firebase is optional — its absence is only a warning.
        try:
            import firebase_admin  # noqa
            logger.info(" ✓ firebase_admin imports OK")
        except ImportError:
            logger.warning(" ⚠ firebase_admin not available (OK if Firebase not needed)")

        # ML & inference
        from services.ai_client import get_deepseek_client, CHAT_MODEL, REASONER_MODEL  # noqa
        logger.info(" ✓ DeepSeek AI client imports OK")

        logger.info("✅ All critical imports validated")
    except ImportError as e:
        detail = (
            f"❌ IMPORT ERROR - Cannot start backend:\n"
            f" {e}\n"
            f"\n"
            f"This usually means:\n"
            f" - A Python package is missing (check requirements.txt)\n"
            f" - A relative import was used (must be absolute in container)\n"
            f" - A circular import exists\n"
            f"\n"
            f"Deploy will FAIL and backend will restart indefinitely.\n"
        )
        raise StartupError(detail) from e
    except Exception as e:
        raise StartupError(f"❌ Unexpected import error: {e}") from e
75
+
76
+
77
def validate_environment() -> None:
    """Verify required environment variables are set.

    Missing values are logged as warnings (never fatal); the embedding
    model gets its own dedicated sanity check.
    """
    logger.info("🔍 Validating environment variables...")

    truthy = {"1", "true", "yes", "on"}

    def _flag(name: str, default: str) -> bool:
        # Interpret common boolean-ish env values.
        return os.getenv(name, default).strip().lower() in truthy

    # CRITICAL: DEEPSEEK_API_KEY for inference
    if not os.environ.get("DEEPSEEK_API_KEY"):
        logger.warning(
            "⚠ WARNING: DEEPSEEK_API_KEY is not set as an environment variable.\n"
            " AI inference will fail without this token.\n"
            " Use: Set DEEPSEEK_API_KEY in your .env or space secrets."
        )
    else:
        logger.info(" ✓ DEEPSEEK_API_KEY is set")

    # Check inference provider config
    inference_provider = os.getenv("INFERENCE_PROVIDER", "deepseek")
    logger.info(f" ✓ INFERENCE_PROVIDER: {inference_provider}")

    # Check model IDs
    chat_model = os.getenv("INFERENCE_CHAT_MODEL_ID") or os.getenv("INFERENCE_MODEL_ID") or "deepseek-chat"
    logger.info(f" ✓ Chat model configured: {chat_model}")

    chat_strict = _flag("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
    chat_hard_trigger = _flag("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false")
    enforce_lock_model = _flag("INFERENCE_ENFORCE_LOCK_MODEL", "true")
    lock_model_id = os.getenv("INFERENCE_LOCK_MODEL_ID", "deepseek-chat").strip() or "deepseek-chat"
    logger.info(f" ✓ INFERENCE_ENFORCE_LOCK_MODEL: {enforce_lock_model}")
    logger.info(f" ✓ INFERENCE_LOCK_MODEL_ID: {lock_model_id}")

    model_profile = os.getenv("MODEL_PROFILE", "").strip().lower()
    quiz_model = os.getenv("HF_QUIZ_MODEL_ID", "").strip()
    rag_model = os.getenv("HF_RAG_MODEL_ID", "").strip()
    logger.info(f" ✓ MODEL_PROFILE: {model_profile or 'not set (using individual env vars)'}")
    logger.info(f" ✓ HF_QUIZ_MODEL_ID: {quiz_model or 'not set (using defaults)'}")
    logger.info(f" ✓ HF_RAG_MODEL_ID: {rag_model or 'not set (using defaults)'}")

    if not chat_strict:
        logger.warning(" ⚠ Chat strict model lock is disabled; chat may fallback to alternate models")
    if chat_strict and chat_hard_trigger:
        logger.warning(
            " ⚠ Chat hard trigger is enabled while strict chat lock is on; hard escalation will be bypassed"
        )

    _validate_embedding_model()

    logger.info("✅ Environment variables OK")
122
+
123
+
124
EXPECTED_EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"


def _validate_embedding_model() -> None:
    """Sanity-check the EMBEDDING_MODEL env var.

    Emits warnings (never raises) when the variable is unset, differs from
    the expected BGE embedding model, or is mistakenly set to a generation
    model id (which would break RAG retrieval).
    """
    embedding_model = os.getenv("EMBEDDING_MODEL", "").strip()
    if not embedding_model:
        logger.warning(
            "WARNING: EMBEDDING_MODEL env var is not set. "
            f"Expected: {EXPECTED_EMBEDDING_MODEL}. "
            "RAG retrieval will fail without an embedding model."
        )
    elif embedding_model != EXPECTED_EMBEDDING_MODEL:
        logger.warning(
            f"WARNING: EMBEDDING_MODEL is set to '{embedding_model}' — "
            f"expected '{EXPECTED_EMBEDDING_MODEL}'. "
            "Confirm this is intentional before deploying."
        )
    # Bug fix: guard this import. This check is purely advisory, so a
    # broken/missing services.ai_client module must not crash validation.
    try:
        from services.ai_client import CHAT_MODEL, REASONER_MODEL  # noqa
        generation_model_ids = [CHAT_MODEL, REASONER_MODEL]
    except Exception:
        generation_model_ids = []
    if embedding_model in generation_model_ids:
        logger.warning(
            f"CRITICAL: EMBEDDING_MODEL is set to a generation model ('{embedding_model}'). "
            "This will break RAG retrieval. Set it to 'BAAI/bge-small-en-v1.5'."
        )
    else:
        logger.info(f" EMBEDDING_MODEL: {embedding_model or 'not set'}")
151
+
152
+
153
def validate_config_files() -> None:
    """Verify config files exist and are readable.

    Raises:
        StartupError: when no candidate models.yaml is readable, or the
            first readable one is empty/unreadable.
    """
    logger.info("🔍 Validating configuration files...")

    # Accept either deployment/runtime path without warning when one valid path exists.
    candidates = [
        "config/models.yaml",
        "backend/config/models.yaml",
    ]

    chosen = None
    for candidate in candidates:
        path = Path(candidate)
        if not path.exists():
            continue
        try:
            content = path.read_text(encoding='utf-8')
        except Exception as e:
            raise StartupError(
                f"❌ CONFIG ERROR: Cannot read {candidate}:\n"
                f" {e}\n"
            ) from e
        if not content.strip():
            raise StartupError(
                f"❌ CONFIG ERROR: {candidate} is empty!\n"
                f" This will cause model routing to fail.\n"
            )
        chosen = candidate
        break

    if not chosen:
        joined_paths = ", ".join(candidates)
        raise StartupError(
            f"❌ CONFIG ERROR: No readable model config found.\n"
            f" Checked: {joined_paths}\n"
        )

    logger.info(f" ✓ Using model config: {chosen}")

    _validate_model_config_fields(chosen)

    logger.info("✅ Configuration files OK")
198
+
199
+
200
def validate_file_structure() -> None:
    """Verify critical backend files exist.

    Each entry is a list of acceptable locations (flat layout vs. backend/
    subdirectory); the first that exists wins.

    Raises:
        StartupError: when none of a required entry's candidates exist.
    """
    logger.info("🔍 Validating file structure...")

    def _first_existing(paths):
        # Return the first candidate path that exists, else None.
        for p in paths:
            if Path(p).exists():
                return p
        return None

    required_path_sets = [
        ["main.py", "backend/main.py"],
        ["services/inference_client.py", "backend/services/inference_client.py"],
        ["analytics.py", "backend/analytics.py"],
        ["automation_engine.py", "backend/automation_engine.py"],
    ]
    optional_path_sets = [
        ["Dockerfile", "backend/Dockerfile"],
    ]

    for candidates in required_path_sets:
        found = _first_existing(candidates)
        if not found:
            joined = " or ".join(candidates)
            raise StartupError(
                f"❌ FILE MISSING: {joined}\n"
                f" Backend structure is broken for this deployment layout.\n"
            )
        logger.info(f" ✓ Found {found}")

    for candidates in optional_path_sets:
        found = _first_existing(candidates)
        if found:
            logger.info(f" ✓ Found optional build file {found}")
        else:
            joined = " or ".join(candidates)
            logger.info(
                f" ℹ Optional build file not present at runtime: {joined}"
            )

    logger.info("✅ File structure OK")
246
+
247
+
248
def validate_inference_client_config() -> None:
    """Validate InferenceClient can load its config.

    Instantiates the default client (which loads the YAML model config),
    checks the task→model and task→provider maps exist, that the four core
    tasks are routed, and — when the strict chat lock is on — that the
    effective chat fallback chain contains exactly one model.

    Raises:
        StartupError: when the client cannot be built or is misconfigured.
    """
    logger.info("🔍 Validating InferenceClient configuration...")

    try:
        # Try to create the client (this will load config from YAML)
        from services.inference_client import create_default_client
        client = create_default_client()

        # Verify critical attributes
        if not hasattr(client, 'task_model_map'):
            raise StartupError("❌ InferenceClient missing task_model_map attribute")

        if not hasattr(client, 'task_provider_map'):
            raise StartupError("❌ InferenceClient missing task_provider_map attribute")

        # Check that required tasks are mapped
        required_tasks = ['chat', 'verify_solution', 'lesson_generation', 'quiz_generation']
        for task in required_tasks:
            if task not in client.task_model_map:
                raise StartupError(
                    f"❌ Task '{task}' not in task_model_map.\n"
                    f" Check config/models.yaml\n"
                )
            model = client.task_model_map[task]
            provider = client.task_provider_map.get(task, 'unknown')
            logger.info(f" ✓ {task}: {model} ({provider})")

        # NOTE(review): this relies on the private _model_chain_for_task
        # helper — confirm it stays stable across inference_client refactors.
        chat_model = client.task_model_map.get("chat", client.default_model)
        chat_chain = client._model_chain_for_task("chat", chat_model)
        logger.info(
            f" ✓ chat strict lock: {client.chat_strict_model_only}; "
            f"effective chat chain length={len(chat_chain)}"
        )
        # With the strict lock on, any fallback model in the chain would let
        # chat silently escalate to another model — treat that as fatal.
        if client.chat_strict_model_only and len(chat_chain) != 1:
            raise StartupError(
                "❌ Chat strict model lock is enabled but effective chat model chain is not singular.\n"
                " Check INFERENCE_CHAT_STRICT_MODEL_ONLY and routing.task_fallback_model_map.chat\n"
            )

        logger.info("✅ InferenceClient configuration OK")

    except StartupError:
        # Already a well-formed startup failure; re-raise unchanged.
        raise
    except Exception as e:
        raise StartupError(
            f"❌ InferenceClient validation failed:\n"
            f" {e}\n"
            f" Check config/models.yaml and backend/config/models.yaml\n"
        ) from e
298
+
299
+
300
def _validate_model_config_fields(config_path: str) -> None:
    """Parse *config_path* as YAML and check the model-routing schema.

    Verifies the 'models' section, the 'rag_primary' and
    'model_capabilities' entries, and that all required RAG task mappings
    are present under routing.task_model_map.

    Raises:
        StartupError: on unparseable YAML or any missing required field.
    """
    try:
        import yaml
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}
    except Exception as e:
        raise StartupError(f"❌ Cannot parse {config_path} as YAML: {e}") from e

    models = config.get("models", {})
    if not isinstance(models, dict):
        raise StartupError(f"❌ {config_path}: 'models' section missing or invalid")

    if "rag_primary" not in models:
        raise StartupError(f"❌ {config_path}: missing 'models.rag_primary' field")
    rag_primary = models["rag_primary"]
    if isinstance(rag_primary, dict):
        logger.info(f" ✓ rag_primary model: {rag_primary.get('id', 'UNSET')}")
    else:
        # Fix: these constant messages were placeholder-less f-strings (F541).
        logger.warning(" ⚠ rag_primary is not a dict, may cause issues")

    capabilities = models.get("model_capabilities")
    if not isinstance(capabilities, dict):
        raise StartupError(f"❌ {config_path}: missing 'models.model_capabilities' section")
    logger.info(f" ✓ model_capabilities: sequential_only={capabilities.get('sequential_only')}, supports_thinking={capabilities.get('supports_thinking')}")

    tasks = config.get("routing", {}).get("task_model_map", {})
    rag_tasks = {"rag_lesson", "rag_problem", "rag_analysis_context"}
    # Set comprehension instead of set(generator) for the normalized keys.
    missing_rag = rag_tasks - {str(t).strip().lower() for t in tasks.keys()}
    if missing_rag:
        raise StartupError(f"❌ {config_path}: missing RAG task mappings: {missing_rag}")

    logger.info(" ✓ All RAG task mappings present")
332
+
333
+
334
def run_all_validations() -> None:
    """Run comprehensive startup validation.

    If any check fails, exits with clear error message visible in logs.
    When STARTUP_VALIDATION_STRICT is falsy, failures are logged but the
    process continues starting up.
    """
    banner = "=" * 70
    logger.info(banner)
    logger.info("🚀 STARTUP VALIDATION - Checking all critical dependencies")
    logger.info(banner)

    strict_mode = os.getenv("STARTUP_VALIDATION_STRICT", "false").strip().lower() in {"1", "true", "yes", "on"}

    try:
        # Order matters: cheap structural checks run before the heavier
        # import/config/client checks.
        for check in (
            validate_file_structure,
            validate_imports,
            validate_environment,
            validate_config_files,
            validate_inference_client_config,
        ):
            check()

        logger.info(banner)
        logger.info("✅ ALL STARTUP VALIDATIONS PASSED")
        logger.info(banner)

    except StartupError as e:
        logger.error(banner)
        logger.error(str(e))
        logger.error(banner)
        if strict_mode:
            logger.error("\n🛑 DEPLOYMENT WILL FAIL - Fix errors above and redeploy")
            sys.exit(1)
        logger.warning(
            "\n⚠️ Continuing startup because STARTUP_VALIDATION_STRICT is disabled. "
            "Set STARTUP_VALIDATION_STRICT=true to fail fast."
        )
    except Exception as e:
        logger.exception(f"Unexpected validation error: {e}")
        if strict_mode:
            sys.exit(1)
        logger.warning(
            "⚠️ Continuing startup after unexpected validation error because "
            "STARTUP_VALIDATION_STRICT is disabled."
        )
tests/test_admin_model_routes.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Route-level tests for the /api/admin/model-config endpoints.
3
+
4
+ Follows the auth mock pattern from test_api.py.
5
+ """
6
+
7
+ import os
8
+ from unittest.mock import MagicMock, patch
9
+
10
+ import pytest
11
+ from fastapi.testclient import TestClient
12
+
13
+ import main as main_module
14
+ from main import app
15
+ from services.inference_client import reset_runtime_overrides
16
+
17
# Pretend Firebase is initialised so the auth middleware runs without
# real credentials, and stub token verification to always resolve to an
# admin identity.
main_module._firebase_ready = True
main_module._init_firebase_admin = lambda: None
main_module.firebase_firestore = None
# Install a stub auth module only if main never imported a real one.
if getattr(main_module, "firebase_auth", None) is None:
    main_module.firebase_auth = MagicMock()
main_module.firebase_auth.verify_id_token = MagicMock(return_value={
    "uid": "admin-uid",
    "email": "admin@example.com",
    "role": "admin",
})

# Client whose requests always carry an admin bearer token.
admin_client = TestClient(app, headers={"Authorization": "Bearer admin-token"})

# Keys the "resolved" section of the config response must contain.
_RESOLVED_KEYS = {
    "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
    "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
}
# Profiles the API is expected to advertise.
_KNOWN_PROFILES = {"dev", "budget", "prod"}
# Top-level keys of the GET /api/admin/model-config response body.
_BASE_CONFIG_KEYS = {"profile", "overrides", "resolved"}
36
+
37
+
38
@pytest.fixture(autouse=True)
def _mock_firestore():
    """Prevent every test from persisting runtime config to Firestore."""
    with patch("services.inference_client._save_runtime_config_to_firestore", side_effect=None):
        yield
42
+
43
+
44
@pytest.fixture(autouse=True)
def _reset_overrides():
    """Start each test from pristine runtime overrides and clean up after."""
    reset_runtime_overrides()
    yield
    reset_runtime_overrides()
49
+
50
+
51
+ # ─── Auth Enforcement ────────────────────────────────────────
52
+
53
+
54
class TestAuth:
    """Auth enforcement on GET /api/admin/model-config."""

    @staticmethod
    def _restore_admin_token():
        # Re-install the default admin verification used by other tests.
        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
            "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
        })

    def test_get_rejects_bad_token(self):
        main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
        bad_client = TestClient(app, headers={"Authorization": "Bearer bad-token"})
        resp = bad_client.get("/api/admin/model-config")
        self._restore_admin_token()
        assert resp.status_code in {401, 403}

    def test_get_rejects_student_role(self):
        main_module.firebase_auth.verify_id_token = MagicMock(return_value={
            "uid": "student-uid", "email": "s@example.com", "role": "student",
        })
        student_client = TestClient(app, headers={"Authorization": "Bearer student-token"})
        resp = student_client.get("/api/admin/model-config")
        self._restore_admin_token()
        assert resp.status_code == 403
74
+
75
+
76
+ # ─── GET Model Config ─────────────────────────────────────────
77
+
78
+
79
class TestGetModelConfig:
    """Shape of GET /api/admin/model-config responses."""

    @staticmethod
    def _fetch():
        # All tests in this class read the same endpoint.
        return admin_client.get("/api/admin/model-config")

    def test_returns_base_keys(self):
        resp = self._fetch()
        assert resp.status_code == 200
        body = resp.json()
        assert all(key in body for key in _BASE_CONFIG_KEYS)

    def test_resolved_contains_expected_keys(self):
        resolved = self._fetch().json().get("resolved", {})
        assert all(key in resolved for key in _RESOLVED_KEYS)

    def test_available_profiles_present(self):
        profiles = self._fetch().json().get("availableProfiles", [])
        assert all(profile in profiles for profile in _KNOWN_PROFILES)

    def test_profile_descriptions_present(self):
        descriptions = self._fetch().json().get("profileDescriptions", {})
        assert all(profile in descriptions for profile in _KNOWN_PROFILES)

    def test_resolved_models_are_non_empty_strings(self):
        # Pin a known profile first so resolution is deterministic.
        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
        resolved = self._fetch().json().get("resolved", {})
        for key, value in resolved.items():
            assert isinstance(value, str), f"{key} is not a string: {value}"
            assert len(value) > 0, f"Resolved key {key} is empty"
116
+
117
+
118
+ # ─── POST Profile Switch ─────────────────────────────────────
119
+
120
+
121
class TestPostProfileSwitch:
    """POST /api/admin/model-config/profile behaviour."""

    @staticmethod
    def _switch(payload):
        return admin_client.post("/api/admin/model-config/profile", json=payload)

    def test_switch_to_dev_succeeds(self):
        resp = self._switch({"profile": "dev"})
        assert resp.status_code == 200
        assert resp.json()["success"] is True

    def test_switch_to_budget_succeeds(self):
        resp = self._switch({"profile": "budget"})
        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["applied"]["profile"] == "budget"

    def test_switch_to_prod_succeeds(self):
        resp = self._switch({"profile": "prod"})
        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["applied"]["profile"] == "prod"

    def test_switch_to_invalid_profile_returns_400(self):
        assert self._switch({"profile": "nonexistent"}).status_code == 400

    def test_switch_missing_profile_field(self):
        # Pydantic validation rejects the body before the handler runs.
        assert self._switch({}).status_code == 422
148
+
149
+
150
+ # ─── POST Override ───────────────────────────────────────────
151
+
152
+
153
class TestPostOverride:
    """POST /api/admin/model-config/override behaviour."""

    @staticmethod
    def _override(key, value):
        return admin_client.post(
            "/api/admin/model-config/override",
            json={"key": key, "value": value},
        )

    def test_set_valid_override_key_succeeds(self):
        resp = self._override("INFERENCE_MODEL_ID", "test/override-model")
        assert resp.status_code == 200
        assert resp.json()["success"] is True

    def test_set_invalid_override_key_returns_400(self):
        # EMBEDDING_MODEL is deliberately not overridable at runtime.
        assert self._override("EMBEDDING_MODEL", "test/emb").status_code == 400

    def test_override_is_visible_in_subsequent_get(self):
        self._override("INFERENCE_MODEL_ID", "custom/model-v2")
        config = admin_client.get("/api/admin/model-config").json()
        overrides = config.get("overrides", {})
        assert "INFERENCE_MODEL_ID" in overrides
        assert overrides["INFERENCE_MODEL_ID"] == "custom/model-v2"
179
+
180
+
181
+ # ─── DELETE Reset ───────────────────────────────────────────
182
+
183
+
184
class TestDeleteReset:
    """DELETE /api/admin/model-config/reset behaviour."""

    @staticmethod
    def _reset():
        return admin_client.delete("/api/admin/model-config/reset")

    def test_reset_returns_success(self):
        resp = self._reset()
        assert resp.status_code == 200
        assert resp.json()["success"] is True

    def test_reset_clears_override(self):
        admin_client.post(
            "/api/admin/model-config/override",
            json={"key": "INFERENCE_MODEL_ID", "value": "temp/model"},
        )
        resp = self._reset()
        assert resp.status_code == 200
        assert resp.json()["current"]["overrides"] == {}

    def test_reset_clears_profile(self):
        admin_client.post("/api/admin/model-config/profile", json={"profile": "budget"})
        resp = self._reset()
        assert resp.status_code == 200
        assert resp.json()["current"]["profile"] == ""
205
+
206
+
207
+ # ─── Profile after switch ────────────────────────────────────
208
+
209
+
210
class TestProfileAfterSwitch:
    """Profile switches must be reflected by subsequent GETs."""

    def test_switched_profile_visible_in_get(self):
        admin_client.post("/api/admin/model-config/profile", json={"profile": "dev"})
        current = admin_client.get("/api/admin/model-config").json()
        assert current["profile"] == "dev"
tests/test_api.py ADDED
@@ -0,0 +1,2053 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ backend/tests/test_api.py
3
+ Comprehensive tests for all FastAPI endpoints.
4
+
5
+ Tests cover:
6
+ - Successful requests with valid data
7
+ - AI inference API failures (502 fallback)
8
+ - Timeout handling
9
+ - Malformed response data
10
+ - Error status-code mapping
11
+
12
+ Run with: pytest backend/tests/test_api.py -v
13
+ """
14
+
15
+ import asyncio
16
+ import json
17
+ import os
18
+ import sys
19
+ import time
20
+ from typing import Any, Dict, List
21
+ from unittest.mock import AsyncMock, MagicMock, patch
22
+
23
+ import pytest # type: ignore[import-not-found]
24
+ from fastapi.testclient import TestClient
25
+
26
+ # Add backend directory to path
27
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
28
+ from services.inference_client import InferenceClient, InferenceRequest
29
+
30
+ # automation_engine has Firebase dependencies - mock its heavy parts
31
+ # but keep the Pydantic model classes
32
# automation_engine has Firebase dependencies - mock its heavy parts
# but keep the Pydantic model classes
mock_ae = MagicMock()

# Define minimal Pydantic-like classes for payloads automation_engine exports
from pydantic import BaseModel as _BM
# NOTE(review): EmailSendResult is not used in this setup block —
# presumably referenced by tests later in the file; confirm before removing.
from services.email_service import EmailSendResult

class _DiagnosticCompletionPayload(_BM):
    # Mirrors automation_engine.DiagnosticCompletionPayload.
    studentId: str
    results: list
    gradeLevel: str | None = None
    questionBreakdown: dict | None = None

class _QuizSubmissionPayload(_BM):
    # Mirrors automation_engine.QuizSubmissionPayload.
    studentId: str
    quizId: str
    subject: str
    score: float
    totalQuestions: int
    correctAnswers: int
    timeSpentSeconds: int

class _StudentEnrollmentPayload(_BM):
    # Mirrors automation_engine.StudentEnrollmentPayload.
    studentId: str
    name: str
    email: str
    gradeLevel: str | None = None
    teacherId: str | None = None

class _DataImportPayload(_BM):
    # Mirrors automation_engine.DataImportPayload.
    teacherId: str
    students: list
    columnMapping: dict

class _ContentUpdatePayload(_BM):
    # Mirrors automation_engine.ContentUpdatePayload.
    adminId: str
    action: str
    contentType: str
    contentId: str
    subjectId: str | None = None
    details: str | None = None

class _AutomationResult(_BM):
    # Default-success result object returned by mocked automation calls.
    success: bool = True
    message: str = ""
    actions: list = []

# Attach the stand-in classes to the mocked module, then register it so
# `import automation_engine` inside main resolves to this mock.
mock_ae.automation_engine = MagicMock()
mock_ae.DiagnosticCompletionPayload = _DiagnosticCompletionPayload
mock_ae.QuizSubmissionPayload = _QuizSubmissionPayload
mock_ae.StudentEnrollmentPayload = _StudentEnrollmentPayload
mock_ae.DataImportPayload = _DataImportPayload
mock_ae.ContentUpdatePayload = _ContentUpdatePayload
mock_ae.AutomationResult = _AutomationResult
sys.modules["automation_engine"] = mock_ae

# Override tokens so client init doesn't fail
os.environ["HF_TOKEN"] = "test-token-for-testing"
os.environ["DEEPSEEK_API_KEY"] = "test-ds-key-for-testing"

# analytics.py is importable directly (its heavy deps are guarded)
# Must happen AFTER the automation_engine mock is registered above.
import main as main_module  # noqa: E402

app = main_module.app

# Mock auth verification so protected endpoints can run in tests without Firebase credentials.
main_module._firebase_ready = True
main_module._init_firebase_admin = lambda: None
main_module.firebase_firestore = None
if getattr(main_module, "firebase_auth", None) is None:
    main_module.firebase_auth = MagicMock()
# Every request in this file authenticates as the same teacher account.
main_module.firebase_auth.verify_id_token = MagicMock(
    return_value={
        "uid": "test-teacher-uid",
        "email": "teacher@example.com",
        "role": "teacher",
    }
)

client = TestClient(app, headers={"Authorization": "Bearer test-auth-token"})
111
+
112
+
113
+ # ─── Fixtures ──────────────────────────────────────────────────
114
+
115
+
116
def make_deepseek_risk_mock(
    risk_label: str = "low risk academically stable",
    confidence: float = 0.85,
):
    """Create a mock DeepSeek client for risk prediction tests.

    The returned MagicMock mimics the OpenAI-compatible shape:
    ``client.chat.completions.create(...).choices[0].message.content``
    yields a JSON string with ``risk_label``, ``confidence`` and ``reasoning``.
    """
    payload = json.dumps({
        "risk_label": risk_label,
        "confidence": confidence,
        "reasoning": "Mock risk assessment."
    })

    choice = MagicMock()
    choice.message.content = payload

    ds_client = MagicMock()
    ds_client.chat.completions.create.return_value = MagicMock(choices=[choice])
    return ds_client
132
+
133
+
134
+ # ─── Health & Root ─────────────────────────────────────────────
135
+
136
+
137
class TestHealthEndpoints:
    """Smoke tests for the unauthenticated health and root endpoints."""

    def test_health_returns_200(self):
        resp = client.get("/health")
        assert resp.status_code == 200
        body = resp.json()
        assert body["status"] == "healthy"
        assert "models" in body

    def test_root_returns_api_info(self):
        resp = client.get("/")
        assert resp.status_code == 200
        body = resp.json()
        assert body["name"] == "MathPulse AI API"
        assert "version" in body

    def test_health_includes_request_id_header(self):
        # Every response should carry the correlation id header.
        assert "x-request-id" in client.get("/health").headers
155
+
156
+
157
class TestAuthMiddleware:
    """Auth middleware should accept tokens carrying `user_id` when `uid` is absent."""

    def test_accepts_user_id_claim_when_uid_missing(self):
        now = int(time.time())
        material = {
            "materialId": "mat-auth-1",
            "teacherId": "test-teacher-uid",
            "fileName": "auth-check.pdf",
            "fileType": "pdf",
            "classSectionId": "grade11_a",
            "topics": [{"title": "Linear Equations"}],
            "extractedTextLength": 300,
            "retentionDays": 180,
            "expiresAtEpoch": now + 3600,
        }
        firestore = _FakeFirestoreModule({"courseMaterials": [material]})

        # Token has `user_id` instead of `uid` — middleware must still resolve it.
        claims = {
            "user_id": "test-teacher-uid",
            "email": "teacher@example.com",
            "role": "teacher",
        }
        with patch.object(main_module.firebase_auth, "verify_id_token", return_value=claims), \
                patch.object(main_module, "firebase_firestore", firestore), \
                patch.object(main_module, "_firebase_ready", True):
            resp = client.get("/api/upload/course-materials/recent?classSectionId=grade11_a&limit=10")

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert len(body["materials"]) == 1
        assert body["materials"][0]["materialId"] == "mat-auth-1"
190
+
191
+
192
+ # ─── Chat Endpoint ─────────────────────────────────────────────
193
+
194
+
195
class TestChatEndpoint:
    """Tests for /api/chat and /api/chat/stream.

    Covers the math-only guardrails (greeting/thanks/non-math short-circuits,
    contextual follow-up tokens), inference error mapping, and the SSE stream
    protocol (chunk/error/end events, timeouts, marker-based continuation).
    """

    # ── Non-streaming chat ──────────────────────────────────────
    @patch("main.call_hf_chat")
    def test_chat_success(self, mock_chat):
        mock_chat.return_value = "Hello! 2+2=4."
        response = client.post("/api/chat", json={
            "message": "What is 2+2?",
            "history": [],
        })
        assert response.status_code == 200
        assert "4" in response.json()["response"]

    @patch("main.call_hf_chat")
    def test_chat_non_math_returns_refusal_and_skips_inference(self, mock_chat):
        # Off-topic question must be answered by a canned redirect, no model call.
        response = client.post("/api/chat", json={
            "message": "Who is Elon Musk?",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._NON_MATH_REDIRECT_RESPONSES
        mock_chat.assert_not_called()

    @patch("main.call_hf_chat")
    def test_chat_greeting_returns_friendly_response_and_skips_inference(self, mock_chat):
        response = client.post("/api/chat", json={
            "message": "hello",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._GREETING_RESPONSES
        mock_chat.assert_not_called()

    @patch("main.call_hf_chat")
    def test_chat_thanks_returns_friendly_response_and_skips_inference(self, mock_chat):
        response = client.post("/api/chat", json={
            "message": "thanks",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._THANKS_RESPONSES
        mock_chat.assert_not_called()

    # ── Follow-up token handling ("go"/"more"/"continue") ───────
    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_allows_contextual_followup_token_and_calls_inference(self, mock_chat_async):
        # A bare "go" after an assistant continuation prompt should reach the model.
        mock_chat_async.return_value = "Sure. Next step: isolate x on one side."
        response = client.post("/api/chat", json={
            "message": "go",
            "history": [
                {"role": "assistant", "content": "Nice work. Shall we continue?"},
            ],
        })

        assert response.status_code == 200
        assert response.json()["response"] == "Sure. Next step: isolate x on one side."
        mock_chat_async.assert_called_once()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_followup_token_reconstructs_latest_math_intent_and_calls_inference(self, mock_chat_async):
        mock_chat_async.return_value = "Continuing: subtract 3 from both sides first."
        response = client.post("/api/chat", json={
            "message": "more",
            "history": [
                {"role": "user", "content": "Solve for x in 2x + 3 = 7"},
                {"role": "assistant", "content": "Start by isolating x."},
            ],
        })

        assert response.status_code == 200
        assert response.json()["response"] == "Continuing: subtract 3 from both sides first."
        mock_chat_async.assert_called_once()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_followup_token_without_context_requests_clarification(self, mock_chat_async):
        # "go" with no prior context cannot be continued — ask for clarification.
        response = client.post("/api/chat", json={
            "message": "go",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] == main_module._CONTINUATION_CONTEXT_CLARIFY_RESPONSE
        mock_chat_async.assert_not_called()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_punctuated_followup_token_without_context_requests_clarification(self, mock_chat_async):
        # Trailing punctuation ("go!") must not defeat the follow-up detection.
        response = client.post("/api/chat", json={
            "message": "go!",
            "history": [],
        })

        assert response.status_code == 200
        assert response.json()["response"] == main_module._CONTINUATION_CONTEXT_CLARIFY_RESPONSE
        mock_chat_async.assert_not_called()

    @patch("main.call_hf_chat_async", new_callable=AsyncMock)
    def test_chat_followup_token_after_refused_request_remains_blocked(self, mock_chat_async):
        # "continue" after a refused off-topic question stays refused.
        response = client.post("/api/chat", json={
            "message": "continue",
            "history": [
                {"role": "user", "content": "Who is Elon Musk?"},
                {
                    "role": "assistant",
                    "content": main_module._NON_MATH_REDIRECT_RESPONSES[0],
                },
            ],
        })

        assert response.status_code == 200
        assert response.json()["response"] in main_module._NON_MATH_REDIRECT_RESPONSES
        mock_chat_async.assert_not_called()

    @patch("main.call_hf_chat")
    def test_chat_with_history(self, mock_chat):
        mock_chat.return_value = "Yes, that's right."
        response = client.post("/api/chat", json={
            "message": "Is x = 4 correct for 2 + 2 = x?",
            "history": [
                {"role": "user", "content": "What is 2+2?"},
                {"role": "assistant", "content": "4"},
            ],
        })
        assert response.status_code == 200
        # Verify history was included in messages
        call_args = mock_chat.call_args
        messages = call_args.args[0] if call_args.args else call_args.kwargs.get("messages", [])
        assert len(messages) >= 3  # system + 2 history + 1 current

    def test_chat_missing_message_returns_422(self):
        response = client.post("/api/chat", json={"history": []})
        assert response.status_code == 422

    @patch("main.call_hf_chat")
    def test_chat_hf_failure_returns_502(self, mock_chat):
        # Upstream inference failures map to 502 Bad Gateway.
        mock_chat.side_effect = Exception("HF API down")
        response = client.post("/api/chat", json={
            "message": "Solve 3x + 1 = 10",
            "history": [],
        })
        assert response.status_code == 502

    @patch("main.call_hf_chat")
    def test_chat_quadratic_prompt_smoke(self, mock_chat):
        mock_chat.return_value = (
            "Given x^2 - 5x + 6 = 0, factor to (x-2)(x-3)=0. "
            "So x = 2 or x = 3. Final answer: x = 2, x = 3."
        )
        response = client.post("/api/chat", json={
            "message": "Solve quadratic equation x² - 5x + 6 = 0 step-by-step.",
            "history": [],
        })
        assert response.status_code == 200
        data = response.json()["response"]
        assert "x = 2" in data
        assert "x = 3" in data

    # ── Streaming (SSE) chat ────────────────────────────────────
    @patch("main.call_hf_chat_stream")
    def test_chat_stream_success(self, mock_stream):
        mock_stream.return_value = iter(["Hello", " world"])

        with client.stream("POST", "/api/chat/stream", json={
            "message": "What is 2 + 2?",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "event: chunk" in content
        assert '"chunk": "Hello"' in content
        assert "event: end" in content

    @patch("main.call_hf_chat_stream")
    def test_chat_stream_emits_error_event(self, mock_stream):
        # Stream failures are reported in-band as an error event, then end.
        mock_stream.side_effect = Exception("HF stream down")

        with client.stream("POST", "/api/chat/stream", json={
            "message": "Solve x + 2 = 5",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "event: error" in content
        assert "event: end" in content

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_timeout_emits_error_and_end_events(self, mock_stream_async):
        # Generator that is slower than the (patched-down) stream timeouts.
        async def _slow_stream(*args, **kwargs):
            await asyncio.sleep(0.05)
            yield "late chunk"

        mock_stream_async.return_value = _slow_stream()

        with patch.object(main_module, "CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC", 0.01), patch.object(main_module, "CHAT_STREAM_TOTAL_TIMEOUT_SEC", 0.03):
            with client.stream("POST", "/api/chat/stream", json={
                "message": "Solve x + 2 = 5",
                "history": [],
            }) as response:
                assert response.status_code == 200
                content = "".join(response.iter_text())

        assert "event: error" in content
        assert "timed out" in content.lower()
        assert "event: end" in content

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_marker_mode_continues_until_marker(self, mock_stream_async):
        # First stream ends without the marker; the endpoint must issue a
        # continuation round until END_MARKER is observed.
        async def _first_stream(*args, **kwargs):
            yield "n=1: x=1\n"
            yield "n=2: x=2"

        async def _second_stream(*args, **kwargs):
            yield "\nn=3: x=3\nEND_MARKER"

        mock_stream_async.side_effect = [_first_stream(), _second_stream()]

        with patch.object(main_module, "CHAT_STREAM_CONTINUATION_MAX_ROUNDS", 1):
            with client.stream("POST", "/api/chat/stream", json={
                "message": "Solve x+n=2n for n=1..3 and end with END_MARKER",
                "history": [],
                "completionMode": "marker",
                "expectedEndMarker": "END_MARKER",
            }) as response:
                assert response.status_code == 200
                content = "".join(response.iter_text())

        assert "END_MARKER" in content
        assert "event: end" in content
        assert mock_stream_async.call_count == 2

    @patch("main.call_hf_chat_stream")
    def test_chat_stream_non_math_returns_refusal_and_skips_inference(self, mock_stream):
        with client.stream("POST", "/api/chat/stream", json={
            "message": "Who is Elon Musk?",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "event: chunk" in content
        assert any(candidate in content for candidate in main_module._NON_MATH_REDIRECT_RESPONSES)
        assert "event: end" in content
        mock_stream.assert_not_called()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_allows_contextual_followup_token_and_calls_inference(self, mock_stream_async):
        async def _stream(*args, **kwargs):
            yield "Sure, continuing with the next step."

        mock_stream_async.return_value = _stream()

        with client.stream("POST", "/api/chat/stream", json={
            "message": "go",
            "history": [
                {"role": "assistant", "content": "Would you like to continue?"},
            ],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "Sure, continuing with the next step." in content
        assert "event: end" in content
        mock_stream_async.assert_called_once()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_followup_token_reconstructs_latest_math_intent_and_calls_inference(self, mock_stream_async):
        async def _stream(*args, **kwargs):
            yield "Continuing the same solution from the previous step."

        mock_stream_async.return_value = _stream()

        with client.stream("POST", "/api/chat/stream", json={
            "message": "more",
            "history": [
                {"role": "user", "content": "Solve 2x + 3 = 7"},
                {"role": "assistant", "content": "We can isolate x now."},
            ],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert "Continuing the same solution from the previous step." in content
        assert "event: end" in content
        mock_stream_async.assert_called_once()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_followup_token_without_context_requests_clarification(self, mock_stream_async):
        with client.stream("POST", "/api/chat/stream", json={
            "message": "go",
            "history": [],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert main_module._CONTINUATION_CONTEXT_CLARIFY_RESPONSE in content
        assert "event: end" in content
        mock_stream_async.assert_not_called()

    @patch("main.call_hf_chat_stream_async")
    def test_chat_stream_followup_token_after_refused_request_remains_blocked(self, mock_stream_async):
        with client.stream("POST", "/api/chat/stream", json={
            "message": "continue",
            "history": [
                {"role": "user", "content": "Who is Elon Musk?"},
                {
                    "role": "assistant",
                    "content": main_module._NON_MATH_REDIRECT_RESPONSES[1],
                },
            ],
        }) as response:
            assert response.status_code == 200
            content = "".join(response.iter_text())

        assert any(candidate in content for candidate in main_module._NON_MATH_REDIRECT_RESPONSES)
        assert "event: end" in content
        mock_stream_async.assert_not_called()
511
+
512
+
513
class TestChatTransport:
    """call_hf_chat should route through the DeepSeek-backed inference client."""

    @patch("services.ai_client.get_deepseek_client")
    def test_call_hf_chat_uses_deepseek_api(self, mock_ds_fn):
        answer = "x = 2 or x = 3"

        choice = MagicMock()
        choice.message.content = answer
        deepseek = MagicMock()
        deepseek.chat.completions.create.return_value = MagicMock(choices=[choice])
        mock_ds_fn.return_value = deepseek

        with patch.object(main_module, "get_inference_client") as mock_get_ic:
            inference = MagicMock()
            inference.generate_from_messages.return_value = answer
            mock_get_ic.return_value = inference

            result = main_module.call_hf_chat(
                [{"role": "user", "content": "Solve x^2 - 5x + 6 = 0"}],
                max_tokens=256,
                temperature=0.2,
                top_p=0.9,
            )

        assert result
537
+
538
+
539
class TestInferenceRouting:
    """Model-selection rules of InferenceClient under env locks and overrides."""

    @staticmethod
    def _route(task_type, content):
        # Resolve the primary model and its fallback chain for one request.
        ic = InferenceClient()
        req = InferenceRequest(
            messages=[{"role": "user", "content": content}],
            task_type=task_type,
        )
        model, source = ic._resolve_primary_model(req)
        chain = ic._model_chain_for_task(task_type, model)
        return model, source, chain

    def test_chat_strict_model_lock_keeps_single_model_chain(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")

        model, source, chain = self._route(
            "chat", "Show all steps and prove the result rigorously."
        )

        assert model == "deepseek-chat"
        assert "chat_strict_model_only" in source
        assert chain == ["deepseek-chat"]

    def test_chat_env_override_wins_under_model_lock(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")

        model, source, chain = self._route(
            "chat", "Find the roots and explain why."
        )

        assert model == "deepseek-chat"
        assert "chat_override_env" in source
        assert chain == ["deepseek-chat"]

    def test_chat_temp_override_wins_under_model_lock(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_ID", "deepseek-reasoner")
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_CHAT_STRICT_MODEL_ONLY", "true")
        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")

        model, source, chain = self._route(
            "chat", "Find the roots and explain why."
        )

        assert model == "deepseek-chat"
        assert "chat_temp_override_env" in source
        assert chain == ["deepseek-chat"]

    def test_chat_temp_override_does_not_change_non_chat_task_under_lock(self, monkeypatch):
        monkeypatch.setenv("INFERENCE_CHAT_MODEL_TEMP_OVERRIDE", "deepseek-chat")
        monkeypatch.setenv("INFERENCE_ENFORCE_LOCK_MODEL", "true")
        monkeypatch.setenv("INFERENCE_LOCK_MODEL_ID", "deepseek-reasoner")

        model, source, chain = self._route(
            "verify_solution", "Check if my solution is correct."
        )

        assert model == "deepseek-reasoner"
        assert "chat_temp_override_env" not in source
        assert chain == ["deepseek-reasoner"]
613
+
614
+
615
+ # ─── Risk Prediction ──────────────────────────────────────────
616
+
617
+
618
class TestRiskPrediction:
    """/api/predict-risk: validation, DeepSeek-backed scoring, and batch mode."""

    @staticmethod
    def _metrics(engagement=80, quiz=75, attendance=90, completion=85):
        # Canonical student-metrics payload with overridable fields.
        return {
            "engagementScore": engagement,
            "avgQuizScore": quiz,
            "attendance": attendance,
            "assignmentCompletion": completion,
        }

    @patch("main.get_deepseek_client")
    def test_predict_risk_success(self, mock_ds_fn):
        mock_ds_fn.return_value = make_deepseek_risk_mock()
        resp = client.post("/api/predict-risk", json=self._metrics())
        assert resp.status_code == 200
        body = resp.json()
        assert body["riskLevel"] in ("High", "Medium", "Low")
        assert 0 <= body["confidence"] <= 1

    def test_predict_risk_invalid_score_range(self):
        resp = client.post("/api/predict-risk", json=self._metrics(engagement=150))
        assert resp.status_code == 422

    def test_predict_risk_negative_score(self):
        resp = client.post("/api/predict-risk", json=self._metrics(engagement=-5))
        assert resp.status_code == 422

    def test_predict_risk_missing_fields(self):
        resp = client.post("/api/predict-risk", json={"engagementScore": 80})
        assert resp.status_code == 422

    @patch("main.get_deepseek_client")
    def test_predict_risk_ai_failure(self, mock_ds_fn):
        failing = MagicMock()
        failing.chat.completions.create.side_effect = Exception("AI down")
        mock_ds_fn.return_value = failing
        resp = client.post("/api/predict-risk", json=self._metrics())
        assert resp.status_code == 502

    @patch("main.get_deepseek_client")
    def test_batch_risk_prediction(self, mock_ds_fn):
        mock_ds_fn.return_value = make_deepseek_risk_mock()
        roster = [
            self._metrics(),
            self._metrics(engagement=30, quiz=40, attendance=50, completion=35),
        ]
        resp = client.post("/api/predict-risk/batch", json={"students": roster})
        assert resp.status_code == 200
        assert len(resp.json()) == 2
681
+
682
+
683
+ # ─── Learning Path ────────────────────────────────────────────
684
+
685
+
686
class TestLearningPath:
    """/api/learning-path: happy path, request validation, upstream failure."""

    @patch("main.call_hf_chat")
    def test_learning_path_success(self, mock_chat):
        mock_chat.return_value = "1. Review fractions\n2. Practice decimals"
        resp = client.post("/api/learning-path", json={
            "weaknesses": ["fractions", "decimals"],
            "gradeLevel": "Grade 11",
        })
        assert resp.status_code == 200
        assert "fractions" in resp.json()["learningPath"].lower()

    def test_learning_path_missing_weaknesses(self):
        resp = client.post("/api/learning-path", json={"gradeLevel": "Grade 11"})
        assert resp.status_code == 422

    def test_learning_path_missing_grade(self):
        resp = client.post("/api/learning-path", json={"weaknesses": ["fractions"]})
        assert resp.status_code == 422

    @patch("main.call_hf_chat")
    def test_learning_path_ai_failure(self, mock_chat):
        mock_chat.side_effect = Exception("AI service down")
        resp = client.post("/api/learning-path", json={
            "weaknesses": ["algebra"],
            "gradeLevel": "Grade 11",
        })
        assert resp.status_code == 502
717
+
718
+
719
+ # ─── Daily Insight ─────────────────────────────────────────────
720
+
721
+
722
class TestDailyInsight:
    """/api/analytics/daily-insight generation from a class roster."""

    @patch("main.call_hf_chat")
    def test_daily_insight_success(self, mock_chat):
        mock_chat.return_value = "Class is doing well."
        roster = [
            {"name": "Alice", "engagementScore": 80, "avgQuizScore": 75, "attendance": 90, "riskLevel": "Low"},
        ]
        resp = client.post("/api/analytics/daily-insight", json={"students": roster})
        assert resp.status_code == 200
        assert resp.json()["insight"]

    def test_daily_insight_empty_students(self):
        # Empty roster short-circuits with a canned message, no model call needed.
        resp = client.post("/api/analytics/daily-insight", json={"students": []})
        assert resp.status_code == 200
        assert "No student data" in resp.json()["insight"]
740
+
741
+
742
+ # ─── Quiz Topics ───────────────────────────────────────────────
743
+
744
+
745
class TestQuizTopics:
    """Read-only quiz topic catalogue endpoints."""

    def test_get_all_topics(self):
        resp = client.get("/api/quiz/topics")
        assert resp.status_code == 200
        assert "allTopics" in resp.json()

    def test_get_topics_by_grade(self):
        resp = client.get("/api/quiz/topics?gradeLevel=Grade%2011")
        assert resp.status_code == 200
        body = resp.json()
        assert body["gradeLevel"] == "Grade 11"
        assert "topics" in body

    def test_get_topics_invalid_grade(self):
        resp = client.get("/api/quiz/topics?gradeLevel=Grade%2099")
        assert resp.status_code == 404
761
+
762
+
763
+ # ─── Quiz Generation ──────────────────────────────────────────
764
+
765
+
766
class TestQuizGeneration:
    """/api/quiz/generate: happy path and request validation."""

    @patch("main.call_hf_chat")
    def test_generate_quiz_success(self, mock_chat):
        question = {
            "questionType": "multiple_choice",
            "question": "What is 2+2?",
            "correctAnswer": "4",
            "options": ["A) 3", "B) 4", "C) 5", "D) 6"],
            "bloomLevel": "remember",
            "difficulty": "easy",
            "topic": "Arithmetic",
            "points": 1,
            "explanation": "2+2=4",
        }
        mock_chat.return_value = json.dumps([question])

        resp = client.post("/api/quiz/generate", json={
            "topics": ["Arithmetic"],
            "gradeLevel": "Grade 11",
            "numQuestions": 1,
        })
        assert resp.status_code == 200
        body = resp.json()
        assert len(body["questions"]) >= 1
        assert body["totalPoints"] > 0

    def test_generate_quiz_missing_topics(self):
        resp = client.post("/api/quiz/generate", json={"gradeLevel": "Grade 11"})
        assert resp.status_code == 422
797
+
798
+
799
class TestClassRecordImportMapping:
    """Column-mapping sanitization plus quiz generation limit checks.

    NOTE(review): the quiz-generation tests below look misplaced in this
    class (they belong with TestQuizGeneration); kept here so test IDs and
    collection order are unchanged.
    """

    @staticmethod
    def _question(text, **overrides):
        # Minimal valid question payload; overrides customize individual fields.
        base = {
            "questionType": "identification",
            "question": text,
            "correctAnswer": "Answer",
            "bloomLevel": "remember",
            "difficulty": "easy",
            "topic": "Algebra",
            "points": 1,
            "explanation": "Because.",
        }
        base.update(overrides)
        return base

    def test_sanitize_column_mapping_drops_none_and_unknown_fields(self):
        source_mapping = {
            "Student Name": "name",
            "Grade Level": None,
            "Section": "",
            "General Mathematics": None,
            "Custom": "not_a_supported_field",
            "Average": "avgQuizScore",
        }

        cleaned = main_module._sanitize_column_mapping(source_mapping)

        expected = {
            "Student Name": "name",
            "Average": "avgQuizScore",
        }
        assert cleaned == expected

    @patch("main.call_hf_chat")
    def test_generate_quiz_bad_llm_output(self, mock_chat):
        mock_chat.return_value = "This is not valid JSON at all."
        resp = client.post("/api/quiz/generate", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
            "numQuestions": 1,
        })
        assert resp.status_code == 500

    @patch("main.call_hf_chat")
    def test_preview_quiz(self, mock_chat):
        question = self._question(
            "Define slope.",
            correctAnswer="Rise over run",
            explanation="Slope = rise/run.",
        )
        mock_chat.return_value = json.dumps([question])
        resp = client.post("/api/quiz/preview", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
        })
        assert resp.status_code == 200

    @patch("main.call_hf_chat")
    def test_generate_quiz_accepts_new_max_limits(self, mock_chat):
        max_questions = main_module.MAX_QUESTIONS_LIMIT
        questions = [self._question(f"Question {i + 1}") for i in range(max_questions)]
        mock_chat.return_value = json.dumps(questions)

        resp = client.post("/api/quiz/generate", json={
            "topics": [f"Topic {i + 1}" for i in range(main_module.MAX_TOPICS_LIMIT)],
            "gradeLevel": "Grade 11",
            "numQuestions": max_questions,
        })

        assert resp.status_code == 200
        assert len(resp.json()["questions"]) == max_questions

    def test_generate_quiz_rejects_over_max_questions(self):
        resp = client.post("/api/quiz/generate", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
            "numQuestions": main_module.MAX_QUESTIONS_LIMIT + 1,
        })

        assert resp.status_code == 422
882
+
883
+
884
+ class TestUploadClassRecordsGuardrails:
885
+ @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
886
+ def test_upload_class_records_rejects_unsupported_dataset_intent(self, _mock_chat):
887
+ files = {
888
+ "files": ("records.csv", b"name,lrn,email,avgQuizScore,attendance,engagementScore,assignmentCompletion\nAna,1001,ana@example.com,80,90,85,88\n", "text/csv"),
889
+ }
890
+ response = client.post(
891
+ "/api/upload/class-records",
892
+ files=files,
893
+ data={"datasetIntent": "unsupported_intent"},
894
+ )
895
+
896
+ assert response.status_code == 400
897
+ assert "Unsupported datasetIntent" in response.json()["detail"]
898
+
899
+ @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
900
+ def test_upload_class_records_warns_when_preferred_core_fields_missing(self, _mock_chat):
901
+ files = {
902
+ "files": (
903
+ "records.csv",
904
+ b"name,lrn,email,attendance\nAna,1001,ana@example.com,90\n",
905
+ "text/csv",
906
+ ),
907
+ }
908
+ response = client.post(
909
+ "/api/upload/class-records",
910
+ files=files,
911
+ data={"datasetIntent": "synthetic_student_records"},
912
+ )
913
+
914
+ assert response.status_code == 200
915
+ payload = response.json()
916
+ assert payload["success"] is True
917
+ assert payload["summary"]["failedFiles"] == 0
918
+ assert payload["summary"]["partialSuccessFiles"] == 1
919
+ combined_warnings = " ".join(payload.get("warnings", []))
920
+ assert "Missing preferred educational columns" in combined_warnings
921
+
922
+ @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
923
+ def test_upload_class_records_returns_interpretation_metadata(self, _mock_chat):
924
+ files = {
925
+ "files": (
926
+ "records.csv",
927
+ (
928
+ b"name,lrn,email,avgQuizScore,attendance,engagementScore,assignmentCompletion,patient_diagnosis\n"
929
+ b"Ana,1001,ana@example.com,80,90,85,88,none\n"
930
+ ),
931
+ "text/csv",
932
+ ),
933
+ }
934
+ response = client.post(
935
+ "/api/upload/class-records",
936
+ files=files,
937
+ data={"datasetIntent": "synthetic_student_records"},
938
+ )
939
+
940
+ assert response.status_code == 200
941
+ payload = response.json()
942
+ assert payload["success"] is True
943
+ assert payload["datasetIntent"] == "synthetic_student_records"
944
+ assert isinstance(payload.get("columnInterpretations"), list)
945
+ summary = payload.get("interpretationSummary") or {}
946
+ assert summary.get("storageOnlyColumns", 0) >= 1
947
+ assert summary.get("domainMismatchWarnings", 0) >= 1
948
+ class_metadata = payload.get("classMetadata") or {}
949
+ assert class_metadata.get("classSectionId")
950
+ assert class_metadata.get("className")
951
+ assert class_metadata.get("grade")
952
+ assert class_metadata.get("section")
953
+ assert class_metadata.get("gradeLevel")
954
+ assert class_metadata.get("classification")
955
+
956
+ patient_column = next(
957
+ (item for item in payload["columnInterpretations"] if item.get("columnName") == "patient_diagnosis"),
958
+ None,
959
+ )
960
+ assert patient_column is not None
961
+ assert patient_column["usagePolicy"] == "storage_only"
962
+ assert patient_column["confidenceBand"] == "low"
963
+
964
    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_upload_class_records_accepts_minimal_teacher_schema(self, _mock_chat):
        """A bare-minimum CSV (name, lrn, three score columns) fully imports.

        Expectations: zero rejected rows, 100% inferred-state coverage, and
        class metadata filled in (Grade 11 / Section A — presumably backend
        defaults, since the CSV has no class columns).
        """
        files = {
            "files": (
                "records.csv",
                (
                    b"name,lrn,avgQuizScore,attendance,engagementScore\n"
                    b"Ana Cruz,1001,81,92,88\n"
                    b"Ben Dela,1002,58,70,52\n"
                ),
                "text/csv",
            ),
        }

        response = client.post(
            "/api/upload/class-records",
            files=files,
            data={"datasetIntent": "synthetic_student_records"},
        )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert payload["interpretedRows"] == 2
        assert payload["rejectedRows"] == 0
        # Every accepted row must carry an inferred learning state.
        assert payload["inferredStateCoverage"]["inferredRows"] == 2
        assert payload["inferredStateCoverage"]["coveragePct"] == 100.0
        assert all("inferredState" in row for row in payload["students"])
        class_metadata = payload.get("classMetadata") or {}
        assert class_metadata.get("classSectionId")
        assert class_metadata.get("className")
        assert class_metadata.get("grade") == "Grade 11"
        assert class_metadata.get("section") == "Section A"
        assert class_metadata.get("gradeLevel") == "Grade 11"
        assert class_metadata.get("classification") == "Senior High School"
999
+
1000
    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_upload_class_records_reports_explicit_row_rejections(self, _mock_chat):
        """Rows missing a name or both identity values (lrn/email) are rejected
        with explicit per-row reasons, while valid rows still import.
        """
        # Row 1: empty name. Row 2: neither lrn nor email. Row 3: valid.
        files = {
            "files": (
                "records.csv",
                (
                    b"name,lrn,email,avgQuizScore,attendance,engagementScore\n"
                    b",1001,ana@example.com,81,92,88\n"
                    b"Ben Dela,,,58,70,52\n"
                    b"Cara Lim,1003,,77,83,75\n"
                ),
                "text/csv",
            ),
        }

        response = client.post(
            "/api/upload/class-records",
            files=files,
            data={"datasetIntent": "synthetic_student_records"},
        )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert payload["interpretedRows"] == 1
        assert payload["rejectedRows"] == 2
        # Both rejection categories must be surfaced via reason strings.
        reasons = payload.get("rejectedReasons") or {}
        assert any("missing required field: name" in key for key in reasons.keys())
        assert any("missing required identity value: lrn_or_email" in key for key in reasons.keys())
        assert len(payload.get("rejectedRowDetails") or []) == 2
1030
+
1031
    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_upload_class_records_degrades_gracefully_when_firestore_adc_missing(self, _mock_chat):
        """If Firestore client creation fails with a missing-ADC error, the
        upload still returns 200 but reports persisted=False, no dashboard
        sync, and an ADC warning — interpretation must not depend on storage.
        """
        class _FailingFirestoreModule:
            # firestore.client() raising here simulates an environment without
            # Application Default Credentials configured.
            def client(self):
                raise Exception(
                    "Your default credentials were not found. "
                    "To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc"
                )

        files = {
            "files": (
                "records.csv",
                (
                    b"name,lrn,avgQuizScore,attendance,engagementScore\n"
                    b"Ana Cruz,1001,81,92,88\n"
                ),
                "text/csv",
            ),
        }

        with patch.object(main_module, "firebase_firestore", _FailingFirestoreModule()), patch.object(main_module, "_firebase_ready", True):
            response = client.post(
                "/api/upload/class-records",
                files=files,
                data={"datasetIntent": "synthetic_student_records"},
            )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        # Interpretation succeeded; persistence and dashboard sync did not.
        assert payload["persisted"] is False
        assert (payload.get("dashboardSync") or {}).get("synced") is False
        warnings_blob = " ".join(payload.get("warnings", []))
        assert "adc is not configured" in warnings_blob.lower()
1065
+
1066
+
1067
class TestImportedOverviewAndTopicMastery:
    """Analytics endpoints over imported (normalized) class records."""

    def test_imported_class_overview_returns_inferred_state_for_realistic_minimal_records(self):
        """Three students with spread scores yield all three risk levels, full
        inferred-state coverage, and class metadata on both student rows and
        classroom aggregates.
        """
        # Seed one class section with high/medium/low performing students.
        firestore = _FakeFirestoreModule(
            {
                "normalizedClassRecords": [
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Ana Cruz",
                        "lrn": "1001",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 92,
                        "attendance": 96,
                        "engagementScore": 91,
                        "unknownFields": {},
                    },
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Ben Dela",
                        "lrn": "1002",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 68,
                        "attendance": 82,
                        "engagementScore": 66,
                        "unknownFields": {},
                    },
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Cara Lim",
                        "lrn": "1003",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 49,
                        "attendance": 71,
                        "engagementScore": 50,
                        "unknownFields": {},
                    },
                ]
            }
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/analytics/imported-class-overview?classSectionId=grade11_a&limit=100")

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert len(payload["students"]) == 3
        coverage = payload.get("inferredStateCoverage") or {}
        assert coverage.get("inferredRows") == 3
        assert coverage.get("coveragePct") == 100.0

        # The score spread above is expected to map onto one of each risk tier.
        risk_levels = {student["riskLevel"] for student in payload["students"]}
        assert risk_levels == {"Low", "Medium", "High"}
        assert all(student.get("inferredState") for student in payload["students"])
        assert all("stateConfidence" in student for student in payload["students"])
        # Class metadata must be attached to every student row…
        assert all(student.get("classMetadata") for student in payload["students"])
        assert all(student.get("classMetadata", {}).get("classSectionId") == "grade11_a" for student in payload["students"])
        assert all(student.get("classMetadata", {}).get("gradeLevel") for student in payload["students"])
        assert all(student.get("classMetadata", {}).get("classification") for student in payload["students"])
        # …and to every classroom aggregate.
        assert all(classroom.get("classMetadata") for classroom in payload["classrooms"])
        assert all(classroom.get("classMetadata", {}).get("classSectionId") == "grade11_a" for classroom in payload["classrooms"])
        assert all(classroom.get("classMetadata", {}).get("gradeLevel") for classroom in payload["classrooms"])
        assert all(classroom.get("classMetadata", {}).get("classification") for classroom in payload["classrooms"])

    def test_imported_class_overview_returns_503_when_firestore_adc_missing(self):
        """Unlike upload, the read-only overview endpoint surfaces a 503 with
        actionable detail when Firestore streaming fails for missing ADC.
        """
        firestore = _FakeFirestoreModule(
            {"normalizedClassRecords": []},
            stream_error=(
                "Your default credentials were not found. "
                "To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc"
            ),
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/analytics/imported-class-overview?classSectionId=grade11_a&limit=100")

        assert response.status_code == 503
        detail = str((response.json() or {}).get("detail") or "").lower()
        assert "firestore adc is not configured" in detail
        assert "google_application_credentials" in detail

    def test_topic_mastery_reports_fallback_warning_without_topic_columns(self):
        """Records without per-topic columns still produce mastery rows via a
        fallback topic context, and the fallback is flagged in warnings.
        """
        firestore = _FakeFirestoreModule(
            {
                "normalizedClassRecords": [
                    {
                        "teacherId": "test-teacher-uid",
                        "name": "Ana Cruz",
                        "lrn": "1001",
                        "classSectionId": "grade11_a",
                        "className": "Grade 11 - A",
                        "avgQuizScore": 84,
                        "attendance": 92,
                        "engagementScore": 88,
                        "assessmentName": "general-assessment",
                        "unknownFields": {},
                    }
                ],
                # No course materials, so no topic structure is available.
                "courseMaterials": [],
            }
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/analytics/topic-mastery?teacherId=test-teacher-uid&classSectionId=grade11_a")

        assert response.status_code == 200
        payload = response.json()
        assert payload["summary"]["totalTopicsTracked"] >= 1
        assert payload["summary"].get("fallbackTopicRows") == 1
        assert any("fallback topic context" in warning.lower() for warning in payload.get("warnings") or [])
1179
+
1180
+
1181
class TestAsyncGenerationTasks:
    """Async quiz-generation task lifecycle plus ops-metrics access control."""

    @patch("main.asyncio.create_task")
    def test_quiz_generate_async_submit_status_list_cancel(self, mock_create_task):
        """Submit an async generation job, then poll status, list tasks, and
        cancel it through the public task endpoints.
        """
        # Start from a clean in-memory task registry.
        main_module._async_tasks.clear()
        # Close the coroutine instead of scheduling it so no background work
        # (or "never awaited" warning) occurs during the test.
        mock_create_task.side_effect = lambda coro: coro.close()
        response = client.post("/api/quiz/generate-async", json={
            "topics": ["Algebra"],
            "gradeLevel": "Grade 11",
            "numQuestions": 1,
        })

        assert response.status_code == 200
        payload = response.json()
        task_id = payload["taskId"]
        assert payload["status"] == "queued"
        assert mock_create_task.called

        # Status endpoint: any lifecycle state is acceptable since the worker
        # coroutine was never actually run.
        status_response = client.get(f"/api/tasks/{task_id}")
        assert status_response.status_code == 200
        status_payload = status_response.json()
        assert status_payload["taskId"] == task_id
        assert status_payload["status"] in {"queued", "running", "cancelling", "cancelled", "completed", "failed"}

        # Listing must include the task we just queued.
        list_response = client.get("/api/tasks?limit=20")
        assert list_response.status_code == 200
        list_payload = list_response.json()
        assert list_payload["count"] >= 1
        assert any(item["taskId"] == task_id for item in list_payload["tasks"])

        # Cancellation may complete immediately or be in flight.
        cancel_response = client.post(f"/api/tasks/{task_id}/cancel")
        assert cancel_response.status_code == 200
        cancel_payload = cancel_response.json()
        assert cancel_payload["taskId"] == task_id
        assert cancel_payload["status"] in {"cancelled", "cancelling"}

    def test_inference_metrics_requires_admin(self):
        """Unauthenticated callers are rejected from the ops metrics endpoint."""
        response = client.get("/api/ops/inference-metrics")
        assert response.status_code == 403

    @patch.object(main_module.firebase_auth, "verify_id_token", return_value={
        "uid": "admin-uid",
        "email": "admin@example.com",
        "role": "admin",
    })
    def test_inference_metrics_admin_success(self, _mock_verify):
        """An admin token unlocks the metrics payload with request counters."""
        response = client.get("/api/ops/inference-metrics")
        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert "metrics" in payload
        assert "requests_total" in payload["metrics"]
1232
+
1233
+
1234
+ # ─── Calculator ────────────────────────────────────────────────
1235
+
1236
+
1237
class TestCalculator:
    """Exercises /api/calculator/evaluate; sympy may be absent in the env."""

    @staticmethod
    def _post(expression: str):
        # Every calculator test hits the same endpoint with a single field.
        return client.post("/api/calculator/evaluate", json={"expression": expression})

    def test_evaluate_simple_expression(self):
        response = self._post("2 + 3")
        # sympy may not be installed in test env — accept 200 or 500.
        assert response.status_code in (200, 500)
        if response.status_code == 200:
            assert response.json()["result"] == "5"

    def test_evaluate_with_variables(self):
        response = self._post("x**2 + 2*x + 1")
        # Accept 200 (sympy available) or 500 (sympy missing).
        assert response.status_code in (200, 500)

    def test_evaluate_dangerous_expression(self):
        response = self._post("__import__('os').system('rm -rf /')")
        # 400 if validation catches it, 500 if sympy missing or general error.
        assert response.status_code in (400, 500)

    def test_evaluate_empty_expression(self):
        assert self._post("").status_code == 422

    def test_evaluate_too_long_expression(self):
        response = self._post("x + " * 200)
        # 400 length guard, 422 pydantic validation, or 500 when sympy missing.
        assert response.status_code in (400, 422, 500)
1274
+
1275
+
1276
+ # ─── Error Handling ────────────────────────────────────────────
1277
+
1278
+
1279
class TestErrorHandling:
    """Generic HTTP error-path behavior shared by all endpoints."""

    def test_404_for_unknown_endpoint(self):
        assert client.get("/api/nonexistent").status_code == 404

    def test_method_not_allowed(self):
        # /api/chat is POST-only, so GET must be rejected.
        assert client.get("/api/chat").status_code == 405

    def test_request_id_in_error_response(self):
        # Even error responses must carry the correlation header.
        headers = client.get("/api/nonexistent").headers
        assert "x-request-id" in headers

    def test_invalid_json_body(self):
        response = client.post(
            "/api/chat",
            content="this is not json",
            headers={"Content-Type": "application/json"},
        )
        assert response.status_code == 422
1299
+
1300
+
1301
+ # ─── Student Competency ───────────────────────────────────────
1302
+
1303
+
1304
class TestStudentCompetency:
    """Competency aggregation from quiz history via /api/quiz/student-competency."""

    @patch("main.call_hf_chat")
    def test_competency_no_history(self, mock_chat):
        # With an empty history the endpoint returns an empty competency list.
        mock_chat.return_value = ""
        response = client.post("/api/quiz/student-competency", json={
            "studentId": "student123",
            "quizHistory": [],
        })
        assert response.status_code == 200
        data = response.json()
        assert data["studentId"] == "student123"
        assert data["competencies"] == []

    @patch("main.call_hf_chat")
    def test_competency_with_history(self, mock_chat):
        mock_chat.return_value = "Good progress overall."
        history = [
            {"topic": "Algebra", "score": 8, "total": 10, "timeTaken": 300},
            {"topic": "Algebra", "score": 9, "total": 10, "timeTaken": 250},
            {"topic": "Geometry", "score": 4, "total": 10, "timeTaken": 500},
        ]
        response = client.post(
            "/api/quiz/student-competency",
            json={"studentId": "student123", "quizHistory": history},
        )
        assert response.status_code == 200
        data = response.json()
        assert len(data["competencies"]) > 0
        # Strong, fast Algebra runs should outrank the weak Geometry run.
        algebra = next((c for c in data["competencies"] if c["topic"] == "Algebra"), None)
        geometry = next((c for c in data["competencies"] if c["topic"] == "Geometry"), None)
        if algebra and geometry:
            assert algebra["efficiencyScore"] > geometry["efficiencyScore"]
1336
+
1337
+
1338
+ # ─── Course Materials Recent Retrieval ───────────────────────
1339
+
1340
+
1341
+ class _FakeDocSnapshot:
1342
+ def __init__(self, doc_id: str, data: Dict[str, Any]):
1343
+ self.id = doc_id
1344
+ self._data = data
1345
+
1346
+ def to_dict(self) -> Dict[str, Any]:
1347
+ return self._data
1348
+
1349
+
1350
+ class _FakeQuery:
1351
+ def __init__(self, docs: List[Dict[str, Any]], fail_order: bool = False, stream_error: str | None = None):
1352
+ self._docs = docs
1353
+ self._filters: List[tuple[str, str, Any]] = []
1354
+ self._limit: int | None = None
1355
+ self._fail_order = fail_order
1356
+ self._stream_error = stream_error
1357
+
1358
+ def where(self, field: str, op: str, value: Any):
1359
+ self._filters.append((field, op, value))
1360
+ return self
1361
+
1362
+ def order_by(self, *args, **kwargs):
1363
+ if self._fail_order:
1364
+ raise Exception("missing composite index")
1365
+ return self
1366
+
1367
+ def limit(self, value: int):
1368
+ self._limit = value
1369
+ return self
1370
+
1371
+ def stream(self):
1372
+ if self._stream_error:
1373
+ raise Exception(self._stream_error)
1374
+
1375
+ filtered: List[Dict[str, Any]] = []
1376
+ for doc in self._docs:
1377
+ include = True
1378
+ for field, op, expected in self._filters:
1379
+ if op != "==":
1380
+ continue
1381
+ if doc.get(field) != expected:
1382
+ include = False
1383
+ break
1384
+ if include:
1385
+ filtered.append(doc)
1386
+
1387
+ if self._limit is not None:
1388
+ filtered = filtered[: self._limit]
1389
+
1390
+ return [_FakeDocSnapshot(str(doc.get("materialId") or "doc"), doc) for doc in filtered]
1391
+
1392
+
1393
+ class _FakeCollection:
1394
+ def __init__(
1395
+ self,
1396
+ name: str,
1397
+ store: Dict[str, List[Dict[str, Any]]],
1398
+ audit_logs: List[Dict[str, Any]],
1399
+ fail_order: bool = False,
1400
+ stream_error: str | None = None,
1401
+ ):
1402
+ self._name = name
1403
+ self._store = store
1404
+ self._audit_logs = audit_logs
1405
+ self._fail_order = fail_order
1406
+ self._stream_error = stream_error
1407
+
1408
+ def where(self, field: str, op: str, value: Any):
1409
+ docs = list(self._store.get(self._name, []))
1410
+ query = _FakeQuery(docs, fail_order=self._fail_order, stream_error=self._stream_error)
1411
+ return query.where(field, op, value)
1412
+
1413
+ def add(self, payload: Dict[str, Any]):
1414
+ self._audit_logs.append(payload)
1415
+ return (None, None)
1416
+
1417
+
1418
+ class _FakeFirestoreClient:
1419
+ def __init__(self, store: Dict[str, List[Dict[str, Any]]], fail_order: bool = False, stream_error: str | None = None):
1420
+ self._store = store
1421
+ self.audit_logs: List[Dict[str, Any]] = []
1422
+ self._fail_order = fail_order
1423
+ self._stream_error = stream_error
1424
+
1425
+ def collection(self, name: str):
1426
+ return _FakeCollection(
1427
+ name,
1428
+ self._store,
1429
+ self.audit_logs,
1430
+ fail_order=self._fail_order,
1431
+ stream_error=self._stream_error,
1432
+ )
1433
+
1434
+
1435
+ class _FakeFirestoreModule:
1436
+ class Query:
1437
+ DESCENDING = "DESCENDING"
1438
+
1439
+ SERVER_TIMESTAMP = object()
1440
+
1441
+ def __init__(
1442
+ self,
1443
+ store: Dict[str, List[Dict[str, Any]]],
1444
+ fail_order: bool = False,
1445
+ stream_error: str | None = None,
1446
+ ):
1447
+ self._client = _FakeFirestoreClient(store, fail_order=fail_order, stream_error=stream_error)
1448
+
1449
+ def client(self):
1450
+ return self._client
1451
+
1452
+
1453
class TestRecentCourseMaterials:
    """/api/upload/course-materials/recent filtering and retention handling."""

    def test_recent_course_materials_respects_class_section_filter(self):
        """Only materials belonging to the requested class section come back."""
        now = int(time.time())
        # Two materials in different sections; only grade11_a should match.
        firestore = _FakeFirestoreModule(
            {
                "courseMaterials": [
                    {
                        "materialId": "mat-a",
                        "teacherId": "test-teacher-uid",
                        "fileName": "algebra-a.pdf",
                        "fileType": "pdf",
                        "classSectionId": "grade11_a",
                        "topics": [{"title": "Linear Equations"}],
                        "extractedTextLength": 1200,
                        "retentionDays": 180,
                        "expiresAtEpoch": now + 3600,
                    },
                    {
                        "materialId": "mat-b",
                        "teacherId": "test-teacher-uid",
                        "fileName": "algebra-b.pdf",
                        "fileType": "pdf",
                        "classSectionId": "grade11_b",
                        "topics": [{"title": "Quadratics"}],
                        "extractedTextLength": 1600,
                        "retentionDays": 180,
                        "expiresAtEpoch": now + 3600,
                    },
                ]
            }
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/upload/course-materials/recent?classSectionId=grade11_a&limit=10")

        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert data["classSectionId"] == "grade11_a"
        assert len(data["materials"]) == 1
        assert data["materials"][0]["materialId"] == "mat-a"
        assert all(item["classSectionId"] == "grade11_a" for item in data["materials"])

    def test_recent_course_materials_reports_retention_exclusions(self):
        """Expired artifacts are dropped and surfaced as warnings, and the
        fallback (no order_by) query path is exercised via fail_order=True.
        """
        now = int(time.time())
        firestore = _FakeFirestoreModule(
            {
                "courseMaterials": [
                    {
                        "materialId": "mat-valid",
                        "teacherId": "test-teacher-uid",
                        "fileName": "active.txt",
                        "fileType": "txt",
                        "classSectionId": "grade11_a",
                        "topics": [{"title": "Functions"}],
                        "extractedTextLength": 900,
                        "retentionDays": 180,
                        "expiresAtEpoch": now + 7200,
                    },
                    {
                        # Expired one minute ago — must be excluded with a warning.
                        "materialId": "mat-expired",
                        "teacherId": "test-teacher-uid",
                        "fileName": "expired.txt",
                        "fileType": "txt",
                        "classSectionId": "grade11_a",
                        "topics": [{"title": "Inequalities"}],
                        "extractedTextLength": 700,
                        "retentionDays": 30,
                        "expiresAtEpoch": now - 60,
                    },
                ]
            },
            # order_by raises, forcing the endpoint onto its fallback query path.
            fail_order=True,
        )

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True):
            response = client.get("/api/upload/course-materials/recent?classSectionId=grade11_a&limit=10")

        assert response.status_code == 200
        data = response.json()
        assert len(data["materials"]) == 1
        assert data["materials"][0]["materialId"] == "mat-valid"
        warning_text = " ".join(data.get("warnings", []))
        assert "expired course-material artifact" in warning_text.lower()
        assert "fallback query path" in warning_text.lower()
1538
+
1539
+
1540
+ # ─── Student Account Provisioning ───────────────────────────
1541
+
1542
+
1543
+ class _ProvisionDocSnapshot:
1544
+ def __init__(self, doc_id: str, data: Dict[str, Any] | None):
1545
+ self.id = doc_id
1546
+ self._data = data
1547
+
1548
+ @property
1549
+ def exists(self) -> bool:
1550
+ return self._data is not None
1551
+
1552
+ def to_dict(self) -> Dict[str, Any]:
1553
+ return dict(self._data or {})
1554
+
1555
+
1556
+ class _ProvisionDocumentRef:
1557
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]], collection_name: str, doc_id: str):
1558
+ self._store = store
1559
+ self._collection_name = collection_name
1560
+ self._doc_id = doc_id
1561
+
1562
+ def get(self):
1563
+ data = self._store.get(self._collection_name, {}).get(self._doc_id)
1564
+ return _ProvisionDocSnapshot(self._doc_id, data)
1565
+
1566
+ def set(self, payload: Dict[str, Any], merge: bool = False):
1567
+ collection = self._store.setdefault(self._collection_name, {})
1568
+ existing = dict(collection.get(self._doc_id, {})) if merge else {}
1569
+ existing.update(payload)
1570
+ collection[self._doc_id] = existing
1571
+
1572
+ def delete(self):
1573
+ collection = self._store.setdefault(self._collection_name, {})
1574
+ collection.pop(self._doc_id, None)
1575
+
1576
+
1577
+ class _ProvisionQuery:
1578
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]], collection_name: str):
1579
+ self._store = store
1580
+ self._collection_name = collection_name
1581
+ self._filters: List[tuple[str, str, Any]] = []
1582
+ self._limit: int | None = None
1583
+
1584
+ def where(self, field: str, op: str, value: Any):
1585
+ self._filters.append((field, op, value))
1586
+ return self
1587
+
1588
+ def limit(self, value: int):
1589
+ self._limit = value
1590
+ return self
1591
+
1592
+ def stream(self):
1593
+ collection = self._store.get(self._collection_name, {})
1594
+ docs: List[_ProvisionDocSnapshot] = []
1595
+ for doc_id, data in collection.items():
1596
+ include = True
1597
+ for field, op, expected in self._filters:
1598
+ if op != "==":
1599
+ continue
1600
+ if data.get(field) != expected:
1601
+ include = False
1602
+ break
1603
+ if include:
1604
+ docs.append(_ProvisionDocSnapshot(doc_id, data))
1605
+
1606
+ if self._limit is not None:
1607
+ docs = docs[: self._limit]
1608
+ return docs
1609
+
1610
+
1611
+ class _ProvisionCollectionRef:
1612
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]], collection_name: str):
1613
+ self._store = store
1614
+ self._collection_name = collection_name
1615
+
1616
+ def where(self, field: str, op: str, value: Any):
1617
+ return _ProvisionQuery(self._store, self._collection_name).where(field, op, value)
1618
+
1619
+ def limit(self, value: int):
1620
+ return _ProvisionQuery(self._store, self._collection_name).limit(value)
1621
+
1622
+ def stream(self):
1623
+ collection = self._store.get(self._collection_name, {})
1624
+ return [_ProvisionDocSnapshot(doc_id, data) for doc_id, data in collection.items()]
1625
+
1626
+ def document(self, doc_id: str):
1627
+ return _ProvisionDocumentRef(self._store, self._collection_name, doc_id)
1628
+
1629
+ def add(self, payload: Dict[str, Any]):
1630
+ collection = self._store.setdefault(self._collection_name, {})
1631
+ doc_id = f"auto-{len(collection) + 1}"
1632
+ collection[doc_id] = dict(payload)
1633
+ return (None, None)
1634
+
1635
+
1636
+ class _ProvisionFirestoreClient:
1637
+ def __init__(self, store: Dict[str, Dict[str, Dict[str, Any]]]):
1638
+ self.store = store
1639
+
1640
+ def collection(self, name: str):
1641
+ return _ProvisionCollectionRef(self.store, name)
1642
+
1643
+
1644
+ class _ProvisionFirestoreModule:
1645
+ class Query:
1646
+ DESCENDING = "DESCENDING"
1647
+
1648
+ SERVER_TIMESTAMP = object()
1649
+
1650
+ def __init__(self, seed: Dict[str, Dict[str, Dict[str, Any]]] | None = None):
1651
+ self._client = _ProvisionFirestoreClient(seed or {})
1652
+
1653
+ def client(self):
1654
+ return self._client
1655
+
1656
+
1657
class TestStudentAccountProvisioningImport:
    """CSV preview/commit flow for bulk student-account provisioning."""

    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_preview_student_account_import_returns_validation_summary(self, _mock_chat):
        """Preview classifies rows as valid, duplicate (email already in
        auth/users), or invalid (missing first name), and returns a token
        for the later commit step.
        """
        # Seed one existing student so row 2 registers as a duplicate.
        firestore = _ProvisionFirestoreModule(
            {
                "users": {
                    "existing-student": {
                        "email": "existing@student.com",
                        "lrn": "1002",
                        "role": "student",
                    }
                }
            }
        )

        def _lookup_user(email: str):
            # Only the seeded email resolves to an auth user; everyone else
            # raises like firebase_admin's UserNotFoundError path.
            if email == "existing@student.com":
                return type("AuthUser", (), {"uid": "auth-existing"})()
            raise Exception("user not found")

        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=_lookup_user):
            response = client.post(
                "/api/import/student-accounts/preview",
                files={
                    "file": (
                        "accounts.csv",
                        (
                            b"First Name,Last Name,Student ID,Email,Grade,Section\n"
                            b"Ana,Cruz,1001,ana@student.com,Grade 11,STEM-A\n"
                            b"Ben,Dela,1002,existing@student.com,Grade 11,STEM-A\n"
                            b",Lim,1003,cara@student.com,Grade 11,STEM-A\n"
                        ),
                        "text/csv",
                    )
                },
            )

        assert response.status_code == 200
        payload = response.json()
        assert payload["success"] is True
        assert payload.get("previewToken")
        assert payload["summary"]["totalRows"] == 3
        assert payload["summary"]["validRows"] == 1
        assert payload["summary"]["duplicateRows"] >= 1
        assert payload["summary"]["invalidRows"] >= 1

    @patch("main.call_hf_chat", side_effect=Exception("mapper unavailable"))
    def test_commit_student_account_import_provisions_profiles(self, _mock_chat):
        """Committing a valid preview creates the auth user and writes a
        Firestore student profile with forcePasswordChange set.
        """
        firestore = _ProvisionFirestoreModule({"users": {}, "managedStudents": {}, "classSectionOwnership": {}, "accessAuditLogs": {}})

        # Admin caller; no pre-existing auth user; create_user returns a
        # deterministic uid for assertion.
        with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
            "uid": "admin-uid",
            "email": "admin@example.com",
            "role": "admin",
        }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "auth-created-1"})()):
            preview_response = client.post(
                "/api/import/student-accounts/preview",
                files={
                    "file": (
                        "accounts.csv",
                        b"First Name,Last Name,Student ID,Email,Grade,Section\nAna,Cruz,1001,ana@student.com,Grade 11,STEM-A\n",
                        "text/csv",
                    )
                },
            )

            assert preview_response.status_code == 200
            preview_payload = preview_response.json()
            assert preview_payload["summary"]["validRows"] == 1

            # Commit inside the same patch context so the same fakes apply.
            commit_response = client.post(
                "/api/import/student-accounts/commit",
                json={
                    "previewToken": preview_payload["previewToken"],
                    "forcePasswordChange": True,
                    "createAuthUsers": True,
                },
            )

        assert commit_response.status_code == 200
        commit_payload = commit_response.json()
        assert commit_payload["summary"]["createdRows"] == 1
        assert commit_payload["summary"]["failedRows"] == 0
        assert len(commit_payload["rows"]) == 1
        assert commit_payload["rows"][0]["status"] in {"created", "updated"}
        assert commit_payload["rows"][0]["uid"]

        # The provisioned Firestore profile must be a student with a forced
        # password change.
        users_store = firestore.client().store.get("users", {})
        assert len(users_store) == 1
        provisioned_profile = next(iter(users_store.values()))
        assert provisioned_profile.get("role") == "student"
        assert provisioned_profile.get("forcePasswordChange") is True
1749
+
1750
+
1751
class _FakeEmailServiceSuccess:
    """Email-service stub whose sends always succeed with a fixed message id."""

    def send_transactional_email(self, _message):
        # The message content is irrelevant; only the result shape matters.
        result = EmailSendResult(success=True, provider="test_email", message_id="msg-1")
        return result
1754
+
1755
+
1756
class _FakeEmailServiceFailure:
    """Email-service stub that always reports a retryable provider outage."""

    def send_transactional_email(self, _message):
        # retryable=True lets callers distinguish transient from permanent
        # delivery failures.
        failure = EmailSendResult(
            success=False,
            provider="test_email",
            error_code="provider_down",
            error_message="Provider unreachable",
            retryable=True,
        )
        return failure
1765
+
1766
+
1767
+ class TestAdminCreateUserEndpoint:
1768
+ def test_create_admin_user_returns_success_when_email_delivered(self):
1769
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1770
+
1771
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1772
+ "uid": "admin-uid",
1773
+ "email": "admin@example.com",
1774
+ "role": "admin",
1775
+ }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "new-user-uid"})()), patch.object(main_module, "create_email_service_from_env", return_value=_FakeEmailServiceSuccess()):
1776
+ response = client.post(
1777
+ "/api/admin/users",
1778
+ json={
1779
+ "name": "Ana & José/Lee",
1780
+ "email": "ana@student.com",
1781
+ "password": "StrongPass1!",
1782
+ "confirmPassword": "StrongPass1!",
1783
+ "role": "Student",
1784
+ "status": "Active",
1785
+ "grade": "Grade 11",
1786
+ "section": "STEM A",
1787
+ "lrn": "123456789012",
1788
+ },
1789
+ )
1790
+
1791
+ assert response.status_code == 200
1792
+ payload = response.json()
1793
+ assert payload["success"] is True
1794
+ assert payload["userCreated"] is True
1795
+ assert payload["emailSent"] is True
1796
+ assert payload["resultCode"] == "created_and_emailed"
1797
+ assert payload["uid"] == "new-user-uid"
1798
+
1799
+ users_store = firestore.client().store.get("users", {})
1800
+ assert "new-user-uid" in users_store
1801
+ assert users_store["new-user-uid"].get("role") == "student"
1802
+ assert "Ana+%26+Jos%C3%A9%2FLee" in users_store["new-user-uid"].get("photo", "")
1803
+
1804
+ def test_create_admin_user_returns_partial_success_when_email_fails(self):
1805
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1806
+
1807
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1808
+ "uid": "admin-uid",
1809
+ "email": "admin@example.com",
1810
+ "role": "admin",
1811
+ }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "new-user-uid-2"})()), patch.object(main_module, "create_email_service_from_env", return_value=_FakeEmailServiceFailure()):
1812
+ response = client.post(
1813
+ "/api/admin/users",
1814
+ json={
1815
+ "name": "Ben Dela",
1816
+ "email": "ben@student.com",
1817
+ "password": "StrongPass1!",
1818
+ "confirmPassword": "StrongPass1!",
1819
+ "role": "Student",
1820
+ "status": "Active",
1821
+ "grade": "Grade 11",
1822
+ "section": "STEM B",
1823
+ "lrn": "123456789013",
1824
+ },
1825
+ )
1826
+
1827
+ assert response.status_code == 200
1828
+ payload = response.json()
1829
+ assert payload["success"] is True
1830
+ assert payload["userCreated"] is True
1831
+ assert payload["emailSent"] is False
1832
+ assert payload["resultCode"] == "created_email_failed"
1833
+ assert payload["uid"] == "new-user-uid-2"
1834
+ assert isinstance(payload.get("warnings"), list)
1835
+ assert payload.get("emailError", {}).get("code") == "provider_down"
1836
+
1837
+ def test_create_admin_user_rejects_password_without_special_character(self):
1838
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1839
+
1840
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1841
+ "uid": "admin-uid",
1842
+ "email": "admin@example.com",
1843
+ "role": "admin",
1844
+ }):
1845
+ response = client.post(
1846
+ "/api/admin/users",
1847
+ json={
1848
+ "name": "Cara Diaz",
1849
+ "email": "cara@student.com",
1850
+ "password": "StrongPass1",
1851
+ "confirmPassword": "StrongPass1",
1852
+ "role": "Student",
1853
+ "status": "Active",
1854
+ "grade": "Grade 11",
1855
+ "section": "STEM C",
1856
+ "lrn": "123456789014",
1857
+ },
1858
+ )
1859
+
1860
+ assert response.status_code == 400
1861
+ payload = response.json()
1862
+ assert "special character" in payload["detail"].lower()
1863
+
1864
+ def test_create_admin_user_rolls_back_auth_user_when_firestore_write_fails(self):
1865
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1866
+ delete_user_mock = MagicMock()
1867
+
1868
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1869
+ "uid": "admin-uid",
1870
+ "email": "admin@example.com",
1871
+ "role": "admin",
1872
+ }), patch.object(main_module.firebase_auth, "get_user_by_email", side_effect=Exception("user not found")), patch.object(main_module.firebase_auth, "create_user", return_value=type("AuthUser", (), {"uid": "new-user-uid-3"})()), patch.object(main_module.firebase_auth, "delete_user", delete_user_mock), patch.object(_ProvisionDocumentRef, "set", side_effect=Exception("firestore unavailable")):
1873
+ response = client.post(
1874
+ "/api/admin/users",
1875
+ json={
1876
+ "name": "Dana Flores",
1877
+ "email": "dana@student.com",
1878
+ "password": "StrongPass1!",
1879
+ "confirmPassword": "StrongPass1!",
1880
+ "role": "Student",
1881
+ "status": "Active",
1882
+ "grade": "Grade 11",
1883
+ "section": "STEM A",
1884
+ "lrn": "123456789015",
1885
+ },
1886
+ )
1887
+
1888
+ assert response.status_code == 500
1889
+ payload = response.json()
1890
+ assert "firestore" in payload["detail"].lower()
1891
+ delete_user_mock.assert_called_once_with("new-user-uid-3")
1892
+
1893
+
1894
+ class TestAdminListUsersEndpoint:
1895
+ def test_get_admin_users_returns_paginated_results(self):
1896
+ firestore = _ProvisionFirestoreModule(
1897
+ {
1898
+ "users": {
1899
+ "student-a": {
1900
+ "name": "Alice Student",
1901
+ "email": "alice@student.com",
1902
+ "role": "student",
1903
+ "status": "Active",
1904
+ "grade": "Grade 11",
1905
+ "section": "STEM A",
1906
+ "lrn": "100000000001",
1907
+ "createdAt": 1710000000,
1908
+ },
1909
+ "student-b": {
1910
+ "name": "Ben Student",
1911
+ "email": "ben@student.com",
1912
+ "role": "student",
1913
+ "status": "Active",
1914
+ "grade": "Grade 11",
1915
+ "section": "STEM B",
1916
+ "lrn": "100000000002",
1917
+ "createdAt": 1710000100,
1918
+ },
1919
+ "teacher-a": {
1920
+ "name": "Tina Teacher",
1921
+ "email": "tina@school.com",
1922
+ "role": "teacher",
1923
+ "status": "Active",
1924
+ "department": "Mathematics",
1925
+ "createdAt": 1710000200,
1926
+ },
1927
+ },
1928
+ "accessAuditLogs": {},
1929
+ }
1930
+ )
1931
+
1932
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1933
+ "uid": "admin-uid",
1934
+ "email": "admin@example.com",
1935
+ "role": "admin",
1936
+ }):
1937
+ response = client.get("/api/admin/users?page=1&pageSize=1&role=student")
1938
+
1939
+ assert response.status_code == 200
1940
+ payload = response.json()
1941
+ assert payload["success"] is True
1942
+ assert payload["page"] == 1
1943
+ assert payload["pageSize"] == 1
1944
+ assert len(payload["users"]) == 1
1945
+ assert payload["users"][0]["role"] == "Student"
1946
+ assert payload["hasMore"] is True
1947
+
1948
+ def test_get_admin_users_rejects_invalid_role_filter(self):
1949
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1950
+
1951
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1952
+ "uid": "admin-uid",
1953
+ "email": "admin@example.com",
1954
+ "role": "admin",
1955
+ }):
1956
+ response = client.get("/api/admin/users?role=guest")
1957
+
1958
+ assert response.status_code == 400
1959
+ assert "role must be one of" in response.json()["detail"].lower()
1960
+
1961
+ def test_get_admin_users_rejects_non_admin_role(self):
1962
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
1963
+
1964
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1965
+ "uid": "teacher-uid",
1966
+ "email": "teacher@example.com",
1967
+ "role": "teacher",
1968
+ }):
1969
+ response = client.get("/api/admin/users?page=1&pageSize=25")
1970
+
1971
+ assert response.status_code == 403
1972
+ assert "forbidden" in response.json()["detail"].lower()
1973
+
1974
+
1975
+ class TestAdminDeleteUserEndpoint:
1976
+ def test_delete_admin_user_removes_auth_and_profile(self):
1977
+ firestore = _ProvisionFirestoreModule(
1978
+ {
1979
+ "users": {
1980
+ "target-uid": {
1981
+ "email": "target@student.com",
1982
+ "role": "student",
1983
+ }
1984
+ },
1985
+ "accessAuditLogs": {},
1986
+ }
1987
+ )
1988
+ delete_user_mock = MagicMock()
1989
+
1990
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
1991
+ "uid": "admin-uid",
1992
+ "email": "admin@example.com",
1993
+ "role": "admin",
1994
+ }), patch.object(main_module.firebase_auth, "delete_user", delete_user_mock):
1995
+ response = client.delete("/api/admin/users?uid=target-uid")
1996
+
1997
+ assert response.status_code == 200
1998
+ payload = response.json()
1999
+ assert payload["success"] is True
2000
+ assert payload["uid"] == "target-uid"
2001
+ assert payload["authDeleted"] is True
2002
+ assert payload["profileDeleted"] is True
2003
+
2004
+ delete_user_mock.assert_called_once_with("target-uid")
2005
+ assert "target-uid" not in firestore.client().store.get("users", {})
2006
+
2007
+ def test_delete_admin_user_handles_missing_auth_record(self):
2008
+ firestore = _ProvisionFirestoreModule(
2009
+ {
2010
+ "users": {
2011
+ "target-uid-2": {
2012
+ "email": "missing-auth@student.com",
2013
+ "role": "student",
2014
+ }
2015
+ },
2016
+ "accessAuditLogs": {},
2017
+ }
2018
+ )
2019
+
2020
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
2021
+ "uid": "admin-uid",
2022
+ "email": "admin@example.com",
2023
+ "role": "admin",
2024
+ }), patch.object(main_module.firebase_auth, "delete_user", side_effect=Exception("No user record found for the provided uid")):
2025
+ response = client.delete("/api/admin/users?uid=target-uid-2")
2026
+
2027
+ assert response.status_code == 200
2028
+ payload = response.json()
2029
+ assert payload["success"] is True
2030
+ assert payload["uid"] == "target-uid-2"
2031
+ assert payload["authDeleted"] is False
2032
+ assert payload["profileDeleted"] is True
2033
+ assert any("already missing" in warning.lower() for warning in payload.get("warnings", []))
2034
+ assert "target-uid-2" not in firestore.client().store.get("users", {})
2035
+
2036
+ def test_delete_admin_user_rejects_self_delete(self):
2037
+ firestore = _ProvisionFirestoreModule({"users": {}, "accessAuditLogs": {}})
2038
+
2039
+ with patch.object(main_module, "firebase_firestore", firestore), patch.object(main_module, "_firebase_ready", True), patch.object(main_module.firebase_auth, "verify_id_token", return_value={
2040
+ "uid": "admin-uid",
2041
+ "email": "admin@example.com",
2042
+ "role": "admin",
2043
+ }):
2044
+ response = client.delete("/api/admin/users?uid=admin-uid")
2045
+
2046
+ assert response.status_code == 400
2047
+ assert "cannot delete their own account" in response.json()["detail"].lower()
2048
+
2049
+
2050
+ # ─── Run ───────────────────────────────────────────────────────
2051
+
2052
+ if __name__ == "__main__":
2053
+ pytest.main([__file__, "-v"])
tests/test_email_service.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import os
4
+ import sys
5
+
6
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
7
+
8
+ from services.email_service import ( # noqa: E402
9
+ BrevoApiEmailProvider,
10
+ EmailMessagePayload,
11
+ create_email_service_from_env,
12
+ )
13
+
14
+
15
+ _EMAIL_ENV_KEYS = [
16
+ "BREVO_API_KEY",
17
+ "BREVO_API_TOKEN",
18
+ "BREVO_MCP_TOKEN",
19
+ "BREVO_SMTP_LOGIN",
20
+ "BREVO_SMTP_USERNAME",
21
+ "BREVO_SMTP_USER",
22
+ "BREVO_SMTP_KEY",
23
+ "BREVO_SMTP_PASSWORD",
24
+ "BREVO_SMTP_PASS",
25
+ "BREVO_SMTP_HOST",
26
+ "BREVO_SMTP_PORT",
27
+ "MAIL_FROM_ADDRESS",
28
+ "MAIL_FROM",
29
+ "BREVO_FROM_ADDRESS",
30
+ "MAIL_FROM_NAME",
31
+ "BREVO_FROM_NAME",
32
+ "MAIL_SEND_TIMEOUT_SEC",
33
+ ]
34
+
35
+
36
+ def _clear_email_env(monkeypatch) -> None:
37
+ for key in _EMAIL_ENV_KEYS:
38
+ monkeypatch.delenv(key, raising=False)
39
+
40
+
41
+ def _encode_mcp_payload(payload: dict) -> str:
42
+ encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).decode("utf-8")
43
+ return encoded.rstrip("=")
44
+
45
+
46
+ def test_create_email_service_uses_mcp_token_when_api_key_missing(monkeypatch) -> None:
47
+ _clear_email_env(monkeypatch)
48
+ monkeypatch.setenv("BREVO_MCP_TOKEN", _encode_mcp_payload({"api_key": "xkeysib-test-from-mcp"}))
49
+
50
+ service = create_email_service_from_env()
51
+
52
+ assert isinstance(service._primary_provider, BrevoApiEmailProvider)
53
+ assert service._fallback_provider is None
54
+
55
+
56
+ def test_create_email_service_prefers_direct_api_key_when_present(monkeypatch) -> None:
57
+ _clear_email_env(monkeypatch)
58
+ monkeypatch.setenv("BREVO_API_KEY", "xkeysib-direct")
59
+ monkeypatch.setenv("BREVO_MCP_TOKEN", _encode_mcp_payload({"api_key": "xkeysib-from-mcp"}))
60
+
61
+ service = create_email_service_from_env()
62
+
63
+ assert isinstance(service._primary_provider, BrevoApiEmailProvider)
64
+ assert getattr(service._primary_provider, "_api_key") == "xkeysib-direct"
65
+
66
+
67
+ def test_create_email_service_returns_not_configured_for_invalid_mcp_token(monkeypatch) -> None:
68
+ _clear_email_env(monkeypatch)
69
+ monkeypatch.setenv("BREVO_MCP_TOKEN", "not-a-valid-token")
70
+
71
+ service = create_email_service_from_env()
72
+ result = service.send_transactional_email(
73
+ EmailMessagePayload(
74
+ to_name="Test User",
75
+ to_email="test@example.com",
76
+ subject="subject",
77
+ html_content="<p>hello</p>",
78
+ text_content="hello",
79
+ )
80
+ )
81
+
82
+ assert result.success is False
83
+ assert result.provider == "none"
84
+ assert result.error_code == "email_not_configured"
tests/test_email_templates.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
5
+
6
+ from services.email_templates import ( # noqa: E402
7
+ WelcomeCredentialsEmailContext,
8
+ build_welcome_credentials_email,
9
+ )
10
+
11
+
12
+ def test_build_welcome_email_includes_brand_and_recipient_avatar_images() -> None:
13
+ result = build_welcome_credentials_email(
14
+ WelcomeCredentialsEmailContext(
15
+ recipient_name="Ana Cruz",
16
+ login_email="ana@student.com",
17
+ temporary_password="StrongPass1!",
18
+ role="Student",
19
+ login_url="https://mathpulse.ai/login",
20
+ brand_avatar_url="https://cdn.mathpulse.ai/assets/avatar_icon.png",
21
+ recipient_avatar_url="https://ui-avatars.com/api/?name=Ana+Cruz",
22
+ )
23
+ )
24
+
25
+ html_content = result["html"]
26
+
27
+ assert "MathPulse AI" in html_content
28
+ assert "Learning Platform Account Access" in html_content
29
+ assert "https://cdn.mathpulse.ai/assets/avatar_icon.png" in html_content
30
+ assert "https://ui-avatars.com/api/?name=Ana+Cruz" in html_content
31
+ assert "Temporary Password" in html_content
32
+
33
+
34
+ def test_build_welcome_email_sanitizes_invalid_avatar_urls_and_falls_back() -> None:
35
+ result = build_welcome_credentials_email(
36
+ WelcomeCredentialsEmailContext(
37
+ recipient_name="Ben Dela",
38
+ login_email="ben@student.com",
39
+ temporary_password="StrongPass1!",
40
+ role="Student",
41
+ login_url="javascript:alert(1)",
42
+ brand_avatar_url="ftp://invalid-avatar",
43
+ recipient_avatar_url="data:text/html,unsafe",
44
+ )
45
+ )
46
+
47
+ html_content = result["html"]
48
+
49
+ assert "javascript:alert(1)" not in html_content
50
+ assert "ftp://invalid-avatar" not in html_content
51
+ assert "data:text/html,unsafe" not in html_content
52
+ assert "https://mathpulse.ai" in html_content
53
+ assert ">MP</div>" in html_content
tests/test_hf_monitoring_routes.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Route-level tests for /api/hf/monitoring endpoint.
3
+ Updated for DeepSeek AI monitoring.
4
+ """
5
+
6
+ import os
7
+ from unittest.mock import MagicMock, Mock, patch
8
+
9
+ import pytest
10
+ from fastapi.testclient import TestClient
11
+
12
+ import main as main_module
13
+ from main import app
14
+
15
+ main_module._firebase_ready = True
16
+ main_module._init_firebase_admin = lambda: None
17
+ main_module.firebase_firestore = None
18
+ if getattr(main_module, "firebase_auth", None) is None:
19
+ main_module.firebase_auth = MagicMock()
20
+ main_module.firebase_auth.verify_id_token = MagicMock(return_value={
21
+ "uid": "admin-uid",
22
+ "email": "admin@example.com",
23
+ "role": "admin",
24
+ })
25
+
26
+ admin_client = TestClient(app, headers={"Authorization": "Bearer admin-token"})
27
+
28
+ EXPECTED_MONITORING_FIELDS = {
29
+ "modelId", "modelStatus", "avgResponseTimeMs",
30
+ "embeddingModelId", "embeddingModelStatus",
31
+ "inferenceBalance", "totalPeriodCost",
32
+ "hubApiCallsUsed", "hubApiCallsLimit",
33
+ "zeroGpuMinutesUsed", "zeroGpuMinutesLimit",
34
+ "publicStorageUsedTB", "publicStorageLimitTB",
35
+ "lastChecked", "periodStart", "periodEnd",
36
+ "activeProfile", "runtimeOverridesActive", "resolvedModels",
37
+ "provider", "apiBaseUrl",
38
+ }
39
+
40
+ EXPECTED_FIELDS_AFTER_DS_REPLACEMENT = EXPECTED_MONITORING_FIELDS
41
+
42
+
43
+ @pytest.fixture(autouse=True)
44
+ def _mock_env():
45
+ with patch.dict(os.environ, {"DEEPSEEK_API_KEY": "test-ds-monitoring-key"}):
46
+ yield
47
+
48
+
49
+ # ─── Auth Enforcement ────────────────────────────────────────
50
+
51
+
52
+ class TestMonitoringAuth:
53
+ def test_rejects_bad_token(self):
54
+ main_module.firebase_auth.verify_id_token = MagicMock(side_effect=Exception("bad"))
55
+ c = TestClient(app, headers={"Authorization": "Bearer bad-token"})
56
+ response = c.get("/api/hf/monitoring")
57
+ main_module.firebase_auth.verify_id_token = MagicMock(return_value={
58
+ "uid": "admin-uid", "email": "admin@example.com", "role": "admin",
59
+ })
60
+ assert response.status_code in {401, 403}
61
+
62
+
63
+ # ─── Response Shape ───────────────────────────────────────────
64
+
65
+
66
+ class TestMonitoringResponseShape:
67
+ @patch("main.time.time")
68
+ def test_success_response_contains_all_expected_fields(self, mock_time):
69
+ mock_time.return_value = 1000.0
70
+
71
+ response = admin_client.get("/api/hf/monitoring")
72
+ assert response.status_code == 200
73
+ data = response.json()
74
+ assert data["success"] is True
75
+ payload = data["data"]
76
+ for field in EXPECTED_FIELDS_AFTER_DS_REPLACEMENT:
77
+ assert field in payload, f"Missing field: {field}"
78
+
79
+ @patch("main.time.time")
80
+ @patch("services.ai_client.get_deepseek_client")
81
+ def test_all_probes_fail_gracefully(self, mock_ds_client_fn, mock_time):
82
+ mock_time.return_value = 1000.0
83
+ mock_client = MagicMock()
84
+ mock_client.chat.completions.create.side_effect = Exception("network down")
85
+ mock_ds_client_fn.return_value = mock_client
86
+
87
+ response = admin_client.get("/api/hf/monitoring")
88
+ assert response.status_code == 200
89
+ data = response.json()
90
+ assert data["success"] is True
91
+
92
+
93
+ # ─── Response Values ──────────────────────────────────────────
94
+
95
+
96
+ class TestMonitoringResponseValues:
97
+ @patch("services.ai_client.get_deepseek_client")
98
+ @patch("main.time.time")
99
+ def test_model_status_is_degraded_when_probe_fails(self, mock_time, mock_ds_client_fn):
100
+ mock_time.return_value = 1000.0
101
+ mock_client = MagicMock()
102
+ mock_client.chat.completions.create.side_effect = Exception("probe down")
103
+ mock_ds_client_fn.return_value = mock_client
104
+
105
+ response = admin_client.get("/api/hf/monitoring")
106
+ data = response.json()
107
+ assert data["success"] is True
108
+ assert data["data"]["modelStatus"] == "Degraded"
109
+
110
+ @patch("main.time.time")
111
+ def test_embedding_model_id_is_returned(self, mock_time):
112
+ mock_time.return_value = 1000.0
113
+
114
+ response = admin_client.get("/api/hf/monitoring")
115
+ data = response.json()
116
+ assert data["success"] is True
117
+ assert "bge-small" in data["data"]["embeddingModelId"].lower()
118
+
119
+ @patch("main.time.time")
120
+ def test_resolved_models_contains_task_keys(self, mock_time):
121
+ mock_time.return_value = 1000.0
122
+
123
+ response = admin_client.get("/api/hf/monitoring")
124
+ data = response.json()
125
+ resolved = data["data"].get("resolvedModels", {})
126
+ expected_tasks = {"chat", "rag_lesson", "rag_problem", "quiz_generation"}
127
+ for task in expected_tasks:
128
+ assert task in resolved, f"Missing task: {task}"
129
+ assert isinstance(resolved[task], str) and len(resolved[task]) > 0
130
+
131
+ @patch("main.time.time")
132
+ def test_active_profile_returned(self, mock_time):
133
+ mock_time.return_value = 1000.0
134
+
135
+ response = admin_client.get("/api/hf/monitoring")
136
+ data = response.json()
137
+ assert data["success"] is True
138
+ assert data["data"]["activeProfile"] in {"dev", "budget", "prod", ""}
139
+
140
+ @patch("main.time.time")
141
+ def test_provider_and_api_base_url_present(self, mock_time):
142
+ mock_time.return_value = 1000.0
143
+
144
+ response = admin_client.get("/api/hf/monitoring")
145
+ data = response.json()
146
+ assert data["success"] is True
147
+ assert data["data"]["provider"] == "deepseek"
148
+ assert "api.deepseek.com" in data["data"]["apiBaseUrl"]
tests/test_model_profiles.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ from unittest.mock import patch
6
+
7
+ import pytest
8
+
9
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
10
+ from services import inference_client as inf_client
11
+ from services.inference_client import (
12
+ _MODEL_PROFILES,
13
+ get_current_runtime_config,
14
+ get_model_for_task,
15
+ is_sequential_model,
16
+ model_supports_thinking,
17
+ reset_runtime_overrides,
18
+ set_runtime_model_override,
19
+ set_runtime_model_profile,
20
+ )
21
+
22
+
23
+ REQUIRED_PROFILE_KEYS = {
24
+ "INFERENCE_MODEL_ID", "INFERENCE_CHAT_MODEL_ID",
25
+ "HF_QUIZ_MODEL_ID", "HF_RAG_MODEL_ID", "INFERENCE_LOCK_MODEL_ID",
26
+ }
27
+
28
+
29
+ class TestModelProfiles:
30
+ def test_profiles_have_all_keys(self):
31
+ for name, profile in _MODEL_PROFILES.items():
32
+ assert REQUIRED_PROFILE_KEYS == set(profile.keys()), \
33
+ f"Profile '{name}' missing or extra keys"
34
+
35
+ def test_dev_uses_chat_model(self):
36
+ dev = _MODEL_PROFILES["dev"]
37
+ for key, value in dev.items():
38
+ assert "deepseek-chat" in value, f"dev/{key} = {value}, expected deepseek-chat"
39
+
40
+ def test_prod_chat_is_chat_model(self):
41
+ assert "deepseek-chat" in _MODEL_PROFILES["prod"]["INFERENCE_CHAT_MODEL_ID"]
42
+
43
+ def test_prod_rag_is_reasoner(self):
44
+ assert "deepseek-reasoner" in _MODEL_PROFILES["prod"]["HF_RAG_MODEL_ID"]
45
+
46
+ def test_budget_uses_chat_model_everywhere(self):
47
+ budget = _MODEL_PROFILES["budget"]
48
+ for key, value in budget.items():
49
+ assert "deepseek-chat" in value, f"budget/{key} = {value}"
50
+
51
+
52
+ class TestRuntimeOverrides:
53
+
54
+ def setup_method(self):
55
+ reset_runtime_overrides()
56
+
57
+ def teardown_method(self):
58
+ reset_runtime_overrides()
59
+
60
+ def test_set_profile_populates_overrides(self):
61
+ set_runtime_model_profile("dev")
62
+ assert inf_client._RUNTIME_PROFILE == "dev"
63
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_MODEL_ID"] == "deepseek-chat"
64
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_CHAT_MODEL_ID"] == "deepseek-chat"
65
+
66
+ def test_set_profile_replaces_all_overrides(self):
67
+ set_runtime_model_profile("dev")
68
+ set_runtime_model_profile("prod")
69
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_CHAT_MODEL_ID"] == "deepseek-chat"
70
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_LOCK_MODEL_ID"] == "deepseek-chat"
71
+
72
+ def test_set_profile_unknown_raises(self):
73
+ with pytest.raises(ValueError, match="Unknown profile"):
74
+ set_runtime_model_profile("nonexistent")
75
+
76
+ def test_single_override_sets_key(self):
77
+ set_runtime_model_override("HF_RAG_MODEL_ID", "custom/model")
78
+ assert inf_client._RUNTIME_OVERRIDES["HF_RAG_MODEL_ID"] == "custom/model"
79
+
80
+ def test_reset_clears_overrides(self):
81
+ set_runtime_model_profile("dev")
82
+ reset_runtime_overrides()
83
+ assert inf_client._RUNTIME_PROFILE == ""
84
+ assert inf_client._RUNTIME_OVERRIDES == {}
85
+
86
+ def test_override_layers_on_profile(self):
87
+ set_runtime_model_profile("dev")
88
+ set_runtime_model_override("HF_RAG_MODEL_ID", "custom/model")
89
+ assert inf_client._RUNTIME_OVERRIDES["HF_RAG_MODEL_ID"] == "custom/model"
90
+ assert inf_client._RUNTIME_OVERRIDES["INFERENCE_MODEL_ID"] == "deepseek-chat"
91
+
92
+
93
+ class TestGetCurrentRuntimeConfig:
94
+
95
+ def setup_method(self):
96
+ reset_runtime_overrides()
97
+
98
+ def teardown_method(self):
99
+ reset_runtime_overrides()
100
+
101
+ def test_returns_resolved_dict_with_all_keys(self):
102
+ set_runtime_model_profile("dev")
103
+ config = get_current_runtime_config()
104
+ assert config["profile"] == "dev"
105
+ for key in REQUIRED_PROFILE_KEYS:
106
+ assert key in config["resolved"], f"Missing {key}"
107
+
108
+ def test_override_takes_priority_over_profile(self):
109
+ set_runtime_model_profile("dev")
110
+ set_runtime_model_override("INFERENCE_CHAT_MODEL_ID", "custom/chat")
111
+ config = get_current_runtime_config()
112
+ assert config["resolved"]["INFERENCE_CHAT_MODEL_ID"] == "custom/chat"
113
+
114
+
115
+ class TestGetModelForTask:
116
+
117
+ def setup_method(self):
118
+ reset_runtime_overrides()
119
+
120
+ def teardown_method(self):
121
+ reset_runtime_overrides()
122
+
123
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
124
+ def test_returns_profile_default_for_rag(self):
125
+ set_runtime_model_profile("prod")
126
+ model = get_model_for_task("rag_lesson")
127
+ assert "deepseek-reasoner" in model
128
+
129
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
130
+ def test_returns_profile_default_for_chat(self):
131
+ set_runtime_model_profile("prod")
132
+ model = get_model_for_task("chat")
133
+ assert "deepseek-chat" in model
134
+
135
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "false"})
136
+ def test_returns_runtime_override_for_chat(self):
137
+ set_runtime_model_override("INFERENCE_CHAT_MODEL_ID", "custom/chat")
138
+ model = get_model_for_task("chat")
139
+ assert model == "custom/chat"
140
+
141
+ @patch.dict(os.environ, {"INFERENCE_ENFORCE_LOCK_MODEL": "true"})
142
+ def test_enforce_qwen_overrides_task(self):
143
+ set_runtime_model_profile("prod")
144
+ model = get_model_for_task("rag_lesson")
145
+ assert "deepseek-chat" in model
146
+
147
+
148
+ class TestIsSequentialModel:
149
+
150
+ def setup_method(self):
151
+ reset_runtime_overrides()
152
+
153
+ def teardown_method(self):
154
+ reset_runtime_overrides()
155
+
156
+ def test_reasoner_is_sequential(self):
157
+ assert is_sequential_model("deepseek-reasoner") is True
158
+
159
+ def test_chat_is_not_sequential(self):
160
+ assert is_sequential_model("deepseek-chat") is False
161
+
162
+ def test_empty_string_checks_env(self):
163
+ result = is_sequential_model("")
164
+ assert result is True or result is False
165
+
166
+ @patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-reasoner"})
167
+ def test_env_model_reasoner_is_sequential(self):
168
+ assert is_sequential_model("") is True
169
+
170
+ @patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-chat"})
171
+ def test_env_model_chat_is_not_sequential(self):
172
+ assert is_sequential_model("") is False
173
+
174
+
175
+ class TestModelSupportsThinking:
176
+
177
+ def test_reasoner_supports_thinking(self):
178
+ assert model_supports_thinking("deepseek-reasoner") is True
179
+
180
+ def test_chat_does_not_support_thinking(self):
181
+ assert model_supports_thinking("deepseek-chat") is False
182
+
183
+ def test_unknown_does_not_support_thinking(self):
184
+ assert model_supports_thinking("meta-llama/Llama-3.1-8B-Instruct") is False
tests/test_rag_pipeline.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ import pytest
7
+
8
+ from rag.curriculum_rag import (
9
+ _distance_to_score,
10
+ build_lesson_prompt,
11
+ build_lesson_query,
12
+ retrieve_curriculum_context,
13
+ summarize_retrieval_confidence,
14
+ )
15
+
16
+
17
+ def _mock_vectorstore_components(collection_mock, embedder_mock):
18
+ def _factory():
19
+ return (MagicMock(), collection_mock, embedder_mock)
20
+ return _factory
21
+
22
+
23
class TestRetrieveCurriculumContext:
    """Retrieval should degrade gracefully when the store holds nothing."""

    def test_empty_collection_returns_empty_list(self):
        empty_collection = MagicMock()
        # Any subscript of collection.get(...)'s result yields an empty list,
        # simulating a vectorstore with zero ingested documents.
        empty_collection.get.return_value.__getitem__.return_value = []
        fake_embedder = MagicMock()

        patched_components = patch(
            "rag.curriculum_rag.get_vectorstore_components",
            return_value=(MagicMock(), empty_collection, fake_embedder),
        )
        with patched_components:
            chunks = retrieve_curriculum_context(
                query="test query",
                subject="General Mathematics",
                top_k=5,
            )
        assert chunks == []
41
+
42
+
43
class TestDistanceToScore:
    """Invariants of the distance-to-similarity conversion."""

    def test_zero_distance_returns_one(self):
        # A perfect match (distance 0) maps to the maximum score.
        assert _distance_to_score(0.0) == 1.0

    def test_never_returns_zero_or_negative(self):
        # Scores stay strictly positive and capped at 1 across the range.
        for distance in (0.0, 0.5, 1.0, 5.0, 100.0):
            score = _distance_to_score(distance)
            assert 0.0 < score <= 1.0
52
+
53
+
54
class TestBuildLessonPrompt:
    """The generated lesson prompt must carry structure and PH context."""

    def test_contains_json_and_required_keys(self):
        sample_chunk = {
            "content": "Compound interest formula A=P(1+r/n)^(nt)",
            "source_file": "sample_curriculum.json",
            "page": 5,
            "content_domain": "Business Mathematics",
            "chunk_type": "content_explanation",
            "score": 0.85,
        }
        prompt = build_lesson_prompt(
            lesson_title="Compound Interest",
            competency="M11GM-IIc-1",
            grade_level="Grade 11-12",
            subject="General Mathematics",
            quarter=3,
            learner_level="mixed",
            module_unit="Business Math",
            curriculum_chunks=[sample_chunk],
        )
        assert "JSON" in prompt
        assert "lessonTitle" in prompt
        assert "needsReview" in prompt
        # At least one Philippine real-world anchor should appear.
        ph_context_terms = (
            "payroll", "VAT", "discounts", "loans", "Pag-IBIG", "school",
        )
        assert any(term in prompt for term in ph_context_terms)

    def test_contains_thinking_hint(self):
        prompt = build_lesson_prompt(
            lesson_title="Functions",
            competency="M11GM-Ia-1",
            grade_level="Grade 11-12",
            subject="General Mathematics",
            quarter=1,
            learner_level=None,
            module_unit=None,
            curriculum_chunks=[],
        )
        assert "Think step by step" in prompt
95
+
96
+
97
class TestSummarizeRetrievalConfidence:
    """Confidence banding computed over retrieved chunk scores."""

    def test_empty_chunks_returns_low(self):
        summary = summarize_retrieval_confidence([])
        assert summary["band"] == "low"
        assert summary["confidence"] == 0.0

    def test_high_confidence(self):
        strong = [{"score": s} for s in (0.85, 0.80, 0.75)]
        assert summarize_retrieval_confidence(strong)["band"] == "high"

    def test_medium_confidence(self):
        middling = [{"score": s} for s in (0.65, 0.60)]
        assert summarize_retrieval_confidence(middling)["band"] == "medium"

    def test_low_confidence(self):
        weak = [{"score": s} for s in (0.35, 0.30)]
        assert summarize_retrieval_confidence(weak)["band"] == "low"

    def test_chunk_count_included(self):
        chunks = [{"score": s} for s in (0.8, 0.7, 0.6)]
        assert summarize_retrieval_confidence(chunks)["chunkCount"] == 3
122
+
123
+
124
class TestBuildLessonQuery:
    """The retrieval query should weave in every provided lesson field."""

    def test_includes_all_fields(self):
        query = build_lesson_query(
            "Compound Interest",
            "General Mathematics",
            3,
            lesson_title="Compound Interest Basics",
            competency="M11GM-IIc-1",
            module_unit="Business Math",
            learner_level="mixed",
        )
        for fragment in (
            "Compound Interest",
            "General Mathematics",
            "Quarter 3",
            "Compound Interest Basics",
        ):
            assert fragment in query
139
+
140
+
141
class TestIsSequentialModel:
    """Env-driven sequential detection when no explicit model id is passed."""

    def test_sequential_for_reasoner(self):
        with patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-reasoner"}):
            # Imported inside the patch so the call reads the patched env.
            from services.inference_client import is_sequential_model
            sequential = is_sequential_model()
            assert sequential is True

    def test_not_sequential_for_chat(self):
        with patch.dict(os.environ, {"INFERENCE_MODEL_ID": "deepseek-chat"}):
            from services.inference_client import is_sequential_model
            sequential = is_sequential_model()
            assert sequential is False