Yingtao-Zheng commited on
Commit
4a5bfab
·
1 Parent(s): 82d2ab7

Put all the models together (except UI)

Browse files
This view is limited to 50 files because it contains too many changes. Β  See raw diff
Files changed (50) hide show
  1. .gitignore +29 -8
  2. Dockerfile +27 -0
  3. README.md +87 -6
  4. app.py +1 -0
  5. checkpoints/hybrid_focus_config.json +10 -0
  6. MLP/models/meta_20260224_024200.npz β†’ checkpoints/meta_best.npz +2 -2
  7. MLP/models/mlp_20260224_024200.joblib β†’ checkpoints/mlp_best.pt +2 -2
  8. best_eye_cnn.pth β†’ checkpoints/model_best.joblib +2 -2
  9. MLP/models/scaler_20260224_024200.joblib β†’ checkpoints/scaler_best.joblib +2 -2
  10. checkpoints/xgboost_face_orientation_best.json +0 -0
  11. {data_preparation β†’ data}/CNN/eye_crops/val/open/.gitkeep +0 -0
  12. data/README.md +47 -0
  13. {data_preparation β†’ data}/collected_Abdelrahman/abdelrahman_20260306_023035.npz +0 -0
  14. {data_preparation β†’ data}/collected_Jarek/Jarek_20260225_012931.npz +0 -0
  15. {data_preparation β†’ data}/collected_Junhao/Junhao_20260303_113554.npz +0 -0
  16. {data_preparation β†’ data}/collected_Kexin/kexin2_20260305_180229.npz +0 -0
  17. {data_preparation β†’ data}/collected_Kexin/kexin_20260224_151043.npz +0 -0
  18. {data_preparation β†’ data}/collected_Langyuan/Langyuan_20260303_153145.npz +0 -0
  19. {data_preparation β†’ data}/collected_Mohamed/session_20260224_010131.npz +0 -0
  20. {data_preparation β†’ data}/collected_Yingtao/Yingtao_20260306_023937.npz +0 -0
  21. {data_preparation/collected_Ayten β†’ data/collected_ayten}/ayten_session_1.npz +0 -0
  22. {data_preparation/collected_Saba β†’ data/collected_saba}/saba_20260306_230710.npz +0 -0
  23. data_preparation/MLP/explore_collected_data.ipynb +0 -0
  24. data_preparation/MLP/train_mlp.ipynb +0 -0
  25. data_preparation/README.md +61 -27
  26. {models/geometric β†’ data_preparation}/__init__.py +0 -0
  27. data_preparation/data_exploration.ipynb +0 -0
  28. data_preparation/prepare_dataset.py +232 -0
  29. docker-compose.yml +5 -0
  30. eslint.config.js +29 -0
  31. evaluation/README.md +45 -2
  32. index.html +17 -0
  33. main.py +964 -0
  34. models/README.md +51 -8
  35. models/{attention/__init__.py β†’ __init__.py} +0 -0
  36. models/attention/classifier.py +0 -0
  37. models/attention/fusion.py +0 -0
  38. models/attention/train.py +0 -0
  39. models/cnn/notebooks/EyeCNN.ipynb +107 -0
  40. models/cnn/notebooks/EyeCNN_Train_Evaluate_new.ipynb +0 -0
  41. models/cnn/notebooks/EyeCNN_Training_Evaluate.ipynb +0 -0
  42. models/cnn/notebooks/README.md +1 -0
  43. models/{attention/collect_features.py β†’ collect_features.py} +26 -19
  44. models/eye_classifier.py +69 -0
  45. models/eye_crop.py +77 -0
  46. models/{geometric/eye_behaviour/eye_scorer.py β†’ eye_scorer.py} +7 -3
  47. models/{pretrained/face_mesh/face_mesh.py β†’ face_mesh.py} +6 -3
  48. models/geometric/eye_behaviour/__init__.py +0 -0
  49. models/geometric/face_orientation/__init__.py +0 -1
  50. models/{geometric/face_orientation/head_pose.py β†’ head_pose.py} +10 -1
.gitignore CHANGED
@@ -1,4 +1,26 @@
1
- __pycache__/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  *.py[cod]
3
  *$py.class
4
  *.so
@@ -9,12 +31,11 @@ env/
9
  .env
10
  *.egg-info/
11
  .eggs/
12
- dist/
13
  build/
14
- .idea/
15
- .vscode/
16
- *.swp
17
- *.swo
18
- docs/
19
- .DS_Store
20
  Thumbs.db
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules/
11
+ dist/
12
+ dist-ssr/
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/
17
+ .idea/
18
+ .DS_Store
19
+ *.suo
20
+ *.ntvs*
21
+ *.njsproj
22
+ *.sln
23
+ *.sw?
24
  *.py[cod]
25
  *$py.class
26
  *.so
 
31
  .env
32
  *.egg-info/
33
  .eggs/
 
34
  build/
 
 
 
 
 
 
35
  Thumbs.db
36
+
37
+ # Project specific
38
+ focus_guard.db
39
+ static/
40
+ __pycache__/
41
+ docs/
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ RUN useradd -m -u 1000 user
4
+ ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
5
+
6
+ ENV PYTHONUNBUFFERED=1
7
+
8
+ WORKDIR /app
9
+
10
+ RUN apt-get update && apt-get install -y --no-install-recommends libglib2.0-0 libsm6 libxrender1 libxext6 libxcb1 libgl1 libgomp1 ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavdevice-dev libopus-dev libvpx-dev libsrtp2-dev build-essential nodejs npm && rm -rf /var/lib/apt/lists/*
11
+
12
+ COPY requirements.txt ./
13
+ RUN pip install --no-cache-dir -r requirements.txt
14
+
15
+ COPY . .
16
+
17
+ RUN npm install && npm run build && mkdir -p /app/static && cp -R dist/* /app/static/
18
+
19
+ ENV FOCUSGUARD_CACHE_DIR=/app/.cache/focusguard
20
+ RUN python -c "from models.face_mesh import _ensure_model; _ensure_model()"
21
+
22
+ RUN mkdir -p /app/data && chown -R user:user /app
23
+
24
+ USER user
25
+ EXPOSE 7860
26
+
27
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "debug"]
README.md CHANGED
@@ -1,10 +1,91 @@
1
  # FocusGuard
2
 
3
- Webcam-based focus detection: face mesh, head pose, eye (geometry or YOLO), plus an MLP trained on collected features.
4
 
5
- - **data_preparation/** β€” collect data, notebooks, processed/collected files
6
- - **models/** β€” face mesh, head pose, eye scorer, YOLO classifier, MLP training, attention feature collection
7
- - **evaluation/** β€” metrics and run logs
8
- - **ui/** β€” live demo (geometry+YOLO or MLP-only)
9
 
10
- Run from here: `pip install -r requirements.txt` then `python ui/live_demo.py` or `python ui/live_demo.py --mlp`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # FocusGuard
2
 
3
+ Real-time webcam-based focus detection system combining geometric feature extraction with machine learning classification. The pipeline extracts 17 facial features (EAR, gaze, head pose, PERCLOS, blink rate, etc.) from MediaPipe landmarks and classifies attentiveness using MLP and XGBoost models. Served via a React + FastAPI web application with live WebSocket video.
4
 
5
+ ## 1. Project Structure
 
 
 
6
 
7
+ ```
8
+ ├── data/ Raw collected sessions (collected_<name>/*.npz)
9
+ ├── data_preparation/ Data loading, cleaning, and exploration
10
+ ├── notebooks/ Training notebooks (MLP, XGBoost) with LOPO evaluation
11
+ ├── models/ Feature extraction modules and training scripts
12
+ ├── checkpoints/ All saved weights (mlp_best.pt, xgboost_*_best.json, GRU, scalers)
13
+ ├── evaluation/ Training logs and metrics (JSON)
14
+ ├── ui/ Live OpenCV demo and inference pipeline
15
+ ├── src/ React/Vite frontend source
16
+ ├── static/ Built frontend (served by FastAPI)
17
+ ├── app.py / main.py FastAPI backend (API, WebSocket, DB)
18
+ ├── requirements.txt Python dependencies
19
+ └── package.json Frontend dependencies
20
+ ```
21
+
22
+ ## 2. Setup
23
+
24
+ ```bash
25
+ python -m venv venv
26
+ source venv/bin/activate
27
+ pip install -r requirements.txt
28
+ ```
29
+
30
+ Frontend (only needed if modifying the React app):
31
+
32
+ ```bash
33
+ npm install
34
+ npm run build
35
+ cp -r dist/* static/
36
+ ```
37
+
38
+ ## 3. Running
39
+
40
+ **Web application (API + frontend):**
41
+
42
+ ```bash
43
+ uvicorn app:app --host 0.0.0.0 --port 7860
44
+ ```
45
+
46
+ Open http://localhost:7860 in a browser.
47
+
48
+ **Live camera demo (OpenCV):**
49
+
50
+ ```bash
51
+ python ui/live_demo.py
52
+ python ui/live_demo.py --xgb # XGBoost mode
53
+ ```
54
+
55
+ **Training:**
56
+
57
+ ```bash
58
+ python -m models.mlp.train # MLP
59
+ python -m models.xgboost.train # XGBoost
60
+ ```
61
+
62
+ ## 4. Dataset
63
+
64
+ - **9 participants**, each recorded via webcam with real-time labelling (focused / unfocused)
65
+ - **144,793 total samples**, 10 selected features, binary classification
66
+ - Collected using `python -m models.collect_features --name <name>`
67
+ - Stored as `.npz` files in `data/collected_<name>/`
68
+
69
+ ## 5. Models
70
+
71
+ | Model | Test Accuracy | Test F1 | ROC-AUC |
72
+ |-------|--------------|---------|---------|
73
+ | XGBoost (600 trees, depth 8, lr 0.149) | 95.87% | 0.959 | 0.991 |
74
+ | MLP (64β†’32, 30 epochs, lr 1e-3) | 92.92% | 0.929 | 0.971 |
75
+
76
+ Both evaluated on a held-out 15% stratified test split. LOPO (Leave-One-Person-Out) cross-validation available in `notebooks/`.
77
+
78
+ ## 6. Feature Pipeline
79
+
80
+ 1. **Face mesh** — MediaPipe 478-landmark detection
81
+ 2. **Head pose** — solvePnP → yaw, pitch, roll, face score, gaze offset, head deviation
82
+ 3. **Eye scorer** — EAR (left/right/avg), horizontal/vertical gaze ratio, MAR
83
+ 4. **Temporal tracking** — PERCLOS, blink rate, closure duration, yawn duration
84
+ 5. **Classification** — 10-feature vector → MLP or XGBoost → focused / unfocused
85
+
86
+ ## 7. Tech Stack
87
+
88
+ - **Backend:** Python, FastAPI, WebSocket, aiosqlite
89
+ - **Frontend:** React, Vite, TypeScript
90
+ - **ML:** PyTorch (MLP), XGBoost, scikit-learn
91
+ - **Vision:** MediaPipe, OpenCV
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from main import app
checkpoints/hybrid_focus_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "w_mlp": 0.6000000000000001,
3
+ "w_geo": 0.3999999999999999,
4
+ "threshold": 0.35,
5
+ "use_yawn_veto": true,
6
+ "geo_face_weight": 0.4,
7
+ "geo_eye_weight": 0.6,
8
+ "mar_yawn_threshold": 0.55,
9
+ "metric": "f1"
10
+ }
MLP/models/meta_20260224_024200.npz β†’ checkpoints/meta_best.npz RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:769bb62c7bf04aafd808e9b2623e795c2d92bcb933313ebf553d6fce5ebe7143
3
- size 1616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d78d1df5e25536a2c82c4b8f5fd0c26dd35f44b28fd59761634cbf78c7546f8
3
+ size 4196
MLP/models/mlp_20260224_024200.joblib β†’ checkpoints/mlp_best.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a72933fcf2d0aed998c6303ea4298c04618d937c7f17bf492e76efcf3b4b54d7
3
- size 50484
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f55129785b6882c304483aa5399f5bf6c9ed6e73dfec7ca6f36cd0436156c8
3
+ size 14497
best_eye_cnn.pth β†’ checkpoints/model_best.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3c3d85de013387e8583fe7218daabb83a8a6f46ca5bcacbf6fbf3619b688da8
3
- size 2103809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183f2d4419e0eb1e58704e5a7312eb61e331523566d4dc551054a07b3aac7557
3
+ size 5775881
MLP/models/scaler_20260224_024200.joblib β†’ checkpoints/scaler_best.joblib RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f9ef3721cee28f1472886556e001d0f6ed0abe09011d979a70ca9bf447d453e
3
- size 823
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ed6b4c0d99e0254c6a740a949da2384db58ec7d3e6df6432b9bfcd3a296c71
3
+ size 783
checkpoints/xgboost_face_orientation_best.json ADDED
The diff for this file is too large to render. See raw diff
 
{data_preparation β†’ data}/CNN/eye_crops/val/open/.gitkeep RENAMED
File without changes
data/README.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data/
2
+
3
+ Raw collected session data used for model training and evaluation.
4
+
5
+ ## 1. Contents
6
+
7
+ Each `collected_<name>/` folder contains `.npz` files for one participant:
8
+
9
+ | Folder | Participant | Samples |
10
+ |--------|-------------|---------|
11
+ | `collected_Abdelrahman/` | Abdelrahman | 15,870 |
12
+ | `collected_Jarek/` | Jarek | 14,829 |
13
+ | `collected_Junhao/` | Junhao | 8,901 |
14
+ | `collected_Kexin/` | Kexin | 32,312 (2 sessions) |
15
+ | `collected_Langyuan/` | Langyuan | 15,749 |
16
+ | `collected_Mohamed/` | Mohamed | 13,218 |
17
+ | `collected_Yingtao/` | Yingtao | 17,591 |
18
+ | `collected_ayten/` | Ayten | 17,621 |
19
+ | `collected_saba/` | Saba | 8,702 |
20
+ | **Total** | **9 participants** | **144,793** |
21
+
22
+ ## 2. File Format
23
+
24
+ Each `.npz` file contains:
25
+
26
+ | Key | Shape | Description |
27
+ |-----|-------|-------------|
28
+ | `features` | (N, 17) | 17-dimensional feature vectors (float32) |
29
+ | `labels` | (N,) | Binary labels: 0 = unfocused, 1 = focused |
30
+ | `feature_names` | (17,) | Column names for the 17 features |
31
+
32
+ ## 3. Feature List
33
+
34
+ `ear_left`, `ear_right`, `ear_avg`, `h_gaze`, `v_gaze`, `mar`, `yaw`, `pitch`, `roll`, `s_face`, `s_eye`, `gaze_offset`, `head_deviation`, `perclos`, `blink_rate`, `closure_duration`, `yawn_duration`
35
+
36
+ 10 of these are selected for training (see `data_preparation/prepare_dataset.py`).
37
+
38
+ ## 4. Collection
39
+
40
+ ```bash
41
+ python -m models.collect_features --name yourname
42
+ ```
43
+
44
+ 1. Webcam opens with live overlay
45
+ 2. Press **1** = focused, **0** = unfocused (switch every 10–30 sec)
46
+ 3. Press **p** to pause/resume
47
+ 4. Press **q** to stop and save
{data_preparation β†’ data}/collected_Abdelrahman/abdelrahman_20260306_023035.npz RENAMED
File without changes
{data_preparation β†’ data}/collected_Jarek/Jarek_20260225_012931.npz RENAMED
File without changes
{data_preparation β†’ data}/collected_Junhao/Junhao_20260303_113554.npz RENAMED
File without changes
{data_preparation β†’ data}/collected_Kexin/kexin2_20260305_180229.npz RENAMED
File without changes
{data_preparation β†’ data}/collected_Kexin/kexin_20260224_151043.npz RENAMED
File without changes
{data_preparation β†’ data}/collected_Langyuan/Langyuan_20260303_153145.npz RENAMED
File without changes
{data_preparation β†’ data}/collected_Mohamed/session_20260224_010131.npz RENAMED
File without changes
{data_preparation β†’ data}/collected_Yingtao/Yingtao_20260306_023937.npz RENAMED
File without changes
{data_preparation/collected_Ayten β†’ data/collected_ayten}/ayten_session_1.npz RENAMED
File without changes
{data_preparation/collected_Saba β†’ data/collected_saba}/saba_20260306_230710.npz RENAMED
File without changes
data_preparation/MLP/explore_collected_data.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
data_preparation/MLP/train_mlp.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
data_preparation/README.md CHANGED
@@ -1,41 +1,75 @@
1
- # Data Preparation
2
 
3
- ## Folder Structure
4
 
5
- ### collected/
6
- Contains raw session files in `.npz` format.
7
- Generated using:
8
 
9
- python -m models.attention.collect_features
 
 
 
10
 
11
- Each session includes:
12
- - 17-dimensional feature vectors
13
- - Corresponding labels
14
 
15
- ---
16
 
 
 
 
 
 
 
 
17
 
18
- ### MLP/
19
- Contains notebooks for:
20
- - Exploring collected data
21
- - Training the sklearn MLP model (10 features)
22
 
23
- Trained models are saved to:
24
- ../MLP/models/
25
 
26
- ---
27
 
28
- ### CNN/
29
- Eye crop directory structure for CNN training (YOLO).
30
 
31
- ---
32
 
33
- ## Collecting Data
34
 
35
- **Step-by-step**
 
 
 
36
 
37
- 1. From repo root Install deps: `pip install -r requirements.txt`.
38
- 3. Run: `python -m models.attention.collect_features --name yourname`.
39
- 4. Webcam opens. Look at the camera; press **1** when focused, **0** when unfocused. Switch every 10–30 sec so you get both labels.
40
- 5. Press **p** to pause/resume.
41
- 6. Press **q** when done. One `.npz` is saved to `data_preparation/collected/` (17 features + labels).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data_preparation/
2
 
3
+ Shared data loading, cleaning, and exploratory analysis.
4
 
5
+ ## 1. Files
 
 
6
 
7
+ | File | Description |
8
+ |------|-------------|
9
+ | `prepare_dataset.py` | Central data loading module used by all training scripts and notebooks |
10
+ | `data_exploration.ipynb` | EDA notebook: feature distributions, class balance, correlations |
11
 
12
+ ## 2. prepare_dataset.py
 
 
13
 
14
+ Provides a consistent pipeline for loading raw `.npz` data from `data/`:
15
 
16
+ | Function | Purpose |
17
+ |----------|---------|
18
+ | `load_all_pooled(model_name)` | Load all participants, clean, select features, concatenate |
19
+ | `load_per_person(model_name)` | Load grouped by person (for LOPO cross-validation) |
20
+ | `get_numpy_splits(model_name)` | Load + stratified 70/15/15 split + StandardScaler |
21
+ | `get_dataloaders(model_name)` | Same as above, wrapped in PyTorch DataLoaders |
22
+ | `_split_and_scale(features, labels, ...)` | Reusable split + optional scaling |
23
 
24
+ ### Cleaning rules
 
 
 
25
 
26
+ - `yaw` clipped to [-45, 45], `pitch`/`roll` to [-30, 30]
27
+ - `ear_left`, `ear_right`, `ear_avg` clipped to [0, 0.85]
28
 
29
+ ### Selected features (face_orientation)
30
 
31
+ `head_deviation`, `s_face`, `s_eye`, `h_gaze`, `pitch`, `ear_left`, `ear_avg`, `ear_right`, `gaze_offset`, `perclos`
 
32
 
33
+ ## 3. data_exploration.ipynb
34
 
35
+ Run from this folder or from the project root. Covers:
36
 
37
+ 1. Per-feature statistics (mean, std, min, max)
38
+ 2. Class distribution (focused vs unfocused)
39
+ 3. Feature histograms and box plots
40
+ 4. Correlation matrix
41
 
42
+ ## 4. How to run
43
+
44
+ `prepare_dataset.py` is a **library module**, not a standalone script. You don't run it directly; you import it from code that needs data.
45
+
46
+ **From repo root:**
47
+
48
+ ```bash
49
+ # Optional: quick test that loading works
50
+ python -c "
51
+ from data_preparation.prepare_dataset import load_all_pooled
52
+ X, y, names = load_all_pooled('face_orientation')
53
+ print(f'Loaded {X.shape[0]} samples, {X.shape[1]} features: {names}')
54
+ "
55
+ ```
56
+
57
+ **Used by:**
58
+
59
+ - `python -m models.mlp.train`
60
+ - `python -m models.xgboost.train`
61
+ - `notebooks/mlp.ipynb`, `notebooks/xgboost.ipynb`
62
+ - `data_preparation/data_exploration.ipynb`
63
+
64
+ ## 5. Usage (in code)
65
+
66
+ ```python
67
+ from data_preparation.prepare_dataset import load_all_pooled, get_numpy_splits
68
+
69
+ # pooled data
70
+ X, y, names = load_all_pooled("face_orientation")
71
+
72
+ # ready-to-train splits
73
+ splits, n_features, n_classes, scaler = get_numpy_splits("face_orientation")
74
+ X_train, y_train = splits["X_train"], splits["y_train"]
75
+ ```
{models/geometric β†’ data_preparation}/__init__.py RENAMED
File without changes
data_preparation/data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data_preparation/prepare_dataset.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import glob

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# PyTorch is optional: numpy-only consumers of this module can import it
# without torch installed. Stand-ins keep the module importable; constructing
# a DataLoader without torch raises a clear ImportError instead.
try:
    import torch
    from torch.utils.data import Dataset, DataLoader
except ImportError:  # pragma: no cover
    torch = None

    class Dataset:  # type: ignore
        pass

    class _MissingTorchDataLoader:  # type: ignore
        def __init__(self, *args, **kwargs):
            raise ImportError(
                "PyTorch not installed"
            )

    DataLoader = _MissingTorchDataLoader  # type: ignore
24
+
25
# Root folder holding the collected_<name>/ session directories,
# resolved relative to this file so imports work from any cwd.
DATA_DIR = os.path.join(os.path.dirname(__file__), "..", "data")

# Per-model feature subsets. Keys are the model names accepted by the
# loader functions; values are the ordered columns pulled from each .npz.
SELECTED_FEATURES = {
    "face_orientation": [
        "head_deviation", "s_face", "s_eye", "h_gaze", "pitch",
        "ear_left", "ear_avg", "ear_right", "gaze_offset", "perclos",
    ],
    "eye_behaviour": [
        "ear_left", "ear_right", "ear_avg", "mar",
        "blink_rate", "closure_duration", "perclos", "yawn_duration",
    ],
}
37
+
38
+
39
class FeatureVectorDataset(Dataset):
    """PyTorch Dataset over pre-extracted feature vectors and labels.

    Features are stored as float32 tensors and labels as long tensors,
    ready for a classifier head / CrossEntropyLoss.
    """

    def __init__(self, features: np.ndarray, labels: np.ndarray):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self) -> int:
        # One sample per label.
        return len(self.labels)

    def __getitem__(self, idx):
        # A sample is the (feature_vector, label) pair at idx.
        return self.features[idx], self.labels[idx]
49
+
50
+
51
+ # ── Low-level helpers ────────────────────────────────────────────────────
52
+
53
+ def _clean_npz(raw, names):
54
+ """Apply clipping rules in-place. Shared by all loaders."""
55
+ for col, lo, hi in [('yaw', -45, 45), ('pitch', -30, 30), ('roll', -30, 30)]:
56
+ if col in names:
57
+ raw[:, names.index(col)] = np.clip(raw[:, names.index(col)], lo, hi)
58
+ for feat in ['ear_left', 'ear_right', 'ear_avg']:
59
+ if feat in names:
60
+ raw[:, names.index(feat)] = np.clip(raw[:, names.index(feat)], 0, 0.85)
61
+ return raw
62
+
63
+
64
def _load_one_npz(npz_path, target_features):
    """Load one .npz session file, clean it, and select feature columns.

    Returns (X, y, selected_feature_names), where the names are the subset
    of target_features actually present in the file, in requested order.
    """
    data = np.load(npz_path, allow_pickle=True)
    names = list(data['feature_names'])
    raw = _clean_npz(data['features'].astype(np.float32), names)
    labels = data['labels'].astype(np.int64)
    # Keep only requested columns that exist, preserving the requested order.
    selected = [feat for feat in target_features if feat in names]
    cols = [names.index(feat) for feat in selected]
    return raw[:, cols], labels, selected
74
+
75
+
76
+ # ── Public data loaders ──────────────────────────────────────────────────
77
+
78
def load_all_pooled(model_name: str = "face_orientation", data_dir: str = None):
    """Load all collected_*/*.npz, clean, select features, and concatenate.

    model_name -- key into SELECTED_FEATURES (falls back to face_orientation)
    data_dir   -- override for DATA_DIR (optional)
    Returns (X_all, y_all, all_feature_names). When no files are found, a
    synthetic dataset is generated so downstream code keeps running.
    """
    data_dir = data_dir or DATA_DIR
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    npz_files = sorted(glob.glob(os.path.join(data_dir, "collected_*", "*.npz")))

    if not npz_files:
        print("[DATA] Warning: No .npz files found. Falling back to synthetic.")
        X, y = _generate_synthetic_data(model_name)
        return X, y, target_features

    chunks_X, chunks_y = [], []
    all_names = None
    for path in npz_files:
        X, y, names = _load_one_npz(path, target_features)
        if all_names is None:
            # Column names come from the first file; later files are assumed
            # to share the same selected columns.
            all_names = names
        chunks_X.append(X)
        chunks_y.append(y)
        print(f"[DATA] + {os.path.basename(path)}: {X.shape[0]} samples")

    X_all = np.concatenate(chunks_X, axis=0)
    y_all = np.concatenate(chunks_y, axis=0)
    print(f"[DATA] Loaded {len(npz_files)} file(s) for '{model_name}': "
          f"{X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return X_all, y_all, all_names
108
+
109
+
110
def load_per_person(model_name: str = "face_orientation", data_dir: str = None):
    """Load collected_*/*.npz grouped by person (folder name).

    Returns (by_person, X_all, y_all):
      by_person -- dict { person_name: (X, y) } with per-person numpy arrays
      X_all/y_all -- the same data pooled across everyone
    Raises FileNotFoundError when no session files match.
    """
    data_dir = data_dir or DATA_DIR
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    pattern = os.path.join(data_dir, "collected_*", "*.npz")
    npz_files = sorted(glob.glob(pattern))
    if not npz_files:
        raise FileNotFoundError(f"No .npz files matching {pattern}")

    by_person = {}
    pooled_X, pooled_y = [], []
    for path in npz_files:
        # The person name is the parent folder with its collected_ prefix removed.
        person = os.path.basename(os.path.dirname(path)).replace("collected_", "", 1)
        X, y, _ = _load_one_npz(path, target_features)
        pooled_X.append(X)
        pooled_y.append(y)
        by_person.setdefault(person, []).append((X, y))
        print(f"[DATA] + {person}/{os.path.basename(path)}: {X.shape[0]} samples")

    # Collapse each person's list of sessions into single (X, y) arrays.
    for person, chunks in by_person.items():
        by_person[person] = (
            np.concatenate([c[0] for c in chunks], axis=0),
            np.concatenate([c[1] for c in chunks], axis=0),
        )

    X_all = np.concatenate(pooled_X, axis=0)
    y_all = np.concatenate(pooled_y, axis=0)
    print(f"[DATA] {len(by_person)} persons, {X_all.shape[0]} total samples, {X_all.shape[1]} features")
    return by_person, X_all, y_all
147
+
148
+
149
def load_raw_npz(npz_path):
    """Load a single .npz without cleaning or feature selection.

    Intended for exploration notebooks that want the untouched 17-column data.
    Returns (features float32, labels int64, feature_names list).
    """
    data = np.load(npz_path, allow_pickle=True)
    return (
        data['features'].astype(np.float32),
        data['labels'].astype(np.int64),
        list(data['feature_names']),
    )
156
+
157
+
158
+ # ── Legacy helpers (used by models/mlp/train.py and models/xgboost/train.py) ─
159
+
160
def _load_real_data(model_name: str):
    """Legacy wrapper: pooled (X, y) without the feature-name list."""
    features, labels, _unused_names = load_all_pooled(model_name)
    return features, labels
163
+
164
+
165
def _generate_synthetic_data(model_name: str):
    """Produce a small random stand-in dataset when no real data is on disk.

    Returns (features, labels) shaped like the real pooled data for the
    requested model. Seeded RNG keeps the fallback reproducible.
    """
    target_features = SELECTED_FEATURES.get(model_name, SELECTED_FEATURES["face_orientation"])
    n_samples, n_classes = 500, 2
    n_dims = len(target_features)
    rng = np.random.RandomState(42)
    features = rng.randn(n_samples, n_dims).astype(np.float32)
    labels = rng.randint(0, n_classes, size=n_samples).astype(np.int64)
    print(f"[DATA] Using synthetic data for '{model_name}': {n_samples} samples, {n_dims} features, {n_classes} classes")
    return features, labels
175
+
176
+
177
def _split_and_scale(features, labels, split_ratios, seed, scale):
    """Stratified train/val/test split with optional standardisation.

    split_ratios -- (train, val, test) fractions
    scale        -- when True, fit a StandardScaler on train and apply to all
    Returns (splits dict with X_/y_ train/val/test, fitted scaler or None).
    """
    # Carve off the test set first, then split the remainder into train/val;
    # the val fraction is rescaled relative to what is left.
    test_frac = split_ratios[2]
    val_frac = split_ratios[1] / (split_ratios[0] + split_ratios[1])

    X_rest, X_test, y_rest, y_test = train_test_split(
        features, labels, test_size=test_frac, random_state=seed, stratify=labels,
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=val_frac, random_state=seed, stratify=y_rest,
    )

    scaler = None
    if scale:
        # Fit on the training split only, to avoid leakage into val/test.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)
        print("[DATA] Applied StandardScaler (fitted on training split)")

    print(f"[DATA] Split (stratified): train={len(y_train)}, val={len(y_val)}, test={len(y_test)}")
    return {
        "X_train": X_train, "y_train": y_train,
        "X_val": X_val, "y_val": y_val,
        "X_test": X_test, "y_test": y_test,
    }, scaler
205
+
206
+
207
def get_numpy_splits(model_name: str, split_ratios=(0.7, 0.15, 0.15), seed: int = 42, scale: bool = True):
    """Return plain numpy splits for non-PyTorch models (e.g. XGBoost).

    Returns (splits, num_features, num_classes, scaler).
    """
    features, labels = _load_real_data(model_name)
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)
    # Class count assumes labels are 0..max contiguous integers.
    return splits, features.shape[1], int(labels.max()) + 1, scaler
214
+
215
+
216
def get_dataloaders(model_name: str, batch_size: int = 32, split_ratios=(0.7, 0.15, 0.15), seed: int = 42, scale: bool = True):
    """Return PyTorch DataLoaders for neural-network models.

    Returns (train_loader, val_loader, test_loader, num_features,
    num_classes, scaler). Only the training loader shuffles.
    """
    features, labels = _load_real_data(model_name)
    num_features = features.shape[1]
    num_classes = int(labels.max()) + 1
    splits, scaler = _split_and_scale(features, labels, split_ratios, seed, scale)

    def _loader(part: str, shuffle: bool):
        # Wrap one split as Dataset + DataLoader.
        ds = FeatureVectorDataset(splits[f"X_{part}"], splits[f"y_{part}"])
        return DataLoader(ds, batch_size=batch_size, shuffle=shuffle)

    return (
        _loader("train", True),
        _loader("val", False),
        _loader("test", False),
        num_features,
        num_classes,
        scaler,
    )
232
+
docker-compose.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Single-service stack: build the local Dockerfile and publish the app port.
services:
  focus-guard:
    build: .
    ports:
      - "7860:7860"
eslint.config.js ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// ESLint flat config for the React/Vite frontend.
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import { defineConfig, globalIgnores } from 'eslint/config'

export default defineConfig([
  // Never lint build output.
  globalIgnores(['dist']),
  {
    files: ['**/*.{js,jsx}'],
    extends: [
      js.configs.recommended,
      reactHooks.configs.flat.recommended,
      reactRefresh.configs.vite,
    ],
    languageOptions: {
      ecmaVersion: 2020,
      globals: globals.browser,
      parserOptions: {
        ecmaVersion: 'latest',
        ecmaFeatures: { jsx: true },
        sourceType: 'module',
      },
    },
    rules: {
      // Permit intentionally-unused bindings that start with a capital or _.
      'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }],
    },
  },
])
evaluation/README.md CHANGED
@@ -1,3 +1,46 @@
1
- # evaluation
2
 
3
- Place metrics scripts, run configs, and results here. Logs dir is used by `models.mlp.train` for training logs.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evaluation/
2
 
3
+ Training logs and performance metrics.
4
+
5
+ ## 1. Contents
6
+
7
+ ```
8
+ logs/
9
+ ├── face_orientation_training_log.json # MLP (latest run)
10
+ ├── mlp_face_orientation_training_log.json # MLP (alternate)
11
+ └── xgboost_face_orientation_training_log.json # XGBoost
12
+ ```
13
+
14
+ ## 2. Log Format
15
+
16
+ Each JSON file records the full training history:
17
+
18
+ **MLP logs:**
19
+ ```json
20
+ {
21
+ "config": { "epochs": 30, "lr": 0.001, "batch_size": 32, ... },
22
+ "history": {
23
+ "train_loss": [0.287, 0.260, ...],
24
+ "val_loss": [0.256, 0.245, ...],
25
+ "train_acc": [0.889, 0.901, ...],
26
+ "val_acc": [0.905, 0.909, ...]
27
+ },
28
+ "test": { "accuracy": 0.929, "f1": 0.929, "roc_auc": 0.971 }
29
+ }
30
+ ```
31
+
32
+ **XGBoost logs:**
33
+ ```json
34
+ {
35
+ "config": { "n_estimators": 600, "max_depth": 8, "learning_rate": 0.149, ... },
36
+ "train_losses": [0.577, ...],
37
+ "val_losses": [0.576, ...],
38
+ "test": { "accuracy": 0.959, "f1": 0.959, "roc_auc": 0.991 }
39
+ }
40
+ ```
41
+
42
+ ## 3. Generated By
43
+
44
+ - `python -m models.mlp.train` β†’ writes MLP log
45
+ - `python -m models.xgboost.train` β†’ writes XGBoost log
46
+ - Notebooks in `notebooks/` also save logs here
index.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!doctype html>
<html lang="en">

<head>
  <meta charset="UTF-8" />
  <link rel="icon" type="image/svg+xml" href="/vite.svg" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Focus Guard</title>
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;700&display=swap" rel="stylesheet">
</head>

<body>
  <!-- React mounts here; Vite serves/injects the bundle from src/main.jsx -->
  <div id="root"></div>
  <script type="module" src="/src/main.jsx"></script>
</body>

</html>
main.py ADDED
@@ -0,0 +1,964 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException, Request
2
+ from fastapi.staticfiles import StaticFiles
3
+ from fastapi.responses import FileResponse
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from pydantic import BaseModel
6
+ from typing import Optional, List, Any
7
+ import base64
8
+ import cv2
9
+ import numpy as np
10
+ import aiosqlite
11
+ import json
12
+ from datetime import datetime, timedelta
13
+ import math
14
+ import os
15
+ from pathlib import Path
16
+ from typing import Callable
17
+ import asyncio
18
+ import concurrent.futures
19
+ import threading
20
+
21
+ from aiortc import RTCPeerConnection, RTCSessionDescription, VideoStreamTrack
22
+ from av import VideoFrame
23
+
24
+ from mediapipe.tasks.python.vision import FaceLandmarksConnections
25
+ from ui.pipeline import FaceMeshPipeline, MLPPipeline, HybridFocusPipeline, XGBoostPipeline
26
+ from models.face_mesh import FaceMeshDetector
27
+
28
# ================ FACE MESH DRAWING (server-side, for WebRTC) ================

# OpenCV drawing primitives take BGR color tuples.
_FONT = cv2.FONT_HERSHEY_SIMPLEX
_CYAN = (255, 255, 0)
_GREEN = (0, 255, 0)
_MAGENTA = (255, 0, 255)
_ORANGE = (0, 165, 255)
_RED = (0, 0, 255)
_WHITE = (255, 255, 255)
_LIGHT_GREEN = (144, 238, 144)

# MediaPipe face-mesh connection lists flattened to (start, end) landmark index pairs.
_TESSELATION_CONNS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION]
_CONTOUR_CONNS = [(c.start, c.end) for c in FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS]
# Landmark index groups for individual facial features (MediaPipe face-mesh topology).
_LEFT_EYEBROW = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]
_RIGHT_EYEBROW = [300, 293, 334, 296, 336, 285, 295, 282, 283, 276]
_NOSE_BRIDGE = [6, 197, 195, 5, 4, 1, 19, 94, 2]
_LIPS_OUTER = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 409, 270, 269, 267, 0, 37, 39, 40, 185, 61]
_LIPS_INNER = [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 415, 310, 311, 312, 13, 82, 81, 80, 191, 78]
# Six points per eye, marked on the overlay as EAR (eye aspect ratio) key points.
_LEFT_EAR_POINTS = [33, 160, 158, 133, 153, 145]
_RIGHT_EAR_POINTS = [362, 385, 387, 263, 373, 380]
48
+
49
+
50
+ def _lm_px(lm, idx, w, h):
51
+ return (int(lm[idx, 0] * w), int(lm[idx, 1] * h))
52
+
53
+
54
def _draw_polyline(frame, lm, indices, w, h, color, thickness):
    """Draw anti-aliased segments connecting consecutive landmark indices in place."""
    points = [_lm_px(lm, idx, w, h) for idx in indices]
    for start, end in zip(points, points[1:]):
        cv2.line(frame, start, end, color, thickness, cv2.LINE_AA)
57
+
58
+
59
def _draw_face_mesh(frame, lm, w, h):
    """Draw the full face-mesh overlay on `frame` in place.

    Renders: semi-transparent tessellation grid, contours, eyebrows, nose
    bridge, lips, eye outlines, EAR key points, iris circles, and gaze lines.

    Args:
        frame: BGR image (modified in place).
        lm: landmark array indexable as lm[idx, 0]/lm[idx, 1] with normalized coords.
        w, h: frame width/height in pixels, used to scale normalized coords.
    """
    # Tessellation (gray triangular grid, semi-transparent): drawn on a copy
    # and alpha-blended back so the grid does not overpower the video.
    overlay = frame.copy()
    for s, e in _TESSELATION_CONNS:
        cv2.line(overlay, _lm_px(lm, s, w, h), _lm_px(lm, e, w, h), (200, 200, 200), 1, cv2.LINE_AA)
    cv2.addWeighted(overlay, 0.3, frame, 0.7, 0, frame)
    # Contours
    for s, e in _CONTOUR_CONNS:
        cv2.line(frame, _lm_px(lm, s, w, h), _lm_px(lm, e, w, h), _CYAN, 1, cv2.LINE_AA)
    # Eyebrows
    _draw_polyline(frame, lm, _LEFT_EYEBROW, w, h, _LIGHT_GREEN, 2)
    _draw_polyline(frame, lm, _RIGHT_EYEBROW, w, h, _LIGHT_GREEN, 2)
    # Nose
    _draw_polyline(frame, lm, _NOSE_BRIDGE, w, h, _ORANGE, 1)
    # Lips
    _draw_polyline(frame, lm, _LIPS_OUTER, w, h, _MAGENTA, 1)
    _draw_polyline(frame, lm, _LIPS_INNER, w, h, (200, 0, 200), 1)
    # Eyes (closed polylines around each eye outline)
    left_pts = np.array([_lm_px(lm, i, w, h) for i in FaceMeshDetector.LEFT_EYE_INDICES], dtype=np.int32)
    cv2.polylines(frame, [left_pts], True, _GREEN, 2, cv2.LINE_AA)
    right_pts = np.array([_lm_px(lm, i, w, h) for i in FaceMeshDetector.RIGHT_EYE_INDICES], dtype=np.int32)
    cv2.polylines(frame, [right_pts], True, _GREEN, 2, cv2.LINE_AA)
    # EAR key points
    for indices in [_LEFT_EAR_POINTS, _RIGHT_EAR_POINTS]:
        for idx in indices:
            cv2.circle(frame, _lm_px(lm, idx, w, h), 3, (0, 255, 255), -1, cv2.LINE_AA)
    # Irises + gaze lines. Iris group convention: first point is the center,
    # the next four lie on the iris boundary (used to estimate the radius).
    for iris_idx, eye_inner, eye_outer in [
        (FaceMeshDetector.LEFT_IRIS_INDICES, 133, 33),
        (FaceMeshDetector.RIGHT_IRIS_INDICES, 362, 263),
    ]:
        iris_pts = np.array([_lm_px(lm, i, w, h) for i in iris_idx], dtype=np.int32)
        center = iris_pts[0]
        if len(iris_pts) >= 5:
            radii = [np.linalg.norm(iris_pts[j] - center) for j in range(1, 5)]
            radius = max(int(np.mean(radii)), 2)
            cv2.circle(frame, tuple(center), radius, _MAGENTA, 2, cv2.LINE_AA)
            cv2.circle(frame, tuple(center), 2, _WHITE, -1, cv2.LINE_AA)
        # Gaze line: iris offset from the eye center, extended 3x for visibility.
        eye_cx = int((lm[eye_inner, 0] + lm[eye_outer, 0]) / 2.0 * w)
        eye_cy = int((lm[eye_inner, 1] + lm[eye_outer, 1]) / 2.0 * h)
        dx, dy = center[0] - eye_cx, center[1] - eye_cy
        cv2.line(frame, tuple(center), (int(center[0] + dx * 3), int(center[1] + dy * 3)), _RED, 1, cv2.LINE_AA)
102
+
103
+
104
def _draw_hud(frame, result, model_name):
    """Draw the status bar and metric overlay on `frame` in place.

    Layout matches live_demo.py: black top bar with FOCUSED/NOT FOCUSED and the
    model name, a detail line with confidence and component scores, head-pose
    angles in the top-right when available, and a YAWN indicator.

    Args:
        frame: BGR image (modified in place).
        result: detection dict; reads keys is_focused, mlp_prob/raw_score,
            mar, s_face, s_eye, yaw/pitch/roll, is_yawning.
        model_name: active model identifier, rendered uppercased.
    """
    h, w = frame.shape[:2]
    is_focused = result["is_focused"]
    status = "FOCUSED" if is_focused else "NOT FOCUSED"
    color = _GREEN if is_focused else _RED

    # Top bar
    cv2.rectangle(frame, (0, 0), (w, 55), (0, 0, 0), -1)
    cv2.putText(frame, status, (10, 28), _FONT, 0.8, color, 2, cv2.LINE_AA)
    cv2.putText(frame, model_name.upper(), (w - 150, 28), _FONT, 0.45, _WHITE, 1, cv2.LINE_AA)

    # Detail line: confidence falls back from mlp_prob to raw_score to 0.
    conf = result.get("mlp_prob", result.get("raw_score", 0.0))
    mar_s = f" MAR:{result['mar']:.2f}" if result.get("mar") is not None else ""
    sf = result.get("s_face", 0)
    se = result.get("s_eye", 0)
    detail = f"conf:{conf:.2f} S_face:{sf:.2f} S_eye:{se:.2f}{mar_s}"
    cv2.putText(frame, detail, (10, 48), _FONT, 0.4, _WHITE, 1, cv2.LINE_AA)

    # Head pose (top right)
    if result.get("yaw") is not None:
        cv2.putText(frame, f"yaw:{result['yaw']:+.0f} pitch:{result['pitch']:+.0f} roll:{result['roll']:+.0f}",
                    (w - 280, 48), _FONT, 0.4, (180, 180, 180), 1, cv2.LINE_AA)

    # Yawn indicator
    if result.get("is_yawning"):
        cv2.putText(frame, "YAWN", (10, 75), _FONT, 0.7, _ORANGE, 2, cv2.LINE_AA)
132
+
133
# Landmark indices used for face mesh drawing on client (union of all groups).
# Sending only these instead of all 478 saves ~60% of the landmarks payload.
# NOTE(review): the WebSocket handler below currently sends ALL landmarks;
# confirm whether this sparse set is still consumed by the client.
_MESH_INDICES = sorted(set(
    [10,338,297,332,284,251,389,356,454,323,361,288,397,365,379,378,400,377,152,148,176,149,150,136,172,58,132,93,234,127,162,21,54,103,67,109] # face oval
    + [33,7,163,144,145,153,154,155,133,173,157,158,159,160,161,246] # left eye
    + [362,382,381,380,374,373,390,249,263,466,388,387,386,385,384,398] # right eye
    + [468,469,470,471,472, 473,474,475,476,477] # irises
    + [70,63,105,66,107,55,65,52,53,46] # left eyebrow
    + [300,293,334,296,336,285,295,282,283,276] # right eyebrow
    + [6,197,195,5,4,1,19,94,2] # nose bridge
    + [61,146,91,181,84,17,314,405,321,375,291,409,270,269,267,0,37,39,40,185] # lips outer
    + [78,95,88,178,87,14,317,402,318,324,308,415,310,311,312,13,82,81,80,191] # lips inner
    + [33,160,158,133,153,145] # left EAR key points
    + [362,385,387,263,373,380] # right EAR key points
))
# Build a lookup: original_index -> position in sparse array, so client can reconstruct.
_MESH_INDEX_SET = set(_MESH_INDICES)
150
+
151
# Initialize FastAPI app
app = FastAPI(title="Focus Guard API")

# Add CORS middleware.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# very permissive; fine for a local demo, tighten for public deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
162
+
163
+ # Global variables
164
+ db_path = "focus_guard.db"
165
+ pcs = set()
166
+ _cached_model_name = "mlp" # in-memory cache, updated via /api/settings
167
+
168
async def _wait_for_ice_gathering(pc: RTCPeerConnection):
    """Wait until the peer connection finishes gathering ICE candidates.

    Called before returning the SDP answer so all candidates are embedded in
    it (non-trickle ICE). Returns immediately if gathering already completed.
    """
    if pc.iceGatheringState == "complete":
        return
    done = asyncio.Event()

    @pc.on("icegatheringstatechange")
    def _on_state_change():
        if pc.iceGatheringState == "complete":
            done.set()

    await done.wait()
179
+
180
+ # ================ DATABASE MODELS ================
181
+
182
async def init_database():
    """Initialize SQLite database with required tables.

    Idempotent: uses CREATE TABLE IF NOT EXISTS and INSERT OR IGNORE, so it is
    safe to run on every startup. Creates focus_sessions, focus_events, and a
    singleton user_settings row (id fixed to 1).
    """
    async with aiosqlite.connect(db_path) as db:
        # FocusSessions table
        await db.execute("""
            CREATE TABLE IF NOT EXISTS focus_sessions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                start_time TIMESTAMP NOT NULL,
                end_time TIMESTAMP,
                duration_seconds INTEGER DEFAULT 0,
                focus_score REAL DEFAULT 0.0,
                total_frames INTEGER DEFAULT 0,
                focused_frames INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # FocusEvents table
        await db.execute("""
            CREATE TABLE IF NOT EXISTS focus_events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id INTEGER NOT NULL,
                timestamp TIMESTAMP NOT NULL,
                is_focused BOOLEAN NOT NULL,
                confidence REAL NOT NULL,
                detection_data TEXT,
                FOREIGN KEY (session_id) REFERENCES focus_sessions (id)
            )
        """)

        # UserSettings table (singleton row: id is constrained to 1)
        await db.execute("""
            CREATE TABLE IF NOT EXISTS user_settings (
                id INTEGER PRIMARY KEY CHECK (id = 1),
                sensitivity INTEGER DEFAULT 6,
                notification_enabled BOOLEAN DEFAULT 1,
                notification_threshold INTEGER DEFAULT 30,
                frame_rate INTEGER DEFAULT 30,
                model_name TEXT DEFAULT 'mlp'
            )
        """)

        # Insert default settings if not exists
        await db.execute("""
            INSERT OR IGNORE INTO user_settings (id, sensitivity, notification_enabled, notification_threshold, frame_rate, model_name)
            VALUES (1, 6, 1, 30, 30, 'mlp')
        """)

        await db.commit()
231
+
232
+ # ================ PYDANTIC MODELS ================
233
+
234
class SessionCreate(BaseModel):
    """Request body for creating a session (no fields required)."""
    pass
236
+
237
class SessionEnd(BaseModel):
    """Request body for ending a session."""
    session_id: int
239
+
240
class SettingsUpdate(BaseModel):
    """Partial settings update; any field left as None is unchanged."""
    sensitivity: Optional[int] = None
    notification_enabled: Optional[bool] = None
    notification_threshold: Optional[int] = None
    frame_rate: Optional[int] = None
    model_name: Optional[str] = None
246
+
247
class VideoTransformTrack(VideoStreamTrack):
    """aiortc video track that runs focus inference on incoming frames and
    returns an annotated copy (face mesh + HUD) to the peer.

    Detection results are logged against `session_id` and, when the peer's
    data channel is open, also pushed over it as JSON.
    """

    def __init__(self, track, session_id: int, get_channel: Callable[[], Any]):
        super().__init__()
        self.track = track                    # upstream (remote) video track
        self.session_id = session_id          # DB session to record events against
        self.get_channel = get_channel        # lazily resolves the data channel
        self.last_inference_time = 0
        self.min_inference_interval = 1 / 60  # cap inference at ~60 Hz
        self.last_frame = None                # last annotated frame, reused when skipping inference

    async def recv(self):
        frame = await self.track.recv()
        img = frame.to_ndarray(format="bgr24")
        if img is None:
            return frame

        # Normalize size for inference/drawing
        img = cv2.resize(img, (640, 480))

        now = datetime.now().timestamp()
        do_infer = (now - self.last_inference_time) >= self.min_inference_interval

        if do_infer:
            self.last_inference_time = now

            # Fall back to 'mlp' if the selected model failed to load.
            model_name = _cached_model_name
            if model_name not in pipelines or pipelines.get(model_name) is None:
                model_name = 'mlp'
            active_pipeline = pipelines.get(model_name)

            if active_pipeline is not None:
                # Run CPU-bound inference off the event loop.
                loop = asyncio.get_running_loop()
                out = await loop.run_in_executor(
                    _inference_executor,
                    _process_frame_safe,
                    active_pipeline,
                    img,
                    model_name,
                )
                is_focused = out["is_focused"]
                confidence = out.get("mlp_prob", out.get("raw_score", 0.0))
                metadata = {"s_face": out.get("s_face", 0.0), "s_eye": out.get("s_eye", 0.0), "mar": out.get("mar", 0.0), "model": model_name}

                # Draw face mesh + HUD on the video frame
                h_f, w_f = img.shape[:2]
                lm = out.get("landmarks")
                if lm is not None:
                    _draw_face_mesh(img, lm, w_f, h_f)
                _draw_hud(img, out, model_name)
            else:
                is_focused = False
                confidence = 0.0
                metadata = {"model": model_name}
                cv2.rectangle(img, (0, 0), (img.shape[1], 55), (0, 0, 0), -1)
                cv2.putText(img, "NO MODEL", (10, 28), _FONT, 0.8, _RED, 2, cv2.LINE_AA)

            if self.session_id:
                await store_focus_event(self.session_id, is_focused, confidence, metadata)

            channel = self.get_channel()
            if channel and channel.readyState == "open":
                try:
                    # BUG FIX: the original referenced an undefined name
                    # `detections`, raising a NameError that was silently
                    # swallowed below — the data channel never received any
                    # messages. Send the detection metadata instead.
                    channel.send(json.dumps({"type": "detection", "focused": is_focused, "confidence": round(confidence, 3), "detections": metadata}))
                except Exception:
                    pass

            self.last_frame = img
        elif self.last_frame is not None:
            # Re-send the last annotated frame between inference ticks.
            img = self.last_frame

        new_frame = VideoFrame.from_ndarray(img, format="bgr24")
        new_frame.pts = frame.pts
        new_frame.time_base = frame.time_base
        return new_frame
321
+
322
+ # ================ DATABASE OPERATIONS ================
323
+
324
async def create_session():
    """Insert a new focus_sessions row stamped with the current time and
    return its autoincrement id."""
    async with aiosqlite.connect(db_path) as db:
        cursor = await db.execute(
            "INSERT INTO focus_sessions (start_time) VALUES (?)",
            (datetime.now().isoformat(),)
        )
        await db.commit()
        return cursor.lastrowid
332
+
333
async def end_session(session_id: int):
    """Finalize a session: record end_time, duration, and focus_score.

    focus_score is the fraction of frames classified as focused (0.0 when no
    frames were recorded). Returns a summary dict, or None if the session id
    does not exist.
    """
    async with aiosqlite.connect(db_path) as db:
        cursor = await db.execute(
            "SELECT start_time, total_frames, focused_frames FROM focus_sessions WHERE id = ?",
            (session_id,)
        )
        row = await cursor.fetchone()

        if not row:
            return None

        start_time_str, total_frames, focused_frames = row
        start_time = datetime.fromisoformat(start_time_str)
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()
        focus_score = focused_frames / total_frames if total_frames > 0 else 0.0

        await db.execute("""
            UPDATE focus_sessions
            SET end_time = ?, duration_seconds = ?, focus_score = ?
            WHERE id = ?
        """, (end_time.isoformat(), int(duration), focus_score, session_id))

        await db.commit()

        return {
            'session_id': session_id,
            'start_time': start_time_str,
            'end_time': end_time.isoformat(),
            'duration_seconds': int(duration),
            'focus_score': round(focus_score, 3),
            'total_frames': total_frames,
            'focused_frames': focused_frames
        }
367
+
368
async def store_focus_event(session_id: int, is_focused: bool, confidence: float, metadata: dict):
    """Persist one detection event and bump the session's frame counters.

    Used by the WebRTC path (one DB write per inferred frame); the WebSocket
    path batches writes through _EventBuffer instead.
    """
    async with aiosqlite.connect(db_path) as db:
        await db.execute("""
            INSERT INTO focus_events (session_id, timestamp, is_focused, confidence, detection_data)
            VALUES (?, ?, ?, ?, ?)
        """, (session_id, datetime.now().isoformat(), is_focused, confidence, json.dumps(metadata)))

        await db.execute("""
            UPDATE focus_sessions
            SET total_frames = total_frames + 1,
                focused_frames = focused_frames + ?
            WHERE id = ?
        """, (1 if is_focused else 0, session_id))
        await db.commit()
382
+
383
+
384
+ class _EventBuffer:
385
+ """Buffer focus events in memory and flush to DB in batches to avoid per-frame DB writes."""
386
+
387
+ def __init__(self, flush_interval: float = 2.0):
388
+ self._buf: list = []
389
+ self._lock = asyncio.Lock()
390
+ self._flush_interval = flush_interval
391
+ self._task: asyncio.Task | None = None
392
+ self._total_frames = 0
393
+ self._focused_frames = 0
394
+
395
+ def start(self):
396
+ if self._task is None:
397
+ self._task = asyncio.create_task(self._flush_loop())
398
+
399
+ async def stop(self):
400
+ if self._task:
401
+ self._task.cancel()
402
+ try:
403
+ await self._task
404
+ except asyncio.CancelledError:
405
+ pass
406
+ self._task = None
407
+ await self._flush()
408
+
409
+ def add(self, session_id: int, is_focused: bool, confidence: float, metadata: dict):
410
+ self._buf.append((session_id, datetime.now().isoformat(), is_focused, confidence, json.dumps(metadata)))
411
+ self._total_frames += 1
412
+ if is_focused:
413
+ self._focused_frames += 1
414
+
415
+ async def _flush_loop(self):
416
+ while True:
417
+ await asyncio.sleep(self._flush_interval)
418
+ await self._flush()
419
+
420
+ async def _flush(self):
421
+ async with self._lock:
422
+ if not self._buf:
423
+ return
424
+ batch = self._buf[:]
425
+ total = self._total_frames
426
+ focused = self._focused_frames
427
+ self._buf.clear()
428
+ self._total_frames = 0
429
+ self._focused_frames = 0
430
+
431
+ if not batch:
432
+ return
433
+
434
+ session_id = batch[0][0]
435
+ try:
436
+ async with aiosqlite.connect(db_path) as db:
437
+ await db.executemany("""
438
+ INSERT INTO focus_events (session_id, timestamp, is_focused, confidence, detection_data)
439
+ VALUES (?, ?, ?, ?, ?)
440
+ """, batch)
441
+ await db.execute("""
442
+ UPDATE focus_sessions
443
+ SET total_frames = total_frames + ?,
444
+ focused_frames = focused_frames + ?
445
+ WHERE id = ?
446
+ """, (total, focused, session_id))
447
+ await db.commit()
448
+ except Exception as e:
449
+ print(f"[DB] Flush error: {e}")
450
+
451
+ # ================ STARTUP/SHUTDOWN ================
452
+
453
# Loaded model pipelines; entries stay None when loading fails at startup.
pipelines = {
    "geometric": None,
    "mlp": None,
    "hybrid": None,
    "xgboost": None,
}

# Thread pool for CPU-bound inference so the event loop stays responsive.
_inference_executor = concurrent.futures.ThreadPoolExecutor(
    max_workers=4,
    thread_name_prefix="inference",
)
# One lock per pipeline so shared state (TemporalTracker, etc.) is not corrupted when
# multiple frames are processed in parallel by the thread pool.
_pipeline_locks = {name: threading.Lock() for name in ("geometric", "mlp", "hybrid", "xgboost")}
468
+
469
+
470
def _process_frame_safe(pipeline, frame, model_name: str):
    """Run `pipeline.process_frame(frame)` under the model's lock.

    Executed on the inference thread pool; the per-model lock keeps two worker
    threads from mutating the same pipeline's shared state concurrently.
    """
    lock = _pipeline_locks[model_name]
    with lock:
        result = pipeline.process_frame(frame)
    return result
474
+
475
@app.on_event("startup")
async def startup_event():
    """Initialize the DB, restore the persisted model selection, and load pipelines.

    Each pipeline is loaded best-effort: a failure only disables that model
    (its entry stays None) rather than aborting startup.
    NOTE(review): @app.on_event is deprecated in recent FastAPI in favor of
    lifespan handlers — consider migrating.
    """
    global pipelines, _cached_model_name
    print(" Starting Focus Guard API...")
    await init_database()
    # Load cached model name from DB
    async with aiosqlite.connect(db_path) as db:
        cursor = await db.execute("SELECT model_name FROM user_settings WHERE id = 1")
        row = await cursor.fetchone()
        if row:
            _cached_model_name = row[0]
    print("[OK] Database initialized")

    try:
        pipelines["geometric"] = FaceMeshPipeline()
        print("[OK] FaceMeshPipeline (geometric) loaded")
    except Exception as e:
        print(f"[WARN] FaceMeshPipeline unavailable: {e}")

    try:
        pipelines["mlp"] = MLPPipeline()
        print("[OK] MLPPipeline loaded")
    except Exception as e:
        print(f"[ERR] Failed to load MLPPipeline: {e}")

    try:
        pipelines["hybrid"] = HybridFocusPipeline()
        print("[OK] HybridFocusPipeline loaded")
    except Exception as e:
        print(f"[WARN] HybridFocusPipeline unavailable: {e}")

    try:
        pipelines["xgboost"] = XGBoostPipeline()
        print("[OK] XGBoostPipeline loaded")
    except Exception as e:
        print(f"[ERR] Failed to load XGBoostPipeline: {e}")
511
+
512
@app.on_event("shutdown")
async def shutdown_event():
    """Release the inference thread pool without waiting for in-flight work."""
    _inference_executor.shutdown(wait=False)
    print(" Shutting down Focus Guard API...")
516
+
517
+ # ================ WEBRTC SIGNALING ================
518
+
519
@app.post("/api/webrtc/offer")
async def webrtc_offer(offer: dict):
    """WebRTC signaling endpoint.

    Accepts the browser's SDP offer, creates a peer connection and a DB
    session, attaches the annotated VideoTransformTrack to the incoming video
    track, waits for full (non-trickle) ICE gathering, and returns the SDP
    answer plus the session id. The session is closed when the connection
    fails/closes/disconnects.
    """
    try:
        print(f"Received WebRTC offer")

        pc = RTCPeerConnection()
        pcs.add(pc)

        session_id = await create_session()
        print(f"Created session: {session_id}")

        # The data channel may arrive after the track; pass a getter so the
        # track always sees the latest value.
        channel_ref = {"channel": None}

        @pc.on("datachannel")
        def on_datachannel(channel):
            print(f"Data channel opened")
            channel_ref["channel"] = channel

        @pc.on("track")
        def on_track(track):
            print(f"Received track: {track.kind}")
            if track.kind == "video":
                local_track = VideoTransformTrack(track, session_id, lambda: channel_ref["channel"])
                pc.addTrack(local_track)
                print(f"Video track added")

                @track.on("ended")
                async def on_ended():
                    print(f"Track ended")

        @pc.on("connectionstatechange")
        async def on_connectionstatechange():
            print(f"Connection state changed: {pc.connectionState}")
            if pc.connectionState in ("failed", "closed", "disconnected"):
                try:
                    await end_session(session_id)
                except Exception as e:
                    print(f"⚠Error ending session: {e}")
                pcs.discard(pc)
                await pc.close()

        await pc.setRemoteDescription(RTCSessionDescription(sdp=offer["sdp"], type=offer["type"]))
        print(f"Remote description set")

        answer = await pc.createAnswer()
        await pc.setLocalDescription(answer)
        print(f"Answer created")

        await _wait_for_ice_gathering(pc)
        print(f"ICE gathering complete")

        return {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type, "session_id": session_id}

    except Exception as e:
        print(f"WebRTC offer error: {e}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=f"WebRTC error: {str(e)}")
577
+
578
+ # ================ WEBSOCKET ================
579
+
580
@app.websocket("/ws/video")
async def websocket_endpoint(websocket: WebSocket):
    """Main video-analysis WebSocket.

    Message protocol:
      - binary message               -> JPEG frame (fast path)
      - text {"type": "frame"}       -> base64 JPEG frame (legacy fallback)
      - text {"type": "start_session"} / {"type": "end_session"} -> session control

    Runs two concurrent loops: a receiver that keeps only the newest frame in
    a one-slot buffer, and a processor that runs inference on it — stale
    frames are dropped instead of queueing up behind slow inference.
    """
    await websocket.accept()
    session_id = None
    frame_count = 0
    running = True
    event_buffer = _EventBuffer(flush_interval=2.0)

    # Latest frame slot — only the most recent frame is kept, older ones are dropped.
    # Using a dict so nested functions can mutate without nonlocal issues.
    _slot = {"frame": None}
    _frame_ready = asyncio.Event()

    async def _receive_loop():
        """Receive messages as fast as possible. Binary = frame, text = control."""
        nonlocal session_id, running
        try:
            while running:
                msg = await websocket.receive()
                msg_type = msg.get("type", "")

                if msg_type == "websocket.disconnect":
                    running = False
                    _frame_ready.set()
                    return

                # Binary message → JPEG frame (fast path, no base64)
                raw_bytes = msg.get("bytes")
                if raw_bytes is not None and len(raw_bytes) > 0:
                    _slot["frame"] = raw_bytes
                    _frame_ready.set()
                    continue

                # Text message → JSON control command (or legacy base64 frame)
                text = msg.get("text")
                if not text:
                    continue
                data = json.loads(text)

                if data["type"] == "frame":
                    # Legacy base64 path (fallback)
                    _slot["frame"] = base64.b64decode(data["image"])
                    _frame_ready.set()

                elif data["type"] == "start_session":
                    session_id = await create_session()
                    event_buffer.start()
                    # Reset temporal state so a new session does not inherit
                    # smoothing from the previous one.
                    for p in pipelines.values():
                        if p is not None and hasattr(p, "reset_session"):
                            p.reset_session()
                    await websocket.send_json({"type": "session_started", "session_id": session_id})

                elif data["type"] == "end_session":
                    if session_id:
                        await event_buffer.stop()
                        summary = await end_session(session_id)
                        if summary:
                            await websocket.send_json({"type": "session_ended", "summary": summary})
                        session_id = None
        except WebSocketDisconnect:
            running = False
            _frame_ready.set()
        except Exception as e:
            print(f"[WS] receive error: {e}")
            running = False
            _frame_ready.set()

    async def _process_loop():
        """Process only the latest frame, dropping stale ones."""
        nonlocal frame_count, running
        loop = asyncio.get_event_loop()
        while running:
            await _frame_ready.wait()
            _frame_ready.clear()
            if not running:
                return

            # Grab latest frame and clear slot
            raw = _slot["frame"]
            _slot["frame"] = None
            if raw is None:
                continue

            try:
                nparr = np.frombuffer(raw, np.uint8)
                frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
                if frame is None:
                    continue
                frame = cv2.resize(frame, (640, 480))

                # Fall back to 'mlp' if the selected model failed to load.
                model_name = _cached_model_name
                if model_name not in pipelines or pipelines.get(model_name) is None:
                    model_name = "mlp"
                active_pipeline = pipelines.get(model_name)

                landmarks_list = None
                if active_pipeline is not None:
                    # CPU-bound inference runs on the thread pool.
                    out = await loop.run_in_executor(
                        _inference_executor,
                        _process_frame_safe,
                        active_pipeline,
                        frame,
                        model_name,
                    )
                    is_focused = out["is_focused"]
                    confidence = out.get("mlp_prob", out.get("raw_score", 0.0))

                    lm = out.get("landmarks")
                    if lm is not None:
                        # Send all 478 landmarks as flat array for tessellation drawing
                        landmarks_list = [
                            [round(float(lm[i, 0]), 3), round(float(lm[i, 1]), 3)]
                            for i in range(lm.shape[0])
                        ]

                    if session_id:
                        event_buffer.add(session_id, is_focused, confidence, {
                            "s_face": out.get("s_face", 0.0),
                            "s_eye": out.get("s_eye", 0.0),
                            "mar": out.get("mar", 0.0),
                            "model": model_name,
                        })
                else:
                    is_focused = False
                    confidence = 0.0

                resp = {
                    "type": "detection",
                    "focused": is_focused,
                    "confidence": round(confidence, 3),
                    "model": model_name,
                    "fc": frame_count,
                }
                if active_pipeline is not None:
                    # Send detailed metrics for HUD
                    if out.get("yaw") is not None:
                        resp["yaw"] = round(out["yaw"], 1)
                        resp["pitch"] = round(out["pitch"], 1)
                        resp["roll"] = round(out["roll"], 1)
                    if out.get("mar") is not None:
                        resp["mar"] = round(out["mar"], 3)
                    resp["sf"] = round(out.get("s_face", 0), 3)
                    resp["se"] = round(out.get("s_eye", 0), 3)
                if landmarks_list is not None:
                    resp["lm"] = landmarks_list
                await websocket.send_json(resp)
                frame_count += 1
            except Exception as e:
                print(f"[WS] process error: {e}")

    try:
        await asyncio.gather(_receive_loop(), _process_loop())
    except Exception:
        pass
    finally:
        # Always close the session even on abnormal disconnects.
        running = False
        if session_id:
            await event_buffer.stop()
            await end_session(session_id)
740
+ # ================ API ENDPOINTS ================
741
+
742
@app.post("/api/sessions/start")
async def api_start_session():
    """Create a new focus session and return its id."""
    session_id = await create_session()
    return {"session_id": session_id}
746
+
747
@app.post("/api/sessions/end")
async def api_end_session(data: SessionEnd):
    """Close the given session and return its summary; 404 if unknown."""
    summary = await end_session(data.session_id)
    if summary:
        return summary
    raise HTTPException(status_code=404, detail="Session not found")
752
+
753
@app.get("/api/sessions")
async def get_sessions(filter: str = "all", limit: int = 50, offset: int = 0):
    """List focus sessions, newest first.

    Args:
        filter: "today" / "week" / "month" restrict by start_time;
            "all" returns only completed sessions (end_time set).
        limit: page size; pass -1 to return everything (used for export).
        offset: pagination offset (ignored when limit is -1).
    """
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row

        # NEW: If importing/exporting all, remove limit if special flag or high limit
        # For simplicity: if limit is -1, return all
        limit_clause = "LIMIT ? OFFSET ?"
        params = []

        base_query = "SELECT * FROM focus_sessions"
        where_clause = ""

        if filter == "today":
            date_filter = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
            where_clause = " WHERE start_time >= ?"
            params.append(date_filter.isoformat())
        elif filter == "week":
            date_filter = datetime.now() - timedelta(days=7)
            where_clause = " WHERE start_time >= ?"
            params.append(date_filter.isoformat())
        elif filter == "month":
            date_filter = datetime.now() - timedelta(days=30)
            where_clause = " WHERE start_time >= ?"
            params.append(date_filter.isoformat())
        elif filter == "all":
            # Just ensure we only get completed sessions or all sessions
            where_clause = " WHERE end_time IS NOT NULL"

        query = f"{base_query}{where_clause} ORDER BY start_time DESC"

        # Handle Limit for Exports
        if limit == -1:
            # No limit clause for export
            pass
        else:
            query += f" {limit_clause}"
            params.extend([limit, offset])

        cursor = await db.execute(query, tuple(params))
        rows = await cursor.fetchall()
        return [dict(row) for row in rows]
795
+
796
+ # --- NEW: Import Endpoint ---
797
# --- NEW: Import Endpoint ---
@app.post("/api/import")
async def import_sessions(sessions: List[dict]):
    """Bulk-insert exported session records.

    Returns {"status": "success", "count": N} or {"status": "error", ...}.
    Note: imported sessions get fresh autoincrement ids; their focus_events
    are not imported.
    """
    count = 0
    try:
        async with aiosqlite.connect(db_path) as db:
            for session in sessions:
                # Use .get() to handle potential missing fields from older versions or edits
                await db.execute("""
                    INSERT INTO focus_sessions (start_time, end_time, duration_seconds, focus_score, total_frames, focused_frames, created_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, (
                    session.get('start_time'),
                    session.get('end_time'),
                    session.get('duration_seconds', 0),
                    session.get('focus_score', 0.0),
                    session.get('total_frames', 0),
                    session.get('focused_frames', 0),
                    session.get('created_at', session.get('start_time'))
                ))
                count += 1
            await db.commit()
        return {"status": "success", "count": count}
    except Exception as e:
        print(f"Import Error: {e}")
        return {"status": "error", "message": str(e)}
822
+
823
+ # --- NEW: Clear History Endpoint ---
824
# --- NEW: Clear History Endpoint ---
@app.delete("/api/history")
async def clear_history():
    """Delete all focus events and sessions (irreversible)."""
    try:
        async with aiosqlite.connect(db_path) as db:
            # Delete events first (foreign key good practice)
            await db.execute("DELETE FROM focus_events")
            await db.execute("DELETE FROM focus_sessions")
            await db.commit()
        return {"status": "success", "message": "History cleared"}
    except Exception as e:
        return {"status": "error", "message": str(e)}
835
+
836
@app.get("/api/sessions/{session_id}")
async def get_session(session_id: int):
    """Return one session with its full chronological event list; 404 if unknown."""
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute("SELECT * FROM focus_sessions WHERE id = ?", (session_id,))
        row = await cursor.fetchone()
        if not row: raise HTTPException(status_code=404, detail="Session not found")
        session = dict(row)
        cursor = await db.execute("SELECT * FROM focus_events WHERE session_id = ? ORDER BY timestamp", (session_id,))
        events = [dict(r) for r in await cursor.fetchall()]
        session['events'] = events
        return session
848
+
849
@app.get("/api/settings")
async def get_settings():
    """Return the singleton user-settings row, or built-in defaults if absent."""
    async with aiosqlite.connect(db_path) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute("SELECT * FROM user_settings WHERE id = 1")
        row = await cursor.fetchone()
    if row is not None:
        return dict(row)
    # No settings row yet: mirror the defaults used elsewhere in the app
    return {
        'sensitivity': 6,
        'notification_enabled': True,
        'notification_threshold': 30,
        'frame_rate': 30,
        'model_name': 'mlp',
    }
857
+
858
@app.put("/api/settings")
async def update_settings(settings: SettingsUpdate):
    """Apply any provided settings fields, clamping numeric values into range.

    Bootstraps the singleton settings row on first use. Selecting a model is
    only honored when that model's pipeline is loaded, and it also refreshes
    the in-process cache so inference switches immediately.
    """
    global _cached_model_name

    async with aiosqlite.connect(db_path) as db:
        cursor = await db.execute("SELECT id FROM user_settings WHERE id = 1")
        if await cursor.fetchone() is None:
            # Create the singleton row before applying a partial update
            await db.execute("INSERT INTO user_settings (id, sensitivity) VALUES (1, 6)")
            await db.commit()

        columns = []
        values = []

        def _set(column, value):
            # Accumulate one SET clause + its bound parameter
            columns.append(f"{column} = ?")
            values.append(value)

        if settings.sensitivity is not None:
            _set("sensitivity", max(1, min(10, settings.sensitivity)))
        if settings.notification_enabled is not None:
            _set("notification_enabled", settings.notification_enabled)
        if settings.notification_threshold is not None:
            _set("notification_threshold", max(5, min(300, settings.notification_threshold)))
        if settings.frame_rate is not None:
            _set("frame_rate", max(5, min(60, settings.frame_rate)))
        if (settings.model_name is not None
                and settings.model_name in pipelines
                and pipelines[settings.model_name] is not None):
            _set("model_name", settings.model_name)
            _cached_model_name = settings.model_name

        if columns:
            await db.execute(
                f"UPDATE user_settings SET {', '.join(columns)} WHERE id = 1",
                values,
            )
            await db.commit()
        return {"status": "success", "updated": len(columns) > 0}
892
+
893
@app.get("/api/stats/summary")
async def get_stats_summary():
    """Aggregate stats over completed sessions plus a daily-usage streak.

    Improvement over the previous version: the three aggregate values
    (count, total duration, mean score) are fetched in ONE query instead of
    three separate round-trips over the same filtered table — identical
    results, fewer DB calls.

    The streak counts consecutive calendar days with at least one completed
    session, anchored at today.
    """
    async with aiosqlite.connect(db_path) as db:
        cursor = await db.execute(
            """
            SELECT COUNT(*),
                   COALESCE(SUM(duration_seconds), 0),
                   COALESCE(AVG(focus_score), 0.0)
            FROM focus_sessions
            WHERE end_time IS NOT NULL
            """
        )
        total_sessions, total_focus_time, avg_focus_score = await cursor.fetchone()

        cursor = await db.execute(
            "SELECT DISTINCT DATE(start_time) as session_date "
            "FROM focus_sessions WHERE end_time IS NOT NULL "
            "ORDER BY session_date DESC"
        )
        dates = [row[0] for row in await cursor.fetchall()]

    # Walk most-recent-first; entry i must land exactly i days before today.
    # NOTE(review): a streak that ended yesterday reports 0 — confirm intended.
    streak_days = 0
    today = datetime.now().date()
    for i, date_str in enumerate(dates):
        if datetime.fromisoformat(date_str).date() == today - timedelta(days=i):
            streak_days += 1
        else:
            break

    return {
        'total_sessions': total_sessions,
        'total_focus_time': int(total_focus_time),
        'avg_focus_score': round(avg_focus_score, 3),
        'streak_days': streak_days,
    }
919
+
920
@app.get("/api/models")
async def get_available_models():
    """Return list of loaded model names and which is currently active."""
    loaded = [name for name, pipeline in pipelines.items() if pipeline is not None]

    async with aiosqlite.connect(db_path) as db:
        cursor = await db.execute("SELECT model_name FROM user_settings WHERE id = 1")
        row = await cursor.fetchone()

    active = row[0] if row else "mlp"
    # If the stored choice is not actually loaded, fall back to the first one
    if loaded and active not in loaded:
        active = loaded[0]
    return {"available": loaded, "current": active}
931
+
932
@app.get("/api/mesh-topology")
async def get_mesh_topology():
    """Tessellation edge pairs for client-side face-mesh drawing.

    The client caches this response, so it is served as a static payload.
    """
    return {"tessellation": _TESSELATION_CONNS}
936
+
937
@app.get("/health")
async def health_check():
    """Liveness probe: which models loaded and whether the DB file exists."""
    loaded_models = [name for name, pipeline in pipelines.items() if pipeline is not None]
    return {
        "status": "healthy",
        "models_loaded": loaded_models,
        "database": os.path.exists(db_path),
    }
941
+
942
+ # ================ STATIC FILES (SPA SUPPORT) ================
943
+
944
# Locate the built frontend relative to this file so it works from any cwd
_STATIC_DIR = Path(__file__).resolve().parent / "static"
_ASSETS_DIR = _STATIC_DIR / "assets"

# Mount hashed JS/CSS bundles first so /assets/* bypasses the SPA catch-all
if _ASSETS_DIR.is_dir():
    app.mount("/assets", StaticFiles(directory=str(_ASSETS_DIR)), name="assets")
951
+
952
+ # 2. Catch-all for SPA: serve index.html for app routes, never for /assets (would break JS MIME type)
953
@app.get("/{full_path:path}")
async def serve_react_app(full_path: str, request: Request):
    """SPA catch-all: serve index.html for client routes.

    API/WebSocket paths and asset paths 404 here instead of receiving HTML,
    which would otherwise break module-script MIME types on the client.

    Fix: the previous version checked both `startswith("assets")` and
    `startswith("assets/")` — the second branch could never fire when the
    first was false, so the checks are folded into one tuple `startswith`.
    """
    if full_path.startswith(("api", "ws", "assets")):
        raise HTTPException(status_code=404, detail="Not Found")

    index_path = _STATIC_DIR / "index.html"
    if index_path.is_file():
        return FileResponse(str(index_path))
    return {"message": "React app not found. Please run 'npm run build' and copy dist to static."}
models/README.md CHANGED
@@ -1,10 +1,53 @@
1
- # models
2
 
3
- - **cnn/eye_attention/** β€” YOLO open/closed eye classifier, crop helper, train stub
4
- - **mlp/** β€” PyTorch MLP on feature vectors (face_orientation / eye_behaviour); checkpoints under `mlp/face_orientation_model/`, `mlp/eye_behaviour_model/`
5
- - **geometric/face_orientation/** β€” head pose (solvePnP). **geometric/eye_behaviour/** β€” EAR, gaze, MAR
6
- - **pretrained/face_mesh/** β€” MediaPipe face landmarks (no training)
7
- - **attention/** β€” webcam feature collection (17-d), stubs for train/classifier/fusion
8
- - **prepare_dataset.py** β€” loads from `data_preparation/processed/` or synthetic; used by `mlp/train.py`
9
 
10
- Run legacy MLP training: `python -m models.mlp.train`. The sklearn MLP used in the live demo is trained in `data_preparation/MLP/train_mlp.ipynb` and saved under `../MLP/models/`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models/
2
 
3
+ Feature extraction modules and model training scripts.
 
 
 
 
 
4
 
5
+ ## 1. Feature Extraction
6
+
7
+ Root-level modules form the real-time inference pipeline:
8
+
9
+ | Module | Input | Output |
10
+ |--------|-------|--------|
11
+ | `face_mesh.py` | BGR frame | 478 MediaPipe landmarks |
12
+ | `head_pose.py` | Landmarks, frame size | yaw, pitch, roll, face/eye score, gaze offset, head deviation |
13
+ | `eye_scorer.py` | Landmarks | EAR (left/right/avg), gaze ratio (h/v), MAR |
14
+ | `eye_crop.py` | Landmarks, frame | Cropped eye region images |
15
+ | `eye_classifier.py` | Eye crops or landmarks | Eye open/closed prediction (geometric fallback) |
16
+ | `collect_features.py` | BGR frame | 17-d feature vector + temporal features (PERCLOS, blink rate, etc.) |
17
+
18
+ ## 2. Training Scripts
19
+
20
+ | Folder | Model | Command |
21
+ |--------|-------|---------|
22
+ | `mlp/` | PyTorch MLP (64β†’32, 2-class) | `python -m models.mlp.train` |
23
+ | `xgboost/` | XGBoost (600 trees, depth 8) | `python -m models.xgboost.train` |
24
+
25
+ ### mlp/
26
+
27
+ - `train.py` β€” training loop with early stopping, ClearML opt-in
28
+ - `sweep.py` β€” hyperparameter search (Optuna: lr, batch_size)
29
+ - `eval_accuracy.py` β€” load checkpoint and print test metrics
30
+ - Saves to **`checkpoints/mlp_best.pt`**
31
+
32
+ ### xgboost/
33
+
34
+ - `train.py` β€” training with eval-set logging
35
+ - `sweep.py` / `sweep_local.py` β€” hyperparameter search (Optuna + ClearML)
36
+ - `eval_accuracy.py` β€” load checkpoint and print test metrics
37
+ - Saves to **`checkpoints/xgboost_face_orientation_best.json`**
38
+
39
+ ## 3. Data Loading
40
+
41
+ All training scripts import from `data_preparation.prepare_dataset`:
42
+
43
+ ```python
44
+ from data_preparation.prepare_dataset import get_numpy_splits # XGBoost
45
+ from data_preparation.prepare_dataset import get_dataloaders # MLP (PyTorch)
46
+ ```
47
+
48
+ ## 4. Results
49
+
50
+ | Model | Test Accuracy | F1 | ROC-AUC |
51
+ |-------|--------------|-----|---------|
52
+ | XGBoost | 95.87% | 0.959 | 0.991 |
53
+ | MLP | 92.92% | 0.929 | 0.971 |
models/{attention/__init__.py β†’ __init__.py} RENAMED
File without changes
models/attention/classifier.py DELETED
File without changes
models/attention/fusion.py DELETED
File without changes
models/attention/train.py DELETED
File without changes
models/cnn/notebooks/EyeCNN.ipynb ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "code",
21
+ "source": [
22
+ "import os\n",
23
+ "import torch\n",
24
+ "import torch.nn as nn\n",
25
+ "import torch.optim as optim\n",
26
+ "from torch.utils.data import DataLoader\n",
27
+ "from torchvision import datasets, transforms\n",
28
+ "\n",
29
+ "from google.colab import drive\n",
30
+ "drive.mount('/content/drive')\n",
31
+ "!cp -r /content/drive/MyDrive/Dataset_clean /content/\n",
32
+ "\n",
33
+ "#Verify structure\n",
34
+ "for split in ['train', 'val', 'test']:\n",
35
+ " path = f'/content/Dataset_clean/{split}'\n",
36
+ " classes = os.listdir(path)\n",
37
+ " total = sum(len(os.listdir(os.path.join(path, c))) for c in classes)\n",
38
+ " print(f'{split}: {total} images | classes: {classes}')"
39
+ ],
40
+ "metadata": {
41
+ "colab": {
42
+ "base_uri": "https://localhost:8080/"
43
+ },
44
+ "id": "sE1F3em-V5go",
45
+ "outputId": "2c73a9a6-a198-468c-a2cc-253b2de7cc3f"
46
+ },
47
+ "execution_count": null,
48
+ "outputs": [
49
+ {
50
+ "output_type": "stream",
51
+ "name": "stdout",
52
+ "text": [
53
+ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
54
+ ]
55
+ }
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": null,
61
+ "metadata": {
62
+ "id": "nG2bh66rQ56G"
63
+ },
64
+ "outputs": [],
65
+ "source": [
66
+ "class EyeCNN(nn.Module):\n",
67
+ " def __init__(self, num_classes=2):\n",
68
+ " super(EyeCNN, self).__init__()\n",
69
+ " self.conv_layers = nn.Sequential(\n",
70
+ " nn.Conv2d(3, 32, 3, 1, 1),\n",
71
+ " nn.BatchNorm2d(32),\n",
72
+ " nn.ReLU(),\n",
73
+ " nn.MaxPool2d(2, 2),\n",
74
+ "\n",
75
+ " nn.Conv2d(32, 64, 3, 1, 1),\n",
76
+ " nn.BatchNorm2d(64),\n",
77
+ " nn.ReLU(),\n",
78
+ " nn.MaxPool2d(2, 2),\n",
79
+ "\n",
80
+ " nn.Conv2d(64, 128, 3, 1, 1),\n",
81
+ " nn.BatchNorm2d(128),\n",
82
+ " nn.ReLU(),\n",
83
+ " nn.MaxPool2d(2, 2),\n",
84
+ "\n",
85
+ " nn.Conv2d(128, 256, 3, 1, 1),\n",
86
+ " nn.BatchNorm2d(256),\n",
87
+ " nn.ReLU(),\n",
88
+ " nn.MaxPool2d(2, 2)\n",
89
+ " )\n",
90
+ "\n",
91
+ " self.fc_layers = nn.Sequential(\n",
92
+ " nn.AdaptiveAvgPool2d((1, 1)),\n",
93
+ " nn.Flatten(),\n",
94
+ " nn.Linear(256, 512),\n",
95
+ " nn.ReLU(),\n",
96
+ " nn.Dropout(0.35),\n",
97
+ " nn.Linear(512, num_classes)\n",
98
+ " )\n",
99
+ "\n",
100
+ " def forward(self, x):\n",
101
+ " x = self.conv_layers(x)\n",
102
+ " x = self.fc_layers(x)\n",
103
+ " return x"
104
+ ]
105
+ }
106
+ ]
107
+ }
models/cnn/notebooks/EyeCNN_Train_Evaluate_new.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
models/cnn/notebooks/EyeCNN_Training_Evaluate.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
models/cnn/notebooks/README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ # GAP Large Project
models/{attention/collect_features.py β†’ collect_features.py} RENAMED
@@ -1,4 +1,3 @@
1
- # Usage: python -m models.attention.collect_features [--name alice] [--duration 600]
2
 
3
  import argparse
4
  import collections
@@ -10,13 +9,13 @@ import time
10
  import cv2
11
  import numpy as np
12
 
13
- _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
14
  if _PROJECT_ROOT not in sys.path:
15
  sys.path.insert(0, _PROJECT_ROOT)
16
 
17
- from models.pretrained.face_mesh.face_mesh import FaceMeshDetector
18
- from models.geometric.face_orientation.head_pose import HeadPoseEstimator
19
- from models.geometric.eye_behaviour.eye_scorer import EyeBehaviourScorer, compute_gaze_ratio, compute_mar
20
 
21
  FONT = cv2.FONT_HERSHEY_SIMPLEX
22
  GREEN = (0, 255, 0)
@@ -38,7 +37,7 @@ assert NUM_FEATURES == 17
38
 
39
  class TemporalTracker:
40
  EAR_BLINK_THRESH = 0.21
41
- MAR_YAWN_THRESH = 0.04
42
  PERCLOS_WINDOW = 60
43
  BLINK_WINDOW_SEC = 30.0
44
 
@@ -86,25 +85,35 @@ class TemporalTracker:
86
  return perclos, blink_rate, closure_dur, yawn_dur
87
 
88
 
89
- def extract_features(landmarks, w, h, head_pose, eye_scorer, temporal):
90
- from models.geometric.eye_behaviour.eye_scorer import _LEFT_EYE_EAR, _RIGHT_EYE_EAR, compute_ear
 
91
 
92
- ear_left = compute_ear(landmarks, _LEFT_EYE_EAR)
93
- ear_right = compute_ear(landmarks, _RIGHT_EYE_EAR)
 
 
94
  ear_avg = (ear_left + ear_right) / 2.0
95
- h_gaze, v_gaze = compute_gaze_ratio(landmarks)
96
- mar = compute_mar(landmarks)
97
 
98
- angles = head_pose.estimate(landmarks, w, h)
 
 
 
 
 
 
 
 
 
99
  yaw = angles[0] if angles else 0.0
100
  pitch = angles[1] if angles else 0.0
101
  roll = angles[2] if angles else 0.0
102
 
103
- s_face = head_pose.score(landmarks, w, h)
104
- s_eye = eye_scorer.score(landmarks)
105
 
106
  gaze_offset = math.sqrt((h_gaze - 0.5) ** 2 + (v_gaze - 0.5) ** 2)
107
- head_deviation = math.sqrt(yaw ** 2 + pitch ** 2)
108
 
109
  perclos, blink_rate, closure_dur, yawn_dur = temporal.update(ear_avg, mar)
110
 
@@ -181,7 +190,7 @@ def main():
181
  parser.add_argument("--duration", type=int, default=600,
182
  help="Max recording time (seconds, default 10 min)")
183
  parser.add_argument("--output-dir", type=str,
184
- default=os.path.join(_PROJECT_ROOT, "data_preparation", "collected"),
185
  help="Where to save .npz files")
186
  args = parser.parse_args()
187
 
@@ -238,13 +247,11 @@ def main():
238
  landmarks = detector.process(frame)
239
  face_ok = landmarks is not None
240
 
241
- # record if labeling + face visible
242
  if face_ok and label is not None:
243
  vec = extract_features(landmarks, w, h, head_pose, eye_scorer, temporal)
244
  features_list.append(vec)
245
  labels_list.append(label)
246
 
247
- # count transitions
248
  if prev_label is not None and label != prev_label:
249
  transitions += 1
250
  prev_label = label
 
 
1
 
2
  import argparse
3
  import collections
 
9
  import cv2
10
  import numpy as np
11
 
12
+ _PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
13
  if _PROJECT_ROOT not in sys.path:
14
  sys.path.insert(0, _PROJECT_ROOT)
15
 
16
+ from models.face_mesh import FaceMeshDetector
17
+ from models.head_pose import HeadPoseEstimator
18
+ from models.eye_scorer import EyeBehaviourScorer, compute_gaze_ratio, compute_mar
19
 
20
  FONT = cv2.FONT_HERSHEY_SIMPLEX
21
  GREEN = (0, 255, 0)
 
37
 
38
  class TemporalTracker:
39
  EAR_BLINK_THRESH = 0.21
40
+ MAR_YAWN_THRESH = 0.55
41
  PERCLOS_WINDOW = 60
42
  BLINK_WINDOW_SEC = 30.0
43
 
 
85
  return perclos, blink_rate, closure_dur, yawn_dur
86
 
87
 
88
+ def extract_features(landmarks, w, h, head_pose, eye_scorer, temporal,
89
+ *, _pre=None):
90
+ from models.eye_scorer import _LEFT_EYE_EAR, _RIGHT_EYE_EAR, compute_ear
91
 
92
+ p = _pre or {}
93
+
94
+ ear_left = p.get("ear_left", compute_ear(landmarks, _LEFT_EYE_EAR))
95
+ ear_right = p.get("ear_right", compute_ear(landmarks, _RIGHT_EYE_EAR))
96
  ear_avg = (ear_left + ear_right) / 2.0
 
 
97
 
98
+ if "h_gaze" in p and "v_gaze" in p:
99
+ h_gaze, v_gaze = p["h_gaze"], p["v_gaze"]
100
+ else:
101
+ h_gaze, v_gaze = compute_gaze_ratio(landmarks)
102
+
103
+ mar = p.get("mar", compute_mar(landmarks))
104
+
105
+ angles = p.get("angles")
106
+ if angles is None:
107
+ angles = head_pose.estimate(landmarks, w, h)
108
  yaw = angles[0] if angles else 0.0
109
  pitch = angles[1] if angles else 0.0
110
  roll = angles[2] if angles else 0.0
111
 
112
+ s_face = p.get("s_face", head_pose.score(landmarks, w, h))
113
+ s_eye = p.get("s_eye", eye_scorer.score(landmarks))
114
 
115
  gaze_offset = math.sqrt((h_gaze - 0.5) ** 2 + (v_gaze - 0.5) ** 2)
116
+ head_deviation = math.sqrt(yaw ** 2 + pitch ** 2) # cleaned downstream
117
 
118
  perclos, blink_rate, closure_dur, yawn_dur = temporal.update(ear_avg, mar)
119
 
 
190
  parser.add_argument("--duration", type=int, default=600,
191
  help="Max recording time (seconds, default 10 min)")
192
  parser.add_argument("--output-dir", type=str,
193
+ default=os.path.join(_PROJECT_ROOT, "data", "collected_data"),
194
  help="Where to save .npz files")
195
  args = parser.parse_args()
196
 
 
247
  landmarks = detector.process(frame)
248
  face_ok = landmarks is not None
249
 
 
250
  if face_ok and label is not None:
251
  vec = extract_features(landmarks, w, h, head_pose, eye_scorer, temporal)
252
  features_list.append(vec)
253
  labels_list.append(label)
254
 
 
255
  if prev_label is not None and label != prev_label:
256
  transitions += 1
257
  prev_label = label
models/eye_classifier.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from abc import ABC, abstractmethod

import numpy as np


class EyeClassifier(ABC):
    """Interface for eye-state classifiers operating on BGR eye crops."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier for this backend (e.g. 'yolo', 'geometric')."""

    @abstractmethod
    def predict_score(self, crops_bgr: "list[np.ndarray]") -> float:
        """Return an attentiveness score averaged over the crops."""


class EyeClassifierGeometric(EyeClassifier):
    """Placeholder name kept private; see alias below."""


class GeometricOnlyClassifier(EyeClassifier):
    """No-op backend: defers entirely to the geometric (EAR-based) scorer."""

    @property
    def name(self) -> str:
        return "geometric"

    def predict_score(self, crops_bgr: "list[np.ndarray]") -> float:
        # Always report "fully open"; geometric pipeline does the real scoring
        return 1.0


class YOLOv11Classifier(EyeClassifier):
    """YOLO classification model scoring eye crops as open/attentive."""

    def __init__(self, checkpoint_path: str, device: str = "cpu"):
        # Lazy import: ultralytics is only needed for this backend
        from ultralytics import YOLO

        self._model = YOLO(checkpoint_path)
        self._device = device

        # Find the class index meaning "attentive"/"open"; if the model's
        # class names don't match, fall back to the highest index.
        class_names = self._model.names
        self._attentive_idx = next(
            (idx for idx, label in class_names.items() if label in ("open", "attentive")),
            None,
        )
        if self._attentive_idx is None:
            self._attentive_idx = max(class_names.keys())
        print(f"[YOLO] Classes: {class_names}, attentive_idx={self._attentive_idx}")

    @property
    def name(self) -> str:
        return "yolo"

    def predict_score(self, crops_bgr: "list[np.ndarray]") -> float:
        if not crops_bgr:
            return 1.0
        predictions = self._model.predict(crops_bgr, device=self._device, verbose=False)
        scores = [float(pred.probs.data[self._attentive_idx]) for pred in predictions]
        return sum(scores) / len(scores) if scores else 1.0


def load_eye_classifier(
    path: "str | None" = None,
    backend: str = "yolo",
    device: str = "cpu",
) -> EyeClassifier:
    """Build an EyeClassifier; falls back to geometric when no path is given.

    Raises ImportError (after logging a hint) when the YOLO backend is
    requested but ultralytics is not installed.
    """
    if path is None or backend == "geometric":
        return GeometricOnlyClassifier()

    try:
        return YOLOv11Classifier(path, device=device)
    except ImportError:
        print("[CLASSIFIER] ultralytics required for YOLO. pip install ultralytics")
        raise
models/eye_crop.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+
4
+ from models.face_mesh import FaceMeshDetector
5
+
6
+ LEFT_EYE_CONTOUR = FaceMeshDetector.LEFT_EYE_INDICES
7
+ RIGHT_EYE_CONTOUR = FaceMeshDetector.RIGHT_EYE_INDICES
8
+
9
+ IMAGENET_MEAN = (0.485, 0.456, 0.406)
10
+ IMAGENET_STD = (0.229, 0.224, 0.225)
11
+
12
+ CROP_SIZE = 96
13
+
14
+
15
+ def _bbox_from_landmarks(
16
+ landmarks: np.ndarray,
17
+ indices: list[int],
18
+ frame_w: int,
19
+ frame_h: int,
20
+ expand: float = 0.4,
21
+ ) -> tuple[int, int, int, int]:
22
+ pts = landmarks[indices, :2]
23
+ px = pts[:, 0] * frame_w
24
+ py = pts[:, 1] * frame_h
25
+
26
+ x_min, x_max = px.min(), px.max()
27
+ y_min, y_max = py.min(), py.max()
28
+ w = x_max - x_min
29
+ h = y_max - y_min
30
+ cx = (x_min + x_max) / 2
31
+ cy = (y_min + y_max) / 2
32
+
33
+ size = max(w, h) * (1 + expand)
34
+ half = size / 2
35
+
36
+ x1 = int(max(cx - half, 0))
37
+ y1 = int(max(cy - half, 0))
38
+ x2 = int(min(cx + half, frame_w))
39
+ y2 = int(min(cy + half, frame_h))
40
+
41
+ return x1, y1, x2, y2
42
+
43
+
44
def extract_eye_crops(
    frame: np.ndarray,
    landmarks: np.ndarray,
    expand: float = 0.4,
    crop_size: int = CROP_SIZE,
) -> "tuple[np.ndarray, np.ndarray, tuple, tuple]":
    """Cut fixed-size left/right eye patches out of a BGR frame.

    Returns (left_crop, right_crop, left_bbox, right_bbox); each crop is a
    crop_size x crop_size BGR image. A degenerate (empty) box yields a black
    placeholder so downstream batching never sees a zero-sized array.
    """
    frame_h, frame_w = frame.shape[:2]

    def _crop_one(contour):
        # One eye: bbox from its contour landmarks, then slice + resize
        bbox = _bbox_from_landmarks(landmarks, contour, frame_w, frame_h, expand)
        x1, y1, x2, y2 = bbox
        patch = frame[y1:y2, x1:x2]
        if patch.size == 0:
            # Clamping can collapse the box (e.g. eye outside the frame)
            patch = np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
        else:
            patch = cv2.resize(patch, (crop_size, crop_size), interpolation=cv2.INTER_AREA)
        return patch, bbox

    left_crop, left_bbox = _crop_one(LEFT_EYE_CONTOUR)
    right_crop, right_bbox = _crop_one(RIGHT_EYE_CONTOUR)
    return left_crop, right_crop, left_bbox, right_bbox
69
+
70
+
71
def crop_to_tensor(crop_bgr: np.ndarray):
    """Convert a BGR uint8 crop into a normalized CHW float tensor.

    Converts BGR->RGB, scales to [0, 1], applies ImageNet per-channel
    mean/std, and returns a (3, H, W) torch tensor. Torch is imported
    lazily so this module stays usable without it.
    """
    import torch

    rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    normalized = (rgb - mean) / std
    return torch.from_numpy(normalized.transpose(2, 0, 1))
models/{geometric/eye_behaviour/eye_scorer.py β†’ eye_scorer.py} RENAMED
@@ -95,7 +95,6 @@ def compute_gaze_ratio(landmarks: np.ndarray) -> tuple[float, float]:
95
 
96
 
97
  def compute_mar(landmarks: np.ndarray) -> float:
98
- # Mouth aspect ratio: high = mouth open (yawning / sleepy)
99
  top = landmarks[_MOUTH_TOP, :2]
100
  bottom = landmarks[_MOUTH_BOTTOM, :2]
101
  left = landmarks[_MOUTH_LEFT, :2]
@@ -140,7 +139,10 @@ class EyeBehaviourScorer:
140
  return 0.5 * (1.0 + math.cos(math.pi * t))
141
 
142
  def score(self, landmarks: np.ndarray) -> float:
143
- ear = compute_avg_ear(landmarks)
 
 
 
144
  ear_s = self._ear_score(ear)
145
  if ear_s < 0.3:
146
  return ear_s
@@ -149,7 +151,9 @@ class EyeBehaviourScorer:
149
  return ear_s * gaze_s
150
 
151
  def detailed_score(self, landmarks: np.ndarray) -> dict:
152
- ear = compute_avg_ear(landmarks)
 
 
153
  ear_s = self._ear_score(ear)
154
  h_ratio, v_ratio = compute_gaze_ratio(landmarks)
155
  gaze_s = self._gaze_score(h_ratio, v_ratio)
 
95
 
96
 
97
  def compute_mar(landmarks: np.ndarray) -> float:
 
98
  top = landmarks[_MOUTH_TOP, :2]
99
  bottom = landmarks[_MOUTH_BOTTOM, :2]
100
  left = landmarks[_MOUTH_LEFT, :2]
 
139
  return 0.5 * (1.0 + math.cos(math.pi * t))
140
 
141
  def score(self, landmarks: np.ndarray) -> float:
142
+ left_ear = compute_ear(landmarks, _LEFT_EYE_EAR)
143
+ right_ear = compute_ear(landmarks, _RIGHT_EYE_EAR)
144
+ # Use minimum EAR so closing ONE eye is enough to drop the score
145
+ ear = min(left_ear, right_ear)
146
  ear_s = self._ear_score(ear)
147
  if ear_s < 0.3:
148
  return ear_s
 
151
  return ear_s * gaze_s
152
 
153
  def detailed_score(self, landmarks: np.ndarray) -> dict:
154
+ left_ear = compute_ear(landmarks, _LEFT_EYE_EAR)
155
+ right_ear = compute_ear(landmarks, _RIGHT_EYE_EAR)
156
+ ear = min(left_ear, right_ear)
157
  ear_s = self._ear_score(ear)
158
  h_ratio, v_ratio = compute_gaze_ratio(landmarks)
159
  gaze_s = self._gaze_score(h_ratio, v_ratio)
models/{pretrained/face_mesh/face_mesh.py β†’ face_mesh.py} RENAMED
@@ -1,4 +1,5 @@
1
  import os
 
2
  from pathlib import Path
3
  from urllib.request import urlretrieve
4
 
@@ -51,14 +52,16 @@ class FaceMeshDetector:
51
  running_mode=RunningMode.VIDEO,
52
  )
53
  self._landmarker = FaceLandmarker.create_from_options(options)
54
- self._frame_ts = 0 # ms, for video API
 
55
 
56
  def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
57
  # BGR in -> (478,3) norm x,y,z or None
58
  rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
59
  mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
60
- self._frame_ts += 33 # ~30fps
61
- result = self._landmarker.detect_for_video(mp_image, self._frame_ts)
 
62
 
63
  if not result.face_landmarks:
64
  return None
 
1
  import os
2
+ import time
3
  from pathlib import Path
4
  from urllib.request import urlretrieve
5
 
 
52
  running_mode=RunningMode.VIDEO,
53
  )
54
  self._landmarker = FaceLandmarker.create_from_options(options)
55
+ self._t0 = time.monotonic()
56
+ self._last_ts = 0
57
 
58
  def process(self, bgr_frame: np.ndarray) -> np.ndarray | None:
59
  # BGR in -> (478,3) norm x,y,z or None
60
  rgb = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
61
  mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
62
+ ts = max(int((time.monotonic() - self._t0) * 1000), self._last_ts + 1)
63
+ self._last_ts = ts
64
+ result = self._landmarker.detect_for_video(mp_image, ts)
65
 
66
  if not result.face_landmarks:
67
  return None
models/geometric/eye_behaviour/__init__.py DELETED
File without changes
models/geometric/face_orientation/__init__.py DELETED
@@ -1 +0,0 @@
1
-
 
 
models/{geometric/face_orientation/head_pose.py β†’ head_pose.py} RENAMED
@@ -25,6 +25,8 @@ class HeadPoseEstimator:
25
  self._camera_matrix = None
26
  self._frame_size = None
27
  self._dist_coeffs = np.zeros((4, 1), dtype=np.float64)
 
 
28
 
29
  def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray:
30
  if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h):
@@ -39,6 +41,10 @@ class HeadPoseEstimator:
39
  return self._camera_matrix
40
 
41
  def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int):
 
 
 
 
42
  image_points = np.array(
43
  [
44
  [landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h]
@@ -54,7 +60,10 @@ class HeadPoseEstimator:
54
  self._dist_coeffs,
55
  flags=cv2.SOLVEPNP_ITERATIVE,
56
  )
57
- return success, rvec, tvec, image_points
 
 
 
58
 
59
  def estimate(
60
  self, landmarks: np.ndarray, frame_w: int, frame_h: int
 
25
  self._camera_matrix = None
26
  self._frame_size = None
27
  self._dist_coeffs = np.zeros((4, 1), dtype=np.float64)
28
+ self._cache_key = None
29
+ self._cache_result = None
30
 
31
  def _get_camera_matrix(self, frame_w: int, frame_h: int) -> np.ndarray:
32
  if self._camera_matrix is not None and self._frame_size == (frame_w, frame_h):
 
41
  return self._camera_matrix
42
 
43
  def _solve(self, landmarks: np.ndarray, frame_w: int, frame_h: int):
44
+ key = (landmarks.data.tobytes(), frame_w, frame_h)
45
+ if self._cache_key == key:
46
+ return self._cache_result
47
+
48
  image_points = np.array(
49
  [
50
  [landmarks[i, 0] * frame_w, landmarks[i, 1] * frame_h]
 
60
  self._dist_coeffs,
61
  flags=cv2.SOLVEPNP_ITERATIVE,
62
  )
63
+ result = (success, rvec, tvec, image_points)
64
+ self._cache_key = key
65
+ self._cache_result = result
66
+ return result
67
 
68
  def estimate(
69
  self, landmarks: np.ndarray, frame_w: int, frame_h: int