File size: 22,028 Bytes
7b53d75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
# LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
# ClearML logging: set USE_CLEARML=1 env var or pass --clearml flag

import glob
import os
import sys

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, roc_auc_score, f1_score
from xgboost import XGBClassifier

_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, _PROJECT_ROOT)

from data_preparation.prepare_dataset import load_per_person, SELECTED_FEATURES

# Output locations: plots directory and the generated markdown report.
PLOTS_DIR = os.path.join(os.path.dirname(__file__), "plots")
REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
SEED = 42  # fixed RNG seed so LOPO folds and model training are reproducible

# Optional ClearML experiment tracking.
# Enable with: USE_CLEARML=1 python -m evaluation.justify_thresholds
# or:          python -m evaluation.justify_thresholds --clearml
_USE_CLEARML = os.environ.get("USE_CLEARML", "0") == "1" or "--clearml" in sys.argv

_task = None    # clearml.Task instance when logging is enabled, else None
_logger = None  # clearml logger; code below treats None as "logging off"

if _USE_CLEARML:
    try:
        from clearml import Task
        _task = Task.init(
            project_name="Focus Guard",
            task_name="Threshold Justification",
            tags=["evaluation", "thresholds"],
        )
        # Record run configuration as task hyperparameters.
        _task.connect({"SEED": SEED, "n_participants": 9})
        _logger = _task.get_logger()
        print("ClearML enabled — logging to project 'Focus Guard'")
    except ImportError:
        # ClearML is optional; fall back to plain console output.
        print("WARNING: ClearML not installed. Continuing without logging.")
        _USE_CLEARML = False

def _youdens_j(y_true, y_prob):
    """Pick the ROC operating point that maximises Youden's J (= TPR - FPR).

    Returns a 5-tuple: (optimal threshold, fpr array, tpr array,
    thresholds array, ROC AUC).
    """
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    best = (tpr - fpr).argmax()
    return (
        float(thresholds[best]),
        fpr,
        tpr,
        thresholds,
        float(roc_auc_score(y_true, y_prob)),
    )


def _f1_at_threshold(y_true, y_prob, threshold):
    """F1 score of the binary decision `y_prob >= threshold` against y_true."""
    preds = (y_prob >= threshold).astype(int)
    return f1_score(y_true, preds, zero_division=0)


def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path, clearml_title=None):
    """Draw a ROC curve with the Youden's-J optimum marked, and save it to `path`.

    If ClearML logging is active and `clearml_title` is given, the figure is
    also reported to ClearML before being closed.
    """
    figure, axes = plt.subplots(figsize=(6, 5))
    axes.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
    axes.plot(fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
              label=f"Youden's J optimum (t = {opt_thresh:.3f})")
    # Chance diagonal for reference.
    axes.plot([0, 1], [0, 1], "k--", lw=1, alpha=0.5)
    axes.set_xlabel("False Positive Rate")
    axes.set_ylabel("True Positive Rate")
    axes.set_title(title)
    axes.legend(loc="lower right")
    figure.tight_layout()

    # Report to ClearML while the figure is still open.
    if _logger and clearml_title:
        _logger.report_matplotlib_figure(
            title=clearml_title, series="ROC", figure=figure, iteration=0
        )

    figure.savefig(path, dpi=150)
    plt.close(figure)
    print(f"  saved {path}")


def run_lopo_models():
    """Run leave-one-person-out (LOPO) cross-validation for MLP and XGBoost.

    For each held-out participant, both models are trained on all remaining
    participants (with standardisation fitted on the training folds only)
    and predicted probabilities are collected for the held-out data.

    Returns:
        dict: {"mlp": {"y": ..., "p": ...}, "xgb": {...}} where "y" is the
        pooled true labels and "p" the pooled P(class=1) predictions,
        concatenated fold-by-fold in sorted-participant order.  NOTE:
        run_hybrid_weight_search() relies on this exact ordering to slice
        the pooled predictions back into per-person segments.
    """
    print("\n=== LOPO: MLP and XGBoost ===")
    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())

    results = {"mlp": {"y": [], "p": []}, "xgb": {"y": [], "p": []}}

    for i, held_out in enumerate(persons):
        X_test, y_test = by_person[held_out]

        # Training set = every participant except the held-out one.
        train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
        train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])

        # Fit the scaler on training data only, to avoid test-set leakage.
        scaler = StandardScaler().fit(train_X)
        X_tr_sc = scaler.transform(train_X)
        X_te_sc = scaler.transform(X_test)

        mlp = MLPClassifier(
            hidden_layer_sizes=(64, 32), activation="relu",
            max_iter=200, early_stopping=True, validation_fraction=0.15,
            random_state=SEED, verbose=False,
        )
        mlp.fit(X_tr_sc, train_y)
        mlp_prob = mlp.predict_proba(X_te_sc)[:, 1]
        results["mlp"]["y"].append(y_test)
        results["mlp"]["p"].append(mlp_prob)

        xgb = XGBClassifier(
            n_estimators=600, max_depth=8, learning_rate=0.05,
            subsample=0.8, colsample_bytree=0.8,
            reg_alpha=0.1, reg_lambda=1.0,
            use_label_encoder=False, eval_metric="logloss",
            random_state=SEED, verbosity=0,
        )
        xgb.fit(X_tr_sc, train_y)
        xgb_prob = xgb.predict_proba(X_te_sc)[:, 1]
        results["xgb"]["y"].append(y_test)
        results["xgb"]["p"].append(xgb_prob)

        print(f"  fold {i+1}/{len(persons)}: held out {held_out} "
              f"({X_test.shape[0]} samples)")

    # Pool per-fold arrays into single arrays (fold order preserved).
    for key in results:
        results[key]["y"] = np.concatenate(results[key]["y"])
        results[key]["p"] = np.concatenate(results[key]["p"])

    return results


def analyse_model_thresholds(results):
    """Compute AUC and Youden's-J optimal thresholds per model, plot ROCs.

    Returns a dict keyed by model id ("mlp"/"xgb") with the display label,
    AUC, optimal threshold, and F1 at the optimum and at the 0.50 default.
    """
    print("\n=== Model threshold analysis ===")
    model_stats = {}

    for name, label in (("mlp", "MLP"), ("xgb", "XGBoost")):
        y = results[name]["y"]
        p = results[name]["p"]
        opt_t, fpr, tpr, thresholds, auc = _youdens_j(y, p)
        # Index of the J-optimal point, needed to mark it on the ROC plot.
        opt_idx = (tpr - fpr).argmax()
        f1_opt = _f1_at_threshold(y, p, opt_t)
        f1_50 = _f1_at_threshold(y, p, 0.50)

        _plot_roc(
            fpr, tpr, auc, opt_t, opt_idx,
            f"LOPO ROC — {label} (9 folds, 144k samples)",
            os.path.join(PLOTS_DIR, f"roc_{name}.png"),
            clearml_title=f"ROC_{label}",
        )

        model_stats[name] = {
            "label": label,
            "auc": auc,
            "opt_threshold": opt_t,
            "f1_opt": f1_opt,
            "f1_50": f1_50,
        }
        print(f"  {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
              f"(F1={f1_opt:.4f}), F1@0.50={f1_50:.4f}")

        # Scalar summaries for ClearML, when enabled.
        if _logger:
            _logger.report_single_value(f"{label} Optimal Threshold", opt_t)
            _logger.report_single_value(f"{label} AUC", auc)
            _logger.report_single_value(f"{label} F1 @ Optimal", f1_opt)
            _logger.report_single_value(f"{label} F1 @ 0.5", f1_50)

    return model_stats

def run_geo_weight_search():
    """Grid-search the face weight (alpha) for the geometric pipeline.

    The geometric score is a * s_face + (1 - a) * s_eye.  For each LOPO fold
    the decision threshold is chosen via Youden's J on the training folds and
    F1 is measured on the held-out person; mean F1 over folds ranks alphas.

    Returns:
        tuple: ({alpha: mean LOPO F1}, best_alpha)
    """
    print("\n=== Geometric weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    features = SELECTED_FEATURES["face_orientation"]
    # Column indices of the two geometric sub-scores in the feature matrix.
    sf_idx = features.index("s_face")
    se_idx = features.index("s_eye")

    # Candidate face weights 0.2 .. 0.8 in steps of 0.1.
    alphas = np.arange(0.2, 0.85, 0.1).round(1)
    alpha_f1 = {a: [] for a in alphas}

    for held_out in persons:
        X_test, y_test = by_person[held_out]
        sf = X_test[:, sf_idx]
        se = X_test[:, se_idx]

        train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
        train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
        sf_tr = train_X[:, sf_idx]
        se_tr = train_X[:, se_idx]

        for a in alphas:
            # Threshold chosen on training data only, evaluated on held-out.
            score_tr = a * sf_tr + (1.0 - a) * se_tr
            opt_t, *_ = _youdens_j(train_y, score_tr)

            score_te = a * sf + (1.0 - a) * se
            f1 = _f1_at_threshold(y_test, score_te, opt_t)
            alpha_f1[a].append(f1)

    mean_f1 = {a: np.mean(f1s) for a, f1s in alpha_f1.items()}
    best_alpha = max(mean_f1, key=mean_f1.get)

    # Bar chart of mean F1 per candidate alpha.
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar([f"{a:.1f}" for a in alphas],
           [mean_f1[a] for a in alphas], color="steelblue")
    ax.set_xlabel("Face weight (alpha); eye weight = 1 - alpha")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Geometric Pipeline: Face vs Eye Weight Search")
    ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
    for i, a in enumerate(alphas):
        ax.text(i, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()

    # Report to ClearML while the figure is still open.
    if _logger:
        _logger.report_matplotlib_figure(
            title="Geo Weight Search", series="F1 vs Alpha", figure=fig, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f"  saved {path}")

    print(f"  Best alpha (face weight) = {best_alpha:.1f}, "
          f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")

    # Scalar summaries for ClearML, when enabled.
    if _logger:
        _logger.report_single_value("Geo Best Alpha", best_alpha)
        for i, a in enumerate(sorted(alphas)):
            _logger.report_scalar(
                "Geo Weight Search", "Mean LOPO F1",
                iteration=i, value=mean_f1[a]
            )

    return dict(mean_f1), best_alpha


def run_hybrid_weight_search(lopo_results):
    """Grid-search the MLP weight (w_mlp) for the hybrid pipeline.

    The hybrid score is w * P_mlp + (1 - w) * geo_score, where geo_score uses
    the fixed geometric weights below.  Per LOPO fold, the decision threshold
    is picked via Youden's J on training data (using an MLP retrained on the
    training folds) and F1 is measured on the held-out person using the
    pooled predictions from `lopo_results`.

    Args:
        lopo_results: output of run_lopo_models().  Its ["mlp"]["p"] array
            must be ordered fold-by-fold over sorted participants, because it
            is sliced back into per-person segments via `offset` below.

    Returns:
        tuple: ({w_mlp: mean LOPO F1}, best_w)
    """
    print("\n=== Hybrid weight grid search ===")

    by_person, _, _ = load_per_person("face_orientation")
    persons = sorted(by_person.keys())
    features = SELECTED_FEATURES["face_orientation"]
    # Column indices of the two geometric sub-scores in the feature matrix.
    sf_idx = features.index("s_face")
    se_idx = features.index("s_eye")

    # Geometric sub-score weights; keep in sync with the geometric pipeline's
    # selected alpha (face=0.7, eye=0.3) — if those change, re-run this search.
    GEO_FACE_W = 0.7
    GEO_EYE_W = 0.3

    # Candidate MLP weights 0.3 .. 0.8 in steps of 0.1.
    w_mlps = np.arange(0.3, 0.85, 0.1).round(1)
    wmf1 = {w: [] for w in w_mlps}
    mlp_p = lopo_results["mlp"]["p"]
    offset = 0  # running index into the pooled LOPO predictions
    for held_out in persons:
        X_test, y_test = by_person[held_out]
        n = X_test.shape[0]
        # Recover this fold's held-out MLP probabilities from the pooled array.
        mlp_prob_fold = mlp_p[offset:offset + n]
        offset += n

        sf = X_test[:, sf_idx]
        se = X_test[:, se_idx]
        geo_score = np.clip(GEO_FACE_W * sf + GEO_EYE_W * se, 0, 1)

        train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
        train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
        sf_tr = train_X[:, sf_idx]
        se_tr = train_X[:, se_idx]
        geo_tr = np.clip(GEO_FACE_W * sf_tr + GEO_EYE_W * se_tr, 0, 1)

        # Retrain an MLP on the training folds to obtain in-sample
        # probabilities for threshold selection (the pooled LOPO predictions
        # only cover held-out data).
        scaler = StandardScaler().fit(train_X)
        mlp_tr = MLPClassifier(
            hidden_layer_sizes=(64, 32), activation="relu",
            max_iter=200, early_stopping=True, validation_fraction=0.15,
            random_state=SEED, verbose=False,
        )
        mlp_tr.fit(scaler.transform(train_X), train_y)
        mlp_prob_tr = mlp_tr.predict_proba(scaler.transform(train_X))[:, 1]

        for w in w_mlps:
            # Threshold chosen on training data only, evaluated on held-out.
            combo_tr = w * mlp_prob_tr + (1.0 - w) * geo_tr
            opt_t, *_ = _youdens_j(train_y, combo_tr)

            combo_te = w * mlp_prob_fold + (1.0 - w) * geo_score
            f1 = _f1_at_threshold(y_test, combo_te, opt_t)
            wmf1[w].append(f1)

    mean_f1 = {w: np.mean(f1s) for w, f1s in wmf1.items()}
    best_w = max(mean_f1, key=mean_f1.get)

    # Bar chart of mean F1 per candidate weight.
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar([f"{w:.1f}" for w in w_mlps],
           [mean_f1[w] for w in w_mlps], color="darkorange")
    ax.set_xlabel("MLP weight (w_mlp); geo weight = 1 - w_mlp")
    ax.set_ylabel("Mean LOPO F1")
    ax.set_title("Hybrid Pipeline: MLP vs Geometric Weight Search")
    ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
    for i, w in enumerate(w_mlps):
        ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
                ha="center", va="bottom", fontsize=8)
    fig.tight_layout()

    # Report to ClearML while the figure is still open.
    if _logger:
        _logger.report_matplotlib_figure(
            title="Hybrid Weight Search", series="F1 vs w_mlp", figure=fig, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
    fig.savefig(path, dpi=150)
    plt.close(fig)
    print(f"  saved {path}")

    print(f"  Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")

    # Scalar summaries for ClearML, when enabled.
    if _logger:
        _logger.report_single_value("Hybrid Best w_mlp", best_w)
        for i, w in enumerate(sorted(w_mlps)):
            _logger.report_scalar(
                "Hybrid Weight Search", "Mean LOPO F1",
                iteration=i, value=mean_f1[w]
            )

    return dict(mean_f1), best_w


def plot_distributions():
    """Plot class-conditional EAR and MAR distributions with threshold overlays.

    Loads raw per-frame features from every data/collected_*/*.npz file,
    histograms min(left EAR, right EAR) and MAR split by label, overlays the
    pipeline's threshold constants, and reports what fraction of samples
    falls beyond each threshold.

    Returns:
        dict: percentage of samples beyond each EAR/MAR threshold plus the
        total sample count ("n_samples").
    """
    print("\n=== EAR / MAR distributions ===")
    npz_files = sorted(glob.glob(os.path.join(_PROJECT_ROOT, "data", "collected_*", "*.npz")))

    all_ear_l, all_ear_r, all_mar, all_labels = [], [], [], []
    for f in npz_files:
        d = np.load(f, allow_pickle=True)
        names = list(d["feature_names"])
        feat = d["features"].astype(np.float32)
        lab = d["labels"].astype(np.int64)
        all_ear_l.append(feat[:, names.index("ear_left")])
        all_ear_r.append(feat[:, names.index("ear_right")])
        all_mar.append(feat[:, names.index("mar")])
        all_labels.append(lab)

    ear_l = np.concatenate(all_ear_l)
    ear_r = np.concatenate(all_ear_r)
    mar = np.concatenate(all_mar)
    labels = np.concatenate(all_labels)
    # Use the more-closed eye; clip extreme outliers for plotting only
    # (the threshold statistics below use the unclipped values).
    ear_min = np.minimum(ear_l, ear_r)
    ear_plot = np.clip(ear_min, 0, 0.85)
    mar_plot = np.clip(mar, 0, 1.5)

    # EAR distribution plot
    fig_ear, ax = plt.subplots(figsize=(7, 4))
    ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    # Overlay the pipeline's EAR threshold constants.
    for val, lbl, c in [
        (0.16, "ear_closed = 0.16", "red"),
        (0.21, "EAR_BLINK = 0.21", "orange"),
        (0.30, "ear_open = 0.30", "green"),
    ]:
        ax.axvline(val, color=c, ls="--", lw=1.5, label=lbl)
    ax.set_xlabel("min(left_EAR, right_EAR)")
    ax.set_ylabel("Density")
    ax.set_title("EAR Distribution by Class (144k samples)")
    ax.legend(fontsize=8)
    fig_ear.tight_layout()

    # Report to ClearML while the figure is still open.
    if _logger:
        _logger.report_matplotlib_figure(
            title="EAR Distribution", series="by class", figure=fig_ear, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "ear_distribution.png")
    fig_ear.savefig(path, dpi=150)
    plt.close(fig_ear)
    print(f"  saved {path}")

    # MAR distribution plot
    fig_mar, ax = plt.subplots(figsize=(7, 4))
    ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
    ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
    ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
    ax.set_xlabel("Mouth Aspect Ratio (MAR)")
    ax.set_ylabel("Density")
    ax.set_title("MAR Distribution by Class (144k samples)")
    ax.legend(fontsize=8)
    fig_mar.tight_layout()

    # Report to ClearML while the figure is still open.
    if _logger:
        _logger.report_matplotlib_figure(
            title="MAR Distribution", series="by class", figure=fig_mar, iteration=0
        )

    path = os.path.join(PLOTS_DIR, "mar_distribution.png")
    fig_mar.savefig(path, dpi=150)
    plt.close(fig_mar)
    print(f"  saved {path}")

    # Fraction of ALL samples (both classes) beyond each threshold.
    closed_pct = np.mean(ear_min < 0.16) * 100
    blink_pct = np.mean(ear_min < 0.21) * 100
    open_pct = np.mean(ear_min >= 0.30) * 100
    yawn_pct = np.mean(mar > 0.55) * 100

    stats = {
        "ear_below_016": closed_pct,
        "ear_below_021": blink_pct,
        "ear_above_030": open_pct,
        "mar_above_055": yawn_pct,
        "n_samples": len(ear_min),
    }
    print(f"  EAR<0.16 (closed): {closed_pct:.1f}%  |  EAR<0.21 (blink): {blink_pct:.1f}%  |  "
          f"EAR>=0.30 (open): {open_pct:.1f}%")
    print(f"  MAR>0.55 (yawn): {yawn_pct:.1f}%")
    return stats


def write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats):
    """Assemble the markdown justification report and write it to REPORT_PATH.

    Fix: the ROC image links are now derived from the model keys so they
    always match the files saved by analyse_model_thresholds()
    (``roc_<key>.png``).  The old hard-coded ``plots/roc_xgboost.png`` link
    pointed at a file that is never written — the XGBoost plot is saved as
    ``roc_xgb.png``.

    Args:
        model_stats: {"mlp": {...}, "xgb": {...}} from analyse_model_thresholds().
        geo_f1: {alpha: mean LOPO F1} from run_geo_weight_search().
        best_alpha: selected face weight for the geometric pipeline.
        hybrid_f1: {w_mlp: mean LOPO F1} from run_hybrid_weight_search().
        best_w: selected MLP weight for the hybrid pipeline.
        dist_stats: EAR/MAR percentage statistics from plot_distributions().
    """
    lines = []
    lines.append("# Threshold Justification Report")
    lines.append("")
    lines.append("Auto-generated by `evaluation/justify_thresholds.py` using LOPO cross-validation "
                 "over 9 participants (~145k samples).")
    lines.append("")

    lines.append("## 1. ML Model Decision Thresholds")
    lines.append("")
    lines.append("Thresholds selected via **Youden's J statistic** (J = sensitivity + specificity - 1) "
                 "on pooled LOPO held-out predictions.")
    lines.append("")
    lines.append("| Model | LOPO AUC | Optimal Threshold (Youden's J) | F1 @ Optimal | F1 @ 0.50 |")
    lines.append("|-------|----------|-------------------------------|--------------|-----------|")
    for key in ("mlp", "xgb"):
        s = model_stats[key]
        lines.append(f"| {s['label']} | {s['auc']:.4f} | **{s['opt_threshold']:.3f}** | "
                     f"{s['f1_opt']:.4f} | {s['f1_50']:.4f} |")
    lines.append("")
    # Derive image paths from the keys actually used when saving the plots.
    for key in ("mlp", "xgb"):
        lines.append(f"![{model_stats[key]['label']} ROC](plots/roc_{key}.png)")
        lines.append("")

    lines.append("## 2. Geometric Pipeline Weights (s_face vs s_eye)")
    lines.append("")
    lines.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
                 "Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
    lines.append("")
    lines.append("| Face Weight (alpha) | Mean LOPO F1 |")
    lines.append("|--------------------:|-------------:|")
    for a in sorted(geo_f1.keys()):
        marker = " **<-- selected**" if a == best_alpha else ""
        lines.append(f"| {a:.1f} | {geo_f1[a]:.4f}{marker} |")
    lines.append("")
    lines.append(f"**Best:** alpha = {best_alpha:.1f} (face {best_alpha*100:.0f}%, "
                 f"eye {(1-best_alpha)*100:.0f}%)")
    lines.append("")
    lines.append("![Geometric weight search](plots/geo_weight_search.png)")
    lines.append("")

    lines.append("## 3. Hybrid Pipeline Weights (MLP vs Geometric)")
    lines.append("")
    lines.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
                 "Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). "
                 "If you change geometric weights, re-run this script — optimal w_mlp can shift.")
    lines.append("")
    lines.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
    lines.append("|-------------------:|-------------:|")
    for w in sorted(hybrid_f1.keys()):
        marker = " **<-- selected**" if w == best_w else ""
        lines.append(f"| {w:.1f} | {hybrid_f1[w]:.4f}{marker} |")
    lines.append("")
    lines.append(f"**Best:** w_mlp = {best_w:.1f} (MLP {best_w*100:.0f}%, "
                 f"geometric {(1-best_w)*100:.0f}%)")
    lines.append("")
    lines.append("![Hybrid weight search](plots/hybrid_weight_search.png)")
    lines.append("")

    lines.append("## 4. Eye and Mouth Aspect Ratio Thresholds")
    lines.append("")
    lines.append("### EAR (Eye Aspect Ratio)")
    lines.append("")
    lines.append("Reference: Soukupova & Cech, \"Real-Time Eye Blink Detection Using Facial "
                 "Landmarks\" (2016) established EAR ~ 0.2 as a blink threshold.")
    lines.append("")
    lines.append("Our thresholds define a linear interpolation zone around this established value:")
    lines.append("")
    lines.append("| Constant | Value | Justification |")
    lines.append("|----------|------:|---------------|")
    lines.append(f"| `ear_closed` | 0.16 | Below this, eyes are fully shut. "
                 f"{dist_stats['ear_below_016']:.1f}% of samples fall here. |")
    lines.append(f"| `EAR_BLINK_THRESH` | 0.21 | Blink detection point; close to the 0.2 reference. "
                 f"{dist_stats['ear_below_021']:.1f}% of samples below. |")
    lines.append(f"| `ear_open` | 0.30 | Above this, eyes are fully open. "
                 f"{dist_stats['ear_above_030']:.1f}% of samples here. |")
    lines.append("")
    lines.append("Between 0.16 and 0.30 the `_ear_score` function linearly interpolates from 0 to 1, "
                 "providing a smooth transition rather than a hard binary cutoff.")
    lines.append("")
    lines.append("![EAR distribution](plots/ear_distribution.png)")
    lines.append("")
    lines.append("### MAR (Mouth Aspect Ratio)")
    lines.append("")
    # Plain string (the old f-prefix here had no placeholders).
    lines.append("| Constant | Value | Justification |")
    lines.append("|----------|------:|---------------|")
    lines.append(f"| `MAR_YAWN_THRESHOLD` | 0.55 | Only {dist_stats['mar_above_055']:.1f}% of "
                 f"samples exceed this, confirming it captures genuine yawns without false positives. |")
    lines.append("")
    lines.append("![MAR distribution](plots/mar_distribution.png)")
    lines.append("")

    lines.append("## 5. Other Constants")
    lines.append("")
    lines.append("| Constant | Value | Rationale |")
    lines.append("|----------|------:|-----------|")
    lines.append("| `gaze_max_offset` | 0.28 | Max iris displacement (normalised) before gaze score "
                 "drops to zero. Corresponds to ~56% of the eye width; beyond this the iris is at "
                 "the extreme edge. |")
    lines.append("| `max_angle` | 22.0 deg | Head deviation beyond which face score = 0. Based on "
                 "typical monitor-viewing cone: at 60 cm distance and a 24\" monitor, the viewing "
                 "angle is ~20-25 degrees. |")
    lines.append("| `roll_weight` | 0.5 | Roll is less indicative of inattention than yaw/pitch "
                 "(tilting head doesn't mean looking away), so it's down-weighted by 50%. |")
    lines.append("| `EMA alpha` | 0.3 | Smoothing factor for focus score. "
                 "Gives ~3-4 frame effective window; balances responsiveness vs flicker. |")
    lines.append("| `grace_frames` | 15 | ~0.5 s at 30 fps before penalising no-face. Allows brief "
                 "occlusions (e.g. hand gesture) without dropping score. |")
    lines.append("| `PERCLOS_WINDOW` | 60 frames | 2 s at 30 fps; standard PERCLOS measurement "
                 "window (Dinges & Grace, 1998). |")
    lines.append("| `BLINK_WINDOW_SEC` | 30 s | Blink rate measured over 30 s; typical spontaneous "
                 "blink rate is 15-20/min (Bentivoglio et al., 1997). |")
    lines.append("")

    with open(REPORT_PATH, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"\nReport written to {REPORT_PATH}")


def main():
    """Run the full threshold-justification pipeline and write the report."""
    os.makedirs(PLOTS_DIR, exist_ok=True)

    lopo = run_lopo_models()
    stats = analyse_model_thresholds(lopo)
    geo_scores, geo_alpha = run_geo_weight_search()
    hybrid_scores, hybrid_w = run_hybrid_weight_search(lopo)
    distributions = plot_distributions()

    write_report(stats, geo_scores, geo_alpha, hybrid_scores, hybrid_w, distributions)

    # Flush and close the ClearML task, if one was opened.
    if _task:
        _task.close()
        print("ClearML task closed.")

    print("\nDone.")


# Script entry point: run the full analysis when executed directly.
if __name__ == "__main__":
    main()