Spaces:
Running
Running
Commit ·
2849699
1
Parent(s): 54a8e70
Merge feature/clearml-thresholds
Browse files- evaluation/justify_thresholds.py +119 -537
evaluation/justify_thresholds.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
# LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
|
|
|
|
| 2 |
|
| 3 |
import glob
|
| 4 |
import os
|
|
@@ -8,19 +9,9 @@ import numpy as np
|
|
| 8 |
import matplotlib
|
| 9 |
matplotlib.use("Agg")
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
-
import joblib
|
| 12 |
-
from sklearn.linear_model import LogisticRegression
|
| 13 |
from sklearn.neural_network import MLPClassifier
|
| 14 |
from sklearn.preprocessing import StandardScaler
|
| 15 |
-
from sklearn.metrics import
|
| 16 |
-
roc_curve,
|
| 17 |
-
roc_auc_score,
|
| 18 |
-
f1_score,
|
| 19 |
-
precision_score,
|
| 20 |
-
recall_score,
|
| 21 |
-
accuracy_score,
|
| 22 |
-
confusion_matrix,
|
| 23 |
-
)
|
| 24 |
from xgboost import XGBClassifier
|
| 25 |
|
| 26 |
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
@@ -32,6 +23,27 @@ PLOTS_DIR = os.path.join(os.path.dirname(__file__), "plots")
|
|
| 32 |
REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
|
| 33 |
SEED = 42
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
def _youdens_j(y_true, y_prob):
|
| 37 |
fpr, tpr, thresholds = roc_curve(y_true, y_prob)
|
|
@@ -45,7 +57,7 @@ def _f1_at_threshold(y_true, y_prob, threshold):
|
|
| 45 |
return f1_score(y_true, (y_prob >= threshold).astype(int), zero_division=0)
|
| 46 |
|
| 47 |
|
| 48 |
-
def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path):
|
| 49 |
fig, ax = plt.subplots(figsize=(6, 5))
|
| 50 |
ax.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
|
| 51 |
ax.plot(fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
|
|
@@ -56,6 +68,13 @@ def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path):
|
|
| 56 |
ax.set_title(title)
|
| 57 |
ax.legend(loc="lower right")
|
| 58 |
fig.tight_layout()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
fig.savefig(path, dpi=150)
|
| 60 |
plt.close(fig)
|
| 61 |
print(f" saved {path}")
|
|
@@ -66,8 +85,7 @@ def run_lopo_models():
|
|
| 66 |
by_person, _, _ = load_per_person("face_orientation")
|
| 67 |
persons = sorted(by_person.keys())
|
| 68 |
|
| 69 |
-
results = {"mlp": {"y": [], "p": [], "
|
| 70 |
-
"xgb": {"y": [], "p": [], "y_folds": [], "p_folds": []}}
|
| 71 |
|
| 72 |
for i, held_out in enumerate(persons):
|
| 73 |
X_test, y_test = by_person[held_out]
|
|
@@ -88,28 +106,23 @@ def run_lopo_models():
|
|
| 88 |
mlp_prob = mlp.predict_proba(X_te_sc)[:, 1]
|
| 89 |
results["mlp"]["y"].append(y_test)
|
| 90 |
results["mlp"]["p"].append(mlp_prob)
|
| 91 |
-
results["mlp"]["y_folds"].append(y_test)
|
| 92 |
-
results["mlp"]["p_folds"].append(mlp_prob)
|
| 93 |
|
| 94 |
xgb = XGBClassifier(
|
| 95 |
n_estimators=600, max_depth=8, learning_rate=0.05,
|
| 96 |
subsample=0.8, colsample_bytree=0.8,
|
| 97 |
reg_alpha=0.1, reg_lambda=1.0,
|
| 98 |
-
eval_metric="logloss",
|
| 99 |
random_state=SEED, verbosity=0,
|
| 100 |
)
|
| 101 |
xgb.fit(X_tr_sc, train_y)
|
| 102 |
xgb_prob = xgb.predict_proba(X_te_sc)[:, 1]
|
| 103 |
results["xgb"]["y"].append(y_test)
|
| 104 |
results["xgb"]["p"].append(xgb_prob)
|
| 105 |
-
results["xgb"]["y_folds"].append(y_test)
|
| 106 |
-
results["xgb"]["p_folds"].append(xgb_prob)
|
| 107 |
|
| 108 |
print(f" fold {i+1}/{len(persons)}: held out {held_out} "
|
| 109 |
f"({X_test.shape[0]} samples)")
|
| 110 |
|
| 111 |
-
|
| 112 |
-
for key in ("mlp", "xgb"):
|
| 113 |
results[key]["y"] = np.concatenate(results[key]["y"])
|
| 114 |
results[key]["p"] = np.concatenate(results[key]["p"])
|
| 115 |
|
|
@@ -130,7 +143,8 @@ def analyse_model_thresholds(results):
|
|
| 130 |
|
| 131 |
path = os.path.join(PLOTS_DIR, f"roc_{name}.png")
|
| 132 |
_plot_roc(fpr, tpr, auc, opt_t, opt_idx,
|
| 133 |
-
f"LOPO ROC — {label} (9 folds, 144k samples)", path
|
|
|
|
| 134 |
|
| 135 |
model_stats[name] = {
|
| 136 |
"label": label, "auc": auc,
|
|
@@ -139,131 +153,14 @@ def analyse_model_thresholds(results):
|
|
| 139 |
print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
|
| 140 |
f"(F1={f1_opt:.4f}), F1@0.50={f1_50:.4f}")
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
return 0.0
|
| 149 |
-
df = n - 1
|
| 150 |
-
t_975 = [0, 12.71, 4.30, 3.18, 2.78, 2.57, 2.45, 2.37, 2.31]
|
| 151 |
-
if df < len(t_975):
|
| 152 |
-
return float(t_975[df])
|
| 153 |
-
if df <= 30:
|
| 154 |
-
return 2.0 + (30 - df) / 100
|
| 155 |
-
return 1.96
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
def analyse_precision_recall_confusion(results, model_stats):
|
| 159 |
-
"""Precision/recall at optimal threshold, pooled confusion matrix, per-fold metrics, 95% CIs."""
|
| 160 |
-
print("\n=== Precision, recall, confusion matrix, per-person variance ===")
|
| 161 |
-
from sklearn.metrics import precision_recall_curve, average_precision_score
|
| 162 |
-
|
| 163 |
-
extended = {}
|
| 164 |
-
persons = results["persons"]
|
| 165 |
-
n_folds = len(persons)
|
| 166 |
-
|
| 167 |
-
for name, label in [("mlp", "MLP"), ("xgb", "XGBoost")]:
|
| 168 |
-
y_all = results[name]["y"]
|
| 169 |
-
p_all = results[name]["p"]
|
| 170 |
-
y_folds = results[name]["y_folds"]
|
| 171 |
-
p_folds = results[name]["p_folds"]
|
| 172 |
-
opt_t = model_stats[name]["opt_threshold"]
|
| 173 |
-
|
| 174 |
-
y_pred = (p_all >= opt_t).astype(int)
|
| 175 |
-
prec_pooled = precision_score(y_all, y_pred, zero_division=0)
|
| 176 |
-
rec_pooled = recall_score(y_all, y_pred, zero_division=0)
|
| 177 |
-
acc_pooled = accuracy_score(y_all, y_pred)
|
| 178 |
-
cm = confusion_matrix(y_all, y_pred)
|
| 179 |
-
if cm.shape == (2, 2):
|
| 180 |
-
tn, fp, fn, tp = cm.ravel()
|
| 181 |
-
else:
|
| 182 |
-
tn = fp = fn = tp = 0
|
| 183 |
-
|
| 184 |
-
prec_folds = []
|
| 185 |
-
rec_folds = []
|
| 186 |
-
acc_folds = []
|
| 187 |
-
f1_folds = []
|
| 188 |
-
per_person = []
|
| 189 |
-
for k, (y_f, p_f) in enumerate(zip(y_folds, p_folds)):
|
| 190 |
-
pred_f = (p_f >= opt_t).astype(int)
|
| 191 |
-
prec_f = precision_score(y_f, pred_f, zero_division=0)
|
| 192 |
-
rec_f = recall_score(y_f, pred_f, zero_division=0)
|
| 193 |
-
acc_f = accuracy_score(y_f, pred_f)
|
| 194 |
-
f1_f = f1_score(y_f, pred_f, zero_division=0)
|
| 195 |
-
prec_folds.append(prec_f)
|
| 196 |
-
rec_folds.append(rec_f)
|
| 197 |
-
acc_folds.append(acc_f)
|
| 198 |
-
f1_folds.append(f1_f)
|
| 199 |
-
per_person.append({
|
| 200 |
-
"person": persons[k],
|
| 201 |
-
"accuracy": acc_f,
|
| 202 |
-
"f1": f1_f,
|
| 203 |
-
"precision": prec_f,
|
| 204 |
-
"recall": rec_f,
|
| 205 |
-
})
|
| 206 |
-
|
| 207 |
-
t_mult = _ci_95_t(n_folds)
|
| 208 |
-
mean_acc = np.mean(acc_folds)
|
| 209 |
-
std_acc = np.std(acc_folds, ddof=1) if n_folds > 1 else 0.0
|
| 210 |
-
mean_f1 = np.mean(f1_folds)
|
| 211 |
-
std_f1 = np.std(f1_folds, ddof=1) if n_folds > 1 else 0.0
|
| 212 |
-
mean_prec = np.mean(prec_folds)
|
| 213 |
-
std_prec = np.std(prec_folds, ddof=1) if n_folds > 1 else 0.0
|
| 214 |
-
mean_rec = np.mean(rec_folds)
|
| 215 |
-
std_rec = np.std(rec_folds, ddof=1) if n_folds > 1 else 0.0
|
| 216 |
-
|
| 217 |
-
extended[name] = {
|
| 218 |
-
"label": label,
|
| 219 |
-
"opt_threshold": opt_t,
|
| 220 |
-
"precision_pooled": prec_pooled,
|
| 221 |
-
"recall_pooled": rec_pooled,
|
| 222 |
-
"accuracy_pooled": acc_pooled,
|
| 223 |
-
"confusion_matrix": cm,
|
| 224 |
-
"tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
|
| 225 |
-
"per_person": per_person,
|
| 226 |
-
"accuracy_mean": mean_acc, "accuracy_std": std_acc,
|
| 227 |
-
"accuracy_ci_half": t_mult * (std_acc / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
|
| 228 |
-
"f1_mean": mean_f1, "f1_std": std_f1,
|
| 229 |
-
"f1_ci_half": t_mult * (std_f1 / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
|
| 230 |
-
"precision_mean": mean_prec, "precision_std": std_prec,
|
| 231 |
-
"precision_ci_half": t_mult * (std_prec / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
|
| 232 |
-
"recall_mean": mean_rec, "recall_std": std_rec,
|
| 233 |
-
"recall_ci_half": t_mult * (std_rec / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
|
| 234 |
-
"n_folds": n_folds,
|
| 235 |
-
}
|
| 236 |
-
|
| 237 |
-
print(f" {label}: precision={prec_pooled:.4f}, recall={rec_pooled:.4f} | "
|
| 238 |
-
f"per-fold F1 mean={mean_f1:.4f} ± {std_f1:.4f} "
|
| 239 |
-
f"(95% CI [{mean_f1 - extended[name]['f1_ci_half']:.4f}, {mean_f1 + extended[name]['f1_ci_half']:.4f}])")
|
| 240 |
-
|
| 241 |
-
return extended
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
def plot_confusion_matrices(extended_stats):
|
| 245 |
-
"""Save confusion matrix heatmaps for MLP and XGBoost."""
|
| 246 |
-
for name in ("mlp", "xgb"):
|
| 247 |
-
s = extended_stats[name]
|
| 248 |
-
cm = s["confusion_matrix"]
|
| 249 |
-
fig, ax = plt.subplots(figsize=(4, 3))
|
| 250 |
-
im = ax.imshow(cm, cmap="Blues")
|
| 251 |
-
ax.set_xticks([0, 1])
|
| 252 |
-
ax.set_yticks([0, 1])
|
| 253 |
-
ax.set_xticklabels(["Pred 0", "Pred 1"])
|
| 254 |
-
ax.set_yticklabels(["True 0", "True 1"])
|
| 255 |
-
ax.set_ylabel("True label")
|
| 256 |
-
ax.set_xlabel("Predicted label")
|
| 257 |
-
for i in range(2):
|
| 258 |
-
for j in range(2):
|
| 259 |
-
ax.text(j, i, str(cm[i, j]), ha="center", va="center", color="white" if cm[i, j] > cm.max() / 2 else "black", fontweight="bold")
|
| 260 |
-
ax.set_title(f"LOPO {s['label']} @ t={s['opt_threshold']:.3f}")
|
| 261 |
-
fig.tight_layout()
|
| 262 |
-
path = os.path.join(PLOTS_DIR, f"confusion_matrix_{name}.png")
|
| 263 |
-
fig.savefig(path, dpi=150)
|
| 264 |
-
plt.close(fig)
|
| 265 |
-
print(f" saved {path}")
|
| 266 |
|
|
|
|
| 267 |
|
| 268 |
def run_geo_weight_search():
|
| 269 |
print("\n=== Geometric weight grid search ===")
|
|
@@ -309,6 +206,13 @@ def run_geo_weight_search():
|
|
| 309 |
ax.text(i, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
|
| 310 |
ha="center", va="bottom", fontsize=8)
|
| 311 |
fig.tight_layout()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
|
| 313 |
fig.savefig(path, dpi=150)
|
| 314 |
plt.close(fig)
|
|
@@ -316,6 +220,16 @@ def run_geo_weight_search():
|
|
| 316 |
|
| 317 |
print(f" Best alpha (face weight) = {best_alpha:.1f}, "
|
| 318 |
f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
return dict(mean_f1), best_alpha
|
| 320 |
|
| 321 |
|
|
@@ -382,198 +296,30 @@ def run_hybrid_weight_search(lopo_results):
|
|
| 382 |
ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
|
| 383 |
ha="center", va="bottom", fontsize=8)
|
| 384 |
fig.tight_layout()
|
| 385 |
-
path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
|
| 386 |
-
fig.savefig(path, dpi=150)
|
| 387 |
-
plt.close(fig)
|
| 388 |
-
print(f" saved {path}")
|
| 389 |
-
|
| 390 |
-
print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")
|
| 391 |
-
return dict(mean_f1), best_w
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
def run_hybrid_xgb_weight_search(lopo_results):
|
| 395 |
-
"""Grid search: XGBoost prob + geometric. Same structure as MLP hybrid."""
|
| 396 |
-
print("\n=== Hybrid XGBoost weight grid search ===")
|
| 397 |
-
|
| 398 |
-
by_person, _, _ = load_per_person("face_orientation")
|
| 399 |
-
persons = sorted(by_person.keys())
|
| 400 |
-
features = SELECTED_FEATURES["face_orientation"]
|
| 401 |
-
sf_idx = features.index("s_face")
|
| 402 |
-
se_idx = features.index("s_eye")
|
| 403 |
-
|
| 404 |
-
GEO_FACE_W = 0.7
|
| 405 |
-
GEO_EYE_W = 0.3
|
| 406 |
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
for held_out in persons:
|
| 412 |
-
X_test, y_test = by_person[held_out]
|
| 413 |
-
n = X_test.shape[0]
|
| 414 |
-
xgb_prob_fold = xgb_p[offset : offset + n]
|
| 415 |
-
offset += n
|
| 416 |
-
|
| 417 |
-
sf = X_test[:, sf_idx]
|
| 418 |
-
se = X_test[:, se_idx]
|
| 419 |
-
geo_score = np.clip(GEO_FACE_W * sf + GEO_EYE_W * se, 0, 1)
|
| 420 |
-
|
| 421 |
-
train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
|
| 422 |
-
train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
|
| 423 |
-
sf_tr = train_X[:, sf_idx]
|
| 424 |
-
se_tr = train_X[:, se_idx]
|
| 425 |
-
geo_tr = np.clip(GEO_FACE_W * sf_tr + GEO_EYE_W * se_tr, 0, 1)
|
| 426 |
-
|
| 427 |
-
scaler = StandardScaler().fit(train_X)
|
| 428 |
-
X_tr_sc = scaler.transform(train_X)
|
| 429 |
-
xgb_tr = XGBClassifier(
|
| 430 |
-
n_estimators=600, max_depth=8, learning_rate=0.05,
|
| 431 |
-
subsample=0.8, colsample_bytree=0.8,
|
| 432 |
-
reg_alpha=0.1, reg_lambda=1.0,
|
| 433 |
-
eval_metric="logloss",
|
| 434 |
-
random_state=SEED, verbosity=0,
|
| 435 |
)
|
| 436 |
-
xgb_tr.fit(X_tr_sc, train_y)
|
| 437 |
-
xgb_prob_tr = xgb_tr.predict_proba(X_tr_sc)[:, 1]
|
| 438 |
-
|
| 439 |
-
for w in w_xgbs:
|
| 440 |
-
combo_tr = w * xgb_prob_tr + (1.0 - w) * geo_tr
|
| 441 |
-
opt_t, *_ = _youdens_j(train_y, combo_tr)
|
| 442 |
-
|
| 443 |
-
combo_te = w * xgb_prob_fold + (1.0 - w) * geo_score
|
| 444 |
-
f1 = _f1_at_threshold(y_test, combo_te, opt_t)
|
| 445 |
-
wmf1[w].append(f1)
|
| 446 |
|
| 447 |
-
|
| 448 |
-
best_w = max(mean_f1, key=mean_f1.get)
|
| 449 |
-
|
| 450 |
-
fig, ax = plt.subplots(figsize=(7, 4))
|
| 451 |
-
ax.bar([f"{w:.1f}" for w in w_xgbs],
|
| 452 |
-
[mean_f1[w] for w in w_xgbs], color="steelblue")
|
| 453 |
-
ax.set_xlabel("XGBoost weight (w_xgb); geo weight = 1 - w_xgb")
|
| 454 |
-
ax.set_ylabel("Mean LOPO F1")
|
| 455 |
-
ax.set_title("Hybrid Pipeline: XGBoost vs Geometric Weight Search")
|
| 456 |
-
ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
|
| 457 |
-
for i, w in enumerate(w_xgbs):
|
| 458 |
-
ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
|
| 459 |
-
ha="center", va="bottom", fontsize=8)
|
| 460 |
-
fig.tight_layout()
|
| 461 |
-
path = os.path.join(PLOTS_DIR, "hybrid_xgb_weight_search.png")
|
| 462 |
fig.savefig(path, dpi=150)
|
| 463 |
plt.close(fig)
|
| 464 |
print(f" saved {path}")
|
| 465 |
|
| 466 |
-
print(f" Best
|
| 467 |
-
return dict(mean_f1), best_w
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
def run_hybrid_lr_combiner(lopo_results, use_xgb=True):
|
| 471 |
-
"""LR combiner: meta-features = [model_prob, geo_score], learned weights instead of grid search."""
|
| 472 |
-
print("\n=== Hybrid LR combiner (LOPO) ===")
|
| 473 |
-
by_person, _, _ = load_per_person("face_orientation")
|
| 474 |
-
persons = sorted(by_person.keys())
|
| 475 |
-
features = SELECTED_FEATURES["face_orientation"]
|
| 476 |
-
sf_idx = features.index("s_face")
|
| 477 |
-
se_idx = features.index("s_eye")
|
| 478 |
-
GEO_FACE_W = 0.7
|
| 479 |
-
GEO_EYE_W = 0.3
|
| 480 |
-
|
| 481 |
-
key = "xgb" if use_xgb else "mlp"
|
| 482 |
-
model_p = lopo_results[key]["p"]
|
| 483 |
-
offset = 0
|
| 484 |
-
fold_f1s = []
|
| 485 |
-
for held_out in persons:
|
| 486 |
-
X_test, y_test = by_person[held_out]
|
| 487 |
-
n = X_test.shape[0]
|
| 488 |
-
prob_fold = model_p[offset : offset + n]
|
| 489 |
-
offset += n
|
| 490 |
-
sf = X_test[:, sf_idx]
|
| 491 |
-
se = X_test[:, se_idx]
|
| 492 |
-
geo_score = np.clip(GEO_FACE_W * sf + GEO_EYE_W * se, 0, 1)
|
| 493 |
-
meta_te = np.column_stack([prob_fold, geo_score])
|
| 494 |
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
if use_xgb:
|
| 503 |
-
xgb_tr = XGBClassifier(
|
| 504 |
-
n_estimators=600, max_depth=8, learning_rate=0.05,
|
| 505 |
-
subsample=0.8, colsample_bytree=0.8,
|
| 506 |
-
reg_alpha=0.1, reg_lambda=1.0,
|
| 507 |
-
eval_metric="logloss",
|
| 508 |
-
random_state=SEED, verbosity=0,
|
| 509 |
-
)
|
| 510 |
-
xgb_tr.fit(X_tr_sc, train_y)
|
| 511 |
-
prob_tr = xgb_tr.predict_proba(X_tr_sc)[:, 1]
|
| 512 |
-
else:
|
| 513 |
-
mlp_tr = MLPClassifier(
|
| 514 |
-
hidden_layer_sizes=(64, 32), activation="relu",
|
| 515 |
-
max_iter=200, early_stopping=True, validation_fraction=0.15,
|
| 516 |
-
random_state=SEED, verbose=False,
|
| 517 |
)
|
| 518 |
-
mlp_tr.fit(X_tr_sc, train_y)
|
| 519 |
-
prob_tr = mlp_tr.predict_proba(X_tr_sc)[:, 1]
|
| 520 |
-
meta_tr = np.column_stack([prob_tr, geo_tr])
|
| 521 |
-
|
| 522 |
-
lr = LogisticRegression(C=1.0, max_iter=500, random_state=SEED)
|
| 523 |
-
lr.fit(meta_tr, train_y)
|
| 524 |
-
p_tr = lr.predict_proba(meta_tr)[:, 1]
|
| 525 |
-
opt_t, *_ = _youdens_j(train_y, p_tr)
|
| 526 |
-
p_te = lr.predict_proba(meta_te)[:, 1]
|
| 527 |
-
f1 = _f1_at_threshold(y_test, p_te, opt_t)
|
| 528 |
-
fold_f1s.append(f1)
|
| 529 |
-
print(f" fold {held_out}: F1 = {f1:.4f} (threshold = {opt_t:.3f})")
|
| 530 |
-
|
| 531 |
-
mean_f1 = float(np.mean(fold_f1s))
|
| 532 |
-
print(f" LR combiner mean LOPO F1 = {mean_f1:.4f}")
|
| 533 |
-
return mean_f1
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
def train_and_save_hybrid_combiner(lopo_results, use_xgb, geo_face_weight=0.7, geo_eye_weight=0.3,
|
| 537 |
-
combiner_path=None):
|
| 538 |
-
"""Build OOS meta-dataset from LOPO predictions, train one LR, save joblib + optimal threshold."""
|
| 539 |
-
by_person, _, _ = load_per_person("face_orientation")
|
| 540 |
-
persons = sorted(by_person.keys())
|
| 541 |
-
features = SELECTED_FEATURES["face_orientation"]
|
| 542 |
-
sf_idx = features.index("s_face")
|
| 543 |
-
se_idx = features.index("s_eye")
|
| 544 |
|
| 545 |
-
|
| 546 |
-
model_p = lopo_results[key]["p"]
|
| 547 |
-
meta_y = lopo_results[key]["y"]
|
| 548 |
-
geo_list = []
|
| 549 |
-
offset = 0
|
| 550 |
-
for p in persons:
|
| 551 |
-
X, _ = by_person[p]
|
| 552 |
-
n = X.shape[0]
|
| 553 |
-
sf = X[:, sf_idx]
|
| 554 |
-
se = X[:, se_idx]
|
| 555 |
-
geo_list.append(np.clip(geo_face_weight * sf + geo_eye_weight * se, 0, 1))
|
| 556 |
-
offset += n
|
| 557 |
-
geo_all = np.concatenate(geo_list)
|
| 558 |
-
meta_X = np.column_stack([model_p, geo_all])
|
| 559 |
-
|
| 560 |
-
lr = LogisticRegression(C=1.0, max_iter=500, random_state=SEED)
|
| 561 |
-
lr.fit(meta_X, meta_y)
|
| 562 |
-
p = lr.predict_proba(meta_X)[:, 1]
|
| 563 |
-
opt_threshold, *_ = _youdens_j(meta_y, p)
|
| 564 |
-
|
| 565 |
-
if combiner_path is None:
|
| 566 |
-
combiner_path = os.path.join(_PROJECT_ROOT, "checkpoints", "hybrid_combiner.joblib")
|
| 567 |
-
os.makedirs(os.path.dirname(combiner_path), exist_ok=True)
|
| 568 |
-
joblib.dump({
|
| 569 |
-
"combiner": lr,
|
| 570 |
-
"threshold": float(opt_threshold),
|
| 571 |
-
"use_xgb": bool(use_xgb),
|
| 572 |
-
"geo_face_weight": geo_face_weight,
|
| 573 |
-
"geo_eye_weight": geo_eye_weight,
|
| 574 |
-
}, combiner_path)
|
| 575 |
-
print(f" Saved combiner to {combiner_path} (threshold={opt_threshold:.3f})")
|
| 576 |
-
return opt_threshold, combiner_path
|
| 577 |
|
| 578 |
|
| 579 |
def plot_distributions():
|
|
@@ -599,7 +345,8 @@ def plot_distributions():
|
|
| 599 |
ear_plot = np.clip(ear_min, 0, 0.85)
|
| 600 |
mar_plot = np.clip(mar, 0, 1.5)
|
| 601 |
|
| 602 |
-
|
|
|
|
| 603 |
ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
|
| 604 |
ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
|
| 605 |
for val, lbl, c in [
|
|
@@ -612,13 +359,21 @@ def plot_distributions():
|
|
| 612 |
ax.set_ylabel("Density")
|
| 613 |
ax.set_title("EAR Distribution by Class (144k samples)")
|
| 614 |
ax.legend(fontsize=8)
|
| 615 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 616 |
path = os.path.join(PLOTS_DIR, "ear_distribution.png")
|
| 617 |
-
|
| 618 |
-
plt.close(
|
| 619 |
print(f" saved {path}")
|
| 620 |
|
| 621 |
-
|
|
|
|
| 622 |
ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
|
| 623 |
ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
|
| 624 |
ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
|
|
@@ -626,10 +381,17 @@ def plot_distributions():
|
|
| 626 |
ax.set_ylabel("Density")
|
| 627 |
ax.set_title("MAR Distribution by Class (144k samples)")
|
| 628 |
ax.legend(fontsize=8)
|
| 629 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 630 |
path = os.path.join(PLOTS_DIR, "mar_distribution.png")
|
| 631 |
-
|
| 632 |
-
plt.close(
|
| 633 |
print(f" saved {path}")
|
| 634 |
|
| 635 |
closed_pct = np.mean(ear_min < 0.16) * 100
|
|
@@ -650,11 +412,7 @@ def plot_distributions():
|
|
| 650 |
return stats
|
| 651 |
|
| 652 |
|
| 653 |
-
def write_report(model_stats,
|
| 654 |
-
hybrid_mlp_f1, best_w_mlp,
|
| 655 |
-
hybrid_xgb_f1, best_w_xgb,
|
| 656 |
-
use_xgb_for_hybrid, dist_stats,
|
| 657 |
-
lr_combiner_f1=None):
|
| 658 |
lines = []
|
| 659 |
lines.append("# Threshold Justification Report")
|
| 660 |
lines.append("")
|
|
@@ -679,91 +437,7 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
|
|
| 679 |
lines.append("")
|
| 680 |
lines.append("")
|
| 681 |
|
| 682 |
-
lines.append("## 2.
|
| 683 |
-
lines.append("")
|
| 684 |
-
lines.append("At the optimal threshold (Youden's J), pooled over all LOPO held-out predictions:")
|
| 685 |
-
lines.append("")
|
| 686 |
-
lines.append("| Model | Threshold | Precision | Recall | F1 | Accuracy |")
|
| 687 |
-
lines.append("|-------|----------:|----------:|-------:|---:|---------:|")
|
| 688 |
-
for key in ("mlp", "xgb"):
|
| 689 |
-
s = extended_stats[key]
|
| 690 |
-
lines.append(f"| {s['label']} | {s['opt_threshold']:.3f} | {s['precision_pooled']:.4f} | "
|
| 691 |
-
f"{s['recall_pooled']:.4f} | {model_stats[key]['f1_opt']:.4f} | {s['accuracy_pooled']:.4f} |")
|
| 692 |
-
lines.append("")
|
| 693 |
-
lines.append("Higher threshold → fewer positive predictions → higher precision, lower recall. "
|
| 694 |
-
"Youden's J picks the threshold that balances sensitivity and specificity (recall for the positive class and true negative rate).")
|
| 695 |
-
lines.append("")
|
| 696 |
-
|
| 697 |
-
lines.append("## 3. Confusion Matrix (Pooled LOPO)")
|
| 698 |
-
lines.append("")
|
| 699 |
-
lines.append("At optimal threshold. Rows = true label, columns = predicted label (0 = unfocused, 1 = focused).")
|
| 700 |
-
lines.append("")
|
| 701 |
-
for key in ("mlp", "xgb"):
|
| 702 |
-
s = extended_stats[key]
|
| 703 |
-
lines.append(f"### {s['label']}")
|
| 704 |
-
lines.append("")
|
| 705 |
-
lines.append("| | Pred 0 | Pred 1 |")
|
| 706 |
-
lines.append("|--|-------:|-------:|")
|
| 707 |
-
cm = s["confusion_matrix"]
|
| 708 |
-
if cm.shape == (2, 2):
|
| 709 |
-
lines.append(f"| **True 0** | {cm[0,0]} (TN) | {cm[0,1]} (FP) |")
|
| 710 |
-
lines.append(f"| **True 1** | {cm[1,0]} (FN) | {cm[1,1]} (TP) |")
|
| 711 |
-
lines.append("")
|
| 712 |
-
lines.append(f"TN={s['tn']}, FP={s['fp']}, FN={s['fn']}, TP={s['tp']}. ")
|
| 713 |
-
lines.append("")
|
| 714 |
-
lines.append("")
|
| 715 |
-
lines.append("")
|
| 716 |
-
lines.append("")
|
| 717 |
-
lines.append("")
|
| 718 |
-
|
| 719 |
-
lines.append("## 4. Per-Person Performance Variance (LOPO)")
|
| 720 |
-
lines.append("")
|
| 721 |
-
lines.append("One fold per left-out person; metrics at optimal threshold.")
|
| 722 |
-
lines.append("")
|
| 723 |
-
for key in ("mlp", "xgb"):
|
| 724 |
-
s = extended_stats[key]
|
| 725 |
-
lines.append(f"### {s['label']} — per held-out person")
|
| 726 |
-
lines.append("")
|
| 727 |
-
lines.append("| Person | Accuracy | F1 | Precision | Recall |")
|
| 728 |
-
lines.append("|--------|---------:|---:|----------:|-------:|")
|
| 729 |
-
for row in s["per_person"]:
|
| 730 |
-
lines.append(f"| {row['person']} | {row['accuracy']:.4f} | {row['f1']:.4f} | {row['precision']:.4f} | {row['recall']:.4f} |")
|
| 731 |
-
lines.append("")
|
| 732 |
-
lines.append("### Summary across persons")
|
| 733 |
-
lines.append("")
|
| 734 |
-
lines.append("| Model | Accuracy mean ± std | F1 mean ± std | Precision mean ± std | Recall mean ± std |")
|
| 735 |
-
lines.append("|-------|---------------------|---------------|----------------------|-------------------|")
|
| 736 |
-
for key in ("mlp", "xgb"):
|
| 737 |
-
s = extended_stats[key]
|
| 738 |
-
lines.append(f"| {s['label']} | {s['accuracy_mean']:.4f} ± {s['accuracy_std']:.4f} | "
|
| 739 |
-
f"{s['f1_mean']:.4f} ± {s['f1_std']:.4f} | "
|
| 740 |
-
f"{s['precision_mean']:.4f} ± {s['precision_std']:.4f} | "
|
| 741 |
-
f"{s['recall_mean']:.4f} ± {s['recall_std']:.4f} |")
|
| 742 |
-
lines.append("")
|
| 743 |
-
|
| 744 |
-
lines.append("## 5. Confidence Intervals (95%, LOPO over 9 persons)")
|
| 745 |
-
lines.append("")
|
| 746 |
-
lines.append("Mean ± half-width of 95% t-interval (df=8) for each metric across the 9 left-out persons.")
|
| 747 |
-
lines.append("")
|
| 748 |
-
lines.append("| Model | F1 | Accuracy | Precision | Recall |")
|
| 749 |
-
lines.append("|-------|---:|--------:|----------:|-------:|")
|
| 750 |
-
for key in ("mlp", "xgb"):
|
| 751 |
-
s = extended_stats[key]
|
| 752 |
-
f1_lo = s["f1_mean"] - s["f1_ci_half"]
|
| 753 |
-
f1_hi = s["f1_mean"] + s["f1_ci_half"]
|
| 754 |
-
acc_lo = s["accuracy_mean"] - s["accuracy_ci_half"]
|
| 755 |
-
acc_hi = s["accuracy_mean"] + s["accuracy_ci_half"]
|
| 756 |
-
prec_lo = s["precision_mean"] - s["precision_ci_half"]
|
| 757 |
-
prec_hi = s["precision_mean"] + s["precision_ci_half"]
|
| 758 |
-
rec_lo = s["recall_mean"] - s["recall_ci_half"]
|
| 759 |
-
rec_hi = s["recall_mean"] + s["recall_ci_half"]
|
| 760 |
-
lines.append(f"| {s['label']} | {s['f1_mean']:.4f} [{f1_lo:.4f}, {f1_hi:.4f}] | "
|
| 761 |
-
f"{s['accuracy_mean']:.4f} [{acc_lo:.4f}, {acc_hi:.4f}] | "
|
| 762 |
-
f"{s['precision_mean']:.4f} [{prec_lo:.4f}, {prec_hi:.4f}] | "
|
| 763 |
-
f"{s['recall_mean']:.4f} [{rec_lo:.4f}, {rec_hi:.4f}] |")
|
| 764 |
-
lines.append("")
|
| 765 |
-
|
| 766 |
-
lines.append("## 6. Geometric Pipeline Weights (s_face vs s_eye)")
|
| 767 |
lines.append("")
|
| 768 |
lines.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
|
| 769 |
"Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
|
|
@@ -780,68 +454,25 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
|
|
| 780 |
lines.append("")
|
| 781 |
lines.append("")
|
| 782 |
|
| 783 |
-
lines.append("##
|
| 784 |
lines.append("")
|
| 785 |
lines.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
|
| 786 |
-
"Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3)."
|
|
|
|
| 787 |
lines.append("")
|
| 788 |
lines.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
|
| 789 |
lines.append("|-------------------:|-------------:|")
|
| 790 |
-
for w in sorted(
|
| 791 |
-
marker = " **<-- selected**" if w ==
|
| 792 |
-
lines.append(f"| {w:.1f} | {
|
| 793 |
-
lines.append("")
|
| 794 |
-
lines.append(f"**Best:** w_mlp = {best_w_mlp:.1f} (MLP {best_w_mlp*100:.0f}%, "
|
| 795 |
-
f"geometric {(1-best_w_mlp)*100:.0f}%) → mean LOPO F1 = {hybrid_mlp_f1[best_w_mlp]:.4f}")
|
| 796 |
-
lines.append("")
|
| 797 |
-
lines.append("")
|
| 798 |
-
lines.append("")
|
| 799 |
-
|
| 800 |
-
lines.append("## 8. Hybrid Pipeline: XGBoost vs Geometric")
|
| 801 |
-
lines.append("")
|
| 802 |
-
lines.append("Same grid over w_xgb in {0.3 ... 0.8}. w_geo = 1 - w_xgb.")
|
| 803 |
-
lines.append("")
|
| 804 |
-
lines.append("| XGBoost Weight (w_xgb) | Mean LOPO F1 |")
|
| 805 |
-
lines.append("|-----------------------:|-------------:|")
|
| 806 |
-
for w in sorted(hybrid_xgb_f1.keys()):
|
| 807 |
-
marker = " **<-- selected**" if w == best_w_xgb else ""
|
| 808 |
-
lines.append(f"| {w:.1f} | {hybrid_xgb_f1[w]:.4f}{marker} |")
|
| 809 |
lines.append("")
|
| 810 |
-
lines.append(f"**Best:**
|
|
|
|
| 811 |
lines.append("")
|
| 812 |
-
lines.append("![Hybrid
|
| 813 |
lines.append("")
|
| 814 |
|
| 815 |
-
|
| 816 |
-
f1_xgb = hybrid_xgb_f1[best_w_xgb]
|
| 817 |
-
lines.append("### Which hybrid is used in the app?")
|
| 818 |
-
lines.append("")
|
| 819 |
-
if use_xgb_for_hybrid:
|
| 820 |
-
lines.append(f"**XGBoost hybrid is better** (F1 = {f1_xgb:.4f} vs MLP hybrid F1 = {f1_mlp:.4f}).")
|
| 821 |
-
else:
|
| 822 |
-
lines.append(f"**MLP hybrid is better** (F1 = {f1_mlp:.4f} vs XGBoost hybrid F1 = {f1_xgb:.4f}).")
|
| 823 |
-
lines.append("")
|
| 824 |
-
if lr_combiner_f1 is not None:
|
| 825 |
-
lines.append("### Logistic regression combiner (replaces heuristic weights)")
|
| 826 |
-
lines.append("")
|
| 827 |
-
lines.append("Instead of a fixed linear blend (e.g. 0.3·ML + 0.7·geo), a **logistic regression** "
|
| 828 |
-
"combines model probability and geometric score: meta-features = [model_prob, geo_score], "
|
| 829 |
-
"trained on the same LOPO splits. Threshold from Youden's J on combiner output.")
|
| 830 |
-
lines.append("")
|
| 831 |
-
lines.append(f"| Method | Mean LOPO F1 |")
|
| 832 |
-
lines.append("|--------|-------------:|")
|
| 833 |
-
lines.append(f"| Heuristic weight grid (best w) | {(f1_xgb if use_xgb_for_hybrid else f1_mlp):.4f} |")
|
| 834 |
-
lines.append(f"| **LR combiner** | **{lr_combiner_f1:.4f}** |")
|
| 835 |
-
lines.append("")
|
| 836 |
-
lines.append("The app uses the saved LR combiner when `combiner_path` is set in `hybrid_focus_config.json`.")
|
| 837 |
-
lines.append("")
|
| 838 |
-
else:
|
| 839 |
-
if use_xgb_for_hybrid:
|
| 840 |
-
lines.append("The app uses **XGBoost + geometric** with the weights above.")
|
| 841 |
-
else:
|
| 842 |
-
lines.append("The app uses **MLP + geometric** with the weights above.")
|
| 843 |
-
lines.append("")
|
| 844 |
-
lines.append("## 5. Eye and Mouth Aspect Ratio Thresholds")
|
| 845 |
lines.append("")
|
| 846 |
lines.append("### EAR (Eye Aspect Ratio)")
|
| 847 |
lines.append("")
|
|
@@ -874,7 +505,7 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
|
|
| 874 |
lines.append("")
|
| 875 |
lines.append("")
|
| 876 |
|
| 877 |
-
lines.append("##
|
| 878 |
lines.append("")
|
| 879 |
lines.append("| Constant | Value | Rationale |")
|
| 880 |
lines.append("|----------|------:|-----------|")
|
|
@@ -901,71 +532,22 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
|
|
| 901 |
print(f"\nReport written to {REPORT_PATH}")
|
| 902 |
|
| 903 |
|
| 904 |
-
def write_hybrid_config(use_xgb, best_w_mlp, best_w_xgb, config_path,
|
| 905 |
-
combiner_path=None, combiner_threshold=None):
|
| 906 |
-
"""Write hybrid_focus_config.json. If combiner_path set, app uses LR combiner instead of heuristic weights."""
|
| 907 |
-
import json
|
| 908 |
-
if use_xgb:
|
| 909 |
-
w_xgb = round(float(best_w_xgb), 2)
|
| 910 |
-
w_geo = round(1.0 - best_w_xgb, 2)
|
| 911 |
-
w_mlp = 0.3
|
| 912 |
-
else:
|
| 913 |
-
w_mlp = round(float(best_w_mlp), 2)
|
| 914 |
-
w_geo = round(1.0 - best_w_mlp, 2)
|
| 915 |
-
w_xgb = 0.0
|
| 916 |
-
cfg = {
|
| 917 |
-
"use_xgb": bool(use_xgb),
|
| 918 |
-
"w_mlp": w_mlp,
|
| 919 |
-
"w_xgb": w_xgb,
|
| 920 |
-
"w_geo": w_geo,
|
| 921 |
-
"threshold": float(combiner_threshold) if combiner_threshold is not None else 0.35,
|
| 922 |
-
"use_yawn_veto": True,
|
| 923 |
-
"geo_face_weight": 0.7,
|
| 924 |
-
"geo_eye_weight": 0.3,
|
| 925 |
-
"mar_yawn_threshold": 0.55,
|
| 926 |
-
"metric": "f1",
|
| 927 |
-
}
|
| 928 |
-
if combiner_path:
|
| 929 |
-
cfg["combiner"] = "logistic"
|
| 930 |
-
cfg["combiner_path"] = os.path.basename(combiner_path)
|
| 931 |
-
with open(config_path, "w", encoding="utf-8") as f:
|
| 932 |
-
json.dump(cfg, f, indent=2)
|
| 933 |
-
print(f" Written {config_path} (use_xgb={cfg['use_xgb']}, combiner={cfg.get('combiner', 'heuristic')})")
|
| 934 |
-
|
| 935 |
-
|
| 936 |
def main():
|
| 937 |
os.makedirs(PLOTS_DIR, exist_ok=True)
|
| 938 |
|
| 939 |
lopo_results = run_lopo_models()
|
| 940 |
model_stats = analyse_model_thresholds(lopo_results)
|
| 941 |
-
extended_stats = analyse_precision_recall_confusion(lopo_results, model_stats)
|
| 942 |
-
plot_confusion_matrices(extended_stats)
|
| 943 |
geo_f1, best_alpha = run_geo_weight_search()
|
| 944 |
-
|
| 945 |
-
hybrid_xgb_f1, best_w_xgb = run_hybrid_xgb_weight_search(lopo_results)
|
| 946 |
dist_stats = plot_distributions()
|
| 947 |
|
| 948 |
-
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
combiner_threshold, combiner_path = train_and_save_hybrid_combiner(
|
| 956 |
-
lopo_results, use_xgb_for_hybrid,
|
| 957 |
-
combiner_path=os.path.join(_PROJECT_ROOT, "checkpoints", "hybrid_combiner.joblib"),
|
| 958 |
-
)
|
| 959 |
-
|
| 960 |
-
config_path = os.path.join(_PROJECT_ROOT, "checkpoints", "hybrid_focus_config.json")
|
| 961 |
-
write_hybrid_config(use_xgb_for_hybrid, best_w_mlp, best_w_xgb, config_path,
|
| 962 |
-
combiner_path=combiner_path, combiner_threshold=combiner_threshold)
|
| 963 |
-
|
| 964 |
-
write_report(model_stats, extended_stats, geo_f1, best_alpha,
|
| 965 |
-
hybrid_mlp_f1, best_w_mlp,
|
| 966 |
-
hybrid_xgb_f1, best_w_xgb,
|
| 967 |
-
use_xgb_for_hybrid, dist_stats,
|
| 968 |
-
lr_combiner_f1=lr_combiner_f1)
|
| 969 |
print("\nDone.")
|
| 970 |
|
| 971 |
|
|
|
|
| 1 |
# LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
|
| 2 |
+
# ClearML logging: set USE_CLEARML=1 env var or pass --clearml flag
|
| 3 |
|
| 4 |
import glob
|
| 5 |
import os
|
|
|
|
| 9 |
import matplotlib
|
| 10 |
matplotlib.use("Agg")
|
| 11 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
| 12 |
from sklearn.neural_network import MLPClassifier
|
| 13 |
from sklearn.preprocessing import StandardScaler
|
| 14 |
+
from sklearn.metrics import roc_curve, roc_auc_score, f1_score
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
from xgboost import XGBClassifier
|
| 16 |
|
| 17 |
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
|
|
|
| 23 |
REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
|
| 24 |
SEED = 42
|
| 25 |
|
| 26 |
+
# ClearML (optional experiment logging)
# Enable logging with either of:
#   USE_CLEARML=1 python -m evaluation.justify_thresholds
#   python -m evaluation.justify_thresholds --clearml
_USE_CLEARML = os.environ.get("USE_CLEARML", "0") == "1" or "--clearml" in sys.argv

# Both stay None unless ClearML is requested and importable; the rest of the
# module checks `_logger` / `_task` for truthiness before logging anything.
_task = None
_logger = None

if _USE_CLEARML:
    try:
        # Imported lazily so ClearML remains an optional dependency.
        from clearml import Task
        _task = Task.init(
            project_name="Focus Guard",
            task_name="Threshold Justification",
            tags=["evaluation", "thresholds"],
        )
        # NOTE(review): n_participants is hard-coded here rather than derived
        # from the loaded data — confirm it matches the dataset in use.
        _task.connect({"SEED": SEED, "n_participants": 9})
        _logger = _task.get_logger()
        print("ClearML enabled — logging to project 'Focus Guard'")
    except ImportError:
        # Missing package: fall back to a plain local run without logging.
        print("WARNING: ClearML not installed. Continuing without logging.")
        _USE_CLEARML = False
|
| 47 |
|
| 48 |
def _youdens_j(y_true, y_prob):
|
| 49 |
fpr, tpr, thresholds = roc_curve(y_true, y_prob)
|
|
|
|
| 57 |
return f1_score(y_true, (y_prob >= threshold).astype(int), zero_division=0)
|
| 58 |
|
| 59 |
|
| 60 |
+
def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path, clearml_title=None):
|
| 61 |
fig, ax = plt.subplots(figsize=(6, 5))
|
| 62 |
ax.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
|
| 63 |
ax.plot(fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
|
|
|
|
| 68 |
ax.set_title(title)
|
| 69 |
ax.legend(loc="lower right")
|
| 70 |
fig.tight_layout()
|
| 71 |
+
|
| 72 |
+
# Log to ClearML before closing the figure
|
| 73 |
+
if _logger and clearml_title:
|
| 74 |
+
_logger.report_matplotlib_figure(
|
| 75 |
+
title=clearml_title, series="ROC", figure=fig, iteration=0
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
fig.savefig(path, dpi=150)
|
| 79 |
plt.close(fig)
|
| 80 |
print(f" saved {path}")
|
|
|
|
| 85 |
by_person, _, _ = load_per_person("face_orientation")
|
| 86 |
persons = sorted(by_person.keys())
|
| 87 |
|
| 88 |
+
results = {"mlp": {"y": [], "p": []}, "xgb": {"y": [], "p": []}}
|
|
|
|
| 89 |
|
| 90 |
for i, held_out in enumerate(persons):
|
| 91 |
X_test, y_test = by_person[held_out]
|
|
|
|
| 106 |
mlp_prob = mlp.predict_proba(X_te_sc)[:, 1]
|
| 107 |
results["mlp"]["y"].append(y_test)
|
| 108 |
results["mlp"]["p"].append(mlp_prob)
|
|
|
|
|
|
|
| 109 |
|
| 110 |
xgb = XGBClassifier(
|
| 111 |
n_estimators=600, max_depth=8, learning_rate=0.05,
|
| 112 |
subsample=0.8, colsample_bytree=0.8,
|
| 113 |
reg_alpha=0.1, reg_lambda=1.0,
|
| 114 |
+
use_label_encoder=False, eval_metric="logloss",
|
| 115 |
random_state=SEED, verbosity=0,
|
| 116 |
)
|
| 117 |
xgb.fit(X_tr_sc, train_y)
|
| 118 |
xgb_prob = xgb.predict_proba(X_te_sc)[:, 1]
|
| 119 |
results["xgb"]["y"].append(y_test)
|
| 120 |
results["xgb"]["p"].append(xgb_prob)
|
|
|
|
|
|
|
| 121 |
|
| 122 |
print(f" fold {i+1}/{len(persons)}: held out {held_out} "
|
| 123 |
f"({X_test.shape[0]} samples)")
|
| 124 |
|
| 125 |
+
for key in results:
|
|
|
|
| 126 |
results[key]["y"] = np.concatenate(results[key]["y"])
|
| 127 |
results[key]["p"] = np.concatenate(results[key]["p"])
|
| 128 |
|
|
|
|
| 143 |
|
| 144 |
path = os.path.join(PLOTS_DIR, f"roc_{name}.png")
|
| 145 |
_plot_roc(fpr, tpr, auc, opt_t, opt_idx,
|
| 146 |
+
f"LOPO ROC — {label} (9 folds, 144k samples)", path,
|
| 147 |
+
clearml_title=f"ROC_{label}")
|
| 148 |
|
| 149 |
model_stats[name] = {
|
| 150 |
"label": label, "auc": auc,
|
|
|
|
| 153 |
print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
|
| 154 |
f"(F1={f1_opt:.4f}), F1@0.50={f1_50:.4f}")
|
| 155 |
|
| 156 |
+
# Log scalars to ClearML
|
| 157 |
+
if _logger:
|
| 158 |
+
_logger.report_single_value(f"{label} Optimal Threshold", opt_t)
|
| 159 |
+
_logger.report_single_value(f"{label} AUC", auc)
|
| 160 |
+
_logger.report_single_value(f"{label} F1 @ Optimal", f1_opt)
|
| 161 |
+
_logger.report_single_value(f"{label} F1 @ 0.5", f1_50)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
+
return model_stats
|
| 164 |
|
| 165 |
def run_geo_weight_search():
|
| 166 |
print("\n=== Geometric weight grid search ===")
|
|
|
|
| 206 |
ax.text(i, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
|
| 207 |
ha="center", va="bottom", fontsize=8)
|
| 208 |
fig.tight_layout()
|
| 209 |
+
|
| 210 |
+
# Log to ClearML before closing
|
| 211 |
+
if _logger:
|
| 212 |
+
_logger.report_matplotlib_figure(
|
| 213 |
+
title="Geo Weight Search", series="F1 vs Alpha", figure=fig, iteration=0
|
| 214 |
+
)
|
| 215 |
+
|
| 216 |
path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
|
| 217 |
fig.savefig(path, dpi=150)
|
| 218 |
plt.close(fig)
|
|
|
|
| 220 |
|
| 221 |
print(f" Best alpha (face weight) = {best_alpha:.1f}, "
|
| 222 |
f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")
|
| 223 |
+
|
| 224 |
+
# Log scalars to ClearML
|
| 225 |
+
if _logger:
|
| 226 |
+
_logger.report_single_value("Geo Best Alpha", best_alpha)
|
| 227 |
+
for i, a in enumerate(sorted(alphas)):
|
| 228 |
+
_logger.report_scalar(
|
| 229 |
+
"Geo Weight Search", "Mean LOPO F1",
|
| 230 |
+
iteration=i, value=mean_f1[a]
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
return dict(mean_f1), best_alpha
|
| 234 |
|
| 235 |
|
|
|
|
| 296 |
ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
|
| 297 |
ha="center", va="bottom", fontsize=8)
|
| 298 |
fig.tight_layout()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
+
# Log to ClearML before closing
|
| 301 |
+
if _logger:
|
| 302 |
+
_logger.report_matplotlib_figure(
|
| 303 |
+
title="Hybrid Weight Search", series="F1 vs w_mlp", figure=fig, iteration=0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
+
path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
fig.savefig(path, dpi=150)
|
| 308 |
plt.close(fig)
|
| 309 |
print(f" saved {path}")
|
| 310 |
|
| 311 |
+
print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
|
| 313 |
+
# Log scalars to ClearML
|
| 314 |
+
if _logger:
|
| 315 |
+
_logger.report_single_value("Hybrid Best w_mlp", best_w)
|
| 316 |
+
for i, w in enumerate(sorted(w_mlps)):
|
| 317 |
+
_logger.report_scalar(
|
| 318 |
+
"Hybrid Weight Search", "Mean LOPO F1",
|
| 319 |
+
iteration=i, value=mean_f1[w]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
+
return dict(mean_f1), best_w
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
|
| 325 |
def plot_distributions():
|
|
|
|
| 345 |
ear_plot = np.clip(ear_min, 0, 0.85)
|
| 346 |
mar_plot = np.clip(mar, 0, 1.5)
|
| 347 |
|
| 348 |
+
# EAR distribution plot
|
| 349 |
+
fig_ear, ax = plt.subplots(figsize=(7, 4))
|
| 350 |
ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
|
| 351 |
ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
|
| 352 |
for val, lbl, c in [
|
|
|
|
| 359 |
ax.set_ylabel("Density")
|
| 360 |
ax.set_title("EAR Distribution by Class (144k samples)")
|
| 361 |
ax.legend(fontsize=8)
|
| 362 |
+
fig_ear.tight_layout()
|
| 363 |
+
|
| 364 |
+
# Log to ClearML before closing
|
| 365 |
+
if _logger:
|
| 366 |
+
_logger.report_matplotlib_figure(
|
| 367 |
+
title="EAR Distribution", series="by class", figure=fig_ear, iteration=0
|
| 368 |
+
)
|
| 369 |
+
|
| 370 |
path = os.path.join(PLOTS_DIR, "ear_distribution.png")
|
| 371 |
+
fig_ear.savefig(path, dpi=150)
|
| 372 |
+
plt.close(fig_ear)
|
| 373 |
print(f" saved {path}")
|
| 374 |
|
| 375 |
+
# MAR distribution plot
|
| 376 |
+
fig_mar, ax = plt.subplots(figsize=(7, 4))
|
| 377 |
ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
|
| 378 |
ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
|
| 379 |
ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
|
|
|
|
| 381 |
ax.set_ylabel("Density")
|
| 382 |
ax.set_title("MAR Distribution by Class (144k samples)")
|
| 383 |
ax.legend(fontsize=8)
|
| 384 |
+
fig_mar.tight_layout()
|
| 385 |
+
|
| 386 |
+
# Log to ClearML before closing
|
| 387 |
+
if _logger:
|
| 388 |
+
_logger.report_matplotlib_figure(
|
| 389 |
+
title="MAR Distribution", series="by class", figure=fig_mar, iteration=0
|
| 390 |
+
)
|
| 391 |
+
|
| 392 |
path = os.path.join(PLOTS_DIR, "mar_distribution.png")
|
| 393 |
+
fig_mar.savefig(path, dpi=150)
|
| 394 |
+
plt.close(fig_mar)
|
| 395 |
print(f" saved {path}")
|
| 396 |
|
| 397 |
closed_pct = np.mean(ear_min < 0.16) * 100
|
|
|
|
| 412 |
return stats
|
| 413 |
|
| 414 |
|
| 415 |
+
def write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
lines = []
|
| 417 |
lines.append("# Threshold Justification Report")
|
| 418 |
lines.append("")
|
|
|
|
| 437 |
lines.append("")
|
| 438 |
lines.append("")
|
| 439 |
|
| 440 |
+
lines.append("## 2. Geometric Pipeline Weights (s_face vs s_eye)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
lines.append("")
|
| 442 |
lines.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
|
| 443 |
"Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
|
|
|
|
| 454 |
lines.append("")
|
| 455 |
lines.append("")
|
| 456 |
|
| 457 |
+
lines.append("## 3. Hybrid Pipeline Weights (MLP vs Geometric)")
|
| 458 |
lines.append("")
|
| 459 |
lines.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
|
| 460 |
+
"Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). "
|
| 461 |
+
"If you change geometric weights, re-run this script — optimal w_mlp can shift.")
|
| 462 |
lines.append("")
|
| 463 |
lines.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
|
| 464 |
lines.append("|-------------------:|-------------:|")
|
| 465 |
+
for w in sorted(hybrid_f1.keys()):
|
| 466 |
+
marker = " **<-- selected**" if w == best_w else ""
|
| 467 |
+
lines.append(f"| {w:.1f} | {hybrid_f1[w]:.4f}{marker} |")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
lines.append("")
|
| 469 |
+
lines.append(f"**Best:** w_mlp = {best_w:.1f} (MLP {best_w*100:.0f}%, "
|
| 470 |
+
f"geometric {(1-best_w)*100:.0f}%)")
|
| 471 |
lines.append("")
|
| 472 |
+
lines.append("")
|
| 473 |
lines.append("")
|
| 474 |
|
| 475 |
+
lines.append("## 4. Eye and Mouth Aspect Ratio Thresholds")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
lines.append("")
|
| 477 |
lines.append("### EAR (Eye Aspect Ratio)")
|
| 478 |
lines.append("")
|
|
|
|
| 505 |
lines.append("")
|
| 506 |
lines.append("")
|
| 507 |
|
| 508 |
+
lines.append("## 5. Other Constants")
|
| 509 |
lines.append("")
|
| 510 |
lines.append("| Constant | Value | Rationale |")
|
| 511 |
lines.append("|----------|------:|-----------|")
|
|
|
|
| 532 |
print(f"\nReport written to {REPORT_PATH}")
|
| 533 |
|
| 534 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
def main():
    """Run the full threshold-justification pipeline and write the report.

    Steps, in order: LOPO model evaluation, per-model threshold analysis,
    geometric weight search, hybrid weight search, EAR/MAR distribution
    plots, then the Markdown report. If a ClearML task is active it is
    closed once the report has been written.
    """
    # Plots are saved by the individual steps, so the folder must exist first.
    os.makedirs(PLOTS_DIR, exist_ok=True)

    lopo = run_lopo_models()
    thresholds = analyse_model_thresholds(lopo)
    geo_scores, alpha = run_geo_weight_search()
    hybrid_scores, w_mlp = run_hybrid_weight_search(lopo)
    distributions = plot_distributions()

    write_report(
        thresholds,
        geo_scores,
        alpha,
        hybrid_scores,
        w_mlp,
        distributions,
    )

    # Close ClearML task
    if _task:
        _task.close()
        print("ClearML task closed.")

    print("\nDone.")
|
| 552 |
|
| 553 |
|