"""
Layer 2: Evaluation (Enhanced)

- Evaluate trained models on Leave-One-Out (LOO) predictions.
- Compute MAE, RMSE, MAPE per target.
- Conformal prediction: compute 90% coverage residual threshold.
- Overfitting check: compare train error vs LOO error.
"""
import os
import pickle
import sys
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Make the parent-of-parent directory importable so `config` resolves.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import CFG


def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error of *y_pred*, in percent.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predictions, same length as ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / max(|y_true|, 1e-10)) * 100``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Guard the denominator on its magnitude: adding epsilon to the signed
    # value would shrink negative targets and hit exactly zero at -1e-10.
    denom = np.maximum(np.abs(y_true), 1e-10)
    return np.mean(np.abs(y_true - y_pred) / denom) * 100


def compute_conformal_thresholds(
    y_true_dict: dict, y_pred_dict: dict, alpha: float = 0.10
) -> dict:
    """Compute per-target absolute-residual thresholds for (1 - alpha) coverage.

    For each target the threshold ``q`` is the k-th smallest value of
    ``|y_true - y_pred|`` with ``k = ceil((n + 1) * (1 - alpha))``, the
    finite-sample rank used in split conformal prediction. The prediction
    interval is then ``[y_pred - q, y_pred + q]``.

    When ``k > n`` (too few residuals for the requested level) the largest
    residual is returned; in that case the nominal coverage level cannot be
    certified from the data alone.

    Args:
        y_true_dict: Maps target name -> sequence of true values.
        y_pred_dict: Maps target name -> sequence of predictions; must contain
            every key of ``y_true_dict``.
        alpha: Miscoverage level in ``[0, 1]``.

    Returns:
        ``{target: {"q_90": float, "coverage_alpha": alpha,
        "coverage_pct": int}}``. The key is named ``"q_90"`` regardless of
        ``alpha`` (kept for compatibility with existing consumers).

    Raises:
        ValueError: If ``alpha`` is outside ``[0, 1]`` or a target has no
            residuals.
    """
    if not 0.0 <= alpha <= 1.0:
        raise ValueError(f"alpha must be in [0, 1], got {alpha!r}")

    thresholds = {}
    for target, y_true in y_true_dict.items():
        residuals = np.abs(
            np.asarray(y_true, dtype=float)
            - np.asarray(y_pred_dict[target], dtype=float)
        ).ravel()
        n = residuals.size
        if n == 0:
            raise ValueError(f"no residuals available for target {target!r}")

        # round() absorbs float noise such as 9.000000000000002 before ceil,
        # which would otherwise bump the rank by one.
        rank = int(np.ceil(round((n + 1) * (1.0 - alpha), 9)))
        rank = max(1, min(rank, n))
        q = np.sort(residuals)[rank - 1]

        thresholds[target] = {
            "q_90": float(q),
            "coverage_alpha": alpha,
            # round, not truncate: (1 - alpha) * 100 may land just below
            # the intended integer.
            "coverage_pct": int(round((1.0 - alpha) * 100)),
        }
    return thresholds


def _load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load artifacts
    # written by this pipeline, never files from an untrusted source.
    with open(path, "rb") as f:
        return pickle.load(f)


def main():
    """Evaluate LOO predictions, print a report, and persist the results.

    Reads ``loo_predictions.pkl`` and ``meta_models.pkl`` from
    ``CFG.MODEL_DIR`` and ``production_clean.pkl`` and ``X_final.pkl`` from
    ``CFG.PROC_DIR``. Writes ``evaluation_metrics.csv`` and
    ``conformal_thresholds.pkl`` to ``CFG.MODEL_DIR``.
    """
    print(">>> Starting Layer 2: Evaluation + Conformal Prediction")

    loo_preds = _load_pickle(os.path.join(CFG.MODEL_DIR, "loo_predictions.pkl"))
    prod_df = _load_pickle(os.path.join(CFG.PROC_DIR, "production_clean.pkl"))
    meta_models = _load_pickle(os.path.join(CFG.MODEL_DIR, "meta_models.pkl"))

    # The feature matrix is identical for every target, so load and cast it
    # once instead of once per loop iteration.
    X = _load_pickle(os.path.join(CFG.PROC_DIR, "X_final.pkl"))
    X_train = X.astype(float)

    # Reorder the actuals to the row order of the LOO predictions.
    y_actual = prod_df.set_index("Batch_ID").loc[loo_preds.index]

    metrics = []
    y_true_dict = {}
    y_pred_dict = {}

    print("\nModel Evaluation Report (LOO CV):")
    print("-" * 70)
    print(f"{'Target':<25} | {'MAE':<8} | {'RMSE':<8} | {'MAPE%':<8} | Overfit?")
    print("-" * 70)

    for target in CFG.TARGET_COLS:
        y_true = y_actual[target].values
        y_pred = loo_preds[target].values

        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mape = mean_absolute_percentage_error(y_true, y_pred)

        # --- Overfitting Check ---
        # Compare LOO (generalization) MAPE vs full-train MAPE.
        # If train_mape << loo_mape -> overfitting.
        # Original author's note: with the regularization used in training
        # (max_depth=4, L1=L2=0.3) both should be similar.
        # NOTE(review): this assumes the rows of X_final are in the same
        # order as loo_preds.index — confirm against the training layer.
        with warnings.catch_warnings():
            # Silence warnings from predict only, not for the whole process.
            warnings.simplefilter("ignore")
            train_pred_raw = np.asarray(meta_models[target].predict(X_train))
        # Flatten so an (n, 1) output cannot broadcast into an (n, n) matrix.
        train_pred_raw = train_pred_raw.ravel()
        if train_pred_raw.shape[0] != len(y_true):
            raise ValueError(
                f"train predictions for {target!r} have "
                f"{train_pred_raw.shape[0]} values, expected {len(y_true)}"
            )

        train_mape = mean_absolute_percentage_error(y_true, train_pred_raw)
        overfit_ratio = train_mape / (mape + 1e-6)
        overfit_flag = "⚠️ OVERFIT" if overfit_ratio < 0.5 else "✅ OK"

        y_true_dict[target] = y_true.tolist()
        y_pred_dict[target] = y_pred.tolist()

        status = " (ATTENTION)" if mape > 15 else ""
        print(f"{target:<25} | {mae:<8.3f} | {rmse:<8.3f} | {mape:<8.2f}%{status} | {overfit_flag} (train={train_mape:.2f}%)")

        metrics.append({
            "Target": target,
            "MAE": mae,
            "RMSE": rmse,
            "MAPE": mape,
            "Train_MAPE": train_mape,
            "Overfit_OK": overfit_ratio >= 0.5,
        })

    print("-" * 70)

    # --- Conformal Prediction ---
    conformal = compute_conformal_thresholds(y_true_dict, y_pred_dict, alpha=0.10)

    print("\nConformal Prediction Thresholds (90% Statistical Coverage):")
    print("-" * 50)
    for t, v in conformal.items():
        print(f" {t:<25}: ±{v['q_90']:.4f} → guaranteed {v['coverage_pct']}% coverage")
    print("-" * 50)

    # Save
    metrics_df = pd.DataFrame(metrics)
    metrics_df.to_csv(os.path.join(CFG.MODEL_DIR, "evaluation_metrics.csv"), index=False)

    with open(os.path.join(CFG.MODEL_DIR, "conformal_thresholds.pkl"), "wb") as f:
        pickle.dump(conformal, f)

    print(f"\nMetrics + conformal thresholds saved to {CFG.MODEL_DIR}")
    print("=" * 60)
    print("✅ EVALUATION COMPLETE")
    print(f" Targets evaluated: {len(metrics)}")
    print(f" Conformal thresholds: {len(conformal)} targets")
    print("=" * 60)


if __name__ == "__main__":
    main()