Spaces:
Running
Running
"""
Layer 2: Evaluation (Enhanced)
- Evaluate trained models on Leave-One-Out (LOO) predictions.
- Compute MAE, RMSE, MAPE per target.
- Conformal prediction: compute 90% coverage residual threshold.
- Overfitting check: compare train error vs LOO error.
"""
| import pandas as pd | |
| import numpy as np | |
| import pickle | |
| import os | |
| import sys | |
| from sklearn.metrics import mean_absolute_error, mean_squared_error | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from config import CFG | |
def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error, in percent (1e-10 guards div-by-zero)."""
    relative = (y_true - y_pred) / (y_true + 1e-10)
    return 100 * np.mean(np.abs(relative))
def compute_conformal_thresholds(y_true_dict: dict, y_pred_dict: dict, alpha: float = 0.10) -> dict:
    """
    Split conformal prediction: per-target residual quantile for (1-alpha) coverage.

    For each target the absolute residuals r_i = |y_true_i - y_pred_i| are
    formed and the finite-sample-corrected quantile level

        level = min(ceil((n + 1) * (1 - alpha)) / n, 1.0)

    is used. The interval [y_pred - q, y_pred + q] then has guaranteed
    marginal coverage >= (1 - alpha) under exchangeability.  (The previous
    plain (1-alpha) empirical quantile slightly under-covers for small n,
    so the docstring's guarantee did not actually hold.)

    Args:
        y_true_dict: target name -> sequence of true values.
        y_pred_dict: target name -> sequence of predictions (same order/length).
        alpha: miscoverage rate; 0.10 yields 90% coverage.

    Returns:
        Dict mapping each target to {"q_90": float threshold,
        "coverage_alpha": alpha, "coverage_pct": int coverage percent}.
        NOTE: the "q_90" key name is preserved for backward compatibility
        with downstream consumers even when alpha != 0.10.
    """
    thresholds = {}
    for target in y_true_dict:
        residuals = np.abs(np.array(y_true_dict[target]) - np.array(y_pred_dict[target]))
        n = residuals.size
        # Finite-sample conformal correction; capped at 1.0 (max residual).
        level = min(np.ceil((n + 1) * (1.0 - alpha)) / n, 1.0)
        q = np.quantile(residuals, level)
        thresholds[target] = {
            "q_90": float(q),
            "coverage_alpha": alpha,
            "coverage_pct": int((1.0 - alpha) * 100),
        }
    return thresholds
def main():
    """
    Layer-2 entry point: evaluate LOO predictions per target (MAE/RMSE/MAPE),
    flag overfitting via train-vs-LOO MAPE ratio, derive 90% conformal
    thresholds, and save metrics CSV + thresholds pickle to CFG.MODEL_DIR.
    """
    print(">>> Starting Layer 2: Evaluation + Conformal Prediction")

    with open(os.path.join(CFG.MODEL_DIR, "loo_predictions.pkl"), "rb") as f:
        loo_preds = pickle.load(f)
    with open(os.path.join(CFG.PROC_DIR, "production_clean.pkl"), "rb") as f:
        prod_df = pickle.load(f)
    with open(os.path.join(CFG.MODEL_DIR, "meta_models.pkl"), "rb") as f:
        meta_models = pickle.load(f)
    # FIX: load the training design matrix ONCE (it was re-read from disk on
    # every target iteration) and silence warnings once, before the loop.
    with open(os.path.join(CFG.PROC_DIR, "X_final.pkl"), "rb") as f:
        X = pickle.load(f)
    import warnings
    warnings.filterwarnings("ignore")

    # Align ground truth to the LOO prediction index (keyed by Batch_ID).
    y_actual = prod_df.set_index("Batch_ID").loc[loo_preds.index]

    metrics = []
    y_true_dict = {}
    y_pred_dict = {}

    print("\nModel Evaluation Report (LOO CV):")
    print("-" * 70)
    print(f"{'Target':<25} | {'MAE':<8} | {'RMSE':<8} | {'MAPE%':<8} | Overfit?")
    print("-" * 70)

    for target in CFG.TARGET_COLS:
        y_true = y_actual[target].values
        y_pred = loo_preds[target].values
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mape = mean_absolute_percentage_error(y_true, y_pred)

        # Overfitting check: compare full-train MAPE vs LOO (generalization)
        # MAPE. Train error far below LOO error (ratio < 0.5) flags overfit;
        # with the regularization used upstream both should be similar.
        train_pred_raw = meta_models[target].predict(X.astype(float))
        train_mape = mean_absolute_percentage_error(y_true, train_pred_raw)
        overfit_ratio = train_mape / (mape + 1e-6)
        overfit_flag = "⚠️ OVERFIT" if overfit_ratio < 0.5 else "✅ OK"

        # Collected for conformal threshold computation after the loop.
        y_true_dict[target] = y_true.tolist()
        y_pred_dict[target] = y_pred.tolist()

        status = " (ATTENTION)" if mape > 15 else ""
        print(f"{target:<25} | {mae:<8.3f} | {rmse:<8.3f} | {mape:<8.2f}%{status} | {overfit_flag} (train={train_mape:.2f}%)")
        metrics.append({
            "Target": target,
            "MAE": mae,
            "RMSE": rmse,
            "MAPE": mape,
            "Train_MAPE": train_mape,
            "Overfit_OK": overfit_ratio >= 0.5
        })
    print("-" * 70)

    # --- Conformal Prediction ---
    conformal = compute_conformal_thresholds(y_true_dict, y_pred_dict, alpha=0.10)
    print("\nConformal Prediction Thresholds (90% Statistical Coverage):")
    print("-" * 50)
    for t, v in conformal.items():
        print(f" {t:<25}: ±{v['q_90']:.4f} → guaranteed {v['coverage_pct']}% coverage")
    print("-" * 50)

    # --- Save artifacts ---
    metrics_df = pd.DataFrame(metrics)
    metrics_df.to_csv(os.path.join(CFG.MODEL_DIR, "evaluation_metrics.csv"), index=False)
    with open(os.path.join(CFG.MODEL_DIR, "conformal_thresholds.pkl"), "wb") as f:
        pickle.dump(conformal, f)

    print(f"\nMetrics + conformal thresholds saved to {CFG.MODEL_DIR}")
    print("=" * 60)
    print(f"✅ EVALUATION COMPLETE")
    print(f" Targets evaluated: {len(metrics)}")
    print(f" Conformal thresholds: {len(conformal)} targets")
    print("=" * 60)


if __name__ == "__main__":
    main()