# 23f3002638
# Initial commit with LFS tracking
# 038ee19
"""
Layer 2: Evaluation (Enhanced)
- Evaluate trained models on Leave-One-Out (LOO) predictions.
- Compute MAE, RMSE, MAPE per target.
- Conformal prediction: compute 90% coverage residual threshold.
- Overfitting check: compare train error vs LOO error.
"""
import pandas as pd
import numpy as np
import pickle
import os
import sys
from sklearn.metrics import mean_absolute_error, mean_squared_error
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import CFG
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    y_true : array-like of float
        Ground-truth values.
    y_pred : array-like of float
        Predicted values, same shape as ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / (|y_true| + 1e-10)) * 100``.

    Notes
    -----
    The epsilon is added to ``|y_true|`` rather than to ``y_true`` itself:
    the previous form ``y_true + 1e-10`` cancels to exactly zero when
    ``y_true == -1e-10`` (and shrinks the guard for any small negative
    target), producing inf/nan. For strictly positive targets — the case
    used throughout this script — both forms are numerically identical.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = np.abs(y_true) + 1e-10
    return np.mean(np.abs(y_true - y_pred) / denom) * 100
def compute_conformal_thresholds(y_true_dict: dict, y_pred_dict: dict, alpha: float = 0.10) -> dict:
    """
    Conformal Prediction: per-target residual quantile for (1-alpha) coverage.

    For each target we take q = quantile(|y_true - y_pred|, 1-alpha), so the
    interval [y_pred - q, y_pred + q] covers the truth with probability
    (1-alpha) under the exchangeability assumption.
    """
    coverage = 1.0 - alpha
    out = {}
    for name, truths in y_true_dict.items():
        abs_residuals = np.abs(np.asarray(truths) - np.asarray(y_pred_dict[name]))
        out[name] = {
            "q_90": float(np.quantile(abs_residuals, coverage)),
            "coverage_alpha": alpha,
            "coverage_pct": int(coverage * 100),
        }
    return out
def main():
    """Evaluate LOO predictions, run an overfitting check, and save
    conformal prediction thresholds.

    Reads (from CFG paths):
        - MODEL_DIR/loo_predictions.pkl : LOO-CV predictions indexed by Batch_ID
        - PROC_DIR/production_clean.pkl : ground-truth production DataFrame
        - MODEL_DIR/meta_models.pkl     : trained per-target models
        - PROC_DIR/X_final.pkl          : full training feature matrix

    Writes:
        - MODEL_DIR/evaluation_metrics.csv
        - MODEL_DIR/conformal_thresholds.pkl
    """
    import warnings
    warnings.filterwarnings("ignore")
    print(">>> Starting Layer 2: Evaluation + Conformal Prediction")
    with open(os.path.join(CFG.MODEL_DIR, "loo_predictions.pkl"), "rb") as f:
        loo_preds = pickle.load(f)
    with open(os.path.join(CFG.PROC_DIR, "production_clean.pkl"), "rb") as f:
        prod_df = pickle.load(f)
    with open(os.path.join(CFG.MODEL_DIR, "meta_models.pkl"), "rb") as f:
        meta_models = pickle.load(f)
    # Hoisted out of the per-target loop: previously X_final.pkl was
    # re-opened and re-unpickled once per target.
    with open(os.path.join(CFG.PROC_DIR, "X_final.pkl"), "rb") as f:
        X = pickle.load(f)
    # Align ground truth to the rows that actually have LOO predictions.
    y_actual = prod_df.set_index("Batch_ID").loc[loo_preds.index]
    metrics = []
    y_true_dict = {}
    y_pred_dict = {}
    print("\nModel Evaluation Report (LOO CV):")
    print("-" * 70)
    print(f"{'Target':<25} | {'MAE':<8} | {'RMSE':<8} | {'MAPE%':<8} | Overfit?")
    print("-" * 70)
    for target in CFG.TARGET_COLS:
        y_true = y_actual[target].values
        y_pred = loo_preds[target].values
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mape = mean_absolute_percentage_error(y_true, y_pred)
        # --- Overfitting Check ---
        # Compare LOO (generalization) MAPE vs full-train MAPE.
        # If train_mape << loo_mape -> overfitting.
        # With our regularization (max_depth=4, L1=L2=0.3), both should be similar.
        train_pred_raw = meta_models[target].predict(X.astype(float))
        train_mape = mean_absolute_percentage_error(y_true, train_pred_raw)
        overfit_ratio = train_mape / (mape + 1e-6)
        overfit_flag = "⚠️ OVERFIT" if overfit_ratio < 0.5 else "✅ OK"
        # Collect residual inputs for the conformal step below.
        y_true_dict[target] = y_true.tolist()
        y_pred_dict[target] = y_pred.tolist()
        status = " (ATTENTION)" if mape > 15 else ""
        print(f"{target:<25} | {mae:<8.3f} | {rmse:<8.3f} | {mape:<8.2f}%{status} | {overfit_flag} (train={train_mape:.2f}%)")
        metrics.append({
            "Target": target,
            "MAE": mae,
            "RMSE": rmse,
            "MAPE": mape,
            "Train_MAPE": train_mape,
            "Overfit_OK": overfit_ratio >= 0.5
        })
    print("-" * 70)
    # --- Conformal Prediction ---
    conformal = compute_conformal_thresholds(y_true_dict, y_pred_dict, alpha=0.10)
    print("\nConformal Prediction Thresholds (90% Statistical Coverage):")
    print("-" * 50)
    for t, v in conformal.items():
        print(f" {t:<25}: ±{v['q_90']:.4f} → guaranteed {v['coverage_pct']}% coverage")
    print("-" * 50)
    # Save
    metrics_df = pd.DataFrame(metrics)
    metrics_df.to_csv(os.path.join(CFG.MODEL_DIR, "evaluation_metrics.csv"), index=False)
    with open(os.path.join(CFG.MODEL_DIR, "conformal_thresholds.pkl"), "wb") as f:
        pickle.dump(conformal, f)
    print(f"\nMetrics + conformal thresholds saved to {CFG.MODEL_DIR}")
    print("=" * 60)
    print(f"✅ EVALUATION COMPLETE")
    print(f"   Targets evaluated: {len(metrics)}")
    print(f"   Conformal thresholds: {len(conformal)} targets")
    print("=" * 60)
# Run the full evaluation pipeline only when executed as a script.
if __name__ == "__main__":
    main()