Spaces:
Running
Running
"""
Layer 2: Evaluation (Enhanced)
- Evaluate trained models on Leave-One-Out (LOO) predictions.
- Compute MAE, RMSE, MAPE per target.
- Conformal prediction: compute 90% coverage residual threshold.
- Overfitting check: compare train error vs LOO error.
"""
| import pandas as pd | |
| import numpy as np | |
| import pickle | |
| import os | |
| import sys | |
| from sklearn.metrics import mean_absolute_error, mean_squared_error | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from config import CFG | |
def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error, in percent (1e-10 guards div-by-zero)."""
    relative = (y_true - y_pred) / (y_true + 1e-10)
    return 100 * np.mean(np.abs(relative))
def compute_conformal_thresholds(y_true_dict: dict, y_pred_dict: dict, alpha: float = 0.10) -> dict:
    """
    Split conformal prediction: per-target residual quantile for (1-alpha) coverage.

    For each target the absolute residuals r_i = |y_true_i - y_pred_i| are
    formed and the finite-sample-corrected quantile level

        level = min(ceil((n + 1) * (1 - alpha)) / n, 1.0)

    is used. The interval [y_pred - q, y_pred + q] then has guaranteed
    marginal coverage >= (1 - alpha) under exchangeability.  (The previous
    plain (1-alpha) empirical quantile slightly under-covers for small n,
    so the docstring's guarantee did not actually hold.)

    Args:
        y_true_dict: target name -> sequence of true values.
        y_pred_dict: target name -> sequence of predictions (same order/length).
        alpha: miscoverage rate; 0.10 yields 90% coverage.

    Returns:
        Dict mapping each target to {"q_90": float threshold,
        "coverage_alpha": alpha, "coverage_pct": int coverage percent}.
        NOTE: the "q_90" key name is preserved for backward compatibility
        with downstream consumers even when alpha != 0.10.
    """
    thresholds = {}
    for target in y_true_dict:
        residuals = np.abs(np.array(y_true_dict[target]) - np.array(y_pred_dict[target]))
        n = residuals.size
        # Finite-sample conformal correction; capped at 1.0 (max residual).
        level = min(np.ceil((n + 1) * (1.0 - alpha)) / n, 1.0)
        q = np.quantile(residuals, level)
        thresholds[target] = {
            "q_90": float(q),
            "coverage_alpha": alpha,
            "coverage_pct": int((1.0 - alpha) * 100),
        }
    return thresholds
def main():
    """
    Layer-2 entry point: evaluate LOO predictions per target (MAE/RMSE/MAPE),
    flag overfitting via train-vs-LOO MAPE ratio, derive 90% conformal
    thresholds, and save metrics CSV + thresholds pickle to CFG.MODEL_DIR.
    """
    print(">>> Starting Layer 2: Evaluation + Conformal Prediction")

    with open(os.path.join(CFG.MODEL_DIR, "loo_predictions.pkl"), "rb") as f:
        loo_preds = pickle.load(f)
    with open(os.path.join(CFG.PROC_DIR, "production_clean.pkl"), "rb") as f:
        prod_df = pickle.load(f)
    with open(os.path.join(CFG.MODEL_DIR, "meta_models.pkl"), "rb") as f:
        meta_models = pickle.load(f)
    # FIX: load the training design matrix ONCE (it was re-read from disk on
    # every target iteration) and silence warnings once, before the loop.
    with open(os.path.join(CFG.PROC_DIR, "X_final.pkl"), "rb") as f:
        X = pickle.load(f)
    import warnings
    warnings.filterwarnings("ignore")

    # Align ground truth to the LOO prediction index (keyed by Batch_ID).
    y_actual = prod_df.set_index("Batch_ID").loc[loo_preds.index]

    metrics = []
    y_true_dict = {}
    y_pred_dict = {}

    print("\nModel Evaluation Report (LOO CV):")
    print("-" * 70)
    print(f"{'Target':<25} | {'MAE':<8} | {'RMSE':<8} | {'MAPE%':<8} | Overfit?")
    print("-" * 70)

    for target in CFG.TARGET_COLS:
        y_true = y_actual[target].values
        y_pred = loo_preds[target].values
        mae = mean_absolute_error(y_true, y_pred)
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mape = mean_absolute_percentage_error(y_true, y_pred)

        # Overfitting check: compare full-train MAPE vs LOO (generalization)
        # MAPE. Train error far below LOO error (ratio < 0.5) flags overfit;
        # with the regularization used upstream both should be similar.
        train_pred_raw = meta_models[target].predict(X.astype(float))
        train_mape = mean_absolute_percentage_error(y_true, train_pred_raw)
        overfit_ratio = train_mape / (mape + 1e-6)
        overfit_flag = "⚠️ OVERFIT" if overfit_ratio < 0.5 else "✅ OK"

        # Collected for conformal threshold computation after the loop.
        y_true_dict[target] = y_true.tolist()
        y_pred_dict[target] = y_pred.tolist()

        status = " (ATTENTION)" if mape > 15 else ""
        print(f"{target:<25} | {mae:<8.3f} | {rmse:<8.3f} | {mape:<8.2f}%{status} | {overfit_flag} (train={train_mape:.2f}%)")
        metrics.append({
            "Target": target,
            "MAE": mae,
            "RMSE": rmse,
            "MAPE": mape,
            "Train_MAPE": train_mape,
            "Overfit_OK": overfit_ratio >= 0.5
        })
    print("-" * 70)

    # --- Conformal Prediction ---
    conformal = compute_conformal_thresholds(y_true_dict, y_pred_dict, alpha=0.10)
    print("\nConformal Prediction Thresholds (90% Statistical Coverage):")
    print("-" * 50)
    for t, v in conformal.items():
        print(f" {t:<25}: ±{v['q_90']:.4f} → guaranteed {v['coverage_pct']}% coverage")
    print("-" * 50)

    # --- Save artifacts ---
    metrics_df = pd.DataFrame(metrics)
    metrics_df.to_csv(os.path.join(CFG.MODEL_DIR, "evaluation_metrics.csv"), index=False)
    with open(os.path.join(CFG.MODEL_DIR, "conformal_thresholds.pkl"), "wb") as f:
        pickle.dump(conformal, f)

    print(f"\nMetrics + conformal thresholds saved to {CFG.MODEL_DIR}")
    print("=" * 60)
    print(f"✅ EVALUATION COMPLETE")
    print(f" Targets evaluated: {len(metrics)}")
    print(f" Conformal thresholds: {len(conformal)} targets")
    print("=" * 60)


if __name__ == "__main__":
    main()