"""Explainability: SHAP feature importance (optional)."""
import numpy as np
from typing import Any, Dict, List, Optional

try:
    import shap
    HAS_SHAP = True
except ImportError:
    HAS_SHAP = False

# KernelExplainer is slow, so the generic (non-tree, non-linear) path only
# explains a small prefix of X with a small perturbation budget.
_KERNEL_MAX_ROWS = 50
_KERNEL_NSAMPLES = 50


def _rank_importance(values: Any, feature_names: Optional[List[str]] = None) -> Dict[str, float]:
    """Pair per-feature scores with names and order them by descending score."""
    scores = np.asarray(values, dtype=float)
    # Explicit None/empty test instead of `feature_names or ...`: the truth
    # value of a NumPy array or pandas Index is ambiguous and raises ValueError.
    if feature_names is None or len(feature_names) == 0:
        names = [f"f{i}" for i in range(len(scores))]
    else:
        names = list(feature_names)
    ranked = {name: float(score) for name, score in zip(names, scores)}
    return dict(sorted(ranked.items(), key=lambda item: -item[1]))


def _model_based_importance(model: Any, feature_names: Optional[List[str]] = None) -> Dict[str, float]:
    """Rank features using the model's own ``feature_importances_`` or ``coef_``."""
    if hasattr(model, "feature_importances_"):
        return _rank_importance(model.feature_importances_, feature_names)
    if hasattr(model, "coef_"):
        magnitude = np.abs(np.asarray(model.coef_))
        if magnitude.ndim > 1:
            # One coefficient row per class/output: average them per feature.
            magnitude = magnitude.mean(axis=0)
        return _rank_importance(magnitude, feature_names)
    return {}


def _mean_abs_shap(shap_values: Any) -> np.ndarray:
    """Reduce raw explainer output to one mean-|SHAP| score per feature."""
    if isinstance(shap_values, list):
        # List form: one (n_samples, n_features) array per class/output.
        shap_values = np.mean([np.abs(s) for s in shap_values], axis=0)
    magnitude = np.abs(np.asarray(shap_values))
    if magnitude.ndim == 3:
        # Array form used by newer SHAP releases for multi-output models:
        # (n_samples, n_features, n_outputs). Average over the outputs.
        magnitude = magnitude.mean(axis=-1)
    return magnitude.mean(axis=0)


def compute_shap_importance(model, X, feature_names=None, model_type="tree",
                            task_type="classification", n_background=100):
    """Return feature importances as a dict ordered from most to least important.

    When SHAP is installed, the importance of a feature is the mean absolute
    SHAP value over the explained rows. When it is not installed, the model's
    ``feature_importances_`` or (absolute) ``coef_`` is used instead.

    Args:
        model: Fitted estimator to explain.
        X: Feature matrix; must support ``len()`` and row slicing ``X[:k]``.
        feature_names: Optional sequence of names (list, NumPy array, pandas
            Index, ...). ``None`` or an empty sequence yields ``f0, f1, ...``.
        model_type: ``"tree"`` selects ``shap.TreeExplainer``, ``"linear"``
            selects ``shap.LinearExplainer``; any other value selects
            ``shap.KernelExplainer``.
        task_type: Accepted for interface compatibility; not used here.
        n_background: Maximum number of leading rows of ``X`` used as the
            background sample for the linear and kernel explainers.

    Returns:
        ``{feature_name: importance}`` sorted by descending importance, or an
        empty dict when no importance can be derived or SHAP raises.
    """
    if not HAS_SHAP:
        print(" [SHAP not installed – using model feature_importances_ if available]")
        return _model_based_importance(model, feature_names)

    try:
        if model_type == "tree":
            explainer = shap.TreeExplainer(model)
            shap_values = explainer.shap_values(X)
        elif model_type == "linear":
            background = X[:min(n_background, len(X))]
            explainer = shap.LinearExplainer(model, background)
            shap_values = explainer.shap_values(X)
        else:
            background = X[:min(n_background, len(X))]

            def pred_fn(x):
                # Prefer class probabilities when the model exposes them.
                if hasattr(model, "predict_proba"):
                    return model.predict_proba(x)
                return model.predict(x)

            explainer = shap.KernelExplainer(pred_fn, background)
            shap_values = explainer.shap_values(
                X[:min(_KERNEL_MAX_ROWS, len(X))], nsamples=_KERNEL_NSAMPLES
            )

        if shap_values is None:
            return {}
        return _rank_importance(_mean_abs_shap(shap_values), feature_names)
    except Exception as e:
        # Deliberate best-effort: explainability must never break the caller.
        print(f" [SHAP error: {e}]")
        return {}


def print_feature_importance(importance: Dict[str, float], top_k: int = 15):
    """Print the first ``top_k`` entries of ``importance`` with a text bar chart.

    Entries are shown in the dict's own iteration order, so the caller is
    expected to pass an already-ranked mapping such as the one returned by
    ``compute_shap_importance``. Bars are scaled so the largest displayed
    value spans 30 characters; every row gets at least one bar character.

    Args:
        importance: Mapping of feature name to importance score.
        top_k: Maximum number of rows to print.
    """
    if not importance:
        print(" No feature importance available.")
        return

    top = list(importance.items())[:top_k]
    # `default` keeps top_k <= 0 from raising on an empty sequence; the
    # `or 1e-9` avoids dividing by zero when every displayed value is 0.
    max_val = max((v for _, v in top), default=0.0) or 1e-9
    print(f"\n Top {len(top)} Feature Importances (SHAP / model-based):")
    for i, (name, val) in enumerate(top, 1):
        bar = "█" * max(1, int(val / max_val * 30))
        print(f" {i:2d}. {name:<30s} {val:.4f} {bar}")