# he99codes · deploy automl · 211c37c
"""Explainability: SHAP feature importance (optional)."""
import numpy as np
from typing import Any, Dict, List, Optional
try:
import shap
HAS_SHAP = True
except ImportError:
HAS_SHAP = False
def _rank_importance(scores, feature_names=None):
    """Pair per-feature scores with names and return them sorted descending."""
    scores = np.asarray(scores, dtype=float)
    # An explicit None/length test replaces ``feature_names or [...]``:
    # NumPy arrays and pandas Index objects raise ValueError when evaluated
    # for truthiness, which is what callers get from e.g. ``X.columns``.
    if feature_names is None or len(feature_names) == 0:
        names = [f"f{i}" for i in range(len(scores))]
    else:
        names = list(feature_names)
    paired = {name: float(score) for name, score in zip(names, scores)}
    return dict(sorted(paired.items(), key=lambda item: -item[1]))


def _model_based_importance(model, feature_names=None):
    """Rank features from ``feature_importances_`` or ``coef_``; {} if neither exists."""
    if hasattr(model, "feature_importances_"):
        return _rank_importance(model.feature_importances_, feature_names)
    if hasattr(model, "coef_"):
        coef = model.coef_
        # A 2-D coef_ holds one row per class/target: average the magnitudes.
        magnitudes = np.abs(coef).mean(axis=0) if coef.ndim > 1 else np.abs(coef)
        return _rank_importance(magnitudes, feature_names)
    return {}


def _mean_abs_shap(shap_values):
    """Collapse raw SHAP output into one mean-|value| score per feature."""
    if isinstance(shap_values, list):
        # List form: one (n_samples, n_features) array per class/output.
        magnitudes = np.mean([np.abs(s) for s in shap_values], axis=0)
    else:
        magnitudes = np.abs(np.asarray(shap_values))
        if magnitudes.ndim == 3:
            # Newer SHAP releases return (n_samples, n_features, n_outputs)
            # for multi-output models; average over the output axis.
            magnitudes = magnitudes.mean(axis=-1)
    return magnitudes.mean(axis=0)


def compute_shap_importance(model: Any, X: Any, feature_names: Optional[Any] = None,
                            model_type: str = "tree",
                            task_type: str = "classification",
                            n_background: int = 100) -> Dict[str, float]:
    """Return feature importances sorted from most to least important.

    When SHAP is available the score of a feature is the mean absolute SHAP
    value over the explained rows; otherwise the model's own
    ``feature_importances_`` or ``coef_`` is used.

    Args:
        model: Fitted estimator.
        X: Row-sliceable feature matrix supporting ``len()``.
        feature_names: Optional names, one per feature (list, NumPy array or
            pandas Index). Missing or empty names become ``f0, f1, ...``.
        model_type: ``"tree"`` uses ``shap.TreeExplainer``, ``"linear"`` uses
            ``shap.LinearExplainer``, anything else ``shap.KernelExplainer``.
        task_type: Accepted for interface compatibility; not used here.
        n_background: Maximum number of leading rows of ``X`` used as the
            background set for the linear and kernel explainers.

    Returns:
        Dict mapping feature name to importance, in descending order. Empty
        when no importance can be derived or SHAP raises.
    """
    if not HAS_SHAP:
        print(" [SHAP not installed – using model feature_importances_ if available]")
        return _model_based_importance(model, feature_names)

    try:
        if model_type == "tree":
            explainer = shap.TreeExplainer(model)
            shap_values = explainer.shap_values(X)
        elif model_type == "linear":
            background = X[:min(n_background, len(X))]
            explainer = shap.LinearExplainer(model, background)
            shap_values = explainer.shap_values(X)
        else:
            background = X[:min(n_background, len(X))]
            if hasattr(model, "predict_proba"):
                pred_fn = model.predict_proba
            else:
                pred_fn = model.predict
            explainer = shap.KernelExplainer(pred_fn, background)
            # Only the first 50 rows are explained, with 50 samples each.
            shap_values = explainer.shap_values(X[:min(50, len(X))], nsamples=50)

        if shap_values is None:
            return {}
        # Every branch goes through the same reduction so list-valued and
        # 3-D multi-output results are handled uniformly.
        return _rank_importance(_mean_abs_shap(shap_values), feature_names)
    except Exception as e:
        # Deliberate best-effort: explainability is optional, so report the
        # failure and return nothing rather than abort the caller.
        print(f" [SHAP error: {e}]")
        return {}
def print_feature_importance(importance: Dict[str, float], top_k: int = 15):
    """Print the ``top_k`` most important features with a proportional text bar.

    Args:
        importance: Mapping of feature name to importance score.
        top_k: Maximum number of features to list; values <= 0 list none.

    Returns:
        None. Output is written to stdout.
    """
    if not importance:
        print(" No feature importance available.")
        return

    # Rank explicitly so "Top k" holds even for an unsorted mapping; the sort
    # is stable, so an already-sorted mapping keeps its order.
    ranked = sorted(importance.items(), key=lambda item: -item[1])
    top = ranked[:max(top_k, 0)]

    # ``default`` covers an empty selection (top_k <= 0); ``or`` avoids a
    # division by zero when every selected score is 0.
    max_val = max((v for _, v in top), default=0.0) or 1e-9

    print(f"\n Top {len(top)} Feature Importances (SHAP / model-based):")
    for i, (name, val) in enumerate(top, 1):
        # Bars scale to 30 characters and always show at least one block.
        bar = "█" * max(1, int(val / max_val * 30))
        # str() keeps non-string labels (e.g. integer column names) from
        # breaking the ``s`` format spec.
        print(f" {i:2d}. {str(name):<30s} {val:.4f} {bar}")