Spaces:

he99codes
/

Tabular_AutoML

Running

App Files Files Community

Tabular_AutoML / automl /explainability /shap_explainer.py

he99codes

deploy automl

211c37c 3 days ago

raw

history blame contribute delete

3.01 kB

	"""Explainability: SHAP feature importance (optional)."""

	import numpy as np
	from typing import Any, Dict, List, Optional

	try:
	    import shap
	    HAS_SHAP = True
	except ImportError:
	    # SHAP is optional: compute_shap_importance() falls back to the model's
	    # own importance attributes when the package is missing.
	    HAS_SHAP = False


	def _rank_importance(scores, feature_names=None):
	    """Pair per-feature scores with names and sort by descending score.

	    Args:
	        scores: One-dimensional sequence/array with one score per feature.
	        feature_names: Optional sequence of names (list, tuple, NumPy array,
	            pandas Index, ...). When ``None`` or empty, names default to
	            ``f0, f1, ...``.

	    Returns:
	        Dict mapping name -> float score, ordered from largest to smallest.

	    Raises:
	        ValueError: If ``scores`` is not one-dimensional.
	    """
	    scores = np.asarray(scores, dtype=float)
	    if scores.ndim != 1:
	        raise ValueError(
	            f"expected one score per feature, got array of shape {scores.shape}"
	        )
	    # Explicit None/len check instead of `feature_names or ...`: truth-testing
	    # a multi-element NumPy array or pandas Index raises ValueError.
	    if feature_names is None or len(feature_names) == 0:
	        names = [f"f{i}" for i in range(len(scores))]
	    else:
	        names = list(feature_names)
	    paired = dict(zip(names, scores.tolist()))
	    # reverse=True keeps the original relative order of tied scores.
	    return dict(sorted(paired.items(), key=lambda item: item[1], reverse=True))


	def _mean_abs_shap(shap_values):
	    """Collapse raw ``shap_values`` output to one mean-|SHAP| score per feature."""
	    if isinstance(shap_values, list):
	        # List form: one (n_samples, n_features) array per class/output.
	        magnitudes = np.mean([np.abs(np.asarray(s)) for s in shap_values], axis=0)
	    else:
	        magnitudes = np.abs(np.asarray(shap_values))
	        if magnitudes.ndim == 3:
	            # Array form used by newer SHAP releases for multi-output models:
	            # (n_samples, n_features, n_outputs). Average the output axis away.
	            magnitudes = magnitudes.mean(axis=-1)
	    return magnitudes.mean(axis=0)


	def compute_shap_importance(model, X, feature_names=None, model_type="tree",
	                            task_type="classification", n_background=100):
	    """Rank features by mean absolute SHAP value, with a model-based fallback.

	    Args:
	        model: Fitted estimator. Without SHAP it must expose
	            ``feature_importances_`` or ``coef_`` to yield a result.
	        X: Samples to explain; must support ``len()`` and row slicing.
	        feature_names: Optional sequence of feature names (any sequence type,
	            including NumPy arrays). Defaults to ``f0, f1, ...``.
	        model_type: ``"tree"`` uses ``shap.TreeExplainer``, ``"linear"`` uses
	            ``shap.LinearExplainer``; any other value uses
	            ``shap.KernelExplainer`` on ``predict_proba`` (or ``predict`` when
	            the model has no ``predict_proba``), explaining at most the first
	            50 rows with ``nsamples=50``.
	        task_type: Accepted for interface compatibility; not used here.
	        n_background: Number of leading rows of ``X`` used as background data
	            for the linear and kernel explainers.

	    Returns:
	        Dict mapping feature name -> importance, sorted in descending order.
	        Returns ``{}`` when no importance can be derived or when SHAP raises
	        (the error is printed rather than propagated).
	    """
	    if not HAS_SHAP:
	        print(" [SHAP not installed – using model feature_importances_ if available]")
	        # Fallback: use the estimator's own importance attributes.
	        if hasattr(model, "feature_importances_"):
	            return _rank_importance(model.feature_importances_, feature_names)
	        if hasattr(model, "coef_"):
	            coef = np.abs(np.asarray(model.coef_, dtype=float))
	            # Multi-row coefficients (one row per class) are averaged per feature.
	            scores = coef.mean(axis=0) if coef.ndim > 1 else coef
	            return _rank_importance(scores, feature_names)
	        return {}

	    # Deliberately best-effort: explainability must never break the caller,
	    # so any SHAP failure is reported and turned into an empty result.
	    try:
	        if model_type == "tree":
	            shap_values = shap.TreeExplainer(model).shap_values(X)
	        elif model_type == "linear":
	            background = X[:min(n_background, len(X))]
	            shap_values = shap.LinearExplainer(model, background).shap_values(X)
	        else:
	            background = X[:min(n_background, len(X))]
	            predict = model.predict_proba if hasattr(model, "predict_proba") else model.predict
	            explainer = shap.KernelExplainer(predict, background)
	            # Kernel SHAP is expensive, so only a small sample is explained.
	            shap_values = explainer.shap_values(X[:min(50, len(X))], nsamples=50)

	        if shap_values is None:
	            return {}
	        return _rank_importance(_mean_abs_shap(shap_values), feature_names)
	    except Exception as e:
	        print(f" [SHAP error: {e}]")
	        return {}


	def print_feature_importance(importance: Dict[str, float], top_k: int = 15) -> None:
	    """Print the leading feature importances as a text bar chart.

	    Args:
	        importance: Mapping of feature name -> importance. Entries are shown
	            in the mapping's own iteration order (no re-sorting is done here).
	        top_k: Maximum number of entries to print.

	    Prints a "No feature importance available." notice when the mapping is
	    empty/None or when ``top_k`` leaves no entries to show.
	    """
	    top = list(importance.items())[:top_k] if importance else []
	    # Check emptiness after slicing: top_k == 0 (or a negative top_k that
	    # slices everything away) would otherwise crash in max() below.
	    if not top:
	        print(" No feature importance available.")
	        return
	    # Guard against division by zero when every shown value is 0.
	    max_val = max(v for _, v in top) or 1e-9
	    print(f"\n Top {len(top)} Feature Importances (SHAP / model-based):")
	    for i, (name, val) in enumerate(top, 1):
	        # Bars scale to 30 characters for the largest value; always draw at least one.
	        bar = "█" * max(1, int(val / max_val * 30))
	        print(f" {i:2d}. {name:<30s} {val:.4f} {bar}")