File size: 3,008 Bytes
211c37c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""Explainability: SHAP feature importance (optional)."""

import numpy as np
from typing import Any, Dict, List, Optional

try:
    import shap
    HAS_SHAP = True
except ImportError:
    # shap is an optional dependency; without it we fall back to the
    # model's own importance attributes in compute_shap_importance().
    HAS_SHAP = False


def _rank_importance(values, feature_names: Optional[List[str]]) -> Dict[str, float]:
    """Pair scores with names (f0, f1, ... when names are missing) and
    return a plain dict ordered by descending importance."""
    names = feature_names or [f"f{i}" for i in range(len(values))]
    ranked = {name: float(score) for name, score in zip(names, values)}
    return dict(sorted(ranked.items(), key=lambda kv: -kv[1]))


def compute_shap_importance(model, X, feature_names=None, model_type="tree",
                            task_type="classification", n_background=100):
    """Compute per-feature importance scores, preferring SHAP when installed.

    Parameters
    ----------
    model : fitted estimator; for the fallback path it should expose
        ``feature_importances_`` or ``coef_``.
    X : 2-D array-like of samples to explain (unused on the fallback path).
    feature_names : optional list of column names; defaults to ``f0, f1, ...``.
        If shorter than the score vector, extra scores are silently dropped.
    model_type : "tree" or "linear" select the fast SHAP explainers; anything
        else uses the model-agnostic KernelExplainer, which is slow and is
        therefore capped at 50 rows with ``nsamples=50``.
    task_type : accepted for API compatibility; currently unused.
    n_background : number of leading rows of X used as the SHAP background set.

    Returns
    -------
    Dict[str, float] mapping feature name -> mean(|shap value|) (or the
    model-native importance), sorted descending. Empty dict on failure.
    """
    if not HAS_SHAP:
        print("  [SHAP not installed – using model feature_importances_ if available]")
        # Fallback: tree ensembles expose feature_importances_, linear models coef_.
        if hasattr(model, "feature_importances_"):
            return _rank_importance(model.feature_importances_, feature_names)
        if hasattr(model, "coef_"):
            coef = model.coef_
            # Multiclass linear models carry one coefficient row per class;
            # average absolute values across classes to get one score per feature.
            imp = np.abs(coef).mean(axis=0) if coef.ndim > 1 else np.abs(coef)
            return _rank_importance(imp, feature_names)
        return {}

    try:
        shap_values = None
        if model_type == "tree":
            explainer = shap.TreeExplainer(model)
            shap_values = explainer.shap_values(X)
            if isinstance(shap_values, list):
                # Older SHAP returns one array per class for multiclass models;
                # fold them into a single mean-|value| array.
                shap_values = np.mean([np.abs(s) for s in shap_values], axis=0)
        elif model_type == "linear":
            bg = X[:min(n_background, len(X))]
            explainer = shap.LinearExplainer(model, bg)
            shap_values = explainer.shap_values(X)
        else:
            bg = X[:min(n_background, len(X))]

            def pred_fn(x):
                # Prefer probabilities so attributions live on a probability scale.
                if hasattr(model, "predict_proba"):
                    return model.predict_proba(x)
                return model.predict(x)

            # KernelExplainer cost grows with rows * nsamples; cap both.
            explainer = shap.KernelExplainer(pred_fn, bg)
            shap_values = explainer.shap_values(X[:min(50, len(X))], nsamples=50)
            if isinstance(shap_values, list):
                shap_values = np.mean([np.abs(s) for s in shap_values], axis=0)

        if shap_values is None:
            return {}
        arr = np.asarray(shap_values)
        if arr.ndim == 3:
            # Newer SHAP versions return (n_samples, n_features, n_classes)
            # for multiclass tree models; collapse the class axis first so the
            # per-feature mean below yields a 1-D vector.
            arr = np.abs(arr).mean(axis=2)
        mean_abs = np.abs(arr).mean(axis=0)
        return _rank_importance(mean_abs, feature_names)
    except Exception as e:
        # SHAP can fail on unsupported model types; report and degrade gracefully.
        print(f"  [SHAP error: {e}]")
        return {}


def print_feature_importance(importance: Dict[str, float], top_k: int = 15):
    """Render a ranked bar chart of feature importances to stdout.

    Only the first ``top_k`` entries are shown; ``importance`` is assumed to
    be pre-sorted by descending score (as produced by
    ``compute_shap_importance``). Prints a placeholder line when empty.
    """
    if not importance:
        print("  No feature importance available.")
        return
    shown = list(importance.items())[:top_k]
    # Guard against an all-zero column so bar scaling never divides by zero.
    scale = max(score for _, score in shown) or 1e-9
    print(f"\n  Top {len(shown)} Feature Importances (SHAP / model-based):")
    for rank, (feat, score) in enumerate(shown, start=1):
        bar = "█" * max(1, int(score / scale * 30))
        print(f"  {rank:2d}. {feat:<30s} {score:.4f} {bar}")