File size: 11,157 Bytes
b72652e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
import streamlit as st
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import os
import xgboost as xgb
from src.models.explain import generate_shap_explanation
from src.features.build_features import calculate_risk_score

# Page chrome must be configured before any other Streamlit call renders.
st.set_page_config(page_title="FairValue Transfer Cap Estimator", layout="wide")

# ── Currency Config ───────────────────────────────────────────────────────────
EUR_TO_GBP = 0.85  # Approximate conversion — review quarterly

st.title("FairValue Transfer Cap Estimator")
_INTRO_MD = (
    "A rigorous, data-driven 'Transfer Ceiling Calculator' grounded in ML "
    "and Hedonic Pricing Theory."
)
st.markdown(_INTRO_MD)


# ── Data Loading ─────────────────────────────────────────────────────────────
# @st.cache_data (not cache_resource) so each session gets its own serialised
# copy of the DataFrame — downstream mutations cannot leak across users.
@st.cache_data
def load_player_data():
    """Read the processed player-feature CSV into a DataFrame.

    Returns None when the file is absent so the caller can surface a
    friendly error instead of crashing. Any column whose name contains
    both 'market' and 'value' (case-insensitive) is normalised to
    'market_value_in_eur', and duplicate column labels are dropped.
    """
    csv_path = "data/processed/app_features.csv"
    if not os.path.exists(csv_path):
        return None
    frame = pd.read_csv(csv_path)
    renames = {}
    for column in frame.columns:
        lowered = column.lower()
        if 'market' in lowered and 'value' in lowered:
            renames[column] = 'market_value_in_eur'
    if renames:
        frame.rename(columns=renames, inplace=True)
    return frame.loc[:, ~frame.columns.duplicated()].copy()


# ── Model Loading ─────────────────────────────────────────────────────────────
# @st.cache_resource is correct here — the model object is shared, not copied.
@st.cache_resource
def load_model():
    """Locate and load the XGBoost regressor from the project root.

    Tries both known filename casings and returns a
    (model, size_bytes, path) triple, or (None, 0, None) when neither
    file exists. The byte size is surfaced so the caller can detect a
    truncated or placeholder upload.
    """
    candidates = ("fairvalue_xgboost.json", "FairValue_xgboost.json")
    for candidate in candidates:
        if not os.path.exists(candidate):
            continue
        regressor = xgb.XGBRegressor()
        regressor.load_model(candidate)
        return regressor, os.path.getsize(candidate), candidate
    return None, 0, None


df = load_player_data()
model, model_size, model_path = load_model()

# Guard clauses: stop rendering early with an actionable message when a
# pipeline artefact is missing, instead of failing deep inside the UI code.
if df is None:
    st.error("⚠️ Data file not found: `data/processed/app_features.csv`. Re-run the pipeline.")
    st.stop()

if model is None:
    st.error("⚠️ MODEL_FILE_NOT_FOUND — ensure `fairvalue_xgboost.json` is in the project root.")
    st.stop()

# A JSON model file under 1 KB cannot hold a trained booster — treat it as a
# corrupt/placeholder upload rather than predicting with it.
if model_size < 1000:
    st.error(
        f"❌ MODEL CORRUPTED: `{model_path}` is only {model_size:,} bytes "
        "(expected ~400 KB). Please re-upload the correct model file."
    )
    st.stop()  # Fixed: was missing — execution continued and produced silent zero predictions


# ── Sidebar ───────────────────────────────────────────────────────────────────
st.sidebar.header("Player Transfer Profile")
input_mode = st.sidebar.radio("Input Mode", ["Select Existing Player", "Create Custom Player"])

# The processed CSV has used several name-column spellings across pipeline
# versions; take the first one present (None if none match).
name_col = next(
    (c for c in ['name', 'name_x', 'Player_Name', 'Name'] if c in df.columns), None
)

selected_name = ""


def _clamped(value, lo, hi):
    """Clamp a widget default into [lo, hi].

    Streamlit raises StreamlitAPIException when a slider/number_input
    default lies outside its bounds, so an out-of-range database value
    (e.g. a 7-year contract or a 41-year-old player) must not crash the
    page — it is pinned to the nearest bound instead.
    """
    return min(max(value, lo), hi)


if input_mode == "Select Existing Player":
    if name_col is None:
        st.error("No player name column found in data. Please re-run the pipeline.")
        st.stop()
    player_list = sorted(df[name_col].astype(str).unique().tolist())
    selected_name = st.sidebar.selectbox("Target Database Player", player_list)
    # Keep a one-row DataFrame (iloc[0:1]) rather than a Series so downstream
    # column assignments work unchanged.
    player_data = df[df[name_col].astype(str) == selected_name].iloc[0:1].copy()

    contract_years = st.sidebar.slider(
        "Contract Years Remaining", 0.5, 6.0,
        _clamped(float(player_data['Contract_Years_Left'].iloc[0]), 0.5, 6.0), 0.5
    )
    age = st.sidebar.slider(
        "Age", 16, 40, _clamped(int(player_data['Age'].iloc[0]), 16, 40)
    )
    inj_col = next(
        (c for c in ['Injury_Days_Total_24m', 'Injury_Days'] if c in player_data.columns), None
    )
    injuries = st.sidebar.number_input(
        "Injury missed days (24m)", 0, 500,
        _clamped(int(player_data[inj_col].iloc[0]), 0, 500) if inj_col else 10
    )

else:
    # Custom mode: seed every numeric feature with the dataset median so the
    # model receives a plausible, fully-populated row.
    player_data = df.median(numeric_only=True).to_frame().T
    contract_years = st.sidebar.slider("Contract Years Remaining", 0.5, 6.0, 2.0, 0.5)
    age = st.sidebar.slider("Age", 16, 40, 24)
    injuries = st.sidebar.number_input("Injury missed days (24m)", 0, 500, 10)

    if 'market_value_in_eur' in player_data.columns:
        m_val = st.sidebar.number_input("Current Market Value Estimation (£m)", 1.0, 200.0, 20.0)
        # UI collects GBP millions; the stored feature is EUR, so convert.
        player_data['market_value_in_eur'] = (m_val * 1_000_000) / EUR_TO_GBP

asking_price = st.sidebar.number_input("Selling Club Asking Price (£m)", 1.0, 300.0, 45.0)


# ── Hype Factor Integration (consumed from Page 3 Live Player Intel) ──────────
# Pull the sentiment-derived premium for the selected player out of session
# state; defaults to 0.0 when Page 3 has not been run (or in custom mode,
# where no player name is selected).
hype_metrics = st.session_state.get('player_hype_metrics', {})
if selected_name:
    hype_entry = hype_metrics.get(selected_name.lower(), {})
else:
    hype_entry = {}
hype_premium_pct = hype_entry.get('hype_premium_percent', 0.0)

if hype_premium_pct == 0.0:
    st.sidebar.caption("No Hype Factor loaded. Run Page 3 to enrich this estimate.")
else:
    sign = "" if hype_premium_pct <= 0 else "+"
    st.sidebar.info(
        f"💡 **Hype Factor Active:** {sign}{hype_premium_pct:.1f}%  \n"
        "*Run Page 3 → Live Player Intel to refresh.*"
    )


# ── Build Inference Vector ─────────────────────────────────────────────────────
# NOTE(review): assumes the sklearn XGBoost wrapper exposes feature_names_in_
# after load_model() — confirm; some xgboost versions only set it on fit().
expected_cols = model.feature_names_in_
player_data = player_data.copy()
# Overwrite stored values with the live widget values so what-if edits
# flow into the prediction.
player_data['Contract_Years_Left'] = contract_years
player_data['Age'] = age
if 'Injury_Days_Total_24m' in player_data.columns:
    player_data['Injury_Days_Total_24m'] = injuries

# Recompute derived risk flags so they reflect the slider values, not stale DB data.
# Without this, Risk_Contract / Risk_Age / Risk_Injury don't update when sliders move.
player_data = calculate_risk_score(
    player_data,
    contract_col='Contract_Years_Left',
    age_col='Age',
    injury_col='Injury_Days_Total_24m'
)

# Align to the model's training-time column order; features absent from the
# row are zero-filled, extras are dropped.
X_infer = player_data.reindex(columns=expected_cols, fill_value=0)


if st.button("Calculate Prediction", type="primary"):
    # The model predicts on a log1p scale (see the expm1 inversion below);
    # clamp at 0 so numerical noise can never show a negative fee.
    raw_preds = model.predict(X_infer)
    log_pv = raw_preds[0]
    baseline_pv = max(float(np.expm1(log_pv)), 0.0)

    # NOTE(review): the prediction is displayed in £ below, while the market
    # value feature is in EUR — confirm the model's target currency matches.
    baseline_pv_m = baseline_pv / 1_000_000
    conservative_bound = baseline_pv * 0.85  # 15% prudence haircut on baseline

    # Internal rule-based risk discount (transparent to users):
    # short contract, age > 30, and heavy injury history each subtract a
    # fixed share; the three discounts are additive.
    risk_pct = (
        (0.20 if contract_years < 1.5 else 0.0) +
        (0.15 if age > 30 else 0.0) +
        (0.10 if injuries > 60 else 0.0)
    )

    # External hype multiplier from Page 3 NLP sentiment (0% ⇒ multiplier 1.0)
    hype_multiplier = 1.0 + (hype_premium_pct / 100.0)
    hard_cap = conservative_bound * (1.0 - risk_pct) * hype_multiplier
    hard_cap_m = hard_cap / 1_000_000

    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader("Price Range Recommendation")
        # Gauge shows the asking price against the hard cap; the delta turns
        # red when the ask exceeds the cap.
        fig = go.Figure(go.Indicator(
            mode="gauge+number+delta",
            value=asking_price,
            delta={'reference': hard_cap_m, 'increasing': {'color': "red"}},
            title={'text': "Asking Price vs Hard Cap (£m)"},
            gauge={
                # Axis extends 20% past the ask (min 100) so the needle never pins.
                'axis': {'range': [0, max(asking_price * 1.2, 100)]},
                'threshold': {'line': {'color': "white", 'width': 4}, 'value': hard_cap_m}
            }
        ))
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        st.subheader("Metrics Breakdown")
        st.metric("Predicted Market Value (Baseline)", f"£{baseline_pv_m:.1f}m")
        st.metric("Risk-Adjusted Hard Cap", f"£{hard_cap_m:.1f}m")
        if hype_premium_pct != 0.0:
            sign = "+" if hype_premium_pct > 0 else ""
            st.metric("Hype / Form Adjustment", f"{sign}{hype_premium_pct:.1f}%")
        # Overpay warning: asking price above the risk-adjusted cap.
        if asking_price > hard_cap_m:
            overpay = asking_price - hard_cap_m
            st.error(f"⚠️ Winner's Curse Risk: £{overpay:.1f}m Overpay")
        else:
            st.success("✅ Asking price is within Fair Value bounds.")

    # ── SHAP Explainability Panel ─────────────────────────────────────────────
    # Fixed: shap was imported but never used. Now wired to generate_shap_explanation.
    st.markdown("---")
    st.subheader("🔬 XAI Explainability — What Drives This Valuation?")
    with st.spinner("Computing SHAP feature contributions..."):
        try:
            # Second return value is assumed sorted by absolute contribution
            # with 'Feature' / 'Contribution_to_LogPrice' columns — per the
            # usage below; confirm against src.models.explain.
            _, explanation_df = generate_shap_explanation(model, X_infer)
            top_shap = explanation_df.head(10).copy()
            top_shap['Label'] = top_shap['Feature'].str.replace('_', ' ').str.title()

            fig_shap = go.Figure(go.Bar(
                x=top_shap['Contribution_to_LogPrice'],
                y=top_shap['Label'],
                orientation='h',
                # Red for value-depressing features, green for value-boosting.
                marker_color=[
                    '#e74c3c' if v < 0 else '#2ecc71'
                    for v in top_shap['Contribution_to_LogPrice']
                ],
                text=[f"{v:+.3f}" for v in top_shap['Contribution_to_LogPrice']],
                textposition='outside',
            ))
            fig_shap.update_layout(
                title="Top 10 Feature Contributions to Transfer Fee (Log-Price Scale)",
                xaxis_title="SHAP Value (Additive impact on log transfer fee)",
                yaxis={'categoryorder': 'total ascending'},
                height=420,
                margin=dict(l=10, r=10, t=50, b=10),
            )
            st.plotly_chart(fig_shap, use_container_width=True)
            st.caption(
                "🟢 Green = boosts transfer value | 🔴 Red = depresses transfer value | "
                "Sorted by absolute magnitude."
            )
        except Exception as shap_err:
            # SHAP is best-effort decoration: a failure degrades to a warning
            # rather than killing the prediction view.
            st.warning(f"SHAP panel unavailable: {shap_err}")

    with st.expander("🛠️ Technical Deep Dive (Internal Metrics)"):
        st.write(f"**Model:** `{model_path}` ({model_size:,} bytes)")
        st.write(f"**Raw Log-Scale Prediction:** `{log_pv:.4f}`")
        mv_val = (
            X_infer['market_value_in_eur'].iloc[0]
            if 'market_value_in_eur' in X_infer.columns else "MISSING"
        )
        # NOTE(review): numpy scalars are not int/float instances, so this
        # isinstance check may take the fallback branch even for numeric
        # values — confirm the dtype coming out of the DataFrame.
        if isinstance(mv_val, (int, float)):
            st.write(f"**Market Value Input (EUR):** `{mv_val:,.0f}`")
        else:
            st.write(f"**Market Value Input:** `{mv_val}`")
        st.write("**Full Feature Vector:**", X_infer)

# Static footer: headline training/validation stats for context.
st.markdown("---")
st.subheader("Model Performance Assessment")
n_transfers = len(df)
st.markdown(f"**Training Set:** `{n_transfers:,}` transfers | **Engine:** XGBoost + RandomizedSearchCV")
st.markdown("**Validation MAE:** `~£23,980,000` | **Status:** Production Ready")