# Extraction residue from the file-viewer page: "File size: 2,461 Bytes", id b72652e.
import pandas as pd
import numpy as np
# Mock UEFA coefficients used as league-strength multipliers.
# Hard-coded here; a full production setup would load them dynamically.
LEAGUE_COEFFICIENTS = {
    "Premier League": 1.0,
    "LaLiga": 0.9,
    "Serie A": 0.85,
    "Bundesliga": 0.82,
    "Ligue 1": 0.75,
    "Eredivisie": 0.6,
    "Liga Portugal": 0.55,
}
def calculate_risk_score(
    df,
    contract_col='Contract_Years_Left',
    age_col='Age',
    injury_col='Injury_Days_Total_24m',
    *,
    contract_years_threshold=1.5,
    age_threshold=30,
    injury_days_threshold=60,
):
    """
    Compute binary risk flags for "The Busquets Factor" and their sum.

    Adds ``Risk_Contract``, ``Risk_Age``, ``Risk_Injury`` (each 0 or 1) and
    ``Total_Risk_Score`` (0 to 3) to a copy of ``df``; the input frame is
    not modified.

    Args:
        df: Input DataFrame.
        contract_col: Column holding the contract years remaining.
        age_col: Column holding the player's age.
        injury_col: Column holding the total injury days.
        contract_years_threshold: A contract is flagged when the value is
            strictly below this number (default 1.5).
        age_threshold: Age is flagged when strictly above this number
            (default 30).
        injury_days_threshold: Injury history is flagged when strictly above
            this number (default 60).

    Returns:
        A new DataFrame with the four risk columns added. A flag whose source
        column is absent is set to 0 for every row.
    """
    df = df.copy()

    # (flag column, source column, vectorised predicate) for each risk rule.
    rules = (
        ('Risk_Contract', contract_col, lambda s: s < contract_years_threshold),
        ('Risk_Age', age_col, lambda s: s > age_threshold),
        ('Risk_Injury', injury_col, lambda s: s > injury_days_threshold),
    )

    for flag_col, source_col, is_risky in rules:
        if source_col in df.columns:
            # Comparisons against NaN evaluate to False, so missing values
            # end up with a flag of 0.
            df[flag_col] = np.where(is_risky(df[source_col]), 1, 0)
        else:
            df[flag_col] = 0

    # Aggregate score in the range [0, 3].
    df['Total_Risk_Score'] = df['Risk_Contract'] + df['Risk_Age'] + df['Risk_Injury']
    return df
def calculate_league_index(df, league_col='Current_League', *, coefficients=None, default_index=0.4):
    """
    Add a ``League_Index`` strength multiplier derived from the league name.

    Works on a copy of ``df``; the input frame is not modified.

    Args:
        df: Input DataFrame.
        league_col: Column holding the league name.
        coefficients: Mapping of league name to multiplier. When ``None``,
            the module-level ``LEAGUE_COEFFICIENTS`` table is used.
        default_index: Multiplier used for leagues missing from the mapping
            (including missing values) and for every row when ``league_col``
            is absent. Defaults to 0.4 (the "tier 3" fallback).

    Returns:
        A new DataFrame that always contains ``League_Index``.
    """
    df = df.copy()
    if coefficients is None:
        coefficients = LEAGUE_COEFFICIENTS

    if league_col in df.columns:
        df['League_Index'] = df[league_col].map(coefficients).fillna(default_index)
    else:
        # Keep the output schema stable, as the risk flags do: emit the
        # fallback value instead of omitting the column.
        df['League_Index'] = default_index
    return df
def encode_categorical_features(df, position_col='Position', nationality_tier_col='Nationality_Tier'):
    """
    One-hot encode positions and cast nationality tiers to integers.

    Works on a copy of ``df``; the input frame is not modified.

    Args:
        df: Input DataFrame.
        position_col: Column to one-hot encode. It is replaced by one
            ``Pos_<value>`` indicator column per distinct value.
        nationality_tier_col: Column holding the nationality tier
            (e.g. Tier 1 = EU5/SA, Tier 2 = Rest of World).

    Returns:
        A new DataFrame with the encoded columns. Either step is skipped when
        its source column is absent.
    """
    df = df.copy()

    if position_col in df.columns:
        # NOTE: every distinct position gets its own column; nothing here
        # caps or groups rare positions, so the width grows with cardinality.
        df = pd.get_dummies(df, columns=[position_col], prefix='Pos')

    if nationality_tier_col in df.columns:
        tiers = df[nationality_tier_col]
        if tiers.isna().any():
            # A plain int cast raises on missing values; the nullable
            # integer dtype keeps them as <NA> instead of failing.
            df[nationality_tier_col] = pd.to_numeric(tiers).astype('Int64')
        else:
            df[nationality_tier_col] = tiers.astype(int)
    return df
def build_all_features(df):
    """
    Run the full feature-engineering pipeline on ``df``.

    Applies the risk scoring, league indexing and categorical encoding steps
    in that order, feeding each step's result into the next, and returns the
    final frame.
    """
    pipeline = (
        calculate_risk_score,
        calculate_league_index,
        encode_categorical_features,
    )
    features = df
    for step in pipeline:
        features = step(features)
    return features
|