# File size: 2,461 Bytes
# b72652e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import numpy as np

# Placeholder league-strength multipliers standing in for UEFA coefficients.
# These are mock values; a production setup would load them dynamically
# instead of hard-coding them here.
LEAGUE_COEFFICIENTS = {
    'Premier League': 1.00,
    'LaLiga': 0.90,
    'Serie A': 0.85,
    'Bundesliga': 0.82,
    'Ligue 1': 0.75,
    'Eredivisie': 0.60,
    'Liga Portugal': 0.55,
}

def calculate_risk_score(df, contract_col='Contract_Years_Left', age_col='Age', injury_col='Injury_Days_Total_24m'):
    """
    Derive the three 0/1 risk flags for "The Busquets Factor" and their sum.

    The input frame is not modified; all work happens on a copy.

    Flags written (each is 0 on every row when its source column is absent):
      - Risk_Contract: 1 where ``contract_col`` is below 1.5
      - Risk_Age:      1 where ``age_col`` is above 30
      - Risk_Injury:   1 where ``injury_col`` is above 60

    ``Total_Risk_Score`` is the sum of the three flags, so it ranges 0 to 3.

    Returns the copied DataFrame with the four columns added.
    """
    scored = df.copy()

    # (flag column to write, source column to read, test marking a row risky)
    flag_rules = (
        ('Risk_Contract', contract_col, lambda years_left: years_left < 1.5),
        ('Risk_Age', age_col, lambda age: age > 30),
        ('Risk_Injury', injury_col, lambda days_out: days_out > 60),
    )

    for flag_name, source_col, is_risky in flag_rules:
        if source_col not in scored.columns:
            # No data for this dimension: treat every row as not at risk.
            scored[flag_name] = 0
            continue
        scored[flag_name] = np.where(is_risky(scored[source_col]), 1, 0)

    # Aggregate score in [0, 3]
    scored['Total_Risk_Score'] = (
        scored['Risk_Contract'] + scored['Risk_Age'] + scored['Risk_Injury']
    )
    return scored

def calculate_league_index(df, league_col='Current_League', *, coefficients=None, default_index=0.4):
    """
    Applies the League Strength multiplier based on UEFA coefficients.

    Works on a copy of ``df``; the input frame is left untouched.

    Args:
        df: DataFrame holding one row per player.
        league_col: Name of the column containing the league name.
        coefficients: Optional mapping of league name -> strength multiplier.
            When omitted, the module-level ``LEAGUE_COEFFICIENTS`` table is
            used. Passing a mapping lets callers supply dynamically loaded
            coefficients without editing this module.
        default_index: Multiplier assigned to rows whose league is missing
            from the mapping (including null leagues). Defaults to 0.4,
            the "tier 3" fallback.

    Returns:
        The copied DataFrame. A ``League_Index`` column is added only when
        ``league_col`` exists in the frame; otherwise the copy is returned
        without that column.
    """
    indexed = df.copy()
    if league_col in indexed.columns:
        # Resolve the default table lazily so it is only needed when no
        # explicit mapping was supplied and there is something to map.
        lookup = LEAGUE_COEFFICIENTS if coefficients is None else coefficients
        # Unmapped leagues come back as NaN from .map(); fill with the fallback.
        indexed['League_Index'] = indexed[league_col].map(lookup).fillna(default_index)
    return indexed

def encode_categorical_features(df, position_col='Position', nationality_tier_col='Nationality_Tier'):
    """
    One-hot encode player positions and cast nationality tiers to integers.

    Operates on a copy of ``df``. Each step is skipped when its column is
    not present:
      - ``position_col`` is replaced by indicator columns prefixed ``Pos``
        (one per distinct value; no limit on the number of columns is
        enforced here).
      - ``nationality_tier_col`` is converted with ``astype(int)``, which
        raises if a value cannot be cast to an integer.

    Returns the transformed copy.
    """
    encoded = df.copy()

    # Expand the position column into one indicator column per position.
    if position_col in encoded.columns:
        encoded = pd.get_dummies(encoded, prefix='Pos', columns=[position_col])

    # Tier labels (e.g., Tier 1 = EU5/SA, Tier 2 = Rest of World) as plain ints.
    if nationality_tier_col in encoded.columns:
        tier_values = encoded[nationality_tier_col]
        encoded[nationality_tier_col] = tier_values.astype(int)

    return encoded

def build_all_features(df):
    """
    Run the full feature-engineering pipeline on ``df``.

    Stages are applied in order, each receiving the previous stage's output:
    risk flags, then league index, then categorical encoding.
    """
    pipeline = (
        calculate_risk_score,
        calculate_league_index,
        encode_categorical_features,
    )
    for stage in pipeline:
        df = stage(df)
    return df