fairvalue-api / src /data /preprocess.py
FairValue
feat: production web app — React/Vite frontend + FastAPI backend with Render/Vercel deployment
b72652e
import pandas as pd
import numpy as np
# Rough historical inflation index for football transfer fees.
# 2024 is the base year (multiplier 1.00). A fee from an earlier year is
# multiplied by that year's entry to express it in 2024 purchasing power,
# e.g. a 2017 fee is scaled by 1.50.
INFLATION_RATES = {
    2015: 2.10,
    2016: 1.85,
    2017: 1.50,  # Pre-Neymar era adjustment
    2018: 1.40,
    2019: 1.35,
    2020: 1.25,  # COVID market impact
    2021: 1.20,
    2022: 1.10,
    2023: 1.05,
    2024: 1.00,
}
def adjust_for_inflation(df, fee_col='market_value_in_eur', year_col='date',
                         eur_to_gbp=0.85):
    """
    Adjust past transfer fees or market values to their 2024 equivalent.

    Works on a copy of ``df``; the caller's frame is not modified.

    Args:
        df: DataFrame containing ``fee_col`` and ``year_col``.
        fee_col: Name of the column holding the fee / market value in EUR.
        year_col: Name of the column holding either a plain numeric year
            or a date (date strings or datetime values).
        eur_to_gbp: Flat EUR -> GBP conversion rate. Defaults to the
            approximate 0.85 rate used for the MVP.

    Returns:
        A copy of ``df`` with four added columns:
        ``Transfer_Year``, ``Inflation_Multiplier``, ``Adjusted_Fee_EUR``
        and ``Transfer_Fee_2024_GBP``.

    Note:
        Years that have no entry in ``INFLATION_RATES`` (including missing
        or unparseable dates) get a multiplier of 1.0, i.e. no adjustment.
    """
    df = df.copy()

    # Derive the transfer year. Only a numeric column is treated as a
    # ready-made year; everything else (object/str date strings AND real
    # datetime64 columns) is parsed and reduced to its year. Checking for
    # dtype == 'object' alone would leave datetime columns as timestamps,
    # which never match the integer keys of INFLATION_RATES and would
    # silently fall through to the 1.0 default.
    year_source = df[year_col]
    if pd.api.types.is_numeric_dtype(year_source):
        df['Transfer_Year'] = year_source
    else:
        df['Transfer_Year'] = pd.to_datetime(year_source).dt.year

    df['Inflation_Multiplier'] = (
        df['Transfer_Year'].map(INFLATION_RATES).fillna(1.0)
    )

    # Adjust to 2024 values
    df['Adjusted_Fee_EUR'] = df[fee_col] * df['Inflation_Multiplier']

    # Convert EUR to GBP using the flat approximated rate
    df['Transfer_Fee_2024_GBP'] = df['Adjusted_Fee_EUR'] * eur_to_gbp

    return df
def clean_data(df):
    """
    Apply basic cleanup to ``df`` and return it.

    Currently the only rule is that missing ``minutes_played`` values are
    replaced with 0. The frame is modified in place (no copy is made) and
    the same object is returned.
    """
    minutes_col = 'minutes_played'

    # Nothing to clean when the column is absent.
    if minutes_col not in df.columns:
        return df

    # Placeholder: further, more specific cleaning rules will be added here.
    df[minutes_col] = df[minutes_col].fillna(0)
    return df