Mohsin Khan
Initial commit for Hugging Face Docker Spaces
5a58b2b
import logging
import math
from typing import Any, Dict, List
logger = logging.getLogger(__name__)

# Feature configuration: the 18 model inputs, in EXACT order.
# PreprocessingService.process_input walks this list front to back, so the
# position of a name here is its position in the produced feature vector.
MODEL_FEATURES = [
    'General_Health',    # 0
    'Checkup',           # 1
    'Exercise',          # 2
    'Skin_Cancer',       # 3
    'Other_Cancer',      # 4
    'Depression',        # 5
    'Diabetes',          # 6
    'Arthritis',         # 7
    'Sex',               # 8
    'Age',               # 9
    'Height',            # 10
    'Weight',            # 11
    'BMI',               # 12
    'Smoking',           # 13
    'Alcohol',           # 14
    'Fruit',             # 15
    'Green_Vegetables',  # 16
    'Fried_Potato',      # 17
]
# Per-feature scaling configuration, keyed by feature name.
# Two entry shapes are used:
#   * {'min': ..., 'max': ...}  -> bounds for min-max scaling of a numeric value
#   * {label: score, ...}       -> lookup table from a raw label to its score
SCALERS = {
    # --- numeric features: min/max bounds ---
    'Weight': dict(min=29.94, max=136.0),
    'Height': dict(min=142.0, max=200.0),
    'BMI': dict(min=12.87, max=43.28),
    'Age': dict(min=21, max=82),
    'Fruit': dict(min=0, max=56.0),
    'Green_Vegetables': dict(min=0, max=44.0),
    'Fried_Potato': dict(min=0, max=17.0),
    'Alcohol': dict(min=0, max=15.0),
    # --- labelled features: label -> score ---
    'General_Health': {
        'Excellent': 1.0,
        'Very_Good': 0.75,
        'Good': 0.5,
        'Fair': 0.25,
        'Poor': 0.0,
    },
    'Checkup': {
        'Within 1 year': 1.0,
        '1-2 years': 0.75,
        '2-5 years': 0.5,
        '5+ years': 0.25,
        'Never': 0.0,
    },
    'Diabetes': {
        'No': 0.0,
        'Borderline': 0.33,
        'During Pregnancy': 0.66,
        'Yes': 1.0,
    },
    'Sex': {'Female': 0.0, 'Male': 1.0, '0': 0.0, '1': 1.0},
    # --- 0/1 flags: every flag gets its own independent lookup table ---
    **{
        flag: {'0': 0.0, '1': 1.0}
        for flag in (
            'Exercise',
            'Smoking',
            'Skin_Cancer',
            'Other_Cancer',
            'Depression',
            'Arthritis',
        )
    },
}
class PreprocessingService:
    """Turn a raw feature mapping into the normalised vector fed to the model.

    The class is a namespace of static methods driven by the module-level
    ``MODEL_FEATURES`` (output order) and ``SCALERS`` (per-feature scaling).
    Values that cannot be interpreted never raise; they are logged and
    replaced by 0.0.
    """

    # Boolean-like tokens kept from the original app's categorical handling.
    # They are compared against the stripped, lower-cased input.
    _TRUTHY_TOKENS = frozenset({'on', 'true', 'yes'})
    _FALSY_TOKENS = frozenset({'off', 'false', 'no'})

    @staticmethod
    def process_input(data: Dict[str, Any]) -> List[float]:
        """Build the feature vector for *data* in ``MODEL_FEATURES`` order.

        Args:
            data: Mapping of feature name to raw value. A feature missing
                from the mapping is read as ``None`` and ends up as 0.0.

        Returns:
            One float per entry of ``MODEL_FEATURES``, in the same order.
        """
        return [
            PreprocessingService._process_single_value(data.get(name), name)
            for name in MODEL_FEATURES
        ]

    @staticmethod
    def _process_single_value(value: Any, feature_name: str) -> float:
        """Scale or encode one raw value according to ``SCALERS``.

        Args:
            value: Raw value for the feature (number, string, bool or None).
            feature_name: Key used to look up the feature's scaler config.

        Returns:
            * Numeric config (has a ``'min'`` key): the min-max scaled value,
              clamped to ``[0.0, 1.0]``; 0.0 if the value is not a number
              or is NaN.
            * Categorical config: the configured score for the label, or
              1.0 / 0.0 for the boolean-like tokens, or 0.0 if unknown.
            * No usable config: ``float(value)``, or 0.0 if that fails.
        """
        # float() raises ValueError for bad strings, TypeError for
        # unsupported types (e.g. None) and OverflowError for huge ints.
        conversion_errors = (ValueError, TypeError, OverflowError)

        config = SCALERS.get(feature_name)
        if not isinstance(config, dict):
            # Default fallback: unknown feature or non-dict config.
            try:
                return float(value)
            except conversion_errors:
                return 0.0

        # Numeric range scaling
        if 'min' in config:
            try:
                number = float(value)
            except conversion_errors:
                logger.warning(
                    "Failed to convert numeric value '%s' for feature '%s'. Defaulting to 0.0.",
                    value, feature_name,
                )
                return 0.0
            if math.isnan(number):
                # NaN would slip through the clamp below as 1.0, because
                # min(1.0, nan) evaluates to 1.0.
                logger.warning(
                    "Numeric value '%s' for feature '%s' is NaN. Defaulting to 0.0.",
                    value, feature_name,
                )
                return 0.0
            low, high = config['min'], config['max']
            scaled = (number - low) / (high - low)
            return max(0.0, min(1.0, scaled))

        # Categorical lookup: exact label first.
        label = str(value)
        if label in config:
            return config[label]

        # An integral float such as 1.0 stringifies to '1.0'; retry it as '1'
        # so numeric flags are not silently turned into 0.0.
        if isinstance(value, float) and value.is_integer():
            int_label = str(int(value))
            if int_label in config:
                return config[int_label]

        # Tolerant match: ignore case, surrounding whitespace and the
        # space/underscore distinction ("very good" == "Very_Good").
        token = label.strip().lower()
        wanted = token.replace(' ', '_')
        for known_label, score in config.items():
            if known_label.lower().replace(' ', '_') == wanted:
                return score

        # Boolean-like tokens kept from the original app.
        if token in PreprocessingService._TRUTHY_TOKENS:
            return 1.0
        if token in PreprocessingService._FALSY_TOKENS:
            return 0.0

        logger.warning(
            "Unknown categorical value '%s' for feature '%s'. Defaulting to 0.0.",
            value, feature_name,
        )
        return 0.0