| from typing import Any, Dict, List |
| import logging |
|
|
# Module-level logger used for preprocessing warnings.
logger = logging.getLogger(__name__)


# Names of the model inputs. PreprocessingService.process_input walks this
# list front to back, so the order here is the order of the output vector.
MODEL_FEATURES = [
    'General_Health',
    'Checkup',
    'Exercise',
    'Skin_Cancer',
    'Other_Cancer',
    'Depression',
    'Diabetes',
    'Arthritis',
    'Sex',
    'Age',
    'Height',
    'Weight',
    'BMI',
    'Smoking',
    'Alcohol',
    'Fruit',
    'Green_Vegetables',
    'Fried_Potato',
]
|
|
# Per-feature encoding rules consumed by PreprocessingService.
#   * An entry holding 'min'/'max' describes a numeric range: the raw value is
#     rescaled to (value - min) / (max - min) and clamped to [0.0, 1.0].
#   * Any other entry is a lookup table from the raw value's string form to
#     the encoded float.
SCALERS = {
    # Numeric ranges.
    'Weight': dict(min=29.94, max=136.0),
    'Height': dict(min=142.0, max=200.0),
    'BMI': dict(min=12.87, max=43.28),
    'Age': dict(min=21, max=82),
    'Fruit': dict(min=0, max=56.0),
    'Green_Vegetables': dict(min=0, max=44.0),
    'Fried_Potato': dict(min=0, max=17.0),
    'Alcohol': dict(min=0, max=15.0),
    # Multi-level categorical tables.
    'General_Health': {
        'Excellent': 1.0,
        'Very_Good': 0.75,
        'Good': 0.5,
        'Fair': 0.25,
        'Poor': 0.0,
    },
    'Checkup': {
        'Within 1 year': 1.0,
        '1-2 years': 0.75,
        '2-5 years': 0.5,
        '5+ years': 0.25,
        'Never': 0.0,
    },
    'Diabetes': {
        'No': 0.0,
        'Borderline': 0.33,
        'During Pregnancy': 0.66,
        'Yes': 1.0,
    },
    'Sex': {
        'Female': 0.0,
        'Male': 1.0,
        '0': 0.0,
        '1': 1.0,
    },
    # Plain 0/1 flags. The comprehension builds a separate table object for
    # every feature, so the entries stay independent of each other.
    **{
        flag: {'0': 0.0, '1': 1.0}
        for flag in (
            'Exercise',
            'Smoking',
            'Skin_Cancer',
            'Other_Cancer',
            'Depression',
            'Arthritis',
        )
    },
}
|
|
class PreprocessingService:
    """Turn a raw input mapping into the ordered numeric vector for the model.

    Feature order comes from the module-level ``MODEL_FEATURES``; per-feature
    encoding rules come from the module-level ``SCALERS``.
    """

    @staticmethod
    def process_input(data: Dict[str, Any]) -> List[float]:
        """Encode *data* into one float per entry of ``MODEL_FEATURES``.

        Args:
            data: Mapping of feature name to raw value. Absent keys are
                treated as ``None`` and therefore fall back to the default
                encoding of the feature.

        Returns:
            The encoded values, in ``MODEL_FEATURES`` order.
        """
        return [
            PreprocessingService._process_single_value(data.get(name), name)
            for name in MODEL_FEATURES
        ]

    @staticmethod
    def _process_single_value(value: Any, feature_name: str) -> float:
        """Encode one raw value according to the feature's ``SCALERS`` entry.

        * Range entry (has ``'min'``/``'max'``): min-max rescale, clamped to
          ``[0.0, 1.0]``. Values that cannot be converted, and NaN, log a
          warning and yield ``0.0``.
        * Lookup-table entry: resolved by exact string match, then by a
          case-insensitive/whitespace-trimmed match, then (for integral
          numbers such as ``1.0``) by their integer spelling, then by the
          generic ``on/true/yes`` and ``off/false/no`` spellings. Anything
          else logs a warning and yields ``0.0``.
        * No usable entry: the value is passed through ``float()``; on
          failure ``0.0`` is returned without logging.

        Args:
            value: Raw value as received (any type, may be ``None``).
            feature_name: Name of the feature the value belongs to.

        Returns:
            The encoded float.
        """
        config = SCALERS.get(feature_name)

        if not isinstance(config, dict):
            # Feature without encoding rules: best-effort numeric pass-through.
            try:
                return float(value)
            except (TypeError, ValueError, OverflowError):
                return 0.0

        if 'min' in config:
            try:
                number = float(value)
            except (TypeError, ValueError, OverflowError):
                number = None
            # NaN is the only float that differs from itself. It must be
            # rejected here because the clamp below would turn it into 1.0.
            if number is None or number != number:
                logger.warning(
                    "Failed to convert numeric value '%s' for feature '%s'. "
                    "Defaulting to 0.0.",
                    value,
                    feature_name,
                )
                return 0.0
            scaled = (number - config['min']) / (config['max'] - config['min'])
            return max(0.0, min(1.0, scaled))

        text = str(value)
        if text in config:
            return config[text]

        # Tolerate differences in letter case and surrounding whitespace.
        folded = text.strip().lower()
        for label, encoded in config.items():
            if str(label).lower() == folded:
                return encoded

        # Integral numbers written as floats (1.0, "0.0") should hit the
        # '1'/'0' keys. is_integer() is False for NaN and infinities.
        try:
            numeric = float(folded)
        except ValueError:
            numeric = None
        if numeric is not None and numeric.is_integer():
            int_key = str(int(numeric))
            if int_key in config:
                return config[int_key]

        # Generic boolean spellings (e.g. an HTML checkbox submits "on").
        if folded in ('on', 'true', 'yes'):
            return 1.0
        if folded in ('off', 'false', 'no'):
            return 0.0

        logger.warning(
            "Unknown categorical value '%s' for feature '%s'. "
            "Defaulting to 0.0.",
            value,
            feature_name,
        )
        return 0.0
|
|