"""
utils/config.py — centralised config for HF Spaces deployment.

The USDA API key is read from the ``USDA_API_KEY`` environment variable
(on Hugging Face Spaces, set it as a Space Secret). No real key is stored in
this file; when the variable is missing the public ``DEMO_KEY`` placeholder
is used instead.

Cache and model paths are anchored at ``ROOT_DIR`` (the parent of the
directory containing this file), not at the process working directory.
Importing this module creates the ``cache/`` and ``models/`` directories if
they do not exist yet.
"""

import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Tuple

ROOT_DIR = Path(__file__).parent.parent
CACHE_DIR = ROOT_DIR / "cache"
MODELS_DIR = ROOT_DIR / "models"

# Import-time side effect: make sure the on-disk locations exist.
CACHE_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# Name of the environment variable / Space Secret holding the USDA key.
_USDA_API_KEY_ENV = "USDA_API_KEY"
# Public placeholder key used when no secret is configured. Secrets must
# never be committed as a fallback value.
_USDA_FALLBACK_KEY = "DEMO_KEY"


def _usda_api_key_from_env() -> str:
    """Return the USDA API key from the environment, or the demo placeholder.

    An unset, empty, or whitespace-only variable is treated as "not
    configured" and yields ``DEMO_KEY``.
    """
    key = os.getenv(_USDA_API_KEY_ENV, "").strip()
    return key or _USDA_FALLBACK_KEY


@dataclass
class SpeechConfig:
    """Settings for the speech-to-text stage."""

    backend: str = "whisper"
    # base balances accuracy vs speed for Hindi translation
    # NOTE(review): whisper_language below is "en" while this comment
    # mentions Hindi — confirm the intended language/task.
    whisper_model_size: str = "base"
    whisper_language: str = "en"
    whisper_device: str = "cpu"
    wav2vec2_model: str = "facebook/wav2vec2-base-960h"
    sample_rate: int = 16000
    max_audio_duration_sec: int = 120


@dataclass
class NLPConfig:
    """Settings for text processing, including the cooking-method vocabulary."""

    spacy_model: str = "en_core_web_sm"
    use_transformer_ner: bool = False
    # Recognised cooking-method terms; both "sautéed" and "sauteed" spellings
    # are listed.
    cooking_methods: List[str] = field(default_factory=lambda: [
        "fried", "deep-fried", "pan-fried", "stir-fried",
        "baked", "roasted", "grilled", "broiled",
        "boiled", "steamed", "poached", "simmered",
        "sautéed", "sauteed", "braised", "slow-cooked",
        "raw", "fresh", "smoked", "cured",
    ])
    # Per-method score from 0.0 ("raw") to 1.0 ("deep-fried").
    # Presumably higher means less healthy — confirm against the consumer.
    # NOTE(review): "fresh" appears in cooking_methods but has no score here.
    cooking_method_scores: Dict[str, float] = field(default_factory=lambda: {
        "raw": 0.0,
        "steamed": 0.1,
        "poached": 0.1,
        "boiled": 0.2,
        "grilled": 0.2,
        "broiled": 0.25,
        "baked": 0.3,
        "roasted": 0.35,
        "sauteed": 0.45,
        "sautéed": 0.45,
        "simmered": 0.4,
        "braised": 0.4,
        "slow-cooked": 0.35,
        "smoked": 0.5,
        "cured": 0.6,
        "stir-fried": 0.55,
        "pan-fried": 0.65,
        "fried": 0.85,
        "deep-fried": 1.0,
    })


@dataclass
class NutritionConfig:
    """Settings for the USDA nutrition lookup and its on-disk cache."""

    # Read from HF Space Secret -> environment variable at instantiation
    # time; falls back to the public DEMO_KEY placeholder when unset.
    usda_api_key: str = field(default_factory=_usda_api_key_from_env)
    usda_base_url: str = "https://api.nal.usda.gov/fdc/v1"
    cache_file: Path = field(
        default_factory=lambda: CACHE_DIR / "nutrition_cache.json"
    )
    use_cache: bool = True
    default_serving_g: float = 100.0
    nutrient_keys: List[str] = field(default_factory=lambda: [
        "calories", "total_fat", "saturated_fat", "protein",
        "carbohydrates", "sugar", "fiber", "sodium",
    ])


@dataclass
class ClassifierConfig:
    """Settings for the health classifier: artefact paths and hyperparameters."""

    model_type: str = "random_forest"
    model_path: Path = field(
        default_factory=lambda: MODELS_DIR / "health_classifier.joblib"
    )
    scaler_path: Path = field(
        default_factory=lambda: MODELS_DIR / "feature_scaler.joblib"
    )
    # Label -> (low, high) score bounds. Adjacent ranges share their
    # endpoints (4 and 7); which side is inclusive is decided by the
    # consumer — TODO confirm.
    label_thresholds: Dict[str, Tuple[int, int]] = field(
        default_factory=lambda: {
            "Healthy": (7, 10),
            "Moderately Healthy": (4, 7),
            "Unhealthy": (0, 4),
        }
    )
    # Hyperparameter sets, one per model family (presumably XGBoost,
    # LightGBM and random forest, selected via model_type — verify).
    xgb_params: Dict[str, Any] = field(default_factory=lambda: {
        "n_estimators": 200,
        "max_depth": 6,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "eval_metric": "mlogloss",
        "random_state": 42,
    })
    lgbm_params: Dict[str, Any] = field(default_factory=lambda: {
        "n_estimators": 200,
        "max_depth": 6,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": 42,
        "verbose": -1,
    })
    rf_params: Dict[str, Any] = field(default_factory=lambda: {
        "n_estimators": 200,
        "max_depth": 8,
        "min_samples_split": 5,
        "random_state": 42,
        "n_jobs": -1,
    })
    # Daily reference amounts keyed like NutritionConfig.nutrient_keys.
    # Units are not stated here; presumably kcal for calories, mg for
    # sodium and g for the rest — TODO confirm.
    daily_recommended: Dict[str, float] = field(default_factory=lambda: {
        "calories": 2000,
        "total_fat": 78,
        "saturated_fat": 20,
        "protein": 50,
        "carbohydrates": 275,
        "sugar": 50,
        "fiber": 28,
        "sodium": 2300,
    })


@dataclass
class AppConfig:
    """Top-level configuration aggregating every per-stage config."""

    speech: SpeechConfig = field(default_factory=SpeechConfig)
    nlp: NLPConfig = field(default_factory=NLPConfig)
    nutrition: NutritionConfig = field(default_factory=NutritionConfig)
    classifier: ClassifierConfig = field(default_factory=ClassifierConfig)
    default_servings: int = 4
    debug: bool = False
    log_level: str = "INFO"


# Shared module-level instance; the API key is resolved when this line runs.
config = AppConfig()