| """ |
| utils/config.py — centralised config for HF Spaces deployment. |
| API key is read from the USDA_API_KEY environment variable / Space Secret. |
| Model and cache paths are relative to the Space working directory. |
| """ |
|
|
| import os |
| from pathlib import Path |
| from dataclasses import dataclass, field |
| from typing import List |
|
|
# Directory two levels above this file; the other paths hang off it.
ROOT_DIR = Path(__file__).parent.parent
CACHE_DIR = ROOT_DIR / "cache"
MODELS_DIR = ROOT_DIR / "models"

# Create both working directories at import time if they are missing.
# Only the leaf directory is created; ROOT_DIR itself must already exist.
for _directory in (CACHE_DIR, MODELS_DIR):
    _directory.mkdir(exist_ok=True)
|
|
|
|
@dataclass
class SpeechConfig:
    """Default settings for the speech-recognition stage.

    The fields cover a default backend name, Whisper-specific options, a
    wav2vec2 model identifier and two audio limits. This class only declares
    defaults; how each value is consumed is decided by code outside this
    module.
    """

    # Backend selected by default.
    backend: str = "whisper"

    # Whisper options: model size, language code and device name.
    whisper_model_size: str = "base"
    whisper_language: str = "en"
    whisper_device: str = "cpu"

    # Model identifier for the wav2vec2 alternative.
    wav2vec2_model: str = "facebook/wav2vec2-base-960h"

    # Audio limits. Units presumably Hz and seconds (per the field names).
    sample_rate: int = 16_000
    max_audio_duration_sec: int = 120
|
|
|
|
def _default_cooking_methods() -> List[str]:
    """Return a fresh list of the cooking-method terms used by default."""
    return [
        "fried", "deep-fried", "pan-fried", "stir-fried",
        "baked", "roasted", "grilled", "broiled",
        "boiled", "steamed", "poached", "simmered",
        "sautéed", "sauteed", "braised", "slow-cooked",
        "raw", "fresh", "smoked", "cured",
    ]


def _default_cooking_method_scores() -> dict:
    """Return a fresh mapping of cooking method to its default score.

    Scores run from 0.0 ("raw") to 1.0 ("deep-fried"). What the score
    represents is not stated in this module.
    """
    return {
        "raw": 0.0, "steamed": 0.1, "poached": 0.1, "boiled": 0.2,
        "grilled": 0.2, "broiled": 0.25, "baked": 0.3, "roasted": 0.35,
        "sauteed": 0.45, "sautéed": 0.45, "simmered": 0.4, "braised": 0.4,
        "slow-cooked": 0.35, "smoked": 0.5, "cured": 0.6,
        "stir-fried": 0.55, "pan-fried": 0.65,
        "fried": 0.85, "deep-fried": 1.0,
    }


@dataclass
class NLPConfig:
    """Default settings for the text-processing stage.

    Holds the spaCy model name, a flag for transformer-based NER, the list of
    cooking-method terms and a per-method score table. Each instance gets its
    own copy of the list and the dict, so mutating one instance does not
    affect another.
    """

    spacy_model: str = "en_core_web_sm"
    use_transformer_ner: bool = False

    # Both spellings of "sautéed" (accented and plain) are listed on purpose.
    cooking_methods: List[str] = field(default_factory=_default_cooking_methods)

    # NOTE(review): "fresh" appears in cooking_methods but has no entry here;
    # confirm that consumers look scores up with a fallback.
    cooking_method_scores: dict = field(
        default_factory=_default_cooking_method_scores
    )
|
|
|
|
@dataclass
class NutritionConfig:
    """Settings for USDA nutrition lookups and the on-disk lookup cache.

    The API key is taken from the ``USDA_API_KEY`` environment variable (on
    HF Spaces, a Space Secret) each time an instance is created. No private
    credential is stored in source: when the variable is unset or empty the
    key falls back to ``"DEMO_KEY"``, the public, heavily rate-limited demo
    key accepted by api.data.gov services. Set ``USDA_API_KEY`` for any real
    deployment.
    """

    # `or` (rather than a getenv default) also covers a variable that is set
    # but empty, which would otherwise yield an unusable blank key.
    usda_api_key: str = field(
        default_factory=lambda: os.getenv("USDA_API_KEY") or "DEMO_KEY"
    )
    usda_base_url: str = "https://api.nal.usda.gov/fdc/v1"

    # Cache file lives inside the module-level CACHE_DIR.
    cache_file: Path = field(
        default_factory=lambda: CACHE_DIR / "nutrition_cache.json"
    )
    use_cache: bool = True

    # Presumably grams, per the `_g` suffix; confirm against the consumer.
    default_serving_g: float = 100.0

    # Nutrient names used by default; each instance gets its own list.
    nutrient_keys: List[str] = field(default_factory=lambda: [
        "calories", "total_fat", "saturated_fat",
        "protein", "carbohydrates", "sugar", "fiber", "sodium",
    ])
|
|
|
|
@dataclass
class ClassifierConfig:
    """Default settings for the health classifier.

    Bundles the default model type, the on-disk locations of the model and
    scaler artefacts (both under the module-level MODELS_DIR), label score
    ranges, one hyper-parameter dict per model family, and a table of daily
    reference amounts. Every dict default is built per instance.
    """

    model_type: str = "random_forest"

    # Artefact paths inside MODELS_DIR.
    model_path: Path = field(
        default_factory=lambda: MODELS_DIR / "health_classifier.joblib"
    )
    scaler_path: Path = field(
        default_factory=lambda: MODELS_DIR / "feature_scaler.joblib"
    )

    # Label -> (low, high) pair. Adjacent ranges share a boundary value;
    # which side is inclusive is decided by the consumer — TODO confirm.
    label_thresholds: dict = field(default_factory=lambda: {
        "Healthy": (7, 10),
        "Moderately Healthy": (4, 7),
        "Unhealthy": (0, 4),
    })

    # Hyper-parameters, presumably for XGBoost (per the field name).
    xgb_params: dict = field(default_factory=lambda: dict(
        n_estimators=200,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        eval_metric="mlogloss",
        random_state=42,
    ))

    # Hyper-parameters, presumably for LightGBM (per the field name).
    lgbm_params: dict = field(default_factory=lambda: dict(
        n_estimators=200,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
        verbose=-1,
    ))

    # Hyper-parameters, presumably for a random forest (per the field name).
    rf_params: dict = field(default_factory=lambda: dict(
        n_estimators=200,
        max_depth=8,
        min_samples_split=5,
        random_state=42,
        n_jobs=-1,
    ))

    # Daily reference amount per nutrient. Units are not stated here
    # (presumably kcal / g / mg) — confirm against the consumer.
    daily_recommended: dict = field(default_factory=lambda: dict(
        calories=2000,
        total_fat=78,
        saturated_fat=20,
        protein=50,
        carbohydrates=275,
        sugar=50,
        fiber=28,
        sodium=2300,
    ))
|
|
|
|
@dataclass
class AppConfig:
    """Top-level configuration that groups the per-stage config objects.

    Each nested section is built from its own dataclass defaults when an
    AppConfig is created, so every AppConfig owns independent section
    instances.
    """

    # Per-stage sections.
    speech: SpeechConfig = field(default_factory=SpeechConfig)
    nlp: NLPConfig = field(default_factory=NLPConfig)
    nutrition: NutritionConfig = field(default_factory=NutritionConfig)
    classifier: ClassifierConfig = field(default_factory=ClassifierConfig)

    # Application-wide scalars.
    default_servings: int = 4
    debug: bool = False
    log_level: str = "INFO"
|
|
|
|
# Module-level configuration instance, built with all defaults when this
# module is imported.
config: AppConfig = AppConfig()
|
|