File size: 4,097 Bytes
f75c5b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3fc1ff
f75c5b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""
utils/config.py  —  centralised config for HF Spaces deployment.
API key is read from the USDA_API_KEY environment variable / Space Secret.
Model and cache paths are relative to the Space working directory.
"""

import os
from pathlib import Path
from dataclasses import dataclass, field
from typing import List

# Project root is the directory two levels above this file.
ROOT_DIR = Path(__file__).parent.parent
CACHE_DIR = ROOT_DIR / "cache"
MODELS_DIR = ROOT_DIR / "models"

# Create both working directories at import time; an existing one is fine.
for _required_dir in (CACHE_DIR, MODELS_DIR):
    _required_dir.mkdir(exist_ok=True)
del _required_dir


@dataclass
class SpeechConfig:
    """Speech-recognition settings: backend choice, model names, audio limits."""

    # Name of the recognition backend to use.
    backend: str = "whisper"

    # Whisper options. "base" balances accuracy vs speed for Hindi translation.
    whisper_model_size: str = "base"
    whisper_language: str = "en"
    whisper_device: str = "cpu"

    # Model identifier used for the wav2vec2 option.
    wav2vec2_model: str = "facebook/wav2vec2-base-960h"

    # Audio constraints.
    sample_rate: int = 16_000
    max_audio_duration_sec: int = 120


@dataclass
class NLPConfig:
    """NLP settings: spaCy model name plus the cooking-method vocabulary.

    The list and dict defaults are built by factories, so every instance gets
    its own independent copy.
    """

    spacy_model: str = "en_core_web_sm"
    use_transformer_ner: bool = False

    # Recognised cooking-method terms (both spellings of "sauteed" included).
    cooking_methods: List[str] = field(
        default_factory=lambda: [
            # frying
            "fried",
            "deep-fried",
            "pan-fried",
            "stir-fried",
            # dry heat
            "baked",
            "roasted",
            "grilled",
            "broiled",
            # moist heat
            "boiled",
            "steamed",
            "poached",
            "simmered",
            # saute / slow
            "sautéed",
            "sauteed",
            "braised",
            "slow-cooked",
            # uncooked / preserved
            "raw",
            "fresh",
            "smoked",
            "cured",
        ]
    )

    # Per-method score from 0.0 ("raw") up to 1.0 ("deep-fried").
    # Note: "fresh" appears in cooking_methods but has no score entry here.
    cooking_method_scores: dict = field(
        default_factory=lambda: {
            "raw": 0.0,
            "steamed": 0.1,
            "poached": 0.1,
            "boiled": 0.2,
            "grilled": 0.2,
            "broiled": 0.25,
            "baked": 0.3,
            "roasted": 0.35,
            "sauteed": 0.45,
            "sautéed": 0.45,
            "simmered": 0.4,
            "braised": 0.4,
            "slow-cooked": 0.35,
            "smoked": 0.5,
            "cured": 0.6,
            "stir-fried": 0.55,
            "pan-fried": 0.65,
            "fried": 0.85,
            "deep-fried": 1.0,
        }
    )


@dataclass
class NutritionConfig:
    """Settings for USDA nutrition lookups and the on-disk nutrition cache.

    The API key is never stored in source. It is taken from the
    ``USDA_API_KEY`` environment variable (HF Space Secret); when that
    variable is unset or blank, the public rate-limited ``"DEMO_KEY"``
    placeholder is used so the module still imports.
    """

    # Read from HF Space Secret -> environment variable. A blank value is
    # treated as unset so an empty secret does not produce an empty key.
    usda_api_key: str = field(
        default_factory=lambda: os.getenv("USDA_API_KEY", "").strip() or "DEMO_KEY"
    )
    usda_base_url: str = "https://api.nal.usda.gov/fdc/v1"

    # JSON cache file under CACHE_DIR; use_cache toggles whether it is used.
    cache_file: Path = field(default_factory=lambda: CACHE_DIR / "nutrition_cache.json")
    use_cache: bool = True

    # Serving size in grams assumed when none is given.
    default_serving_g: float = 100.0

    # Nutrient names tracked per food item.
    nutrient_keys: List[str] = field(default_factory=lambda: [
        "calories", "total_fat", "saturated_fat",
        "protein", "carbohydrates", "sugar", "fiber", "sodium",
    ])


@dataclass
class ClassifierConfig:
    """Health-classifier settings: model choice, artefact paths, hyper-parameters.

    All dict and path defaults come from factories, so instances never share
    mutable state.
    """

    model_type: str = "random_forest"

    # Saved model and feature-scaler files, both under MODELS_DIR.
    model_path: Path = field(
        default_factory=lambda: MODELS_DIR / "health_classifier.joblib"
    )
    scaler_path: Path = field(
        default_factory=lambda: MODELS_DIR / "feature_scaler.joblib"
    )

    # Label name -> (lower, upper) score bounds.
    label_thresholds: dict = field(
        default_factory=lambda: {
            "Healthy": (7, 10),
            "Moderately Healthy": (4, 7),
            "Unhealthy": (0, 4),
        }
    )

    # Hyper-parameter sets, one per supported model family.
    xgb_params: dict = field(
        default_factory=lambda: {
            "n_estimators": 200,
            "max_depth": 6,
            "learning_rate": 0.05,
            "subsample": 0.8,
            "colsample_bytree": 0.8,
            "eval_metric": "mlogloss",
            "random_state": 42,
        }
    )
    lgbm_params: dict = field(
        default_factory=lambda: {
            "n_estimators": 200,
            "max_depth": 6,
            "learning_rate": 0.05,
            "subsample": 0.8,
            "colsample_bytree": 0.8,
            "random_state": 42,
            "verbose": -1,
        }
    )
    rf_params: dict = field(
        default_factory=lambda: {
            "n_estimators": 200,
            "max_depth": 8,
            "min_samples_split": 5,
            "random_state": 42,
            "n_jobs": -1,
        }
    )

    # Daily reference amount for each tracked nutrient.
    daily_recommended: dict = field(
        default_factory=lambda: {
            "calories": 2000,
            "total_fat": 78,
            "saturated_fat": 20,
            "protein": 50,
            "carbohydrates": 275,
            "sugar": 50,
            "fiber": 28,
            "sodium": 2300,
        }
    )


@dataclass
class AppConfig:
    """Top-level configuration bundling every per-subsystem config section.

    Each section is created fresh by its own class, so two AppConfig
    instances never share a section object.
    """

    # Per-subsystem sections.
    speech: SpeechConfig = field(default_factory=SpeechConfig)
    nlp: NLPConfig = field(default_factory=NLPConfig)
    nutrition: NutritionConfig = field(default_factory=NutritionConfig)
    classifier: ClassifierConfig = field(default_factory=ClassifierConfig)

    # Application-wide defaults.
    default_servings: int = 4
    debug: bool = False
    log_level: str = "INFO"


# Module-level AppConfig instance, created once at import time with all defaults.
config = AppConfig()