# he99codes's picture
# Deploying latest raw changes and full functionality
# a3fc1ff
"""
utils/config.py — centralised config for HF Spaces deployment.
API key is read from the USDA_API_KEY environment variable / Space Secret.
Model and cache paths are relative to the Space working directory.
"""
import os
from pathlib import Path
from dataclasses import dataclass, field
from typing import List
# Project root: the directory two levels above this file.
ROOT_DIR = Path(__file__).parent.parent

# Directories used for cache files and for serialized model artifacts.
CACHE_DIR = ROOT_DIR.joinpath("cache")
MODELS_DIR = ROOT_DIR.joinpath("models")

# Create both directories at import time; an existing directory is left as-is.
for _required_dir in (CACHE_DIR, MODELS_DIR):
    _required_dir.mkdir(exist_ok=True)
del _required_dir
@dataclass
class SpeechConfig:
    """Settings for the speech-recognition stage.

    Every field has a default, so ``SpeechConfig()`` yields a complete
    configuration. Fields can be overridden positionally (in declaration
    order) or by keyword.
    """

    # Which recognition backend to use; the default selects Whisper.
    # NOTE(review): the set of accepted values is defined by the consumer of
    # this config, not here — confirm before adding new backends.
    backend: str = "whisper"

    # base balances accuracy vs speed for Hindi translation
    # NOTE(review): the comment above mentions Hindi, while
    # ``whisper_language`` below defaults to "en" — confirm which language
    # the pipeline is meant to target.
    whisper_model_size: str = "base"
    whisper_language: str = "en"
    whisper_device: str = "cpu"

    # Model identifier used when the wav2vec2 backend is selected
    # (presumably a Hugging Face Hub id — verify against the loader).
    wav2vec2_model: str = "facebook/wav2vec2-base-960h"

    # Audio sample rate; presumably in Hz — TODO confirm.
    sample_rate: int = 16000

    # Upper bound on accepted audio length, in seconds (per the field name).
    max_audio_duration_sec: int = 120
@dataclass
class NLPConfig:
    """Settings for the text-processing stage.

    Holds the spaCy model name, a flag for transformer-based NER, and the
    vocabulary of cooking-method terms together with a numeric score for
    each term. Mutable defaults are built per instance via
    ``default_factory`` so instances never share a list or dict.
    """

    spacy_model: str = "en_core_web_sm"
    use_transformer_ner: bool = False

    # Cooking-method terms recognised in recipe text. Both the accented and
    # the plain-ASCII spelling of "sautéed" are listed.
    cooking_methods: List[str] = field(default_factory=lambda: [
        "fried", "deep-fried", "pan-fried", "stir-fried",
        "baked", "roasted", "grilled", "broiled",
        "boiled", "steamed", "poached", "simmered",
        "sautéed", "sauteed", "braised", "slow-cooked",
        "raw", "fresh", "smoked", "cured",
    ])

    # Score per cooking method on a 0.0 ("raw") to 1.0 ("deep-fried") scale.
    # NOTE(review): the scale looks like "higher = less healthy" — confirm
    # against the code that consumes these scores.
    # Every term in ``cooking_methods`` has an entry here. "fresh" was
    # previously missing, so a direct lookup raised KeyError; it is scored
    # the same as "raw".
    cooking_method_scores: dict = field(default_factory=lambda: {
        "raw": 0.0, "fresh": 0.0,
        "steamed": 0.1, "poached": 0.1, "boiled": 0.2,
        "grilled": 0.2, "broiled": 0.25, "baked": 0.3, "roasted": 0.35,
        "sauteed": 0.45, "sautéed": 0.45, "simmered": 0.4, "braised": 0.4,
        "slow-cooked": 0.35, "smoked": 0.5, "cured": 0.6,
        "stir-fried": 0.55, "pan-fried": 0.65,
        "fried": 0.85, "deep-fried": 1.0,
    })
@dataclass
class NutritionConfig:
    """Settings for USDA nutrition lookups and the local response cache.

    The API key is taken from the ``USDA_API_KEY`` environment variable
    (on HF Spaces this is populated from a Space Secret). No private key is
    embedded in the source: when the variable is unset or empty, the public
    ``DEMO_KEY`` is used instead.
    """

    # Read from HF Space Secret → environment variable.
    # ``or`` (rather than a getenv default) also covers a variable that is
    # set but empty. "DEMO_KEY" is the shared, rate-limited public key for
    # api.data.gov-backed services, adequate only for light exploration.
    # NOTE(review): a real key used to be hard-coded here as the fallback;
    # treat that key as exposed and revoke/rotate it.
    usda_api_key: str = field(
        default_factory=lambda: os.getenv("USDA_API_KEY") or "DEMO_KEY"
    )
    usda_base_url: str = "https://api.nal.usda.gov/fdc/v1"

    # JSON cache file location; the default resolves against CACHE_DIR when
    # an instance is created.
    cache_file: Path = field(
        default_factory=lambda: CACHE_DIR / "nutrition_cache.json"
    )
    use_cache: bool = True

    # Default serving size; presumably grams, per the ``_g`` suffix.
    default_serving_g: float = 100.0

    # Names of the nutrient fields this application works with.
    nutrient_keys: List[str] = field(default_factory=lambda: [
        "calories", "total_fat", "saturated_fat",
        "protein", "carbohydrates", "sugar", "fiber", "sodium",
    ])
@dataclass
class ClassifierConfig:
    """Settings for the health classifier: model choice, artifacts, labels.

    Mutable defaults are produced by ``default_factory`` so each instance
    gets its own dicts. The two artifact paths resolve against MODELS_DIR
    when an instance is created.
    """

    # Selects which model family to use; the default is the random forest.
    # NOTE(review): accepted values are decided by the training/inference
    # code — presumably one per ``*_params`` dict below; confirm.
    model_type: str = "random_forest"

    # On-disk locations of the serialized classifier and feature scaler.
    model_path: Path = field(
        default_factory=lambda: MODELS_DIR / "health_classifier.joblib"
    )
    scaler_path: Path = field(
        default_factory=lambda: MODELS_DIR / "feature_scaler.joblib"
    )

    # Label -> (lower, upper) score bounds.
    # NOTE(review): 4 and 7 each appear in two ranges; which side is
    # inclusive is decided by the code that applies these bounds.
    label_thresholds: dict = field(default_factory=lambda: {
        "Healthy": (7, 10),
        "Moderately Healthy": (4, 7),
        "Unhealthy": (0, 4),
    })

    # Hyper-parameter sets, one per supported model family (presumably
    # passed as keyword arguments to the respective estimator — confirm).
    xgb_params: dict = field(default_factory=lambda: {
        "n_estimators": 200, "max_depth": 6, "learning_rate": 0.05,
        "subsample": 0.8, "colsample_bytree": 0.8, "eval_metric": "mlogloss",
        "random_state": 42,
    })
    lgbm_params: dict = field(default_factory=lambda: {
        "n_estimators": 200, "max_depth": 6, "learning_rate": 0.05,
        "subsample": 0.8, "colsample_bytree": 0.8, "random_state": 42,
        "verbose": -1,
    })
    rf_params: dict = field(default_factory=lambda: {
        "n_estimators": 200, "max_depth": 8, "min_samples_split": 5,
        "random_state": 42, "n_jobs": -1,
    })

    # Reference daily intake per nutrient.
    # NOTE(review): units are not stated here — presumably kcal for
    # calories, mg for sodium and g for the rest; confirm.
    daily_recommended: dict = field(default_factory=lambda: {
        "calories": 2000, "total_fat": 78, "saturated_fat": 20,
        "protein": 50, "carbohydrates": 275, "sugar": 50,
        "fiber": 28, "sodium": 2300,
    })
@dataclass
class AppConfig:
    """Top-level configuration that groups the per-stage config objects.

    Each nested config is created fresh per ``AppConfig`` instance via
    ``default_factory``, so instances do not share sub-config objects.
    """

    # Per-stage settings, each built from its own defaults.
    speech: SpeechConfig = field(default_factory=SpeechConfig)
    nlp: NLPConfig = field(default_factory=NLPConfig)
    nutrition: NutritionConfig = field(default_factory=NutritionConfig)
    classifier: ClassifierConfig = field(default_factory=ClassifierConfig)

    # Application-wide settings.
    # Number of servings assumed when none is given (per the field name).
    default_servings: int = 4
    debug: bool = False
    log_level: str = "INFO"
# Module-level configuration instance, built once at import time with every
# default in place.
config: AppConfig = AppConfig()