"""
app.py — Local Gradio app with Hindi speech-to-text support.
  - English text input (Stage 2–5 unchanged)
  - English audio upload/record
  - Hindi audio upload/record → Whisper translates to English → Stage 2–5
"""
|
|
| import os |
| import sys |
| from pathlib import Path |
|
|
| sys.path.insert(0, str(Path(__file__).parent)) |
|
|
| from utils.config import config |
| from utils.logger import logger |
|
|
| |
def _ensure_spacy():
    """Make sure the spaCy ``en_core_web_sm`` model can be loaded.

    The model is loaded once as a probe. If that raises ``OSError`` the
    model is downloaded through ``spacy.cli.download``.
    """
    model_name = "en_core_web_sm"
    try:
        import spacy

        spacy.load(model_name)
    except OSError:
        logger.info("Downloading spaCy en_core_web_sm โฆ")
        # Imported lazily: the CLI helper is only needed on the download path.
        from spacy.cli import download as fetch_model

        fetch_model(model_name)
        logger.info("spaCy model ready.")


# Executed at import time, before the pipeline objects further down are built.
_ensure_spacy()
|
|
| |
def _ensure_model():
    """Load the saved health classifier, or train and save a new one.

    A random-forest ``HealthClassifier`` is created and asked to ``load()``.
    When that returns a falsy value, the classifier is trained on 1000
    synthetic samples, saved, and its test accuracy is logged.
    """
    from health_classifier.model import HealthClassifier
    from health_classifier.feature_engineering import generate_synthetic_training_data, FEATURE_NAMES

    classifier = HealthClassifier(model_type="random_forest")
    if not classifier.load():
        logger.info("No saved model โ training on synthetic data โฆ")
        training_frame = generate_synthetic_training_data(n_samples=1000)
        feature_columns = training_frame[FEATURE_NAMES]
        target_column = training_frame["label"]
        metrics = classifier.train(feature_columns, target_column)
        classifier.save()
        logger.info(f"Classifier ready. acc={metrics['test_accuracy']:.3f}")
    else:
        logger.info("Loaded saved classifier.")


# Executed at import time so a saved model exists before the app starts.
_ensure_model()
|
|
| |
| import traceback |
| import gradio as gr |
| import pandas as pd |
|
|
| from recipe_nlp.extractor import RecipeExtractor |
| from nutrition_engine.mapper import NutritionMapper, NutritionAggregator |
| from health_classifier.model import HealthClassifier, LABEL_EMOJI, LABEL_NAMES |
| from health_classifier.explainer import RecipeExplainer |
| from health_classifier.feature_engineering import FeatureEngineer |
|
|
| |
|
|
# Pipeline stage objects, constructed once at import time and looked up by
# key in run_pipeline().
# NOTE(review): this HealthClassifier() is created with default arguments and
# load() is not called on it here — confirm it picks up the saved model.
_BASE_PIPELINE = dict(
    extractor=RecipeExtractor(),
    mapper=NutritionMapper(),
    aggregator=NutritionAggregator(),
    classifier=HealthClassifier(),
    fe=FeatureEngineer(),
)
|
|
|
|
def run_pipeline(text: str):
    """Run recipe text through extraction, nutrition mapping, classification and explanation.

    Args:
        text: Recipe text handed to the extractor.

    Returns:
        A tuple ``(label, score, probabilities, nutrition, structure,
        explanation)``. ``explanation`` is ``None`` when the explainer fails,
        because that stage is treated as non-fatal.

    Raises:
        Exception: when extraction, nutrition mapping or classification
            fails (the original error is chained as ``__cause__``), or when
            the extractor finds no ingredients.
    """
    p = _BASE_PIPELINE

    try:
        structure = p["extractor"].extract(text)
    except Exception as e:
        raise Exception(f"NLP extraction failed: {e}") from e

    if not structure.ingredients:
        raise Exception(
            "No ingredients found. Try being more specific, "
            "e.g. '2 cups flour, 1 egg, 300g chicken'."
        )

    try:
        ing_nutritions = p["mapper"].map_ingredients(structure.ingredients)
        nutrition = p["aggregator"].aggregate(
            ing_nutritions, structure.servings_hint, structure.cooking_methods
        )
    except Exception as e:
        raise Exception(f"Nutrition mapping failed: {e}") from e

    try:
        features = p["fe"].extract(nutrition)
        label, score, probabilities = p["classifier"].predict(features)
    except Exception as e:
        raise Exception(f"Classification failed: {e}") from e

    # The explanation is optional: a failure here is logged and the caller
    # still receives the classification result.
    try:
        explainer = RecipeExplainer(p["classifier"])
        explanation = explainer.explain(features, label, score, probabilities)
    except Exception as e:
        logger.warning(f"Explainer failed (non-fatal): {e}")
        explanation = None

    return label, score, probabilities, nutrition, structure, explanation
|
|
|
|
def transcribe_audio(audio_path: str, language: str = None, task: str = "transcribe") -> str:
    """Transcribe an audio file with the project's ``SpeechTranscriber``.

    Usage as documented by the callers in this file:
      - Hindi -> English: ``language="hi"``, ``task="translate"``
      - English:          ``language=None``, ``task="transcribe"``

    Args:
        audio_path: Path of the audio file to transcribe.
        language: Language code passed through to the transcriber, or ``None``.
        task: Task name passed through to the transcriber.

    Returns:
        The transcribed text.

    Raises:
        Exception: with an ffmpeg installation hint when the underlying error
            text mentions ``WinError 2``, ``ffmpeg`` or ``No such file``;
            otherwise with an ``Audio transcription failed`` message. The
            original error is chained as ``__cause__`` in both cases.
    """
    try:
        # Imported lazily so the speech module is only needed on audio paths.
        from speech_module.transcriber1 import SpeechTranscriber
        transcriber = SpeechTranscriber()
        text, conf = transcriber.transcribe(audio_path, language=language, task=task)
        logger.info(f"Transcribed: lang={language or 'auto'} task={task} conf={conf:.2f}")
        return text
    except Exception as e:
        err = str(e)
        if "WinError 2" in err or "ffmpeg" in err.lower() or "No such file" in err:
            raise Exception(
                "ffmpeg not found. Download from https://ffmpeg.org, "
                "extract to C:\\ffmpeg, add C:\\ffmpeg\\bin to PATH, "
                "then restart the app."
            ) from e
        raise Exception(f"Audio transcription failed: {e}") from e
|
|
|
|
| |
|
|
# Daily reference amounts per nutrient, taken from the classifier config.
DAILY = config.classifier.daily_recommended

# Display unit for each nutrient key; iteration order drives the table rows.
UNITS = {
    "calories": "kcal",
    "total_fat": "g",
    "saturated_fat": "g",
    "protein": "g",
    "carbohydrates": "g",
    "sugar": "g",
    "fiber": "g",
    "sodium": "mg",
}

# Human-readable row label for each nutrient key.
NUTR_LABELS = {
    "calories": "๐ฅ Calories",
    "total_fat": "๐ฅ Total fat",
    "saturated_fat": "โ Saturated fat",
    "protein": "๐ช Protein",
    "carbohydrates": "๐ Carbs",
    "sugar": "๐ฌ Sugar",
    "fiber": "๐พ Fiber",
    "sodium": "๐ง Sodium",
}
|
|
|
|
| def _score_html(label: str, score: float, proba: dict) -> str: |
| if score >= 7: |
| clr, bg, text_clr, border_clr, emoji = "#22c55e", "#f0fdf4", "#14532d", "#bbf7d0", "๐ข" |
| elif score >= 4: |
| clr, bg, text_clr, border_clr, emoji = "#f59e0b", "#fffbeb", "#78350f", "#fde68a", "๐ก" |
| else: |
| clr, bg, text_clr, border_clr, emoji = "#ef4444", "#fef2f2", "#7f1d1d", "#fecaca", "๐ด" |
| bar = max(0, min(100, score * 10)) |
| proba_rows = "" |
| for lbl, p in sorted(proba.items(), key=lambda x: x[1], reverse=True): |
| if not lbl: |
| continue |
| proba_rows += f""" |
| <div style="display:flex;justify-content:space-between;align-items:center; |
| padding:6px 4px;border-bottom:1px solid {border_clr}; |
| font-size:13px;color:#4b5563;"> |
| <span style="font-weight:600;color:#374151;">{lbl}</span> |
| <span style="font-weight:700;color:{text_clr};background:rgba(255,255,255,0.7); |
| padding:2px 8px;border-radius:12px;">{p:.0%}</span> |
| </div>""" |
| return f""" |
| <div style="font-family:system-ui,-apple-system,sans-serif;padding:32px 28px; |
| border-radius:20px;background:{bg};border:1px solid {border_clr}; |
| text-align:center;max-width:420px;margin:0 auto;"> |
| <div style="font-size:48px;margin-bottom:4px;">{emoji}</div> |
| <div style="font-size:12px;font-weight:700;color:#6b7280; |
| letter-spacing:0.1em;text-transform:uppercase;margin-bottom:12px;"> |
| Health Rating |
| </div> |
| <div style="font-size:72px;font-weight:800;color:{clr};line-height:1; |
| letter-spacing:-0.02em;margin-bottom:16px;"> |
| {score}<span style="font-size:24px;color:#9ca3af;font-weight:500;">/10</span> |
| </div> |
| <div style="background:{clr};color:white;padding:6px 16px;border-radius:999px; |
| font-size:13px;font-weight:700;text-transform:uppercase; |
| letter-spacing:0.05em;display:inline-block;margin-bottom:20px;"> |
| {label} |
| </div> |
| <div style="background:rgba(0,0,0,0.05);border-radius:999px;height:10px; |
| margin:0 0 20px;overflow:hidden;"> |
| <div style="background:{clr};width:{bar}%;height:100%;border-radius:999px;"></div> |
| </div> |
| <div style="background:rgba(255,255,255,0.6);border-radius:16px; |
| border:1px solid {border_clr};padding:16px;text-align:left;"> |
| <div style="font-size:11px;color:#6b7280;font-weight:700; |
| letter-spacing:0.08em;margin-bottom:12px;">CLASS PROBABILITIES</div> |
| {proba_rows} |
| </div> |
| </div>""" |
|
|
|
|
| def _error_html(msg: str) -> str: |
| return f""" |
| <div style="font-family:system-ui;padding:20px;border-radius:12px; |
| background:#fef2f2;border:2px solid #ef4444;max-width:420px;margin:0 auto;"> |
| <div style="font-size:18px;font-weight:600;color:#991b1b;margin-bottom:8px;">โ Error</div> |
| <div style="font-size:13px;line-height:1.6;color:#7f1d1d;">{msg}</div> |
| </div>""" |
|
|
|
|
| def _empty_html() -> str: |
| return """ |
| <div style="font-family:system-ui;padding:32px;border-radius:16px; |
| background:#f9fafb;border:2px dashed #e5e7eb;text-align:center; |
| color:#9ca3af;max-width:420px;margin:0 auto;"> |
| <div style="font-size:40px;margin-bottom:10px;">๐ฅ</div> |
| <div style="font-size:14px;">Results will appear here after analysis</div> |
| </div>""" |
|
|
|
|
def _nutr_df(per_serving: dict) -> pd.DataFrame:
    """Build the per-serving nutrition table.

    Args:
        per_serving: Mapping of nutrient key to amount per serving. Keys
            missing from the mapping are treated as 0.

    Returns:
        A DataFrame with one row per key of ``UNITS`` and the columns
        ``Nutrient``, ``Amount``, ``% Daily value`` and ``Status``.
    """
    rows = []
    for key, unit in UNITS.items():
        val = per_serving.get(key, 0)
        # A missing or zero reference falls back to 1 to avoid dividing by zero.
        ref = DAILY.get(key, 1) or 1
        pct = val / ref * 100
        # Fiber and protein are rated "more is better"; every other nutrient
        # is rated "less is better".
        # NOTE(review): the "Good"/"OK" literals were split across two lines by
        # a corrupted character; they are rejoined here with the check-mark
        # glyph restored. The remaining status glyphs are kept as found and
        # look mis-encoded as well — confirm against the original file.
        if key in ("fiber", "protein"):
            status = "✅ Good" if pct >= 20 else "โ ๏ธ Low" if pct >= 10 else "โ Low"
        else:
            status = "โ Very high" if pct > 75 else "โ ๏ธ High" if pct > 40 else "✅ OK"
        rows.append({"Nutrient": NUTR_LABELS.get(key, key),
                     "Amount": f"{val:.1f} {unit}",
                     "% Daily value": f"{pct:.0f}%",
                     "Status": status})
    return pd.DataFrame(rows)
|
|
|
|
| def _ing_df(structure) -> pd.DataFrame: |
| if not structure or not structure.ingredients: |
| return pd.DataFrame(columns=["Ingredient", "Quantity", "Method", "Flag"]) |
| rows = [] |
| for i in structure.ingredients: |
| flag = "โ High-risk" if i.is_high_risk else ("โ Healthy" if i.is_healthy else "") |
| rows.append({"Ingredient": i.name, "Quantity": i.quantity or "โ", |
| "Method": i.method or "โ", "Flag": flag}) |
| return pd.DataFrame(rows) |
|
|
|
|
| def _expl_html(explanation) -> str: |
| if not explanation: |
| return "" |
| try: |
| d = explanation.to_dict() |
| factors_html = "".join( |
| f'<div style="display:flex;gap:10px;align-items:flex-start;margin:6px 0;font-size:13px;color:#1f2937;">' |
| f'<span style="color:{"#ef4444" if i["direction"]=="negative" else "#22c55e"};font-weight:700;flex-shrink:0;">' |
| f'{"โ" if i["direction"]=="negative" else "โ"}</span><span>{i["message"]}</span></div>' |
| for i in d.get("factors", [])[:5] |
| ) |
| suggs_html = "".join( |
| f'<div style="font-size:13px;color:#4b5563;margin:4px 0 4px 22px;">โ {s}</div>' |
| for s in d.get("suggestions", []) |
| ) |
| sugg_section = ( |
| f"<div style='font-weight:600;font-size:14px;margin:14px 0 8px;color:#1f2937;'>" |
| f"๐ก Suggestions</div>{suggs_html}" if suggs_html else "" |
| ) |
| return f""" |
| <div style="font-family:system-ui;padding:16px;"> |
| <div style="font-weight:600;font-size:15px;margin-bottom:10px;color:#1f2937;"> |
| ๐ Key health factors (SHAP)</div> |
| {factors_html}{sugg_section} |
| </div>""" |
| except Exception as e: |
| logger.warning(f"Explanation render failed: {e}") |
| return "" |
|
|
|
|
# Empty frame returned for both tables whenever a handler reports an error.
EMPTY_DF = pd.DataFrame()

# Sample recipes offered as click-to-load examples in the text tab.
EXAMPLES = [
    (
        "Take 2 cups of butter, deep fry 300g chicken thighs. "
        "Serve with 1 cup heavy cream sauce and 100g cheddar cheese."
    ),
    (
        "Grill 200g salmon. Serve over 1 cup brown rice with 200g steamed broccoli, "
        "half an avocado, 1 tbsp olive oil, and 100g spinach."
    ),
    (
        "Simmer 2 cups red lentils with 4 cups broth, 2 carrots, 2 celery stalks, "
        "1 onion, 3 garlic cloves, and a handful of spinach."
    ),
    (
        "Cook 200g spaghetti. Fry 150g bacon. "
        "Mix 3 egg yolks with 100g parmesan and 1 cup heavy cream. Season with salt."
    ),
]
|
|
|
|
| |
|
|
def analyze_text(recipe_text: str):
    """Gradio handler for the text tab.

    Returns a 4-tuple ``(score_html, nutrition_df, ingredients_df,
    explanation_html)``. On blank input or any pipeline error, an error card
    is returned together with empty tables and an empty explanation.
    """
    cleaned = recipe_text.strip() if recipe_text else ""
    if not cleaned:
        return _error_html("Please enter a recipe."), EMPTY_DF, EMPTY_DF, ""

    try:
        label, score, proba, nutrition, structure, explanation = run_pipeline(cleaned)
        score_card = _score_html(label, score, proba)
        nutrition_table = _nutr_df(nutrition.per_serving)
        ingredient_table = _ing_df(structure)
        explanation_block = _expl_html(explanation)
        return score_card, nutrition_table, ingredient_table, explanation_block
    except Exception as e:
        logger.error(f"Text error: {e}\n{traceback.format_exc()}")
        return _error_html(str(e)), EMPTY_DF, EMPTY_DF, ""
|
|
|
|
def _analyze_audio(audio_path, language, task, missing_msg, empty_msg, header, log_tag):
    """Shared body of the audio handlers: transcribe, analyze, render.

    Args:
        audio_path: Path of the uploaded or recorded audio, or a falsy value.
        language: Language code forwarded to ``transcribe_audio``.
        task: Task name forwarded to ``transcribe_audio``.
        missing_msg: Error text shown when no audio was supplied.
        empty_msg: Error text shown when the transcription is blank.
        header: First line of the transcript shown to the user.
        log_tag: Prefix used in log messages for this handler.

    Returns:
        A 5-tuple ``(score_html, nutrition_df, ingredients_df,
        explanation_html, transcript_text)``. On failure the first element is
        an error card and the tables are empty; the transcript is kept when
        transcription succeeded but the pipeline failed.
    """
    def failed(message, transcript=""):
        return _error_html(message), EMPTY_DF, EMPTY_DF, "", transcript

    if not audio_path:
        return failed(missing_msg)
    try:
        text = transcribe_audio(audio_path, language=language, task=task)
    except Exception as e:
        # Logged with a traceback, matching how analyze_text reports failures.
        logger.error(f"{log_tag} transcription error: {e}\n{traceback.format_exc()}")
        return failed(str(e))
    if not text or not text.strip():
        return failed(empty_msg)

    transcript_display = f"{header}\n{text}"
    try:
        label, score, proba, nutrition, structure, explanation = run_pipeline(text.strip())
        return (_score_html(label, score, proba), _nutr_df(nutrition.per_serving),
                _ing_df(structure), _expl_html(explanation), transcript_display)
    except Exception as e:
        logger.error(f"{log_tag} analysis error: {e}\n{traceback.format_exc()}")
        return failed(str(e), transcript_display)


def analyze_english_audio(audio_path):
    """Gradio handler for the English audio tab.

    Transcribes the audio with ``language=None`` and ``task="transcribe"``,
    then runs the recipe pipeline on the transcript. Returns the 5-tuple
    ``(score_html, nutrition_df, ingredients_df, explanation_html,
    transcript_text)``.
    """
    return _analyze_audio(
        audio_path,
        language=None,
        task="transcribe",
        missing_msg="Please upload an audio file.",
        empty_msg="Could not transcribe audio.",
        header="๐ข Transcribed (English):",
        log_tag="English audio",
    )


def analyze_hindi_audio(audio_path):
    """Gradio handler for the Hindi audio tab.

    Calls the transcriber with ``language="hi"`` and ``task="translate"`` so
    that the returned text is English, then runs the recipe pipeline on it
    exactly as for the other inputs. Returns the 5-tuple ``(score_html,
    nutrition_df, ingredients_df, explanation_html, transcript_text)``.
    """
    return _analyze_audio(
        audio_path,
        language="hi",
        task="translate",
        missing_msg="Please upload a Hindi audio file.",
        empty_msg="Could not transcribe Hindi audio. Please speak clearly.",
        header="๐ข Hindi โ English:",
        log_tag="Hindi audio",
    )
|
|
|
|
| |
|
|
# UI definition. The three tabs each have their own input widgets and score
# card, while the nutrition table, ingredient table and explanation panel
# below the tabs are shared by all three handlers.
# NOTE(review): block nesting was reconstructed from statement order — confirm
# against the original layout.
with gr.Blocks(title="๐ฅ Recipe Health Analyzer") as demo:

    gr.Markdown("""
    # ๐ฅ Recipe Health Analyzer
    **Pipeline:** Speech / Text โ NLP โ USDA Nutrition โ ML Classification โ SHAP Explainability

    Supports **English text**, **English audio**, and **Hindi audio** input.
    """)

    with gr.Tabs():

        # Tab 1: free-text recipe input with clickable examples.
        with gr.Tab("๐ Text input"):
            with gr.Row():
                with gr.Column(scale=2):
                    text_in = gr.Textbox(
                        label="Recipe text",
                        placeholder="2 cups flour, 1 egg, 300g chicken breast, 1 tbsp olive oil, steamed broccoli",
                        lines=7,
                    )
                    text_btn = gr.Button("๐ฌ Analyze recipe", variant="primary", size="lg")
                    gr.Examples(examples=[[e] for e in EXAMPLES], inputs=text_in,
                                label="Example recipes (click to load)")
                with gr.Column(scale=2):
                    text_score = gr.HTML(value=_empty_html(), label="Health score")

        # Tab 2: English audio; the handler receives a file path.
        with gr.Tab("๐๏ธ English audio"):
            with gr.Row():
                with gr.Column(scale=2):
                    eng_audio_in = gr.Audio(label="Upload or record English audio",
                                            type="filepath", sources=["upload", "microphone"])
                    eng_audio_btn = gr.Button("๐๏ธ Transcribe & analyze", variant="primary", size="lg")
                    eng_audio_text = gr.Textbox(label="Transcription", lines=4,
                                                interactive=False,
                                                placeholder="Transcribed English text appears here.")
                with gr.Column(scale=2):
                    eng_audio_score = gr.HTML(value=_empty_html(), label="Health score")

        # Tab 3: Hindi audio; same widgets as the English tab.
        with gr.Tab("๐ฎ๐ณ Hindi audio"):
            gr.Markdown("""
            **เคนเคฟเคเคฆเฅ เคฎเฅเค เคฌเฅเคฒเฅเค** โ Speak your recipe in Hindi.
            Whisper automatically transcribes and translates to English in one step.
            """)
            with gr.Row():
                with gr.Column(scale=2):
                    hin_audio_in = gr.Audio(label="Upload or record Hindi audio",
                                            type="filepath", sources=["upload", "microphone"])
                    hin_audio_btn = gr.Button("๐ฎ๐ณ Transcribe Hindi & analyze",
                                              variant="primary", size="lg")
                    hin_audio_text = gr.Textbox(label="Hindi โ English translation", lines=4,
                                                interactive=False,
                                                placeholder="Whisper's English translation appears here.")
                with gr.Column(scale=2):
                    hin_audio_score = gr.HTML(value=_empty_html(), label="Health score")

    gr.Markdown("---")

    # Shared result widgets, written to by all three click handlers below.
    with gr.Row():
        nutr_table = gr.Dataframe(label="๐ Nutrition per serving", interactive=False, wrap=True)
        ing_table = gr.Dataframe(label="๐งช Identified ingredients", interactive=False, wrap=True)

    expl_out = gr.HTML(label="๐ SHAP explanation")

    # analyze_text returns four values; the audio handlers return a fifth
    # value, the transcript, which goes to the tab's own textbox.
    text_btn.click(fn=analyze_text, inputs=[text_in],
                   outputs=[text_score, nutr_table, ing_table, expl_out])

    eng_audio_btn.click(fn=analyze_english_audio, inputs=[eng_audio_in],
                        outputs=[eng_audio_score, nutr_table, ing_table, expl_out, eng_audio_text])

    hin_audio_btn.click(fn=analyze_hindi_audio, inputs=[hin_audio_in],
                        outputs=[hin_audio_score, nutr_table, ing_table, expl_out, hin_audio_text])

    gr.Markdown("""
    ---
    **Stack:** spaCy ยท USDA FoodData Central ยท scikit-learn RandomForest ยท SHAP ยท OpenAI Whisper ยท Gradio
    *Hindi uses Whisper `task="translate"` โ no separate translation model required.*
    """)
|
|
|
|
# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable share
    # link — confirm that is intended for an app described as local.
    demo.launch(share=True,
                ssr_mode=False,)
|
|