# Source: Hugging Face Space "he99codes/Recipe_Health_Classification" (status at capture: Sleeping).
# File size: 18,897 bytes.

"""
app.py — Local Gradio app with Hindi speech-to-text support.
- English text input (Stage 2–5 unchanged)
- English audio upload/record
- Hindi audio upload/record → Whisper translates to English → Stage 2–5
"""

import os
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))

from utils.config import config
from utils.logger import logger

# ── Auto-download spaCy model if missing ─────────────────────
def _ensure_spacy():
    """Check that the spaCy ``en_core_web_sm`` model loads; download it if it does not.

    An ``OSError`` raised while importing spaCy or loading the model is treated
    as "model missing" and triggers ``spacy.cli.download``. Any other exception
    propagates to the caller. The model is not re-loaded after the download.
    """
    model_name = "en_core_web_sm"
    try:
        import spacy
        spacy.load(model_name)
    except OSError:
        logger.info("Downloading spaCy en_core_web_sm …")
        from spacy.cli import download as fetch_model
        fetch_model(model_name)
        logger.info("spaCy model ready.")


# Runs at import time, before the pipeline objects further down are constructed.
_ensure_spacy()

# ── Auto-train classifier if no saved model ───────────────────
def _ensure_model():
    """Make sure a saved health classifier exists, training one when it does not.

    A ``HealthClassifier(model_type="random_forest")`` is asked to ``load()``.
    A truthy result means a saved model was found and only a log line is
    emitted. Otherwise the classifier is trained on 1000 synthetic samples,
    saved, and the reported ``test_accuracy`` is logged.
    """
    from health_classifier.model import HealthClassifier
    from health_classifier.feature_engineering import generate_synthetic_training_data, FEATURE_NAMES

    classifier = HealthClassifier(model_type="random_forest")
    if not classifier.load():
        logger.info("No saved model — training on synthetic data …")
        samples = generate_synthetic_training_data(n_samples=1000)
        metrics = classifier.train(samples[FEATURE_NAMES], samples["label"])
        classifier.save()
        logger.info(f"Classifier ready. acc={metrics['test_accuracy']:.3f}")
    else:
        logger.info("Loaded saved classifier.")


# Runs at import time, before the pipeline objects further down are constructed.
_ensure_model()

# ── Imports ───────────────────────────────────────────────────
import traceback
import gradio as gr
import pandas as pd

from recipe_nlp.extractor import RecipeExtractor
from nutrition_engine.mapper import NutritionMapper, NutritionAggregator
from health_classifier.model import HealthClassifier, LABEL_EMOJI, LABEL_NAMES
from health_classifier.explainer import RecipeExplainer
from health_classifier.feature_engineering import FeatureEngineer

# ── Pipeline ──────────────────────────────────────────────────

# Stage objects shared by every request; constructed once, at import time, and
# looked up by key inside run_pipeline().
_BASE_PIPELINE = {
    "extractor":  RecipeExtractor(),      # .extract(text) -> structure (ingredients, servings_hint, cooking_methods)
    "mapper":     NutritionMapper(),      # .map_ingredients(structure.ingredients)
    "aggregator": NutritionAggregator(),  # .aggregate(mapped ingredients, servings_hint, cooking_methods) -> nutrition
    "classifier": HealthClassifier(),     # .predict(features) -> (label, score, probabilities)
    "fe":         FeatureEngineer(),      # .extract(nutrition) -> features
}


def run_pipeline(text: str):
    """Run recipe text through extraction, nutrition mapping, classification and explanation.

    Args:
        text: Recipe description (English) to analyse.

    Returns:
        A 6-tuple ``(label, score, probabilities, nutrition, structure, explanation)``.
        ``explanation`` is ``None`` when the explainer fails; that failure is
        logged as a warning and is not fatal.

    Raises:
        Exception: with a stage-specific message when extraction, nutrition
            mapping or classification fails (the underlying error is attached
            as ``__cause__``), or when extraction yields no ingredients.
    """
    p = _BASE_PIPELINE

    try:
        structure = p["extractor"].extract(text)
    except Exception as e:
        # Chain explicitly so logged tracebacks show the root cause as the direct cause.
        raise Exception(f"NLP extraction failed: {e}") from e

    if not structure.ingredients:
        raise Exception(
            "No ingredients found. Try being more specific, "
            "e.g. '2 cups flour, 1 egg, 300g chicken'."
        )

    try:
        ing_nutritions = p["mapper"].map_ingredients(structure.ingredients)
        nutrition = p["aggregator"].aggregate(
            ing_nutritions, structure.servings_hint, structure.cooking_methods
        )
    except Exception as e:
        raise Exception(f"Nutrition mapping failed: {e}") from e

    try:
        features = p["fe"].extract(nutrition)
        label, score, probabilities = p["classifier"].predict(features)
    except Exception as e:
        raise Exception(f"Classification failed: {e}") from e

    # The explanation is optional: a failure here must not discard the classification.
    try:
        explainer = RecipeExplainer(p["classifier"])
        explanation = explainer.explain(features, label, score, probabilities)
    except Exception as e:
        logger.warning(f"Explainer failed (non-fatal): {e}")
        explanation = None

    return label, score, probabilities, nutrition, structure, explanation


def transcribe_audio(audio_path: str, language: str = None, task: str = "transcribe") -> str:
    """Transcribe an audio file with the project's ``SpeechTranscriber``.

    Typical argument combinations:
        Hindi -> English: ``language="hi"``, ``task="translate"``
        English:          ``language=None``, ``task="transcribe"``

    Args:
        audio_path: Path of the audio file to transcribe.
        language: Language code forwarded to the transcriber; ``None`` is
            logged as ``auto``.
        task: Task name forwarded to the transcriber.

    Returns:
        The text returned by the transcriber.

    Raises:
        Exception: ``"ffmpeg not found. ..."`` when the underlying error text
            looks like a missing ffmpeg binary, otherwise
            ``"Audio transcription failed: ..."``. The original error is
            attached as ``__cause__`` in both cases.
    """
    try:
        # Imported lazily so the app can start without the speech stack loaded.
        from speech_module.transcriber1 import SpeechTranscriber
        transcriber = SpeechTranscriber()
        text, conf = transcriber.transcribe(audio_path, language=language, task=task)
        logger.info(f"Transcribed: lang={language or 'auto'} task={task} conf={conf:.2f}")
        return text
    except Exception as e:
        err = str(e)
        # NOTE(review): substring heuristic; "No such file" also matches a
        # missing audio file, which would be reported as an ffmpeg problem.
        if "WinError 2" in err or "ffmpeg" in err.lower() or "No such file" in err:
            raise Exception(
                "ffmpeg not found. Download from https://ffmpeg.org, "
                "extract to C:\\ffmpeg, add C:\\ffmpeg\\bin to PATH, "
                "then restart the app."
            ) from e
        raise Exception(f"Audio transcription failed: {e}") from e


# ── UI helpers ────────────────────────────────────────────────

# Per-nutrient daily reference amounts from config; _nutr_df divides by these
# to compute the "% Daily value" column.
DAILY = config.classifier.daily_recommended
# Display unit per nutrient key; the key order is the row order of the nutrition table.
UNITS = {
    "calories": "kcal",
    "total_fat": "g",
    "saturated_fat": "g",
    "protein": "g",
    "carbohydrates": "g",
    "sugar": "g",
    "fiber": "g",
    "sodium": "mg",
}

# Human-readable row label per nutrient key.
NUTR_LABELS = {
    "calories": "🔥 Calories",
    "total_fat": "🥑 Total fat",
    "saturated_fat": "⚠ Saturated fat",
    "protein": "💪 Protein",
    "carbohydrates": "🍞 Carbs",
    "sugar": "🍬 Sugar",
    "fiber": "🌾 Fiber",
    "sodium": "🧂 Sodium",
}


def _score_html(label: str, score: float, proba: dict) -> str:
    """Render the health-rating card as an HTML snippet.

    Args:
        label: Class label shown in the pill under the score.
        score: Health score shown as ``<score>/10``; ``>= 7`` renders green,
            ``>= 4`` amber, anything else red.
        proba: Mapping of class label to probability; rows are listed from
            most to least probable and entries with a falsy label are skipped.

    Returns:
        The HTML string for the score card.
    """
    # Palette order: accent, background, text, border, status emoji.
    if score >= 7:
        palette = ("#22c55e", "#f0fdf4", "#14532d", "#bbf7d0", "🟢")
    elif score >= 4:
        palette = ("#f59e0b", "#fffbeb", "#78350f", "#fde68a", "🟡")
    else:
        palette = ("#ef4444", "#fef2f2", "#7f1d1d", "#fecaca", "🔴")
    clr, bg, text_clr, border_clr, emoji = palette

    # Progress-bar width in percent, clamped to 0..100.
    bar = max(0, min(100, score * 10))

    ranked = sorted(proba.items(), key=lambda entry: entry[1], reverse=True)
    proba_rows = "".join(
        f"""
        <div style="display:flex;justify-content:space-between;align-items:center;
                    padding:6px 4px;border-bottom:1px solid {border_clr};
                    font-size:13px;color:#4b5563;">
            <span style="font-weight:600;color:#374151;">{lbl}</span>
            <span style="font-weight:700;color:{text_clr};background:rgba(255,255,255,0.7);
                         padding:2px 8px;border-radius:12px;">{p:.0%}</span>
        </div>"""
        for lbl, p in ranked
        if lbl
    )

    return f"""
    <div style="font-family:system-ui,-apple-system,sans-serif;padding:32px 28px;
                border-radius:20px;background:{bg};border:1px solid {border_clr};
                text-align:center;max-width:420px;margin:0 auto;">
        <div style="font-size:48px;margin-bottom:4px;">{emoji}</div>
        <div style="font-size:12px;font-weight:700;color:#6b7280;
                    letter-spacing:0.1em;text-transform:uppercase;margin-bottom:12px;">
            Health Rating
        </div>
        <div style="font-size:72px;font-weight:800;color:{clr};line-height:1;
                    letter-spacing:-0.02em;margin-bottom:16px;">
            {score}<span style="font-size:24px;color:#9ca3af;font-weight:500;">/10</span>
        </div>
        <div style="background:{clr};color:white;padding:6px 16px;border-radius:999px;
                    font-size:13px;font-weight:700;text-transform:uppercase;
                    letter-spacing:0.05em;display:inline-block;margin-bottom:20px;">
            {label}
        </div>
        <div style="background:rgba(0,0,0,0.05);border-radius:999px;height:10px;
                    margin:0 0 20px;overflow:hidden;">
            <div style="background:{clr};width:{bar}%;height:100%;border-radius:999px;"></div>
        </div>
        <div style="background:rgba(255,255,255,0.6);border-radius:16px;
                    border:1px solid {border_clr};padding:16px;text-align:left;">
            <div style="font-size:11px;color:#6b7280;font-weight:700;
                        letter-spacing:0.08em;margin-bottom:12px;">CLASS PROBABILITIES</div>
            {proba_rows}
        </div>
    </div>"""


def _error_html(msg: str) -> str:
    """Render an error card containing ``msg`` as an HTML snippet.

    ``msg`` is usually ``str(exception)``, so it can contain ``<``, ``>`` or
    ``&`` (e.g. ``'<' not supported between instances ...``). Those characters
    are HTML-escaped so the browser shows the text verbatim instead of parsing
    it as markup.

    Args:
        msg: Plain-text message to display.

    Returns:
        The HTML string for the error card.
    """
    # Local import: ``html`` is not part of this module's top-level imports.
    from html import escape

    # quote=False: the message is placed in element content, not in an attribute.
    safe_msg = escape(str(msg), quote=False)
    return f"""
<div style="font-family:system-ui;padding:20px;border-radius:12px;
            background:#fef2f2;border:2px solid #ef4444;max-width:420px;margin:0 auto;">
  <div style="font-size:18px;font-weight:600;color:#991b1b;margin-bottom:8px;">⚠ Error</div>
  <div style="font-size:13px;line-height:1.6;color:#7f1d1d;">{safe_msg}</div>
</div>"""


def _empty_html() -> str:
    """Return the placeholder card used as the initial value of each score panel."""
    placeholder_lines = (
        "",  # the markup starts with a newline
        '<div style="font-family:system-ui;padding:32px;border-radius:16px;',
        "            background:#f9fafb;border:2px dashed #e5e7eb;text-align:center;",
        '            color:#9ca3af;max-width:420px;margin:0 auto;">',
        '  <div style="font-size:40px;margin-bottom:10px;">🥗</div>',
        '  <div style="font-size:14px;">Results will appear here after analysis</div>',
        "</div>",
    )
    return "\n".join(placeholder_lines)


def _nutr_df(per_serving: dict) -> pd.DataFrame:
    """Build the per-serving nutrition table.

    One row is produced per key of ``UNITS``. Missing nutrients count as 0,
    and a missing or zero daily reference is replaced by 1. For fiber and
    protein a higher share of the daily value is better; for every other
    nutrient a higher share is flagged as high.

    Args:
        per_serving: Mapping of nutrient key to amount per serving.

    Returns:
        DataFrame with columns ``Nutrient``, ``Amount``, ``% Daily value``
        and ``Status``.
    """
    beneficial = ("fiber", "protein")
    table_rows = []
    for nutrient, unit in UNITS.items():
        amount = per_serving.get(nutrient, 0)
        reference = DAILY.get(nutrient, 1) or 1
        pct = amount / reference * 100

        if nutrient in beneficial:
            if pct >= 20:
                status = "✅ Good"
            elif pct >= 10:
                status = "⚠️ Low"
            else:
                status = "❌ Low"
        elif pct > 75:
            status = "❌ Very high"
        elif pct > 40:
            status = "⚠️ High"
        else:
            status = "✅ OK"

        table_rows.append({
            "Nutrient": NUTR_LABELS.get(nutrient, nutrient),
            "Amount": f"{amount:.1f} {unit}",
            "% Daily value": f"{pct:.0f}%",
            "Status": status,
        })
    return pd.DataFrame(table_rows)


def _ing_df(structure) -> pd.DataFrame:
    """Build the table of identified ingredients.

    Args:
        structure: Object exposing ``ingredients``; each ingredient provides
            ``name``, ``quantity``, ``method``, ``is_high_risk`` and
            ``is_healthy``. May be falsy.

    Returns:
        DataFrame with columns ``Ingredient``, ``Quantity``, ``Method`` and
        ``Flag``. It has no rows when ``structure`` or its ingredient list is
        falsy. The high-risk flag takes precedence over the healthy flag.
    """
    if not (structure and structure.ingredients):
        return pd.DataFrame(columns=["Ingredient", "Quantity", "Method", "Flag"])

    records = []
    for item in structure.ingredients:
        if item.is_high_risk:
            flag = "⚠ High-risk"
        elif item.is_healthy:
            flag = "✓ Healthy"
        else:
            flag = ""
        records.append({
            "Ingredient": item.name,
            "Quantity": item.quantity or "—",  # dash stands in for a missing quantity
            "Method": item.method or "—",      # dash stands in for a missing method
            "Flag": flag,
        })
    return pd.DataFrame(records)


def _expl_html(explanation) -> str:
    """Render the explanation panel (key factors plus optional suggestions).

    Args:
        explanation: Object whose ``to_dict()`` yields a mapping with optional
            ``"factors"`` (dicts with ``"direction"`` and ``"message"``) and
            ``"suggestions"`` (strings). May be falsy.

    Returns:
        HTML string listing at most the first five factors and all
        suggestions. An empty string is returned when ``explanation`` is
        falsy or when rendering raises; the failure is logged as a warning.
    """
    if not explanation:
        return ""
    try:
        details = explanation.to_dict()

        factor_rows = []
        for factor in details.get("factors", [])[:5]:
            negative = factor["direction"] == "negative"
            colour = "#ef4444" if negative else "#22c55e"
            mark = "✗" if negative else "✓"
            factor_rows.append(
                f'<div style="display:flex;gap:10px;align-items:flex-start;margin:6px 0;font-size:13px;color:#1f2937;">'
                f'<span style="color:{colour};font-weight:700;flex-shrink:0;">'
                f'{mark}</span><span>{factor["message"]}</span></div>'
            )
        factors_html = "".join(factor_rows)

        suggs_html = "".join([
            f'<div style="font-size:13px;color:#4b5563;margin:4px 0 4px 22px;">→ {tip}</div>'
            for tip in details.get("suggestions", [])
        ])

        # The "Suggestions" heading is only emitted when there is something to list.
        if suggs_html:
            sugg_section = (
                f"<div style='font-weight:600;font-size:14px;margin:14px 0 8px;color:#1f2937;'>"
                f"💡 Suggestions</div>{suggs_html}"
            )
        else:
            sugg_section = ""

        return f"""
<div style="font-family:system-ui;padding:16px;">
  <div style="font-weight:600;font-size:15px;margin-bottom:10px;color:#1f2937;">
    🔍 Key health factors (SHAP)</div>
  {factors_html}{sugg_section}
</div>"""
    except Exception as e:
        logger.warning(f"Explanation render failed: {e}")
        return ""


# Placeholder returned for both table outputs whenever a handler reports an error.
EMPTY_DF = pd.DataFrame()
# Sample recipes offered as click-to-load examples on the text tab.
EXAMPLES = [
    "Take 2 cups of butter, deep fry 300g chicken thighs. Serve with 1 cup heavy cream sauce and 100g cheddar cheese.",
    "Grill 200g salmon. Serve over 1 cup brown rice with 200g steamed broccoli, half an avocado, 1 tbsp olive oil, and 100g spinach.",
    "Simmer 2 cups red lentils with 4 cups broth, 2 carrots, 2 celery stalks, 1 onion, 3 garlic cloves, and a handful of spinach.",
    "Cook 200g spaghetti. Fry 150g bacon. Mix 3 egg yolks with 100g parmesan and 1 cup heavy cream. Season with salt.",
]


# ── Gradio handlers ───────────────────────────────────────────

def analyze_text(recipe_text: str):
    """Gradio handler for the text tab.

    Args:
        recipe_text: Raw contents of the recipe textbox; may be empty or ``None``.

    Returns:
        A 4-tuple ``(score_html, nutrition_df, ingredients_df, explanation_html)``.
        On blank input or any pipeline error the first item is an error card,
        both tables are ``EMPTY_DF`` and the explanation is ``""``; pipeline
        errors are logged together with their traceback.
    """
    cleaned = recipe_text.strip() if recipe_text else ""
    if not cleaned:
        return _error_html("Please enter a recipe."), EMPTY_DF, EMPTY_DF, ""

    try:
        label, score, proba, nutrition, structure, explanation = run_pipeline(cleaned)
        score_card = _score_html(label, score, proba)
        nutrition_table = _nutr_df(nutrition.per_serving)
        ingredient_table = _ing_df(structure)
        explanation_card = _expl_html(explanation)
    except Exception as e:
        logger.error(f"Text error: {e}\n{traceback.format_exc()}")
        return _error_html(str(e)), EMPTY_DF, EMPTY_DF, ""

    return score_card, nutrition_table, ingredient_table, explanation_card


def _analyze_audio(audio_path, *, language, task, missing_msg, empty_msg, transcript_header):
    """Shared flow of the audio tabs: transcribe, then analyse the transcript.

    Args:
        audio_path: File path supplied by the ``gr.Audio`` input; falsy when
            nothing was uploaded or recorded.
        language: Language code forwarded to ``transcribe_audio``.
        task: Task name forwarded to ``transcribe_audio``.
        missing_msg: Error text shown when ``audio_path`` is falsy.
        empty_msg: Error text shown when the transcript is empty or blank.
        transcript_header: First line of the text placed in the transcript box.

    Returns:
        A 5-tuple ``(score_html, nutrition_df, ingredients_df,
        explanation_html, transcript)``. Failures are rendered as an error
        card with empty tables; they are logged and never raised. The
        transcript is still returned when only the analysis step fails.
    """
    def _failure(message, transcript=""):
        return _error_html(message), EMPTY_DF, EMPTY_DF, "", transcript

    if not audio_path:
        return _failure(missing_msg)

    try:
        text = transcribe_audio(audio_path, language=language, task=task)
    except Exception as e:
        # Log like analyze_text does, so audio failures are not silent server-side.
        logger.error(f"Audio transcription error: {e}\n{traceback.format_exc()}")
        return _failure(str(e))

    if not text or not text.strip():
        return _failure(empty_msg)

    transcript_display = f"{transcript_header}\n{text}"
    try:
        label, score, proba, nutrition, structure, explanation = run_pipeline(text.strip())
        return (_score_html(label, score, proba), _nutr_df(nutrition.per_serving),
                _ing_df(structure), _expl_html(explanation), transcript_display)
    except Exception as e:
        logger.error(f"Audio analysis error: {e}\n{traceback.format_exc()}")
        return _failure(str(e), transcript_display)


def analyze_english_audio(audio_path):
    """Gradio handler for the English audio tab.

    Transcribes with ``language=None`` and ``task="transcribe"``, then runs the
    text pipeline on the transcript. Returns the 5-tuple
    ``(score_html, nutrition_df, ingredients_df, explanation_html, transcript)``.
    """
    return _analyze_audio(
        audio_path,
        language=None,
        task="transcribe",
        missing_msg="Please upload an audio file.",
        empty_msg="Could not transcribe audio.",
        transcript_header="📢 Transcribed (English):",
    )


def analyze_hindi_audio(audio_path):
    """Gradio handler for the Hindi audio tab.

    Calls the transcriber with ``language="hi"`` and ``task="translate"`` so
    that the Hindi speech comes back as English text in a single step (no
    separate translation model), then feeds that text to the same pipeline as
    the other tabs. Returns the 5-tuple
    ``(score_html, nutrition_df, ingredients_df, explanation_html, transcript)``.
    """
    return _analyze_audio(
        audio_path,
        language="hi",
        task="translate",
        missing_msg="Please upload a Hindi audio file.",
        empty_msg="Could not transcribe Hindi audio. Please speak clearly.",
        transcript_header="📢 Hindi → English:",
    )


# ── Layout ────────────────────────────────────────────────────

# UI definition: three input tabs (text, English audio, Hindi audio), each with
# its own score panel, followed by result widgets that all tabs write into.
with gr.Blocks(title="🥗 Recipe Health Analyzer") as demo:

    gr.Markdown("""
    # 🥗 Recipe Health Analyzer
    **Pipeline:** Speech / Text → NLP → USDA Nutrition → ML Classification → SHAP Explainability
    
    Supports **English text**, **English audio**, and **Hindi audio** input.
    """)

    with gr.Tabs():

        # Tab 1: typed recipe, with click-to-load examples.
        with gr.Tab("📝 Text input"):
            with gr.Row():
                with gr.Column(scale=2):
                    text_in = gr.Textbox(
                        label="Recipe text",
                        placeholder="2 cups flour, 1 egg, 300g chicken breast, 1 tbsp olive oil, steamed broccoli",
                        lines=7,
                    )
                    text_btn = gr.Button("🔬 Analyze recipe", variant="primary", size="lg")
                    gr.Examples(examples=[[e] for e in EXAMPLES], inputs=text_in,
                                label="Example recipes (click to load)")
                with gr.Column(scale=2):
                    text_score = gr.HTML(value=_empty_html(), label="Health score")

        # Tab 2: English speech; the audio component hands the handler a file path.
        with gr.Tab("🎙️ English audio"):
            with gr.Row():
                with gr.Column(scale=2):
                    eng_audio_in = gr.Audio(label="Upload or record English audio",
                                            type="filepath", sources=["upload", "microphone"])
                    eng_audio_btn = gr.Button("🎙️ Transcribe & analyze", variant="primary", size="lg")
                    eng_audio_text = gr.Textbox(label="Transcription", lines=4,
                                                interactive=False,
                                                placeholder="Transcribed English text appears here.")
                with gr.Column(scale=2):
                    eng_audio_score = gr.HTML(value=_empty_html(), label="Health score")

        # Tab 3: Hindi speech, shown to the user as its English translation.
        with gr.Tab("🇮🇳 Hindi audio"):
            gr.Markdown("""
            **हिंदी में बोलें** — Speak your recipe in Hindi.  
            Whisper automatically transcribes and translates to English in one step.
            """)
            with gr.Row():
                with gr.Column(scale=2):
                    hin_audio_in = gr.Audio(label="Upload or record Hindi audio",
                                            type="filepath", sources=["upload", "microphone"])
                    hin_audio_btn = gr.Button("🇮🇳 Transcribe Hindi & analyze",
                                              variant="primary", size="lg")
                    hin_audio_text = gr.Textbox(label="Hindi → English translation", lines=4,
                                                interactive=False,
                                                placeholder="Whisper's English translation appears here.")
                with gr.Column(scale=2):
                    hin_audio_score = gr.HTML(value=_empty_html(), label="Health score")

    gr.Markdown("---")

    # Result widgets shared by all three tabs; whichever handler ran last fills them.
    with gr.Row():
        nutr_table = gr.Dataframe(label="📊 Nutrition per serving", interactive=False, wrap=True)
        ing_table  = gr.Dataframe(label="🧪 Identified ingredients", interactive=False, wrap=True)

    expl_out = gr.HTML(label="🔍 SHAP explanation")

    # Event wiring. The order of `outputs` must match the tuple order each handler
    # returns: 4 values for text, 5 for audio (the extra one is the transcript box).
    text_btn.click(fn=analyze_text, inputs=[text_in],
                   outputs=[text_score, nutr_table, ing_table, expl_out])

    eng_audio_btn.click(fn=analyze_english_audio, inputs=[eng_audio_in],
                        outputs=[eng_audio_score, nutr_table, ing_table, expl_out, eng_audio_text])

    hin_audio_btn.click(fn=analyze_hindi_audio, inputs=[hin_audio_in],
                        outputs=[hin_audio_score, nutr_table, ing_table, expl_out, hin_audio_text])

    gr.Markdown("""
    ---
    **Stack:** spaCy · USDA FoodData Central · scikit-learn RandomForest · SHAP · OpenAI Whisper · Gradio  
    *Hindi uses Whisper `task="translate"` — no separate translation model required.*
    """)


if __name__ == "__main__":
    # Start the app only when this file is executed directly, not on import.
    demo.launch(
        share=True,
        ssr_mode=False,
    )