Spaces:

he99codes
/

Recipe_Health_Classification

Sleeping

App Files Files Community

Recipe_Health_Classification / app.py

he99codes

Deploying latest raw changes and full functionality

a3fc1ff about 1 month ago

raw

history blame contribute delete

18.9 kB

	"""
	app.py — Local Gradio app with Hindi speech-to-text support.
	- English text input (Stage 2–5 unchanged)
	- English audio upload/record
	- Hindi audio upload/record → Whisper translates to English → Stage 2–5
	"""

	import os
	import sys
	from pathlib import Path

	sys.path.insert(0, str(Path(__file__).parent))

	from utils.config import config
	from utils.logger import logger

	# ── Auto-download spaCy model if missing ─────────────────────
	def _ensure_spacy():
	    """Make sure the ``en_core_web_sm`` spaCy model can be loaded.

	    Attempts to load the model; if that raises ``OSError`` the model is
	    fetched through ``spacy.cli.download``. The "ready" message is logged
	    on both paths.
	    """
	    model_name = "en_core_web_sm"
	    try:
	        import spacy
	        spacy.load(model_name)
	    except OSError:
	        logger.info("Downloading spaCy en_core_web_sm …")
	        from spacy.cli import download as fetch_model
	        fetch_model(model_name)
	    logger.info("spaCy model ready.")

	# Executed at import time, before the pipeline modules are imported.
	_ensure_spacy()

	# ── Auto-train classifier if no saved model ───────────────────
	def _ensure_model():
	    """Make sure a trained health classifier is available on disk.

	    Builds a random-forest ``HealthClassifier`` and calls ``load()``. When
	    that returns a falsy value, the classifier is trained on 1000 synthetic
	    samples, saved, and the reported test accuracy is logged.
	    """
	    from health_classifier.model import HealthClassifier
	    from health_classifier.feature_engineering import generate_synthetic_training_data, FEATURE_NAMES

	    classifier = HealthClassifier(model_type="random_forest")
	    if classifier.load():
	        logger.info("Loaded saved classifier.")
	    else:
	        logger.info("No saved model — training on synthetic data …")
	        synthetic = generate_synthetic_training_data(n_samples=1000)
	        features = synthetic[FEATURE_NAMES]
	        labels = synthetic["label"]
	        metrics = classifier.train(features, labels)
	        classifier.save()
	        logger.info(f"Classifier ready. acc={metrics['test_accuracy']:.3f}")

	# Executed at import time so a model exists before the pipeline is built.
	_ensure_model()

	# ── Imports ───────────────────────────────────────────────────
	import traceback
	import gradio as gr
	import pandas as pd

	from recipe_nlp.extractor import RecipeExtractor
	from nutrition_engine.mapper import NutritionMapper, NutritionAggregator
	from health_classifier.model import HealthClassifier, LABEL_EMOJI, LABEL_NAMES
	from health_classifier.explainer import RecipeExplainer
	from health_classifier.feature_engineering import FeatureEngineer

	# ── Pipeline ──────────────────────────────────────────────────

	# Stage objects shared by every run_pipeline() call; built once at import time.
	# NOTE(review): the classifier is a fresh HealthClassifier() with default
	# arguments and nothing in this file calls load() on it — presumably it picks
	# up the model saved by _ensure_model() on its own; confirm.
	_BASE_PIPELINE = {
	    "extractor": RecipeExtractor(),
	    "mapper": NutritionMapper(),
	    "aggregator": NutritionAggregator(),
	    "classifier": HealthClassifier(),
	    "fe": FeatureEngineer(),
	}


	def run_pipeline(text: str):
	    """Run recipe text through extraction, nutrition mapping, classification and explanation.

	    Args:
	        text: Recipe description handed to the extractor.

	    Returns:
	        Tuple ``(label, score, probabilities, nutrition, structure, explanation)``.
	        ``explanation`` is ``None`` when the explainer fails; that failure is
	        logged as a warning and does not abort the run.

	    Raises:
	        Exception: With a stage-specific message when extraction, nutrition
	            mapping or classification fails (the underlying error is attached
	            as ``__cause__``), or when no ingredients were extracted.
	    """
	    p = _BASE_PIPELINE

	    try:
	        structure = p["extractor"].extract(text)
	    except Exception as e:
	        # Chain the cause so the original traceback is kept for logging.
	        raise Exception(f"NLP extraction failed: {e}") from e

	    if not structure.ingredients:
	        raise Exception(
	            "No ingredients found. Try being more specific, "
	            "e.g. '2 cups flour, 1 egg, 300g chicken'."
	        )

	    try:
	        ing_nutritions = p["mapper"].map_ingredients(structure.ingredients)
	        nutrition = p["aggregator"].aggregate(
	            ing_nutritions, structure.servings_hint, structure.cooking_methods
	        )
	    except Exception as e:
	        raise Exception(f"Nutrition mapping failed: {e}") from e

	    try:
	        features = p["fe"].extract(nutrition)
	        label, score, probabilities = p["classifier"].predict(features)
	    except Exception as e:
	        raise Exception(f"Classification failed: {e}") from e

	    # The explanation is optional: a failure here must not discard the result.
	    try:
	        explainer = RecipeExplainer(p["classifier"])
	        explanation = explainer.explain(features, label, score, probabilities)
	    except Exception as e:
	        logger.warning(f"Explainer failed (non-fatal): {e}")
	        explanation = None

	    return label, score, probabilities, nutrition, structure, explanation


	def transcribe_audio(audio_path: str, language: str = None, task: str = "transcribe") -> str:
	    """Transcribe an audio file with the project's Whisper-based transcriber.

	    For Hindi → English use ``language="hi", task="translate"``.
	    For English use ``language=None, task="transcribe"``.

	    Args:
	        audio_path: Path of the audio file to transcribe.
	        language: Language code passed to the transcriber, or ``None``.
	        task: Task name passed to the transcriber.

	    Returns:
	        The text returned by the transcriber.

	    Raises:
	        Exception: With an ffmpeg installation hint when the failure message
	            mentions "WinError 2", "ffmpeg" or "No such file"; otherwise with
	            an "Audio transcription failed" message. In both cases the
	            underlying error is attached as ``__cause__``.
	    """
	    try:
	        # Imported lazily so the app can start without the speech stack loaded.
	        from speech_module.transcriber1 import SpeechTranscriber
	        transcriber = SpeechTranscriber()
	        text, conf = transcriber.transcribe(audio_path, language=language, task=task)
	        logger.info(f"Transcribed: lang={language or 'auto'} task={task} conf={conf:.2f}")
	        return text
	    except Exception as e:
	        err = str(e)
	        # These substrings are treated as signs of a missing ffmpeg binary.
	        if "WinError 2" in err or "ffmpeg" in err.lower() or "No such file" in err:
	            raise Exception(
	                "ffmpeg not found. Download from https://ffmpeg.org, "
	                "extract to C:\\ffmpeg, add C:\\ffmpeg\\bin to PATH, "
	                "then restart the app."
	            ) from e
	        raise Exception(f"Audio transcription failed: {e}") from e


	# ── UI helpers ────────────────────────────────────────────────

	# Per-nutrient reference amounts read from the app config; _nutr_df() divides
	# by these to compute the "% Daily value" column.
	DAILY = config.classifier.daily_recommended
	# Display unit for each nutrient key; iteration order sets the table row order.
	UNITS = {
	    "calories": "kcal",
	    "total_fat": "g",
	    "saturated_fat": "g",
	    "protein": "g",
	    "carbohydrates": "g",
	    "sugar": "g",
	    "fiber": "g",
	    "sodium": "mg",
	}
	# Human-readable row label for each nutrient key.
	NUTR_LABELS = {
	    "calories": "🔥 Calories",
	    "total_fat": "🥑 Total fat",
	    "saturated_fat": "⚠ Saturated fat",
	    "protein": "💪 Protein",
	    "carbohydrates": "🍞 Carbs",
	    "sugar": "🍬 Sugar",
	    "fiber": "🌾 Fiber",
	    "sodium": "🧂 Sodium",
	}


	def _score_html(label: str, score: float, proba: dict) -> str:
	if score >= 7:
	clr, bg, text_clr, border_clr, emoji = "#22c55e", "#f0fdf4", "#14532d", "#bbf7d0", "🟢"
	elif score >= 4:
	clr, bg, text_clr, border_clr, emoji = "#f59e0b", "#fffbeb", "#78350f", "#fde68a", "🟡"
	else:
	clr, bg, text_clr, border_clr, emoji = "#ef4444", "#fef2f2", "#7f1d1d", "#fecaca", "🔴"
	bar = max(0, min(100, score * 10))
	proba_rows = ""
	for lbl, p in sorted(proba.items(), key=lambda x: x[1], reverse=True):
	if not lbl:
	continue
	proba_rows += f"""
	<div style="display:flex;justify-content:space-between;align-items:center;
	padding:6px 4px;border-bottom:1px solid {border_clr};
	font-size:13px;color:#4b5563;">
	<span style="font-weight:600;color:#374151;">{lbl}</span>
	<span style="font-weight:700;color:{text_clr};background:rgba(255,255,255,0.7);
	padding:2px 8px;border-radius:12px;">{p:.0%}</span>
	</div>"""
	return f"""
	<div style="font-family:system-ui,-apple-system,sans-serif;padding:32px 28px;
	border-radius:20px;background:{bg};border:1px solid {border_clr};
	text-align:center;max-width:420px;margin:0 auto;">
	<div style="font-size:48px;margin-bottom:4px;">{emoji}</div>
	<div style="font-size:12px;font-weight:700;color:#6b7280;
	letter-spacing:0.1em;text-transform:uppercase;margin-bottom:12px;">
	Health Rating
	</div>
	<div style="font-size:72px;font-weight:800;color:{clr};line-height:1;
	letter-spacing:-0.02em;margin-bottom:16px;">
	{score}<span style="font-size:24px;color:#9ca3af;font-weight:500;">/10</span>
	</div>
	<div style="background:{clr};color:white;padding:6px 16px;border-radius:999px;
	font-size:13px;font-weight:700;text-transform:uppercase;
	letter-spacing:0.05em;display:inline-block;margin-bottom:20px;">
	{label}
	</div>
	<div style="background:rgba(0,0,0,0.05);border-radius:999px;height:10px;
	margin:0 0 20px;overflow:hidden;">
	<div style="background:{clr};width:{bar}%;height:100%;border-radius:999px;"></div>
	</div>
	<div style="background:rgba(255,255,255,0.6);border-radius:16px;
	border:1px solid {border_clr};padding:16px;text-align:left;">
	<div style="font-size:11px;color:#6b7280;font-weight:700;
	letter-spacing:0.08em;margin-bottom:12px;">CLASS PROBABILITIES</div>
	{proba_rows}
	</div>
	</div>"""


	def _error_html(msg: str) -> str:
	return f"""
	<div style="font-family:system-ui;padding:20px;border-radius:12px;
	background:#fef2f2;border:2px solid #ef4444;max-width:420px;margin:0 auto;">
	<div style="font-size:18px;font-weight:600;color:#991b1b;margin-bottom:8px;">⚠ Error</div>
	<div style="font-size:13px;line-height:1.6;color:#7f1d1d;">{msg}</div>
	</div>"""


	def _empty_html() -> str:
	return """
	<div style="font-family:system-ui;padding:32px;border-radius:16px;
	background:#f9fafb;border:2px dashed #e5e7eb;text-align:center;
	color:#9ca3af;max-width:420px;margin:0 auto;">
	<div style="font-size:40px;margin-bottom:10px;">🥗</div>
	<div style="font-size:14px;">Results will appear here after analysis</div>
	</div>"""


	def _nutr_df(per_serving: dict) -> pd.DataFrame:
	    """Build the per-serving nutrition table.

	    One row per key in ``UNITS``: amount with unit, share of the ``DAILY``
	    reference value, and a status. Fiber and protein are rated as "more is
	    better"; every other nutrient is rated as "less is better".
	    """
	    beneficial = ("fiber", "protein")
	    records = []
	    for nutrient, unit in UNITS.items():
	        amount = per_serving.get(nutrient, 0)
	        # A missing or zero reference falls back to 1 to avoid dividing by zero.
	        reference = DAILY.get(nutrient, 1) or 1
	        pct = amount / reference * 100
	        if nutrient in beneficial:
	            if pct >= 20:
	                status = "✅ Good"
	            elif pct >= 10:
	                status = "⚠️ Low"
	            else:
	                status = "❌ Low"
	        elif pct > 75:
	            status = "❌ Very high"
	        elif pct > 40:
	            status = "⚠️ High"
	        else:
	            status = "✅ OK"
	        records.append({
	            "Nutrient": NUTR_LABELS.get(nutrient, nutrient),
	            "Amount": f"{amount:.1f} {unit}",
	            "% Daily value": f"{pct:.0f}%",
	            "Status": status,
	        })
	    return pd.DataFrame(records)


	def _ing_df(structure) -> pd.DataFrame:
	if not structure or not structure.ingredients:
	return pd.DataFrame(columns=["Ingredient", "Quantity", "Method", "Flag"])
	rows = []
	for i in structure.ingredients:
	flag = "⚠ High-risk" if i.is_high_risk else ("✓ Healthy" if i.is_healthy else "")
	rows.append({"Ingredient": i.name, "Quantity": i.quantity or "—",
	"Method": i.method or "—", "Flag": flag})
	return pd.DataFrame(rows)


	def _expl_html(explanation) -> str:
	if not explanation:
	return ""
	try:
	d = explanation.to_dict()
	factors_html = "".join(
	f'<div style="display:flex;gap:10px;align-items:flex-start;margin:6px 0;font-size:13px;color:#1f2937;">'
	f'<span style="color:{"#ef4444" if i["direction"]=="negative" else "#22c55e"};font-weight:700;flex-shrink:0;">'
	f'{"✗" if i["direction"]=="negative" else "✓"}</span><span>{i["message"]}</span></div>'
	for i in d.get("factors", [])[:5]
	)
	suggs_html = "".join(
	f'<div style="font-size:13px;color:#4b5563;margin:4px 0 4px 22px;">→ {s}</div>'
	for s in d.get("suggestions", [])
	)
	sugg_section = (
	f"<div style='font-weight:600;font-size:14px;margin:14px 0 8px;color:#1f2937;'>"
	f"💡 Suggestions</div>{suggs_html}" if suggs_html else ""
	)
	return f"""
	<div style="font-family:system-ui;padding:16px;">
	<div style="font-weight:600;font-size:15px;margin-bottom:10px;color:#1f2937;">
	🔍 Key health factors (SHAP)</div>
	{factors_html}{sugg_section}
	</div>"""
	except Exception as e:
	logger.warning(f"Explanation render failed: {e}")
	return ""


	# Placeholder returned for both tables when a handler has nothing to show.
	EMPTY_DF = pd.DataFrame()
	# Sample recipes offered as click-to-load examples on the text tab.
	EXAMPLES = [
	    "Take 2 cups of butter, deep fry 300g chicken thighs. Serve with 1 cup heavy cream sauce and 100g cheddar cheese.",
	    "Grill 200g salmon. Serve over 1 cup brown rice with 200g steamed broccoli, half an avocado, 1 tbsp olive oil, and 100g spinach.",
	    "Simmer 2 cups red lentils with 4 cups broth, 2 carrots, 2 celery stalks, 1 onion, 3 garlic cloves, and a handful of spinach.",
	    "Cook 200g spaghetti. Fry 150g bacon. Mix 3 egg yolks with 100g parmesan and 1 cup heavy cream. Season with salt.",
	]


	# ── Gradio handlers ───────────────────────────────────────────

	def analyze_text(recipe_text: str):
	    """Gradio handler for the text tab.

	    Returns a 4-tuple ``(score_html, nutrition_df, ingredients_df,
	    explanation_html)``. Blank input and pipeline failures yield an error
	    card with empty tables; failures are also logged with their traceback.
	    """
	    cleaned = recipe_text.strip() if recipe_text else ""
	    if not cleaned:
	        return _error_html("Please enter a recipe."), EMPTY_DF, EMPTY_DF, ""
	    try:
	        label, score, proba, nutrition, structure, explanation = run_pipeline(cleaned)
	        score_card = _score_html(label, score, proba)
	        nutrition_table = _nutr_df(nutrition.per_serving)
	        ingredient_table = _ing_df(structure)
	        return score_card, nutrition_table, ingredient_table, _expl_html(explanation)
	    except Exception as e:
	        logger.error(f"Text error: {e}\n{traceback.format_exc()}")
	        return _error_html(str(e)), EMPTY_DF, EMPTY_DF, ""


	def _analyze_audio(audio_path, *, language, task, missing_msg, empty_msg, transcript_header):
	    """Shared audio flow: transcribe, then run the text pipeline on the result.

	    Args:
	        audio_path: Path of the recorded or uploaded file; falsy when absent.
	        language: Language code forwarded to ``transcribe_audio``.
	        task: Task name forwarded to ``transcribe_audio``.
	        missing_msg: Error text shown when no audio was provided.
	        empty_msg: Error text shown when the transcription is blank.
	        transcript_header: First line of the transcript shown to the user.

	    Returns:
	        5-tuple ``(score_html, nutrition_df, ingredients_df,
	        explanation_html, transcript_text)``.
	    """

	    def failure(message, transcript=""):
	        return _error_html(message), EMPTY_DF, EMPTY_DF, "", transcript

	    if not audio_path:
	        return failure(missing_msg)
	    try:
	        text = transcribe_audio(audio_path, language=language, task=task)
	    except Exception as e:
	        # Logged like text-input failures so audio errors are not silent.
	        logger.error(f"Audio transcription error: {e}\n{traceback.format_exc()}")
	        return failure(str(e))
	    if not text or not text.strip():
	        return failure(empty_msg)
	    transcript_display = f"{transcript_header}\n{text}"
	    try:
	        label, score, proba, nutrition, structure, explanation = run_pipeline(text.strip())
	        return (_score_html(label, score, proba), _nutr_df(nutrition.per_serving),
	                _ing_df(structure), _expl_html(explanation), transcript_display)
	    except Exception as e:
	        logger.error(f"Audio pipeline error: {e}\n{traceback.format_exc()}")
	        # Keep the transcript visible so the user can see what was heard.
	        return failure(str(e), transcript_display)


	def analyze_english_audio(audio_path):
	    """Gradio handler for the English audio tab.

	    Transcribes with ``language=None`` and ``task="transcribe"``, then
	    analyzes the text. Returns the same 5-tuple as ``_analyze_audio``.
	    """
	    return _analyze_audio(
	        audio_path,
	        language=None,
	        task="transcribe",
	        missing_msg="Please upload an audio file.",
	        empty_msg="Could not transcribe audio.",
	        transcript_header="📢 Transcribed (English):",
	    )


	def analyze_hindi_audio(audio_path):
	    """Gradio handler for the Hindi audio tab.

	    Calls the transcriber with ``language="hi"`` and ``task="translate"`` so
	    the text handed to the pipeline is already English; no separate
	    translation step is performed here. Returns the same 5-tuple as
	    ``_analyze_audio``.
	    """
	    return _analyze_audio(
	        audio_path,
	        language="hi",
	        task="translate",
	        missing_msg="Please upload a Hindi audio file.",
	        empty_msg="Could not transcribe Hindi audio. Please speak clearly.",
	        transcript_header="📢 Hindi → English:",
	    )


	# ── Layout ────────────────────────────────────────────────────

	# Top-level UI: three input tabs, each with its own score card, followed by
	# result tables and an explanation panel that all three handlers write to.
	with gr.Blocks(title="🥗 Recipe Health Analyzer") as demo:

	    gr.Markdown("""
	# 🥗 Recipe Health Analyzer
	Pipeline: Speech / Text → NLP → USDA Nutrition → ML Classification → SHAP Explainability

	Supports English text, English audio, and Hindi audio input.
	""")

	    with gr.Tabs():

	        # Tab 1: free-text recipe input with clickable examples.
	        with gr.Tab("📝 Text input"):
	            with gr.Row():
	                with gr.Column(scale=2):
	                    text_in = gr.Textbox(
	                        label="Recipe text",
	                        placeholder="2 cups flour, 1 egg, 300g chicken breast, 1 tbsp olive oil, steamed broccoli",
	                        lines=7,
	                    )
	                    text_btn = gr.Button("🔬 Analyze recipe", variant="primary", size="lg")
	                    gr.Examples(examples=[[e] for e in EXAMPLES], inputs=text_in,
	                                label="Example recipes (click to load)")
	                with gr.Column(scale=2):
	                    text_score = gr.HTML(value=_empty_html(), label="Health score")

	        # Tab 2: English audio; the transcript is shown read-only.
	        with gr.Tab("🎙️ English audio"):
	            with gr.Row():
	                with gr.Column(scale=2):
	                    eng_audio_in = gr.Audio(label="Upload or record English audio",
	                                            type="filepath", sources=["upload", "microphone"])
	                    eng_audio_btn = gr.Button("🎙️ Transcribe & analyze", variant="primary", size="lg")
	                    eng_audio_text = gr.Textbox(label="Transcription", lines=4,
	                                                interactive=False,
	                                                placeholder="Transcribed English text appears here.")
	                with gr.Column(scale=2):
	                    eng_audio_score = gr.HTML(value=_empty_html(), label="Health score")

	        # Tab 3: Hindi audio; the English translation is shown read-only.
	        with gr.Tab("🇮🇳 Hindi audio"):
	            gr.Markdown("""
	हिंदी में बोलें — Speak your recipe in Hindi.
	Whisper automatically transcribes and translates to English in one step.
	""")
	            with gr.Row():
	                with gr.Column(scale=2):
	                    hin_audio_in = gr.Audio(label="Upload or record Hindi audio",
	                                            type="filepath", sources=["upload", "microphone"])
	                    hin_audio_btn = gr.Button("🇮🇳 Transcribe Hindi & analyze",
	                                              variant="primary", size="lg")
	                    hin_audio_text = gr.Textbox(label="Hindi → English translation", lines=4,
	                                                interactive=False,
	                                                placeholder="Whisper's English translation appears here.")
	                with gr.Column(scale=2):
	                    hin_audio_score = gr.HTML(value=_empty_html(), label="Health score")

	    gr.Markdown("---")

	    # Result widgets used as outputs by all three click handlers below.
	    with gr.Row():
	        nutr_table = gr.Dataframe(label="📊 Nutrition per serving", interactive=False, wrap=True)
	        ing_table = gr.Dataframe(label="🧪 Identified ingredients", interactive=False, wrap=True)

	    expl_out = gr.HTML(label="🔍 SHAP explanation")

	    # The text handler returns 4 values; the audio handlers return a fifth
	    # value that goes into the tab's transcript box.
	    text_btn.click(fn=analyze_text, inputs=[text_in],
	                   outputs=[text_score, nutr_table, ing_table, expl_out])

	    eng_audio_btn.click(fn=analyze_english_audio, inputs=[eng_audio_in],
	                        outputs=[eng_audio_score, nutr_table, ing_table, expl_out, eng_audio_text])

	    hin_audio_btn.click(fn=analyze_hindi_audio, inputs=[hin_audio_in],
	                        outputs=[hin_audio_score, nutr_table, ing_table, expl_out, hin_audio_text])

	    gr.Markdown("""
	---
	Stack: spaCy · USDA FoodData Central · scikit-learn RandomForest · SHAP · OpenAI Whisper · Gradio
	Hindi uses Whisper `task="translate"` — no separate translation model required.
	""")


	if __name__ == "__main__":
	    # Launch options are gathered in one place and passed as keyword arguments.
	    launch_options = {"share": True, "ssr_mode": False}
	    demo.launch(**launch_options)