# Inclusion_demo / app.py
# (Hugging Face Space — author: Devubiodee, commit 6897d69, verified)
# app.py - Speech to ASL Avatar on Hugging Face Spaces (ZeroGPU compatible)
import gradio as gr
import whisper
import requests
import tempfile
import os
from spaces import GPU # Required for ZeroGPU hardware
# Sign-Speak API key, injected via the Space's secret store (Settings → Secrets).
API_KEY = os.environ.get("SIGN_SPEAK_API_KEY")
if not API_KEY:
    # Fail fast at import time: every request needs this key, so a missing
    # secret should stop the Space from starting rather than fail per request.
    raise ValueError("SIGN_SPEAK_API_KEY not set in Space secrets!")
# Sign-Speak REST endpoint used by get_sign_language() below.
BASE_URL = "https://api.sign-speak.com"
PRODUCE_SIGN_URL = f"{BASE_URL}/produce-sign"
def get_sign_language(text: str, request_class="BLOCKING", identity="MALE", timeout=120):
    """
    Call the Sign-Speak API to generate an ASL avatar video for *text*.

    Runs entirely on CPU (it is an external HTTP call), so no GPU is needed.

    Args:
        text: English text to translate into ASL.
        request_class: "BLOCKING" waits for the rendered video; other classes
            may return a 202 batch response (see below).
        identity: Avatar identity, e.g. "MALE" or "FEMALE".
        timeout: Seconds to wait for the HTTP response. Added so a hung API
            call cannot stall the Gradio worker indefinitely; raising it is
            backward-compatible for existing callers.

    Returns:
        Filesystem path to a temporary .mp4 file with the signing video.

    Raises:
        ValueError: On a 202 batch response (polling not implemented) or on
            any non-200 API error.
        requests.RequestException: On network failure or timeout.
    """
    headers = {
        "X-api-key": API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "english": text.strip(),
        "request_class": request_class.upper(),
        "identity": identity.upper(),
        # Optional: "model_version": "SLP.2.xs" for faster/smaller model
    }
    # BUG FIX: the original call passed no timeout, so a stalled Sign-Speak
    # endpoint would block this worker forever.
    response = requests.post(
        PRODUCE_SIGN_URL, json=payload, headers=headers, timeout=timeout
    )
    if response.status_code == 200:
        # Save MP4 bytes to a temp file (Gradio Video component needs filepath)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp.write(response.content)
        return tmp.name
    elif response.status_code == 202:
        # Non-blocking request classes return a batch id instead of video bytes.
        data = response.json()
        batch_id = data.get("batch_id")
        raise ValueError(f"Batch processing started (ID: {batch_id}). Add polling if needed.")
    else:
        raise ValueError(f"Sign-Speak API error {response.status_code}: {response.text}")
# PERF FIX: the original loaded the Whisper checkpoint from disk on every
# request, which dominated per-call latency. Cache the model once on CPU and
# move it to the GPU inside the @GPU window — this matches the ZeroGPU
# lifecycle, where the GPU is only attached while the decorated call runs.
_whisper_model = None

def _load_whisper():
    """Return the cached Whisper 'small' model, loading it on CPU on first use."""
    global _whisper_model
    if _whisper_model is None:
        # "small" balances speed and accuracy; switch to "base" if even
        # faster turnaround is needed.
        _whisper_model = whisper.load_model("small", device="cpu")
    return _whisper_model

@GPU(duration=120)  # ZeroGPU decorator: request a GPU for up to 120 seconds
def transcribe_and_translate(audio_filepath):
    """
    Transcribe recorded audio with Whisper, then generate an ASL video.

    Marked with @GPU so ZeroGPU hardware is allocated for the duration of
    this call only.

    Args:
        audio_filepath: Path to the recorded/uploaded audio file, or None
            when nothing was recorded (Gradio passes None in that case).

    Returns:
        (status_text, video_path): a human-readable transcription/status
        string, and the path to the ASL video file (None on failure).
    """
    if audio_filepath is None:
        return "No audio recorded.", None
    try:
        # Move the cached CPU model onto the ZeroGPU-attached device.
        model = _load_whisper().to("cuda")
        # Transcribe the audio file (English-only for this demo).
        result = model.transcribe(audio_filepath, language="en")
        text = result["text"].strip()
        if not text:
            return "No speech detected in the recording.", None
        # CPU-side external API call that renders the signing avatar.
        video_path = get_sign_language(text)
        return f"Transcribed: \"{text}\"", video_path
    except Exception as e:
        # Surface the error in the UI textbox instead of crashing the Space.
        return f"Error: {str(e)}", None
# ── Gradio UI ────────────────────────────────────────────────────────────────
with gr.Blocks(title="Speech β†’ ASL Avatar Translator") as demo:
    # Usage instructions rendered at the top of the page.
    gr.Markdown("""
    # Speech to ASL Avatar (ZeroGPU)
    1. Record your voice using the microphone below
    2. Click **Translate**
    3. Whisper transcribes β†’ Sign-Speak generates ASL signing video
    """)
    with gr.Row():
        # type="filepath" so transcribe_and_translate receives an on-disk path.
        audio_input = gr.Audio(
            sources=["microphone", "upload"],  # added upload fallback
            type="filepath",
            label="Speak here (click record) or upload audio",
            format="wav"
        )
    submit_btn = gr.Button("Sign Translate", variant="primary")
    # Outputs: transcription / error status text, plus the signing video.
    transcript_output = gr.Textbox(label="Transcribed Text / Status", lines=3)
    video_output = gr.Video(label="ASL Avatar Signing Video", autoplay=True)
    # Wire the button to the GPU-decorated transcription pipeline.
    submit_btn.click(
        fn=transcribe_and_translate,
        inputs=audio_input,
        outputs=[transcript_output, video_output]
    )
# Launch (HF Spaces ignores server_name/port; kept for local runs)
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
    debug=False,
    ssr_mode=False  # disable experimental SSR to avoid proxy issues
)