Spaces:
Running on Zero
Running on Zero
| # app.py - Speech to ASL Avatar on Hugging Face Spaces (ZeroGPU compatible) | |
| import gradio as gr | |
| import whisper | |
| import requests | |
| import tempfile | |
| import os | |
| from spaces import GPU # Required for ZeroGPU hardware | |
# Sign-Speak credentials come from the Space secrets (Settings -> Secrets).
# Fail at startup rather than on the first request when the key is missing.
API_KEY = os.getenv("SIGN_SPEAK_API_KEY")
if not API_KEY:
    raise ValueError("SIGN_SPEAK_API_KEY not set in Space secrets!")

# Sign-Speak REST endpoints.
BASE_URL = "https://api.sign-speak.com"
PRODUCE_SIGN_URL = BASE_URL + "/produce-sign"
def get_sign_language(text: str, request_class="BLOCKING", identity="MALE", *, timeout=(10, 300)) -> str:
    """Generate an ASL avatar video for ``text`` via the Sign-Speak API.

    This is an external HTTP call, so it runs on CPU and needs no GPU.

    Args:
        text: English text to translate; surrounding whitespace is stripped.
        request_class: Value for the ``request_class`` payload field,
            upper-cased before sending.
        identity: Value for the ``identity`` payload field, upper-cased
            before sending.
        timeout: Timeout passed to ``requests.post`` in seconds, either a
            single number or a ``(connect, read)`` tuple.

    Returns:
        Path of a temporary ``.mp4`` file containing the response body. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        ValueError: If ``text`` is blank, if the API answers 202 (batch
            processing, which this function does not poll), or if it answers
            with any other non-200 status.
        requests.RequestException: On network failure or timeout.
    """
    english = text.strip()
    if not english:
        # Reject blank input locally instead of spending an API call on it.
        raise ValueError("Cannot generate a sign language video from empty text.")

    headers = {
        "X-api-key": API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "english": english,
        "request_class": request_class.upper(),
        "identity": identity.upper(),
        # Optional: "model_version": "SLP.2.xs" for faster/smaller model
    }

    # Without a timeout, a stalled connection would block the handler forever.
    response = requests.post(
        PRODUCE_SIGN_URL, json=payload, headers=headers, timeout=timeout
    )

    if response.status_code == 200:
        # Save MP4 bytes to a temp file (Gradio Video component needs a filepath).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp.write(response.content)
        return tmp.name

    if response.status_code == 202:
        batch_id = response.json().get("batch_id")
        raise ValueError(f"Batch processing started (ID: {batch_id}). Add polling if needed.")

    raise ValueError(f"Sign-Speak API error {response.status_code}: {response.text}")
# ZeroGPU decorator: request a GPU for up to 120 seconds per call.
@GPU(duration=120)
def transcribe_and_translate(audio_filepath):
    """Transcribe a recording with Whisper and produce an ASL avatar video.

    The function is wrapped with ``@GPU`` so that ZeroGPU hardware is
    allocated while it runs; the Whisper model is loaded onto ``cuda``.

    Args:
        audio_filepath: Path to the recorded or uploaded audio file, or
            ``None`` when nothing was provided.

    Returns:
        A ``(status_text, video_path)`` tuple. ``video_path`` is ``None``
        when there is no audio, no speech was detected, or any step failed;
        in the failure case ``status_text`` carries the error message.
    """
    if audio_filepath is None:
        return "No audio recorded.", None

    try:
        # "small" trades some accuracy for speed; "base" is faster still.
        # device="cuda" forces GPU usage inside this decorated function.
        model = whisper.load_model("small", device="cuda")

        # Transcribe the audio file.
        result = model.transcribe(audio_filepath, language="en")
        text = result["text"].strip()
        if not text:
            return "No speech detected in the recording.", None

        # Generate the ASL video from the transcript.
        video_path = get_sign_language(text)
        return f"Transcribed: \"{text}\"", video_path
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of crashing.
        return f"Error: {str(e)}", None
# -- Gradio UI ----------------------------------------------------------------
with gr.Blocks(title="Speech → ASL Avatar Translator") as demo:
    gr.Markdown("""
    # Speech to ASL Avatar (ZeroGPU)
    1. Record your voice using the microphone below
    2. Click **Sign Translate**
    3. Whisper transcribes → Sign-Speak generates ASL signing video
    """)

    # NOTE(review): only the audio input is placed in the row here; confirm
    # this matches the intended layout.
    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone", "upload"],  # upload is a fallback to recording
            type="filepath",
            label="Speak here (click record) or upload audio",
            format="wav",
        )

    submit_btn = gr.Button("Sign Translate", variant="primary")

    transcript_output = gr.Textbox(label="Transcribed Text / Status", lines=3)
    video_output = gr.Video(label="ASL Avatar Signing Video", autoplay=True)

    # Wire up the button: audio file in, status text and video path out.
    submit_btn.click(
        fn=transcribe_and_translate,
        inputs=audio_input,
        outputs=[transcript_output, video_output],
    )

# Launch (HF Spaces ignores server_name/port).
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
    debug=False,
    ssr_mode=False,  # disable experimental SSR to avoid proxy issues
)