import gradio as gr import google.generativeai as genai import whisper import requests import subprocess import os import time import tempfile from base64 import b64decode # ── Load API keys from HF Secrets ───────────────────────────────── MURF_API_KEY = os.environ.get("MURF_API_KEY", "") GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "") # ── Configure Gemini ─────────────────────────────────────────────── genai.configure(api_key=GEMINI_API_KEY) SYSTEM_PROMPT = """ You are ARIA, a warm, witty, smart voice assistant. STRICT RULES: 1. Reply in MAXIMUM 2 short sentences — spoken aloud 2. Sound 100% human — contractions, casual words 3. Be expressive — curious, funny, empathetic 4. NEVER say 'As an AI' — talk like a real person 5. Remember everything in this conversation 6. End with a natural hook to keep conversation flowing """ gemini_model = genai.GenerativeModel( model_name="gemini-1.5-flash", system_instruction=SYSTEM_PROMPT ) chat_session = gemini_model.start_chat(history=[]) MURF_VOICE_ID = "en-US-natalie" # ── Load Whisper once at startup ─────────────────────────────────── print("Loading Whisper tiny.en...") whisper_model = whisper.load_model("tiny.en") print("Whisper ready!") def ask_gemini(text: str, retries: int = 4) -> str: for attempt in range(retries): try: return chat_session.send_message(text).text.strip() except Exception as e: if "429" in str(e): wait = 40 for tok in str(e).split(): try: v = float(tok.rstrip("s,.")) if 1 < v < 300: wait = int(v) + 5 break except: pass if attempt < retries - 1: time.sleep(wait) else: raise RuntimeError("Rate limited — try again in a minute") else: raise RuntimeError(str(e)[:150]) def speak_murf(text: str) -> str: r = requests.post( "https://api.murf.ai/v1/speech/generate", json={ "text": text, "voiceId": MURF_VOICE_ID, "style": "Conversational", "format": "mp3", "sampleRate": 24000, "speed": -5, }, headers={"api-key": MURF_API_KEY, "Content-Type": "application/json"}, timeout=20 ) if r.status_code != 200: raise RuntimeError(f"Murf error {r.status_code}: {r.text[:150]}") audio_url = r.json().get("audioFile", "") audio_bytes = requests.get(audio_url, timeout=20).content # Save to temp file for Gradio to serve tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") tmp.write(audio_bytes) tmp.close() return tmp.name def process_audio(audio_path): """Main pipeline: audio file → text → Gemini → Murf → audio""" if audio_path is None: return None, "⚠️ No audio received. Please record something.", "" try: # ── Step 1: Convert to WAV ───────────────────────────────── wav_path = "/tmp/aria_input.wav" subprocess.run( ["ffmpeg", "-y", "-i", audio_path, "-ar", "16000", "-ac", "1", "-t", "8", wav_path], capture_output=True, check=True ) # ── Step 2: Whisper STT ──────────────────────────────────── result = whisper_model.transcribe( wav_path, fp16=False, language="en", task="transcribe" ) user_text = result["text"].strip() if not user_text: return None, "⚠️ Couldn't hear anything. Speak louder!", "" # ── Step 3: Gemini ───────────────────────────────────────── reply = ask_gemini(user_text) # ── Step 4: Murf TTS ─────────────────────────────────────── audio_out = speak_murf(reply) return audio_out, f"🧑 You: {user_text}", f"🤖 ARIA: {reply}" except Exception as e: return None, f"❌ Error: {str(e)}", "" # ── Gradio UI ────────────────────────────────────────────────────── css = """ #component-0 { max-width: 600px; margin: auto; } .title { text-align: center; } footer { display: none !important; } """ with gr.Blocks( title="🎤 ARIA — Voice Assistant", css=css, theme=gr.themes.Soft( primary_hue="violet", neutral_hue="slate" ) ) as demo: gr.HTML("""

🎤 ARIA

AI VOICE ASSISTANT · Gemini + Murf + Whisper

""") with gr.Row(): with gr.Column(): audio_input = gr.Audio( sources=["microphone"], type="filepath", label="🎙️ Press & Hold to Record", show_download_button=False ) submit_btn = gr.Button( "⚡ Send to ARIA", variant="primary", size="lg" ) with gr.Column(): audio_output = gr.Audio( label="🔊 ARIA's Voice Response", autoplay=True, show_download_button=False ) user_text_box = gr.Textbox( label="📝 You said", interactive=False, lines=2 ) aria_text_box = gr.Textbox( label="🤖 ARIA replied", interactive=False, lines=3 ) submit_btn.click( fn=process_audio, inputs=[audio_input], outputs=[audio_output, user_text_box, aria_text_box] ) # Also auto-submit when recording stops audio_input.stop_recording( fn=process_audio, inputs=[audio_input], outputs=[audio_output, user_text_box, aria_text_box] ) gr.HTML("""
gemini-1.5-flash · whisper tiny.en · murf conversational
""") demo.launch() ``` --- ## 📄 File 2 — `requirements.txt` ``` google-generativeai openai-whisper requests gradio ffmpeg-python ``` --- ## 🌐 Deploy Steps on Hugging Face ### Step 1 — Create account Go to **https://huggingface.co** → Sign Up (free) ### Step 2 — Create new Space ``` huggingface.co/spaces → + New Space Fill in: Space name: aria-voice-assistant SDK: Gradio ← important! Hardware: CPU Basic (free) Visibility: Public or Private ``` ### Step 3 — Add Secret API Keys ``` Space page → Settings tab → Variables and Secrets Add secret: MURF_API_KEY = your_murf_key Add secret: GEMINI_API_KEY = your_gemini_key ``` > ✅ Secrets are encrypted — nobody can see them, not even you after saving ### Step 4 — Upload files ``` Space page → Files tab → + Add File Upload: app.py Upload: requirements.txt ``` ### Step 5 — Wait for build (~3 min) ``` Space auto-builds → watch the logs When you see "Running on public URL" → it's live! ``` ### Step 6 — Your live URL ``` https://huggingface.co/spaces/YOUR_USERNAME/aria-voice-assistant