import gradio as gr import google.generativeai as genai import whisper import requests import subprocess import os import time import tempfile from base64 import b64decode # ── Load API keys from HF Secrets ───────────────────────────────── MURF_API_KEY = os.environ.get("MURF_API_KEY", "") GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "") # ── Configure Gemini ─────────────────────────────────────────────── genai.configure(api_key=GEMINI_API_KEY) SYSTEM_PROMPT = """ You are ARIA, a warm, witty, smart voice assistant. STRICT RULES: 1. Reply in MAXIMUM 2 short sentences — spoken aloud 2. Sound 100% human — contractions, casual words 3. Be expressive — curious, funny, empathetic 4. NEVER say 'As an AI' — talk like a real person 5. Remember everything in this conversation 6. End with a natural hook to keep conversation flowing """ gemini_model = genai.GenerativeModel( model_name="gemini-1.5-flash", system_instruction=SYSTEM_PROMPT ) chat_session = gemini_model.start_chat(history=[]) MURF_VOICE_ID = "en-US-natalie" # ── Load Whisper once at startup ─────────────────────────────────── print("Loading Whisper tiny.en...") whisper_model = whisper.load_model("tiny.en") print("Whisper ready!") def ask_gemini(text: str, retries: int = 4) -> str: for attempt in range(retries): try: return chat_session.send_message(text).text.strip() except Exception as e: if "429" in str(e): wait = 40 for tok in str(e).split(): try: v = float(tok.rstrip("s,.")) if 1 < v < 300: wait = int(v) + 5 break except: pass if attempt < retries - 1: time.sleep(wait) else: raise RuntimeError("Rate limited — try again in a minute") else: raise RuntimeError(str(e)[:150]) def speak_murf(text: str) -> str: r = requests.post( "https://api.murf.ai/v1/speech/generate", json={ "text": text, "voiceId": MURF_VOICE_ID, "style": "Conversational", "format": "mp3", "sampleRate": 24000, "speed": -5, }, headers={"api-key": MURF_API_KEY, "Content-Type": "application/json"}, timeout=20 ) if r.status_code != 200: raise RuntimeError(f"Murf error {r.status_code}: {r.text[:150]}") audio_url = r.json().get("audioFile", "") audio_bytes = requests.get(audio_url, timeout=20).content # Save to temp file for Gradio to serve tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") tmp.write(audio_bytes) tmp.close() return tmp.name def process_audio(audio_path): """Main pipeline: audio file → text → Gemini → Murf → audio""" if audio_path is None: return None, "⚠️ No audio received. Please record something.", "" try: # ── Step 1: Convert to WAV ───────────────────────────────── wav_path = "/tmp/aria_input.wav" subprocess.run( ["ffmpeg", "-y", "-i", audio_path, "-ar", "16000", "-ac", "1", "-t", "8", wav_path], capture_output=True, check=True ) # ── Step 2: Whisper STT ──────────────────────────────────── result = whisper_model.transcribe( wav_path, fp16=False, language="en", task="transcribe" ) user_text = result["text"].strip() if not user_text: return None, "⚠️ Couldn't hear anything. Speak louder!", "" # ── Step 3: Gemini ───────────────────────────────────────── reply = ask_gemini(user_text) # ── Step 4: Murf TTS ─────────────────────────────────────── audio_out = speak_murf(reply) return audio_out, f"🧑 You: {user_text}", f"🤖 ARIA: {reply}" except Exception as e: return None, f"❌ Error: {str(e)}", "" # ── Gradio UI ────────────────────────────────────────────────────── css = """ #component-0 { max-width: 600px; margin: auto; } .title { text-align: center; } footer { display: none !important; } """ with gr.Blocks( title="🎤 ARIA — Voice Assistant", css=css, theme=gr.themes.Soft( primary_hue="violet", neutral_hue="slate" ) ) as demo: gr.HTML("""
AI VOICE ASSISTANT · Gemini + Murf + Whisper