VoiceAssistant / app.py
sumittech's picture
Create app.py
c5ff6ac verified
import gradio as gr
import google.generativeai as genai
import whisper
import requests
import subprocess
import os
import time
import tempfile
from base64 import b64decode
# ── Load API keys from HF Secrets ─────────────────────────────────
# Both keys are read from the environment; a missing key becomes "".
MURF_API_KEY = os.environ.get("MURF_API_KEY", "")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# Surface a missing key in the startup log instead of letting it show up
# later as an opaque API error. The app is still allowed to start.
for _key_name, _key_value in (
    ("MURF_API_KEY", MURF_API_KEY),
    ("GEMINI_API_KEY", GEMINI_API_KEY),
):
    if not _key_value:
        print(f"WARNING: {_key_name} is not set; calls that need it will fail.")

# ── Configure Gemini ───────────────────────────────────────────────
genai.configure(api_key=GEMINI_API_KEY)

# Persona and reply rules passed to the model as its system instruction.
SYSTEM_PROMPT = """
You are ARIA, a warm, witty, smart voice assistant.
STRICT RULES:
1. Reply in MAXIMUM 2 short sentences — spoken aloud
2. Sound 100% human — contractions, casual words
3. Be expressive — curious, funny, empathetic
4. NEVER say 'As an AI' — talk like a real person
5. Remember everything in this conversation
6. End with a natural hook to keep conversation flowing
"""

gemini_model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction=SYSTEM_PROMPT,
)

# NOTE(review): this one chat session lives at module level, so every call
# to ask_gemini() appends to the same conversation history.
chat_session = gemini_model.start_chat(history=[])

# Voice used for every Murf text-to-speech request.
MURF_VOICE_ID = "en-US-natalie"

# ── Load Whisper once at startup ───────────────────────────────────
print("Loading Whisper tiny.en...")
whisper_model = whisper.load_model("tiny.en")
print("Whisper ready!")
def _retry_delay_seconds(message: str, default: int = 40) -> int:
    """Return the pause (seconds) hinted at in a rate-limit error message.

    The first whitespace-separated token that parses as a number strictly
    between 1 and 300 (after trimming trailing ``s``, ``,`` and ``.``) is
    used, plus a 5-second margin. *default* is returned when none is found.
    """
    for token in message.split():
        try:
            value = float(token.rstrip("s,."))
        except ValueError:
            # Not a number; keep scanning the remaining tokens.
            continue
        if 1 < value < 300:
            return int(value) + 5
    return default


def ask_gemini(text: str, retries: int = 4) -> str:
    """Send *text* to the module-level Gemini chat session and return the reply.

    Errors whose message contains "429" are treated as rate limiting and
    retried after a pause derived from the error message.

    Args:
        text: The user's message.
        retries: Total number of attempts, including the first one.

    Returns:
        The reply text with surrounding whitespace stripped.

    Raises:
        ValueError: If ``retries`` is less than 1 (previously this silently
            returned ``None``).
        RuntimeError: If the final attempt is still rate limited, or on any
            other failure (message truncated to 150 characters).
    """
    if retries < 1:
        raise ValueError("retries must be at least 1")

    for attempt in range(retries):
        try:
            return chat_session.send_message(text).text.strip()
        except Exception as exc:
            message = str(exc)
            if "429" not in message:
                raise RuntimeError(message[:150]) from exc
            if attempt == retries - 1:
                raise RuntimeError("Rate limited — try again in a minute") from exc
            time.sleep(_retry_delay_seconds(message))
def speak_murf(text: str) -> str:
    """Synthesize *text* with the Murf API and return the path of an MP3 file.

    The API response is expected to carry a download URL under ``audioFile``;
    the audio is fetched from that URL and written to a temporary ``.mp3``
    file that is left on disk for the caller (Gradio) to serve.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Filesystem path of the downloaded MP3 file.

    Raises:
        RuntimeError: If *text* is blank, the Murf request does not return
            HTTP 200, the response has no audio URL, or the audio download
            does not return HTTP 200.
    """
    # Reject blank input locally rather than spending an API call on it.
    if not text or not text.strip():
        raise RuntimeError("Murf error: nothing to speak (empty text)")

    response = requests.post(
        "https://api.murf.ai/v1/speech/generate",
        json={
            "text": text,
            "voiceId": MURF_VOICE_ID,
            "style": "Conversational",
            "format": "mp3",
            "sampleRate": 24000,
            # NOTE(review): confirm "speed" is a field this endpoint honours.
            "speed": -5,
        },
        headers={"api-key": MURF_API_KEY, "Content-Type": "application/json"},
        timeout=20,
    )
    if response.status_code != 200:
        raise RuntimeError(
            f"Murf error {response.status_code}: {response.text[:150]}"
        )

    audio_url = response.json().get("audioFile")
    if not audio_url:
        # Without this check requests.get("") fails with an unrelated
        # "invalid URL" error that hides the real cause.
        raise RuntimeError("Murf error: response did not include an audio URL")

    download = requests.get(audio_url, timeout=20)
    if download.status_code != 200:
        # Never save an error page as if it were audio.
        raise RuntimeError(
            f"Murf error: audio download failed with status {download.status_code}"
        )

    # Save to temp file for Gradio to serve; the context manager closes the
    # handle even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tmp.write(download.content)
    return tmp.name
def process_audio(audio_path):
    """Main pipeline: audio file → text → Gemini → Murf → audio.

    Args:
        audio_path: Path of the recorded audio file, or ``None`` when
            nothing was recorded.

    Returns:
        A 3-tuple ``(reply_audio_path, user_line, aria_line)``. When there
        is no input, nothing is transcribed, or any step raises, the first
        item is ``None``, the second carries the warning/error message and
        the third is an empty string.
    """
    if audio_path is None:
        return None, "⚠️ No audio received. Please record something.", ""

    wav_path = None
    try:
        # ── Step 1: Convert to WAV ─────────────────────────────────
        # A unique temp file per call: a fixed path would let overlapping
        # requests overwrite each other's input.
        fd, wav_path = tempfile.mkstemp(prefix="aria_input_", suffix=".wav")
        os.close(fd)
        subprocess.run(
            ["ffmpeg", "-y", "-i", audio_path,
             "-ar", "16000", "-ac", "1",   # 16 kHz, mono
             "-t", "8", wav_path],         # keep at most the first 8 seconds
            capture_output=True, check=True
        )

        # ── Step 2: Whisper STT ────────────────────────────────────
        result = whisper_model.transcribe(
            wav_path,
            fp16=False,
            language="en",
            task="transcribe"
        )
        user_text = result["text"].strip()
        if not user_text:
            return None, "⚠️ Couldn't hear anything. Speak louder!", ""

        # ── Step 3: Gemini ─────────────────────────────────────────
        reply = ask_gemini(user_text)

        # ── Step 4: Murf TTS ───────────────────────────────────────
        audio_out = speak_murf(reply)
        return audio_out, f"🧑 You: {user_text}", f"🤖 ARIA: {reply}"
    except Exception as e:
        # UI boundary: report the failure in the text box instead of raising.
        return None, f"❌ Error: {str(e)}", ""
    finally:
        # The intermediate WAV is only needed for transcription.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                pass
# ── Gradio UI ──────────────────────────────────────────────────────
# Page-level CSS: centre the app, hide the default footer.
css = """
#component-0 { max-width: 600px; margin: auto; }
.title { text-align: center; }
footer { display: none !important; }
"""

with gr.Blocks(
    title="🎤 ARIA — Voice Assistant",
    css=css,
    theme=gr.themes.Soft(
        primary_hue="violet",
        neutral_hue="slate"
    )
) as demo:
    # Header banner
    gr.HTML("""
    <div style="text-align:center; padding: 20px 0 10px">
    <h1 style="font-size:2.2rem; font-weight:700; margin:0">🎤 ARIA</h1>
    <p style="color:#888; margin:4px 0 0; font-size:0.95rem; letter-spacing:2px">
    AI VOICE ASSISTANT · Gemini + Murf + Whisper
    </p>
    </div>
    """)

    with gr.Row():
        # Left column: microphone input and the send button
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎙️ Press & Hold to Record",
                show_download_button=False
            )
            submit_btn = gr.Button(
                "⚡ Send to ARIA",
                variant="primary",
                size="lg"
            )

        # Right column: spoken reply plus both sides of the transcript
        with gr.Column():
            audio_output = gr.Audio(
                label="🔊 ARIA's Voice Response",
                autoplay=True,
                show_download_button=False
            )
            user_text_box = gr.Textbox(
                label="📝 You said",
                interactive=False,
                lines=2
            )
            aria_text_box = gr.Textbox(
                label="🤖 ARIA replied",
                interactive=False,
                lines=3
            )

    # Run the pipeline on an explicit click and also automatically when
    # recording stops; both triggers share the same wiring.
    # NOTE(review): stopping a recording and then clicking the button runs
    # the pipeline twice for the same clip — confirm this is intended.
    for _trigger in (submit_btn.click, audio_input.stop_recording):
        _trigger(
            fn=process_audio,
            inputs=[audio_input],
            outputs=[audio_output, user_text_box, aria_text_box]
        )

    # Footer listing the models in use
    gr.HTML("""
    <div style="text-align:center; color:#666; font-size:0.8rem; padding:16px 0 0">
    gemini-1.5-flash · whisper tiny.en · murf conversational
    </div>
    """)

demo.launch()
```
---
## 📄 File 2 — `requirements.txt`
```
google-generativeai
openai-whisper
requests
gradio
ffmpeg-python
```
---
## 🌐 Deploy Steps on Hugging Face
### Step 1 — Create account
Go to **https://huggingface.co** → Sign Up (free)
### Step 2 — Create new Space
```
huggingface.co/spaces → + New Space
Fill in:
Space name: aria-voice-assistant
SDK: Gradio ← important!
Hardware: CPU Basic (free)
Visibility: Public or Private
```
### Step 3 — Add Secret API Keys
```
Space page → Settings tab → Variables and Secrets
Add secret: MURF_API_KEY = your_murf_key
Add secret: GEMINI_API_KEY = your_gemini_key
```
> ✅ Secrets are encrypted — nobody can see them, not even you after saving
### Step 4 — Upload files
```
Space page → Files tab → + Add File
Upload: app.py
Upload: requirements.txt
```
### Step 5 — Wait for build (~3 min)
```
Space auto-builds → watch the logs
When you see "Running on public URL" → it's live!
```
### Step 6 — Your live URL
```
https://huggingface.co/spaces/YOUR_USERNAME/aria-voice-assistant