Spaces:

sumittech
/

VoiceAssistant

Runtime error

App Files Files Community

VoiceAssistant / app.py

sumittech

Create app.py

c5ff6ac verified 5 days ago

raw

history blame contribute delete

8.1 kB

	import gradio as gr
	import google.generativeai as genai
	import whisper
	import requests
	import subprocess
	import os
	import time
	import tempfile
	from base64 import b64decode

	# ── Load API keys from HF Secrets ─────────────────────────────────
	# Both keys are read from environment variables; a missing variable yields
	# an empty string rather than raising, so a misconfigured deployment only
	# fails later, when the first API call is made.
	MURF_API_KEY = os.environ.get("MURF_API_KEY", "")
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

	# ── Configure Gemini ───────────────────────────────────────────────
	# Must run before the model below is created.
	genai.configure(api_key=GEMINI_API_KEY)

	# Persona and reply rules passed to Gemini as the system instruction.
	SYSTEM_PROMPT = """
	You are ARIA, a warm, witty, smart voice assistant.
	STRICT RULES:
	1. Reply in MAXIMUM 2 short sentences — spoken aloud
	2. Sound 100% human — contractions, casual words
	3. Be expressive — curious, funny, empathetic
	4. NEVER say 'As an AI' — talk like a real person
	5. Remember everything in this conversation
	6. End with a natural hook to keep conversation flowing
	"""

	gemini_model = genai.GenerativeModel(
	model_name="gemini-1.5-flash",
	system_instruction=SYSTEM_PROMPT
	)
	# One chat session is created at import time and reused by every call to
	# ask_gemini(), so conversation history is shared process-wide.
	# NOTE(review): this presumably means all visitors of the Space share one
	# conversation — confirm whether per-user sessions are wanted.
	chat_session = gemini_model.start_chat(history=[])

	# Voice identifier sent to Murf in speak_murf().
	MURF_VOICE_ID = "en-US-natalie"

	# ── Load Whisper once at startup ───────────────────────────────────
	# Loading happens at import time so each request reuses the same model.
	print("Loading Whisper tiny.en...")
	whisper_model = whisper.load_model("tiny.en")
	print("Whisper ready!")


	def _retry_delay_from_message(message: str, default: int = 40) -> int:
	    """Return the number of seconds to wait before retrying a rate-limited call.

	    The error text is scanned token by token. The first token that parses as
	    a number strictly between 1 and 300 (after stripping trailing ``s``,
	    ``,`` and ``.``) is treated as the suggested delay, and 5 seconds of
	    slack are added. If no token qualifies, ``default`` is returned.
	    """
	    for token in message.split():
	        try:
	            seconds = float(token.rstrip("s,."))
	        except ValueError:
	            # Not a number — keep scanning the remaining tokens.
	            continue
	        if 1 < seconds < 300:
	            return int(seconds) + 5
	    return default


	def ask_gemini(text: str, retries: int = 4) -> str:
	    """Send ``text`` to the module-level ``chat_session`` and return the reply.

	    Args:
	        text: The user's message.
	        retries: Maximum number of attempts. Values below 1 are treated as 1
	            so the function always makes at least one call and never falls
	            through returning ``None``.

	    Returns:
	        The reply text with surrounding whitespace stripped.

	    Raises:
	        RuntimeError: If the call fails with an error whose text does not
	            contain ``"429"`` (message truncated to 150 characters), or if
	            every attempt was rate limited. The original exception is
	            chained as ``__cause__``.
	    """
	    attempts = max(1, retries)
	    for attempt in range(attempts):
	        try:
	            return chat_session.send_message(text).text.strip()
	        except Exception as e:
	            message = str(e)
	            # Only errors mentioning 429 are retried; anything else is fatal.
	            if "429" not in message:
	                raise RuntimeError(message[:150]) from e
	            if attempt == attempts - 1:
	                raise RuntimeError("Rate limited — try again in a minute") from e
	            time.sleep(_retry_delay_from_message(message))


	def speak_murf(text: str) -> str:
	    """Synthesize ``text`` with the Murf API and return the path of an MP3 file.

	    The generate endpoint is called with the module-level ``MURF_VOICE_ID``
	    and ``MURF_API_KEY``. The ``audioFile`` URL from the JSON response is
	    then downloaded and written to a temporary ``.mp3`` file. The file is
	    created with ``delete=False`` and is not removed here; the caller owns
	    it.

	    Args:
	        text: The text to speak.

	    Returns:
	        Filesystem path of the downloaded MP3.

	    Raises:
	        RuntimeError: If the generate request does not return HTTP 200, the
	            response has no ``audioFile`` URL, or the audio download does
	            not return HTTP 200.
	    """
	    # NOTE(review): confirm "speed" is the field name Murf expects for
	    # speaking rate; the value is sent unchanged from the original code.
	    r = requests.post(
	        "https://api.murf.ai/v1/speech/generate",
	        json={
	            "text": text,
	            "voiceId": MURF_VOICE_ID,
	            "style": "Conversational",
	            "format": "mp3",
	            "sampleRate": 24000,
	            "speed": -5,
	        },
	        headers={"api-key": MURF_API_KEY, "Content-Type": "application/json"},
	        timeout=20
	    )
	    if r.status_code != 200:
	        raise RuntimeError(f"Murf error {r.status_code}: {r.text[:150]}")

	    audio_url = r.json().get("audioFile", "")
	    if not audio_url:
	        # Without this check requests.get("") fails with an opaque URL error.
	        raise RuntimeError("Murf error: response did not include an audio URL")

	    download = requests.get(audio_url, timeout=20)
	    if download.status_code != 200:
	        # Avoid saving an error page as if it were audio.
	        raise RuntimeError(
	            f"Murf audio download failed with status {download.status_code}"
	        )

	    # Save to temp file for Gradio to serve
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
	        tmp.write(download.content)
	    return tmp.name


	def process_audio(audio_path):
	    """Main pipeline: audio file → text → Gemini → Murf → audio.

	    Args:
	        audio_path: Path of the recorded audio file, or ``None`` when nothing
	            was recorded.

	    Returns:
	        A 3-tuple ``(audio_out, user_line, aria_line)``. ``audio_out`` is the
	        path of the synthesized reply, or ``None`` on failure. ``user_line``
	        carries the transcript or a warning/error message. ``aria_line``
	        carries the reply text, or ``""`` on failure. Exceptions are never
	        propagated; they are reported through ``user_line``.
	    """
	    if audio_path is None:
	        return None, "⚠️ No audio received. Please record something.", ""

	    wav_path = None
	    try:
	        # ── Step 1: Convert to WAV ─────────────────────────────────
	        # A unique temp file per request keeps concurrent requests from
	        # overwriting each other's input (a fixed path would be shared).
	        fd, wav_path = tempfile.mkstemp(prefix="aria_input_", suffix=".wav")
	        os.close(fd)
	        subprocess.run(
	            ["ffmpeg", "-y", "-i", audio_path,
	             "-ar", "16000", "-ac", "1",
	             "-t", "8", wav_path],
	            capture_output=True, check=True
	        )

	        # ── Step 2: Whisper STT ────────────────────────────────────
	        result = whisper_model.transcribe(
	            wav_path,
	            fp16=False,
	            language="en",
	            task="transcribe"
	        )
	        user_text = result["text"].strip()
	        if not user_text:
	            return None, "⚠️ Couldn't hear anything. Speak louder!", ""

	        # ── Step 3: Gemini ─────────────────────────────────────────
	        reply = ask_gemini(user_text)

	        # ── Step 4: Murf TTS ───────────────────────────────────────
	        audio_out = speak_murf(reply)

	        return audio_out, f"🧑 You: {user_text}", f"🤖 ARIA: {reply}"

	    except subprocess.CalledProcessError as e:
	        # Surface the tail of ffmpeg's captured stderr; the default message
	        # only contains the command line and exit status.
	        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace").strip()[-150:]
	        return None, f"❌ Error: ffmpeg could not convert the recording: {stderr_tail}", ""
	    except Exception as e:
	        return None, f"❌ Error: {str(e)}", ""
	    finally:
	        # Best-effort cleanup of the intermediate WAV file.
	        if wav_path is not None:
	            try:
	                os.remove(wav_path)
	            except OSError:
	                pass


	# ── Gradio UI ──────────────────────────────────────────────────────
	# Page-level CSS: centre the root component and hide the Gradio footer.
	css = """
	#component-0 { max-width: 600px; margin: auto; }
	.title { text-align: center; }
	footer { display: none !important; }
	"""

	_aria_theme = gr.themes.Soft(primary_hue="violet", neutral_hue="slate")

	with gr.Blocks(title="🎤 ARIA — Voice Assistant", css=css, theme=_aria_theme) as demo:

	    # Header banner
	    gr.HTML("""
	<div style="text-align:center; padding: 20px 0 10px">
	<h1 style="font-size:2.2rem; font-weight:700; margin:0">🎤 ARIA</h1>
	<p style="color:#888; margin:4px 0 0; font-size:0.95rem; letter-spacing:2px">
	AI VOICE ASSISTANT · Gemini + Murf + Whisper
	</p>
	</div>
	""")

	    with gr.Row():
	        # Left column: microphone recorder and the send button
	        with gr.Column():
	            audio_input = gr.Audio(
	                sources=["microphone"],
	                type="filepath",
	                label="🎙️ Press & Hold to Record",
	                show_download_button=False,
	            )
	            submit_btn = gr.Button("⚡ Send to ARIA", variant="primary", size="lg")

	        # Right column: spoken reply plus both sides of the transcript
	        with gr.Column():
	            audio_output = gr.Audio(
	                label="🔊 ARIA's Voice Response",
	                autoplay=True,
	                show_download_button=False,
	            )
	            user_text_box = gr.Textbox(label="📝 You said", interactive=False, lines=2)
	            aria_text_box = gr.Textbox(label="🤖 ARIA replied", interactive=False, lines=3)

	    # The button click and the end of a recording both run the same pipeline
	    # with the same inputs and outputs, so the wiring is described once.
	    _pipeline_wiring = dict(
	        fn=process_audio,
	        inputs=[audio_input],
	        outputs=[audio_output, user_text_box, aria_text_box],
	    )
	    submit_btn.click(**_pipeline_wiring)

	    # Also auto-submit when recording stops
	    audio_input.stop_recording(**_pipeline_wiring)

	    # Footer listing the models in use
	    gr.HTML("""
	<div style="text-align:center; color:#666; font-size:0.8rem; padding:16px 0 0">
	gemini-1.5-flash · whisper tiny.en · murf conversational
	</div>
	""")

	demo.launch()
	# NOTE: the deployment notes below were pasted into app.py as raw Markdown.
	# They are kept verbatim as comments so that the module parses as Python.
	# ```

	# ---

	# ## 📄 File 2 — `requirements.txt`
	# ```
	# google-generativeai
	# openai-whisper
	# requests
	# gradio
	# ffmpeg-python
	# ```

	# ---

	# ## 🌐 Deploy Steps on Hugging Face

	# ### Step 1 — Create account
	# Go to https://huggingface.co → Sign Up (free)

	# ### Step 2 — Create new Space
	# ```
	# huggingface.co/spaces → + New Space

	# Fill in:
	# Space name: aria-voice-assistant
	# SDK: Gradio ← important!
	# Hardware: CPU Basic (free)
	# Visibility: Public or Private
	# ```

	# ### Step 3 — Add Secret API Keys
	# ```
	# Space page → Settings tab → Variables and Secrets

	# Add secret: MURF_API_KEY = your_murf_key
	# Add secret: GEMINI_API_KEY = your_gemini_key
	# ```
	# > ✅ Secrets are encrypted — nobody can see them, not even you after saving

	# ### Step 4 — Upload files
	# ```
	# Space page → Files tab → + Add File

	# Upload: app.py
	# Upload: requirements.txt
	# ```

	# ### Step 5 — Wait for build (~3 min)
	# ```
	# Space auto-builds → watch the logs
	# When you see "Running on public URL" → it's live!
	# ```

	# ### Step 6 — Your live URL
	# ```
	# https://huggingface.co/spaces/YOUR_USERNAME/aria-voice-assistant