# Stack-2-9-finetuned/stack/voice/stack_voice_integration.py
# walidsobhie-code
# refactor: Squeeze folders further - cleaner structure
# 65888d5
import io
import json
import os
from typing import Optional, Union

import requests

from voice_client import VoiceClient
class StackWithVoice:
    """Glue between the Stack 2.9 chat API and a voice service client.

    Text prompts are posted to ``{stack_api_url}/api/chat`` and the reply is
    handed to ``VoiceClient.synthesize`` to obtain audio. Speech-to-text is a
    placeholder (see ``_audio_to_text``).
    """

    # Seconds to wait for the Stack API. Without an explicit timeout the HTTP
    # call can block forever on an unresponsive server.
    DEFAULT_REQUEST_TIMEOUT = 120.0

    def __init__(self, stack_api_url: str, voice_api_url: str = "http://localhost:8000",
                 request_timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT):
        """Store endpoints and create the HTTP session and voice client.

        Args:
            stack_api_url: Base URL of the Stack 2.9 API.
            voice_api_url: Base URL passed to ``VoiceClient``.
            request_timeout: Timeout in seconds for Stack API requests;
                ``None`` disables the timeout.
        """
        self.stack_api_url = stack_api_url
        self.request_timeout = request_timeout
        self.voice_client = VoiceClient(voice_api_url)
        self.session = requests.Session()
        # Cache for voice models to avoid repeated API calls
        self._voice_cache = {}

    def _get_stack_response(self, prompt: str) -> str:
        """Send ``prompt`` to the Stack 2.9 chat endpoint and return its reply.

        Returns:
            The ``"response"`` field of the JSON body, or ``""`` when the
            field is missing or null.

        Raises:
            Exception: If the HTTP request fails, the server answers with an
                error status, or the body is not valid JSON. The underlying
                error is chained as ``__cause__``.
        """
        # Strip a trailing slash so "http://host/" does not yield "//api/chat".
        url = f"{self.stack_api_url.rstrip('/')}/api/chat"
        try:
            response = self.session.post(
                url,
                json={"prompt": prompt, "model": "stack-2.9"},
                headers={"Content-Type": "application/json"},
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on requests versions
            # where that error is not a RequestException subclass.
            raise Exception(f"Stack API request failed: {str(e)}") from e
        # "or" guards against an explicit JSON null, keeping the str contract.
        return data.get("response") or ""

    def _get_voice_model(self, voice_name: str) -> Optional[dict]:
        """Return ``{"name": voice_name}`` if the voice is available, else None.

        Successful lookups are cached per voice name. Any error while listing
        voices is reported with a warning and treated as "not found"
        (deliberate best-effort behavior).
        """
        cached = self._voice_cache.get(voice_name)
        if cached is not None:
            return cached
        try:
            # presumably list_voices() yields voice names - verify against VoiceClient
            voices = self.voice_client.list_voices()
            if voice_name not in voices:
                return None
        except Exception as e:
            print(f"Warning: Failed to get voice models: {e}")
            return None
        self._voice_cache[voice_name] = {"name": voice_name}
        # Hand out a fresh dict on a miss so callers cannot mutate the cache entry.
        return {"name": voice_name}

    def voice_chat(self, prompt_audio_path: str, voice_name: str = "default") -> Optional[bytes]:
        """Complete voice chat workflow: audio -> text -> response -> audio.

        Returns:
            Whatever ``VoiceClient.synthesize`` returns for the reply, or
            ``None`` when either the prompt text or the reply is empty.

        Raises:
            Exception: Propagated from ``_get_stack_response``.
        """
        # Step 1: Convert audio to text (placeholder - in real implementation, use speech-to-text)
        print(f"Converting audio to text: {prompt_audio_path}")
        prompt_text = self._audio_to_text(prompt_audio_path)
        if not prompt_text:
            return None
        print(f"User prompt: {prompt_text}")

        # Step 2: Get response from Stack 2.9
        print("Getting response from Stack 2.9...")
        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            return None
        print(f"Stack response: {response_text}")

        # Step 3: Convert response to audio
        print(f"Generating voice response with voice: {voice_name}")
        return self.voice_client.synthesize(response_text, voice_name)

    def _audio_to_text(self, audio_path: str) -> str:
        """Convert audio to text (placeholder implementation).

        No speech recognition is performed. If a ``.txt`` file with the same
        base name as ``audio_path`` exists, its stripped contents are
        returned; otherwise a fixed generic prompt is returned.
        """
        # Swap only the final extension. A substring replace would also
        # rewrite ".wav"/".mp3" in directory names and would leave unknown
        # extensions untouched, causing the audio file itself to be read.
        text_path = os.path.splitext(audio_path)[0] + ".txt"
        if os.path.isfile(text_path):
            with open(text_path, "r", encoding="utf-8") as f:
                return f.read().strip()
        # Fallback: return a generic prompt
        return "This is a test voice prompt."

    def voice_command(self, command: str, voice_name: str = "default") -> Optional[bytes]:
        """Send ``command`` to Stack 2.9 and return the reply as audio.

        The command is forwarded verbatim; no parsing or dispatch happens.

        Returns:
            Whatever ``VoiceClient.synthesize`` returns, or ``None`` when the
            reply is empty.

        Raises:
            Exception: Propagated from ``_get_stack_response``.
        """
        print(f"Executing voice command: {command}")
        response_text = self._get_stack_response(command)
        if not response_text:
            return None
        print(f"Command response: {response_text}")
        # Generate voice response
        return self.voice_client.synthesize(response_text, voice_name)

    def streaming_voice_chat(self, prompt_audio_path: str, voice_name: str = "default",
                             output_path: str = "./streaming_response.wav") -> None:
        """Stream voice chat (placeholder implementation).

        Currently obtains the complete reply, requests synthesis with
        ``stream=True`` and saves the result to ``output_path`` via
        ``VoiceClient.download_audio``.

        Args:
            prompt_audio_path: Path of the recorded prompt.
            voice_name: Voice passed to the synthesizer.
            output_path: Where the synthesized audio is written.

        Raises:
            Exception: Propagated from ``_get_stack_response``.
        """
        print("Starting streaming voice chat...")
        prompt_text = self._audio_to_text(prompt_audio_path)
        # Same guard as voice_chat: never post an empty prompt.
        if not prompt_text:
            print("No prompt text available")
            return
        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            print("No response received")
            return
        print("Streaming response:")
        print(response_text)
        # In a real streaming implementation, you would:
        # 1. Stream audio chunks to speech-to-text
        # 2. Send partial prompts to Stack 2.9
        # 3. Stream partial responses to TTS
        # 4. Play audio as it's generated
        # For now, just generate the complete response
        audio_data = self.voice_client.synthesize(response_text, voice_name, stream=True)
        # Save to file for demonstration
        self.voice_client.download_audio(audio_data, output_path)
        print(f"Streaming response saved to: {output_path}")
# Example usage: run this module directly to construct the integration
# against locally hosted services.
if __name__ == "__main__":
    stack_endpoint = "http://localhost:5000"  # Example Stack 2.9 API URL
    voice_endpoint = "http://localhost:8000"
    stack_voice = StackWithVoice(
        stack_api_url=stack_endpoint,
        voice_api_url=voice_endpoint,
    )
    print("Testing Stack with Voice integration...")

    # The calls below are left disabled; uncomment one to try it out.

    # Test voice chat
    # audio_data = stack_voice.voice_chat("test_prompt.wav", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "stack_response.wav")
    #     print("Voice chat response saved to stack_response.wav")

    # Test voice command
    # audio_data = stack_voice.voice_command("Write a Python function to calculate factorial", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "command_response.wav")
    #     print("Voice command response saved to command_response.wav")

    # Test streaming
    # stack_voice.streaming_voice_chat("test_prompt.wav", "default")