File size: 6,161 Bytes

fcb2b04

import io
import json
import os
from typing import Optional, Union

import requests

from voice_client import VoiceClient

class StackWithVoice:
    """Glue between a Stack 2.9 chat HTTP API and a ``VoiceClient`` TTS service.

    The class sends text prompts to ``<stack_api_url>/api/chat`` and passes the
    returned text to ``VoiceClient.synthesize`` to obtain spoken audio.
    Speech-to-text is only a placeholder: prompts are read from a ``.txt``
    file sitting next to the audio file (see ``_audio_to_text``).

    Attributes are plain instance fields set in ``__init__``:
    ``stack_api_url``, ``voice_client``, ``session``, ``request_timeout`` and
    the private ``_voice_cache`` dict.
    """

    # Prompt returned when no transcript file exists next to the audio file.
    _PLACEHOLDER_PROMPT = "This is a test voice prompt."

    def __init__(
        self,
        stack_api_url: str,
        voice_api_url: str = "http://localhost:8000",
        request_timeout: float = 120.0,
    ):
        """Create the HTTP session and the voice client.

        Args:
            stack_api_url: Base URL of the Stack 2.9 API (without ``/api/chat``).
            voice_api_url: Base URL handed to ``VoiceClient``.
            request_timeout: Seconds to wait for the Stack API before giving
                up. Without a timeout ``requests`` waits indefinitely.
        """
        self.stack_api_url = stack_api_url
        self.voice_client = VoiceClient(voice_api_url)
        self.session = requests.Session()
        self.request_timeout = request_timeout

        # Cache for voice models to avoid repeated API calls
        self._voice_cache = {}

    def _get_stack_response(self, prompt: str) -> str:
        """Send ``prompt`` to the Stack 2.9 chat endpoint and return its reply.

        Args:
            prompt: Text to send as the ``"prompt"`` field of the JSON body.

        Returns:
            The ``"response"`` field of the JSON reply, or ``""`` when the
            field is absent.

        Raises:
            Exception: If the HTTP request fails, returns an error status, or
                the body is not valid JSON. The original error is chained as
                ``__cause__``.
        """
        # Tolerate a base URL configured with a trailing slash.
        url = f"{self.stack_api_url.rstrip('/')}/api/chat"
        try:
            response = self.session.post(
                url,
                json={"prompt": prompt, "model": "stack-2.9"},
                headers={"Content-Type": "application/json"},
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests versions
            # where the decode error is not a RequestException subclass.
            raise Exception(f"Stack API request failed: {e}") from e

        return data.get("response", "")

    def _get_voice_model(self, voice_name: str) -> Optional[dict]:
        """Look up a voice by name, consulting the local cache first.

        Args:
            voice_name: Name to search for in ``VoiceClient.list_voices()``.

        Returns:
            ``{"name": voice_name}`` when the voice is listed, otherwise
            ``None``. ``None`` is also returned (after printing a warning)
            when listing the voices fails; this lookup is best-effort.
        """
        cached = self._voice_cache.get(voice_name)
        if cached is not None:
            return cached

        try:
            voices = self.voice_client.list_voices()
            if voice_name not in voices:
                return None
        except Exception as e:
            # Deliberately broad: a failing voice service must not abort the caller.
            print(f"Warning: Failed to get voice models: {e}")
            return None

        entry = {"name": voice_name}
        self._voice_cache[voice_name] = entry
        return entry

    def voice_chat(self, prompt_audio_path: str, voice_name: str = "default") -> Optional[bytes]:
        """Run the full voice chat workflow: audio -> text -> response -> audio.

        Args:
            prompt_audio_path: Path of the audio file containing the prompt.
            voice_name: Voice passed to ``VoiceClient.synthesize``.

        Returns:
            The value returned by ``VoiceClient.synthesize`` for the reply
            text, or ``None`` when either the prompt text or the Stack reply
            is empty.

        Raises:
            Exception: Propagated from ``_get_stack_response`` on API failure.
        """
        # Step 1: Convert audio to text (placeholder - in real implementation, use speech-to-text)
        print(f"Converting audio to text: {prompt_audio_path}")
        prompt_text = self._audio_to_text(prompt_audio_path)
        if not prompt_text:
            return None

        print(f"User prompt: {prompt_text}")

        # Step 2: Get response from Stack 2.9
        print("Getting response from Stack 2.9...")
        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            return None

        print(f"Stack response: {response_text}")

        # Step 3: Convert response to audio
        print(f"Generating voice response with voice: {voice_name}")
        return self.voice_client.synthesize(response_text, voice_name)

    def _audio_to_text(self, audio_path: str) -> str:
        """Return the prompt text for ``audio_path`` (placeholder implementation).

        No real speech recognition happens here. The method looks for a
        transcript file with the same base name and a ``.txt`` extension
        (``clip.wav`` -> ``clip.txt``) and returns its stripped contents.

        Args:
            audio_path: Path of the audio file; it does not need to exist.

        Returns:
            The stripped transcript text, or a fixed placeholder prompt when
            no transcript file exists.
        """
        # splitext only touches the final extension, so directory names that
        # happen to contain ".wav"/".mp3" are left alone and the extension
        # match is not case-sensitive.
        base, _ext = os.path.splitext(audio_path)
        text_path = base + ".txt"

        if os.path.exists(text_path):
            with open(text_path, "r", encoding="utf-8") as f:
                return f.read().strip()

        # Fallback: return a generic prompt
        return self._PLACEHOLDER_PROMPT

    def voice_command(self, command: str, voice_name: str = "default") -> Optional[bytes]:
        """Send a text command to Stack 2.9 and return the spoken reply.

        The command is not parsed or executed locally; it is forwarded to the
        chat API unchanged.

        Args:
            command: Text forwarded to ``_get_stack_response``.
            voice_name: Voice passed to ``VoiceClient.synthesize``.

        Returns:
            The value returned by ``VoiceClient.synthesize``, or ``None`` when
            the Stack reply is empty.

        Raises:
            Exception: Propagated from ``_get_stack_response`` on API failure.
        """
        print(f"Executing voice command: {command}")

        # In a real implementation, you would parse the command and execute appropriate actions
        # For now, just pass it to Stack 2.9 as-is
        response_text = self._get_stack_response(command)
        if not response_text:
            return None

        print(f"Command response: {response_text}")

        # Generate voice response
        return self.voice_client.synthesize(response_text, voice_name)

    def streaming_voice_chat(
        self,
        prompt_audio_path: str,
        voice_name: str = "default",
        output_path: str = "./streaming_response.wav",
    ) -> None:
        """Run a voice chat and save the synthesized reply (placeholder streaming).

        Nothing is streamed incrementally yet: the complete reply is fetched,
        synthesized with ``stream=True`` and handed to
        ``VoiceClient.download_audio`` together with ``output_path``.

        Args:
            prompt_audio_path: Path of the audio file containing the prompt.
            voice_name: Voice passed to ``VoiceClient.synthesize``.
            output_path: Destination passed to ``VoiceClient.download_audio``.

        Raises:
            Exception: Propagated from ``_get_stack_response`` on API failure.
        """
        print("Starting streaming voice chat...")

        # Get initial response
        prompt_text = self._audio_to_text(prompt_audio_path)
        if not prompt_text:
            # Same guard as voice_chat: do not send an empty prompt to the API.
            print("No prompt text available")
            return

        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            print("No response received")
            return

        print("Streaming response:")
        print(response_text)

        # In a real streaming implementation, you would:
        # 1. Stream audio chunks to speech-to-text
        # 2. Send partial prompts to Stack 2.9
        # 3. Stream partial responses to TTS
        # 4. Play audio as it's generated

        # For now, just generate the complete response
        audio_data = self.voice_client.synthesize(response_text, voice_name, stream=True)

        # Save to file for demonstration
        self.voice_client.download_audio(audio_data, output_path)
        print(f"Streaming response saved to: {output_path}")

# Example usage
if __name__ == "__main__":
    # Endpoints for a local setup; change these to match your deployment.
    stack_endpoint = "http://localhost:5000"  # Example Stack 2.9 API URL
    voice_endpoint = "http://localhost:8000"

    stack_voice = StackWithVoice(stack_api_url=stack_endpoint, voice_api_url=voice_endpoint)

    print("Testing Stack with Voice integration...")

    # The examples below are disabled by default; uncomment one to try it.

    # Test voice chat
    # audio_data = stack_voice.voice_chat("test_prompt.wav", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "stack_response.wav")
    #     print("Voice chat response saved to stack_response.wav")

    # Test voice command
    # audio_data = stack_voice.voice_command("Write a Python function to calculate factorial", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "command_response.wav")
    #     print("Voice command response saved to command_response.wav")

    # Test streaming
    # stack_voice.streaming_voice_chat("test_prompt.wav", "default")