File size: 6,161 Bytes
fcb2b04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import io
import json
import os
from typing import Optional, Union

import requests

from voice_client import VoiceClient

class StackWithVoice:
    """Glue between the Stack 2.9 chat API and a voice service client.

    Prompts are posted to ``<stack_api_url>/api/chat`` and the text reply is
    handed to ``VoiceClient.synthesize`` to obtain audio. Speech-to-text is
    only a placeholder here (see ``_audio_to_text``).
    """

    # Audio extensions that are swapped for ".txt" when looking for a transcript.
    _AUDIO_SUFFIXES = (".wav", ".mp3")

    def __init__(
        self,
        stack_api_url: str,
        voice_api_url: str = "http://localhost:8000",
        request_timeout: Optional[float] = None,
    ):
        """Create the HTTP session and the voice client.

        Args:
            stack_api_url: Base URL of the Stack 2.9 API.
            voice_api_url: Base URL passed to ``VoiceClient``.
            request_timeout: Seconds to wait for the Stack API before giving
                up. ``None`` (the default) waits indefinitely, which is what
                ``requests`` does when no timeout is supplied.
        """
        self.stack_api_url = stack_api_url
        self.voice_client = VoiceClient(voice_api_url)
        self.session = requests.Session()
        self.request_timeout = request_timeout

        # Cache for voice models to avoid repeated API calls
        self._voice_cache = {}

    def _get_stack_response(self, prompt: str) -> str:
        """Post ``prompt`` to the Stack 2.9 chat endpoint and return its reply.

        Returns:
            The ``"response"`` field of the JSON body, or ``""`` when the
            field is absent.

        Raises:
            Exception: If the HTTP request fails; the underlying
                ``requests.RequestException`` is attached as ``__cause__``.
        """
        # Strip a trailing slash so the joined URL never contains "//api/chat".
        url = f"{self.stack_api_url.rstrip('/')}/api/chat"
        try:
            response = self.session.post(
                url,
                json={"prompt": prompt, "model": "stack-2.9"},
                headers={"Content-Type": "application/json"},
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            # Chain the original error so the root cause stays in the traceback.
            raise Exception(f"Stack API request failed: {str(e)}") from e

        return data.get("response", "")

    def _get_voice_model(self, voice_name: str) -> Optional[dict]:
        """Return ``{"name": voice_name}`` if the voice service lists that voice.

        Successful lookups are cached so later calls skip ``list_voices``.
        Returns ``None`` when the voice is not listed or when the lookup
        raises; in the latter case a warning is printed instead of
        propagating the error.
        """
        if voice_name in self._voice_cache:
            return self._voice_cache[voice_name]

        try:
            voices = self.voice_client.list_voices()
            found = any(voice == voice_name for voice in voices)
        except Exception as e:
            # Best effort: an unavailable voice service is reported, not fatal.
            print(f"Warning: Failed to get voice models: {e}")
            return None

        if not found:
            return None

        self._voice_cache[voice_name] = {"name": voice_name}
        return {"name": voice_name}

    def voice_chat(self, prompt_audio_path: str, voice_name: str = "default") -> Optional[bytes]:
        """Run the full workflow: audio -> text -> Stack 2.9 reply -> audio.

        Args:
            prompt_audio_path: Path of the recorded prompt.
            voice_name: Voice passed to ``VoiceClient.synthesize``.

        Returns:
            Whatever ``VoiceClient.synthesize`` returns for the reply, or
            ``None`` when either the prompt text or the reply is empty.

        Raises:
            Exception: Propagated from ``_get_stack_response`` on HTTP failure.
        """
        # Step 1: Convert audio to text (placeholder - in real implementation, use speech-to-text)
        print(f"Converting audio to text: {prompt_audio_path}")
        prompt_text = self._audio_to_text(prompt_audio_path)
        if not prompt_text:
            return None

        print(f"User prompt: {prompt_text}")

        # Step 2: Get response from Stack 2.9
        print("Getting response from Stack 2.9...")
        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            return None

        print(f"Stack response: {response_text}")

        # Step 3: Convert response to audio
        print(f"Generating voice response with voice: {voice_name}")
        return self.voice_client.synthesize(response_text, voice_name)

    def _audio_to_text(self, audio_path: str) -> str:
        """Return the prompt text for ``audio_path`` (placeholder implementation).

        No speech recognition happens here. For a ``.wav`` or ``.mp3`` path
        the file with the same name but a ``.txt`` extension is read; for any
        other path the path itself is read. If that file does not exist, a
        fixed test prompt is returned.

        Returns:
            The stripped file content (which may be empty) or the fallback
            prompt.
        """
        # Swap only the final extension; a substring replace would also
        # rewrite ".wav"/".mp3" occurring in directory names.
        root, ext = os.path.splitext(audio_path)
        if ext.lower() in self._AUDIO_SUFFIXES:
            text_path = root + ".txt"
        else:
            text_path = audio_path

        if os.path.exists(text_path):
            with open(text_path, 'r') as f:
                return f.read().strip()

        # Fallback: return a generic prompt
        return "This is a test voice prompt."

    def voice_command(self, command: str, voice_name: str = "default") -> Optional[bytes]:
        """Send ``command`` to Stack 2.9 and synthesize the reply.

        The command is forwarded verbatim; no parsing or dispatch is done.

        Returns:
            Whatever ``VoiceClient.synthesize`` returns for the reply, or
            ``None`` when the reply is empty.

        Raises:
            Exception: Propagated from ``_get_stack_response`` on HTTP failure.
        """
        print(f"Executing voice command: {command}")

        # In a real implementation, you would parse the command and execute appropriate actions
        # For now, just pass it to Stack 2.9 as-is
        response_text = self._get_stack_response(command)
        if not response_text:
            return None

        print(f"Command response: {response_text}")

        # Generate voice response
        return self.voice_client.synthesize(response_text, voice_name)

    def streaming_voice_chat(self, prompt_audio_path: str, voice_name: str = "default") -> None:
        """Placeholder for streaming chat: one full round trip saved to a file.

        The prompt is resolved with ``_audio_to_text``, sent to Stack 2.9, and
        the reply is synthesized with ``stream=True`` and written to
        ``./streaming_response.wav`` through ``VoiceClient.download_audio``.
        Nothing is sent when the prompt text is empty.

        Raises:
            Exception: Propagated from ``_get_stack_response`` on HTTP failure.
        """
        print("Starting streaming voice chat...")

        # Get initial response
        prompt_text = self._audio_to_text(prompt_audio_path)
        if not prompt_text:
            # Same guard as voice_chat: never post an empty prompt.
            print("No prompt text received")
            return

        response_text = self._get_stack_response(prompt_text)
        if not response_text:
            print("No response received")
            return

        print("Streaming response:")
        print(response_text)

        # In a real streaming implementation, you would:
        # 1. Stream audio chunks to speech-to-text
        # 2. Send partial prompts to Stack 2.9
        # 3. Stream partial responses to TTS
        # 4. Play audio as it's generated

        # For now, just generate the complete response
        audio_data = self.voice_client.synthesize(response_text, voice_name, stream=True)

        # Save to file for demonstration
        output_path = "./streaming_response.wav"
        self.voice_client.download_audio(audio_data, output_path)
        print(f"Streaming response saved to: {output_path}")

# Demo entry point: builds the integration object; the sample calls stay disabled
if __name__ == "__main__":
    # Endpoints for a local setup (the Stack 2.9 URL is only an example).
    endpoints = {
        "stack_api_url": "http://localhost:5000",
        "voice_api_url": "http://localhost:8000",
    }
    stack_voice = StackWithVoice(**endpoints)

    print("Testing Stack with Voice integration...")

    # Sample: full voice chat round trip
    # audio_data = stack_voice.voice_chat("test_prompt.wav", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "stack_response.wav")
    #     print("Voice chat response saved to stack_response.wav")

    # Sample: spoken answer to a text command
    # audio_data = stack_voice.voice_command("Write a Python function to calculate factorial", "default")
    # if audio_data:
    #     stack_voice.voice_client.download_audio(audio_data, "command_response.wav")
    #     print("Voice command response saved to command_response.wav")

    # Sample: streaming placeholder
    # stack_voice.streaming_voice_chat("test_prompt.wav", "default")