File size: 6,356 Bytes
92ddce4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Add references
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
import gradio as gr
from dotenv import load_dotenv
import requests
import os
import tempfile
import base64

# Summarization entry point.
# Takes base64-encoded audio data plus the system and user prompts and returns the model's text response.
def _detect_audio_format(audio_data):
    """Return the ``input_audio`` format label for base64-encoded audio.

    A RIFF/WAVE file starts with the bytes ``RIFF``, whose base64 encoding
    always begins with ``UklGR``. Anything else keeps the ``mp3`` label the
    request used previously.
    """
    if isinstance(audio_data, str) and audio_data.startswith("UklGR"):
        return "wav"
    return "mp3"


def summarize_audio(audio_data, sysprompt, userprompt):
    """Ask the deployed chat model a question about an audio clip.

    Args:
        audio_data: Base64-encoded audio content (text), sent as the
            ``input_audio`` part of the user message.
        sysprompt: System message for the model. A built-in default is used
            when it is empty.
        userprompt: The question or instruction about the audio.

    Returns:
        The text content of the first completion choice. When ``userprompt``
        is empty, blank, or the legacy sentinel ``"quit"``, no request is
        made and ``"Please enter a question."`` is returned. When the request
        fails, the error is printed and a ``"Failed to summarize audio: ..."``
        message is returned instead of raising.
    """
    # Nothing to ask: return a message rather than falling through to a
    # `response` variable that was never assigned.
    if not userprompt or not userprompt.strip() or userprompt.strip().lower() == "quit":
        return "Please enter a question."

    system_message = sysprompt or (
        "You are an AI assistant with a charter to clearly analyse the customer enquiry."
    )

    try:
        # Get configuration settings
        load_dotenv()
        project_endpoint = os.getenv("AC_PROJECT_ENDPOINT")
        model_deployment = os.getenv("AC_MODEL_DEPLOYMENT")

        # Initialize the project client
        project_client = AIProjectClient(
            credential=DefaultAzureCredential(
                exclude_environment_credential=True,
                exclude_managed_identity_credential=True,
            ),
            endpoint=project_endpoint,
        )

        # Get a chat client
        openai_client = project_client.get_openai_client(api_version="2024-10-21")

        print("Getting a response ...\n")

        # Get a response to audio input
        response = openai_client.chat.completions.create(
            model=model_deployment,
            messages=[
                {"role": "system", "content": system_message},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": userprompt},
                        {
                            "type": "input_audio",
                            "input_audio": {
                                "data": audio_data,
                                # Label WAV payloads as WAV; the UI accepts
                                # WAV uploads and recordings.
                                "format": _detect_audio_format(audio_data),
                            },
                        },
                    ],
                },
            ],
        )
        summary = response.choices[0].message.content
    except Exception as ex:
        # Top-level boundary for the UI: report the failure as text instead
        # of crashing on an unassigned `response`.
        print(ex)
        return f"Failed to summarize audio: {ex}"

    print(summary)
    return summary

def encode_audio(audio_file, action):
    """Return audio content as a base64-encoded UTF-8 string.

    Args:
        audio_file: A filesystem path when ``action`` is ``"Read"``, or the
            raw audio bytes when ``action`` is ``"Download"``.
        action: ``"Read"`` to load the bytes from ``audio_file`` on disk, or
            ``"Download"`` to encode bytes that are already in memory.

    Returns:
        The base64 text of the audio bytes.

    Raises:
        ValueError: If ``action`` is not supported, or if reading or encoding
            fails (the original exception is chained as the cause).
    """
    # Reject unknown actions explicitly instead of silently returning None.
    if action not in ("Read", "Download"):
        raise ValueError(
            f"Failed to encode audio file: unsupported action {action!r}"
        )

    try:
        if action == "Read":
            with open(audio_file, "rb") as handle:
                raw = handle.read()
        else:
            raw = audio_file
        return base64.b64encode(raw).decode("utf-8")
    except Exception as e:
        raise ValueError(f"Failed to encode audio file: {str(e)}") from e

def download_wav_from_url(url, timeout=30):
    """Download the content at ``url`` and return it as bytes.

    Args:
        url: Address of the audio file. A falsy value (``None`` or ``""``)
            short-circuits without making a request.
        timeout: Seconds to wait for the server before giving up, so a stalled
            host cannot block the caller indefinitely.

    Returns:
        The response body as ``bytes``, or ``None`` when ``url`` is falsy.

    Raises:
        ValueError: If the request fails or the server answers with an HTTP
            error status (the original exception is chained as the cause).
    """
    if not url:
        return None
    try:
        # The body is read in full, so streaming is unnecessary; the context
        # manager releases the connection once the content has been returned.
        with requests.get(url, timeout=timeout) as response:
            response.raise_for_status()
            return response.content
    except Exception as e:
        raise ValueError(f"Failed to download WAV from URL: {str(e)}") from e

def process_audio(upload_audio, record_audio, url, sysprompt, userprompt):
    """Pick one audio source, encode it, and return the model's summary.

    Sources are tried in order: uploaded file, microphone recording, then URL.

    Args:
        upload_audio: File path of an uploaded audio file, or a falsy value.
        record_audio: File path of a recorded audio file, or a falsy value.
        url: URL of an audio file to download, or a falsy value.
        sysprompt: System prompt forwarded to ``summarize_audio``.
        userprompt: User prompt forwarded to ``summarize_audio``.

    Returns:
        The text returned by ``summarize_audio``, a prompt to provide audio
        when no source yields any content, or the error message when reading,
        downloading, or encoding the audio fails.
    """
    no_audio_message = "Please provide an audio file via upload, recording, or URL."

    try:
        if upload_audio:
            audio_data = encode_audio(upload_audio, "Read")
        elif record_audio:
            audio_data = encode_audio(record_audio, "Read")
        elif url:
            downloaded = download_wav_from_url(url)
            if not downloaded:
                # An empty download gives the model nothing to work with.
                return no_audio_message
            audio_data = encode_audio(downloaded, "Download")
        else:
            return no_audio_message
    except ValueError as err:
        # Report read/download/encode failures as text in the output box.
        return str(err)

    # No temporary files are created here (downloads stay in memory), so there
    # is nothing to clean up afterwards.
    return summarize_audio(audio_data, sysprompt, userprompt)

# Build the Gradio UI: three audio sources and two prompts in one row,
# followed by a submit button and a text box for the result.
with gr.Blocks(title="Audio Summarizer UI") as demo:
    gr.Markdown("# Audio File Summarizer")
    gr.Markdown("Upload a WAV file, record audio, or provide a URL to a WAV file for summarization.")
    
    with gr.Row():
        with gr.Column():
            # type="filepath": the handler receives a path on disk for the upload.
            upload_audio = gr.Audio(sources="upload", type="filepath", label="Upload WAV File")
        with gr.Column():
            # type="filepath": the handler receives a path on disk for the recording.
            record_audio = gr.Audio(sources="microphone", type="filepath", label="Record Audio")
        with gr.Column():
            url_input = gr.Textbox(label="Enter URL to WAV File", placeholder="https://example.com/audio.wav")
        with gr.Column():
            # Pre-filled with a default question about the audio.
            userprompt_input = gr.Textbox(label="Enter User Prompt", placeholder="Ask a question about the audio",value="Summarize the audio content")
        with gr.Column():
            # Pre-filled with a default system prompt.
            sysprompt_input = gr.Textbox(label="Enter System Prompt",value="You are an AI assistant with a listening charter to clearly analyse the customer enquiry.")
    
    submit_btn = gr.Button("Summarize")
    output = gr.Textbox(label="Summary", lines=10)
    
    # The inputs list is passed positionally to process_audio, whose parameter
    # order is (upload_audio, record_audio, url, sysprompt, userprompt).
    submit_btn.click(
        fn=process_audio,
        inputs=[upload_audio, record_audio, url_input,sysprompt_input,userprompt_input],
        outputs=output
    )

# Launch the app. There is no `__main__` guard, so this also runs whenever the
# module is imported.
demo.launch()