# Add references
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
import gradio as gr
from dotenv import load_dotenv
import requests
import os
import tempfile
import base64
def summarize_audio(audio_data, sysprompt, userprompt):
    """Ask an Azure OpenAI chat model a question about an audio clip.

    Args:
        audio_data: Base64-encoded audio payload (string), as produced by
            encode_audio().
        sysprompt: System prompt; falls back to a default charter when empty.
        userprompt: The user's question about the audio.

    Returns:
        The model's reply text, or a human-readable error string when the
        question is empty or the service call fails.
    """
    # Guard first: the original console-era quit/loop logic crashed with an
    # UnboundLocalError when userprompt was empty, because it returned
    # `response` without ever making a request.
    if not userprompt:
        return "Please enter a question."
    try:
        # Get configuration settings
        load_dotenv()
        project_endpoint = os.getenv("AC_PROJECT_ENDPOINT")
        model_deployment = os.getenv("AC_MODEL_DEPLOYMENT")
        # Initialize the project client
        project_client = AIProjectClient(
            credential=DefaultAzureCredential(
                exclude_environment_credential=True,
                exclude_managed_identity_credential=True,
            ),
            endpoint=project_endpoint,
        )
        # Get a chat client
        openai_client = project_client.get_openai_client(api_version="2024-10-21")
        # Default system prompt when the UI field is left blank.
        system_message = sysprompt or "You are an AI assistant with a charter to clearly analyse the customer enquiry."
        print("Getting a response ...\n")
        # Get a response to audio input
        response = openai_client.chat.completions.create(
            model=model_deployment,
            messages=[
                {"role": "system", "content": system_message},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": userprompt,
                        },
                        {
                            "type": "input_audio",
                            "input_audio": {
                                "data": audio_data,
                                # NOTE(review): the UI advertises WAV files but
                                # the format declared here is "mp3" — confirm
                                # the intended audio format with the caller.
                                "format": "mp3",
                            },
                        },
                    ],
                },
            ],
        )
        # Return inside the try: the original returned `response` after the
        # except block, raising UnboundLocalError whenever the call failed.
        return response.choices[0].message.content
    except Exception as ex:
        print(ex)
        return f"Error: {ex}"
def encode_audio(audio_file, action):
    """Encode audio content to a base64 UTF-8 string.

    Args:
        audio_file: Path to an audio file when action is "Read", or the raw
            audio bytes themselves when action is "Download".
        action: "Read" to load from disk, "Download" for in-memory bytes.

    Returns:
        The base64-encoded audio as a str.

    Raises:
        ValueError: If the action is not recognized or encoding fails.
    """
    try:
        if action == "Read":
            # Use a distinct handle name: the original shadowed the
            # `audio_file` parameter with the open file object.
            with open(audio_file, 'rb') as fh:
                return base64.b64encode(fh.read()).decode('utf-8')
        elif action == "Download":
            # audio_file already holds raw bytes (e.g. an HTTP response body).
            return base64.b64encode(audio_file).decode('utf-8')
        else:
            # The original silently fell through and returned None here,
            # hiding caller mistakes.
            raise ValueError(f"Unknown action: {action!r}")
    except ValueError:
        # Don't re-wrap our own (or already-wrapped) ValueErrors.
        raise
    except Exception as e:
        raise ValueError(f"Failed to encode audio file: {str(e)}")
def download_wav_from_url(url):
    """Download audio bytes from a URL.

    Args:
        url: HTTP(S) URL of the audio file. Falsy values are treated as
            "no URL provided".

    Returns:
        The raw response body as bytes, or None when no URL was given.

    Raises:
        ValueError: If the request fails or returns an error status.
    """
    if not url:
        return None
    try:
        # A timeout is essential here: without one a dead host would hang the
        # Gradio request forever. stream=True was dropped because .content
        # materializes the whole body anyway.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.content
    except Exception as e:
        raise ValueError(f"Failed to download WAV from URL: {str(e)}")
def process_audio(upload_audio, record_audio, url, sysprompt, userprompt):
    """Gradio callback: pick the first available audio source and summarize it.

    Sources are checked in priority order: uploaded file, microphone
    recording, then URL.

    Args:
        upload_audio: Filepath of an uploaded audio file, or None.
        record_audio: Filepath of a microphone recording, or None.
        url: URL of an audio file, or empty string.
        sysprompt: System prompt forwarded to summarize_audio().
        userprompt: User question forwarded to summarize_audio().

    Returns:
        The model's summary text, or a prompt asking for input when no
        audio source was supplied.
    """
    # Initialize up front: the original left audio_data unbound when no
    # source was given, so `if audio_data:` raised UnboundLocalError before
    # the friendly message below could ever be returned.
    audio_data = None
    if upload_audio:
        audio_data = encode_audio(upload_audio, "Read")
    elif record_audio:
        audio_data = encode_audio(record_audio, "Read")
    elif url:
        wav_bytes = download_wav_from_url(url)
        if wav_bytes:
            audio_data = encode_audio(wav_bytes, "Download")
    if not audio_data:
        return "Please provide an audio file via upload, recording, or URL."
    # NOTE(review): the original appended the base64 *string* to a temp-file
    # list and called os.remove on it — never a real path, so that cleanup
    # was a no-op and has been removed. No temp files are created here.
    return summarize_audio(audio_data, sysprompt, userprompt)
# Build the Gradio UI: three input sources (upload / microphone / URL) plus
# prompt fields, wired to process_audio on the Summarize button.
with gr.Blocks(title="Audio Summarizer UI") as demo:
    gr.Markdown("# Audio File Summarizer")
    gr.Markdown("Upload a WAV file, record audio, or provide a URL to a WAV file for summarization.")
    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio hand process_audio a path string.
            upload_audio = gr.Audio(sources="upload", type="filepath", label="Upload WAV File")
        with gr.Column():
            record_audio = gr.Audio(sources="microphone", type="filepath", label="Record Audio")
        with gr.Column():
            url_input = gr.Textbox(label="Enter URL to WAV File", placeholder="https://example.com/audio.wav")
        with gr.Column():
            # Pre-filled so a bare click still asks a sensible question.
            userprompt_input = gr.Textbox(label="Enter User Prompt", placeholder="Ask a question about the audio",value="Summarize the audio content")
        with gr.Column():
            sysprompt_input = gr.Textbox(label="Enter System Prompt",value="You are an AI assistant with a listening charter to clearly analyse the customer enquiry.")
    submit_btn = gr.Button("Summarize")
    output = gr.Textbox(label="Summary", lines=10)
    # Input order must match process_audio's parameter order.
    submit_btn.click(
        fn=process_audio,
        inputs=[upload_audio, record_audio, url_input,sysprompt_input,userprompt_input],
        outputs=output
    )
demo.launch()