# Add references
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
import gradio as gr
from dotenv import load_dotenv
import requests
import os
import tempfile
import base64
def summarize_audio(audio_data, sysprompt, userprompt):
    """Ask an Azure OpenAI chat model a question about an audio clip.

    Args:
        audio_data: Base64-encoded audio payload (string), as produced by
            encode_audio().
        sysprompt: System prompt; falls back to a default charter when empty.
        userprompt: The user's question about the audio.

    Returns:
        The model's reply text, or a human-readable error string when the
        question is empty or the service call fails.
    """
    # Guard first: the original console-era quit/loop logic crashed with an
    # UnboundLocalError when userprompt was empty, because it returned
    # `response` without ever making a request.
    if not userprompt:
        return "Please enter a question."
    try:
        # Get configuration settings
        load_dotenv()
        project_endpoint = os.getenv("AC_PROJECT_ENDPOINT")
        model_deployment = os.getenv("AC_MODEL_DEPLOYMENT")
        # Initialize the project client
        project_client = AIProjectClient(
            credential=DefaultAzureCredential(
                exclude_environment_credential=True,
                exclude_managed_identity_credential=True,
            ),
            endpoint=project_endpoint,
        )
        # Get a chat client
        openai_client = project_client.get_openai_client(api_version="2024-10-21")
        # Default system prompt when the UI field is left blank.
        system_message = sysprompt or "You are an AI assistant with a charter to clearly analyse the customer enquiry."
        print("Getting a response ...\n")
        # Get a response to audio input
        response = openai_client.chat.completions.create(
            model=model_deployment,
            messages=[
                {"role": "system", "content": system_message},
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": userprompt,
                        },
                        {
                            "type": "input_audio",
                            "input_audio": {
                                "data": audio_data,
                                # NOTE(review): the UI advertises WAV files but
                                # the format declared here is "mp3" — confirm
                                # the intended audio format with the caller.
                                "format": "mp3",
                            },
                        },
                    ],
                },
            ],
        )
        # Return inside the try: the original returned `response` after the
        # except block, raising UnboundLocalError whenever the call failed.
        return response.choices[0].message.content
    except Exception as ex:
        print(ex)
        return f"Error: {ex}"
def encode_audio(audio_file, action):
    """Encode audio content to a base64 UTF-8 string.

    Args:
        audio_file: Path to an audio file when action is "Read", or the raw
            audio bytes themselves when action is "Download".
        action: "Read" to load from disk, "Download" for in-memory bytes.

    Returns:
        The base64-encoded audio as a str.

    Raises:
        ValueError: If the action is not recognized or encoding fails.
    """
    try:
        if action == "Read":
            # Use a distinct handle name: the original shadowed the
            # `audio_file` parameter with the open file object.
            with open(audio_file, 'rb') as fh:
                return base64.b64encode(fh.read()).decode('utf-8')
        elif action == "Download":
            # audio_file already holds raw bytes (e.g. an HTTP response body).
            return base64.b64encode(audio_file).decode('utf-8')
        else:
            # The original silently fell through and returned None here,
            # hiding caller mistakes.
            raise ValueError(f"Unknown action: {action!r}")
    except ValueError:
        # Don't re-wrap our own (or already-wrapped) ValueErrors.
        raise
    except Exception as e:
        raise ValueError(f"Failed to encode audio file: {str(e)}")
def download_wav_from_url(url):
    """Download audio bytes from a URL.

    Args:
        url: HTTP(S) URL of the audio file. Falsy values are treated as
            "no URL provided".

    Returns:
        The raw response body as bytes, or None when no URL was given.

    Raises:
        ValueError: If the request fails or returns an error status.
    """
    if not url:
        return None
    try:
        # A timeout is essential here: without one a dead host would hang the
        # Gradio request forever. stream=True was dropped because .content
        # materializes the whole body anyway.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.content
    except Exception as e:
        raise ValueError(f"Failed to download WAV from URL: {str(e)}")
def process_audio(upload_audio, record_audio, url, sysprompt, userprompt):
    """Gradio callback: pick the first available audio source and summarize it.

    Sources are checked in priority order: uploaded file, microphone
    recording, then URL.

    Args:
        upload_audio: Filepath of an uploaded audio file, or None.
        record_audio: Filepath of a microphone recording, or None.
        url: URL of an audio file, or empty string.
        sysprompt: System prompt forwarded to summarize_audio().
        userprompt: User question forwarded to summarize_audio().

    Returns:
        The model's summary text, or a prompt asking for input when no
        audio source was supplied.
    """
    # Initialize up front: the original left audio_data unbound when no
    # source was given, so `if audio_data:` raised UnboundLocalError before
    # the friendly message below could ever be returned.
    audio_data = None
    if upload_audio:
        audio_data = encode_audio(upload_audio, "Read")
    elif record_audio:
        audio_data = encode_audio(record_audio, "Read")
    elif url:
        wav_bytes = download_wav_from_url(url)
        if wav_bytes:
            audio_data = encode_audio(wav_bytes, "Download")
    if not audio_data:
        return "Please provide an audio file via upload, recording, or URL."
    # NOTE(review): the original appended the base64 *string* to a temp-file
    # list and called os.remove on it — never a real path, so that cleanup
    # was a no-op and has been removed. No temp files are created here.
    return summarize_audio(audio_data, sysprompt, userprompt)
# Build the Gradio UI: three input sources (upload / microphone / URL) plus
# prompt fields, wired to process_audio on the Summarize button.
with gr.Blocks(title="Audio Summarizer UI") as demo:
    gr.Markdown("# Audio File Summarizer")
    gr.Markdown("Upload a WAV file, record audio, or provide a URL to a WAV file for summarization.")
    with gr.Row():
        with gr.Column():
            # type="filepath" makes Gradio hand process_audio a path string.
            upload_audio = gr.Audio(sources="upload", type="filepath", label="Upload WAV File")
        with gr.Column():
            record_audio = gr.Audio(sources="microphone", type="filepath", label="Record Audio")
        with gr.Column():
            url_input = gr.Textbox(label="Enter URL to WAV File", placeholder="https://example.com/audio.wav")
        with gr.Column():
            # Pre-filled so a bare click still asks a sensible question.
            userprompt_input = gr.Textbox(label="Enter User Prompt", placeholder="Ask a question about the audio",value="Summarize the audio content")
        with gr.Column():
            sysprompt_input = gr.Textbox(label="Enter System Prompt",value="You are an AI assistant with a listening charter to clearly analyse the customer enquiry.")
    submit_btn = gr.Button("Summarize")
    output = gr.Textbox(label="Summary", lines=10)
    # Input order must match process_audio's parameter order.
    submit_btn.click(
        fn=process_audio,
        inputs=[upload_audio, record_audio, url_input,sysprompt_input,userprompt_input],
        outputs=output
    )
demo.launch()