|
|
import os
|
|
|
from uuid import uuid4
|
|
|
import edge_tts
|
|
|
from groq import Groq
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
# Load variables from a .env file before the client is constructed
# (presumably so Groq() can pick up its API key from the environment — confirm).
load_dotenv()

# Module-level Groq client, built with default arguments and shared by STT().
client = Groq()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def STT(audio_file):
    """Transcribe an uploaded audio file with Groq's Whisper model.

    The upload is written to ``uploads/<random hex>.wav`` and that file is
    then sent to the ``whisper-large-v3-turbo`` model with a verbose JSON
    response format and temperature 0.0.

    Args:
        audio_file: Object exposing an awaitable ``read()`` that returns the
            raw audio bytes (presumably a FastAPI/Starlette ``UploadFile`` —
            confirm against the caller).

    Returns:
        A dict with the keys ``"text"``, ``"segments"`` and ``"language"``,
        copied from the transcription response.

    Raises:
        Whatever ``audio_file.read()``, the file system, or the Groq client
        raises; nothing is caught here.
    """
    import asyncio  # function-scope import keeps this block self-contained

    os.makedirs("uploads", exist_ok=True)
    file_path = f"uploads/{uuid4().hex}.wav"

    # Read the payload before touching the disk so a failed read does not
    # leave an empty file behind.
    payload = await audio_file.read()

    def _save_and_transcribe():
        """Persist the upload and run the synchronous Groq request."""
        with open(file_path, "wb") as sink:
            sink.write(payload)

        with open(file_path, "rb") as source:
            return client.audio.transcriptions.create(
                file=source,
                model="whisper-large-v3-turbo",
                response_format="verbose_json",
                temperature=0.0,
            )

    # The Groq client used here is synchronous: calling it directly inside a
    # coroutine would block the event loop for the whole HTTP round trip, so
    # the disk I/O and the request run in the default thread pool instead.
    loop = asyncio.get_running_loop()
    transcription = await loop.run_in_executor(None, _save_and_transcribe)

    # NOTE(review): the saved upload is intentionally left in place, as in the
    # original code — confirm whether something else cleans up ``uploads/``.
    return {
        "text": transcription.text,
        "segments": transcription.segments,
        "language": transcription.language,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def TTS(text: str, voice: str = "en-US-AriaNeural") -> str:
    """Convert text to speech and save it as an MP3 file.

    Args:
        text: The text to synthesize.
        voice: Name of the edge-tts voice to use.

    Returns:
        The path of the generated audio file, ``outputs/<random hex>.mp3``.

    Raises:
        Whatever ``edge_tts`` or the file system raises. If saving fails, the
        partially written output file is removed before the exception
        propagates.
    """
    os.makedirs("outputs", exist_ok=True)
    filename = f"outputs/{uuid4().hex}.mp3"

    communicate = edge_tts.Communicate(text, voice)
    try:
        await communicate.save(filename)
    except BaseException:
        # A failed or cancelled synthesis can leave an empty or truncated
        # file behind; drop it so ``outputs/`` only holds complete audio.
        # BaseException is used so task cancellation is covered too; the
        # original exception is always re-raised.
        try:
            os.remove(filename)
        except OSError:
            # Best effort: the file may never have been created, and a
            # cleanup failure must not mask the original error.
            pass
        raise

    return filename