# RecToText Pro - Streamlit app that transcribes an uploaded lecture recording
# with Whisper, derives a title / summary / formatted text from the transcript,
# and offers the result as Excel and Word downloads.
#
# NOTE: the module header is a comment rather than a docstring on purpose, so
# nothing at the top of the script precedes st.set_page_config().

import os
import re
import tempfile
import time
from collections import Counter
from io import BytesIO

import streamlit as st
import whisper
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Pt
from openpyxl import Workbook
from openpyxl.styles import Font
from pydub import AudioSegment

# ---------------------------------------------------
# PAGE CONFIG
# ---------------------------------------------------
st.set_page_config(
    page_title="RecToText Pro - AI Edition",
    layout="wide",
    page_icon="🎤",
)

# ---------------------------------------------------
# SIDEBAR
# ---------------------------------------------------
st.sidebar.title("⚙️ Settings")

model_option = st.sidebar.selectbox(
    "Select Whisper Model",
    ["base", "small"],
)

output_mode = st.sidebar.radio(
    "Output Format",
    ["Roman Urdu", "English"],
)

if st.sidebar.button("🧹 Clear Session"):
    st.session_state.clear()
    # Also drop cached transcriptions so the user can force a fresh run.
    st.cache_data.clear()
    st.rerun()

# ---------------------------------------------------
# HEADER
# ---------------------------------------------------
# NOTE(review): these strings contain no HTML although unsafe_allow_html is
# set; the wrapping markup appears to be missing from this copy of the file.
# The flag is kept so the markup can be restored without further changes.
st.markdown(
    "\n\n🎤 RecToText Pro - AI Enhanced\n\n",
    unsafe_allow_html=True,
)
st.markdown(
    "\n\nAuto Title | AI Summary | Smart Formatting\n\n",
    unsafe_allow_html=True,
)
st.divider()

# ---------------------------------------------------
# FUNCTIONS
# ---------------------------------------------------

# Splits text after sentence-ending punctuation followed by one or more spaces.
_SENTENCE_SPLIT = re.compile(r'(?<=[.!?]) +')

XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)


@st.cache_resource
def load_model(model_size):
    """Load the Whisper model of the given size.

    Decorated with ``st.cache_resource`` so the model object is reused
    across script reruns instead of being loaded again.
    """
    return whisper.load_model(model_size)


@st.cache_data(show_spinner=False)
def transcribe_audio(file_bytes, file_ext, model_size):
    """Convert uploaded audio to WAV and transcribe it with Whisper.

    Cached on (file bytes, extension, model size): Streamlit reruns the whole
    script on every interaction (e.g. a download-button click), and without
    the cache each rerun would repeat the full transcription.

    Args:
        file_bytes: Raw bytes of the uploaded recording.
        file_ext: Lower-case file extension, passed to pydub as the format.
        model_size: Whisper model name handed to ``load_model``.

    Returns:
        A ``(result, seconds)`` tuple: the value returned by
        ``model.transcribe`` and the transcription wall time rounded to
        two decimals.
    """
    model = load_model(model_size)

    # Reserve a path and close the handle immediately; writing to a path
    # whose NamedTemporaryFile handle is still open fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        temp_audio_path = tmp.name

    try:
        audio = AudioSegment.from_file(BytesIO(file_bytes), format=file_ext)
        # export() hands back the open output file; close it before reuse.
        audio.export(temp_audio_path, format="wav").close()

        started = time.time()
        result = model.transcribe(temp_audio_path)
        elapsed = round(time.time() - started, 2)
    finally:
        # Remove the temp file even when decoding or transcription fails.
        os.remove(temp_audio_path)

    return result, elapsed


def clean_text(text: str) -> str:
    """Remove filler words (case-insensitive) and collapse whitespace."""
    filler_words = ["um", "hmm", "acha", "matlab", "uh", "huh"]
    pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
    text = re.sub(pattern, '', text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def convert_to_roman_urdu(text: str) -> str:
    """Replace a small fixed set of Urdu words with Roman-script spellings.

    Replacements are applied in dictionary order as plain substring
    substitutions, so longer entries ("کیا") are listed before their
    prefixes ("کی").
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "آپ": "aap",
        "کی": "ki",
        "کا": "ka",
    }
    for urdu, roman in replacements.items():
        text = text.replace(urdu, roman)
    return text


# -----------------------------
# AI Title Detection
# -----------------------------
def generate_title(text: str) -> str:
    """Build a title from the three most frequent ASCII words of 4+ letters.

    Returns "Lecture Transcription" when no such word exists.
    """
    words = re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())
    keywords = [word.capitalize() for word, _ in Counter(words).most_common(3)]
    if keywords:
        return "Lecture on " + " ".join(keywords)
    return "Lecture Transcription"


# -----------------------------
# AI Summary Generator
# -----------------------------
def generate_summary(text: str) -> str:
    """Return the first five sentences of ``text`` joined by single spaces."""
    sentences = _SENTENCE_SPLIT.split(text)
    return " ".join(sentences[:5])


# -----------------------------
# Smart Formatting
# -----------------------------
def smart_format(text: str) -> str:
    """Render short sentences (< 8 words) as upper-case standalone lines.

    Longer sentences are kept inline, each followed by a single space.
    Leading and trailing whitespace of the final result is stripped.
    """
    pieces = []
    for sentence in _SENTENCE_SPLIT.split(text):
        if len(sentence.split()) < 8:
            pieces.append(f"\n\n{sentence.upper()}\n")
        else:
            pieces.append(sentence + " ")
    return "".join(pieces).strip()


# -----------------------------
# Excel Export
# -----------------------------
def create_excel(segments) -> BytesIO:
    """Write one row per segment to an in-memory .xlsx workbook.

    Args:
        segments: Iterable of mappings with "start", "end" and "text" keys.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the workbook.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = "Transcription"

    headers = ["Timestamp", "Transcribed Text", "Cleaned Output"]
    ws.append(headers)
    for col in range(1, len(headers) + 1):
        ws.cell(row=1, column=col).font = Font(bold=True)

    for seg in segments:
        timestamp = f"{round(seg['start'], 2)} - {round(seg['end'], 2)}"
        raw_text = seg["text"]
        ws.append([timestamp, raw_text, clean_text(raw_text)])

    buffer = BytesIO()
    wb.save(buffer)
    buffer.seek(0)
    return buffer


# -----------------------------
# Word Export
# -----------------------------
def create_word_document(title: str, summary: str, formatted_text: str) -> BytesIO:
    """Build an in-memory .docx with title, summary and content pages.

    ``formatted_text`` is split on blank lines into paragraphs; blank
    fragments are skipped and surrounding whitespace is trimmed so the
    document does not contain empty paragraphs or stray line breaks.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the document.
    """
    doc = Document()

    # Title
    doc.add_heading(title, level=1).alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_page_break()

    # Summary Page
    doc.add_heading("Executive Summary", level=2)
    doc.add_paragraph(summary)
    doc.add_page_break()

    # Main Content
    doc.add_heading("Full Lecture Content", level=2)
    for para in formatted_text.split("\n\n"):
        para = para.strip()
        if not para:
            continue
        doc.add_paragraph(para).paragraph_format.space_after = Pt(12)

    buffer = BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer


# ---------------------------------------------------
# FILE UPLOADER
# ---------------------------------------------------
uploaded_file = st.file_uploader(
    "Upload Lecture Recording (.mp3, .wav, .m4a, .aac)",
    type=["mp3", "wav", "m4a", "aac"],
)

if uploaded_file is not None:
    st.audio(uploaded_file)

    # Lower-case so an upload named e.g. "talk.MP3" maps to a known format.
    file_ext = uploaded_file.name.rsplit(".", 1)[-1].lower()

    st.info("Loading Whisper model...")
    with st.spinner("Transcribing..."):
        result, processing_time = transcribe_audio(
            uploaded_file.getvalue(),
            file_ext,
            model_option,
        )

    full_text = result["text"]
    segments = result["segments"]
    detected_lang = result.get("language", "Unknown")

    cleaned_text = clean_text(full_text)
    if output_mode == "Roman Urdu":
        cleaned_text = convert_to_roman_urdu(cleaned_text)

    title = generate_title(cleaned_text)
    summary = generate_summary(cleaned_text)
    formatted_text = smart_format(cleaned_text)
    word_count = len(cleaned_text.split())

    col1, col2 = st.columns(2)

    # Distinct (hidden) labels keep the two widgets from colliding when the
    # raw and formatted texts happen to be identical.
    with col1:
        st.subheader("📜 Raw Transcription")
        st.text_area(
            "Raw Transcription",
            full_text,
            height=350,
            label_visibility="collapsed",
        )

    with col2:
        st.subheader("✨ AI Formatted Version")
        st.text_area(
            "AI Formatted Version",
            formatted_text,
            height=350,
            label_visibility="collapsed",
        )

    st.divider()

    st.write(f"**Auto Detected Title:** {title}")
    st.write(f"**Detected Language:** {detected_lang}")
    st.write(f"**Word Count:** {word_count}")
    st.write(f"**Processing Time:** {processing_time} sec")

    excel_file = create_excel(segments)
    word_file = create_word_document(title, summary, formatted_text)

    colA, colB = st.columns(2)

    with colA:
        st.download_button(
            "📥 Download Excel (.xlsx)",
            data=excel_file,
            file_name="RecToText_Transcription.xlsx",
            mime=XLSX_MIME,
        )

    with colB:
        st.download_button(
            "📄 Download Word (.docx)",
            data=word_file,
            file_name="RecToText_AI_Lecture.docx",
            mime=DOCX_MIME,
        )

st.divider()
st.markdown(
    "\n\nRecToText Pro AI Edition | Auto Title | Smart Summary | AI Formatting\n\n",
    unsafe_allow_html=True,
)