import streamlit as st import tempfile import os import re import time from pydub import AudioSegment from faster_whisper import WhisperModel from openpyxl import Workbook from openpyxl.styles import Font from docx import Document from docx.shared import Pt from io import BytesIO st.set_page_config(page_title="RecToText Pro", layout="wide") st.title("🎤 RecToText Pro – AI Polished Edition") st.caption("Professional Lecture Transcriber | Clean Story | Grammar Polished") # -------------------------------------------------- # SESSION STATE # -------------------------------------------------- if "processed_text" not in st.session_state: st.session_state.processed_text = None # -------------------------------------------------- # LOAD MODEL # -------------------------------------------------- @st.cache_resource def load_model(): return WhisperModel("base", device="cpu", compute_type="int8") model = load_model() # -------------------------------------------------- # STRICT ROMAN URDU # -------------------------------------------------- def transliterate(text): replacements = { "ہے": "hai", "میں": "main", "اور": "aur", "کیا": "kya", "کی": "ki", "کا": "ka", "سے": "se", "کو": "ko", "پر": "par", "نہیں": "nahin" } for k, v in replacements.items(): text = text.replace(k, v) return re.sub(r'[^\x00-\x7F]+', '', text) # -------------------------------------------------- # AI STYLE POLISHING (RULE BASED SAFE) # -------------------------------------------------- def polish_text(text): text = re.sub(r'\s+', ' ', text).strip() sentences = re.split(r'(?<=[.!?]) +', text) paragraphs = [] temp = "" for i, sentence in enumerate(sentences): sentence = sentence.strip().capitalize() if not sentence.endswith((".", "!", "?")): sentence += "." temp += sentence + " " if (i + 1) % 4 == 0: paragraphs.append(temp.strip()) temp = "" if temp: paragraphs.append(temp.strip()) return "\n\n".join(paragraphs) # -------------------------------------------------- # AUDIO CHUNKING # -------------------------------------------------- def chunk_audio(path): audio = AudioSegment.from_wav(path) chunks = [] chunk_len = 30 * 1000 for i in range(0, len(audio), chunk_len): chunks.append(audio[i:i + chunk_len]) return chunks # -------------------------------------------------- # EXPORT EXCEL # -------------------------------------------------- def export_excel(text): wb = Workbook() ws = wb.active ws.append(["Lecture Transcription"]) ws["A1"].font = Font(bold=True) ws.append([text]) buffer = BytesIO() wb.save(buffer) buffer.seek(0) return buffer # -------------------------------------------------- # EXPORT WORD # -------------------------------------------------- def export_word(text): doc = Document() doc.add_heading("Lecture Transcription", level=1) paragraphs = text.split("\n\n") for para in paragraphs: p = doc.add_paragraph(para) p.paragraph_format.space_after = Pt(12) buffer = BytesIO() doc.save(buffer) buffer.seek(0) return buffer # -------------------------------------------------- # CLEAR BUTTON # -------------------------------------------------- if st.sidebar.button("🧹 Clear All"): st.session_state.processed_text = None st.rerun() # -------------------------------------------------- # FILE UPLOADER # -------------------------------------------------- uploaded = st.file_uploader( "Upload Lecture (MP3, WAV, M4A, AAC) – Max 200MB", type=["mp3", "wav", "m4a", "aac"] ) output_mode = st.radio("Output Language", ["English", "Roman Urdu"]) if uploaded: try: st.audio(uploaded) with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp: ext = uploaded.name.split(".")[-1] audio = AudioSegment.from_file(uploaded, format=ext) audio.export(tmp.name, format="wav") temp_path = tmp.name start_time = time.time() chunks = chunk_audio(temp_path) full_text = "" for chunk in chunks: with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as ctmp: chunk.export(ctmp.name, format="wav") segments, info = model.transcribe(ctmp.name) for segment in segments: full_text += segment.text + " " os.remove(ctmp.name) os.remove(temp_path) # Strict output control if output_mode == "Roman Urdu": full_text = transliterate(full_text) else: full_text = re.sub(r'[^\x00-\x7F]+', '', full_text) polished = polish_text(full_text) st.session_state.processed_text = polished word_count = len(polished.split()) processing_time = round(time.time() - start_time, 2) st.subheader("✨ Clean AI Polished Story") st.text_area("", polished, height=350) st.write(f"Word Count: {word_count}") st.write(f"Processing Time: {processing_time} sec") excel_file = export_excel(polished) word_file = export_word(polished) col1, col2 = st.columns(2) with col1: if st.download_button("Download Excel (.xlsx)", excel_file): st.session_state.processed_text = None with col2: if st.download_button("Download Word (.docx)", word_file): st.session_state.processed_text = None st.success("Story Generated Successfully.") except Exception as e: st.error("Processing Error") st.exception(e) st.markdown("---") st.markdown("