# Page-header residue from the repository web view (not part of the program):
#   RecToTextPro / app.py
#   MiakOnline's picture
#   Update app.py
#   88bdbd2 verified
import streamlit as st
import tempfile
import os
import re
import time
from pydub import AudioSegment
from faster_whisper import WhisperModel
from openpyxl import Workbook
from openpyxl.styles import Font
from docx import Document
from docx.shared import Pt
from io import BytesIO
# Page-level configuration (browser tab title, wide layout), followed by the
# heading and caption rendered at the top of the app.
st.set_page_config(page_title="RecToText Pro", layout="wide")
st.title("🎤 RecToText Pro – AI Polished Edition")
st.caption("Professional Lecture Transcriber | Clean Story | Grammar Polished")
# --------------------------------------------------
# SESSION STATE
# --------------------------------------------------
# Seed the session slot that holds the latest polished transcript so later
# code can read it without first checking that it exists.
if "processed_text" not in st.session_state:
    st.session_state["processed_text"] = None
# --------------------------------------------------
# LOAD MODEL
# --------------------------------------------------
@st.cache_resource
def load_model():
    """Build the speech-to-text model used by the app.

    Returns:
        A ``WhisperModel`` for the "base" checkpoint, configured to run on the
        CPU with int8 computation.
    """
    whisper = WhisperModel("base", device="cpu", compute_type="int8")
    return whisper


# Module-level handle used by the transcription step further down.
model = load_model()
# --------------------------------------------------
# STRICT ROMAN URDU
# --------------------------------------------------
def transliterate(text):
    """Convert a handful of common Urdu words to Roman Urdu and drop the rest.

    Each Urdu word in the lookup table is replaced by its Latin spelling, but
    only where it stands as a whole word. Afterwards every remaining run of
    non-ASCII characters is deleted, so the result is pure ASCII.

    Args:
        text: Transcript text, possibly containing Urdu script.

    Returns:
        The ASCII-only text. Urdu words that are not in the table are removed,
        which can leave consecutive spaces behind.
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "کی": "ki",
        "کا": "ka",
        "سے": "se",
        "کو": "ko",
        "پر": "par",
        "نہیں": "nahin",
    }
    # Longest spellings first, so "کیا" is tried before its prefix "کی"
    # regardless of the order in which the table is written.
    alternatives = "|".join(
        re.escape(word) for word in sorted(replacements, key=len, reverse=True)
    )
    # The lookarounds restrict matches to whole words. Plain substring
    # replacement rewrote fragments of longer words (the tail of "ہمیں" turned
    # into "main"), producing words the speaker never said.
    whole_word = re.compile(rf"(?<!\w)(?:{alternatives})(?!\w)")
    text = whole_word.sub(lambda match: replacements[match.group(0)], text)
    # Anything still outside ASCII has no transliteration here; strip it.
    return re.sub(r'[^\x00-\x7F]+', '', text)
# --------------------------------------------------
# AI STYLE POLISHING (RULE BASED SAFE)
# --------------------------------------------------
def polish_text(text, sentences_per_paragraph=4):
    """Tidy a raw transcript into punctuated sentences grouped into paragraphs.

    Whitespace runs are collapsed to single spaces, the text is split after
    ".", "!" or "?", each sentence gets an upper-case first character and a
    closing "." when it has no terminal punctuation, and sentences are joined
    into paragraphs separated by a blank line.

    Args:
        text: Raw transcript text.
        sentences_per_paragraph: Number of sentences per paragraph (default 4).

    Returns:
        The polished text, or an empty string when ``text`` holds nothing but
        whitespace.

    Raises:
        ValueError: If ``sentences_per_paragraph`` is smaller than 1.
    """
    if sentences_per_paragraph < 1:
        raise ValueError("sentences_per_paragraph must be at least 1")

    text = re.sub(r'\s+', ' ', text).strip()
    if not text:
        # Without this guard an empty transcript would come back as ".".
        return ""

    sentences = []
    for raw in re.split(r'(?<=[.!?]) +', text):
        sentence = raw.strip()
        if not sentence:
            continue
        # Upper-case only the first character: str.capitalize() would also
        # lower-case the rest and wreck proper nouns and acronyms.
        sentence = sentence[0].upper() + sentence[1:]
        if not sentence.endswith((".", "!", "?")):
            sentence += "."
        sentences.append(sentence)

    paragraphs = [
        " ".join(sentences[start:start + sentences_per_paragraph])
        for start in range(0, len(sentences), sentences_per_paragraph)
    ]
    return "\n\n".join(paragraphs)
# --------------------------------------------------
# AUDIO CHUNKING
# --------------------------------------------------
def chunk_audio(path, chunk_len_ms=30 * 1000):
    """Split a WAV file into consecutive fixed-length pieces.

    Args:
        path: Path of the WAV file to load.
        chunk_len_ms: Length of each piece in milliseconds (default 30 000,
            i.e. 30 seconds). The final piece may be shorter.

    Returns:
        A list of audio slices in playback order; empty when the audio has
        zero length.

    Raises:
        ValueError: If ``chunk_len_ms`` is not positive.
    """
    if chunk_len_ms <= 0:
        raise ValueError("chunk_len_ms must be a positive number of milliseconds")
    audio = AudioSegment.from_wav(path)
    # len(audio) and the slice bounds are both in milliseconds.
    return [
        audio[start:start + chunk_len_ms]
        for start in range(0, len(audio), chunk_len_ms)
    ]
# --------------------------------------------------
# EXPORT EXCEL
# --------------------------------------------------
def export_excel(text):
    """Write the transcript to an in-memory Excel workbook.

    The first row is a bold "Lecture Transcription" header. Each paragraph of
    ``text`` (paragraphs are separated by a blank line) goes into its own row
    of column A, mirroring how ``export_word`` splits the text.

    Args:
        text: Polished transcript with paragraphs separated by "\\n\\n".

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the .xlsx file.
    """
    # A single cell can hold at most 32,767 characters and openpyxl silently
    # truncates longer strings, so a whole lecture in one cell loses its tail.
    max_cell_chars = 32767

    wb = Workbook()
    ws = wb.active
    ws.append(["Lecture Transcription"])
    ws["A1"].font = Font(bold=True)
    for para in text.split("\n\n"):
        # An oversized paragraph is spread over several rows rather than cut.
        for start in range(0, len(para), max_cell_chars):
            ws.append([para[start:start + max_cell_chars]])

    buffer = BytesIO()
    wb.save(buffer)
    buffer.seek(0)
    return buffer
# --------------------------------------------------
# EXPORT WORD
# --------------------------------------------------
def export_word(text):
    """Render the transcript as an in-memory Word document.

    A level-1 heading "Lecture Transcription" is followed by one Word
    paragraph per blank-line-separated paragraph of ``text``, each with
    12 pt of spacing after it.

    Args:
        text: Polished transcript with paragraphs separated by "\\n\\n".

    Returns:
        A ``BytesIO`` positioned at offset 0 that holds the .docx file.
    """
    document = Document()
    document.add_heading("Lecture Transcription", level=1)
    for block in text.split("\n\n"):
        document.add_paragraph(block).paragraph_format.space_after = Pt(12)

    out = BytesIO()
    document.save(out)
    out.seek(0)
    return out
# --------------------------------------------------
# CLEAR BUTTON
# --------------------------------------------------
# Sidebar reset: forget the stored transcript and restart the script run.
clear_clicked = st.sidebar.button("🧹 Clear All")
if clear_clicked:
    st.session_state.processed_text = None
    st.rerun()
# --------------------------------------------------
# FILE UPLOADER
# --------------------------------------------------
# Input widgets: the audio upload (restricted to the listed extensions) and
# the choice of output language read by the processing step below.
uploaded = st.file_uploader(
    label="Upload Lecture (MP3, WAV, M4A, AAC) – Max 200MB",
    type=["mp3", "wav", "m4a", "aac"],
)
output_mode = st.radio(label="Output Language", options=["English", "Roman Urdu"])
def _transcribe_upload(uploaded_file):
    """Decode an uploaded audio file and return its raw transcript.

    The upload is converted to a temporary WAV file, cut into pieces by
    ``chunk_audio``, and each piece is transcribed with the shared ``model``.
    Every temporary file is removed even when decoding or transcription fails.

    Args:
        uploaded_file: The file object returned by ``st.file_uploader``.

    Returns:
        The text of all segments joined with single spaces.
    """
    ext = uploaded_file.name.split(".")[-1]
    # Reserve a path and close the handle before anything writes to it, so the
    # export does not compete with an open handle on the same file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name
    try:
        # Rewind in case an earlier reader left the position at the end.
        uploaded_file.seek(0)
        AudioSegment.from_file(uploaded_file, format=ext).export(wav_path, format="wav")

        texts = []
        for chunk in chunk_audio(wav_path):
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as ctmp:
                chunk_path = ctmp.name
            try:
                chunk.export(chunk_path, format="wav")
                segments, _info = model.transcribe(chunk_path)
                # Consume the segments here, while the chunk file still exists.
                texts.extend(segment.text for segment in segments)
            finally:
                os.remove(chunk_path)
        return " ".join(texts)
    finally:
        os.remove(wav_path)


if uploaded:
    try:
        st.audio(uploaded)

        # Streamlit re-runs this script on every interaction (including a
        # click on a download button). Reuse the stored transcript unless the
        # upload or the output mode changed, or the transcript was cleared.
        cache_key = (uploaded.name, uploaded.size, output_mode)
        cached_meta = st.session_state.get("processed_meta")
        polished = st.session_state.get("processed_text")

        if polished is None or not cached_meta or cached_meta["key"] != cache_key:
            start_time = time.time()
            full_text = _transcribe_upload(uploaded)

            # Strict output control: both modes end up ASCII-only.
            if output_mode == "Roman Urdu":
                full_text = transliterate(full_text)
            else:
                full_text = re.sub(r'[^\x00-\x7F]+', '', full_text)

            polished = polish_text(full_text)
            cached_meta = {
                "key": cache_key,
                "seconds": round(time.time() - start_time, 2),
            }
            st.session_state.processed_text = polished
            st.session_state.processed_meta = cached_meta

        word_count = len(polished.split())
        processing_time = cached_meta["seconds"]

        st.subheader("✨ Clean AI Polished Story")
        # A non-empty label keeps the widget accessible; it is hidden visually.
        st.text_area(
            "Polished transcript",
            polished,
            height=350,
            label_visibility="collapsed",
        )
        st.write(f"Word Count: {word_count}")
        st.write(f"Processing Time: {processing_time} sec")

        excel_file = export_excel(polished)
        word_file = export_word(polished)

        # Explicit file names and MIME types so the browser saves files with
        # the right extension. The stored transcript is deliberately NOT reset
        # on click: that would force a full re-transcription on the next run.
        col1, col2 = st.columns(2)
        with col1:
            st.download_button(
                "Download Excel (.xlsx)",
                excel_file,
                file_name="lecture_transcription.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )
        with col2:
            st.download_button(
                "Download Word (.docx)",
                word_file,
                file_name="lecture_transcription.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )

        st.success("Story Generated Successfully.")
    except Exception as e:
        # Top-level UI boundary: show the failure instead of crashing the app.
        st.error("Processing Error")
        st.exception(e)
# Footer: a horizontal rule followed by a centered app name rendered as raw HTML.
st.markdown("---")
st.markdown("<center>RecToText Pro – AI Polished Edition</center>", unsafe_allow_html=True)