Spaces:

MiakOnline
/

RecToTextPro

Sleeping

App Files Files Community

RecToTextPro / app.py

MiakOnline

Update app.py

88bdbd2 verified 10 days ago

raw

history blame contribute delete

5.94 kB

	import streamlit as st
	import tempfile
	import os
	import re
	import time
	from pydub import AudioSegment
	from faster_whisper import WhisperModel
	from openpyxl import Workbook
	from openpyxl.styles import Font
	from docx import Document
	from docx.shared import Pt
	from io import BytesIO

	# Page chrome: wide layout, app title and tagline.
	st.set_page_config(page_title="RecToText Pro", layout="wide")

	st.title("🎤 RecToText Pro – AI Polished Edition")
	# Plain "|" separators: "\|" is not a valid Python string escape
	# (SyntaxWarning on Python 3.12+), and Markdown shows a bare "|" the same way.
	st.caption("Professional Lecture Transcriber | Clean Story | Grammar Polished")

	# --------------------------------------------------
	# SESSION STATE
	# --------------------------------------------------
	# Seed the slot for the most recent polished transcript on the first run
	# of a browser session.
	if "processed_text" not in st.session_state:
	    st.session_state.processed_text = None

	# --------------------------------------------------
	# LOAD MODEL
	# --------------------------------------------------
	@st.cache_resource
	def load_model(model_size: str = "base", device: str = "cpu", compute_type: str = "int8"):
	    """Create the Whisper model used for transcription.

	    The function is wrapped in ``st.cache_resource``, so the model is built
	    once per distinct argument combination instead of on every script rerun.

	    Args:
	        model_size: Model name passed as the first ``WhisperModel`` argument.
	        device: Device passed to ``WhisperModel`` (default ``"cpu"``).
	        compute_type: Compute type passed to ``WhisperModel``
	            (default ``"int8"``).

	    Returns:
	        The constructed ``WhisperModel`` instance.
	    """
	    return WhisperModel(model_size, device=device, compute_type=compute_type)


	# Module-level model shared by the transcription code further down.
	model = load_model()

	# --------------------------------------------------
	# STRICT ROMAN URDU
	# --------------------------------------------------
	# Common Urdu function words and their Roman Urdu spellings.
	_URDU_WORDS = {
	    "ہے": "hai",
	    "میں": "main",
	    "اور": "aur",
	    "کیا": "kya",
	    "کی": "ki",
	    "کا": "ka",
	    "سے": "se",
	    "کو": "ko",
	    "پر": "par",
	    "نہیں": "nahin",
	}

	# Urdu punctuation mapped to ASCII so sentence boundaries survive the
	# final ASCII-only filter.
	_URDU_PUNCTUATION = str.maketrans({
	    "\u06d4": ".",  # Arabic full stop
	    "\u061f": "?",  # Arabic question mark
	    "\u060c": ",",  # Arabic comma
	    "\u061b": ";",  # Arabic semicolon
	})

	# Whole-word match only: the lookarounds stop a short entry from rewriting a
	# fragment of a longer, unknown word. Longest entries are tried first.
	_URDU_WORD_RE = re.compile(
	    r"(?<!\w)(?:"
	    + "|".join(re.escape(word) for word in sorted(_URDU_WORDS, key=len, reverse=True))
	    + r")(?!\w)"
	)

	_NON_ASCII_RE = re.compile(r'[^\x00-\x7F]+')


	def transliterate(text):
	    """Convert Urdu-script text to ASCII-only Roman Urdu.

	    Known whole words are replaced by their Roman spelling, Urdu punctuation
	    is mapped to its ASCII counterpart, and every remaining non-ASCII run is
	    removed. Words that are not in the lookup table are dropped rather than
	    partially transliterated.

	    Args:
	        text: Input text, possibly mixing Urdu script and ASCII.

	    Returns:
	        The ASCII-only result. Whitespace is left untouched, so removed words
	        can leave consecutive spaces behind.
	    """
	    text = text.translate(_URDU_PUNCTUATION)
	    text = _URDU_WORD_RE.sub(lambda match: _URDU_WORDS[match.group(0)], text)
	    return _NON_ASCII_RE.sub('', text)

	# --------------------------------------------------
	# AI STYLE POLISHING (RULE BASED SAFE)
	# --------------------------------------------------
	def polish_text(text, sentences_per_paragraph=4):
	    """Normalise a raw transcript into capitalised sentences and paragraphs.

	    Whitespace is collapsed, the text is split after ``.``, ``!`` or ``?``,
	    each sentence gets an upper-case first character and a closing full stop
	    if it has no terminal punctuation, and sentences are grouped into
	    paragraphs separated by a blank line.

	    Args:
	        text: Raw transcript text.
	        sentences_per_paragraph: Number of sentences per paragraph; must be
	            at least 1.

	    Returns:
	        The polished text, or an empty string when ``text`` holds nothing
	        but whitespace.

	    Raises:
	        ValueError: If ``sentences_per_paragraph`` is smaller than 1.
	    """
	    if sentences_per_paragraph < 1:
	        raise ValueError("sentences_per_paragraph must be at least 1")

	    text = re.sub(r'\s+', ' ', text).strip()
	    if not text:
	        # Avoid emitting a lone "." for an empty transcript.
	        return ""

	    sentences = []
	    for raw_sentence in re.split(r'(?<=[.!?]) +', text):
	        sentence = raw_sentence.strip()
	        if not sentence:
	            continue
	        # Upper-case only the first character: str.capitalize() would also
	        # lower-case the rest and ruin acronyms and proper nouns.
	        sentence = sentence[0].upper() + sentence[1:]
	        if not sentence.endswith((".", "!", "?")):
	            sentence += "."
	        sentences.append(sentence)

	    paragraphs = [
	        " ".join(sentences[start:start + sentences_per_paragraph])
	        for start in range(0, len(sentences), sentences_per_paragraph)
	    ]
	    return "\n\n".join(paragraphs)

	# --------------------------------------------------
	# AUDIO CHUNKING
	# --------------------------------------------------
	def chunk_audio(path, chunk_len_ms=30 * 1000):
	    """Split a WAV file into consecutive fixed-length pieces.

	    Args:
	        path: Path of the WAV file to load with ``AudioSegment.from_wav``.
	        chunk_len_ms: Length of each piece in milliseconds (default 30 s).
	            The last piece may be shorter.

	    Returns:
	        A list of audio slices in playback order; empty when the audio has
	        zero length.

	    Raises:
	        ValueError: If ``chunk_len_ms`` is not positive.
	    """
	    if chunk_len_ms <= 0:
	        raise ValueError("chunk_len_ms must be a positive number of milliseconds")

	    audio = AudioSegment.from_wav(path)
	    return [
	        audio[start:start + chunk_len_ms]
	        for start in range(0, len(audio), chunk_len_ms)
	    ]

	# --------------------------------------------------
	# EXPORT EXCEL
	# --------------------------------------------------
	def export_excel(text):
	    """Build an in-memory .xlsx workbook containing the transcript.

	    Row 1 holds a bold "Lecture Transcription" header. Each blank-line
	    separated paragraph of ``text`` is written to its own row in column A.
	    A spreadsheet cell holds at most 32,767 characters and openpyxl truncates
	    longer strings, so putting the whole transcript into one cell could
	    silently lose the end of a long lecture. Oversized paragraphs are
	    continued on the following row.

	    Args:
	        text: Polished transcript with paragraphs separated by blank lines.

	    Returns:
	        A ``BytesIO`` positioned at offset 0 that holds the saved workbook.
	    """
	    max_cell_chars = 32767

	    wb = Workbook()
	    ws = wb.active
	    ws.append(["Lecture Transcription"])
	    ws["A1"].font = Font(bold=True)

	    for paragraph in text.split("\n\n"):
	        # An empty paragraph still produces one (empty) row.
	        pieces = [
	            paragraph[start:start + max_cell_chars]
	            for start in range(0, len(paragraph), max_cell_chars)
	        ] or [""]
	        for piece in pieces:
	            ws.append([piece])

	    buffer = BytesIO()
	    wb.save(buffer)
	    buffer.seek(0)
	    return buffer

	# --------------------------------------------------
	# EXPORT WORD
	# --------------------------------------------------
	def export_word(text):
	    """Render the transcript as an in-memory .docx document.

	    The document starts with a level-1 "Lecture Transcription" heading,
	    followed by one Word paragraph per blank-line separated block of
	    ``text``, each with 12 pt of spacing after it.

	    Args:
	        text: Transcript with paragraphs separated by blank lines.

	    Returns:
	        A ``BytesIO`` rewound to offset 0 that holds the saved document.
	    """
	    document = Document()
	    document.add_heading("Lecture Transcription", level=1)

	    for block in text.split("\n\n"):
	        written = document.add_paragraph(block)
	        written.paragraph_format.space_after = Pt(12)

	    stream = BytesIO()
	    document.save(stream)
	    stream.seek(0)
	    return stream

	# --------------------------------------------------
	# CLEAR BUTTON
	# --------------------------------------------------
	# Sidebar reset: drop the stored transcript and restart the script run.
	clear_requested = st.sidebar.button("🧹 Clear All")
	if clear_requested:
	    st.session_state.processed_text = None
	    st.rerun()

	# --------------------------------------------------
	# FILE UPLOADER
	# --------------------------------------------------
	# Audio formats the uploader accepts.
	accepted_types = ["mp3", "wav", "m4a", "aac"]
	uploaded = st.file_uploader(
	    "Upload Lecture (MP3, WAV, M4A, AAC) – Max 200MB",
	    type=accepted_types,
	)

	# Chooses between plain ASCII English output and Roman Urdu transliteration.
	language_choices = ["English", "Roman Urdu"]
	output_mode = st.radio("Output Language", language_choices)

	def _transcribe_upload(uploaded_file):
	    """Transcribe an uploaded audio file and return the raw text.

	    The upload is converted to WAV, split with ``chunk_audio`` and every
	    chunk is passed to the module-level ``model``. All intermediate files
	    live in one temporary directory that is removed on exit, including when
	    decoding or transcription raises.

	    Args:
	        uploaded_file: File object returned by ``st.file_uploader``.

	    Returns:
	        The segment texts of all chunks joined with single spaces.
	    """
	    # Lower-case the extension so "LECTURE.MP3" is decoded like "lecture.mp3".
	    ext = uploaded_file.name.rsplit(".", 1)[-1].lower()
	    # An earlier reader may have advanced the stream position.
	    uploaded_file.seek(0)

	    pieces = []
	    with tempfile.TemporaryDirectory() as workdir:
	        wav_path = os.path.join(workdir, "lecture.wav")
	        AudioSegment.from_file(uploaded_file, format=ext).export(wav_path, format="wav")

	        for index, chunk in enumerate(chunk_audio(wav_path)):
	            chunk_path = os.path.join(workdir, f"chunk_{index}.wav")
	            chunk.export(chunk_path, format="wav")
	            segments, _info = model.transcribe(chunk_path)
	            # Drain the segments before deleting the chunk file; the
	            # transcription result may be produced lazily.
	            pieces.extend(segment.text for segment in segments)
	            # Free disk space early; the directory cleanup covers failures.
	            os.remove(chunk_path)

	    return " ".join(pieces)


	if uploaded:
	    try:
	        st.audio(uploaded)

	        # Streamlit reruns the whole script on every widget interaction
	        # (including a click on a download button). Reuse the stored
	        # transcript while the same file and output mode are selected
	        # instead of transcribing the lecture again.
	        cache_key = (uploaded.name, uploaded.size, output_mode)
	        polished = st.session_state.get("processed_text")
	        if polished is None or st.session_state.get("processed_key") != cache_key:
	            start_time = time.time()
	            full_text = _transcribe_upload(uploaded)

	            # Strict output control
	            if output_mode == "Roman Urdu":
	                full_text = transliterate(full_text)
	            else:
	                full_text = re.sub(r'[^\x00-\x7F]+', '', full_text)

	            polished = polish_text(full_text)

	            st.session_state.processed_text = polished
	            st.session_state.processed_key = cache_key
	            st.session_state.processing_time = round(time.time() - start_time, 2)

	        word_count = len(polished.split())
	        # Time of the run that produced the transcript, not of this rerun.
	        processing_time = st.session_state.get("processing_time", 0.0)

	        st.subheader("✨ Clean AI Polished Story")
	        # Non-empty label, hidden visually: Streamlit warns about empty labels.
	        st.text_area(
	            "Polished transcript",
	            polished,
	            height=350,
	            label_visibility="collapsed",
	        )

	        st.write(f"Word Count: {word_count}")
	        st.write(f"Processing Time: {processing_time} sec")

	        excel_file = export_excel(polished)
	        word_file = export_word(polished)

	        col1, col2 = st.columns(2)

	        # Explicit file names and MIME types so the browser saves the files
	        # with the right extension. The stored transcript is deliberately
	        # kept after a download: clearing it would force a full
	        # re-transcription on the next interaction.
	        with col1:
	            st.download_button(
	                "Download Excel (.xlsx)",
	                excel_file,
	                file_name="lecture_transcription.xlsx",
	                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	            )

	        with col2:
	            st.download_button(
	                "Download Word (.docx)",
	                word_file,
	                file_name="lecture_transcription.docx",
	                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	            )

	        st.success("Story Generated Successfully.")

	    except Exception as e:
	        # Top-level UI boundary: show the failure instead of crashing the app.
	        st.error("Processing Error")
	        st.exception(e)

	# Page footer: a horizontal rule followed by the centred product name.
	footer_html = "<center>RecToText Pro – AI Polished Edition</center>"
	st.markdown("---")
	st.markdown(footer_html, unsafe_allow_html=True)