Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,26 @@ def format_time(seconds):
|
|
| 14 |
ms = int((seconds % 1) * 10)
|
| 15 |
return f"{m:02d}:{s:02d}.{ms}"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def transcribe(file, model_name, language, show_timestamps, translate):
|
| 18 |
if file is None:
|
| 19 |
return "Please upload a video or audio file.", ""
|
|
@@ -29,17 +49,30 @@ def transcribe(file, model_name, language, show_timestamps, translate):
|
|
| 29 |
else:
|
| 30 |
lang = language.lower()
|
| 31 |
whisper_task = "translate" if translate == "Translate to English" else "transcribe"
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
plain = result["text"].strip()
|
| 34 |
|
| 35 |
-
# Hinglish:
|
| 36 |
if hinglish_mode:
|
| 37 |
try:
|
| 38 |
from indic_transliteration import sanscript
|
| 39 |
from indic_transliteration.transliterate import transliterate
|
| 40 |
plain = transliterate(plain, sanscript.DEVANAGARI, sanscript.ITRANS)
|
| 41 |
except Exception:
|
| 42 |
-
|
|
|
|
| 43 |
if show_timestamps:
|
| 44 |
lines = []
|
| 45 |
for seg in result["segments"]:
|
|
@@ -52,7 +85,7 @@ def transcribe(file, model_name, language, show_timestamps, translate):
|
|
| 52 |
from indic_transliteration.transliterate import transliterate
|
| 53 |
seg_text = transliterate(seg_text, sanscript.DEVANAGARI, sanscript.ITRANS)
|
| 54 |
except Exception:
|
| 55 |
-
|
| 56 |
lines.append(f"[{start} → {end}] {seg_text}")
|
| 57 |
return "\n".join(lines), plain
|
| 58 |
return plain, plain
|
|
|
|
| 14 |
ms = int((seconds % 1) * 10)
|
| 15 |
return f"{m:02d}:{s:02d}.{ms}"
|
| 16 |
|
| 17 |
+
|
| 18 |
+
# Simple Devanagari to Roman fallback map.
#
# Keys are not all single code points: conjuncts (e.g. k + virama + sh) and
# nukta letters written as "base letter + U+093C" span several code points.
# devanagari_to_roman() therefore matches the longest key first instead of
# looking characters up one at a time.
DEVA_MAP = {
    # Independent vowels
    'अ': 'a', 'आ': 'aa', 'इ': 'i', 'ई': 'ii', 'उ': 'u', 'ऊ': 'uu',
    'ए': 'e', 'ऐ': 'ai', 'ओ': 'o', 'औ': 'au', 'ऑ': 'o', 'ऍ': 'e',
    'ऋ': 'ri', 'ॠ': 'ri',
    # Consonants (no inherent vowel is added; this is a rough fallback)
    'क': 'k', 'ख': 'kh', 'ग': 'g', 'घ': 'gh', 'ङ': 'n',
    'च': 'ch', 'छ': 'chh', 'ज': 'j', 'झ': 'jh', 'ञ': 'n',
    'ट': 't', 'ठ': 'th', 'ड': 'd', 'ढ': 'dh', 'ण': 'n',
    'त': 't', 'थ': 'th', 'द': 'd', 'ध': 'dh', 'न': 'n',
    'प': 'p', 'फ': 'ph', 'ब': 'b', 'भ': 'bh', 'म': 'm',
    'य': 'y', 'र': 'r', 'ल': 'l', 'व': 'v', 'ळ': 'l',
    'श': 'sh', 'ष': 'sh', 'स': 's', 'ह': 'h',
    # Dependent vowel signs, anusvara, visarga, virama
    'ा': 'a', 'ि': 'i', 'ी': 'i', 'ु': 'u', 'ू': 'u', 'े': 'e',
    'ै': 'ai', 'ो': 'o', 'ौ': 'au', 'ॉ': 'o', 'ं': 'n', 'ः': 'h',
    '्': '',
    # Punctuation (danda, double danda)
    '।': '.', '॥': '.',
    # Conjuncts (written with escapes so the code points are unambiguous)
    '\u0915\u094d\u0937': 'ksh',   # ka + virama + ssa
    '\u091c\u094d\u091e': 'gya',   # ja + virama + nya
    # Nukta letters, precomposed form (single code point)
    '\u095a': 'g',    # ghha
    '\u095b': 'z',    # za
    '\u095c': 'r',    # dddha (flapped d)
    '\u095d': 'rh',   # rha
    '\u095e': 'f',    # fa
    # Nukta letters, decomposed form (base letter + U+093C nukta)
    '\u0917\u093c': 'g',
    '\u091c\u093c': 'z',
    '\u0921\u093c': 'r',
    '\u0922\u093c': 'rh',
    '\u092b\u093c': 'f',
}

# Longest key in DEVA_MAP, in code points; bounds the look-ahead window below.
_DEVA_MAX_KEY_LEN = max(len(key) for key in DEVA_MAP)


def devanagari_to_roman(text):
    """Roughly romanize any Devanagari in *text* using DEVA_MAP.

    At each position the longest DEVA_MAP key that matches is replaced by
    its Roman value, so multi-code-point keys (conjuncts, base + nukta) are
    honoured. Characters with no entry are copied through unchanged, which
    leaves Latin text, digits and punctuation intact.

    Args:
        text: String to convert. May be empty or None.

    Returns:
        The converted string; '' when *text* is empty or None.
    """
    if not text:
        return ''

    pieces = []
    pos = 0
    size = len(text)
    while pos < size:
        # Try the widest window first so 'ज्ञ' wins over a bare 'ज'.
        for width in range(min(_DEVA_MAX_KEY_LEN, size - pos), 0, -1):
            chunk = text[pos:pos + width]
            if chunk in DEVA_MAP:
                pieces.append(DEVA_MAP[chunk])
                pos += width
                break
        else:
            # No key starts here: keep the character as-is.
            pieces.append(text[pos])
            pos += 1
    return ''.join(pieces)
|
| 36 |
+
|
| 37 |
def transcribe(file, model_name, language, show_timestamps, translate):
|
| 38 |
if file is None:
|
| 39 |
return "Please upload a video or audio file.", ""
|
|
|
|
| 49 |
else:
|
| 50 |
lang = language.lower()
|
| 51 |
whisper_task = "translate" if translate == "Translate to English" else "transcribe"
|
| 52 |
+
# Hinglish: nudge toward Roman script via initial_prompt
|
| 53 |
+
initial_prompt = None
|
| 54 |
+
if hinglish_mode:
|
| 55 |
+
initial_prompt = (
|
| 56 |
+
"Yeh ek Hinglish conversation hai. "
|
| 57 |
+
"Transcribe using Roman script only. "
|
| 58 |
+
"Hindi words phonetically in English letters. "
|
| 59 |
+
"Example: main aaj market gaya tha, it was really crowded yaar."
|
| 60 |
+
)
|
| 61 |
+
result = m.transcribe(
|
| 62 |
+
file.name, language=lang, task=whisper_task,
|
| 63 |
+
verbose=False, initial_prompt=initial_prompt
|
| 64 |
+
)
|
| 65 |
plain = result["text"].strip()
|
| 66 |
|
| 67 |
+
# Hinglish: if any Devanagari slipped through, transliterate it
|
| 68 |
if hinglish_mode:
|
| 69 |
try:
|
| 70 |
from indic_transliteration import sanscript
|
| 71 |
from indic_transliteration.transliterate import transliterate
|
| 72 |
plain = transliterate(plain, sanscript.DEVANAGARI, sanscript.ITRANS)
|
| 73 |
except Exception:
|
| 74 |
+
# Fallback: simple character-level Devanagari → Roman map
|
| 75 |
+
plain = devanagari_to_roman(plain)
|
| 76 |
if show_timestamps:
|
| 77 |
lines = []
|
| 78 |
for seg in result["segments"]:
|
|
|
|
| 85 |
from indic_transliteration.transliterate import transliterate
|
| 86 |
seg_text = transliterate(seg_text, sanscript.DEVANAGARI, sanscript.ITRANS)
|
| 87 |
except Exception:
|
| 88 |
+
seg_text = devanagari_to_roman(seg_text)
|
| 89 |
lines.append(f"[{start} → {end}] {seg_text}")
|
| 90 |
return "\n".join(lines), plain
|
| 91 |
return plain, plain
|