Kalp97 committed on
Commit
0c04064
·
verified ·
1 Parent(s): 47da534

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -4
app.py CHANGED
@@ -14,6 +14,26 @@ def format_time(seconds):
14
  ms = int((seconds % 1) * 10)
15
  return f"{m:02d}:{s:02d}.{ms}"
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def transcribe(file, model_name, language, show_timestamps, translate):
18
  if file is None:
19
  return "Please upload a video or audio file.", ""
@@ -29,17 +49,30 @@ def transcribe(file, model_name, language, show_timestamps, translate):
29
  else:
30
  lang = language.lower()
31
  whisper_task = "translate" if translate == "Translate to English" else "transcribe"
32
- result = m.transcribe(file.name, language=lang, verbose=False, task=whisper_task)
 
 
 
 
 
 
 
 
 
 
 
 
33
  plain = result["text"].strip()
34
 
35
- # Hinglish: transliterate Devanagari Roman script
36
  if hinglish_mode:
37
  try:
38
  from indic_transliteration import sanscript
39
  from indic_transliteration.transliterate import transliterate
40
  plain = transliterate(plain, sanscript.DEVANAGARI, sanscript.ITRANS)
41
  except Exception:
42
- pass # fallback: return Devanagari if transliteration fails
 
43
  if show_timestamps:
44
  lines = []
45
  for seg in result["segments"]:
@@ -52,7 +85,7 @@ def transcribe(file, model_name, language, show_timestamps, translate):
52
  from indic_transliteration.transliterate import transliterate
53
  seg_text = transliterate(seg_text, sanscript.DEVANAGARI, sanscript.ITRANS)
54
  except Exception:
55
- pass
56
  lines.append(f"[{start} → {end}] {seg_text}")
57
  return "\n".join(lines), plain
58
  return plain, plain
 
14
  ms = int((seconds % 1) * 10)
15
  return f"{m:02d}:{s:02d}.{ms}"
16
 
17
+
18
# Simple Devanagari-to-Roman fallback map.
#
# Keys may span several code points (conjuncts, nukta letters); the lookup in
# devanagari_to_roman() always tries the longest key first.  This is a rough
# phonetic approximation: no inherent "a" is inserted after bare consonants.
DEVA_MAP = {
    # Independent vowels
    'अ': 'a', 'आ': 'aa', 'इ': 'i', 'ई': 'ii', 'उ': 'u', 'ऊ': 'uu',
    'ए': 'e', 'ऐ': 'ai', 'ओ': 'o', 'औ': 'au', 'ऑ': 'o', 'ऍ': 'e',
    'ऋ': 'ri', 'ॠ': 'ri',
    # Consonants
    'क': 'k', 'ख': 'kh', 'ग': 'g', 'घ': 'gh', 'ङ': 'n',
    'च': 'ch', 'छ': 'chh', 'ज': 'j', 'झ': 'jh', 'ञ': 'n',
    'ट': 't', 'ठ': 'th', 'ड': 'd', 'ढ': 'dh', 'ण': 'n',
    'त': 't', 'थ': 'th', 'द': 'd', 'ध': 'dh', 'न': 'n',
    'प': 'p', 'फ': 'ph', 'ब': 'b', 'भ': 'bh', 'म': 'm',
    'य': 'y', 'र': 'r', 'ल': 'l', 'व': 'v', 'ळ': 'l',
    'श': 'sh', 'ष': 'sh', 'स': 's', 'ह': 'h',
    # Dependent vowel signs (matras)
    'ा': 'a', 'ि': 'i', 'ी': 'i', 'ु': 'u', 'ू': 'u', 'ृ': 'ri',
    'े': 'e', 'ै': 'ai', 'ो': 'o', 'ौ': 'au', 'ॉ': 'o',
    # Nasalisation, visarga, virama (the virama only suppresses a vowel)
    'ँ': 'n', 'ं': 'n', 'ः': 'h', '्': '',
    # Punctuation (danda, double danda)
    '।': '.', '॥': '.',
    # Conjuncts, spelled as consonant + virama + consonant
    '\u0915\u094d\u0937': 'ksh',  # k + virama + ssa
    '\u091c\u094d\u091e': 'gya',  # j + virama + nya
}

# Nukta letters can arrive either as one precomposed code point or as
# base letter + U+093C (the form produced by Unicode normalisation), so
# both spellings are registered.
for _precomposed, _base, _roman in (
    ('\u095a', '\u0917', 'g'),   # ga + nukta
    ('\u095b', '\u091c', 'z'),   # ja + nukta
    ('\u095c', '\u0921', 'r'),   # dda + nukta
    ('\u095d', '\u0922', 'rh'),  # ddha + nukta
    ('\u095e', '\u092b', 'f'),   # pha + nukta
):
    DEVA_MAP[_precomposed] = _roman
    DEVA_MAP[_base + '\u093c'] = _roman

# Longest key, in code points; bounds the look-ahead window of the scanner.
_DEVA_MAX_KEY_LEN = max(len(_key) for _key in DEVA_MAP)


def devanagari_to_roman(text):
    """Transliterate Devanagari in *text* to Roman letters using DEVA_MAP.

    The text is scanned left to right and, at each position, the longest
    DEVA_MAP key that matches is replaced by its Roman value.  Matching the
    longest key first is what lets multi-code-point entries (conjuncts and
    base + nukta letters) take effect; a plain per-character lookup can
    never hit them.  Characters with no entry are copied through unchanged,
    so mixed Hindi/English text keeps its Latin portions intact.

    Args:
        text: The string to transliterate.

    Returns:
        A new string with every mapped Devanagari sequence replaced.
    """
    pieces = []
    pos = 0
    length = len(text)
    while pos < length:
        # Try the widest window first so conjuncts beat their components.
        for width in range(min(_DEVA_MAX_KEY_LEN, length - pos), 0, -1):
            roman = DEVA_MAP.get(text[pos:pos + width])
            if roman is not None:
                pieces.append(roman)
                pos += width
                break
        else:
            # No key starts here: keep the character as-is.
            pieces.append(text[pos])
            pos += 1
    return ''.join(pieces)
36
+
37
  def transcribe(file, model_name, language, show_timestamps, translate):
38
  if file is None:
39
  return "Please upload a video or audio file.", ""
 
49
  else:
50
  lang = language.lower()
51
  whisper_task = "translate" if translate == "Translate to English" else "transcribe"
52
+ # Hinglish: nudge toward Roman script via initial_prompt
53
+ initial_prompt = None
54
+ if hinglish_mode:
55
+ initial_prompt = (
56
+ "Yeh ek Hinglish conversation hai. "
57
+ "Transcribe using Roman script only. "
58
+ "Hindi words phonetically in English letters. "
59
+ "Example: main aaj market gaya tha, it was really crowded yaar."
60
+ )
61
+ result = m.transcribe(
62
+ file.name, language=lang, task=whisper_task,
63
+ verbose=False, initial_prompt=initial_prompt
64
+ )
65
  plain = result["text"].strip()
66
 
67
+ # Hinglish: if any Devanagari slipped through, transliterate it
68
  if hinglish_mode:
69
  try:
70
  from indic_transliteration import sanscript
71
  from indic_transliteration.transliterate import transliterate
72
  plain = transliterate(plain, sanscript.DEVANAGARI, sanscript.ITRANS)
73
  except Exception:
74
+ # Fallback: simple character-level Devanagari-to-Roman map
75
+ plain = devanagari_to_roman(plain)
76
  if show_timestamps:
77
  lines = []
78
  for seg in result["segments"]:
 
85
  from indic_transliteration.transliterate import transliterate
86
  seg_text = transliterate(seg_text, sanscript.DEVANAGARI, sanscript.ITRANS)
87
  except Exception:
88
+ seg_text = devanagari_to_roman(seg_text)
89
  lines.append(f"[{start} → {end}] {seg_text}")
90
  return "\n".join(lines), plain
91
  return plain, plain