|
|
|
|
|
|
|
|
import os |
|
|
# Runtime tuning applied before the `torch` import below:
#   * OMP_NUM_THREADS=1 keeps OpenMP-backed CPU kernels on a single thread.
#   * PYTORCH_CUDA_ALLOC_CONF caps the CUDA caching allocator's split size at
#     128 MB.
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
    }
)
|
|
|
|
|
import gradio as gr |
|
|
import spaces |
|
|
from transformers import pipeline |
|
|
import torch |
|
|
import gc |
|
|
import re |
|
|
|
|
|
|
|
|
# Angle-bracket tokens such as <UNK>.
_ANGLE_TAG_RE = re.compile(r'<[^>]+>')
# Square-bracket annotations such as [HIK...] or [laughter].
_BRACKET_TAG_RE = re.compile(r'\[.*?\]')
# Bare noise labels written as standalone words, matched case-insensitively.
_NOISE_WORD_RE = re.compile(
    r'(?i)\b(unk|hik|laughter|music|cough|applause|noise|background)\b'
)
# Any run of whitespace (spaces, tabs, newlines, ...).
_WHITESPACE_RUN_RE = re.compile(r'\s+')
# Spaces sitting directly in front of sentence punctuation.
_SPACE_BEFORE_PUNCT_RE = re.compile(r' +([.,!?])')


def clean_asr_text(text: str) -> str:
    """Strip non-speech markers from an ASR transcript and tidy its spacing.

    The steps, in order:

    1. Remove ``<...>`` tokens and ``[...]`` annotations.
    2. Remove the standalone words unk, hik, laughter, music, cough,
       applause, noise and background (any letter case). Note that this also
       deletes those words when they were genuinely spoken.
    3. Collapse every whitespace run to a single space.
    4. Remove spaces in front of ``.``, ``,``, ``!`` and ``?``.
    5. Trim leading and trailing whitespace.

    Args:
        text: Raw transcript. Any falsy value (``""``, ``None``) is accepted.

    Returns:
        The cleaned transcript, or ``""`` when ``text`` is falsy or nothing is
        left after cleaning.
    """
    if not text:
        return ""

    text = _ANGLE_TAG_RE.sub('', text)
    text = _BRACKET_TAG_RE.sub('', text)
    text = _NOISE_WORD_RE.sub('', text)

    # After this substitution the only whitespace left is single spaces, so
    # one punctuation pass plus a final strip() is sufficient.
    text = _WHITESPACE_RUN_RE.sub(' ', text)
    text = _SPACE_BEFORE_PUNCT_RE.sub(r'\1', text)

    return text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an audio file and return the cleaned transcript.

    A fresh ASR pipeline is created on every call and released again before
    returning, so no model state is kept between requests.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (nothing uploaded) short-circuits with a prompt to upload a file.

    Returns:
        The transcript after ``clean_asr_text``; a ``"Villa við umritun: ..."``
        message if loading the model or running it raised an exception; or a
        fallback notice when the cleaned transcript is empty.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    pipe = None
    try:
        # Model loading sits inside the try block so that a download or CUDA
        # failure is reported to the user like any other transcription error.
        pipe = pipeline(
            "automatic-speech-recognition",
            model="palli23/whisper-tiny-distilled-spjallromur-polish-v5",
            torch_dtype=torch.float16,
            device=0,
        )

        result = pipe(
            audio_path,
            chunk_length_s=30,
            batch_size=8,
            return_timestamps=False,
            generate_kwargs={
                "num_beams": 5,
                "repetition_penalty": 1.3,
                "no_repeat_ngram_size": 4,
                "temperature": 0.0,
                # NOTE(review): [-1] is the openai-whisper shorthand for
                # "suppress non-speech tokens"; confirm that transformers
                # interprets it the same way rather than as a literal token id.
                "suppress_tokens": [-1],
                "max_new_tokens": 444,
            },
        )

        cleaned = clean_asr_text(result.get("text", ""))
    except Exception as e:
        cleaned = f"Villa við umritun: {e}"
    finally:
        # Drop the pipeline and release cached GPU memory on every exit path.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return cleaned or "(ekkert texti fannst eða villa kom upp)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: header text, one audio input, a transcribe button and a text
# output, wired to `transcribe_3min`.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur (hreinsuð útgáfa)")
    gr.Markdown(
        "**Model:** palli23/whisper-tiny-distilled-spjallromur-polish-v5 \n"
        "**Stillingar:** no timestamps, temperature=0.0, repetition_penalty=1.3, no_repeat_ngram_size=4 \n"
        "Reynir að fjarlægja <UNK>, [HIK...], [laughter] o.s.frv."
    )
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    # type="filepath" hands the handler a path on disk rather than raw samples.
    audio_in = gr.Audio(
        type="filepath",
        label="Hlaðið upp .mp3 / .wav / .m4a (allt að ~5 mín)",
        format="mp3",
    )

    btn = gr.Button("Umrita", variant="primary", size="lg")

    output = gr.Textbox(
        lines=25,
        label="Útskrift (hreinsuð)",
        placeholder="Hér kemur textinn...",
    )

    # The example clip is optional (the label says as much), so it is only
    # registered when the file is actually present next to the app.
    example_clip = "example_clip_14nov2025.mp3"
    if os.path.exists(example_clip):
        examples = gr.Examples(
            examples=[
                [example_clip],
            ],
            inputs=audio_in,
            label="Dæmi (ef þú hefur sett upp dæmi skrá)",
        )
    else:
        examples = None

    btn.click(
        fn=transcribe_3min,
        inputs=audio_in,
        outputs=output,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Launch settings used when the file is run as a script: bind to every
    # network interface on port 7860, with no public share link and no debug
    # mode.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
    }
    demo.launch(**launch_options)