Spaces:
Running on Zero
Running on Zero
| import os | |
| from huggingface_hub import snapshot_download | |
# Root directory that holds every locally stored model snapshot.
MODEL_CACHE_DIR = "./models"
# Make sure the cache root exists before anything is downloaded into it.
os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

# One sub-directory per model inside the cache root.
SENSE_VOICE_LOCAL_PATH = os.path.join(MODEL_CACHE_DIR, "SenseVoiceSmall")
PARAFORMER_LOCAL_PATH = os.path.join(MODEL_CACHE_DIR, "paraformer-zh")
VAD_LOCAL_PATH = os.path.join(MODEL_CACHE_DIR, "fsmn-vad")
PUNC_LOCAL_PATH = os.path.join(MODEL_CACHE_DIR, "ct-punc")
def download_if_missing(repo_id, local_path, name):
    """Download a model snapshot unless a local copy is already present.

    A model counts as present only when ``local_path`` is a directory that
    contains at least one entry. A missing path, a regular file at that
    path, or an empty directory (for example one created ahead of time)
    triggers a download. Partially downloaded snapshots are NOT detected.
    Progress is reported on stdout.

    Args:
        repo_id: Repository identifier forwarded to ``snapshot_download``.
        local_path: Directory the snapshot is stored in.
        name: Human-readable model name used in the progress messages.
    """
    # Guard clause: a non-empty directory is treated as an existing copy.
    if os.path.isdir(local_path) and os.listdir(local_path):
        print(f"{name} found locally.")
        return

    print(f"Downloading {name}...")
    # Files matching *.onnx are excluded from the download.
    snapshot_download(repo_id=repo_id, local_dir=local_path, ignore_patterns=["*.onnx"])
    print(f"{name} ready.")
# Fetch each model the demo uses: (hub repository, local directory, label).
for _repo_id, _local_path, _label in (
    ("FunAudioLLM/SenseVoiceSmall", SENSE_VOICE_LOCAL_PATH, "SenseVoice"),
    ("funasr/paraformer-zh", PARAFORMER_LOCAL_PATH, "Paraformer-zh"),
    ("funasr/fsmn-vad", VAD_LOCAL_PATH, "FSMN-VAD"),
    ("funasr/ct-punc", PUNC_LOCAL_PATH, "CT-Punc"),
):
    download_if_missing(_repo_id, _local_path, _label)
| import gradio as gr | |
| import time | |
| import tempfile | |
| from funasr import AutoModel | |
| from funasr.utils.postprocess_utils import rich_transcription_postprocess | |
# Cache of already constructed models, keyed by pipeline name.
loaded_models = {}


def get_model(pipeline):
    """Return the model for ``pipeline``, building it on first use.

    Models are constructed once and then served from ``loaded_models``.

    Args:
        pipeline: Either ``"sensevoice"`` or ``"paraformer"``.

    Returns:
        The ``AutoModel`` instance for the requested pipeline.

    Raises:
        ValueError: If ``pipeline`` is not a known pipeline name.
    """
    try:
        return loaded_models[pipeline]
    except KeyError:
        pass  # Not built yet; construct it below.

    if pipeline not in ("sensevoice", "paraformer"):
        raise ValueError(f"Unknown pipeline: {pipeline}")

    # Keyword arguments shared by both pipelines.
    shared_kwargs = {
        "vad_model": VAD_LOCAL_PATH,
        "device": "cpu",
        "disable_update": True,
        "hub": "hf",
    }
    if pipeline == "sensevoice":
        specific_kwargs = {
            "model": SENSE_VOICE_LOCAL_PATH,
            "vad_kwargs": {"max_single_segment_time": 30000},
        }
    else:
        specific_kwargs = {
            "model": PARAFORMER_LOCAL_PATH,
            "punc_model": PUNC_LOCAL_PATH,
        }

    instance = AutoModel(**specific_kwargs, **shared_kwargs)
    loaded_models[pipeline] = instance
    return instance
def transcribe(audio_input, pipeline_type):
    """Transcribe audio with the selected pipeline and report timing.

    Args:
        audio_input: Value forwarded to ``model.generate`` as ``input``
            (the UI in this file supplies a file path), or ``None`` when
            no audio was provided.
        pipeline_type: Pipeline name passed to ``get_model``;
            ``"sensevoice"`` selects the SenseVoice generation options,
            any other accepted value uses the model defaults.

    Returns:
        A ``(metrics, text)`` tuple of strings. When ``audio_input`` is
        ``None`` the first element is a prompt to provide audio and the
        second is empty. When the model returns no result, ``text`` is
        empty.
    """
    if audio_input is None:
        return "Please upload or record audio.", ""

    model = get_model(pipeline_type)
    # The timer starts after get_model, so model construction is not timed.
    # perf_counter is monotonic, unlike time.time, so the elapsed value
    # cannot be distorted by system clock adjustments.
    t0 = time.perf_counter()
    if pipeline_type == "sensevoice":
        res = model.generate(
            input=audio_input,
            cache={},
            language="auto",
            use_itn=True,
            batch_size_s=60,
            merge_vad=True,
            merge_length_s=15,
        )
    else:
        res = model.generate(input=audio_input)

    # Guard against an empty result list or a first entry without "text"
    # instead of failing with IndexError/KeyError.
    # NOTE(review): assumes result entries are dicts - confirm against FunASR.
    raw_text = res[0].get("text", "") if res else ""
    text = rich_transcription_postprocess(raw_text) if raw_text else ""

    elapsed = time.perf_counter() - t0
    metrics = f"Time: {elapsed:.2f}s | Model: {pipeline_type} | Device: CPU"
    return metrics, text
# Gradio UI: intro text, audio input, model selector, transcribe button,
# two output boxes and a short local-usage guide. Components render in the
# order they are created inside the Blocks context.
with gr.Blocks(title="FunASR Demo") as demo:
    # Markdown text is kept at column 0 so rendering does not depend on
    # how the Markdown component treats leading indentation.
    gr.Markdown("""
# FunASR: Speech Recognition Demo

Industrial-grade ASR toolkit. Upload audio and get transcription instantly.

- **SenseVoice**: Multi-task (ASR + emotion + events), 5 languages, ultra-fast
- **Paraformer**: Non-autoregressive Chinese ASR with punctuation

[GitHub](https://github.com/modelscope/FunASR) | [Docs](https://modelscope.github.io/FunASR/) | [pip install funasr](https://pypi.org/project/funasr/)
""")
    # type="filepath" makes the component hand a file path to transcribe().
    audio_input = gr.Audio(
        label="Upload or Record Audio",
        sources=["upload", "microphone"],
        type="filepath",
    )
    pipeline_type = gr.Dropdown(
        choices=["sensevoice", "paraformer"],
        label="Model",
        value="sensevoice",
    )
    btn = gr.Button("Transcribe", variant="primary")
    metrics_out = gr.Textbox(label="Metrics", lines=1)
    text_out = gr.Textbox(label="Transcription", lines=8)
    # transcribe returns (metrics, text), matching the order of the outputs.
    btn.click(
        transcribe,
        inputs=[audio_input, pipeline_type],
        outputs=[metrics_out, text_out],
    )
    # The install command is a shell command, so it gets its own bash fence
    # instead of sitting inside the Python snippet.
    gr.Markdown("""
### Install & Use Locally

```bash
pip install funasr
```

```python
from funasr import AutoModel
model = AutoModel(model="funasr/paraformer-zh", hub="hf", vad_model="funasr/fsmn-vad", punc_model="funasr/ct-punc")
result = model.generate(input="audio.wav")
```
""")

# Enable request queuing and start the app.
demo.queue().launch()