| | import torch |
| | import torchaudio |
| | import gradio as gr |
| | import requests |
| |
|
| | |
# Place the models on the GPU when CUDA is available, otherwise on the CPU,
# instead of unconditionally requiring a CUDA device at import time.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Content encoder, loaded from the "hubert_soft" entry point of bshall/hubert.
hubert = torch.hub.load(
    "bshall/hubert:main", "hubert_soft", trust_repo=True
).to(DEVICE)

# Acoustic model, loaded from the "hubert_soft" entry point of
# bshall/acoustic-model; voice_conversion calls its generate() method.
acoustic_model = torch.hub.load(
    "bshall/acoustic-model:main", "hubert_soft", trust_repo=True
).to(DEVICE)

# Vocoder, loaded from the "hifigan" entry point of bshall/hifigan.
# NOTE(review): the upstream soft-VC example appears to pair the soft acoustic
# model with the "hifigan_hubert_soft" entry point -- confirm that the base
# "hifigan" checkpoint is really the one intended here.
vocoder = torch.hub.load(
    "bshall/hifigan:main", "hifigan", trust_repo=True
).to(DEVICE)
| |
|
def voice_conversion(input_audio):
    """Convert the voice of an audio file and return the path of the result.

    The file is loaded, mixed down to a single channel, resampled to 16 kHz,
    and then passed through the content encoder (``hubert``), the acoustic
    model (``acoustic_model``) and the vocoder (``vocoder``) in that order.

    Args:
        input_audio: Path of the audio file to convert.

    Returns:
        The path of the written file, always ``"output.wav"``.
    """
    # Sample rate used by the upstream soft-VC usage example; the input is
    # converted to it and the output is saved with it.
    # NOTE(review): confirm 16 kHz against the loaded checkpoints.
    target_rate = 16000

    waveform, sample_rate = torchaudio.load(input_audio)

    # torchaudio.load returns a (channels, time) tensor. Average the channels
    # so that a stereo upload becomes a single mono signal.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Saving the result with the *input* rate would play it back at the wrong
    # speed whenever the upload is not already at the target rate.
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, target_rate
        )

    # Follow whatever device the encoder lives on rather than assuming CUDA.
    device = next(hubert.parameters()).device

    # Add the leading batch dimension: (1, channels=1, time).
    source = waveform.unsqueeze(0).to(device)

    with torch.inference_mode():
        # Speech units come from the encoder's units() method; calling the
        # module directly runs its forward pass instead
        # (presumably the training path -- see the upstream usage example).
        units = hubert.units(source)
        # Swap the last two axes before the vocoder, as the upstream
        # example does.
        mel_spec = acoustic_model.generate(units).transpose(1, 2)
        audio_out = vocoder(mel_spec)

    output_path = "output.wav"
    # torchaudio.save expects a 2-D (channels, time) tensor, so drop the
    # batch dimension before writing.
    torchaudio.save(output_path, audio_out.squeeze(0).cpu(), target_rate)

    return output_path
| |
|
| | |
# Web UI: one audio input handed to voice_conversion as a file path, and one
# audio output fed from the file path that voice_conversion returns.
# The top-level gr.Audio component replaces the legacy gr.inputs / gr.outputs
# namespaces, which are deprecated in Gradio 3 and removed in Gradio 4.
# No source argument is passed because its keyword differs between major
# versions; file upload is available by default.
iface = gr.Interface(
    fn=voice_conversion,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Voice Conversion Demo",
    description="Upload an audio file to convert its voice using HuBERT and other models.",
)
| |
|
| | |
# Start the web server only when this file is executed as a script, so that
# importing the module does not block on a running server.
if __name__ == "__main__":
    iface.launch()