"""
Chiluka TTS - HuggingFace Hub Example

Load model weights directly from HuggingFace Hub.
No need to clone the repository or download weights manually.

Requirements:
    pip install chiluka
    sudo apt-get install espeak-ng

Usage:
    python huggingface_example.py --reference path/to/reference.wav
    python huggingface_example.py --reference ref.wav --model telugu --language te --text "నమస్కారం"
"""
|
|
import argparse
import os

from chiluka import Chiluka, list_models
|
|
|
|
def main(argv=None):
    """Run the Chiluka HuggingFace Hub example from the command line.

    Parses the CLI options, prints the models reported by ``list_models()``,
    fills in model-dependent defaults for ``--text`` and ``--language``, loads
    the selected model with ``Chiluka.from_pretrained``, synthesizes the text
    against the reference audio and saves the result to ``--output``.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) keeps the plain
            command-line behaviour.

    Raises:
        SystemExit: Raised by ``argparse`` for invalid arguments, including a
            ``--reference`` path that does not point to an existing file.
    """
    parser = argparse.ArgumentParser(description="Chiluka TTS - HuggingFace Hub Example")
    parser.add_argument("--reference", type=str, required=True, help="Path to reference audio file")
    parser.add_argument(
        "--model",
        type=str,
        default="hindi_english",
        choices=["hindi_english", "telugu"],
        help="Model variant to use (default: hindi_english)",
    )
    parser.add_argument("--text", type=str, default=None, help="Text to synthesize")
    parser.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)")
    parser.add_argument("--output", type=str, default="output_hf.wav", help="Output wav file path")
    parser.add_argument("--device", type=str, default=None, help="Device: cuda or cpu")
    args = parser.parse_args(argv)

    # Fail fast: a mistyped reference path should be reported before the
    # model is fetched from the Hub and loaded, not after.
    if not os.path.isfile(args.reference):
        parser.error(f"reference audio file not found: {args.reference}")

    # Show every available model and mark the one that was selected.
    print("Available models:")
    for name, info in list_models().items():
        marker = " <--" if name == args.model else ""
        print(f" {name}: {info['description']}{marker}")
    print()

    # Options the user did not supply get defaults that match the model.
    is_telugu = args.model == "telugu"
    if args.text is None:
        args.text = (
            "నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను"
            if is_telugu
            else "Hello, I am Chiluka, a text to speech system."
        )
    if args.language is None:
        args.language = "te" if is_telugu else "en-us"

    print(f"Loading '{args.model}' model from HuggingFace Hub...")
    tts = Chiluka.from_pretrained(model=args.model, device=args.device)

    print(f"Synthesizing: '{args.text}'")
    print(f"Language: {args.language}")
    wav = tts.synthesize(
        text=args.text,
        reference_audio=args.reference,
        language=args.language,
    )

    tts.save_wav(wav, args.output)
    # NOTE(review): 24000 is presumably the output sample rate in Hz;
    # confirm against the Chiluka API before relying on this duration.
    print(f"Duration: {len(wav) / 24000:.2f} seconds")
|
|
|
|
# Script entry point: run the example only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|