"""
Chiluka TTS - pip install Example

After installing via pip, model weights auto-download from HuggingFace
on first use and are cached locally.

Install:
    pip install chiluka
    sudo apt-get install espeak-ng

Usage:
    python pip_example.py --reference path/to/reference.wav
    python pip_example.py --reference ref.wav --model telugu --language te
"""
|
|
import argparse
import os
|
|
|
|
def _hindi_output_path(output):
    """Return the file path used for the bonus Hindi clip.

    ``_hindi`` is inserted immediately before the file extension of
    *output*. Splitting on the extension (instead of replacing the
    substring ``".wav"``) guarantees the result always differs from
    *output*, so the bonus clip can never overwrite the main clip, and
    directory components that happen to contain ``.wav`` are left alone.
    """
    stem, ext = os.path.splitext(output)
    return f"{stem}_hindi{ext}"


def main(argv=None):
    """Run the Chiluka pip example: load a model and synthesize speech.

    Parses the command line, prints the models reported by
    ``list_models()``, loads the requested model with
    ``Chiluka.from_pretrained``, synthesizes ``--text`` using the
    ``--reference`` audio and writes the result to ``--output``. When the
    ``hindi_english`` model is selected, a second Hindi clip is
    synthesized and saved next to the main output with a ``_hindi``
    suffix.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is the
            behavior when the script is run directly.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Chiluka TTS - pip Example")
    parser.add_argument("--reference", type=str, required=True, help="Path to reference audio file")
    parser.add_argument("--model", type=str, default="hindi_english", choices=["hindi_english", "telugu"],
                        help="Model variant (default: hindi_english)")
    parser.add_argument("--text", type=str, default=None, help="Text to synthesize")
    parser.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)")
    parser.add_argument("--output", type=str, default="output_pip.wav", help="Output wav file path")
    args = parser.parse_args(argv)

    # Imported after argument parsing: --help and usage errors exit
    # before the chiluka package is imported.
    from chiluka import Chiluka, list_models

    # NOTE(review): the duration report divides the sample count by 24000;
    # presumably this is the model's output sample rate - confirm.
    sample_rate = 24000

    # Fall back to a per-model sample sentence when --text is omitted.
    if args.text is None:
        texts = {
            "hindi_english": "Hello, I am Chiluka, a text to speech system.",
            "telugu": "నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను",
        }
        args.text = texts[args.model]

    # Fall back to a per-model language code when --language is omitted.
    if args.language is None:
        langs = {"hindi_english": "en-us", "telugu": "te"}
        args.language = langs[args.model]

    print("Available models:")
    for name, info in list_models().items():
        print(f" {name}: {info['description']}")
    print()

    print(f"Loading '{args.model}' model...")
    tts = Chiluka.from_pretrained(model=args.model)

    print(f"Text: '{args.text}'")
    print(f"Language: {args.language}")
    print(f"Reference: {args.reference}")
    print()

    wav = tts.synthesize(
        text=args.text,
        reference_audio=args.reference,
        language=args.language,
        alpha=0.3,
        beta=0.7,
        diffusion_steps=5,
        embedding_scale=1.0,
    )

    tts.save_wav(wav, args.output)
    print(f"Duration: {len(wav) / sample_rate:.2f} seconds")

    if args.model == "hindi_english":
        print("\n--- Bonus: Hindi synthesis with same model ---")
        hindi_wav = tts.synthesize(
            text="नमस्ते, मैं चिलुका बोल रहा हूं",
            reference_audio=args.reference,
            language="hi",
        )
        # Derived from the extension so it can never equal args.output.
        hindi_output = _hindi_output_path(args.output)
        tts.save_wav(hindi_wav, hindi_output)
        print(f"Duration: {len(hindi_wav) / sample_rate:.2f} seconds")
|
|
|
|
# Script entry point: run the example only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|