Seemanth
/

chiluka

Model card Files Files and versions

chiluka / examples /pip_example.py

seemanthraju

Added streaming function

393129e about 2 months ago

history blame contribute delete

3.01 kB

	"""
	Chiluka TTS - pip install Example

	After installing via pip, model weights auto-download from HuggingFace
	on first use and are cached locally.

	Install:
	    pip install chiluka
	    sudo apt-get install espeak-ng

	Usage:
	    python pip_example.py --reference path/to/reference.wav
	    python pip_example.py --reference ref.wav --model telugu --language te
	"""

	import argparse


	def main():
	    """Run the Chiluka TTS command-line demo.

	    Parses the command-line options, fills in a model-specific default
	    text and language when they are omitted, prints the models reported
	    by ``list_models()``, loads the selected model, synthesizes the text
	    against the reference audio and saves the result to ``--output``.

	    When the ``hindi_english`` model is selected, a second, Hindi
	    sentence is synthesized with the same model and saved next to the
	    main output, with ``_hindi`` inserted before the file extension.
	    """
	    # Local import: only the bonus-output path handling needs it.
	    from pathlib import Path

	    parser = argparse.ArgumentParser(description="Chiluka TTS - pip Example")
	    parser.add_argument("--reference", type=str, required=True, help="Path to reference audio file")
	    parser.add_argument("--model", type=str, default="hindi_english", choices=["hindi_english", "telugu"],
	                        help="Model variant (default: hindi_english)")
	    parser.add_argument("--text", type=str, default=None, help="Text to synthesize")
	    parser.add_argument("--language", type=str, default=None, help="Language code (en-us, hi, te)")
	    parser.add_argument("--output", type=str, default="output_pip.wav", help="Output wav file path")
	    args = parser.parse_args()

	    # Import after argparse so --help is fast
	    from chiluka import Chiluka, list_models

	    # Fall back to a sample sentence / language code matching the model.
	    if args.text is None:
	        texts = {
	            "hindi_english": "Hello, I am Chiluka, a text to speech system.",
	            "telugu": "నమస్కారం, నేను చిలుక మాట్లాడుతున్నాను",
	        }
	        args.text = texts[args.model]

	    if args.language is None:
	        langs = {"hindi_english": "en-us", "telugu": "te"}
	        args.language = langs[args.model]

	    # NOTE(review): the duration printout assumes 24 kHz output, as the
	    # original hard-coded it - confirm against the library.
	    sample_rate = 24000

	    # List models
	    print("Available models:")
	    for name, info in list_models().items():
	        print(f" {name}: {info['description']}")
	    print()

	    # Load model (auto-downloads weights on first run)
	    print(f"Loading '{args.model}' model...")
	    tts = Chiluka.from_pretrained(model=args.model)

	    # Synthesize speech
	    print(f"Text: '{args.text}'")
	    print(f"Language: {args.language}")
	    print(f"Reference: {args.reference}")
	    print()

	    wav = tts.synthesize(
	        text=args.text,
	        reference_audio=args.reference,
	        language=args.language,
	        alpha=0.3,
	        beta=0.7,
	        diffusion_steps=5,
	        embedding_scale=1.0,
	    )

	    # Save output
	    tts.save_wav(wav, args.output)
	    print(f"Duration: {len(wav) / sample_rate:.2f} seconds")

	    # --- Bonus: synthesize in another language with same model ---
	    if args.model == "hindi_english":
	        print("\n--- Bonus: Hindi synthesis with same model ---")
	        hindi_wav = tts.synthesize(
	            text="नमस्ते, मैं चिलुका बोल रहा हूं",
	            reference_audio=args.reference,
	            language="hi",
	        )
	        # Build the sibling file name from stem + suffix. A plain
	        # str.replace(".wav", ...) left the name unchanged for outputs
	        # without a lowercase ".wav" (so this clip overwrote the main
	        # output) and rewrote every ".wav" occurrence in the path.
	        primary_path = Path(args.output)
	        hindi_output = str(
	            primary_path.with_name(f"{primary_path.stem}_hindi{primary_path.suffix}")
	        )
	        tts.save_wav(hindi_wav, hindi_output)
	        print(f"Duration: {len(hindi_wav) / sample_rate:.2f} seconds")


	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()