Spaces:
Running
Running
Fix build: replace KittenTTS with edge-tts, pin Python 3.12, add packages.txt for imagemagick
1b20d8b verified | import os | |
| from config import ROOT_DIR, get_tts_voice, is_running_in_spaces | |
# Lookup table: friendly voice name (as configured for the app) -> the
# edge-tts voice ID handed to the edge-tts backend.
EDGE_TTS_VOICES = dict(
    Jasper="en-US-GuyNeural",
    Bella="en-US-JennyNeural",
    Luna="en-GB-SoniaNeural",
    Bruno="en-US-ChristopherNeural",
    Rosie="en-AU-NatashaNeural",
    Hugo="en-GB-RyanNeural",
    Kiki="en-US-AriaNeural",
    Leo="en-US-DavisNeural",
)
def _use_edge_tts() -> bool:
    """Report whether edge-tts should be used instead of KittenTTS.

    Returns:
        ``True`` when ``is_running_in_spaces()`` is truthy (e.g. on HF
        Spaces) or when ``kittentts`` cannot be imported; ``False`` when
        KittenTTS is importable outside of Spaces.
    """
    if not is_running_in_spaces():
        # Outside Spaces the choice depends only on whether KittenTTS imports.
        try:
            from kittentts import KittenTTS  # noqa: F401
        except ImportError:
            kitten_unavailable = True
        else:
            kitten_unavailable = False
        return kitten_unavailable
    return True
class TTS:
    """Text-to-speech front end backed by either KittenTTS or edge-tts.

    The backend is selected once, in ``__init__``, from ``_use_edge_tts()``.
    """

    def __init__(self) -> None:
        """Read the configured voice and, if KittenTTS is used, load its model."""
        self._voice = get_tts_voice()
        self._use_edge = _use_edge_tts()
        if not self._use_edge:
            import soundfile  # noqa: F401 — ensure available

            from kittentts import KittenTTS as KittenModel

            self._model = KittenModel("KittenML/kitten-tts-mini-0.8")
            # Sample rate handed to soundfile when writing KittenTTS audio.
            self._sample_rate = 24000
        else:
            # The edge-tts path does not use a local model object.
            self._model = None

    def synthesize(self, text, output_file=os.path.join(ROOT_DIR, ".mp", "audio.wav")):
        """Synthesize ``text`` to ``output_file`` with the selected backend.

        Args:
            text: The text to speak.
            output_file: Destination path; defaults to ``<ROOT_DIR>/.mp/audio.wav``.

        Returns:
            The path of the written audio file (``output_file``).
        """
        if self._use_edge:
            return self._synthesize_edge(text, output_file)
        return self._synthesize_kitten(text, output_file)

    def _synthesize_kitten(self, text, output_file):
        """Generate audio with the KittenTTS model and write it via soundfile.

        Returns:
            ``output_file``.
        """
        import soundfile as sf

        audio = self._model.generate(text, voice=self._voice)
        sf.write(output_file, audio, self._sample_rate)
        return output_file

    def _synthesize_edge(self, text, output_file):
        """Generate audio with edge-tts and place the result at ``output_file``.

        edge-tts outputs mp3. The mp3 is written next to ``output_file`` and
        then converted to wav with pydub; if pydub is not installed the mp3
        is moved to ``output_file`` unchanged. When ``output_file`` itself
        has an ``.mp3`` extension the audio is saved there directly, with no
        temporary file and no conversion.

        Returns:
            ``output_file``.

        NOTE: uses ``asyncio.run``, which raises ``RuntimeError`` if an event
        loop is already running in the calling thread.
        """
        import asyncio

        import edge_tts

        # Voice names missing from the table fall back to "en-US-GuyNeural".
        voice_id = EDGE_TTS_VOICES.get(self._voice, "en-US-GuyNeural")

        async def _generate(target_path):
            communicate = edge_tts.Communicate(text, voice_id)
            await communicate.save(target_path)

        # splitext only looks at the final path component, so a dot in a
        # directory name (such as ".mp") cannot be mistaken for the extension.
        stem, extension = os.path.splitext(output_file)
        if extension.lower() == ".mp3":
            # The temp path would collide with the output path; removing or
            # renaming the "temporary" file would then destroy the result.
            asyncio.run(_generate(output_file))
            return output_file

        mp3_path = stem + ".mp3"
        asyncio.run(_generate(mp3_path))

        # Convert mp3 to wav for compatibility with the rest of the pipeline.
        try:
            from pydub import AudioSegment
        except ImportError:
            # pydub not available: use the mp3 data directly under the
            # expected output path. os.replace overwrites an existing file.
            os.replace(mp3_path, output_file)
        else:
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(output_file, format="wav")
            os.remove(mp3_path)
        return output_file