Wfloat
/

wfloat-tts

Model card Files Files and versions

wfloat-tts / src /wfloat_tts /processor.py

mitchsayre's picture

Init

f71bc95 5 days ago

3.65 kB

	from __future__ import annotations

	from dataclasses import dataclass
	from typing import Any, Dict, List

	from .constants import DEFAULT_ESPEAK_VOICE, EMOTION_TO_SYMBOL, INTENSITY_SYMBOLS


	@dataclass(frozen=True)
	class PreparedInput:
	    """Immutable record of one utterance after text preprocessing.

	    Instances are produced by ``prepare_input``. The dataclass is frozen, so
	    attributes cannot be reassigned after construction (the list fields
	    themselves remain ordinary mutable lists).
	    """

	    # Input text exactly as it was handed to ``prepare_input``.
	    text: str
	    # Phoneme symbols for the text, followed by the emotion symbol and then
	    # the intensity symbol.
	    phonemes: List[str]
	    # One integer id per entry of ``phonemes``, in the same order.
	    token_ids: List[int]
	    # Emotion name after stripping and lower-casing.
	    emotion: str
	    # Intensity after clamping into the closed interval [0.0, 1.0].
	    intensity: float
	    # Symbol looked up for ``emotion``.
	    emotion_symbol: str
	    # Symbol selected for ``intensity``.
	    intensity_symbol: str


	def clamp_unit(value: float) -> float:
	    """Clamp *value* into the closed interval [0.0, 1.0].

	    NaN and anything below zero become ``0.0``; anything above one becomes
	    ``1.0``. Every other input is returned converted with ``float()``.
	    """
	    # NaN is the only float that compares unequal to itself, so this detects
	    # it without needing an extra import.
	    is_nan = value != value
	    if is_nan or value < 0.0:
	        return 0.0

	    return 1.0 if value > 1.0 else float(value)


	def load_token_map(config: dict[str, Any]) -> Dict[str, int]:
	    """Build a symbol -> token id mapping from ``config["phoneme_id_map"]``.

	    Each value in the map may be either an ``int`` (used as-is) or a list
	    holding exactly one item (converted with ``int()``).

	    Raises:
	        KeyError: ``phoneme_id_map`` is absent from *config* or is not a dict.
	        ValueError: a value is neither an int nor a single-item list.
	    """
	    raw_map = config.get("phoneme_id_map")
	    if not isinstance(raw_map, dict):
	        raise KeyError("config.json is missing phoneme_id_map")

	    resolved: Dict[str, int] = {}

	    for symbol in raw_map:
	        entry = raw_map[symbol]

	        if isinstance(entry, int):
	            resolved[symbol] = entry
	        elif isinstance(entry, list) and len(entry) == 1:
	            # Single-item list form: unwrap the one element.
	            resolved[symbol] = int(entry[0])
	        else:
	            raise ValueError(
	                f"Unsupported token mapping for symbol {symbol!r}: expected int or single-item list"
	            )

	    return resolved


	def intensity_to_symbol(intensity: float) -> str:
	    """Pick the entry of ``INTENSITY_SYMBOLS`` that corresponds to *intensity*.

	    The intensity is first clamped with ``clamp_unit`` and then scaled by the
	    number of symbols and truncated to an index. The index is capped at the
	    last position, so an intensity of 1.0 selects the final symbol instead of
	    running off the end.
	    """
	    unit = clamp_unit(intensity)
	    bucket_count = len(INTENSITY_SYMBOLS)

	    bucket = int(unit * bucket_count)
	    last_bucket = bucket_count - 1
	    if bucket > last_bucket:
	        bucket = last_bucket
	    if bucket < 0:
	        bucket = 0

	    return INTENSITY_SYMBOLS[bucket]


	def normalize_emotion(emotion: str | None) -> str:
	    """Return the canonical (stripped, lower-case) form of an emotion name.

	    ``None`` and the empty string fall back to ``"neutral"``. The fallback is
	    applied before stripping, so a whitespace-only string is not replaced: it
	    strips down to ``""`` and is then validated like any other name.

	    Raises:
	        ValueError: the normalized name is not a key of ``EMOTION_TO_SYMBOL``.
	    """
	    value = (emotion or "neutral").strip().lower()
	    if value not in EMOTION_TO_SYMBOL:
	        # Report the caller's original argument, not the normalized form, so
	        # the message shows exactly what was passed in.
	        raise ValueError(
	            f"Unsupported emotion {emotion!r}. Expected one of: {', '.join(EMOTION_TO_SYMBOL)}"
	        )

	    return value


	def phonemize_full_utterance(text: str, espeak_voice: str = DEFAULT_ESPEAK_VOICE) -> List[str]:
	    """Phonemize *text* and flatten the result into a single list.

	    ``piper_phonemize.phonemize_espeak`` is imported lazily, so the dependency
	    is only required when phonemization is actually requested. It returns a
	    sequence of groups; empty groups are dropped and the remaining ones are
	    concatenated with a single ``" "`` entry between consecutive groups.

	    Raises:
	        ImportError: ``piper_phonemize`` is not installed; the message
	            contains the install command.
	    """
	    try:
	        from piper_phonemize import phonemize_espeak
	    except ImportError as exc:
	        raise ImportError(
	            "wfloat-tts requires piper-phonemize for phonemization. "
	            "Install it with: pip install \"piper-phonemize==1.3.0\" "
	            "-f https://k2-fsa.github.io/icefall/piper_phonemize"
	        ) from exc

	    flattened: List[str] = []

	    # filter(None, ...) skips empty groups.
	    for group in filter(None, phonemize_espeak(text, espeak_voice)):
	        # Only add a separator once something has already been emitted, so
	        # the result never starts with a space entry.
	        if flattened:
	            flattened.append(" ")
	        flattened.extend(group)

	    return flattened


	def prepare_input(
	    text: str,
	    config: dict[str, Any],
	    emotion: str = "neutral",
	    intensity: float = 0.5,
	    espeak_voice: str = DEFAULT_ESPEAK_VOICE,
	) -> PreparedInput:
	    """Turn raw text plus emotion settings into a ``PreparedInput``.

	    Steps, in order:
	      1. Normalize *emotion* and clamp *intensity* to [0.0, 1.0].
	      2. Phonemize *text* with the given eSpeak voice.
	      3. Append the emotion symbol and then the intensity symbol to the
	         phoneme list.
	      4. Map every symbol to its token id via ``config["phoneme_id_map"]``.

	    Raises:
	        ValueError: unsupported emotion, or a malformed token map entry.
	        KeyError: the token map is missing from *config*, or one or more
	            symbols have no entry in it (all missing symbols are listed,
	            de-duplicated and sorted).
	        ImportError: the phonemizer dependency is not installed.
	    """
	    emotion_name = normalize_emotion(emotion)
	    unit_intensity = clamp_unit(intensity)

	    symbols = phonemize_full_utterance(text, espeak_voice=espeak_voice)
	    emotion_symbol = EMOTION_TO_SYMBOL[emotion_name]
	    intensity_symbol = intensity_to_symbol(unit_intensity)
	    # The two control symbols go after the phonemes: emotion first, then
	    # intensity.
	    symbols.append(emotion_symbol)
	    symbols.append(intensity_symbol)

	    token_map = load_token_map(config)

	    # Collect every unmapped symbol up front so a single error names them all.
	    unknown = {symbol for symbol in symbols if symbol not in token_map}
	    if unknown:
	        joined = ", ".join(sorted(unknown))
	        raise KeyError(f"Missing symbol(s) in config.json phoneme_id_map: {joined}")

	    return PreparedInput(
	        text=text,
	        phonemes=symbols,
	        token_ids=[token_map[symbol] for symbol in symbols],
	        emotion=emotion_name,
	        intensity=unit_intensity,
	        emotion_symbol=emotion_symbol,
	        intensity_symbol=intensity_symbol,
	    )