Spaces:

shreyask
/

KittenTTS-WebGPU

Running

KittenTTS-WebGPU / src /lib /text-cleaner.ts

feat: KittenTTS WebGPU browser demo

9b1aef8 verified 1 day ago

1.25 kB

	/**
	* TextCleaner — maps IPA phoneme characters to integer token IDs.
	* Direct port of KittenTTS Python TextCleaner class.
	* https://github.com/KittenML/KittenTTS
	*/

	// Symbol inventory. A character's position in `symbols` is its token ID, so
	// neither the contents nor the order of these strings may change.
	const _pad = "$";
	const _punctuation = ';:,.!?¡¿—…"«»"" ';
	const _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
	const _letters_ipa =
		"ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ";

	// Flat list of every known symbol: the pad character first (ID 0), then
	// punctuation, ASCII letters and IPA characters. Spreading a string yields
	// one entry per Unicode code point, so a combining mark is its own symbol.
	const symbols = [_pad, ..._punctuation, ..._letters, ..._letters_ipa];

	// Reverse lookup from symbol to its position in `symbols`. If a symbol occurs
	// more than once in the list, the later position overwrites the earlier one.
	const charToIndex: Record<string, number> = {};
	symbols.forEach((symbol, position) => {
		charToIndex[symbol] = position;
	});

	/**
	 * Translates a string into token IDs, one per recognised character.
	 *
	 * The text is walked by Unicode code point and each code point is looked up
	 * in `charToIndex`. Characters with no entry are dropped silently, so the
	 * result may be shorter than the input.
	 *
	 * @param text - Text (normally an IPA phoneme string) to convert.
	 * @returns The IDs of the recognised characters, in input order.
	 */
	export function cleanText(text: string): number[] {
		return Array.from(text)
			.map((symbol) => charToIndex[symbol])
			.filter((id): id is number => id !== undefined);
	}

	/**
	 * Builds the full token sequence for a phoneme string.
	 *
	 * The IDs produced by `cleanText` are framed the same way the Python
	 * implementation does it: a leading 0, then a trailing 10 followed by 0.
	 * In the symbol table, 0 is the ID of the pad character "$" and 10 is the
	 * ID of "…".
	 *
	 * @param phonemes - Phoneme string to tokenize.
	 * @returns `[0, ...ids, 10, 0]`; for input with no recognised characters
	 * this is `[0, 10, 0]`.
	 */
	export function tokenize(phonemes: string): number[] {
		const body = cleanText(phonemes);
		return [0, ...body, 10, 0];
	}