Spaces:
Running
Running
| /** | |
| * TextCleaner — maps IPA phoneme characters to integer token IDs. | |
| * Direct port of KittenTTS Python TextCleaner class. | |
| * https://github.com/KittenML/KittenTTS | |
| */ | |
// Symbol inventory. A symbol's token ID is its position in `symbols`, so the
// order of the groups and of the characters inside each string must not change.
const _pad = "$";
const _punctuation = ';:,.!?¡¿—…"«»"" ';
const _letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const _letters_ipa =
  "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ";

// Pad symbol first, then every code point of each group in declaration order.
// Array.from splits a string by code point, so a combining mark becomes its
// own entry instead of being merged with its neighbour.
const symbols = [_pad].concat(
  Array.from(_punctuation),
  Array.from(_letters),
  Array.from(_letters_ipa),
);

/**
 * Lookup from a single symbol to its token ID (its index in `symbols`).
 * When a symbol occurs more than once in the inventory, the later position
 * overwrites the earlier one.
 */
const charToIndex: Record<string, number> = {};
symbols.forEach((symbol, position) => {
  charToIndex[symbol] = position;
});
/**
 * Converts a string into token IDs by looking up each code point in
 * `charToIndex`.
 *
 * Symbols that have no entry in the table are skipped silently, so the
 * result may be shorter than the input.
 *
 * @param text - String of symbols to encode.
 * @returns Token IDs of the recognised symbols, in input order.
 */
export function cleanText(text: string): number[] {
  return Array.from(text)
    .map((symbol) => charToIndex[symbol])
    .filter((id): id is number => id !== undefined);
}
/**
 * Encodes a phoneme string and wraps it in the boundary tokens.
 *
 * Output layout: `[0, ...ids, 10, 0]` — a leading 0, the IDs produced by
 * `cleanText`, then a trailing 10 followed by 0. The original comment says this
 * mirrors the Python implementation (insert 0 at start, append 10, append 0).
 *
 * @param phonemes - Phoneme string to encode.
 * @returns Token IDs including the start/end tokens.
 */
export function tokenize(phonemes: string): number[] {
  return [0, ...cleanText(phonemes), 10, 0];
}