| |
| |
| |
| |
|
|
/**
 * Spelling table consulted by {@link normalizeWolof}.
 *
 * Keys are the lower-cased, punctuation-free form of an input token; values
 * are the text emitted in its place. Some entries respell a token
 * (e.g. "jai" -> "jaay"), some substitute a different word
 * (e.g. "riz" -> "ceeb"), and some map a token to itself
 * (e.g. "omo" -> "omo"), whose only effect is to emit the token lower-cased.
 */
const NORMALIZATION_RULES: Readonly<Record<string, string>> = {
  "damae": "damay",
  "dama": "damay",
  "dma": "damay",
  "jai": "jaay",
  "jaai": "jaay",
  "jaye": "jaay",
  "jendi": "jënd",
  "fei": "fey",
  "fay": "fey",
  "yere": "yére",
  "yare": "yére",
  "sandwiche": "sandwich",
  "pan": "mburu",
  "cafe": "café",
  "sabu": "sabu",
  "omo": "omo",
  "patat": "patas",
  "ognon": "sooble",
  "riz": "ceeb",
  "yof": "Yoff",
  "dakar": "Dakar",
  "pikine": "Pikine",
  "guediawaye": "Guédiawaye",
  "keur": "kër",
  "ker": "kër",
  "sikarche": "ci kër",
  "sikarshe": "ci kër",
  "sikarce": "ci kër",
  "sikaarché": "ci kër",
  "quartier": "quartier",
  "banlieu": "banlieue",
  "si": "ci",
  "fane": "fan",
  "fana": "fan",
  "lana": "lan",
  "lanna": "lan",
  "nakka": "naka",
  "nakha": "naka",
  "niak": "ñàkk",
  "niakk": "ñàkk",
  "dencal": "denc",
  "limal": "lim",
  "ganee": "gañ",
  "gane": "gañ",
  "borom": "boroom",
  "xaalisou": "xaalis",
  "xaliss": "xaalis",
};

/**
 * Place names that are always emitted with this exact casing. Only consulted
 * for tokens that have no entry in {@link NORMALIZATION_RULES}.
 */
const CAPITALIZED_PLACES: readonly string[] = ["Yoff", "Dakar", "Pikine", "Guédiawaye"];

/** Punctuation removed from anywhere inside a token before it is looked up. */
const INNER_PUNCTUATION_PATTERN = /[.,/#!$%^&*;:{}=\-_`~()]/g;

/**
 * Characters treated as punctuation when they lead or trail a token. This is
 * the inner set plus `?`, quotes, square brackets and guillemets, so that
 * e.g. "lana?" is still recognised. Edge punctuation is kept in the output.
 */
const EDGE_PUNCTUATION = ".,/#!$%^&*;:{}=-_`~()?\"'[]«»";

/** Outcome of {@link normalizeWolof}. */
export interface NormalizationResult {
  /** Input text after whitespace clean-up, word replacement and capitalisation of the first character. */
  normalizedText: string;
  /** De-duplicated "from -> to" descriptions of the replacements made, in first-seen order. */
  changes: string[];
}

/**
 * Normalises one whitespace-free token.
 *
 * @param token - A run of non-whitespace characters.
 * @returns The text to emit and, when a replacement should be reported, its
 *   "from -> to" description (otherwise `null`).
 */
function normalizeToken(token: string): { text: string; change: string | null } {
  // Peel punctuation off both ends so it can be re-attached around the
  // replacement instead of being silently dropped.
  let start = 0;
  let end = token.length;
  while (start < end && EDGE_PUNCTUATION.includes(token.charAt(start))) start++;
  while (end > start && EDGE_PUNCTUATION.includes(token.charAt(end - 1))) end--;

  const core = token.slice(start, end);
  const key = core.toLowerCase().replace(INNER_PUNCTUATION_PATTERN, "");
  if (key === "") return { text: token, change: null };

  let replacement: string | undefined;
  let change: string | null = null;

  // Own-property check: a bare `RULES[key]` would also resolve inherited
  // members such as "constructor" and crash when treated as a string.
  if (Object.prototype.hasOwnProperty.call(NORMALIZATION_RULES, key)) {
    replacement = NORMALIZATION_RULES[key];
    // Case-only differences (e.g. "dakar" -> "Dakar") are not reported here.
    if (replacement !== undefined && key !== replacement.toLowerCase()) {
      change = `${key} -> ${replacement}`;
    }
  } else {
    replacement = CAPITALIZED_PLACES.find((place) => place.toLowerCase() === key);
    // NOTE(review): unlike the rule branch, a case-only fix IS reported here;
    // kept as-is because callers may rely on it.
    if (replacement !== undefined && replacement !== core) {
      change = `${core} -> ${replacement}`;
    }
  }

  if (replacement === undefined) return { text: token, change: null };
  return { text: token.slice(0, start) + replacement + token.slice(end), change };
}

/**
 * Normalises free-form text word by word.
 *
 * Steps: trim the input, collapse every run of two or more whitespace
 * characters to a single space, replace each token that matches
 * {@link NORMALIZATION_RULES} or {@link CAPITALIZED_PLACES} (matching ignores
 * case and punctuation; punctuation at the token's edges is preserved), and
 * finally upper-case the first character of the result.
 *
 * @param rawText - Text to normalise. An empty (or otherwise falsy) value
 *   yields an empty result.
 * @returns The normalised text and the list of distinct replacements made.
 */
export function normalizeWolof(rawText: string): NormalizationResult {
  if (!rawText) return { normalizedText: "", changes: [] };

  const collapsed = rawText.trim().replace(/\s{2,}/g, " ");

  // A Set both de-duplicates and keeps first-seen order.
  const seenChanges = new Set<string>();

  // Tokenise on any whitespace (not just " ") so words separated by a single
  // tab or newline are normalised too; the separators themselves are kept.
  const replaced = collapsed.replace(/\S+/g, (token) => {
    const { text, change } = normalizeToken(token);
    if (change !== null) seenChanges.add(change);
    return text;
  });

  const normalizedText =
    replaced.length > 0 ? replaced.charAt(0).toUpperCase() + replaced.slice(1) : replaced;

  return { normalizedText, changes: Array.from(seenChanges) };
}
|
|