Spaces:
Sleeping
Sleeping
| """MinimalClient — dialect-anchored plain-text LLM client for the Month 1–3 rebuild. | |
| Why this exists (and not GemmaClient): | |
| GemmaClient wraps every reply in a JSON object and runs a "teacher / child" | |
| intent-classification flow. That's fine for the full app, but for the minimal | |
| baseline it (a) spends model capacity on JSON compliance, (b) lets the model | |
| drift into neighbouring languages (Wolof, Hausa, Pulaar of Senegal, Fulfulde | |
| of Nigeria, Jula of Côte d'Ivoire), and (c) produces text that isn't clean | |
| for TTS. | |
| This client instead: | |
| - pins the target dialect explicitly (Bambara / Bamako–Mali or Pular / Fuuta | |
| Jallon–Guinea), | |
| - injects the curated 30-phrase gold list for the target language as | |
| few-shot anchoring in the system prompt, | |
| - names forbidden neighbouring languages the model must not code-switch to, | |
| - returns a plain string, ready for MMS-TTS. | |
| GemmaClient and app.py are intentionally untouched. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| from functools import lru_cache | |
| from pathlib import Path | |
| from typing import Optional | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# The curated anchor files (configs/dialect_anchors/*.json) live at
# <repo>/configs/dialect_anchors, i.e. three directory levels above this file.
_ANCHOR_DIR = Path(__file__).resolve().parent.parent.parent.joinpath(
    "configs", "dialect_anchors"
)

# Language code -> file name of its gold-phrase list inside _ANCHOR_DIR.
# Only the two dialect-pinned languages have an anchor file.
_ANCHOR_FILE = dict(
    bam="bambara_mali.json",
    ful="pular_guinea.json",
)

# Language code -> human-readable description interpolated into the prompt.
LANG_FULL_NAME = dict(
    bam="Bambara as spoken in Bamako, Mali",
    ful="Pular of Fuuta Jallon, as spoken in Guinea",
    fr="French",
    en="English",
)

# Language code -> neighbouring languages the model is most likely to drift
# into. The value is an empty string for fr/en, which need no fencing.
FORBIDDEN_DRIFT = dict(
    bam=(
        "Jula / Dyula of Côte d'Ivoire, "
        "Wolof, Hausa, Swahili, Lingala, "
        "or any other African language"
    ),
    ful=(
        "Pulaar of Senegal, "
        "Fulfulde of Nigeria or Cameroon, "
        "Wolof, Hausa, Swahili, "
        "or any other African language"
    ),
    fr="",
    en="",
)
@lru_cache(maxsize=None)
def _read_anchor_pairs(path: Path) -> tuple:
    """Parse one anchor file and return its usable `pairs` entries as a tuple.

    Successful reads are memoised per path for the lifetime of the process.
    Failures raise (OSError / ValueError) and are therefore NOT cached, so a
    file that appears or is repaired later is picked up on the next call.
    """
    with path.open("r", encoding="utf-8") as f:
        data = json.load(f)
    # Tolerate unexpected shapes: only a JSON object carrying a list under
    # "pairs" yields anything, and only dict entries are kept.
    pairs = data.get("pairs") if isinstance(data, dict) else None
    if not isinstance(pairs, list):
        return ()
    return tuple(entry for entry in pairs if isinstance(entry, dict))


def _load_anchors(lang: str) -> list[dict]:
    """Load the curated gold-phrase list for `lang`. Cached per process.

    Args:
        lang: Language code looked up in `_ANCHOR_FILE`.

    Returns:
        The dict entries stored under the file's "pairs" key, as a new list
        on every call (callers may mutate the list without affecting the
        cache; the dicts themselves are shared). Returns [] when `lang` has
        no anchor file, or when the file is missing, unreadable, not valid
        JSON, or not shaped as an object with a "pairs" list. Those problems
        are logged as warnings rather than raised.
    """
    fname = _ANCHOR_FILE.get(lang)
    if not fname:
        return []
    path = _ANCHOR_DIR / fname
    try:
        return list(_read_anchor_pairs(path))
    except FileNotFoundError:
        logger.warning("Dialect anchor file missing: %s", path)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        logger.warning("Dialect anchor file unreadable: %s (%s)", path, exc)
    return []
def _build_system_prompt(
    target_lang: str,
    extra_examples: Optional[list[dict]] = None,
) -> str:
    """Assemble the per-call system prompt for a target output language.

    The prompt consists of, in order: the persona/task/output-format
    instructions, a dialect-fidelity warning (only when `FORBIDDEN_DRIFT` has
    a non-empty entry for the language), the curated gold pairs returned by
    `_load_anchors`, the optional `extra_examples`, and a closing reminder to
    always reply in the target language.

    Args:
        target_lang: Language code. Codes missing from `LANG_FULL_NAME` are
            described to the model as "English".
        extra_examples: Optional list of {source, target} dicts appended
            after the curated gold list as additional dynamic few-shot
            anchoring (used by app_minimal to inject the top-K nearest
            phrasebook entries when the strict short-circuit misses).

    Returns:
        The prompt as a single newline-joined string.

    Entries whose source or target is missing, null, not a string, or blank
    are skipped in both lists, and a reference section is emitted only when it
    has at least one usable pair.
    """
    full = LANG_FULL_NAME.get(target_lang, "English")
    forbidden = FORBIDDEN_DRIFT.get(target_lang, "")
    anchor_bullets = _format_pairs(_load_anchors(target_lang))
    extra_bullets = _format_pairs(extra_examples)

    lines: list[str] = [
        f"You are a warm, concise conversational assistant that replies ONLY in {full}.",
        "",
        "Your task is to REPLY to the user's message as a person would in "
        "conversation — NOT to translate it. If the user greets you, greet them "
        "back and ask how they are. If they ask a question, answer it. If they "
        "make a statement, respond appropriately. Never simply repeat or "
        "translate what they said back to them.",
        "",
        "Output format: plain natural text only. No JSON, no code fences, no "
        "markdown, no translations, no romanisation, no explanations, and "
        "ABSOLUTELY no parenthetical glosses, literal translations, or "
        "English/French annotations of any kind (do NOT write things like "
        "'(Lit: ...)', '(meaning ...)', or any '(English ...)' aside). The "
        f"output must be 100% {full} characters and punctuation only. Reply in "
        "1–3 short sentences suitable to be read aloud by a text-to-speech voice.",
    ]

    if forbidden:
        lines += [
            "",
            (
                f"CRITICAL — dialect fidelity: do NOT use, mix, or substitute words "
                f"from {forbidden}. If you are not confident a word belongs to "
                f"{full}, rephrase using simpler vocabulary you are certain of, or "
                f"apologise briefly in {full} (for example that you did not "
                f"understand)."
            ),
        ]

    if anchor_bullets:
        lines += [
            "",
            f"Reference phrases in {full} — these pairs are STYLE/ORTHOGRAPHY "
            "examples ONLY (showing how English/French maps to the correct "
            "dialect). Do NOT treat them as a translation task: when the user "
            "writes one of these source phrases, do not just output its target "
            "verbatim — instead REPLY conversationally in the same dialectal "
            "style:",
        ]
        lines += anchor_bullets

    if extra_bullets:
        lines += [
            "",
            "Additional reference phrases relevant to the current user input "
            f"(curated gold {full} translations — STYLE references only, not a "
            "translation task; reply conversationally, do not echo the target "
            "verbatim):",
        ]
        lines += extra_bullets

    lines += [
        "",
        f"Always reply in {full}, even if the user writes to you in English, "
        "French, or another language. Never translate your own reply.",
    ]
    return "\n".join(lines)


def _format_pairs(items: Optional[list[dict]]) -> list[str]:
    """Render usable {source, target} items as "- source → target" bullets."""
    bullets: list[str] = []
    for item in items or ():
        if not isinstance(item, dict):
            continue
        src = item.get("source")
        tgt = item.get("target")
        # A JSON null or non-string value must not crash prompt building.
        if not isinstance(src, str) or not isinstance(tgt, str):
            continue
        src, tgt = src.strip(), tgt.strip()
        if src and tgt:
            bullets.append(f"- {src} → {tgt}")
    return bullets
class MinimalClient:
    """Dialect-anchored plain-text LLM client over HF Serverless Inference.

    Usage:
        client = MinimalClient(model_id="CohereLabs/aya-expanse-32b", hf_token=TOK)
        reply = client.chat("Good morning", target_lang="bam")
        # → "I ni sɔgɔma. I ka kɛnɛ wa?"
    """

    def __init__(
        self,
        model_id: str = "CohereLabs/aya-expanse-32b",
        hf_token: Optional[str] = None,
    ) -> None:
        """Store the model id and token; the inference client is created lazily."""
        self.model_id = model_id
        self.hf_token = hf_token
        self._client = None  # lazy init

    def _get_client(self):
        """Return the InferenceClient, constructing it on first use."""
        if self._client is None:
            # Imported here so that merely importing this module does not
            # require huggingface_hub.
            from huggingface_hub import InferenceClient

            self._client = InferenceClient(token=self.hf_token)
        return self._client

    @staticmethod
    def _clean_reply(raw: Optional[str]) -> str:
        """Strip code fences, a fence language tag and gloss parentheticals."""
        import re as _re

        text = (raw or "").strip()

        # Defensive: strip any stray code fences the model may emit anyway.
        if text.startswith("```"):
            text = text.strip("`").strip()
            # A fence may carry a language tag on its first line
            # ("```text\n..."). Drop that line only when it looks like a tag,
            # so a genuine one-word first sentence is never discarded.
            if "\n" in text:
                first, rest = text.split("\n", 1)
                if len(first) < 20 and _re.fullmatch(r"[A-Za-z0-9_+#-]+", first):
                    text = rest.strip()

        # Defensive: strip parenthetical English/French glosses the model
        # sometimes appends despite the prompt — e.g. "Foo bar (Lit: ...)".
        # Only parentheticals whose first WORD is one of the gloss markers are
        # removed; the \b keeps words that merely begin with those letters.
        text = _re.sub(
            r"\s*\((?:lit|literal(?:ly)?|meaning|translation|english|french|fr|en)\b[^)]*\)",
            "",
            text,
            flags=_re.IGNORECASE,
        ).strip()
        return text

    def chat(
        self,
        user_text: str,
        target_lang: str = "bam",
        extra_examples: Optional[list[dict]] = None,
    ) -> str:
        """Return a plain-text reply in `target_lang`.

        Args:
            user_text: The user's message, sent as the single user turn.
            target_lang: Language code passed to `_build_system_prompt`.
            extra_examples: Optional list of {source, target} dicts that get
                appended to the system prompt as additional dynamic few-shot.
                Used by app_minimal to RAG-inject the top-K nearest phrasebook
                entries when the strict phrasebook short-circuit misses.

        Returns:
            The model reply after `_clean_reply` post-processing. On any
            error, including a failure while building the system prompt, a
            short parenthetical error string "(LLM unavailable: ...)" is
            returned so the caller can still feed something into TTS / display.
        """
        try:
            # Built inside the try so a prompt-building failure is reported
            # like any other error instead of escaping to the caller.
            system_prompt = _build_system_prompt(target_lang, extra_examples)
            client = self._get_client()
            completion = client.chat_completion(
                model=self.model_id,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_text},
                ],
                max_tokens=256,
                temperature=0.3,
            )
            return self._clean_reply(completion.choices[0].message.content)
        except Exception as exc:  # pragma: no cover — surfaced to UI
            logger.exception("MinimalClient error: %s", exc)
            return f"(LLM unavailable: {exc})"