Spaces:

MataStrategy
/

ground-zero

Sleeping

ground-zero / src / llm / minimal_client.py

jefffffff9

Forbid parenthetical glosses in LLM replies

757e833 about 1 month ago

9.28 kB

	"""MinimalClient — dialect-anchored plain-text LLM client for the Month 1–3 rebuild.

	Why this exists (and not GemmaClient):
	GemmaClient wraps every reply in a JSON object and runs a "teacher / child"
	intent-classification flow. That's fine for the full app, but for the minimal
	baseline it (a) spends model capacity on JSON compliance, (b) lets the model
	drift into neighbouring languages (Wolof, Hausa, Pulaar of Senegal, Fulfulde
	of Nigeria, Jula of Côte d'Ivoire), and (c) produces text that isn't clean
	for TTS.

	This client instead:
	- pins the target dialect explicitly (Bambara / Bamako–Mali or Pular / Fuuta
	Jallon–Guinea),
	- injects the curated 30-phrase gold list for the target language as
	few-shot anchoring in the system prompt,
	- names forbidden neighbouring languages the model must not code-switch to,
	- returns a plain string, ready for MMS-TTS.

	GemmaClient and app.py are intentionally untouched.
	"""
	from __future__ import annotations

	import json
	import logging
	import re
	from functools import lru_cache
	from pathlib import Path
	from typing import Optional

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	# configs/dialect_anchors/*.json lives at <repo>/configs/dialect_anchors
	# The directory is located by resolving this file's path and walking three
	# levels up before descending into configs/dialect_anchors.
	_ANCHOR_DIR = (
	Path(__file__).resolve().parent.parent.parent / "configs" / "dialect_anchors"
	)

	# Language code -> anchor file name inside _ANCHOR_DIR. Codes that are not
	# listed here have no anchor file (_load_anchors returns [] for them).
	_ANCHOR_FILE = {
	"bam": "bambara_mali.json",
	"ful": "pular_guinea.json",
	}

	# Language code -> full dialect description interpolated into the system
	# prompt. _build_system_prompt falls back to "English" for unknown codes.
	LANG_FULL_NAME = {
	"bam": "Bambara as spoken in Bamako, Mali",
	"ful": "Pular of Fuuta Jallon, as spoken in Guinea",
	"fr": "French",
	"en": "English",
	}

	# Neighbouring languages the model is most likely to drift into. Empty for
	# fr/en — we don't need to fence those.
	# An empty string makes _build_system_prompt skip the dialect-fidelity
	# paragraph entirely.
	FORBIDDEN_DRIFT = {
	"bam": (
	"Jula / Dyula of Côte d'Ivoire, Wolof, Hausa, Swahili, Lingala, "
	"or any other African language"
	),
	"ful": (
	"Pulaar of Senegal, Fulfulde of Nigeria or Cameroon, Wolof, Hausa, "
	"Swahili, or any other African language"
	),
	"fr": "",
	"en": "",
	}


	@lru_cache(maxsize=4)
	def _load_anchors(lang: str) -> list[dict]:
	    """Load the curated gold-phrase list for `lang`. Cached per process.

	    The file name is looked up in `_ANCHOR_FILE`, read from `_ANCHOR_DIR` as
	    UTF-8 JSON, and the entries under its top-level "pairs" key are returned.

	    Anchors are an optional prompt enhancement, so this function never
	    raises for a bad anchor file: a missing, unreadable, malformed or
	    unexpectedly shaped file is logged as a warning and yields an empty list.

	    Args:
	        lang: Target language code (a key of `_ANCHOR_FILE`).

	    Returns:
	        The list of pair dicts, or [] when `lang` has no anchor file or the
	        file could not be used. The result (including an empty one) is
	        cached, and the same list object is handed to every caller — treat
	        it as read-only.
	    """
	    fname = _ANCHOR_FILE.get(lang)
	    if not fname:
	        return []
	    path = _ANCHOR_DIR / fname
	    # EAFP: open directly instead of exists()-then-open, which is racy and
	    # does not cover directories or permission errors.
	    try:
	        with path.open("r", encoding="utf-8") as f:
	            data = json.load(f)
	    except FileNotFoundError:
	        logger.warning("Dialect anchor file missing: %s", path)
	        return []
	    except (OSError, ValueError) as exc:
	        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	        logger.warning("Dialect anchor file unreadable: %s (%s)", path, exc)
	        return []
	    pairs = data.get("pairs", []) if isinstance(data, dict) else None
	    if not isinstance(pairs, list):
	        logger.warning("Dialect anchor file has no 'pairs' list: %s", path)
	        return []
	    # Drop entries that are not objects so consumers can rely on dict.get().
	    return [pair for pair in pairs if isinstance(pair, dict)]


	def _build_system_prompt(
	    target_lang: str,
	    extra_examples: Optional[list[dict]] = None,
	) -> str:
	    """Assemble the per-call system prompt for a target output language.

	    The prompt is built from, in order: the role/format instructions, an
	    optional dialect-fidelity paragraph (only when `FORBIDDEN_DRIFT` has a
	    non-empty entry for the language), the curated anchor pairs returned by
	    `_load_anchors`, the caller-supplied `extra_examples`, and a closing
	    reminder to always answer in the target language.

	    `extra_examples`, when supplied, are appended after the curated 30-pair
	    gold list as additional dynamic few-shot anchoring — used by app_minimal
	    to inject the top-K nearest phrasebook entries when the strict short-
	    circuit misses.

	    Args:
	        target_lang: Language code; unknown codes fall back to "English".
	        extra_examples: Optional list of {"source", "target"} dicts.

	    Returns:
	        The system prompt as a single newline-joined string.
	    """
	    full = LANG_FULL_NAME.get(target_lang, "English")
	    forbidden = FORBIDDEN_DRIFT.get(target_lang, "")

	    lines: list[str] = [
	        f"You are a warm, concise conversational assistant that replies ONLY in {full}.",
	        "",
	        "Your task is to REPLY to the user's message as a person would in "
	        "conversation — NOT to translate it. If the user greets you, greet them "
	        "back and ask how they are. If they ask a question, answer it. If they "
	        "make a statement, respond appropriately. Never simply repeat or "
	        "translate what they said back to them.",
	        "",
	        "Output format: plain natural text only. No JSON, no code fences, no "
	        "markdown, no translations, no romanisation, no explanations, and "
	        "ABSOLUTELY no parenthetical glosses, literal translations, or "
	        "English/French annotations of any kind (do NOT write things like "
	        "'(Lit: ...)', '(meaning ...)', or any '(English ...)' aside). The "
	        f"output must be 100% {full} characters and punctuation only. Reply in "
	        "1–3 short sentences suitable to be read aloud by a text-to-speech voice.",
	    ]

	    if forbidden:
	        lines += [
	            "",
	            (
	                f"CRITICAL — dialect fidelity: do NOT use, mix, or substitute words "
	                f"from {forbidden}. If you are not confident a word belongs to "
	                f"{full}, rephrase using simpler vocabulary you are certain of, or "
	                f"apologise briefly in {full} (for example that you did not "
	                f"understand)."
	            ),
	        ]

	    # Render before emitting the header so a list whose entries are all
	    # unusable does not leave a dangling "Reference phrases" heading.
	    anchor_lines = _format_pair_lines(_load_anchors(target_lang))
	    if anchor_lines:
	        lines += [
	            "",
	            f"Reference phrases in {full} — these pairs are STYLE/ORTHOGRAPHY "
	            "examples ONLY (showing how English/French maps to the correct "
	            "dialect). Do NOT treat them as a translation task: when the user "
	            "writes one of these source phrases, do not just output its target "
	            "verbatim — instead REPLY conversationally in the same dialectal "
	            "style:",
	        ]
	        lines += anchor_lines

	    extra_lines = _format_pair_lines(extra_examples)
	    if extra_lines:
	        lines += [
	            "",
	            "Additional reference phrases relevant to the current user input "
	            f"(curated gold {full} translations — STYLE references only, not a "
	            "translation task; reply conversationally, do not echo the target "
	            "verbatim):",
	        ]
	        lines += extra_lines

	    lines += [
	        "",
	        f"Always reply in {full}, even if the user writes to you in English, "
	        "French, or another language. Never translate your own reply.",
	    ]
	    return "\n".join(lines)


	def _format_pair_lines(pairs: Optional[list[dict]]) -> list[str]:
	    """Render {"source", "target"} dicts as "- source → target" bullet lines.

	    Entries that are not dicts, or whose source/target is missing, None or
	    blank after stripping, are skipped. Both the curated anchors and the
	    dynamic examples go through this helper so they tolerate the same input.
	    """
	    rendered: list[str] = []
	    for item in pairs or []:
	        if not isinstance(item, dict):
	            continue
	        # `or ""` guards against explicit nulls, which .get(key, "") does not.
	        src = str(item.get("source") or "").strip()
	        tgt = str(item.get("target") or "").strip()
	        if src and tgt:
	            rendered.append(f"- {src} → {tgt}")
	    return rendered


	class MinimalClient:
	    """Dialect-anchored plain-text LLM client over HF Serverless Inference.

	    Usage:
	        client = MinimalClient(model_id="CohereLabs/aya-expanse-32b", hf_token=TOK)
	        reply = client.chat("Good morning", target_lang="bam")
	        # → "I ni sɔgɔma. I ka kɛnɛ wa?"
	    """

	    # Opening code fence with an optional info string ("```text") on its own
	    # line, and the matching closing fence at the very end of the reply.
	    _FENCE_OPEN_RE = re.compile(r"\A`{3,}[^\n`]*\n")
	    _FENCE_CLOSE_RE = re.compile(r"\n?`{3,}\s*\Z")

	    # Parentheticals that look like glosses: "(Lit: ...)", "(meaning ...)",
	    # "(English: ...)" and so on, together with the whitespace before them.
	    # The trailing \b keeps ordinary words that merely start with a keyword
	    # (e.g. "(enfin ...)") from being treated as a gloss.
	    _GLOSS_RE = re.compile(
	        r"\s*\((?:lit\.?|literal(?:ly)?|meaning|translation|english|french|fr|en)\b[^)]*\)",
	        re.IGNORECASE,
	    )

	    # Bare first-line labels that are dropped when the model prefixes its
	    # reply with the language name or code.
	    _LANG_TAGS = frozenset(
	        {"bam", "ful", "fr", "en", "bambara", "pular", "french", "english"}
	    )

	    def __init__(
	        self,
	        model_id: str = "CohereLabs/aya-expanse-32b",
	        hf_token: Optional[str] = None,
	    ) -> None:
	        """Store the model id and token; the HF client is created lazily."""
	        self.model_id = model_id
	        self.hf_token = hf_token
	        self._client = None  # lazy init

	    def _get_client(self):
	        """Return the shared InferenceClient, creating it on first use.

	        huggingface_hub is imported here rather than at module import time,
	        so the module can be imported without that package being loaded.
	        """
	        if self._client is None:
	            from huggingface_hub import InferenceClient

	            self._client = InferenceClient(token=self.hf_token)
	        return self._client

	    @classmethod
	    def _clean_reply(cls, raw: Optional[str]) -> str:
	        """Normalise a raw model reply into plain text suitable for TTS.

	        Steps, in order:
	        1. Unwrap a surrounding code fence, including its info string.
	        2. Drop a first line that is only a language label ("Bambara:",
	           "bam"). A short one-word first line that is not a recognised
	           label is kept, because it may be a real sentence.
	        3. Remove parenthetical glosses matched by `_GLOSS_RE`.

	        Args:
	            raw: Reply text as returned by the model; None is treated as "".

	        Returns:
	            The cleaned, stripped reply (possibly empty).
	        """
	        text = (raw or "").strip()

	        # Defensive: strip any stray code fences the model may emit anyway.
	        if text.startswith("```"):
	            unfenced = cls._FENCE_OPEN_RE.sub("", text, count=1)
	            if unfenced == text:
	                # Single-line fence such as ```reply```: no info-string line.
	                text = text.strip("`")
	            else:
	                text = cls._FENCE_CLOSE_RE.sub("", unfenced, count=1)
	            text = text.strip()

	        # If a language tag slipped in on the first line, drop it.
	        first, newline, rest = text.partition("\n")
	        label = first.strip()
	        if newline and len(label) < 20 and " " not in label:
	            is_label = (
	                label.endswith(":")
	                or label.rstrip(":").casefold() in cls._LANG_TAGS
	            )
	            if is_label:
	                text = rest.strip()

	        # Defensive: strip parenthetical English/French glosses the model
	        # sometimes appends despite the prompt — e.g. "Foo bar (Lit: ...)".
	        return cls._GLOSS_RE.sub("", text).strip()

	    def chat(
	        self,
	        user_text: str,
	        target_lang: str = "bam",
	        extra_examples: Optional[list[dict]] = None,
	    ) -> str:
	        """Return a plain-text reply in `target_lang`.

	        `extra_examples` (optional) — list of {source, target} dicts that get
	        appended to the system prompt as additional dynamic few-shot. Used by
	        app_minimal to RAG-inject the top-K nearest phrasebook entries when
	        the strict phrasebook short-circuit misses.

	        On any error — including a failure while building the system prompt —
	        returns a short parenthetical error string so the caller can still
	        feed something into TTS / display.
	        """
	        try:
	            # Built inside the try so prompt-assembly failures are reported
	            # the same way as inference failures.
	            system_prompt = _build_system_prompt(target_lang, extra_examples)
	            completion = self._get_client().chat_completion(
	                model=self.model_id,
	                messages=[
	                    {"role": "system", "content": system_prompt},
	                    {"role": "user", "content": user_text},
	                ],
	                max_tokens=256,
	                temperature=0.3,
	            )
	            return self._clean_reply(completion.choices[0].message.content)
	        except Exception as exc:  # pragma: no cover — surfaced to UI
	            # Deliberate catch-all boundary: the UI must always get a string.
	            logger.exception("MinimalClient error: %s", exc)
	            return f"(LLM unavailable: {exc})"