"""TurnLogger — per-turn JSONL telemetry for the minimal baseline. Every voice or text turn writes one self-contained line to `data/field_turns.jsonl` (path overridable via FIELD_TURNS_PATH). This is the foundation for: - field-test review (read the JSONL after a session) - phrasebook hit-rate measurement - LLM A/B comparisons - eventually, Stage-4 LoRA training-data curation (every line already pairs an English/French input with a vetted Bambara/Pular reply; we'll filter on phrasebook-hit + user-confirmed turns later). Schema (one JSON object per line): { "ts": "", "tab": "voice" | "text", "input_lang": "bam" | "ful" | "fr" | "en" | null, "output_lang": "bam" | "ful" | "fr" | "en", "user_text": "", "transcript": "" | null, "transcribe_ms": | null, "phrasebook": { match, score, category, source, target } | null, "llm_model": "" | null, "llm_ms": | null, "reply_text": "", "tts_ms": | null, "total_ms": , "error": "" | null } Notes: - File path is gitignored (data/ is excluded by .gitignore). - Append mode + line-buffered + lock — safe for the single-process Gradio server. Not designed for multi-worker writes. """ from __future__ import annotations import json import logging import os import threading from datetime import datetime, timezone from pathlib import Path from typing import Any, Optional logger = logging.getLogger(__name__) _DEFAULT_PATH = ( Path(__file__).resolve().parent.parent.parent / "data" / "field_turns.jsonl" ) def _utcnow_iso() -> str: return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") class TurnLogger: """Append-only JSONL logger. Thread-safe for one process.""" def __init__(self, path: Optional[str] = None) -> None: env_path = os.environ.get("FIELD_TURNS_PATH") self.path = Path(path or env_path or _DEFAULT_PATH) self.path.parent.mkdir(parents=True, exist_ok=True) self._lock = threading.Lock() logger.info("TurnLogger writing to %s", self.path) def log(self, **fields: Any) -> None: """Write one row. Always sets ts; leaves the rest to the caller. Never raises — telemetry must not break the user-facing pipeline. """ row = {"ts": _utcnow_iso(), **fields} try: line = json.dumps(row, ensure_ascii=False) with self._lock, self.path.open("a", encoding="utf-8") as fh: fh.write(line + "\n") except Exception as exc: # pragma: no cover logger.warning("TurnLogger.log failed: %s", exc)