"""Curate a session trace into a publishable sample. Reads the most-recent oracles-trace-*.jsonl from the traces/ dir and writes a copy into traces/sample/ under a friendlier filename. The sample is then committed alongside the repo as the Sharing-is-Caring badge deliverable. Usage: cd oracles_app ../.venv/bin/python scripts/curate_trace.py \\ --label fantasy-en-playthrough """ from __future__ import annotations import argparse import json import shutil import sys from pathlib import Path _HERE = Path(__file__).resolve().parent _APP_ROOT = _HERE.parent _TRACES_DIR = _APP_ROOT / "traces" _SAMPLE_DIR = _TRACES_DIR / "sample" def _newest_session_trace() -> Path: candidates = sorted( _TRACES_DIR.glob("oracles-trace-*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True, ) if not candidates: sys.exit( f"ERROR: no session traces found in {_TRACES_DIR}. " "Run the app first and complete at least one trial." ) return candidates[0] def _summarize(path: Path) -> dict: """Quick stats so the user can sanity-check before committing.""" stats: dict = { "n_records": 0, "modes": {}, "models_requested": {}, "models_returned": {}, "total_completion_tokens": 0, "total_prompt_tokens": 0, } with path.open() as f: for line in f: line = line.strip() if not line: continue rec = json.loads(line) stats["n_records"] += 1 stats["modes"][rec.get("mode", "?")] = stats["modes"].get(rec.get("mode", "?"), 0) + 1 mr = rec.get("model_requested") or rec.get("model", "?") stats["models_requested"][mr] = stats["models_requested"].get(mr, 0) + 1 mreturn = rec.get("model_returned", "?") stats["models_returned"][mreturn] = stats["models_returned"].get(mreturn, 0) + 1 usage = rec.get("usage") or {} stats["total_prompt_tokens"] += int(usage.get("prompt_tokens", 0) or 0) stats["total_completion_tokens"] += int(usage.get("completion_tokens", 0) or 0) return stats def main() -> int: ap = argparse.ArgumentParser(description=__doc__) ap.add_argument( "--label", default=None, help="Label to embed in the output filename " "(default: derived from the source filename).", ) ap.add_argument( "--source", default=None, help="Specific source trace path. Default = newest in traces/.", ) ap.add_argument( "--summary-only", action="store_true", help="Print stats and exit without copying.", ) args = ap.parse_args() src = Path(args.source) if args.source else _newest_session_trace() if not src.exists(): sys.exit(f"ERROR: source not found: {src}") if not src.is_file(): sys.exit(f"ERROR: source is not a file: {src}") stats = _summarize(src) print(f"Source: {src}") print(f"Records: {stats['n_records']}") print(f"Modes: {stats['modes']}") print(f"Models requested: {stats['models_requested']}") print(f"Models returned: {stats['models_returned']}") print(f"Prompt tokens used: {stats['total_prompt_tokens']}") print(f"Output tokens used: {stats['total_completion_tokens']}") if args.summary_only: return 0 _SAMPLE_DIR.mkdir(parents=True, exist_ok=True) label = args.label or src.stem.replace("oracles-trace-", "session-") dst = _SAMPLE_DIR / f"{label}.jsonl" if dst.exists(): print(f"\nWARN: {dst} already exists — overwriting.", file=sys.stderr) shutil.copy2(src, dst) print(f"\nCopied to: {dst}") print(f"\nNext steps:") print(f" git add {dst.relative_to(_APP_ROOT.parent)}") print(f" git commit -m 'Add sample LLM trace from playthrough'") print(f" git push") return 0 if __name__ == "__main__": sys.exit(main())