AndrewRqy
Trace-sharing infra: traces/sample is committable, add curation script + README badge claim
fd94fc8 | """Curate a session trace into a publishable sample. | |
| Reads the most-recent oracles-trace-*.jsonl from the traces/ dir and | |
| writes a copy into traces/sample/ under a friendlier filename. The | |
| sample is then committed alongside the repo as the Sharing-is-Caring | |
| badge deliverable. | |
| Usage: | |
| cd oracles_app | |
| ../.venv/bin/python scripts/curate_trace.py \\ | |
| --label fantasy-en-playthrough | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import shutil | |
| import sys | |
| from pathlib import Path | |
# Directory that contains this script, with symlinks resolved.
_HERE = Path(__file__).resolve().parent
# Parent of the script directory; traces/ is looked up beneath it.
_APP_ROOT = _HERE.parent
# Directory searched for session traces (oracles-trace-*.jsonl).
_TRACES_DIR = _APP_ROOT / "traces"
# Destination directory for the curated sample copy.
_SAMPLE_DIR = _TRACES_DIR / "sample"
def _newest_session_trace() -> Path:
    """Return the most recently modified session trace.

    Considers the entries matching ``oracles-trace-*.jsonl`` directly inside
    ``_TRACES_DIR`` and picks the one with the largest modification time.
    Terminates the process with an error message when nothing matches.
    """
    # max() keeps the first of several equally-new candidates, which is the
    # same entry a stable newest-first sort would place at the front.
    latest = max(
        _TRACES_DIR.glob("oracles-trace-*.jsonl"),
        key=lambda trace: trace.stat().st_mtime,
        default=None,
    )
    if latest is None:
        sys.exit(
            f"ERROR: no session traces found in {_TRACES_DIR}. "
            "Run the app first and complete at least one trial."
        )
    return latest
def _summarize(path: Path) -> dict:
    """Collect quick stats so the user can sanity-check before committing.

    Args:
        path: JSONL trace file. Blank lines are skipped; every other line is
            parsed as one JSON record.

    Returns:
        A dict with these keys:
          * ``n_records`` - number of parsed records.
          * ``modes`` - record count per ``mode`` value (``"?"`` if absent).
          * ``models_requested`` - count per ``model_requested``, falling
            back to ``model`` and then ``"?"``.
          * ``models_returned`` - count per ``model_returned`` (``"?"`` if
            absent).
          * ``total_prompt_tokens`` / ``total_completion_tokens`` - sums of
            ``usage.prompt_tokens`` / ``usage.completion_tokens``; missing or
            null values count as 0.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON. The
            message is prefixed with the file path and 1-based line number.
    """
    stats: dict = {
        "n_records": 0,
        "modes": {},
        "models_requested": {},
        "models_returned": {},
        "total_completion_tokens": 0,
        "total_prompt_tokens": 0,
    }
    modes = stats["modes"]
    requested = stats["models_requested"]
    returned = stats["models_returned"]

    # Decode explicitly as UTF-8 (the standard JSON encoding) instead of the
    # platform default, which is not UTF-8 everywhere.
    # NOTE(review): assumes the trace writer emits UTF-8 - confirm.
    with path.open(encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError as err:
                # Same exception type, but say where the bad record is.
                raise json.JSONDecodeError(
                    f"{path}:{lineno}: {err.msg}", err.doc, err.pos
                ) from err

            stats["n_records"] += 1

            mode = rec.get("mode", "?")
            modes[mode] = modes.get(mode, 0) + 1

            mr = rec.get("model_requested") or rec.get("model", "?")
            requested[mr] = requested.get(mr, 0) + 1

            mreturn = rec.get("model_returned", "?")
            returned[mreturn] = returned.get(mreturn, 0) + 1

            # "usage" may be missing or null; individual counters may be too.
            usage = rec.get("usage") or {}
            stats["total_prompt_tokens"] += int(usage.get("prompt_tokens", 0) or 0)
            stats["total_completion_tokens"] += int(
                usage.get("completion_tokens", 0) or 0
            )
    return stats
def main() -> int:
    """Print stats for a session trace and copy it into the sample dir.

    Command-line options:
      * ``--label``: name used for the output file (``<label>.jsonl``).
        Defaults to the source file's stem with ``oracles-trace-`` replaced
        by ``session-``.
      * ``--source``: trace to curate. Defaults to the newest session trace.
      * ``--summary-only``: print the stats and stop before copying.

    Returns:
        0 on success. Every failure terminates the process through
        ``sys.exit`` with an ``ERROR: ...`` message.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a usage example; keep its line
        # breaks instead of letting argparse re-wrap it into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument(
        "--label", default=None,
        help="Label to embed in the output filename "
             "(default: derived from the source filename).",
    )
    ap.add_argument(
        "--source", default=None,
        help="Specific source trace path. Default = newest in traces/.",
    )
    ap.add_argument(
        "--summary-only", action="store_true",
        help="Print stats and exit without copying.",
    )
    args = ap.parse_args()

    src = Path(args.source) if args.source else _newest_session_trace()
    if not src.exists():
        sys.exit(f"ERROR: source not found: {src}")
    if not src.is_file():
        sys.exit(f"ERROR: source is not a file: {src}")

    stats = _summarize(src)
    print(f"Source: {src}")
    print(f"Records: {stats['n_records']}")
    print(f"Modes: {stats['modes']}")
    print(f"Models requested: {stats['models_requested']}")
    print(f"Models returned: {stats['models_returned']}")
    print(f"Prompt tokens used: {stats['total_prompt_tokens']}")
    print(f"Output tokens used: {stats['total_completion_tokens']}")
    if args.summary_only:
        return 0

    label = args.label or src.stem.replace("oracles-trace-", "session-")
    # The label becomes a file name inside the sample dir; a path separator
    # would redirect the copy to some other directory.
    if Path(label).name != label:
        sys.exit(
            f"ERROR: --label must be a plain file name without path "
            f"separators: {label!r}"
        )

    _SAMPLE_DIR.mkdir(parents=True, exist_ok=True)
    dst = _SAMPLE_DIR / f"{label}.jsonl"
    if dst.exists():
        # copy2 refuses to copy a file onto itself; report that cleanly.
        if dst.samefile(src):
            sys.exit(f"ERROR: source is already the sample file: {dst}")
        print(f"\nWARN: {dst} already exists — overwriting.", file=sys.stderr)
    shutil.copy2(src, dst)
    print(f"\nCopied to: {dst}")

    # Show a path that works from wherever the user is standing, so the
    # suggested command can be pasted as-is; fall back to the absolute path
    # when the sample dir is not below the current directory.
    try:
        shown = dst.relative_to(Path.cwd().resolve())
    except ValueError:
        shown = dst
    print("\nNext steps:")
    print(f" git add {shown}")
    print(" git commit -m 'Add sample LLM trace from playthrough'")
    print(" git push")
    return 0
# Run the CLI only when executed as a script; main()'s return value is
# passed to sys.exit() as the process exit status.
if __name__ == "__main__":
    sys.exit(main())