Spaces:
Running
Running
| """Export REAL agent traces for the Hub (Build Small "Sharing is Caring" badge). | |
| Captures two genuine traces from the live, fully-local stack: | |
| 1. CASE GENERATION - every prompt the pipeline sends to the in-process llama.cpp model | |
| and the raw completion that came back, for one complete authored case; | |
| 2. LIVE INTERROGATION - a short playthrough against the served case: questions (one with | |
| evidence presented), the suspect's spoken reply, and the server-authoritative | |
| suspicion/flags, with wall-clock latency per turn. | |
| Writes ``traces/case0_traces.jsonl`` + ``traces/README.md``. Run with: | |
| python scripts/export_traces.py # produce the files | |
| python scripts/export_traces.py --push # produce AND push to the Hub dataset | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import sys | |
| import time | |
| from pathlib import Path | |
| ROOT = Path(__file__).resolve().parent.parent | |
| sys.path.insert(0, str(ROOT / "src")) | |
| from case_zero.config import get_settings # noqa: E402 | |
| from case_zero.generator.pipeline import generate_case # noqa: E402 | |
| from case_zero.llm.backend import GenParams, LLMBackend, make_backend # noqa: E402 | |
# Hub dataset repository that main() creates (if missing) and uploads to when
# the script is run with --push.
DATASET_ID = "HusseinEid/case0-traces"
# Local output directory; main() writes case0_traces.jsonl and README.md here
# and uploads the whole folder on --push.
OUT_DIR = ROOT / "traces"
# Dataset card written verbatim to traces/README.md next to the JSONL file.
# The YAML front matter carries the Hub metadata; the body documents every
# record type this script emits, so keep it in sync with the dicts built in
# _TracingBackend.generate, _generation_trace and _interrogation_trace.
_DATASET_README = """---
license: apache-2.0
tags:
- build-small-hackathon
- agent-trace
- text-generation
pretty_name: Case Zero agent traces
---

# Case Zero - agent traces

Real traces from [Case Zero](https://huggingface.co/spaces/build-small-hackathon/case0),
a procedural detective game where a single **Qwen2.5-1.5B** model (in-process llama.cpp,
CPU-only, no cloud APIs) authors a complete mystery and then role-plays every suspect
live under interrogation.

`case0_traces.jsonl` - one JSON object per line:

- `type: "generation_call"` - one pipeline LLM call while authoring a case: the exact
  `prompt`, the raw `completion`, sampling params, and latency. Two calls author a full
  case (world+cast, then mystery); deterministic Python assembles and solver-checks it.
- `type: "generation_result"` - summary of the case those calls authored: `case_id`,
  `crime_kind`, `title`, whether the solver check passed (`solvable`), `attempts`,
  `n_suspects`, and `n_clues`.
- `type: "case_served"` - the case the running game server handed out for the
  interrogation playthrough: `case_id`, `kind`, and `title`.
- `type: "interrogation_turn"` - one live turn against the running game server: the
  player's `question` (optionally `presented_clue`), the suspect's spoken `reply`, and
  the server-authoritative `suspicion` / `suspicion_delta` / `flags` that came back,
  with latency.

Everything was produced by the shipped game code - no hand-editing, no cloud calls.
"""
class _TracingBackend:
    """Wrap a real LLM backend and record every (prompt, completion) pair.

    Exposes ``generate`` and ``stream`` and forwards the actual work to the
    wrapped backend, so it can be passed wherever that backend would be.
    Every completed ``generate`` call appends one ``generation_call`` record
    to ``self.calls``.
    """

    def __init__(self, inner: LLMBackend) -> None:
        """Store the backend to delegate to and start with an empty call log."""
        self._inner = inner
        # One dict per completed generate() call, in call order.
        self.calls: list[dict] = []

    def generate(self, prompt: str, params: GenParams) -> str:
        """Run one completion on the wrapped backend and log it.

        Args:
            prompt: Text sent to the model, recorded verbatim.
            params: Generation parameters; ``temperature``, ``max_tokens``,
                ``grammar`` and ``json_schema`` are read for the record.

        Returns:
            The completion exactly as the wrapped backend returned it.
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump if the system clock is adjusted mid-generation.
        started = time.perf_counter()
        out = self._inner.generate(prompt, params)
        elapsed = time.perf_counter() - started
        self.calls.append({
            "type": "generation_call",
            "prompt": prompt,
            "completion": out,
            "temperature": params.temperature,
            "max_tokens": params.max_tokens,
            # True when either a grammar or a JSON schema was supplied.
            "constrained": bool(params.grammar or params.json_schema),
            "latency_s": round(elapsed, 2),
        })
        return out

    def stream(self, prompt: str, params: GenParams):
        """Yield the full completion as a single chunk.

        Delegates to ``generate`` rather than to the wrapped backend's own
        streaming, so a streamed call is logged exactly like a blocking one.
        """
        yield self.generate(prompt, params)
def _generation_trace(records: list[dict], seed: int = 77321) -> None:
    """Author one case through a tracing backend and append its records.

    Appends, in order, one ``generation_call`` record per model call the
    pipeline made, followed by a single ``generation_result`` summary.

    Args:
        records: Output list, extended in place.
        seed: Seed forwarded to ``generate_case``. Defaults to the value the
            export has always used.
    """
    backend = _TracingBackend(make_backend(get_settings()))
    result = generate_case(backend, seed=seed)
    # The backend logged every call while the pipeline ran; copy them over as-is.
    records.extend(backend.calls)
    case = result.case
    records.append({
        "type": "generation_result",
        "case_id": case.case_id,
        "crime_kind": case.crime_kind.value,
        "title": case.title,
        "solvable": result.report.ok,
        "attempts": result.attempts,
        "n_suspects": len(case.suspects),
        "n_clues": len(case.clues),
    })
def _interrogation_trace(records: list[dict]) -> None:
    """Play a short scripted interrogation against the game server and log it.

    Requests a case through Starlette's ``TestClient``, appends one
    ``case_served`` record, then asks four fixed questions (two to the first
    suspect, two to the second, the last one presenting the first evidence
    item) and appends one ``interrogation_turn`` record per question.

    Args:
        records: Output list, appended to in place.

    Raises:
        An HTTP status error from the client's ``raise_for_status()`` when the
        server answers any request with a 4xx/5xx status.
        IndexError: If the served case has fewer than two suspects or no
            evidence.
    """
    # Imported lazily so the server stack is only loaded when this trace runs.
    from starlette.testclient import TestClient

    from case_zero.api.server import build_server

    client = TestClient(build_server())
    created = client.post("/api/case", json={})
    # Surface the HTTP status instead of a KeyError on an error payload below.
    created.raise_for_status()
    case = created.json()
    run_id = case["runId"]
    pub = case["case"]
    records.append({
        "type": "case_served",
        "case_id": pub["id"],
        "kind": pub.get("kind", "homicide"),
        "title": pub["title"],
    })

    sus = pub["suspects"]
    # First evidence item; presented on the final turn of the plan.
    breaking = pub["evidence"][0]["id"]
    # (suspect id, question, evidence id to present or None)
    plan = [
        (sus[0]["id"], "Where were you when it happened?", None),
        (sus[0]["id"], "Did you have any quarrel with the victim?", None),
        (sus[1]["id"], "Walk me through your evening, minute by minute.", None),
        (sus[1]["id"], "Explain this.", breaking),
    ]
    for sus_id, question, clue in plan:
        body: dict = {"freeText": question}
        if clue:
            body["presentEvidenceId"] = clue
        # Monotonic clock: the duration is immune to system clock adjustments.
        started = time.perf_counter()
        resp = client.post(f"/api/run/{run_id}/interrogate/{sus_id}", json=body)
        elapsed = time.perf_counter() - started
        # A failed turn must not be recorded as a trace row full of nulls.
        resp.raise_for_status()
        r = resp.json()
        records.append({
            "type": "interrogation_turn",
            "suspect": sus_id,
            "question": question,
            "presented_clue": clue,
            "reply": r.get("reply"),
            "suspicion": r.get("suspicion"),
            "suspicion_delta": r.get("suspicionDelta"),
            "flags": r.get("flags"),
            "latency_s": round(elapsed, 2),
        })
def main() -> int:
    """Produce the trace files and, when ``--push`` is given, upload them.

    Runs the generation trace and then the interrogation trace, writes all
    collected records to ``case0_traces.jsonl`` (one JSON object per line) and
    the dataset card to ``README.md`` under ``OUT_DIR``. If ``--push`` appears
    anywhere in ``sys.argv`` the whole folder is uploaded to ``DATASET_ID``.

    Returns:
        Always 0, used as the process exit status.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    collected: list[dict] = []

    print("tracing one full case generation (two model calls)...")
    _generation_trace(collected)
    print("tracing a live interrogation playthrough...")
    _interrogation_trace(collected)

    jsonl_path = OUT_DIR / "case0_traces.jsonl"
    with jsonl_path.open("w", encoding="utf-8") as sink:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        sink.writelines(json.dumps(row, ensure_ascii=False) + "\n" for row in collected)
    (OUT_DIR / "README.md").write_text(_DATASET_README, encoding="utf-8")
    print(f"wrote {len(collected)} records -> {jsonl_path}")

    if "--push" not in sys.argv:
        return 0

    # Imported only on the push path so a local export does not need the package.
    from huggingface_hub import HfApi

    hub = HfApi()
    hub.create_repo(DATASET_ID, repo_type="dataset", exist_ok=True)
    hub.upload_folder(repo_id=DATASET_ID, repo_type="dataset", folder_path=str(OUT_DIR))
    print(f"pushed -> https://huggingface.co/datasets/{DATASET_ID}")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())