"""Export the Chief Engineer's multi-persona DELIBERATION as a HF-ready trace.

The lesson ledger (scripts/export_trace.py) shares *what the agent learned*. This
shares *how the agent thinks*: the turn-by-turn argument between the personas on
each job — O'Brien proposes, the Spine vetoes unsafe values, La Forge gives a
skeptical second opinion (and can dispute → the operator overrides), the
deterministic world prints, La Forge grades each run, then delivers a run verdict.

Our own schema (one row per turn): session_id, track, turn, agent, role, act, stance,
content, + the job context (material/geometry/bed/env) so each row is self-describing.

Side-effect-free: runs against a throwaway ledger + policy in a temp dir, so the
shipped state is never touched. Offline-safe: with no LLM the personas fall back to
their deterministic voices, so the trace is fully reproducible.

Run: `make deliberation`  (or `uv run python -m scripts.export_deliberation`)  →  dist/deliberation/
"""

from __future__ import annotations

import sys
import tempfile
from datetime import datetime, timedelta, timezone
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent.parent))  # repo root on path

from core import inspector, seed_lessons
from core.chief_engineer import advise
from core.ledger import LedgerManager
from core.models import Advice, Environment, Job, PrintSettings
from core.spine import SpineValidator
from learn.loop import run_iteration
from learn.policy import LearnedPolicy

try:  # ingestion is optional / removable (mirrors app.py)
    from ingest.distill import reference_block
except Exception:
    def reference_block(_material):  # type: ignore
        return []

DIST = Path(__file__).resolve().parent.parent / "dist" / "deliberation"
HF_REPO = "kylebrodeur/chief-engineer-deliberation"

# Representative jobs, chosen to exercise the full range of La Forge's stances
# (concur / caution / dispute→override) and the print-loop's climb to clean.
JOBS = [
    ("ABS",  "overhang",  "edge",   26.0, 60.0, 4),  # off-center ABS + thin fan → dispute → override
    ("PETG", "overhang",  "center", 24.0, 55.0, 3),  # thin fan for an overhang → caution
    ("PLA",  "adhesion",  "center", 21.0, 45.0, 3),  # inside sane bounds → concur
    ("TPU",  "stringing", "edge",   23.0, 65.0, 3),  # humid + short retraction → caution
]

ROLE = {
    "O'Brien": "Chief Engineer",
    "La Forge": "QA Inspector",
    "Spine": "Safety Spine",
    "World": "Outcome Simulator",
    "Operator": "Operator",
}

CARD = """---
license: mit
task_categories: [text-generation]
language: [en]
tags: [3d-printing, additive-manufacturing, agent-trace, multi-agent, deliberation, build-small-hackathon]
pretty_name: Chief Engineer — Deliberation Traces
---

# Chief Engineer — Deliberation Traces

Turn-by-turn **multi-persona deliberation** from **The Chief Engineer**, a small local
Gemma agent built for the HF Build Small hackathon (Backyard AI). Where the
[lesson ledger](https://huggingface.co/datasets/kylebrodeur/chief-engineer-ledger)
records *what the agent learned*, this records *how it reasons*: the argument between
the personas on each job. It grows two ways: a reproducible static export
(`make deliberation`) and **live turns logged on every run of the Space** (gated on
`HF_TOKEN`; config + agent reasoning only, never PII or uploaded files).

Each row is one **turn**:

- **O'Brien** (Chief Engineer) — proposes settings + reasoning over precedent.
- **Spine** (Safety Spine) — deterministically vetoes/clamps unsafe values.
- **La Forge** (QA Inspector) — a separate, skeptical voice: second opinion before the
  print (`concur` / `caution` / `dispute`), a grade on each run, and a run verdict.
- **Operator** — the human, who can override a `dispute` and proceed.
- **World** (Outcome Simulator) — the deterministic physics-lite world that reports the
  actual print outcome (the agent never grades its own work).

The integrity rule made literal: the proposer never marks its own homework.

## Schema

`session_id, track, turn, agent, role, act, stance, content, material, geometry,
bed_position, env_temp, env_humidity, ts`

`track` is the phase — `preflight` (propose → veto → second opinion → override),
`print-loop` (simulate → grade, per iteration), `review` (run verdict).
"""


def _settings_line(s: PrintSettings) -> str:
    return (f"nozzle {s.nozzle_temp:.0f}°C, bed {s.bed_temp:.0f}°C, fan {s.fan_pct:.0f}%, "
            f"first-layer fan {s.first_layer_fan_pct:.0f}%, retraction {s.retraction_mm:.1f}mm")


def export() -> Path:
    DIST.mkdir(parents=True, exist_ok=True)
    out = DIST / "deliberations.jsonl"

    # throwaway state so the shipped ledger/policy are never mutated
    tmp = Path(tempfile.mkdtemp(prefix="ce-delib-"))
    ledger = LedgerManager(path=tmp / "ledger.jsonl")
    seed_lessons.ensure_seeded(ledger)
    spine = SpineValidator()

    import json

    clock = datetime(2026, 6, 14, 12, 0, 0, tzinfo=timezone.utc)
    rows: list[dict] = []

    def emit(job_id, track, turn, agent, act, content, *, ctx, stance=""):
        nonlocal clock
        clock += timedelta(seconds=7)
        rows.append({
            "session_id": job_id, "track": track, "turn": turn,
            "agent": agent, "role": ROLE[agent], "act": act, "stance": stance,
            "content": content.strip(),
            "material": ctx["material"], "geometry": ctx["geometry"],
            "bed_position": ctx["bed_position"],
            "env_temp": ctx["env_temp"], "env_humidity": ctx["env_humidity"],
            "ts": clock.isoformat(),
        })

    for material, geometry, bed, temp, hum, iters in JOBS:
        job_id = f"{material}-{geometry}-{bed}".lower()
        job = Job(geometry_type=geometry, material=material, bed_position=bed)
        env = Environment(temp=temp, humidity=hum)
        ctx = {"material": material, "geometry": geometry, "bed_position": bed,
               "env_temp": temp, "env_humidity": hum}
        # fresh policy per job so the loop's climb starts from baseline each time
        policy = LearnedPolicy(path=tmp / f"policy-{job_id}.json")

        # ── preflight: propose → veto → second opinion → (override) ──
        retrieved = ledger.retrieve(material, geometry, env.temp, env.humidity)
        rec = advise(job, env, retrieved, reference_block(material),
                     policy.policy_note(material, geometry, env))
        checked = spine.check(rec.advice.settings, material)
        t = 1
        emit(job_id, "preflight", t, "O'Brien", "propose",
             f"{rec.advice.reasoning}\nProposed: {_settings_line(checked.settings)}.", ctx=ctx)
        t += 1
        emit(job_id, "preflight", t, "Spine", "veto",
             ("Clamped: " + " · ".join(checked.vetoes)) if checked.vetoes
             else "Within the safe envelope for this material — no clamp.", ctx=ctx,
             stance="clamped" if checked.requires_approval else "clear")
        t += 1
        verdict = inspector.second_opinion(job, env, checked.settings, rec.advice)
        emit(job_id, "preflight", t, "La Forge", "second_opinion",
             f"{verdict.headline} — {verdict.detail}", ctx=ctx, stance=verdict.stance)
        if verdict.stance.lower() == "dispute":
            t += 1
            emit(job_id, "preflight", t, "Operator", "override",
                 "Acknowledged La Forge's objection. Proceeding to print on the operator's call.",
                 ctx=ctx, stance="override")

        # ── print-loop: simulate → grade, per iteration ──
        for n in range(1, iters + 1):
            t += 1
            r = run_iteration(job, env, policy, ledger, n, record=False)
            clamp = " (Spine clamped a setting)" if r.clamped else ""
            emit(job_id, "print-loop", t, "World", "simulate",
                 f"Iteration {n}: {r.result.detail}.{clamp} Policy: {r.learned}.", ctx=ctx,
                 stance=r.result.outcome)
            t += 1
            g = inspector.grade_iteration(geometry, r.result)
            emit(job_id, "print-loop", t, "La Forge", "grade",
                 f"{g.headline} — {g.detail}", ctx=ctx, stance=g.stance)

        # ── review: one verdict across the run ──
        # rebuild records for the summary from a fresh deterministic pass
        sess_records = []
        rpolicy = LearnedPolicy(path=tmp / f"policy-rev-{job_id}.json")
        for n in range(1, iters + 1):
            sess_records.append(run_iteration(job, env, rpolicy, ledger, n, record=False))
        summary = inspector.summarize_run(sess_records, material=material, geometry=geometry)
        t += 1
        emit(job_id, "review", t, "La Forge", "verdict",
             f"{summary.headline} — {summary.detail}", ctx=ctx, stance=summary.stance)

    with out.open("w", encoding="utf-8") as f:
        for row in rows:
            f.write(json.dumps(row, ensure_ascii=False) + "\n")
    (DIST / "README.md").write_text(CARD, encoding="utf-8")

    jobs = len({r["session_id"] for r in rows})
    print(f"exported {len(rows)} turns across {jobs} jobs → {out}")
    print(f"dataset card → {DIST / 'README.md'}")
    print(f"publish:  hf upload {HF_REPO} {DIST} . --repo-type dataset")
    return out


if __name__ == "__main__":
    export()