microfactory-lab / scripts /export_deliberation.py
kylebrodeur's picture
deploy: update Space from deploy_preflight --push
a2fa1e1 verified
Raw
History Blame Contribute Delete
9.34 kB
"""Export the Chief Engineer's multi-persona DELIBERATION as a HF-ready trace.
The lesson ledger (scripts/export_trace.py) shares *what the agent learned*. This
shares *how the agent thinks*: the turn-by-turn argument between the personas on
each job — O'Brien proposes, the Spine vetoes unsafe values, La Forge gives a
skeptical second opinion (and can dispute → the operator overrides), the
deterministic world prints, La Forge grades each run, then delivers a run verdict.
Our own schema (one row per turn): session_id, track, turn, agent, role, act, stance,
content, + the job context (material/geometry/bed/env) so each row is self-describing.
Side-effect-free: runs against a throwaway ledger + policy in a temp dir, so the
shipped state is never touched. Offline-safe: with no LLM the personas fall back to
their deterministic voices, so the trace is fully reproducible.
Run: `make deliberation` (or `uv run python -m scripts.export_deliberation`) β†’ dist/deliberation/
"""
from __future__ import annotations
import sys
import tempfile
from datetime import datetime, timedelta, timezone
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) # repo root on path
from core import inspector, seed_lessons
from core.chief_engineer import advise
from core.ledger import LedgerManager
from core.models import Advice, Environment, Job, PrintSettings
from core.spine import SpineValidator
from learn.loop import run_iteration
from learn.policy import LearnedPolicy
# Ingestion is an optional, removable component (mirrors app.py). If importing
# it fails for any reason, a stub that supplies no reference material stands in.
try:
    from ingest.distill import reference_block
except Exception:
    def reference_block(_material):  # type: ignore
        """Fallback when ingestion is unavailable: no references for any material."""
        return list()
# Export destination: two directories above this file, then dist/deliberation.
DIST = Path(__file__).resolve().parent.parent.joinpath("dist", "deliberation")

# Dataset repo id used in the publish hint that export() prints.
HF_REPO = "kylebrodeur/chief-engineer-deliberation"

# Representative jobs, chosen to exercise the full range of La Forge's stances
# (concur / caution / dispute→override) and the print-loop's climb to clean.
# Each entry: (material, geometry, bed position, env temp, env humidity, iterations).
JOBS = [
    ("ABS", "overhang", "edge", 26.0, 60.0, 4),     # off-center ABS + thin fan → dispute → override
    ("PETG", "overhang", "center", 24.0, 55.0, 3),  # thin fan for an overhang → caution
    ("PLA", "adhesion", "center", 21.0, 45.0, 3),   # inside sane bounds → concur
    ("TPU", "stringing", "edge", 23.0, 65.0, 3),    # humid + short retraction → caution
]

# Persona name -> the role label stored in each row's `role` field.
ROLE = {
    "O'Brien": "Chief Engineer",
    "La Forge": "QA Inspector",
    "Spine": "Safety Spine",
    "World": "Outcome Simulator",
    "Operator": "Operator",
}
# Dataset card written verbatim to DIST/README.md by export(): YAML front
# matter followed by Markdown. Blank lines separate the Markdown blocks so the
# closing paragraphs are not absorbed into the bullet list when rendered.
CARD = """---
license: mit
task_categories: [text-generation]
language: [en]
tags: [3d-printing, additive-manufacturing, agent-trace, multi-agent, deliberation, build-small-hackathon]
pretty_name: Chief Engineer — Deliberation Traces
---

# Chief Engineer — Deliberation Traces

Turn-by-turn **multi-persona deliberation** from **The Chief Engineer**, a small local
Gemma agent built for the HF Build Small hackathon (Backyard AI). Where the
[lesson ledger](https://huggingface.co/datasets/kylebrodeur/chief-engineer-ledger)
records *what the agent learned*, this records *how it reasons*: the argument between
the personas on each job. It grows two ways: a reproducible static export
(`make deliberation`) and **live turns logged on every run of the Space** (gated on
`HF_TOKEN`; config + agent reasoning only, never PII or uploaded files).

Each row is one **turn**:

- **O'Brien** (Chief Engineer) — proposes settings + reasoning over precedent.
- **Spine** (Safety Spine) — deterministically vetoes/clamps unsafe values.
- **La Forge** (QA Inspector) — a separate, skeptical voice: second opinion before the
  print (`concur` / `caution` / `dispute`), a grade on each run, and a run verdict.
- **Operator** — the human, who can override a `dispute` and proceed.
- **World** (Outcome Simulator) — the deterministic physics-lite world that reports the
  actual print outcome (the agent never grades its own work).

The integrity rule made literal: the proposer never marks its own homework.

## Schema

`session_id, track, turn, agent, role, act, stance, content, material, geometry,
bed_position, env_temp, env_humidity, ts`

`track` is the phase — `preflight` (propose → veto → second opinion → override),
`print-loop` (simulate → grade, per iteration), `review` (run verdict).
"""
def _settings_line(s: PrintSettings) -> str:
    """Render the headline print settings as a single human-readable line.

    Args:
        s: Settings to describe. Reads ``nozzle_temp``, ``bed_temp``,
            ``fan_pct``, ``first_layer_fan_pct`` and ``retraction_mm``.

    Returns:
        Text such as
        ``"nozzle 240°C, bed 100°C, fan 30%, first-layer fan 0%, retraction 0.8mm"``.
        Temperatures and percentages are rounded to whole numbers; retraction
        keeps one decimal place.
    """
    # The degree sign is written as a real "°" (it had been mis-encoded), so
    # the exported trace text is clean.
    return (
        f"nozzle {s.nozzle_temp:.0f}°C, bed {s.bed_temp:.0f}°C, fan {s.fan_pct:.0f}%, "
        f"first-layer fan {s.first_layer_fan_pct:.0f}%, retraction {s.retraction_mm:.1f}mm"
    )
def export() -> Path:
    """Run every job in ``JOBS`` through the deliberation and write the trace.

    Per job, the turns are emitted in three tracks:

    * ``preflight``: O'Brien proposes, the Spine checks the settings, La Forge
      gives a second opinion, and the Operator overrides when that opinion is
      a dispute.
    * ``print-loop``: for each iteration the World simulates and La Forge
      grades.
    * ``review``: one La Forge verdict across the run.

    Ledger and policy state live in a throwaway temp directory (so the shipped
    ledger/policy are never mutated); that directory is removed before the
    function returns, whether or not the run succeeded.

    Side effects: creates ``DIST`` if needed, writes ``deliberations.jsonl``
    and ``README.md`` there, and prints a short summary.

    Returns:
        Path of the written ``deliberations.jsonl``.
    """
    import json
    import shutil

    DIST.mkdir(parents=True, exist_ok=True)
    out = DIST / "deliberations.jsonl"

    # Synthetic clock: fixed start, advanced 7 s per emitted turn, so the `ts`
    # column does not depend on wall-clock time.
    clock = datetime(2026, 6, 14, 12, 0, 0, tzinfo=timezone.utc)
    rows: list[dict] = []

    def emit(job_id, track, turn, agent, act, content, *, ctx, stance=""):
        """Append one turn row, carrying the job context and the next timestamp."""
        nonlocal clock
        clock += timedelta(seconds=7)
        rows.append({
            "session_id": job_id, "track": track, "turn": turn,
            "agent": agent, "role": ROLE[agent], "act": act, "stance": stance,
            "content": content.strip(),
            "material": ctx["material"], "geometry": ctx["geometry"],
            "bed_position": ctx["bed_position"],
            "env_temp": ctx["env_temp"], "env_humidity": ctx["env_humidity"],
            "ts": clock.isoformat(),
        })

    # throwaway state so the shipped ledger/policy are never mutated
    tmp = Path(tempfile.mkdtemp(prefix="ce-delib-"))
    try:
        ledger = LedgerManager(path=tmp / "ledger.jsonl")
        seed_lessons.ensure_seeded(ledger)
        spine = SpineValidator()

        for material, geometry, bed, temp, hum, iters in JOBS:
            job_id = f"{material}-{geometry}-{bed}".lower()
            job = Job(geometry_type=geometry, material=material, bed_position=bed)
            env = Environment(temp=temp, humidity=hum)
            ctx = {"material": material, "geometry": geometry, "bed_position": bed,
                   "env_temp": temp, "env_humidity": hum}
            # fresh policy per job so the loop's climb starts from baseline each time
            policy = LearnedPolicy(path=tmp / f"policy-{job_id}.json")

            # ── preflight: propose → veto → second opinion → (override) ──
            retrieved = ledger.retrieve(material, geometry, env.temp, env.humidity)
            rec = advise(job, env, retrieved, reference_block(material),
                         policy.policy_note(material, geometry, env))
            checked = spine.check(rec.advice.settings, material)
            t = 1
            # The "Proposed" line shows the Spine-checked settings.
            emit(job_id, "preflight", t, "O'Brien", "propose",
                 f"{rec.advice.reasoning}\nProposed: {_settings_line(checked.settings)}.", ctx=ctx)
            t += 1
            # NOTE(review): the message keys off `vetoes` while the stance keys
            # off `requires_approval` — confirm the two always agree.
            spine_msg = (
                "Clamped: " + " · ".join(checked.vetoes)
                if checked.vetoes
                else "Within the safe envelope for this material — no clamp."
            )
            emit(job_id, "preflight", t, "Spine", "veto", spine_msg, ctx=ctx,
                 stance="clamped" if checked.requires_approval else "clear")
            t += 1
            verdict = inspector.second_opinion(job, env, checked.settings, rec.advice)
            emit(job_id, "preflight", t, "La Forge", "second_opinion",
                 f"{verdict.headline} — {verdict.detail}", ctx=ctx, stance=verdict.stance)
            if verdict.stance.lower() == "dispute":
                t += 1
                emit(job_id, "preflight", t, "Operator", "override",
                     "Acknowledged La Forge's objection. Proceeding to print on the operator's call.",
                     ctx=ctx, stance="override")

            # ── print-loop: simulate → grade, per iteration ──
            for n in range(1, iters + 1):
                t += 1
                r = run_iteration(job, env, policy, ledger, n, record=False)
                clamp = " (Spine clamped a setting)" if r.clamped else ""
                emit(job_id, "print-loop", t, "World", "simulate",
                     f"Iteration {n}: {r.result.detail}.{clamp} Policy: {r.learned}.", ctx=ctx,
                     stance=r.result.outcome)
                t += 1
                g = inspector.grade_iteration(geometry, r.result)
                emit(job_id, "print-loop", t, "La Forge", "grade",
                     f"{g.headline} — {g.detail}", ctx=ctx, stance=g.stance)

            # ── review: one verdict across the run ──
            # rebuild records for the summary from a fresh deterministic pass
            rpolicy = LearnedPolicy(path=tmp / f"policy-rev-{job_id}.json")
            sess_records = [
                run_iteration(job, env, rpolicy, ledger, n, record=False)
                for n in range(1, iters + 1)
            ]
            summary = inspector.summarize_run(sess_records, material=material, geometry=geometry)
            t += 1
            emit(job_id, "review", t, "La Forge", "verdict",
                 f"{summary.headline} — {summary.detail}", ctx=ctx, stance=summary.stance)
    finally:
        # mkdtemp leaves deletion to the caller; clean up best-effort so a
        # failed removal never masks the real outcome of the export.
        shutil.rmtree(tmp, ignore_errors=True)

    with out.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(row, ensure_ascii=False) + "\n" for row in rows)
    (DIST / "README.md").write_text(CARD, encoding="utf-8")

    jobs = len({r["session_id"] for r in rows})
    print(f"exported {len(rows)} turns across {jobs} jobs → {out}")
    print(f"dataset card → {DIST / 'README.md'}")
    print(f"publish: hf upload {HF_REPO} {DIST} . --repo-type dataset")
    return out
# Script entry point: run the export only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    export()