kylebrodeur's picture
deploy: update Space from deploy_preflight --push
0b338fe verified
Raw
History Blame Contribute Delete
3.54 kB
"""The closed learning loop — the primary thing we demo.
One iteration: the policy PROPOSES settings → the Spine vetoes unsafe values →
the simulated world returns an outcome (the printer's role) → the outcome is
written to the ledger as precedent AND folded into the policy. Run it N times
on the same job and watch quality climb from failure to clean: knowledge
compounding, made literal and self-driving.
Deterministic and offline by design (no LLM call in the loop) so the demo is
reproducible and fast. The cockpit's live path still runs the real LLM with
this same policy injected — see chief_engineer.advise / prompts.policy_note.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime, timezone
from core.ledger import LedgerManager
from learn.policy import LearnedPolicy
from core.models import Environment, Job, LessonEntry, PrintSettings
from sim.outcome import SimResult, simulate
from core.spine import SpineValidator
_SPINE = SpineValidator()
@dataclass
class IterationRecord:
n: int
settings: PrintSettings
result: SimResult
learned: str # what the policy changed
clamped: bool
@dataclass
class SessionResult:
job: Job
env: Environment
records: list[IterationRecord] = field(default_factory=list)
@property
def trajectory(self) -> list[float]:
return [r.result.quality for r in self.records]
@property
def first_success(self) -> int | None:
for r in self.records:
if r.result.outcome == "success":
return r.n
return None
def _record_lesson(job: Job, env: Environment, s: PrintSettings, result: SimResult,
ledger: LedgerManager) -> None:
verb = {"success": "printed clean", "failed_sag": "sagged",
"failed_stringing": "strung"}.get(result.outcome, result.outcome)
lesson = (f"[sim] {job.material} {job.geometry_type} at {env.temp:.0f}°C/{env.humidity:.0f}% RH "
f"{verb} (q={result.quality:.2f}) with nozzle {s.nozzle_temp:.0f}°C, fan {s.fan_pct:.0f}%, "
f"retraction {s.retraction_mm:.1f}mm.")
ledger.append(LessonEntry(
job_id=f"sim-{datetime.now(timezone.utc).strftime('%H%M%S%f')}",
material=job.material, geometry_type=job.geometry_type,
env_temp=env.temp, env_humidity=env.humidity, outcome=result.outcome,
lesson=lesson, source="sim", timestamp=datetime.now(timezone.utc).isoformat()))
def run_iteration(job: Job, env: Environment, policy: LearnedPolicy, ledger: LedgerManager,
n: int, record: bool = True, overrides: PrintSettings | None = None) -> IterationRecord:
proposed = overrides if overrides is not None else policy.propose(job.material, job.geometry_type, env)
checked = _SPINE.check(proposed, job.material)
settings = checked.settings
result = simulate(settings, job, env)
if record:
_record_lesson(job, env, settings, result, ledger)
learned = policy.update(job.material, job.geometry_type, env, result)
return IterationRecord(n=n, settings=settings, result=result, learned=learned,
clamped=bool(checked.vetoes))
def run_session(job: Job, env: Environment, iterations: int, policy: LearnedPolicy,
ledger: LedgerManager) -> SessionResult:
sess = SessionResult(job=job, env=env)
for i in range(1, iterations + 1):
sess.records.append(run_iteration(job, env, policy, ledger, i))
return sess