Spaces:

build-small-hackathon
/

microfactory-lab

Runtime error

App Files Files Community

microfactory-lab / learn /loop.py

kylebrodeur

deploy: update Space from deploy_preflight --push

0b338fe verified 11 days ago

Raw

History Blame Contribute Delete

3.54 kB

	"""The closed learning loop — the primary thing we demo.

	One iteration: the policy PROPOSES settings → the Spine vetoes unsafe values →
	the simulated world returns an outcome (the printer's role) → the outcome is
	written to the ledger as precedent AND folded into the policy. Run it N times
	on the same job and watch quality climb from failure to clean: knowledge
	compounding, made literal and self-driving.

	Deterministic and offline by design (no LLM call in the loop) so the demo is
	reproducible and fast. The cockpit's live path still runs the real LLM with
	this same policy injected — see chief_engineer.advise / prompts.policy_note.
	"""

	from __future__ import annotations

	from dataclasses import dataclass, field
	from datetime import datetime, timezone

	from core.ledger import LedgerManager
	from learn.policy import LearnedPolicy
	from core.models import Environment, Job, LessonEntry, PrintSettings
	from sim.outcome import SimResult, simulate
	from core.spine import SpineValidator

	_SPINE = SpineValidator()


	@dataclass
	class IterationRecord:
	n: int
	settings: PrintSettings
	result: SimResult
	learned: str # what the policy changed
	clamped: bool


	@dataclass
	class SessionResult:
	job: Job
	env: Environment
	records: list[IterationRecord] = field(default_factory=list)

	@property
	def trajectory(self) -> list[float]:
	return [r.result.quality for r in self.records]

	@property
	def first_success(self) -> int \| None:
	for r in self.records:
	if r.result.outcome == "success":
	return r.n
	return None


	def _record_lesson(job: Job, env: Environment, s: PrintSettings, result: SimResult,
	ledger: LedgerManager) -> None:
	verb = {"success": "printed clean", "failed_sag": "sagged",
	"failed_stringing": "strung"}.get(result.outcome, result.outcome)
	lesson = (f"[sim] {job.material} {job.geometry_type} at {env.temp:.0f}°C/{env.humidity:.0f}% RH "
	f"{verb} (q={result.quality:.2f}) with nozzle {s.nozzle_temp:.0f}°C, fan {s.fan_pct:.0f}%, "
	f"retraction {s.retraction_mm:.1f}mm.")
	ledger.append(LessonEntry(
	job_id=f"sim-{datetime.now(timezone.utc).strftime('%H%M%S%f')}",
	material=job.material, geometry_type=job.geometry_type,
	env_temp=env.temp, env_humidity=env.humidity, outcome=result.outcome,
	lesson=lesson, source="sim", timestamp=datetime.now(timezone.utc).isoformat()))


	def run_iteration(job: Job, env: Environment, policy: LearnedPolicy, ledger: LedgerManager,
	n: int, record: bool = True, overrides: PrintSettings \| None = None) -> IterationRecord:
	proposed = overrides if overrides is not None else policy.propose(job.material, job.geometry_type, env)
	checked = _SPINE.check(proposed, job.material)
	settings = checked.settings
	result = simulate(settings, job, env)
	if record:
	_record_lesson(job, env, settings, result, ledger)
	learned = policy.update(job.material, job.geometry_type, env, result)
	return IterationRecord(n=n, settings=settings, result=result, learned=learned,
	clamped=bool(checked.vetoes))


	def run_session(job: Job, env: Environment, iterations: int, policy: LearnedPolicy,
	ledger: LedgerManager) -> SessionResult:
	sess = SessionResult(job=job, env=env)
	for i in range(1, iterations + 1):
	sess.records.append(run_iteration(job, env, policy, ledger, i))
	return sess