lean-laguna / scripts /parse_rl_run_big.py

Lean Laguna: Laguna XS.2 + DFlash — lossless single-GPU speedup + cheaper RL rollouts

8612587 1 day ago

3.6 kB

	#!/usr/bin/env python3
	"""parse_rl_run_big.py — extract the BIGGER hosted-RL run's evidence WITHOUT clobbering the
	canonical 20-step files (rl_after.json / rl_train_curve.json the card cites).

	Bigger run qaeh3796d0zdtdb0uzs1xo2j: 40 steps, train pool HumanEval 0-99 (num=100), DISJOINT
	held-out HumanEval 100-163 (num=64), eval_base_model=true. Writes results/rl_after_big.json +
	results/rl_train_curve_big.json. Honest about whatever the numbers are; pre-committed split, no
	eval-shopping; report null/negative truthfully.

	Usage: parse_rl_run_big.py [run_id]
	"""
	import json, re, subprocess, sys
	from pathlib import Path

	RUN = sys.argv[1] if len(sys.argv) > 1 else "qaeh3796d0zdtdb0uzs1xo2j"
	RESULTS = Path(__file__).resolve().parents[1] / "results"

	# Per-step trainer progress line. The pipes are escaped as `\|` so they match
	# literal separators; an unescaped `|` would split the pattern into
	# alternatives and leave most capture groups empty.
	STEP_RE = re.compile(
	    r"Step (\d+) \| Time: ([\d.]+)s \| Reward: ([\d.]+) \| Seq\. Length: ([\d.]+)"
	)
	# An eval is logged as an announcement line followed later by a result line.
	EVAL_START_RE = re.compile(r"Running evals at step=(\d+)")
	EVAL_DONE_RE = re.compile(r"Evaluated art87able/spec-rl in ([\d.]+)s \(Avg@1=([\d.]+)")


	def fetch_logs(run_id):
	    """Return the stdout of `prime train logs <run_id>`.

	    Exits the process with the CLI's stderr when the command returns a
	    non-zero status, so a failed fetch cannot overwrite existing result files
	    with empty data.
	    """
	    proc = subprocess.run(["prime", "train", "logs", run_id], capture_output=True, text=True)
	    if proc.returncode != 0:
	        sys.exit(f"prime train logs {run_id} failed (exit {proc.returncode}): {proc.stderr.strip()}")
	    return proc.stdout


	def parse_train_steps(logs):
	    """Extract per-step training records from the log text, sorted by step.

	    Each record holds the step number, the reward rounded to 4 places, the
	    step time rounded to 2 places and the sequence length rounded to 1 place.
	    """
	    steps = [
	        {
	            "step": int(m.group(1)),
	            "train_reward": round(float(m.group(3)), 4),
	            "time_s": round(float(m.group(2)), 2),
	            "seq_len": round(float(m.group(4)), 1),
	        }
	        for m in STEP_RE.finditer(logs)
	    ]
	    steps.sort(key=lambda d: d["step"])
	    return steps


	def parse_heldout_evals(logs):
	    """Pair each "Running evals at step=N" line with the next result line.

	    A result line with no preceding unconsumed announcement is ignored, and
	    each announcement is consumed by at most one result. Returns the records
	    sorted by step.
	    """
	    evals = []
	    pending_step = None
	    for line in logs.splitlines():
	        started = EVAL_START_RE.search(line)
	        if started:
	            pending_step = int(started.group(1))
	        done = EVAL_DONE_RE.search(line)
	        if done and pending_step is not None:
	            evals.append({
	                "step": pending_step,
	                "heldout_avg_at_1": round(float(done.group(2)), 4),
	                "eval_time_s": round(float(done.group(1)), 2),
	            })
	            pending_step = None
	    evals.sort(key=lambda d: d["step"])
	    return evals


	def build_train_curve(run_id, steps):
	    """Assemble the payload written to rl_train_curve_big.json."""
	    # NOTE(review): the hyperparameters below are literals, not read from the
	    # run; they describe the default run id only -- confirm before reusing
	    # this script with another run_id.
	    return {
	        "note": "Per-step dense unit-test reward on the TRAIN pool (HumanEval 0-99) during the BIGGER real "
	                "hosted GRPO post-train of Laguna XS.2 on art87able/spec-rl. temperature=1.0; read the trajectory.",
	        "run_id": run_id, "env": "art87able/spec-rl@0.1.5", "model": "poolside/Laguna-XS.2",
	        "max_steps": 40, "batch_size": 64, "rollouts_per_example": 8, "learning_rate": 1e-6,
	        "train_pool": "HumanEval/0-99", "free_hosted_train": True, "steps": steps,
	    }


	def build_after(run_id, evals):
	    """Assemble the payload written to rl_after_big.json.

	    The baseline is the lowest-step eval and the final value is the
	    highest-step eval; both are None when no evals were parsed.
	    "every_ckpt_ge_base" is True only when evals exist and none falls below
	    the baseline.
	    """
	    # NOTE(review): the baseline is reported as "before_step0_heldout" even if
	    # the lowest parsed eval is not step 0 -- confirm the log always contains
	    # the step-0 eval.
	    base = evals[0]["heldout_avg_at_1"] if evals else None
	    final = evals[-1]["heldout_avg_at_1"] if evals else None
	    have_both = base is not None and final is not None
	    clean = have_both and all(e["heldout_avg_at_1"] >= base for e in evals)
	    return {
	        "note": "BIGGER run held-out (HumanEval 100-163, DISJOINT from train pool 0-99, n=64) eval trajectory "
	                "computed BY THE TRAINER via eval_base_model=true + interval=5. step 0 = BEFORE (untrained base); "
	                "later steps = AFTER. Same harness, hosted inference, no checkpoint served. Pre-committed split, "
	                "no eval-shopping; MoE/temperature eval noise applies (see determinism_check.json).",
	        "run_id": run_id, "env": "art87able/spec-rl@0.1.5", "split": "HumanEval/100-163 (held-out, n=64)",
	        "model": "poolside/Laguna-XS.2", "max_steps": 40,
	        "before_step0_heldout": base, "after_final_heldout": final,
	        "delta": round(final - base, 4) if have_both else None,
	        "every_ckpt_ge_base": clean, "trajectory": evals,
	    }


	def main(run_id=RUN, results_dir=RESULTS):
	    """Fetch the run's logs, parse them, write both JSON files and print a summary."""
	    logs = fetch_logs(run_id)
	    steps = parse_train_steps(logs)
	    evals = parse_heldout_evals(logs)
	    train_curve = build_train_curve(run_id, steps)
	    after = build_after(run_id, evals)

	    # Create the output directory on a fresh checkout instead of failing.
	    results_dir.mkdir(parents=True, exist_ok=True)
	    (results_dir / "rl_train_curve_big.json").write_text(json.dumps(train_curve, indent=2), encoding="utf-8")
	    (results_dir / "rl_after_big.json").write_text(json.dumps(after, indent=2), encoding="utf-8")

	    print("train steps parsed:", len(steps), "rewards:", [s["train_reward"] for s in steps])
	    print("held-out evals:", evals)
	    print("BEFORE:", after["before_step0_heldout"], "AFTER:", after["after_final_heldout"],
	          "delta:", after["delta"], "every_ckpt>=base:", after["every_ckpt_ge_base"])


	if __name__ == "__main__":
	    main()