Spaces:

HuggingAI4Engineering
/

CADGenBench

Running

CADGenBench / tools /backfill_report_grid.py

Michael Rabinovich

leaderboard: backfill tool to grid-ify already-published reports

ba3eefb 6 days ago

9.61 kB

	#!/usr/bin/env python3
	"""Backfill the thumbnail-grid summary view into already-published reports.

	The report generator (``cadgenbench.eval.report.single_run``) now renders the
	summary view as a grouped thumbnail grid instead of a flat table. Reports
	produced before that change are static HTML files in the submissions dataset
	(``reports/<id>.html``); changing the generator does nothing to them. This
	one-time tool rewrites those published reports **in place, without re-evaluating
	or regenerating from run dirs**:

	- it reads each report's existing summary table (sample number, status, CAD
	score) and detail cards (which fixtures are editing) plus the render-bucket
	base URL already embedded in the file;
	- rebuilds the summary view as the grid using the shared builders from
	``single_run`` (so a backfilled report is byte-identical to a freshly
	generated one), pointing editing cards at the ``edit_diff.png`` still and
	generation cards at the output ``iso.png`` — all assets that already exist;
	- injects the shared grid CSS/JS; the detail cards, header, score text and
	download button are left untouched.

	Run on local files (with ``-o`` the result goes to a separate path, good for
	eyeballing; without ``-o`` each input file is overwritten in place)::

	    python tools/backfill_report_grid.py --files /tmp/report.html -o /tmp/out.html

	Rewrite every published report in the submissions dataset (needs a write token)::

	    HF_TOKEN=<write-token> python tools/backfill_report_grid.py --dataset
	    python tools/backfill_report_grid.py --dataset --dry-run  # list only
	"""
	from __future__ import annotations

	import argparse
	import os
	import re
	import sys
	from pathlib import Path

	from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download

	# cadgenbench (for the shared grid builders) must be importable.
	# _REPO_ROOT is the grandparent of the directory that contains this script.
	# When a ``cadgenbench/src`` tree exists under it, that tree is placed at the
	# front of ``sys.path`` so the ``cadgenbench`` import below resolves against
	# it; when it does not exist, ``sys.path`` is left untouched and the import
	# relies on whatever is already importable.
	_REPO_ROOT = Path(__file__).resolve().parents[2]
	_SRC = _REPO_ROOT / "cadgenbench" / "src"
	if _SRC.is_dir():
	    sys.path.insert(0, str(_SRC))

	from cadgenbench.eval.report.single_run import ( # noqa: E402
	_GRID_CSS,
	_GRID_JS,
	_render_grid_controls,
	grid_card_html,
	render_grid_groups,
	)

	# --- configuration ----------------------------------------------------------
	# Hub organisation and submissions dataset id. Both can be overridden through
	# the environment (``HF_ORG`` / ``HF_SUBMISSIONS_REPO``).
	HF_ORG = os.getenv("HF_ORG", "HuggingAI4Engineering")
	SUBMISSIONS_REPO = os.getenv("HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions")
	# URL prefix under which the reports reference task-input assets.
	INPUT_PROXY_BASE = "/task-input"
	# File name of the still image used as the output thumbnail of editing cards.
	EDIT_DIFF_STILL = "edit_diff.png"

	# --- parsing the old flat-table report -------------------------------------
	# Group 1 is the render base: the URL up to and including the single path
	# segment that follows ``/resolve/renders/``. It is taken from any embedded
	# ``<base>/<fixture>/<file>.png`` or ``.webp`` URL.
	_RENDER_BASE_RE = re.compile(
	    r'(https?://[^\s"\']+?/resolve/renders/[^/"\']+)/[^/"\']+/[^"\']+\.(?:png|webp)'
	)
	# One summary-table row: group 1 is the index passed to ``showDetail(...)``,
	# group 2 is the row's inner HTML (its cells). ``re.S`` lets the cells span
	# several lines.
	_ROW_RE = re.compile(
	    r'<tr class="q-[a-z]+" onclick="showDetail\((\d+)\)"[^>]*>(.*?)</tr>', re.S
	)
	# First attribute-less ``<td>`` of a row: the fixture name.
	_NAME_RE = re.compile(r"<td>([^<]+)</td>")
	# Text of the status pill.
	_STATUS_RE = re.compile(r'status-pill status-\w+">([^<]+)<')
	# Raw CAD score stored in the ``data-v`` attribute of the bolded score cell.
	_CAD_RE = re.compile(r'<td data-v="([^"]+)"><b>')
	# The summary container. NOTE(review): the lazy match ends at the first
	# ``</div>``, so this assumes the old flat-table summary view contains no
	# nested ``<div>`` -- confirm against a pre-grid report.
	_SUMMARY_VIEW_RE = re.compile(r'(<div id="summary-view">).*?(</div>)', re.S)
	# Help paragraph placed at the top of the rebuilt summary view.
	# NOTE(review): the last tint label contains a raw ``<``; verify whether the
	# generator emits ``&lt;`` there before relying on byte-identical output.
	_GRID_HELP = (
	    '<p class="grid-help">Click a card to view details. '
	    '<span class="kbd">j</span>/<span class="kbd">k</span> to navigate, '
	    '<span class="kbd">Esc</span> to return. Each card shows the input and the '
	    "candidate output. Score tint: "
	    "<span class='gtint q-high'>≥0.90</span> "
	    "<span class='gtint q-mid'>≥0.60</span> "
	    "<span class='gtint q-low'><0.60</span> CAD score.</p>"
	)


	# Matches a starting-shape render URL (``/task-input/<fixture>/renders/``).
	_INPUT_SHAPE_RE = re.compile(re.escape(INPUT_PROXY_BASE) + r"/[^\"']+/renders/")


	def _editing_idxs(doc: str) -> set[int]:
	    """Collect the ``data-idx`` of every detail card that is an editing task.

	    A card counts as editing when its markup references the starting shape's
	    renders (``/task-input/<fixture>/renders/...``), which the report emits
	    for every editing sample because they derive from the ``input.step``
	    input. The edit-diff turntable / ``(edit diff)`` heading is deliberately
	    not used as the marker: the old generator laid out an invalid editing
	    candidate like a generation sample (no diff), so those markers miss
	    invalid edits while the starting-shape renders are always present. This
	    mirrors the new generator's ``wants_shape`` grouping, so a backfilled
	    report and a freshly generated one classify identically.

	    Args:
	        doc: Full HTML text of a report.

	    Returns:
	        The set of card indices classified as editing tasks.
	    """
	    idx_attr = re.compile(r'\s*data-idx="(\d+)"')
	    # Element 0 of the split is everything before the first card; skip it.
	    cards = doc.split('<div class="fixture-card"')[1:]
	    editing: set[int] = set()
	    for card in cards:
	        head = idx_attr.match(card)
	        if head is None:
	            continue
	        if _INPUT_SHAPE_RE.search(card) is None:
	            continue
	        editing.add(int(head.group(1)))
	    return editing


	def rewrite_report_html(doc: str) -> str | None:
	    """Return the report rewritten with the grid summary view, or ``None``.

	    ``None`` means "leave unchanged". That is the result when the report is
	    already a grid, when it embeds no render-bucket URL to point the output
	    thumbnails at (not a hosted report), when no summary row could be parsed,
	    or when the ``summary-view`` container is missing.

	    Args:
	        doc: Full HTML text of a published report.

	    Returns:
	        The rewritten HTML, or ``None`` when the report must be left as is.
	    """
	    if 'class="ggrid"' in doc or 'id="groups"' in doc:
	        return None  # already backfilled
	    base_m = _RENDER_BASE_RE.search(doc)
	    if not base_m:
	        return None  # not a hosted report (e.g. base64-inlined local report)
	    render_base = base_m.group(1)
	    edit_idxs = _editing_idxs(doc)

	    gen_cards: list[str] = []
	    edit_cards: list[str] = []
	    for m in _ROW_RE.finditer(doc):
	        idx = int(m.group(1))
	        cells = m.group(2)
	        name_m = _NAME_RE.search(cells)
	        if not name_m:
	            continue  # a row without a name cell cannot be turned into a card
	        name = name_m.group(1).strip()
	        status_m = _STATUS_RE.search(cells)
	        status = status_m.group(1).strip() if status_m else "?"
	        cad_m = _CAD_RE.search(cells)
	        cad: float | None = None
	        if cad_m:
	            try:
	                v = float(cad_m.group(1))
	            except ValueError:
	                cad = None
	            else:
	                # Negative (and NaN) values are treated as "no score".
	                cad = v if v >= 0 else None
	        is_editing = idx in edit_idxs
	        if is_editing:
	            # Editing cards: starting-shape render in, edit-diff still out.
	            in_src = f"{INPUT_PROXY_BASE}/{name}/renders/iso.png"
	            out_src = f"{render_base}/{name}/{EDIT_DIFF_STILL}"
	        else:
	            # Generation cards: task input image in, candidate iso render out.
	            in_src = f"{INPUT_PROXY_BASE}/{name}/input.png"
	            out_src = f"{render_base}/{name}/iso.png"
	        card = grid_card_html(
	            idx=idx, name=name, is_editing=is_editing, status=status,
	            cad=cad, in_src=in_src, out_src=out_src,
	        )
	        (edit_cards if is_editing else gen_cards).append(card)

	    if not gen_cards and not edit_cards:
	        return None

	    new_inner = _GRID_HELP + _render_grid_controls() + render_grid_groups(
	        gen_cards, edit_cards,
	    )
	    # A callable replacement keeps backslashes in ``new_inner`` literal; the
	    # substitution count tells us whether the container was present at all.
	    doc, replaced = _SUMMARY_VIEW_RE.subn(
	        lambda mm: mm.group(1) + new_inner + "</div>", doc, count=1,
	    )
	    if not replaced:
	        return None
	    # Inject the shared grid styles + filtering behavior. ``str.replace`` is a
	    # no-op when the closing tag is absent from the document.
	    doc = doc.replace("</style>", _GRID_CSS + "</style>", 1)
	    doc = doc.replace("</body>", f"<script>{_GRID_JS}</script></body>", 1)
	    return doc


	def _run_files(files: list[Path], out: Path | None) -> int:
	    """Rewrite local report files and print one status line per file.

	    Files are read and written as UTF-8 explicitly, matching the UTF-8 bytes
	    that dataset mode uploads, instead of depending on the platform's default
	    encoding.

	    Args:
	        files: Report HTML files to process.
	        out: Destination path for the rewritten report; when ``None`` each
	            input file is overwritten in place.

	    Returns:
	        Always ``0``.
	    """
	    for f in files:
	        doc = f.read_text(encoding="utf-8")
	        new = rewrite_report_html(doc)
	        if new is None:
	            print(f" SKIP {f} (already grid / not a hosted report)")
	            continue
	        dest = out if out is not None else f
	        dest.write_text(new, encoding="utf-8")
	        print(f" wrote {dest} ({len(new) // 1024} KB)")
	    return 0


	def _run_dataset(api: HfApi, token: str | None, dry_run: bool, limit: int | None) -> int:
	    """Rewrite every ``reports/*.html`` in the submissions dataset.

	    Each report is downloaded, rewritten in memory and, unless ``dry_run`` is
	    set, queued; all queued reports are then pushed in a single commit.

	    Args:
	        api: Hub client used to list the repository files and create the commit.
	        token: Token passed to the download call; required to commit.
	        dry_run: When true, only report what would be rewritten.
	        limit: When not ``None``, process only the first ``limit`` report
	            paths in sorted order.

	    Returns:
	        ``0`` on success (including a dry run or nothing to rewrite), ``2``
	        when there are reports to commit but no token is available.
	    """
	    files = sorted(
	        f for f in api.list_repo_files(SUBMISSIONS_REPO, repo_type="dataset")
	        if f.startswith("reports/") and f.endswith(".html")
	    )
	    if limit is not None:
	        files = files[:limit]
	    print(f"Found {len(files)} report(s) in {SUBMISSIONS_REPO}.")
	    ops: list[CommitOperationAdd] = []
	    # Counted separately from ``ops`` because a dry run queues nothing.
	    rewritten = 0
	    for i, rel in enumerate(files, start=1):
	        local = hf_hub_download(
	            repo_id=SUBMISSIONS_REPO, filename=rel, repo_type="dataset", token=token,
	        )
	        new = rewrite_report_html(Path(local).read_text(encoding="utf-8"))
	        if new is None:
	            print(f" [{i}/{len(files)}] SKIP {rel} (already grid / not hosted)")
	            continue
	        rewritten += 1
	        print(f" [{i}/{len(files)}] {rel} -> grid ({len(new) // 1024} KB)")
	        if not dry_run:
	            ops.append(
	                CommitOperationAdd(path_in_repo=rel, path_or_fileobj=new.encode("utf-8"))
	            )
	    if dry_run:
	        # Report only the reports that would change, not every listed file.
	        print(f"Dry run: would rewrite {rewritten} candidate(s).")
	        return 0
	    if not ops:
	        print("Nothing to rewrite.")
	        return 0
	    if not token:
	        print("HF_TOKEN required to commit.", file=sys.stderr)
	        return 2
	    api.create_commit(
	        repo_id=SUBMISSIONS_REPO, repo_type="dataset", operations=ops,
	        commit_message="reports: backfill thumbnail-grid summary view",
	    )
	    print(f"Committed {len(ops)} rewritten report(s) to {SUBMISSIONS_REPO}.")
	    return 0


	def main() -> int:
	    """Parse the command line and dispatch to local-file or dataset mode.

	    Exactly one of ``--files`` / ``--dataset`` must be given. In dataset mode
	    the token is read from the ``HF_TOKEN`` environment variable.

	    Returns:
	        The exit status returned by the selected mode.
	    """
	    parser = argparse.ArgumentParser(
	        description=__doc__,
	        # The module docstring carries bullet lists and example commands; the
	        # default formatter would re-wrap them into one paragraph.
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	    )
	    src = parser.add_mutually_exclusive_group(required=True)
	    src.add_argument("--files", nargs="+", type=Path, help="Local report HTML files.")
	    src.add_argument(
	        "--dataset", action="store_true",
	        help="Rewrite every reports/*.html in the submissions dataset.",
	    )
	    parser.add_argument("-o", "--output", type=Path, help="Output path (single --files).")
	    parser.add_argument("--dry-run", action="store_true", help="List only (dataset mode).")
	    parser.add_argument(
	        "--limit", type=int, default=None,
	        help="Process at most this many reports, in sorted path order (dataset mode).",
	    )
	    args = parser.parse_args()

	    if args.files:
	        if args.output and len(args.files) != 1:
	            parser.error("-o/--output only valid with a single --files argument.")
	        return _run_files(args.files, args.output)

	    token = os.environ.get("HF_TOKEN")
	    return _run_dataset(HfApi(token=token), token, args.dry_run, args.limit)


	# Script entry point: exit the process with the status returned by main().
	if __name__ == "__main__":
	    raise SystemExit(main())