| |
| """Generate the ground-truth "answer key" edit-diff turntables (editing fixtures). |
| |
| For each *editing* fixture (one that ships an ``input.step`` seed) this renders |
| the reference companion to the per-submission edit diff: the GT drawn as a |
| translucent ghost with the **correct change painted blue** (added material on the |
| GT body, removed material as a blue phantom of the input). See |
| :func:`cadgenbench.common.edit_diff.build_gt_edit_diff_shapes`. |
| |
| Like :mod:`generate_gt_turntables`, the result is a property of the **data |
| revision** (GT vs input), not of any submission, so this runs once per data |
| revision and both the gallery's ground-truth row and every per-submission report |
| reference the same webp via the GT proxy. One clip is written per fixture: |
| |
| - ``<fixture>/renders/edit_diff_gt.webp`` -- full turntable. |
| |
| The GT mesh comes from the trusted sidecar (no tessellation); the input mesh is |
| tessellated once at the GT's deflection so the GT-vs-input edit region is found |
| at one consistent scale (mirrors the eval's ``_editing_input_mesh``). |
| |
| Run locally (against checkouts), render only:: |
| |
| python tools/generate_gt_edit_diff.py \ |
| --gt-root ../cadgenbench-data-gt --inputs-root ../cadgenbench-data \ |
| --out-dir ../out/gt_edit_diff --no-upload |
| |
| Drop ``--no-upload`` (and set an ``HF_TOKEN`` with **write** scope on the |
| private GT dataset) to commit the webps, or run it on an HF GPU job exactly |
| like ``generate_gt_turntables.py``. |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import os |
| import subprocess |
| import sys |
| import tempfile |
| from pathlib import Path |
|
|
| from huggingface_hub import CommitOperationAdd, HfApi, hf_hub_download |
|
|
| |
| |
# If the in-repo ``cadgenbench/src`` directory exists (i.e. we are running
# from a checkout), put it first on ``sys.path``.
_REPO_ROOT = Path(__file__).resolve().parents[2]
_SRC = _REPO_ROOT.joinpath("cadgenbench", "src")
if _SRC.is_dir():
    sys.path.insert(0, str(_SRC))
|
|
| from cadgenbench.common.artifacts import StepArtifacts |
| from cadgenbench.common.edit_diff import render_gt_edit_diff_turntable |
|
|
# File names looked up inside each fixture directory.
GT_STEP_NAME = "ground_truth.step"
GT_SIDECAR_NAME = "ground_truth.mesh.npz"
INPUT_STEP_NAME = "input.step"

# Per-fixture relative path of the rendered answer-key turntable.
FULL_NAME = "renders/edit_diff_gt.webp"

# Number of file additions bundled into one Hub commit.
COMMIT_CHUNK = 60
|
|
|
|
def _default_repo_id() -> str:
    """Return the default GT dataset repo id.

    ``HF_DATA_GT_REPO`` is used when present in the environment; otherwise the
    id is ``<HF_ORG>/cadgenbench-data-gt``, with ``HF_ORG`` falling back to
    ``HuggingAI4Engineering``.
    """
    org = os.getenv("HF_ORG", "HuggingAI4Engineering")
    fallback = f"{org}/cadgenbench-data-gt"
    return os.getenv("HF_DATA_GT_REPO", fallback)
|
|
|
|
def _default_inputs_repo_id() -> str:
    """Return the default inputs dataset repo id.

    ``HF_DATA_REPO`` is used when present in the environment; otherwise the id
    is ``<HF_ORG>/cadgenbench-data``, with ``HF_ORG`` falling back to
    ``HuggingAI4Engineering``.
    """
    org = os.getenv("HF_ORG", "HuggingAI4Engineering")
    fallback = f"{org}/cadgenbench-data"
    return os.getenv("HF_DATA_REPO", fallback)
|
|
|
|
def _fixture_ids_with(
    api: HfApi, repo_id: str, root: Path | None, filename: str,
) -> set[str]:
    """Ids of top-level fixture directories that directly contain *filename*.

    Reads the local checkout *root* when given, otherwise lists the files of
    the Hub dataset *repo_id*. Both sources apply the same rule: the file must
    be exactly ``<fixture>/<filename>``.
    """
    if root is not None:
        return {
            child.name for child in root.iterdir()
            if child.is_dir() and (child / filename).is_file()
        }
    ids: set[str] = set()
    for repo_path in api.list_repo_files(repo_id, repo_type="dataset"):
        fixture, sep, rest = repo_path.partition("/")
        # Require exactly one directory level: a nested hit such as
        # ``archive/3/<filename>`` is not a fixture the downloaders could
        # fetch as ``<fixture>/<filename>``.
        if fixture and sep and rest == filename:
            ids.add(fixture)
    return ids


def _editing_fixture_ids(
    api: HfApi,
    gt_repo: str,
    inputs_repo: str,
    gt_root: Path | None,
    inputs_root: Path | None,
) -> list[str]:
    """Fixture ids with BOTH a ``ground_truth.step`` and an ``input.step``.

    The ``input.step`` is what defines an editing fixture, so the intersection
    of the two repos (or two checkouts) is exactly the editing set.

    Args:
        api: Hub client; only used for a side whose ``*_root`` is ``None``.
        gt_repo: GT dataset repo id (Hub listing fallback).
        inputs_repo: Inputs dataset repo id (Hub listing fallback).
        gt_root: Local GT checkout, or ``None`` to list the Hub repo.
        inputs_root: Local inputs checkout, or ``None`` to list the Hub repo.

    Returns:
        The ids ordered by length first, then lexicographically, so that
        e.g. ``"9"`` comes before ``"10"``.
    """
    gt_ids = _fixture_ids_with(api, gt_repo, gt_root, GT_STEP_NAME)
    in_ids = _fixture_ids_with(api, inputs_repo, inputs_root, INPUT_STEP_NAME)
    return sorted(gt_ids & in_ids, key=lambda s: (len(s), s))
|
|
|
|
def _materialize_gt(
    api: HfApi, repo_id: str, fixture: str, gt_root: Path | None,
    cache_dir: Path, token: str | None,
) -> Path:
    """Return a local directory with this fixture's GT STEP and mesh sidecar.

    With a local checkout (*gt_root*), the fixture's directory is returned
    as-is. Otherwise both files are downloaded from the Hub dataset *repo_id*
    and copied side by side into ``<cache_dir>/gt/<fixture>``; the sidecar
    must sit next to the STEP so ``StepArtifacts`` takes the trusted-mesh path
    (no tessellation, no validation).
    """
    # Local checkout: use the fixture directory directly (nothing is copied).
    if gt_root is not None:
        return gt_root / fixture

    fixture_dir = cache_dir / "gt" / fixture
    fixture_dir.mkdir(parents=True, exist_ok=True)
    for filename in (GT_STEP_NAME, GT_SIDECAR_NAME):
        downloaded = Path(hf_hub_download(
            repo_id=repo_id,
            filename=f"{fixture}/{filename}",
            repo_type="dataset",
            token=token,
        ))
        copy_path = fixture_dir / filename
        if copy_path.exists():
            # Already copied; keep the existing file.
            continue
        copy_path.write_bytes(downloaded.read_bytes())
    return fixture_dir
|
|
|
|
def _materialize_input(
    api: HfApi, repo_id: str, fixture: str, inputs_root: Path | None,
    cache_dir: Path, token: str | None,
) -> Path:
    """Return the local path of this fixture's ``input.step``.

    Points into the local checkout when *inputs_root* is given; otherwise the
    file is downloaded from the Hub dataset *repo_id* and the path returned by
    the download is used directly.
    """
    if inputs_root is None:
        downloaded = hf_hub_download(
            repo_id=repo_id,
            filename=f"{fixture}/{INPUT_STEP_NAME}",
            repo_type="dataset",
            token=token,
        )
        return Path(downloaded)
    return inputs_root / fixture / INPUT_STEP_NAME
|
|
|
|
def _render_fixture(gt_dir: Path, input_step: Path) -> bytes:
    """Render the answer-key turntable for one editing fixture.

    The GT mesh is loaded from ``ground_truth.step`` in *gt_dir* (flagged as
    ground truth); the input mesh is built from *input_step* using the GT
    mesh's ``linear_deflection_mm`` as the deflection override. Both meshes
    are handed to ``render_gt_edit_diff_turntable`` and its result is returned.
    """
    gt_artifacts = StepArtifacts(gt_dir / GT_STEP_NAME, is_ground_truth=True)
    gt_mesh = gt_artifacts.mesh()
    deflection = gt_mesh.linear_deflection_mm
    input_artifacts = StepArtifacts(input_step, deflection_override=deflection)
    input_mesh = input_artifacts.mesh()
    return render_gt_edit_diff_turntable(gt_mesh, input_mesh)
|
|
|
|
def _commit_in_chunks(api: HfApi, repo_id: str, ops: list[CommitOperationAdd]) -> None:
    """Commit *ops* to the dataset *repo_id*, ``COMMIT_CHUNK`` operations at a time.

    Each batch becomes its own commit whose message carries the 1-based range
    of operations it covers. An empty *ops* list results in no commit.
    """
    for offset in range(0, len(ops), COMMIT_CHUNK):
        batch = ops[offset:offset + COMMIT_CHUNK]
        first = offset + 1
        last = offset + len(batch)
        message = f"add GT edit-diff answer-key webp(s) [{first}-{last}]"
        api.create_commit(
            repo_id=repo_id,
            repo_type="dataset",
            operations=batch,
            commit_message=message,
        )
        print(f" committed {len(batch)} file(s)", flush=True)
|
|
|
|
def _resolved_fixtures(
    parser: argparse.ArgumentParser, args: argparse.Namespace,
    api: HfApi, gt_root: Path | None, inputs_root: Path | None,
) -> list[str]:
    """Return the editing fixtures selected by ``--fixtures`` / ``--limit``.

    Starts from every editing fixture, keeps only the comma-separated ids in
    ``args.fixtures`` (when given), then truncates to ``args.limit`` (when
    given). Reports an argparse error if nothing is left.
    """
    selected = _editing_fixture_ids(
        api, args.repo_id, args.inputs_repo_id, gt_root, inputs_root,
    )
    if args.fixtures:
        requested = {part.strip() for part in args.fixtures.split(",")}
        requested.discard("")  # ignore empty entries such as a trailing comma
        selected = [fx for fx in selected if fx in requested]
    if args.limit is not None:
        selected = selected[: args.limit]
    if selected:
        return selected
    parser.error("No editing fixtures matched.")
    return selected
|
|
|
|
def _upload_from_out_dir(api: HfApi, repo_id: str, out_dir: Path, fixtures: list[str]) -> None:
    """Commit the ``edit_diff_gt.webp`` files found under *out_dir* to *repo_id*.

    For each id in *fixtures*, ``<out_dir>/<fixture>/renders/edit_diff_gt.webp``
    is uploaded if it exists; fixtures without a rendered file are skipped.
    Prints a notice and returns without committing when no file is found.
    """
    rendered = (
        (fixture, out_dir / fixture / "renders" / "edit_diff_gt.webp")
        for fixture in fixtures
    )
    ops: list[CommitOperationAdd] = [
        CommitOperationAdd(f"{fixture}/{FULL_NAME}", webp.read_bytes())
        for fixture, webp in rendered
        if webp.exists()
    ]
    if ops:
        print(f"Uploading {len(ops)} file(s) to {repo_id} ...", flush=True)
        _commit_in_chunks(api, repo_id, ops)
        return
    print("Nothing to upload (no rendered files found in --out-dir).", flush=True)
|
|
|
|
def _run_upload_only(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Handle ``--upload-only``: commit the webps already under ``--out-dir``.

    Every ``<out-dir>/<fixture>/renders/edit_diff_gt.webp`` is discovered by
    globbing and uploaded to ``args.repo_id``. Reports an argparse error when
    ``--out-dir`` is missing / not a directory or contains no rendered webp.
    Returns 0 once the upload step has run.
    """
    out_dir_ok = args.out_dir is not None and args.out_dir.is_dir()
    if not out_dir_ok:
        parser.error("--upload-only requires an existing --out-dir.")
    api = HfApi(token=os.environ.get("HF_TOKEN"))
    out_dir = args.out_dir.resolve()
    # The fixture id is the directory two levels above the webp file.
    fixtures = [
        webp.parents[1].name
        for webp in out_dir.glob("*/renders/edit_diff_gt.webp")
    ]
    fixtures.sort(key=lambda name: (len(name), name))
    if not fixtures:
        parser.error(f"No edit_diff_gt.webp found under {out_dir}")
    print(f"Uploading {len(fixtures)} fixture webp(s) from {out_dir} -> {args.repo_id}", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)
    _upload_from_out_dir(api, args.repo_id, out_dir, fixtures)
    print("Done.", flush=True)
    return 0
|
|
|
|
def _run_isolated(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Render each fixture in a fresh subprocess (one fixture == ~240 plotters).

    Spawns this same tool with ``--fixtures <id> --no-upload`` per fixture so the
    GL context is fully released between fixtures, then (optionally) uploads once
    from ``--out-dir``. Worker stdout/stderr inherit the parent's, so progress
    and the VTK noise land in the same streams the non-isolated path uses.

    Only fixtures whose worker exited successfully in THIS run are uploaded, so
    a stale webp left in ``--out-dir`` by an earlier run is never committed for
    a fixture that just failed.

    Returns:
        0 when every worker succeeded, 1 when at least one failed. Invalid
        arguments are reported through ``parser.error``.
    """
    if args.out_dir is None:
        parser.error("--isolate requires --out-dir (workers render to disk).")
    token = os.environ.get("HF_TOKEN")
    if not args.no_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")
    api = HfApi(token=token)
    gt_root = args.gt_root.resolve() if args.gt_root else None
    inputs_root = args.inputs_root.resolve() if args.inputs_root else None
    for label, root in (("--gt-root", gt_root), ("--inputs-root", inputs_root)):
        if root is not None and not root.is_dir():
            parser.error(f"{label} does not exist: {root}")

    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)
    print(f"Isolated render of {len(fixtures)} editing fixture(s) (one subprocess each).", flush=True)
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    # Workers always render to disk only; any upload happens once, below.
    base_cmd = [
        sys.executable, str(Path(__file__).resolve()),
        "--out-dir", str(args.out_dir), "--no-upload",
        "--repo-id", args.repo_id, "--inputs-repo-id", args.inputs_repo_id,
    ]
    if gt_root is not None:
        base_cmd += ["--gt-root", str(gt_root)]
    if inputs_root is not None:
        base_cmd += ["--inputs-root", str(inputs_root)]

    failures: list[str] = []
    for i, fixture in enumerate(fixtures, start=1):
        print(f"=== [{i}/{len(fixtures)}] {fixture} ===", flush=True)
        proc = subprocess.run([*base_cmd, "--fixtures", fixture])
        if proc.returncode != 0:
            failures.append(fixture)

    done = len(fixtures) - len(failures)
    print(f"Isolated render complete: {done}/{len(fixtures)} ok, {len(failures)} failed.", flush=True)
    if failures:
        print(f"FAILED: {' '.join(failures)}", flush=True)
    if not args.no_upload:
        # Skip failed fixtures: whatever file they have on disk was not
        # produced by this run.
        failed = set(failures)
        succeeded = [fixture for fixture in fixtures if fixture not in failed]
        _upload_from_out_dir(api, args.repo_id, args.out_dir, succeeded)
    print("Done.", flush=True)
    return 1 if failures else 0
|
|
|
|
def main() -> int:
    """Command-line entry point.

    Parses the arguments and dispatches to ``--upload-only`` or ``--isolate``
    handling when requested. Otherwise renders every selected editing fixture
    in this process, optionally mirrors each webp to ``--out-dir``, and (unless
    ``--no-upload``) commits the rendered webps to the GT dataset.

    Returns:
        0 when every fixture rendered, 1 when at least one failed. Invalid
        arguments are reported through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the docstring's layout (command example, bullet list) in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--gt-root", type=Path, default=None,
        help="Local cadgenbench-data-gt checkout. Omit to download from the Hub.",
    )
    parser.add_argument(
        "--inputs-root", type=Path, default=None,
        help="Local cadgenbench-data checkout (holds input.step). Omit for Hub.",
    )
    parser.add_argument("--repo-id", default=_default_repo_id())
    parser.add_argument("--inputs-repo-id", default=_default_inputs_repo_id())
    parser.add_argument("--fixtures", help="Comma-separated fixture ids. Omit for all editing fixtures.")
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument(
        "--out-dir", type=Path, default=None,
        help="Also write each webp/png here (e.g. for local inspection).",
    )
    parser.add_argument(
        "--no-upload", action="store_true",
        help="Render only; do not commit to the GT dataset.",
    )
    parser.add_argument(
        "--upload-only", action="store_true",
        help=(
            "Skip rendering; commit the ``edit_diff_gt.webp`` files already under "
            "--out-dir to the GT dataset. Use after an isolated render run."
        ),
    )
    parser.add_argument(
        "--isolate", action="store_true",
        help=(
            "Render each fixture in its own subprocess. Works around macOS "
            "offscreen VTK losing its GL context after many sequential Plotter "
            "create/close cycles (not needed on the Linux EGL eval job). Implies "
            "render-to-out-dir; upload, if requested, runs once from --out-dir."
        ),
    )
    args = parser.parse_args()

    if args.upload_only:
        return _run_upload_only(parser, args)
    if args.isolate:
        return _run_isolated(parser, args)

    token = os.environ.get("HF_TOKEN")
    will_upload = not args.no_upload
    # Fail fast, before any Hub listing or rendering, when the upload cannot
    # happen anyway (same order as the isolated path).
    if will_upload and not token:
        parser.error("HF_TOKEN required to upload (or pass --no-upload).")
    api = HfApi(token=token)
    gt_root = args.gt_root.resolve() if args.gt_root else None
    inputs_root = args.inputs_root.resolve() if args.inputs_root else None
    for label, root in (("--gt-root", gt_root), ("--inputs-root", inputs_root)):
        if root is not None and not root.is_dir():
            parser.error(f"{label} does not exist: {root}")

    # Same --fixtures / --limit selection the isolated path uses.
    fixtures = _resolved_fixtures(parser, args, api, gt_root, inputs_root)

    print(
        f"Rendering {len(fixtures)} editing GT answer-key turntable(s)"
        + ("" if args.no_upload else f" -> {args.repo_id} (will upload)"),
        flush=True,
    )
    print(f"FIXTURES: {' '.join(fixtures)}", flush=True)

    ops: list[CommitOperationAdd] = []
    failures: list[str] = []
    with tempfile.TemporaryDirectory(prefix="gt-edit-diff-") as tmp:
        cache_dir = Path(tmp)
        for i, fixture in enumerate(fixtures, start=1):
            print(f"[{i}/{len(fixtures)}] {fixture} ...", flush=True)
            try:
                gt_dir = _materialize_gt(
                    api, args.repo_id, fixture, gt_root, cache_dir, token,
                )
                input_step = _materialize_input(
                    api, args.inputs_repo_id, fixture, inputs_root, cache_dir, token,
                )
                full = _render_fixture(gt_dir, input_step)
            except Exception as e:
                # Per-fixture boundary: report and keep going so one bad
                # fixture does not abort the whole batch.
                print(f" FAILED {type(e).__name__}: {e}", flush=True)
                failures.append(fixture)
                continue

            print(f" ok: full={len(full) // 1024}KB", flush=True)

            if args.out_dir is not None:
                fx_out = args.out_dir / fixture / "renders"
                fx_out.mkdir(parents=True, exist_ok=True)
                (fx_out / "edit_diff_gt.webp").write_bytes(full)

            # Only keep the bytes around when they are going to be committed.
            if will_upload:
                ops.append(CommitOperationAdd(f"{fixture}/{FULL_NAME}", full))

    done = len(fixtures) - len(failures)
    print(
        f"Rendered {done}/{len(fixtures)} fixture(s) ({len(failures)} failed).",
        flush=True,
    )
    if failures:
        print(f"FAILED: {' '.join(failures)}", flush=True)
    if not will_upload:
        print("Upload skipped (--no-upload).", flush=True)
        return 1 if failures else 0
    print(f"Uploading {len(ops)} file(s) to {args.repo_id} ...", flush=True)
    _commit_in_chunks(api, args.repo_id, ops)
    print("Done.", flush=True)
    return 1 if failures else 0
|
|
|
|
# Script entry point: exit with the status code returned by ``main()``.
if __name__ == "__main__":
    sys.exit(main())
|
|