Spaces:

HuggingAI4Engineering
/

cadgenbench-eval-gpu

Paused

Michael Rabinovich Cursor committed 4 days ago

Commit

ed16355

1 Parent(s): 55616a2

eval: pass submission name + metrics URL to report generator

Reads submission_name from the unpacked meta.json and forwards it plus
the Space's /metrics route to `report single` so the hosted report
titles itself with the submission name and its metric pills deep-link to
the metrics explainer.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show

eval_job.py +36 -1

eval_job.py CHANGED Viewed

@@ -93,6 +93,9 @@ HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/"
 # with the routes registered in the leaderboard Space's ``app.py``.
 GT_PROXY_BASE_URL = "/gt"
 INPUT_PROXY_BASE_URL = "/task-input"
 def _render_base_url(submission_id: str) -> str:
@@ -267,7 +270,9 @@ def main() -> int:
     html_path = REPORT_HTML_DIR / f"{submission_id}.html"
     _run_report(
         RUN_DIR, html_path,
         render_base_url=_render_base_url(submission_id),
         download_url=_submission_zip_url(submission_id, submissions_repo),
     )
     report_json = _build_report_json(RUN_DIR)
@@ -453,22 +458,52 @@ def _run_eval(run_dir: Path, workers: int) -> None:
         )
 def _run_report(
     run_dir: Path,
     html_out: Path,
     *,
     render_base_url: str | None = None,
     download_url: str | None = None,
 ) -> None:
     """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
     Passes ``--render-base-url`` so candidate renders are referenced from the
-    public bucket rather than base64-inlined into the hosted HTML.
     """
     cmd = [
         sys.executable, "-m", "cadgenbench.cli", "report", "single",
         str(run_dir), "-o", str(html_out),
     ]
     if render_base_url:
         cmd += [
             "--render-base-url", render_base_url,

 # with the routes registered in the leaderboard Space's ``app.py``.
 GT_PROXY_BASE_URL = "/gt"
 INPUT_PROXY_BASE_URL = "/task-input"
+# Same-origin metrics explainer route (served by the Space). Passed to the
+# report so its headline metric pills deep-link to `/metrics#<anchor>`.
+METRICS_PAGE_URL = "/metrics"
 def _render_base_url(submission_id: str) -> str:
     html_path = REPORT_HTML_DIR / f"{submission_id}.html"
     _run_report(
         RUN_DIR, html_path,
+        submission_name=_read_submission_name(RUN_DIR),
         render_base_url=_render_base_url(submission_id),
+        metrics_base_url=METRICS_PAGE_URL,
         download_url=_submission_zip_url(submission_id, submissions_repo),
     )
     report_json = _build_report_json(RUN_DIR)
         )
+def _read_submission_name(run_dir: Path) -> str | None:
+    """Submission name from the unpacked ``meta.json``, for the report heading.
+    The submission zip carries a top-level ``meta.json`` with
+    ``submission_name`` (see the Space's submission format); reading it
+    back here lets the report title itself with the human-readable name.
+    Best effort: any missing/malformed file returns ``None`` and the
+    report falls back to its ``CADGenBench / <timestamp>`` heading.
+    """
+    meta_path = run_dir / "meta.json"
+    if not meta_path.is_file():
+        return None
+    try:
+        meta = json.loads(meta_path.read_text(encoding="utf-8"))
+    except Exception as e:  # noqa: BLE001 - heading is cosmetic, never fail eval
+        print(f"[eval_job] could not read submission_name: {e}", flush=True)
+        return None
+    name = meta.get("submission_name")
+    return str(name) if name else None
 def _run_report(
     run_dir: Path,
     html_out: Path,
     *,
+    submission_name: str | None = None,
     render_base_url: str | None = None,
+    metrics_base_url: str | None = None,
     download_url: str | None = None,
 ) -> None:
     """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
     Passes ``--render-base-url`` so candidate renders are referenced from the
+    public bucket rather than base64-inlined into the hosted HTML,
+    ``--submission-name`` (when known) so the report titles itself with the
+    submission name, and ``--metrics-base-url`` so the headline metric pills
+    deep-link to the Space's metrics explainer.
     """
     cmd = [
         sys.executable, "-m", "cadgenbench.cli", "report", "single",
         str(run_dir), "-o", str(html_out),
     ]
+    if submission_name:
+        cmd += ["--submission-name", submission_name]
+    if metrics_base_url:
+        cmd += ["--metrics-base-url", metrics_base_url]
     if render_base_url:
         cmd += [
             "--render-base-url", render_base_url,