Michael Rabinovich Cursor committed on
Commit ·
ed16355
1
Parent(s): 55616a2
eval: pass submission name + metrics URL to report generator
Browse files
Reads submission_name from the unpacked meta.json and forwards it plus
the Space's /metrics route to `report single` so the hosted report
titles itself with the submission name and its metric pills deep-link to
the metrics explainer.
Co-authored-by: Cursor <cursoragent@cursor.com>
- eval_job.py +36 -1
eval_job.py
CHANGED
|
@@ -93,6 +93,9 @@ HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/"
|
|
| 93 |
# with the routes registered in the leaderboard Space's ``app.py``.
|
| 94 |
GT_PROXY_BASE_URL = "/gt"
|
| 95 |
INPUT_PROXY_BASE_URL = "/task-input"
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
|
| 98 |
def _render_base_url(submission_id: str) -> str:
|
|
@@ -267,7 +270,9 @@ def main() -> int:
|
|
| 267 |
html_path = REPORT_HTML_DIR / f"{submission_id}.html"
|
| 268 |
_run_report(
|
| 269 |
RUN_DIR, html_path,
|
|
|
|
| 270 |
render_base_url=_render_base_url(submission_id),
|
|
|
|
| 271 |
download_url=_submission_zip_url(submission_id, submissions_repo),
|
| 272 |
)
|
| 273 |
report_json = _build_report_json(RUN_DIR)
|
|
@@ -453,22 +458,52 @@ def _run_eval(run_dir: Path, workers: int) -> None:
|
|
| 453 |
)
|
| 454 |
|
| 455 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
def _run_report(
|
| 457 |
run_dir: Path,
|
| 458 |
html_out: Path,
|
| 459 |
*,
|
|
|
|
| 460 |
render_base_url: str | None = None,
|
|
|
|
| 461 |
download_url: str | None = None,
|
| 462 |
) -> None:
|
| 463 |
"""Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
|
| 464 |
|
| 465 |
Passes ``--render-base-url`` so candidate renders are referenced from the
|
| 466 |
-
public bucket rather than base64-inlined into the hosted HTML
|
|
|
|
|
|
|
|
|
|
| 467 |
"""
|
| 468 |
cmd = [
|
| 469 |
sys.executable, "-m", "cadgenbench.cli", "report", "single",
|
| 470 |
str(run_dir), "-o", str(html_out),
|
| 471 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
if render_base_url:
|
| 473 |
cmd += [
|
| 474 |
"--render-base-url", render_base_url,
|
|
|
|
| 93 |
# with the routes registered in the leaderboard Space's ``app.py``.
|
| 94 |
GT_PROXY_BASE_URL = "/gt"
|
| 95 |
INPUT_PROXY_BASE_URL = "/task-input"
|
| 96 |
+
# Same-origin metrics explainer route (served by the Space). Passed to the
|
| 97 |
+
# report so its headline metric pills deep-link to `/metrics#<anchor>`.
|
| 98 |
+
METRICS_PAGE_URL = "/metrics"
|
| 99 |
|
| 100 |
|
| 101 |
def _render_base_url(submission_id: str) -> str:
|
|
|
|
| 270 |
html_path = REPORT_HTML_DIR / f"{submission_id}.html"
|
| 271 |
_run_report(
|
| 272 |
RUN_DIR, html_path,
|
| 273 |
+
submission_name=_read_submission_name(RUN_DIR),
|
| 274 |
render_base_url=_render_base_url(submission_id),
|
| 275 |
+
metrics_base_url=METRICS_PAGE_URL,
|
| 276 |
download_url=_submission_zip_url(submission_id, submissions_repo),
|
| 277 |
)
|
| 278 |
report_json = _build_report_json(RUN_DIR)
|
|
|
|
| 458 |
)
|
| 459 |
|
| 460 |
|
| 461 |
+
def _read_submission_name(run_dir: Path) -> str | None:
|
| 462 |
+
"""Submission name from the unpacked ``meta.json``, for the report heading.
|
| 463 |
+
|
| 464 |
+
The submission zip carries a top-level ``meta.json`` with
|
| 465 |
+
``submission_name`` (see the Space's submission format); reading it
|
| 466 |
+
back here lets the report title itself with the human-readable name.
|
| 467 |
+
Best effort: any missing/malformed file returns ``None`` and the
|
| 468 |
+
report falls back to its ``CADGenBench / <timestamp>`` heading.
|
| 469 |
+
"""
|
| 470 |
+
meta_path = run_dir / "meta.json"
|
| 471 |
+
if not meta_path.is_file():
|
| 472 |
+
return None
|
| 473 |
+
try:
|
| 474 |
+
meta = json.loads(meta_path.read_text(encoding="utf-8"))
|
| 475 |
+
except Exception as e: # noqa: BLE001 - heading is cosmetic, never fail eval
|
| 476 |
+
print(f"[eval_job] could not read submission_name: {e}", flush=True)
|
| 477 |
+
return None
|
| 478 |
+
name = meta.get("submission_name")
|
| 479 |
+
return str(name) if name else None
|
| 480 |
+
|
| 481 |
+
|
| 482 |
def _run_report(
|
| 483 |
run_dir: Path,
|
| 484 |
html_out: Path,
|
| 485 |
*,
|
| 486 |
+
submission_name: str | None = None,
|
| 487 |
render_base_url: str | None = None,
|
| 488 |
+
metrics_base_url: str | None = None,
|
| 489 |
download_url: str | None = None,
|
| 490 |
) -> None:
|
| 491 |
"""Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
|
| 492 |
|
| 493 |
Passes ``--render-base-url`` so candidate renders are referenced from the
|
| 494 |
+
public bucket rather than base64-inlined into the hosted HTML,
|
| 495 |
+
``--submission-name`` (when known) so the report titles itself with the
|
| 496 |
+
submission name, and ``--metrics-base-url`` so the headline metric pills
|
| 497 |
+
deep-link to the Space's metrics explainer.
|
| 498 |
"""
|
| 499 |
cmd = [
|
| 500 |
sys.executable, "-m", "cadgenbench.cli", "report", "single",
|
| 501 |
str(run_dir), "-o", str(html_out),
|
| 502 |
]
|
| 503 |
+
if submission_name:
|
| 504 |
+
cmd += ["--submission-name", submission_name]
|
| 505 |
+
if metrics_base_url:
|
| 506 |
+
cmd += ["--metrics-base-url", metrics_base_url]
|
| 507 |
if render_base_url:
|
| 508 |
cmd += [
|
| 509 |
"--render-base-url", render_base_url,
|