Michael Rabinovich Cursor committed on
Commit
ed16355
·
1 Parent(s): 55616a2

eval: pass submission name + metrics URL to report generator

Browse files

Reads submission_name from the unpacked meta.json and forwards it plus
the Space's /metrics route to `report single` so the hosted report
titles itself with the submission name and its metric pills deep-link to
the metrics explainer.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (1) hide show
  1. eval_job.py +36 -1
eval_job.py CHANGED
@@ -93,6 +93,9 @@ HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co").rstrip("/"
93
  # with the routes registered in the leaderboard Space's ``app.py``.
94
  GT_PROXY_BASE_URL = "/gt"
95
  INPUT_PROXY_BASE_URL = "/task-input"
 
 
 
96
 
97
 
98
  def _render_base_url(submission_id: str) -> str:
@@ -267,7 +270,9 @@ def main() -> int:
267
  html_path = REPORT_HTML_DIR / f"{submission_id}.html"
268
  _run_report(
269
  RUN_DIR, html_path,
 
270
  render_base_url=_render_base_url(submission_id),
 
271
  download_url=_submission_zip_url(submission_id, submissions_repo),
272
  )
273
  report_json = _build_report_json(RUN_DIR)
@@ -453,22 +458,52 @@ def _run_eval(run_dir: Path, workers: int) -> None:
453
  )
454
 
455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
  def _run_report(
457
  run_dir: Path,
458
  html_out: Path,
459
  *,
 
460
  render_base_url: str | None = None,
 
461
  download_url: str | None = None,
462
  ) -> None:
463
  """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
464
 
465
  Passes ``--render-base-url`` so candidate renders are referenced from the
466
- public bucket rather than base64-inlined into the hosted HTML.
 
 
 
467
  """
468
  cmd = [
469
  sys.executable, "-m", "cadgenbench.cli", "report", "single",
470
  str(run_dir), "-o", str(html_out),
471
  ]
 
 
 
 
472
  if render_base_url:
473
  cmd += [
474
  "--render-base-url", render_base_url,
 
93
  # with the routes registered in the leaderboard Space's ``app.py``.
94
  GT_PROXY_BASE_URL = "/gt"
95
  INPUT_PROXY_BASE_URL = "/task-input"
96
+ # Same-origin metrics explainer route (served by the Space). Passed to the
97
+ # report so its headline metric pills deep-link to `/metrics#<anchor>`.
98
+ METRICS_PAGE_URL = "/metrics"
99
 
100
 
101
  def _render_base_url(submission_id: str) -> str:
 
270
  html_path = REPORT_HTML_DIR / f"{submission_id}.html"
271
  _run_report(
272
  RUN_DIR, html_path,
273
+ submission_name=_read_submission_name(RUN_DIR),
274
  render_base_url=_render_base_url(submission_id),
275
+ metrics_base_url=METRICS_PAGE_URL,
276
  download_url=_submission_zip_url(submission_id, submissions_repo),
277
  )
278
  report_json = _build_report_json(RUN_DIR)
 
458
  )
459
 
460
 
461
+ def _read_submission_name(run_dir: Path) -> str | None:
462
+ """Submission name from the unpacked ``meta.json``, for the report heading.
463
+
464
+ The submission zip carries a top-level ``meta.json`` with
465
+ ``submission_name`` (see the Space's submission format); reading it
466
+ back here lets the report title itself with the human-readable name.
467
+ Best effort: any missing/malformed file returns ``None`` and the
468
+ report falls back to its ``CADGenBench / <timestamp>`` heading.
469
+ """
470
+ meta_path = run_dir / "meta.json"
471
+ if not meta_path.is_file():
472
+ return None
473
+ try:
474
+ meta = json.loads(meta_path.read_text(encoding="utf-8"))
475
+ except Exception as e: # noqa: BLE001 - heading is cosmetic, never fail eval
476
+ print(f"[eval_job] could not read submission_name: {e}", flush=True)
477
+ return None
478
+ name = meta.get("submission_name")
479
+ return str(name) if name else None
480
+
481
+
482
  def _run_report(
483
  run_dir: Path,
484
  html_out: Path,
485
  *,
486
+ submission_name: str | None = None,
487
  render_base_url: str | None = None,
488
+ metrics_base_url: str | None = None,
489
  download_url: str | None = None,
490
  ) -> None:
491
  """Invoke ``cadgenbench report single`` for *run_dir*; raise on non-zero.
492
 
493
  Passes ``--render-base-url`` so candidate renders are referenced from the
494
+ public bucket rather than base64-inlined into the hosted HTML,
495
+ ``--submission-name`` (when known) so the report titles itself with the
496
+ submission name, and ``--metrics-base-url`` so the headline metric pills
497
+ deep-link to the Space's metrics explainer.
498
  """
499
  cmd = [
500
  sys.executable, "-m", "cadgenbench.cli", "report", "single",
501
  str(run_dir), "-o", str(html_out),
502
  ]
503
+ if submission_name:
504
+ cmd += ["--submission-name", submission_name]
505
+ if metrics_base_url:
506
+ cmd += ["--metrics-base-url", metrics_base_url]
507
  if render_base_url:
508
  cmd += [
509
  "--render-base-url", render_base_url,