Michael Rabinovich Cursor committed on
Commit
5140b0a
·
1 Parent(s): d2161b1

leaderboard: serve GT report assets via proxy; link them in hosted report

Browse files

Add a /gt/{fixture}/{relpath} proxy route through which the token-holding
Space streams the private GT view PNGs + PDF, and pass
gt_base_url/input_base_url to generate_html in the merge path so the hosted
report links the GT/input/overlay assets (lazy-loaded) instead of
base64-inlining them. Bump CADGENBENCH_SHA to 25943a0.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (3) hide show
  1. Dockerfile +1 -1
  2. app.py +55 -0
  3. submit.py +14 -4
Dockerfile CHANGED
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
- ARG CADGENBENCH_SHA=3d49822
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
 
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
+ ARG CADGENBENCH_SHA=25943a0
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
app.py CHANGED
@@ -609,6 +609,52 @@ def serve_gt_render(fixture: str) -> Response:
609
  )
610
 
611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  def _gallery_iframe_html() -> str:
613
  """Build the gallery as a self-contained ``srcdoc`` iframe.
614
 
@@ -1075,6 +1121,15 @@ app.add_api_route(
1075
  serve_gt_render,
1076
  methods=["GET"],
1077
  )
 
 
 
 
 
 
 
 
 
1078
  # Task-browser input assets (drawings + starting-shape renders). The
1079
  # `:path` converter lets `relpath` carry a slash (e.g. renders/iso.png).
1080
  # Registered before the Gradio mount so it's not shadowed.
 
609
  )
610
 
611
 
612
def _fetch_gt_file(fixture: str, relpath: str) -> bytes | None:
    """Return the bytes of ``<fixture>/<relpath>`` from the GT dataset, or ``None``.

    Backs the ground-truth column of the hosted report: the per-view PNGs
    (``renders/<view>.png``) and the ``ground_truth.pdf``. Because the GT
    dataset is **private**, the asset is fetched here (the Space holds the
    read token) instead of being linked directly. The download goes through
    ``hf_hub_download``, which is relied on for the per-revision disk cache.

    Any exception raised while downloading or reading the file is logged as
    a warning and turned into ``None``; the caller maps that to a 404, and
    the report falls back on the browser's normal missing-image handling.
    """
    try:
        # Download (or reuse the cached copy) and read it inside the same
        # ``try`` so a read failure is also reported as "not available".
        cached_copy = Path(
            hf_hub_download(
                repo_id=HF_DATA_GT_REPO,
                filename=f"{fixture}/{relpath}",
                repo_type="dataset",
            )
        )
        return cached_copy.read_bytes()
    except Exception as exc:  # noqa: BLE001 - any Hub failure -> 404
        logger.warning(
            "Failed to fetch GT file %s/%s (%s: %s)",
            fixture, relpath, type(exc).__name__, exc,
        )
        return None
635
+
636
+
637
def serve_gt_file(fixture: str, relpath: str) -> Response:
    """Serve one GT asset (view PNG / PDF) with the immutable cache header.

    The hosted report points at ``/gt/<fixture>/<relpath>`` and the browser
    requests it lazily. The bytes depend on the data revision rather than on
    any submission, so the response carries the same ``Cache-Control`` value
    (``RENDER_CACHE_CONTROL``) used by the render/input proxies.

    Responds 404 when either path component contains ``..`` (path-traversal
    guard) or when the asset could not be fetched. The media type is guessed
    from ``relpath`` and falls back to ``application/octet-stream``.
    """
    # Reject traversal attempts before touching the Hub.
    if any(".." in part for part in (fixture, relpath)):
        return Response(status_code=404)

    payload = _fetch_gt_file(fixture, relpath)
    if payload is None:
        return Response(status_code=404)

    guessed_type, _encoding = mimetypes.guess_type(relpath)
    return Response(
        content=payload,
        media_type=guessed_type or "application/octet-stream",
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
656
+
657
+
658
  def _gallery_iframe_html() -> str:
659
  """Build the gallery as a self-contained ``srcdoc`` iframe.
660
 
 
1121
  serve_gt_render,
1122
  methods=["GET"],
1123
  )
1124
+ # Ground-truth assets the hosted report links lazily (per-view PNGs + PDF).
1125
+ # GT is private, so this token-holding proxy streams them; the `:path`
1126
+ # converter lets `relpath` carry a slash (e.g. renders/iso.png). Registered
1127
+ # before the Gradio mount so it isn't shadowed by the catch-all sub-app.
1128
+ app.add_api_route(
1129
+ "/gt/{fixture}/{relpath:path}",
1130
+ serve_gt_file,
1131
+ methods=["GET"],
1132
+ )
1133
  # Task-browser input assets (drawings + starting-shape renders). The
1134
  # `:path` converter lets `relpath` carry a slash (e.g. renders/iso.png).
1135
  # Registered before the Gradio mount so it's not shadowed.
submit.py CHANGED
@@ -139,6 +139,11 @@ SUBMISSION_ID_SLUG_MAX = 40
139
  RESULTS_FILENAME = "results.jsonl"
140
  SUBMISSIONS_DIR = "submissions"
141
  REPORTS_DIR = "reports"
 
 
 
 
 
142
  DATA_REV_SHORT_LEN = 12
143
  FAILURE_REASON_MAX_CHARS = 200
144
  SHA256_BLOCK_SIZE = 64 * 1024
@@ -1536,11 +1541,16 @@ def _merge_shards_and_publish(
1536
  report_json = _build_report_json(merged_run)
1537
 
1538
  run_data = discover_run(merged_run)
1539
- # Hosted report references the candidate renders from the public bucket
1540
- # (uploaded by the shard jobs), keeping the heavy WebP/PNG bytes out of
1541
- # the committed HTML. GT/input stay inlined (GT is private).
 
 
1542
  html = generate_html(
1543
- run_data, render_base_url=render_submission_base_url(submission_id),
 
 
 
1544
  )
1545
  html_path = tmp / f"{submission_id}.html"
1546
  html_path.write_text(html, encoding="utf-8")
 
139
  RESULTS_FILENAME = "results.jsonl"
140
  SUBMISSIONS_DIR = "submissions"
141
  REPORTS_DIR = "reports"
142
+ # Space-relative proxy roots the hosted report links its *private* assets
143
+ # through (GT + inputs can't be public-bucket URLs). Must match the routes
144
+ # registered in app.py and the constants in the eval job's eval_job.py.
145
+ GT_PROXY_BASE_URL = "/gt"
146
+ INPUT_PROXY_BASE_URL = "/task-input"
147
  DATA_REV_SHORT_LEN = 12
148
  FAILURE_REASON_MAX_CHARS = 200
149
  SHA256_BLOCK_SIZE = 64 * 1024
 
1541
  report_json = _build_report_json(merged_run)
1542
 
1543
  run_data = discover_run(merged_run)
1544
+ # Hosted report links every heavy asset (lazy-loaded) instead of
1545
+ # base64-inlining it, so the committed HTML stays small: candidate
1546
+ # renders + interface overlay come from the public bucket (uploaded by
1547
+ # the shard jobs); GT views + inputs are private, so they link through
1548
+ # the Space's token-holding proxy routes.
1549
  html = generate_html(
1550
+ run_data,
1551
+ render_base_url=render_submission_base_url(submission_id),
1552
+ gt_base_url=GT_PROXY_BASE_URL,
1553
+ input_base_url=INPUT_PROXY_BASE_URL,
1554
  )
1555
  html_path = tmp / f"{submission_id}.html"
1556
  html_path.write_text(html, encoding="utf-8")