Michael Rabinovich Cursor commited on
Commit ·
b224eee
1
Parent(s): dc8ff2a
submit: retry Hub commits on 429/5xx + persistent status panel
Browse filesWrap every Space-side commit (zip upload, results.jsonl RMW, report +
gallery uploads, shard cleanup) in a backoff-with-jitter retry that
honors Retry-After, so a rate-limited submissions repo delays rather
than fails a submit. Make handle_submit a generator streaming a
durable status panel (validating -> uploading -> queued, or the
rejection reason) instead of relying on transient toasts.
Co-authored-by: Cursor <cursoragent@cursor.com>
- app.py +17 -5
- submit.py +186 -74
- tests/test_submit.py +82 -0
app.py
CHANGED
|
@@ -123,6 +123,10 @@ VALIDATION_GUIDELINES_MD = f"""Submissions appear on the **Unvalidated** table t
|
|
| 123 |
Full policy: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL})."""
|
| 124 |
|
| 125 |
DETAIL_PLACEHOLDER = "_Click a row above for details._"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
|
| 128 |
def _has(value) -> bool:
|
|
@@ -784,11 +788,19 @@ to publish the resulting row on the public leaderboard.
|
|
| 784 |
# Starts disabled; the `blocks.load` handler below flips it
|
| 785 |
# to interactive when an OAuthProfile is present.
|
| 786 |
submit_btn = gr.Button("Submit", variant="primary", interactive=False)
|
| 787 |
-
#
|
| 788 |
-
#
|
| 789 |
-
#
|
| 790 |
-
#
|
| 791 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 792 |
|
| 793 |
with gr.Tab("About"):
|
| 794 |
gr.Markdown(ABOUT_MD)
|
|
|
|
| 123 |
Full policy: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL})."""
|
| 124 |
|
| 125 |
DETAIL_PLACEHOLDER = "_Click a row above for details._"
|
| 126 |
+
SUBMIT_STATUS_IDLE = (
|
| 127 |
+
"_Log in, attach a zip, and click **Submit**. Progress and any "
|
| 128 |
+
"errors appear here._"
|
| 129 |
+
)
|
| 130 |
|
| 131 |
|
| 132 |
def _has(value) -> bool:
|
|
|
|
| 788 |
# Starts disabled; the `blocks.load` handler below flips it
|
| 789 |
# to interactive when an OAuthProfile is present.
|
| 790 |
submit_btn = gr.Button("Submit", variant="primary", interactive=False)
|
| 791 |
+
# Persistent status panel. handle_submit is a generator that
|
| 792 |
+
# streams stage updates (validating -> uploading/queuing ->
|
| 793 |
+
# queued) and any rejection reason here, so the outcome
|
| 794 |
+
# survives instead of vanishing with a transient toast. The
|
| 795 |
+
# handler also reads `gr.OAuthProfile` implicitly via its
|
| 796 |
+
# parameter type annotation (Gradio's dependency-injection
|
| 797 |
+
# convention).
|
| 798 |
+
submit_status = gr.Markdown(value=SUBMIT_STATUS_IDLE)
|
| 799 |
+
submit_btn.click(
|
| 800 |
+
fn=handle_submit,
|
| 801 |
+
inputs=[zip_in],
|
| 802 |
+
outputs=[submit_status],
|
| 803 |
+
)
|
| 804 |
|
| 805 |
with gr.Tab("About"):
|
| 806 |
gr.Markdown(ABOUT_MD)
|
submit.py
CHANGED
|
@@ -88,6 +88,7 @@ import hashlib
|
|
| 88 |
import json
|
| 89 |
import logging
|
| 90 |
import os
|
|
|
|
| 91 |
import re
|
| 92 |
import shutil
|
| 93 |
import tempfile
|
|
@@ -111,7 +112,7 @@ from huggingface_hub import (
|
|
| 111 |
run_job,
|
| 112 |
snapshot_download,
|
| 113 |
)
|
| 114 |
-
from huggingface_hub.errors import EntryNotFoundError
|
| 115 |
|
| 116 |
from leaderboard import HF_DATA_REPO, HF_ORG, HF_SUBMISSIONS_REPO
|
| 117 |
|
|
@@ -138,6 +139,18 @@ GALLERY_THUMB_VIEW = "iso"
|
|
| 138 |
DATA_REV_SHORT_LEN = 12
|
| 139 |
FAILURE_REASON_MAX_CHARS = 200
|
| 140 |
SHA256_BLOCK_SIZE = 64 * 1024
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
STUCK_PENDING_THRESHOLD_SECONDS = 30 * 60
|
| 142 |
SUBMITTED_AT_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
|
| 143 |
STUCK_PENDING_REASON = "evaluation interrupted by Space restart"
|
|
@@ -214,49 +227,109 @@ class _HubWriteError(Exception):
|
|
| 214 |
"""Raised when a Hub upload fails after validation succeeded."""
|
| 215 |
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
def handle_submit(
|
| 218 |
zip_file,
|
| 219 |
profile: gr.OAuthProfile | None,
|
| 220 |
-
)
|
| 221 |
-
"""Validate a submission
|
| 222 |
|
| 223 |
Requires the user to be logged in via ``gr.LoginButton`` so the
|
| 224 |
row's ``hf_username`` is the canonical HF identity (not a
|
| 225 |
free-text claim). The submit button in :mod:`app` is disabled
|
| 226 |
-
until login completes; this
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
1. ``"Validating submission..."`` on click, with a wall-time hint
|
| 240 |
-
so the user knows the post-queue eval takes ~1 minute.
|
| 241 |
-
2. ``"Submission <id> queued ..."`` once the row + zip are on the
|
| 242 |
-
Hub and the worker has been spawned.
|
| 243 |
-
|
| 244 |
-
On rejection (login-missing, form-level, validation gate, dedup,
|
| 245 |
-
or Hub write), a single ``gr.Error`` toast carries the message;
|
| 246 |
-
no second toast.
|
| 247 |
"""
|
| 248 |
if profile is None:
|
| 249 |
-
|
|
|
|
|
|
|
| 250 |
form_err = _validate_form(zip_file)
|
| 251 |
if form_err is not None:
|
|
|
|
| 252 |
raise gr.Error(form_err)
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
"Validating submission... evaluation usually takes "
|
| 259 |
-
"~1 minute after queuing."
|
| 260 |
)
|
| 261 |
|
| 262 |
zip_path = Path(zip_file.name)
|
|
@@ -276,7 +349,9 @@ def handle_submit(
|
|
| 276 |
fixture_names = _validate_fixture_set(run_dir)
|
| 277 |
_validate_steps_parseable(run_dir, fixture_names)
|
| 278 |
except _ValidationError as e:
|
| 279 |
-
|
|
|
|
|
|
|
| 280 |
|
| 281 |
# Dedup gate: hash the raw zip bytes and reject if an existing
|
| 282 |
# row carries the same hash. Runs after validation so a clearly
|
|
@@ -284,15 +359,23 @@ def handle_submit(
|
|
| 284 |
zip_sha256 = _compute_sha256(zip_path)
|
| 285 |
existing_id = _find_existing_submission_by_sha256(zip_sha256)
|
| 286 |
if existing_id is not None:
|
| 287 |
-
|
| 288 |
f"This zip's contents are identical to an existing "
|
| 289 |
f"submission ({existing_id}). Resubmit only after changing "
|
| 290 |
f"at least one byte of the upload."
|
| 291 |
)
|
|
|
|
|
|
|
| 292 |
|
| 293 |
submission_id = _mint_submission_id(
|
| 294 |
meta["submitter_name"], meta["submission_name"]
|
| 295 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
try:
|
| 297 |
blob_url = _upload_submission_zip(submission_id, zip_path)
|
| 298 |
row = _build_pending_row(
|
|
@@ -301,14 +384,17 @@ def handle_submit(
|
|
| 301 |
)
|
| 302 |
_append_pending_row(row)
|
| 303 |
except _HubWriteError as e:
|
| 304 |
-
|
|
|
|
|
|
|
| 305 |
|
| 306 |
_spawn_worker(submission_id, blob_url, sorted(fixture_names))
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
f"({len(fixture_names)}
|
| 310 |
-
f"HF Jobs GPU;
|
| 311 |
-
f"
|
|
|
|
| 312 |
)
|
| 313 |
finally:
|
| 314 |
shutil.rmtree(tmp, ignore_errors=True)
|
|
@@ -509,12 +595,15 @@ def _upload_submission_zip(submission_id: str, zip_path: Path) -> str:
|
|
| 509 |
"""
|
| 510 |
repo_path = f"{SUBMISSIONS_DIR}/{submission_id}.zip"
|
| 511 |
try:
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
|
|
|
|
|
|
|
|
|
| 518 |
)
|
| 519 |
except Exception as e: # noqa: BLE001 - Hub API surface is broad
|
| 520 |
logger.exception("Failed to upload submission zip %s", submission_id)
|
|
@@ -667,8 +756,14 @@ def _hub_rmw_results(
|
|
| 667 |
The lock is held only for the read-modify-write cycle (~1-2s),
|
| 668 |
never for eval time. Concurrent submitters serialise here, not
|
| 669 |
in the eval pipeline. Treats a missing file as the empty list.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 670 |
"""
|
| 671 |
-
|
| 672 |
existing = _download_results_jsonl()
|
| 673 |
rows: list[dict[str, Any]] = [
|
| 674 |
json.loads(line) for line in existing.splitlines() if line.strip()
|
|
@@ -687,6 +782,9 @@ def _hub_rmw_results(
|
|
| 687 |
commit_message=commit_message,
|
| 688 |
)
|
| 689 |
|
|
|
|
|
|
|
|
|
|
| 690 |
|
| 691 |
def _download_results_jsonl() -> str:
|
| 692 |
"""Fetch the current ``results.jsonl`` body as text, or ``""`` if absent."""
|
|
@@ -1239,21 +1337,27 @@ def _upload_reports(
|
|
| 1239 |
artifacts land at the exact paths the leaderboard + the row-flip
|
| 1240 |
expect. Uses the process ``HfApi`` (Space HF_TOKEN env).
|
| 1241 |
"""
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
|
|
|
|
|
|
|
|
|
| 1248 |
)
|
| 1249 |
-
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
|
|
|
|
|
|
|
|
|
| 1257 |
)
|
| 1258 |
logger.info("Uploaded merged reports/%s.{html,json}", submission_id)
|
| 1259 |
|
|
@@ -1280,16 +1384,21 @@ def _upload_gallery_renders_from_dir(
|
|
| 1280 |
logger.info("No gallery renders to upload for %s", submission_id)
|
| 1281 |
return
|
| 1282 |
for iso_png, fixture_name in staged:
|
| 1283 |
-
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1292 |
),
|
|
|
|
| 1293 |
)
|
| 1294 |
logger.info(
|
| 1295 |
"Uploaded %d gallery render(s) under %s/%s/",
|
|
@@ -1306,11 +1415,14 @@ def _cleanup_shard_artifacts(submission_id: str) -> None:
|
|
| 1306 |
submission.
|
| 1307 |
"""
|
| 1308 |
try:
|
| 1309 |
-
|
| 1310 |
-
|
| 1311 |
-
|
| 1312 |
-
|
| 1313 |
-
|
|
|
|
|
|
|
|
|
|
| 1314 |
)
|
| 1315 |
logger.info("Cleaned up shard artifacts for %s", submission_id)
|
| 1316 |
except Exception as e: # noqa: BLE001 - cleanup is best-effort
|
|
|
|
| 88 |
import json
|
| 89 |
import logging
|
| 90 |
import os
|
| 91 |
+
import random
|
| 92 |
import re
|
| 93 |
import shutil
|
| 94 |
import tempfile
|
|
|
|
| 112 |
run_job,
|
| 113 |
snapshot_download,
|
| 114 |
)
|
| 115 |
+
from huggingface_hub.errors import EntryNotFoundError, HfHubHTTPError
|
| 116 |
|
| 117 |
from leaderboard import HF_DATA_REPO, HF_ORG, HF_SUBMISSIONS_REPO
|
| 118 |
|
|
|
|
| 139 |
DATA_REV_SHORT_LEN = 12
|
| 140 |
FAILURE_REASON_MAX_CHARS = 200
|
| 141 |
SHA256_BLOCK_SIZE = 64 * 1024
|
| 142 |
+
|
| 143 |
+
# Hub-write retry policy. HF rate-limits commits per repo, so a burst
|
| 144 |
+
# of concurrent commits (e.g. bulk baseline seeding writing into this
|
| 145 |
+
# same dataset) can 429 an otherwise-valid submit at the commit step.
|
| 146 |
+
# Retry transient statuses with exponential backoff + jitter, honoring
|
| 147 |
+
# a server Retry-After, up to a wall-clock cap. Applied to every
|
| 148 |
+
# Space-side commit so a busy repo delays a submit instead of failing
|
| 149 |
+
# it.
|
| 150 |
+
_HUB_RETRY_STATUSES = frozenset({429, 500, 502, 503, 504})
|
| 151 |
+
HUB_RETRY_MAX_SECONDS = 120
|
| 152 |
+
HUB_RETRY_BASE_DELAY_SECONDS = 2.0
|
| 153 |
+
HUB_RETRY_MAX_DELAY_SECONDS = 20.0
|
| 154 |
STUCK_PENDING_THRESHOLD_SECONDS = 30 * 60
|
| 155 |
SUBMITTED_AT_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
|
| 156 |
STUCK_PENDING_REASON = "evaluation interrupted by Space restart"
|
|
|
|
| 227 |
"""Raised when a Hub upload fails after validation succeeded."""
|
| 228 |
|
| 229 |
|
| 230 |
+
def _retry_after_seconds(error: HfHubHTTPError) -> float | None:
|
| 231 |
+
"""Parse a ``Retry-After`` header (seconds form) off a Hub error, if any."""
|
| 232 |
+
response = getattr(error, "response", None)
|
| 233 |
+
if response is None:
|
| 234 |
+
return None
|
| 235 |
+
raw = response.headers.get("Retry-After")
|
| 236 |
+
if not raw:
|
| 237 |
+
return None
|
| 238 |
+
try:
|
| 239 |
+
return float(raw)
|
| 240 |
+
except (TypeError, ValueError):
|
| 241 |
+
return None
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _with_hub_retries(fn, *, what: str):
|
| 245 |
+
"""Run *fn* (a Hub commit) retrying transient HTTP errors with backoff.
|
| 246 |
+
|
| 247 |
+
Retries only the statuses in :data:`_HUB_RETRY_STATUSES` (rate
|
| 248 |
+
limits + transient 5xx); any other error (auth, validation, a
|
| 249 |
+
``LookupError`` from a mutate closure) propagates immediately.
|
| 250 |
+
Backoff is exponential with jitter, clamped to
|
| 251 |
+
:data:`HUB_RETRY_MAX_DELAY_SECONDS`, never sleeps past the
|
| 252 |
+
:data:`HUB_RETRY_MAX_SECONDS` wall cap, and honors a server
|
| 253 |
+
``Retry-After`` when present. *fn* must be idempotent across calls
|
| 254 |
+
-- every caller here re-reads the remote state inside *fn* before
|
| 255 |
+
committing, so a retried commit can't double-apply.
|
| 256 |
+
"""
|
| 257 |
+
deadline = time.monotonic() + HUB_RETRY_MAX_SECONDS
|
| 258 |
+
attempt = 0
|
| 259 |
+
while True:
|
| 260 |
+
attempt += 1
|
| 261 |
+
try:
|
| 262 |
+
return fn()
|
| 263 |
+
except HfHubHTTPError as e:
|
| 264 |
+
status = getattr(getattr(e, "response", None), "status_code", None)
|
| 265 |
+
now = time.monotonic()
|
| 266 |
+
if status not in _HUB_RETRY_STATUSES or now >= deadline:
|
| 267 |
+
raise
|
| 268 |
+
delay = min(
|
| 269 |
+
HUB_RETRY_MAX_DELAY_SECONDS,
|
| 270 |
+
HUB_RETRY_BASE_DELAY_SECONDS * (2 ** (attempt - 1)),
|
| 271 |
+
)
|
| 272 |
+
delay = delay / 2 + random.uniform(0, delay / 2)
|
| 273 |
+
retry_after = _retry_after_seconds(e)
|
| 274 |
+
if retry_after is not None:
|
| 275 |
+
delay = max(delay, retry_after)
|
| 276 |
+
delay = min(delay, max(0.0, deadline - now))
|
| 277 |
+
logger.warning(
|
| 278 |
+
"Hub %s got HTTP %s; retry %d in %.1fs (cap %ds)",
|
| 279 |
+
what, status, attempt, delay, HUB_RETRY_MAX_SECONDS,
|
| 280 |
+
)
|
| 281 |
+
time.sleep(delay)
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def _submit_status(state: str, message: str) -> str:
|
| 285 |
+
"""Markdown for the persistent submit-status panel.
|
| 286 |
+
|
| 287 |
+
The panel is the durable counterpart to the transient ``gr.Info`` /
|
| 288 |
+
``gr.Error`` toasts: a submitter always sees the current stage and
|
| 289 |
+
any rejection reason without having to catch an ephemeral toast.
|
| 290 |
+
*state* picks the leading glyph (``working`` / ``queued`` /
|
| 291 |
+
``error``).
|
| 292 |
+
"""
|
| 293 |
+
glyph = {"working": "⏳", "queued": "✅", "error": "❌"}.get(state, "•")
|
| 294 |
+
return f"{glyph} {message}"
|
| 295 |
+
|
| 296 |
+
|
| 297 |
def handle_submit(
|
| 298 |
zip_file,
|
| 299 |
profile: gr.OAuthProfile | None,
|
| 300 |
+
):
|
| 301 |
+
"""Validate + queue a submission, streaming status to a panel.
|
| 302 |
|
| 303 |
Requires the user to be logged in via ``gr.LoginButton`` so the
|
| 304 |
row's ``hf_username`` is the canonical HF identity (not a
|
| 305 |
free-text claim). The submit button in :mod:`app` is disabled
|
| 306 |
+
until login completes; this generator also rejects defensively if
|
| 307 |
+
it's called without a profile so a UI mishap can't write an
|
| 308 |
+
anonymous row.
|
| 309 |
+
|
| 310 |
+
Generator: each ``yield`` is a Markdown string pushed to a
|
| 311 |
+
persistent status panel, the durable counterpart to the transient
|
| 312 |
+
toasts. Happy-path stages: validating -> uploading/queuing ->
|
| 313 |
+
queued. Every rejection (login-missing, form-level, validation
|
| 314 |
+
gate, dedup, Hub write) yields a final error panel **and** raises
|
| 315 |
+
``gr.Error`` for a toast; the outer ``try/finally`` still runs to
|
| 316 |
+
clean up the temp dir. The Hub writes inside ride out transient
|
| 317 |
+
rate limits via :func:`_with_hub_retries`, so a busy submissions
|
| 318 |
+
repo delays rather than fails the submit.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
"""
|
| 320 |
if profile is None:
|
| 321 |
+
msg = "Please log in via the HF button before submitting."
|
| 322 |
+
yield _submit_status("error", msg)
|
| 323 |
+
raise gr.Error(msg)
|
| 324 |
form_err = _validate_form(zip_file)
|
| 325 |
if form_err is not None:
|
| 326 |
+
yield _submit_status("error", form_err)
|
| 327 |
raise gr.Error(form_err)
|
| 328 |
|
| 329 |
+
yield _submit_status(
|
| 330 |
+
"working",
|
| 331 |
+
"Validating submission (unpacking the zip, checking the fixture set "
|
| 332 |
+
"and STEP files)…",
|
|
|
|
|
|
|
| 333 |
)
|
| 334 |
|
| 335 |
zip_path = Path(zip_file.name)
|
|
|
|
| 349 |
fixture_names = _validate_fixture_set(run_dir)
|
| 350 |
_validate_steps_parseable(run_dir, fixture_names)
|
| 351 |
except _ValidationError as e:
|
| 352 |
+
msg = f"Submission rejected: {e}"
|
| 353 |
+
yield _submit_status("error", msg)
|
| 354 |
+
raise gr.Error(msg)
|
| 355 |
|
| 356 |
# Dedup gate: hash the raw zip bytes and reject if an existing
|
| 357 |
# row carries the same hash. Runs after validation so a clearly
|
|
|
|
| 359 |
zip_sha256 = _compute_sha256(zip_path)
|
| 360 |
existing_id = _find_existing_submission_by_sha256(zip_sha256)
|
| 361 |
if existing_id is not None:
|
| 362 |
+
msg = (
|
| 363 |
f"This zip's contents are identical to an existing "
|
| 364 |
f"submission ({existing_id}). Resubmit only after changing "
|
| 365 |
f"at least one byte of the upload."
|
| 366 |
)
|
| 367 |
+
yield _submit_status("error", msg)
|
| 368 |
+
raise gr.Error(msg)
|
| 369 |
|
| 370 |
submission_id = _mint_submission_id(
|
| 371 |
meta["submitter_name"], meta["submission_name"]
|
| 372 |
)
|
| 373 |
+
yield _submit_status(
|
| 374 |
+
"working",
|
| 375 |
+
f"Uploading `{submission_id}` ({len(fixture_names)} fixtures) and "
|
| 376 |
+
f"queuing the evaluation… (this can take a moment, and retries "
|
| 377 |
+
f"automatically if the Hub is busy).",
|
| 378 |
+
)
|
| 379 |
try:
|
| 380 |
blob_url = _upload_submission_zip(submission_id, zip_path)
|
| 381 |
row = _build_pending_row(
|
|
|
|
| 384 |
)
|
| 385 |
_append_pending_row(row)
|
| 386 |
except _HubWriteError as e:
|
| 387 |
+
msg = f"Submission rejected: {e}"
|
| 388 |
+
yield _submit_status("error", msg)
|
| 389 |
+
raise gr.Error(msg)
|
| 390 |
|
| 391 |
_spawn_worker(submission_id, blob_url, sorted(fixture_names))
|
| 392 |
+
yield _submit_status(
|
| 393 |
+
"queued",
|
| 394 |
+
f"Submission `{submission_id}` queued ({len(fixture_names)} "
|
| 395 |
+
f"fixtures). The eval runs on an HF Jobs GPU; your row appears on "
|
| 396 |
+
f"the **Unvalidated** leaderboard and flips to completed when the "
|
| 397 |
+
f"job finishes (typically 1–3 minutes).",
|
| 398 |
)
|
| 399 |
finally:
|
| 400 |
shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
| 595 |
"""
|
| 596 |
repo_path = f"{SUBMISSIONS_DIR}/{submission_id}.zip"
|
| 597 |
try:
|
| 598 |
+
_with_hub_retries(
|
| 599 |
+
lambda: _HF_API.upload_file(
|
| 600 |
+
path_or_fileobj=str(zip_path),
|
| 601 |
+
path_in_repo=repo_path,
|
| 602 |
+
repo_id=HF_SUBMISSIONS_REPO,
|
| 603 |
+
repo_type="dataset",
|
| 604 |
+
commit_message=f"add submission zip for {submission_id}",
|
| 605 |
+
),
|
| 606 |
+
what="submission-zip upload",
|
| 607 |
)
|
| 608 |
except Exception as e: # noqa: BLE001 - Hub API surface is broad
|
| 609 |
logger.exception("Failed to upload submission zip %s", submission_id)
|
|
|
|
| 756 |
The lock is held only for the read-modify-write cycle (~1-2s),
|
| 757 |
never for eval time. Concurrent submitters serialise here, not
|
| 758 |
in the eval pipeline. Treats a missing file as the empty list.
|
| 759 |
+
|
| 760 |
+
The whole download->mutate->upload cycle is retried as a unit on a
|
| 761 |
+
transient Hub error (:func:`_with_hub_retries`): re-reading the
|
| 762 |
+
current file each attempt keeps the mutation idempotent and also
|
| 763 |
+
folds in any concurrent change, so a rate-limited commit waits and
|
| 764 |
+
retries instead of failing the caller.
|
| 765 |
"""
|
| 766 |
+
def _download_mutate_upload() -> None:
|
| 767 |
existing = _download_results_jsonl()
|
| 768 |
rows: list[dict[str, Any]] = [
|
| 769 |
json.loads(line) for line in existing.splitlines() if line.strip()
|
|
|
|
| 782 |
commit_message=commit_message,
|
| 783 |
)
|
| 784 |
|
| 785 |
+
with _HUB_LOCK:
|
| 786 |
+
_with_hub_retries(_download_mutate_upload, what="results.jsonl write")
|
| 787 |
+
|
| 788 |
|
| 789 |
def _download_results_jsonl() -> str:
|
| 790 |
"""Fetch the current ``results.jsonl`` body as text, or ``""`` if absent."""
|
|
|
|
| 1337 |
artifacts land at the exact paths the leaderboard + the row-flip
|
| 1338 |
expect. Uses the process ``HfApi`` (Space HF_TOKEN env).
|
| 1339 |
"""
|
| 1340 |
+
_with_hub_retries(
|
| 1341 |
+
lambda: _HF_API.upload_file(
|
| 1342 |
+
path_or_fileobj=str(html_path),
|
| 1343 |
+
path_in_repo=f"{REPORTS_DIR}/{submission_id}.html",
|
| 1344 |
+
repo_id=HF_SUBMISSIONS_REPO,
|
| 1345 |
+
repo_type="dataset",
|
| 1346 |
+
commit_message=f"add merged HTML report for {submission_id}",
|
| 1347 |
+
),
|
| 1348 |
+
what="report-html upload",
|
| 1349 |
)
|
| 1350 |
+
_with_hub_retries(
|
| 1351 |
+
lambda: _HF_API.upload_file(
|
| 1352 |
+
path_or_fileobj=json.dumps(
|
| 1353 |
+
report_json, ensure_ascii=False, indent=2,
|
| 1354 |
+
).encode("utf-8"),
|
| 1355 |
+
path_in_repo=f"{REPORTS_DIR}/{submission_id}.json",
|
| 1356 |
+
repo_id=HF_SUBMISSIONS_REPO,
|
| 1357 |
+
repo_type="dataset",
|
| 1358 |
+
commit_message=f"add merged JSON report for {submission_id}",
|
| 1359 |
+
),
|
| 1360 |
+
what="report-json upload",
|
| 1361 |
)
|
| 1362 |
logger.info("Uploaded merged reports/%s.{html,json}", submission_id)
|
| 1363 |
|
|
|
|
| 1384 |
logger.info("No gallery renders to upload for %s", submission_id)
|
| 1385 |
return
|
| 1386 |
for iso_png, fixture_name in staged:
|
| 1387 |
+
_with_hub_retries(
|
| 1388 |
+
lambda iso_png=iso_png, fixture_name=fixture_name: (
|
| 1389 |
+
_HF_API.upload_file(
|
| 1390 |
+
path_or_fileobj=str(iso_png),
|
| 1391 |
+
path_in_repo=(
|
| 1392 |
+
f"{RENDERS_DIR}/{submission_id}/{fixture_name}.png"
|
| 1393 |
+
),
|
| 1394 |
+
repo_id=HF_SUBMISSIONS_REPO,
|
| 1395 |
+
repo_type="dataset",
|
| 1396 |
+
commit_message=(
|
| 1397 |
+
f"add gallery render {fixture_name} for {submission_id}"
|
| 1398 |
+
),
|
| 1399 |
+
)
|
| 1400 |
),
|
| 1401 |
+
what="gallery-render upload",
|
| 1402 |
)
|
| 1403 |
logger.info(
|
| 1404 |
"Uploaded %d gallery render(s) under %s/%s/",
|
|
|
|
| 1415 |
submission.
|
| 1416 |
"""
|
| 1417 |
try:
|
| 1418 |
+
_with_hub_retries(
|
| 1419 |
+
lambda: _HF_API.delete_folder(
|
| 1420 |
+
path_in_repo=f"{REPORTS_DIR}/{submission_id}/{SHARDS_SUBDIR}",
|
| 1421 |
+
repo_id=HF_SUBMISSIONS_REPO,
|
| 1422 |
+
repo_type="dataset",
|
| 1423 |
+
commit_message=f"clean up eval shards for {submission_id}",
|
| 1424 |
+
),
|
| 1425 |
+
what="shard cleanup",
|
| 1426 |
)
|
| 1427 |
logger.info("Cleaned up shard artifacts for %s", submission_id)
|
| 1428 |
except Exception as e: # noqa: BLE001 - cleanup is best-effort
|
tests/test_submit.py
CHANGED
|
@@ -12,10 +12,92 @@ network traffic.
|
|
| 12 |
from __future__ import annotations
|
| 13 |
|
| 14 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
import submit
|
| 17 |
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
def _stub_meta() -> dict:
|
| 20 |
"""Minimum meta.json shape that survives ``_load_and_validate_meta``."""
|
| 21 |
return {
|
|
|
|
| 12 |
from __future__ import annotations
|
| 13 |
|
| 14 |
from pathlib import Path
|
| 15 |
+
from types import SimpleNamespace
|
| 16 |
+
|
| 17 |
+
import pytest
|
| 18 |
|
| 19 |
import submit
|
| 20 |
|
| 21 |
|
| 22 |
+
def _hub_http_error(status: int, headers: dict | None = None) -> submit.HfHubHTTPError:
|
| 23 |
+
"""An ``HfHubHTTPError`` with a minimal response carrying *status*.
|
| 24 |
+
|
| 25 |
+
Built without going through the real Hub: construct the exception
|
| 26 |
+
with no response, then attach a stand-in so ``_with_hub_retries``
|
| 27 |
+
can read ``response.status_code`` / ``response.headers``.
|
| 28 |
+
"""
|
| 29 |
+
err = submit.HfHubHTTPError(f"HTTP {status}")
|
| 30 |
+
err.response = SimpleNamespace(status_code=status, headers=headers or {})
|
| 31 |
+
return err
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_with_hub_retries_recovers_after_transient(monkeypatch):
|
| 35 |
+
"""A 429 (then a 503) is retried; the eventual success is returned."""
|
| 36 |
+
monkeypatch.setattr(submit.time, "sleep", lambda *_: None)
|
| 37 |
+
statuses = iter([429, 503])
|
| 38 |
+
calls = {"n": 0}
|
| 39 |
+
|
| 40 |
+
def flaky():
|
| 41 |
+
calls["n"] += 1
|
| 42 |
+
status = next(statuses, None)
|
| 43 |
+
if status is not None:
|
| 44 |
+
raise _hub_http_error(status)
|
| 45 |
+
return "ok"
|
| 46 |
+
|
| 47 |
+
assert submit._with_hub_retries(flaky, what="test") == "ok"
|
| 48 |
+
assert calls["n"] == 3
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def test_with_hub_retries_reraises_non_retryable(monkeypatch):
|
| 52 |
+
"""A 403 is not in the retry set, so it propagates on the first try."""
|
| 53 |
+
monkeypatch.setattr(submit.time, "sleep", lambda *_: None)
|
| 54 |
+
calls = {"n": 0}
|
| 55 |
+
|
| 56 |
+
def forbidden():
|
| 57 |
+
calls["n"] += 1
|
| 58 |
+
raise _hub_http_error(403)
|
| 59 |
+
|
| 60 |
+
with pytest.raises(submit.HfHubHTTPError):
|
| 61 |
+
submit._with_hub_retries(forbidden, what="test")
|
| 62 |
+
assert calls["n"] == 1
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def test_with_hub_retries_gives_up_after_wall_cap(monkeypatch):
|
| 66 |
+
"""Past the wall cap, a persistent 429 stops being retried and raises."""
|
| 67 |
+
monkeypatch.setattr(submit.time, "sleep", lambda *_: None)
|
| 68 |
+
# Force the deadline check to trip after the first failure: the
|
| 69 |
+
# second monotonic() read (post-failure) already exceeds the cap.
|
| 70 |
+
ticks = iter([0.0, submit.HUB_RETRY_MAX_SECONDS + 1])
|
| 71 |
+
monkeypatch.setattr(
|
| 72 |
+
submit.time, "monotonic", lambda: next(ticks, submit.HUB_RETRY_MAX_SECONDS + 1)
|
| 73 |
+
)
|
| 74 |
+
calls = {"n": 0}
|
| 75 |
+
|
| 76 |
+
def always_429():
|
| 77 |
+
calls["n"] += 1
|
| 78 |
+
raise _hub_http_error(429)
|
| 79 |
+
|
| 80 |
+
with pytest.raises(submit.HfHubHTTPError):
|
| 81 |
+
submit._with_hub_retries(always_429, what="test")
|
| 82 |
+
assert calls["n"] == 1
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def test_retry_after_header_is_honored(monkeypatch):
|
| 86 |
+
"""A ``Retry-After`` seconds value sets the floor for the sleep delay."""
|
| 87 |
+
slept: list[float] = []
|
| 88 |
+
monkeypatch.setattr(submit.time, "sleep", lambda d: slept.append(d))
|
| 89 |
+
calls = {"n": 0}
|
| 90 |
+
|
| 91 |
+
def flaky():
|
| 92 |
+
calls["n"] += 1
|
| 93 |
+
if calls["n"] == 1:
|
| 94 |
+
raise _hub_http_error(429, headers={"Retry-After": "7"})
|
| 95 |
+
return "ok"
|
| 96 |
+
|
| 97 |
+
assert submit._with_hub_retries(flaky, what="test") == "ok"
|
| 98 |
+
assert slept and slept[0] >= 7.0
|
| 99 |
+
|
| 100 |
+
|
| 101 |
def _stub_meta() -> dict:
|
| 102 |
"""Minimum meta.json shape that survives ``_load_and_validate_meta``."""
|
| 103 |
return {
|