# Hugging Face Spaces page header captured with this file: "Spaces: Running"
"""Harbor Visualiser — FastAPI backend + Harbor Hub UI.

Serves a single-page "Harbor Hub" themed UI (static/) plus a JSON API that
reuses the existing loader/parser:

    GET /                      → the SPA (static/index.html)
    GET /api/hub/datasets      → live list of Harbor-tagged HF datasets
    GET /api/hub/count?id=     → task count for one Hub dataset (memoised)
    GET /api/dataset?uri=      → fetch a dataset, return its task ids + meta
    GET /api/task?uri=&task=   → one task's parsed spec (files + metadata)
    GET /healthz               → liveness check, returns {"ok": true}

Run locally:

    pip install -r requirements.txt
    uvicorn app:app --reload --port 7860   # → http://127.0.0.1:7860

On a Hugging Face Docker Space it runs via the Dockerfile (uvicorn :7860).
"""
from __future__ import annotations

import logging
import os
from pathlib import Path

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles

from viewer import fetch_dataset, fetch_hf_task, list_tasks, load_task, parse_dataset_uri
from viewer.hub import count_tasks, list_harbor_datasets, list_hf_tasks
# Process-wide logging: timestamp, level, logger name, then the message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
)
logger = logging.getLogger("harbor-visualiser")

# Directory containing this module, and the static/ folder that holds the SPA.
HERE = Path(__file__).resolve().parent
STATIC = HERE / "static"

# Interactive API docs are served under /api/docs rather than FastAPI's default.
app = FastAPI(title="Harbor Visualiser", docs_url="/api/docs")
# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------
@app.get("/api/hub/datasets")
def api_hub_datasets(
    q: str | None = Query(None, description="substring filter on dataset id"),
    sort: str = Query("downloads"),
    limit: int = Query(500, ge=1, le=2000),
) -> JSONResponse:
    """Live list of Harbor-tagged datasets on the HF Hub (no stale cache).

    Query parameters:
        q: optional substring filter on the dataset id.
        sort: sort key forwarded to ``list_harbor_datasets`` (default
            ``"downloads"``).
        limit: maximum number of datasets requested, 1-2000 (default 500).

    Returns:
        ``{"datasets": [...], "count": N}`` where each entry is the
        ``as_dict()`` form of one listed dataset.

    Raises:
        HTTPException: 502 when the Hub listing call fails for any reason.
    """
    try:
        datasets = list_harbor_datasets(query=q, sort=sort, limit=limit)
    except Exception as exc:  # noqa: BLE001
        # Upstream failure: keep the traceback in the server log, then report
        # it to the client as a bad-gateway error.
        logger.exception("HF Hub listing failed (q=%r, sort=%r, limit=%r)", q, sort, limit)
        raise HTTPException(502, f"HF Hub listing failed: {exc}") from exc
    return JSONResponse({
        "datasets": [d.as_dict() for d in datasets],
        "count": len(datasets),
    })
@app.get("/api/hub/count")
def api_hub_count(
    # ``id`` shadows the builtin, but it is the public query-parameter name
    # (``?id=``), so it is kept as-is.
    id: str = Query(..., description="dataset id, e.g. owner/name"),  # noqa: A002
) -> JSONResponse:
    """Task count for a single Hub dataset (one cheap list_repo_files call).

    Query parameters:
        id: dataset id, e.g. ``owner/name``.

    Returns:
        ``{"id": <id>, "tasks": <count_tasks(id)>}``.

    Raises:
        HTTPException: 502 when counting fails, matching how the other
            Hub-backed endpoints in this module report upstream errors.
    """
    try:
        tasks = count_tasks(id)
    except Exception as exc:  # noqa: BLE001
        logger.exception("task count failed for %r", id)
        raise HTTPException(502, f"task count failed: {exc}") from exc
    return JSONResponse({"id": id, "tasks": tasks})
@app.get("/api/dataset")
def api_dataset(
    uri: str = Query(..., description="owner/name | hf:// | gh:// | harbor:// | local path"),
    refresh: int = Query(0, description="1 = force re-fetch (bypass cache)"),
) -> JSONResponse:
    """Fetch a dataset and return its task ids + source metadata.

    Query parameters:
        uri: dataset locator understood by ``parse_dataset_uri``.
        refresh: non-zero forces a re-fetch. It is only consulted for non-HF
            sources; the HF branch lists task ids live and ignores it.

    Returns:
        JSON with ``uri``, ``display``, ``kind``, ``ident``, ``revision``,
        ``tasks`` (the task ids) and ``count``.

    Raises:
        HTTPException: 400 when the URI cannot be parsed, 502 when listing or
            fetching the dataset fails.
    """
    try:
        source = parse_dataset_uri(uri)
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc

    try:
        if source.kind == "hf":
            # List task ids via the Hub API — no download. Critical for large
            # datasets (2k+ tasks) which would otherwise snapshot the whole repo.
            tasks = list_hf_tasks(source.ident, source.revision)
        else:
            root = fetch_dataset(source, force=bool(refresh))
            tasks = list_tasks(root)
    except Exception as exc:  # noqa: BLE001
        # Keep the traceback server-side; the client only gets the message.
        logger.exception("dataset fetch failed for %r", uri)
        raise HTTPException(502, f"fetch failed: {exc}") from exc

    return JSONResponse({
        "uri": uri,
        "display": source.display,
        "kind": source.kind,
        "ident": source.ident,
        "revision": source.revision,
        "tasks": tasks,
        "count": len(tasks),
    })
@app.get("/api/task")
def api_task(
    uri: str = Query(...),
    task: str = Query(..., description="task id (directory name)"),
    refresh: int = Query(0),
) -> JSONResponse:
    """Return one task's full parsed spec — metadata + every file.

    Query parameters:
        uri: dataset locator understood by ``parse_dataset_uri``.
        task: task id (directory name) inside that dataset.
        refresh: non-zero forces a re-fetch instead of reusing a cached copy.

    Returns:
        JSON with the task's metadata fields, the raw ``task.toml`` text and
        its ``files``.

    Raises:
        HTTPException: 400 when the URI cannot be parsed, 404 when fetching
            or loading raises ``FileNotFoundError``, 502 for any other
            failure.
    """
    try:
        source = parse_dataset_uri(uri)
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc

    # NOTE(review): ``task`` comes straight from the query string; confirm that
    # fetch_hf_task / load_task reject path separators and ".." components.
    force = bool(refresh)
    try:
        if source.kind == "hf":
            # Pull just this one task's files, not the entire dataset.
            root = fetch_hf_task(source, task, force=force)
        else:
            root = fetch_dataset(source, force=force)
        t = load_task(root, task)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        # Keep the traceback server-side; the client only gets the message.
        logger.exception("task load failed for uri=%r task=%r", uri, task)
        raise HTTPException(502, f"load failed: {exc}") from exc

    return JSONResponse({
        "id": t.id,
        "name": t.name,
        "org": t.org,
        "version": t.version,
        "description": t.description,
        "instruction_inline": t.instruction_inline,
        "difficulty": t.difficulty,
        "category": t.category,
        "keywords": t.keywords,
        "agent_timeout_sec": t.agent_timeout_sec,
        "verifier_timeout_sec": t.verifier_timeout_sec,
        "repo2env": t.repo2env,
        "task_toml_raw": t.task_toml_raw,
        "files": t.files,
    })
# NOTE(review): the module docstring does not list this route; "/api/config"
# is assumed — confirm against the path the UI in static/ actually requests.
@app.get("/api/config")
def api_config() -> JSONResponse:
    """Runtime config for the UI.

    On a Hugging Face Space, $SPACE_HOST is the public app host (e.g.
    owner-name.hf.space) — we surface it so the deep-link / badge examples
    show the real Space URL instead of localhost. $SPACE_ID is passed through
    the same way.

    Returns:
        ``{"space_host": ..., "space_id": ...}``; a variable that is unset or
        empty is reported as ``null``.
    """
    env = os.environ
    return JSONResponse({
        # ``or None`` folds an empty string into null as well as a missing var.
        "space_host": env.get("SPACE_HOST") or None,
        "space_id": env.get("SPACE_ID") or None,
    })
@app.get("/healthz")
def healthz() -> dict[str, bool]:
    """Liveness check: always returns ``{"ok": True}``."""
    return {"ok": True}
# ---------------------------------------------------------------------------
# UI (static SPA)
# ---------------------------------------------------------------------------
@app.get("/")
def index() -> FileResponse:
    """Serve the single-page app shell, ``static/index.html``, as HTML."""
    return FileResponse(STATIC / "index.html", media_type="text/html")
# Expose the contents of static/ (the SPA's assets) under the /static prefix.
app.mount(
    "/static",
    StaticFiles(directory=str(STATIC)),
    name="static",
)