"""Harbor Visualiser — FastAPI backend + Harbor Hub UI.

Serves a single-page "Harbor Hub" themed UI (static/) plus a JSON API that
reuses the existing loader/parser:

    GET /                      → the SPA (static/index.html)
    GET /api/hub/datasets      → live list of Harbor-tagged HF datasets
    GET /api/hub/count?id=     → task count for one Hub dataset (memoised)
    GET /api/dataset?uri=      → fetch a dataset, return its task ids + meta
    GET /api/task?uri=&task=   → one task's parsed spec (files + metadata)
    GET /api/config            → runtime config for the UI (Space host / id)
    GET /healthz

Run locally:

    pip install -r requirements.txt
    uvicorn app:app --reload --port 7860   # → http://127.0.0.1:7860

On a Hugging Face Docker Space it runs via the Dockerfile (uvicorn :7860).
"""

from __future__ import annotations

import logging
import os
from pathlib import Path

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles

from viewer import fetch_dataset, fetch_hf_task, list_tasks, load_task, parse_dataset_uri
from viewer.hub import count_tasks, list_harbor_datasets, list_hf_tasks

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
logger = logging.getLogger("harbor-visualiser")

HERE = Path(__file__).resolve().parent
STATIC = HERE / "static"

app = FastAPI(title="Harbor Visualiser", docs_url="/api/docs")


# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------

def _parse_source_or_400(uri: str):
    """Parse a dataset URI, mapping a parser ``ValueError`` to HTTP 400.

    Shared by the dataset and task endpoints so both reject malformed URIs
    identically.

    Raises:
        HTTPException: 400 with the parser's message when ``uri`` is invalid.
    """
    try:
        return parse_dataset_uri(uri)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc


@app.get("/api/hub/datasets")
def api_hub_datasets(
    q: str | None = Query(None, description="substring filter on dataset id"),
    sort: str = Query("downloads"),
    limit: int = Query(500, ge=1, le=2000),
) -> JSONResponse:
    """Live list of Harbor-tagged datasets on the HF Hub (no stale cache).

    Returns:
        ``{"datasets": [...], "count": <int>}`` where each entry is the
        ``as_dict()`` form of a listed dataset.

    Raises:
        HTTPException: 502 when the Hub listing call fails for any reason.
    """
    try:
        ds = list_harbor_datasets(query=q, sort=sort, limit=limit)
    except Exception as exc:  # noqa: BLE001
        # Endpoint boundary: keep the traceback in the server log, since the
        # client only ever sees the short message.
        logger.exception("HF Hub listing failed (q=%r, sort=%r, limit=%r)", q, sort, limit)
        raise HTTPException(status_code=502, detail=f"HF Hub listing failed: {exc}") from exc
    return JSONResponse({"datasets": [d.as_dict() for d in ds], "count": len(ds)})


@app.get("/api/hub/count")
def api_hub_count(
    id: str = Query(..., description="dataset id, e.g. owner/name"),  # noqa: A002
) -> JSONResponse:
    """Task count for a single Hub dataset.

    The parameter is named ``id`` because that is the public query-string key.

    Returns:
        ``{"id": <dataset id>, "tasks": <count_tasks(id) result>}``.

    Raises:
        HTTPException: 502 when counting fails, consistent with the other
            Hub-backed endpoints in this module.
    """
    try:
        tasks = count_tasks(id)
    except Exception as exc:  # noqa: BLE001
        logger.exception("HF Hub count failed (id=%r)", id)
        raise HTTPException(status_code=502, detail=f"HF Hub count failed: {exc}") from exc
    return JSONResponse({"id": id, "tasks": tasks})


@app.get("/api/dataset")
def api_dataset(
    uri: str = Query(..., description="owner/name | hf:// | gh:// | harbor:// | local path"),
    refresh: int = Query(0, description="1 = force re-fetch (bypass cache)"),
) -> JSONResponse:
    """Fetch a dataset and return its task ids + source metadata.

    Raises:
        HTTPException: 400 when ``uri`` cannot be parsed; 502 when listing or
            fetching the dataset fails.
    """
    source = _parse_source_or_400(uri)
    try:
        if source.kind == "hf":
            # List task ids via the Hub API — no download. Critical for large
            # datasets (2k+ tasks) which would otherwise snapshot the whole repo.
            # NOTE(review): ``refresh`` is not consulted on this path — confirm
            # list_hf_tasks never serves stale results.
            tasks = list_hf_tasks(source.ident, source.revision)
        else:
            root = fetch_dataset(source, force=bool(refresh))
            tasks = list_tasks(root)
    except Exception as exc:  # noqa: BLE001
        logger.exception("dataset fetch failed (uri=%r)", uri)
        raise HTTPException(status_code=502, detail=f"fetch failed: {exc}") from exc
    return JSONResponse({
        "uri": uri,
        "display": source.display,
        "kind": source.kind,
        "ident": source.ident,
        "revision": source.revision,
        "tasks": tasks,
        "count": len(tasks),
    })


@app.get("/api/task")
def api_task(
    uri: str = Query(...),
    task: str = Query(..., description="task id (directory name)"),
    refresh: int = Query(0),
) -> JSONResponse:
    """Return one task's full parsed spec — metadata + every file.

    Raises:
        HTTPException: 400 when ``uri`` cannot be parsed; 404 when fetching or
            loading raises ``FileNotFoundError``; 502 for any other failure.
    """
    source = _parse_source_or_400(uri)
    # NOTE(review): ``task`` comes straight from the query string and is passed
    # on as a directory name — confirm load_task / fetch_hf_task reject path
    # separators and ".." segments.
    try:
        if source.kind == "hf":
            # Pull just this one task's files, not the entire dataset.
            root = fetch_hf_task(source, task, force=bool(refresh))
        else:
            root = fetch_dataset(source, force=bool(refresh))
        t = load_task(root, task)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        logger.exception("task load failed (uri=%r, task=%r)", uri, task)
        raise HTTPException(status_code=502, detail=f"load failed: {exc}") from exc
    return JSONResponse({
        "id": t.id,
        "name": t.name,
        "org": t.org,
        "version": t.version,
        "description": t.description,
        "instruction_inline": t.instruction_inline,
        "difficulty": t.difficulty,
        "category": t.category,
        "keywords": t.keywords,
        "agent_timeout_sec": t.agent_timeout_sec,
        "verifier_timeout_sec": t.verifier_timeout_sec,
        "repo2env": t.repo2env,
        "task_toml_raw": t.task_toml_raw,
        "files": t.files,
    })


@app.get("/api/config")
def api_config() -> JSONResponse:
    """Runtime config for the UI.

    On a Hugging Face Space, $SPACE_HOST is the public app host (e.g.
    owner-name.hf.space) — we surface it so the deep-link / badge examples
    show the real Space URL instead of localhost.

    Returns:
        ``{"space_host": ..., "space_id": ...}``; each value is ``None`` when
        the corresponding environment variable is unset or empty.
    """
    return JSONResponse({
        # ``or None`` folds an empty-string variable into JSON null.
        "space_host": os.environ.get("SPACE_HOST") or None,
        "space_id": os.environ.get("SPACE_ID") or None,
    })


@app.get("/healthz")
def healthz() -> dict[str, bool]:
    """Liveness probe: always returns ``{"ok": True}``."""
    return {"ok": True}


# ---------------------------------------------------------------------------
# UI (static SPA)
# ---------------------------------------------------------------------------

@app.get("/")
def index() -> FileResponse:
    """Serve the single-page app shell (static/index.html)."""
    return FileResponse(STATIC / "index.html", media_type="text/html")


# Mounted after the API routes are registered; serves the SPA's assets.
app.mount("/static", StaticFiles(directory=str(STATIC)), name="static")