AdithyaSK's picture
AdithyaSK HF Staff
Port Harbor Visualiser from Gradio to FastAPI + Hugging Face theme
a301de7
"""Harbor Visualiser — FastAPI backend + Harbor Hub UI.
Serves a single-page "Harbor Hub" themed UI (static/) plus a JSON API that
reuses the existing loader/parser:

    GET /                       → the SPA (static/index.html)
    GET /api/hub/datasets       → live list of Harbor-tagged HF datasets
    GET /api/hub/count?id=      → task count for one Hub dataset (memoised)
    GET /api/dataset?uri=       → fetch a dataset, return its task ids + meta
    GET /api/task?uri=&task=    → one task's parsed spec (files + metadata)
    GET /api/config             → runtime config for the UI (Space host / id)
    GET /healthz                → liveness check

Run locally:

    pip install -r requirements.txt
    uvicorn app:app --reload --port 7860   # → http://127.0.0.1:7860

On a Hugging Face Docker Space it runs via the Dockerfile (uvicorn :7860).
"""
from __future__ import annotations
import logging
from pathlib import Path
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from viewer import fetch_dataset, fetch_hf_task, list_tasks, load_task, parse_dataset_uri
from viewer.hub import count_tasks, list_harbor_datasets, list_hf_tasks
# Process-wide logging: timestamp, level, logger name, then the message.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("harbor-visualiser")

# Directory containing this module, and the SPA assets that live beside it.
HERE = Path(__file__).resolve().parent
STATIC = HERE.joinpath("static")

# The interactive API docs are served at /api/docs (see docs_url).
app = FastAPI(title="Harbor Visualiser", docs_url="/api/docs")
# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------
@app.get("/api/hub/datasets")
def api_hub_datasets(
    q: str | None = Query(None, description="substring filter on dataset id"),
    sort: str = Query("downloads"),
    limit: int = Query(500, ge=1, le=2000),
) -> JSONResponse:
    """List Harbor-tagged datasets on the HF Hub, queried live on every call.

    Args:
        q: Optional substring filter, forwarded as ``query``.
        sort: Sort key forwarded to the Hub listing helper.
        limit: Maximum number of datasets to return (1..2000).

    Returns:
        JSON with ``datasets`` (one dict per dataset) and ``count``.

    Raises:
        HTTPException: 502 when the Hub listing call fails for any reason.
    """
    try:
        found = list_harbor_datasets(query=q, sort=sort, limit=limit)
    except Exception as exc:  # noqa: BLE001
        # Any upstream failure is reported to the client as a bad gateway.
        raise HTTPException(502, f"HF Hub listing failed: {exc}") from exc

    rows = [entry.as_dict() for entry in found]
    return JSONResponse({"datasets": rows, "count": len(found)})
@app.get("/api/hub/count")
def api_hub_count(id: str = Query(..., description="dataset id, e.g. owner/name")) -> JSONResponse:
    """Task count for a single Hub dataset (one cheap list_repo_files call).

    The parameter is named ``id`` because that is the public query-string key.

    Args:
        id: Hub dataset id, e.g. ``owner/name``.

    Returns:
        JSON with the echoed ``id`` and its ``tasks`` count.

    Raises:
        HTTPException: 502 when the count lookup fails, matching how the other
            Hub-backed endpoints report upstream errors (instead of a bare 500).
    """
    try:
        tasks = count_tasks(id)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"HF Hub count failed: {exc}") from exc
    return JSONResponse({"id": id, "tasks": tasks})
@app.get("/api/dataset")
def api_dataset(
    uri: str = Query(..., description="owner/name | hf:// | gh:// | harbor:// | local path"),
    refresh: int = Query(0, description="1 = force re-fetch (bypass cache)"),
) -> JSONResponse:
    """Resolve a dataset URI and report its task ids plus source metadata.

    Args:
        uri: Dataset locator in any of the supported forms.
        refresh: Non-zero forces a re-fetch for non-Hub sources.

    Returns:
        JSON with the echoed ``uri``, the parsed source fields, the task ids
        and their ``count``.

    Raises:
        HTTPException: 400 when the URI cannot be parsed, 502 when listing or
            fetching the dataset fails.
    """
    try:
        source = parse_dataset_uri(uri)
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc

    try:
        if source.kind != "hf":
            dataset_root = fetch_dataset(source, force=bool(refresh))
            tasks = list_tasks(dataset_root)
        else:
            # Hub datasets are listed through the Hub API with no download.
            # This matters for large datasets (2k+ tasks), which would
            # otherwise require snapshotting the whole repo.
            tasks = list_hf_tasks(source.ident, source.revision)
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"fetch failed: {exc}") from exc

    payload = {
        "uri": uri,
        "display": source.display,
        "kind": source.kind,
        "ident": source.ident,
        "revision": source.revision,
        "tasks": tasks,
        "count": len(tasks),
    }
    return JSONResponse(payload)
@app.get("/api/task")
def api_task(
    uri: str = Query(...),
    task: str = Query(..., description="task id (directory name)"),
    refresh: int = Query(0),
) -> JSONResponse:
    """Load a single task and return its parsed spec: metadata and every file.

    Args:
        uri: Dataset locator, parsed with ``parse_dataset_uri``.
        task: Task id (directory name) inside the dataset.
        refresh: Non-zero forces a re-fetch of the underlying data.

    Returns:
        JSON object exposing the task's metadata fields, the raw task TOML and
        its files.

    Raises:
        HTTPException: 400 for an unparseable URI, 404 when loading raises
            ``FileNotFoundError``, 502 for any other fetch or load failure.
    """
    # NOTE(review): `task` is passed through from the query string unchanged;
    # confirm that fetch_hf_task/load_task reject path-traversal style ids.
    try:
        source = parse_dataset_uri(uri)
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc

    force = bool(refresh)
    try:
        if source.kind != "hf":
            root = fetch_dataset(source, force=force)
        else:
            # For Hub sources pull only this task's files, not the whole dataset.
            root = fetch_hf_task(source, task, force=force)
        spec = load_task(root, task)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        raise HTTPException(502, f"load failed: {exc}") from exc

    # Response keys mirror the attribute names on the loaded task, in order.
    exposed = (
        "id",
        "name",
        "org",
        "version",
        "description",
        "instruction_inline",
        "difficulty",
        "category",
        "keywords",
        "agent_timeout_sec",
        "verifier_timeout_sec",
        "repo2env",
        "task_toml_raw",
        "files",
    )
    return JSONResponse({field: getattr(spec, field) for field in exposed})
@app.get("/api/config")
def api_config() -> JSONResponse:
    """Expose runtime configuration that the UI reads at start-up.

    On a Hugging Face Space, $SPACE_HOST is the public app host (e.g.
    owner-name.hf.space). It is surfaced so the deep-link / badge examples
    show the real Space URL instead of localhost. Unset or empty variables
    are reported as ``null``.
    """
    import os

    space_host = os.getenv("SPACE_HOST") or None
    space_id = os.getenv("SPACE_ID") or None
    return JSONResponse({"space_host": space_host, "space_id": space_id})
@app.get("/healthz")
def healthz() -> dict:
    """Liveness probe: always answers with ``{"ok": True}``."""
    status = {"ok": True}
    return status
# ---------------------------------------------------------------------------
# UI (static SPA)
# ---------------------------------------------------------------------------
@app.get("/")
def index() -> FileResponse:
    """Serve the single-page app's entry document, static/index.html, as HTML."""
    entry_page = STATIC / "index.html"
    return FileResponse(entry_page, media_type="text/html")
# The contents of the static directory are exposed under /static.
app.mount(
    "/static",
    StaticFiles(directory=str(STATIC)),
    name="static",
)