Spaces:
Running
Running
File size: 5,907 Bytes
a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 e265a14 f718aea a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 e3c4a23 a301de7 f718aea a301de7 f718aea a301de7 f718aea a301de7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | """Harbor Visualiser β FastAPI backend + Harbor Hub UI.
Serves a single-page "Harbor Hub" themed UI (static/) plus a JSON API that
reuses the existing loader/parser:
GET / → the SPA (static/index.html)
GET /api/hub/datasets → live list of Harbor-tagged HF datasets
GET /api/hub/count?id= → task count for one Hub dataset (memoised)
GET /api/dataset?uri= → fetch a dataset, return its task ids + meta
GET /api/task?uri=&task= → one task's parsed spec (files + metadata)
GET /api/config → runtime config for the UI (Space host / id)
GET /healthz → liveness probe
Run locally:
pip install -r requirements.txt
uvicorn app:app --reload --port 7860 # → http://127.0.0.1:7860
On a Hugging Face Docker Space it runs via the Dockerfile (uvicorn :7860).
"""
from __future__ import annotations
import logging
from pathlib import Path
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from viewer import fetch_dataset, fetch_hf_task, list_tasks, load_task, parse_dataset_uri
from viewer.hub import count_tasks, list_harbor_datasets, list_hf_tasks
# Configure root logging once at import time: INFO level, timestamped records.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s %(message)s")
logger = logging.getLogger("harbor-visualiser")
# Anchor paths to this file's directory rather than the process working directory.
HERE = Path(__file__).resolve().parent
# Directory holding index.html and the assets mounted under /static.
STATIC = HERE / "static"
# The interactive API docs are exposed at /api/docs.
app = FastAPI(title="Harbor Visualiser", docs_url="/api/docs")
# ---------------------------------------------------------------------------
# API
# ---------------------------------------------------------------------------
@app.get("/api/hub/datasets")
def api_hub_datasets(
    q: str | None = Query(None, description="substring filter on dataset id"),
    sort: str = Query("downloads"),
    limit: int = Query(500, ge=1, le=2000),
) -> JSONResponse:
    """Return the live list of Harbor-tagged datasets on the HF Hub.

    Args:
        q: Optional substring filter on the dataset id, forwarded as ``query``.
        sort: Sort key forwarded to ``list_harbor_datasets`` (default
            ``"downloads"``).
        limit: Maximum number of datasets to request (1-2000, default 500).

    Returns:
        JSON object with ``datasets`` (each entry's ``as_dict()``) and
        ``count`` (the number of entries).

    Raises:
        HTTPException: 502 when the Hub listing call fails for any reason.
    """
    try:
        datasets = list_harbor_datasets(query=q, sort=sort, limit=limit)
    except Exception as exc:  # noqa: BLE001 - API boundary: any upstream failure -> 502
        # Keep the traceback in the server log; the client only gets a summary.
        logger.exception("HF Hub listing failed (q=%r, sort=%r, limit=%r)", q, sort, limit)
        raise HTTPException(502, f"HF Hub listing failed: {exc}") from exc
    return JSONResponse({"datasets": [d.as_dict() for d in datasets], "count": len(datasets)})
@app.get("/api/hub/count")
def api_hub_count(id: str = Query(..., description="dataset id, e.g. owner/name")) -> JSONResponse:  # noqa: A002
    """Return the task count for a single Hub dataset.

    The parameter is named ``id`` because FastAPI derives the query-string
    key from it; renaming it would change the public ``?id=`` contract.

    Args:
        id: Dataset id, e.g. ``owner/name``.

    Returns:
        JSON object with the echoed ``id`` and ``tasks`` (whatever
        ``count_tasks`` returns for that id).

    Raises:
        HTTPException: 502 when ``count_tasks`` raises, matching how the other
            endpoints in this module report upstream failures.
    """
    try:
        tasks = count_tasks(id)
    except Exception as exc:  # noqa: BLE001 - API boundary: any upstream failure -> 502
        # Without this, a failure here surfaced as an opaque 500.
        logger.exception("task count failed for dataset %r", id)
        raise HTTPException(502, f"count failed: {exc}") from exc
    return JSONResponse({"id": id, "tasks": tasks})
@app.get("/api/dataset")
def api_dataset(
    uri: str = Query(..., description="owner/name | hf:// | gh:// | harbor:// | local path"),
    refresh: int = Query(0, description="1 = force re-fetch (bypass cache)"),
) -> JSONResponse:
    """Resolve a dataset URI and return its task ids plus source metadata.

    Args:
        uri: Dataset locator accepted by ``parse_dataset_uri``.
        refresh: Non-zero forces ``fetch_dataset`` to re-fetch. It is only
            forwarded on the non-``hf`` path; the ``hf`` path lists tasks
            through ``list_hf_tasks`` and does not receive this flag.

    Returns:
        JSON object with ``uri``, ``display``, ``kind``, ``ident``,
        ``revision``, ``tasks`` and ``count``.

    Raises:
        HTTPException: 400 when the URI cannot be parsed (``ValueError``);
            502 when listing or fetching the dataset fails.
    """
    try:
        source = parse_dataset_uri(uri)
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc

    try:
        if source.kind == "hf":
            # List task ids via the Hub API — no download. Critical for large
            # datasets (2k+ tasks) which would otherwise snapshot the whole repo.
            tasks = list_hf_tasks(source.ident, source.revision)
        else:
            root = fetch_dataset(source, force=bool(refresh))
            tasks = list_tasks(root)
    except Exception as exc:  # noqa: BLE001 - API boundary: any upstream failure -> 502
        # Keep the traceback in the server log; the client only gets a summary.
        logger.exception("dataset fetch failed (uri=%r, refresh=%r)", uri, refresh)
        raise HTTPException(502, f"fetch failed: {exc}") from exc

    return JSONResponse({
        "uri": uri,
        "display": source.display,
        "kind": source.kind,
        "ident": source.ident,
        "revision": source.revision,
        "tasks": tasks,
        "count": len(tasks),
    })
@app.get("/api/task")
def api_task(
    uri: str = Query(...),
    task: str = Query(..., description="task id (directory name)"),
    refresh: int = Query(0),
) -> JSONResponse:
    """Return one task's full parsed spec — metadata plus every file.

    Args:
        uri: Dataset locator accepted by ``parse_dataset_uri``.
        task: Task id (directory name) within the dataset.
        refresh: Non-zero is forwarded as ``force=True`` to the fetch helper.

    Returns:
        JSON object exposing the loaded task's attributes (``id``, ``name``,
        ``org``, ``version``, ``description``, ``instruction_inline``,
        ``difficulty``, ``category``, ``keywords``, the two timeouts,
        ``repo2env``, ``task_toml_raw`` and ``files``).

    Raises:
        HTTPException: 400 when the URI cannot be parsed (``ValueError``);
            404 when fetching or loading raises ``FileNotFoundError``;
            502 for any other fetch/load failure.
    """
    try:
        source = parse_dataset_uri(uri)
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc

    force = bool(refresh)
    try:
        if source.kind == "hf":
            # Pull just this one task's files, not the entire dataset.
            root = fetch_hf_task(source, task, force=force)
        else:
            root = fetch_dataset(source, force=force)
        t = load_task(root, task)
    except FileNotFoundError as exc:
        raise HTTPException(404, str(exc)) from exc
    except Exception as exc:  # noqa: BLE001 - API boundary: any upstream failure -> 502
        # Keep the traceback in the server log; the client only gets a summary.
        logger.exception("task load failed (uri=%r, task=%r, refresh=%r)", uri, task, refresh)
        raise HTTPException(502, f"load failed: {exc}") from exc

    return JSONResponse({
        "id": t.id,
        "name": t.name,
        "org": t.org,
        "version": t.version,
        "description": t.description,
        "instruction_inline": t.instruction_inline,
        "difficulty": t.difficulty,
        "category": t.category,
        "keywords": t.keywords,
        "agent_timeout_sec": t.agent_timeout_sec,
        "verifier_timeout_sec": t.verifier_timeout_sec,
        "repo2env": t.repo2env,
        "task_toml_raw": t.task_toml_raw,
        "files": t.files,
    })
@app.get("/api/config")
def api_config() -> JSONResponse:
    """Expose runtime configuration consumed by the UI.

    Reads ``$SPACE_HOST`` and ``$SPACE_ID`` from the environment so the UI can
    show the real Space URL in its deep-link / badge examples instead of
    localhost. A variable that is unset or empty is reported as ``null``.
    """
    import os

    def _env_or_none(name: str) -> str | None:
        # Collapse both "unset" and "set but empty" to None.
        value = os.environ.get(name)
        return value if value else None

    payload = {
        "space_host": _env_or_none("SPACE_HOST"),
        "space_id": _env_or_none("SPACE_ID"),
    }
    return JSONResponse(payload)
@app.get("/healthz")
def healthz() -> dict:
    """Liveness probe: always answers with ``{"ok": True}``."""
    status = {"ok": True}
    return status
# ---------------------------------------------------------------------------
# UI (static SPA)
# ---------------------------------------------------------------------------
@app.get("/")
def index() -> FileResponse:
    """Serve the single-page UI's entry document, ``index.html``, as HTML."""
    entry_page = STATIC / "index.html"
    return FileResponse(entry_page, media_type="text/html")
# Serve every file in the STATIC directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory=str(STATIC)), name="static")
|