Spaces:

HuggingFaceH4
/

harbor-visualiser

Running

AdithyaSK HF Staff committed 2 days ago

Commit

ff3a8e7

1 Parent(s): 119621d

Don't list non-Harbor datasets as if they were task specs

The TaskTrove chip surfaced 'no task' errors because its top-level dirs
hold tasks.parquet bundles, not Harbor task.toml specs. list_hf_tasks'
flat-layout fallback was listing every top-level folder regardless of
whether it actually contained a task.toml.

- Verify the layout by sampling the first ~3 candidate dirs for a
task.toml; if none qualify, return [] instead of listing random folders.
- Swap TaskTrove (parquet-based, not a Harbor task-spec dataset) for
AdithyaSK/dabstep-harbor (450 verified tasks) in the example chips.
- When a dataset has zero tasks, show a clear 'not a Harbor task-spec
format' message in the detail pane instead of a generic 'select a task'.

Files changed (2) hide show

static/app.js +7 -3
viewer/hub.py +15 -2

static/app.js CHANGED Viewed

@@ -49,9 +49,9 @@ function copyButton(text, cls = 'copy') {
 /* ── curated example datasets (shown as bubbles) ──── */
 const EXAMPLES = [
   { label: 'Terminal-Bench 2.0', uri: 'harborframework/terminal-bench-2.0' },
-  { label: 'TaskTrove', uri: 'open-thoughts/TaskTrove' },
   { label: 'Repo2RLEnv · PR diffs', uri: 'AdithyaSK/repo2rlenv-v083-pr_diff' },
   { label: 'TitanBench', uri: 'billshockley/titanbench' },
   { label: 'Harbor tasks demo', uri: 'gh://adithya-s-k/harbor-tasks-demo' },
 ];
 function srcTag(uri) {
@@ -368,8 +368,12 @@ async function renderWorkspace(params) {
     document.getElementById('crumb-pos').textContent = '';
     history.replaceState(null, '', '#' + `dataset?${qs({ uri })}`);
     tree.innerHTML = '';
-    content.innerHTML = `<div class="emptysel"><div class="ic">${ICON.panel}</div>
-      <p>Select a task from the list to view its spec, files & run command.</p></div>`;
   }
   // ── load one task's detail into the tree + content (no full re-render) ──

 /* ── curated example datasets (shown as bubbles) ──── */
 const EXAMPLES = [
   { label: 'Terminal-Bench 2.0', uri: 'harborframework/terminal-bench-2.0' },
   { label: 'Repo2RLEnv · PR diffs', uri: 'AdithyaSK/repo2rlenv-v083-pr_diff' },
   { label: 'TitanBench', uri: 'billshockley/titanbench' },
+  { label: 'DABstep · Harbor', uri: 'AdithyaSK/dabstep-harbor' },
   { label: 'Harbor tasks demo', uri: 'gh://adithya-s-k/harbor-tasks-demo' },
 ];
 function srcTag(uri) {
     document.getElementById('crumb-pos').textContent = '';
     history.replaceState(null, '', '#' + `dataset?${qs({ uri })}`);
     tree.innerHTML = '';
+    content.innerHTML = siblings.length
+      ? `<div class="emptysel"><div class="ic">${ICON.panel}</div>
+         <p>Select a task from the list to view its spec, files & run command.</p></div>`
+      : `<div class="emptysel"><div class="ic">${ICON.info}</div>
+         <p><strong style="color:var(--text)">No Harbor tasks found in this dataset.</strong><br>
+         The visualiser looks for <code>task.toml</code> files (either at the root or under <code>tasks/</code>). This dataset doesn't seem to follow the Harbor task-spec format.</p></div>`;
   }
   // ── load one task's detail into the tree + content (no full re-render) ──

viewer/hub.py CHANGED Viewed

@@ -105,8 +105,21 @@ def list_hf_tasks(dataset_id: str, revision: str | None = None, *, ttl: float =
         sub = api.list_repo_tree(dataset_id, "tasks", repo_type="dataset", revision=revision, recursive=False)
         ids = sorted(e.path.split("/")[-1] for e in sub if _is_dir(e))
     else:
-        # flat layout: top-level folders are the tasks (skip dotfiles/README/etc.)
-        ids = sorted(e.path for e in root if _is_dir(e) and not e.path.startswith("."))
     _TASKS_CACHE[key] = (ids, now)
     return ids

         sub = api.list_repo_tree(dataset_id, "tasks", repo_type="dataset", revision=revision, recursive=False)
         ids = sorted(e.path.split("/")[-1] for e in sub if _is_dir(e))
     else:
+        # Flat layout: top-level folders MAY be tasks (skip dotfiles/README/etc.).
+        # But some datasets (e.g. TaskTrove) have top-level dirs that aren't Harbor
+        # tasks — they hold `tasks.parquet` or similar. Verify the layout by sampling
+        # the first few candidates for a `task.toml`; if none have one, this isn't a
+        # Harbor task-spec dataset and we return [] rather than listing random folders.
+        candidates = sorted(e.path for e in root if _is_dir(e) and not e.path.startswith("."))
+        ids = []
+        for sample in candidates[:3]:
+            try:
+                sub = list(api.list_repo_tree(dataset_id, sample, repo_type="dataset", revision=revision, recursive=False))
+            except Exception:  # noqa: BLE001
+                continue
+            if any(getattr(e, "path", "").endswith("task.toml") for e in sub):
+                ids = candidates
+                break
     _TASKS_CACHE[key] = (ids, now)
     return ids