AdithyaSK HF Staff committed on
Commit
ff3a8e7
·
1 Parent(s): 119621d

Don't list non-Harbor datasets as if they were task specs

Browse files

The TaskTrove chip surfaced 'no task' errors because its top-level dirs
hold tasks.parquet bundles, not Harbor task.toml specs. The flat-layout
fallback in list_hf_tasks was listing every top-level folder regardless
of whether it actually contained a task.toml.

- Verify the layout by sampling the first ~3 candidate dirs for a
task.toml; if none qualify, return [] instead of listing random folders.
- Swap TaskTrove (parquet-based, not a Harbor task-spec dataset) for
AdithyaSK/dabstep-harbor (450 verified tasks) in the example chips.
- When a dataset has zero tasks, show a clear 'not a Harbor task-spec
format' message in the detail pane instead of a generic 'select a task'.

Files changed (2) hide show
  1. static/app.js +7 -3
  2. viewer/hub.py +15 -2
static/app.js CHANGED
@@ -49,9 +49,9 @@ function copyButton(text, cls = 'copy') {
49
  /* ── curated example datasets (shown as bubbles) ──── */
50
  const EXAMPLES = [
51
  { label: 'Terminal-Bench 2.0', uri: 'harborframework/terminal-bench-2.0' },
52
- { label: 'TaskTrove', uri: 'open-thoughts/TaskTrove' },
53
  { label: 'Repo2RLEnv · PR diffs', uri: 'AdithyaSK/repo2rlenv-v083-pr_diff' },
54
  { label: 'TitanBench', uri: 'billshockley/titanbench' },
 
55
  { label: 'Harbor tasks demo', uri: 'gh://adithya-s-k/harbor-tasks-demo' },
56
  ];
57
  function srcTag(uri) {
@@ -368,8 +368,12 @@ async function renderWorkspace(params) {
368
  document.getElementById('crumb-pos').textContent = '';
369
  history.replaceState(null, '', '#' + `dataset?${qs({ uri })}`);
370
  tree.innerHTML = '';
371
- content.innerHTML = `<div class="emptysel"><div class="ic">${ICON.panel}</div>
372
- <p>Select a task from the list to view its spec, files & run command.</p></div>`;
 
 
 
 
373
  }
374
 
375
  // ── load one task's detail into the tree + content (no full re-render) ──
 
49
  /* ── curated example datasets (shown as bubbles) ──── */
50
  const EXAMPLES = [
51
  { label: 'Terminal-Bench 2.0', uri: 'harborframework/terminal-bench-2.0' },
 
52
  { label: 'Repo2RLEnv · PR diffs', uri: 'AdithyaSK/repo2rlenv-v083-pr_diff' },
53
  { label: 'TitanBench', uri: 'billshockley/titanbench' },
54
+ { label: 'DABstep · Harbor', uri: 'AdithyaSK/dabstep-harbor' },
55
  { label: 'Harbor tasks demo', uri: 'gh://adithya-s-k/harbor-tasks-demo' },
56
  ];
57
  function srcTag(uri) {
 
368
  document.getElementById('crumb-pos').textContent = '';
369
  history.replaceState(null, '', '#' + `dataset?${qs({ uri })}`);
370
  tree.innerHTML = '';
371
+ content.innerHTML = siblings.length
372
+ ? `<div class="emptysel"><div class="ic">${ICON.panel}</div>
373
+ <p>Select a task from the list to view its spec, files & run command.</p></div>`
374
+ : `<div class="emptysel"><div class="ic">${ICON.info}</div>
375
+ <p><strong style="color:var(--text)">No Harbor tasks found in this dataset.</strong><br>
376
+ The visualiser looks for <code>task.toml</code> files (either at the root or under <code>tasks/</code>). This dataset doesn't seem to follow the Harbor task-spec format.</p></div>`;
377
  }
378
 
379
  // ── load one task's detail into the tree + content (no full re-render) ──
viewer/hub.py CHANGED
@@ -105,8 +105,21 @@ def list_hf_tasks(dataset_id: str, revision: str | None = None, *, ttl: float =
105
  sub = api.list_repo_tree(dataset_id, "tasks", repo_type="dataset", revision=revision, recursive=False)
106
  ids = sorted(e.path.split("/")[-1] for e in sub if _is_dir(e))
107
  else:
108
- # flat layout: top-level folders are the tasks (skip dotfiles/README/etc.)
109
- ids = sorted(e.path for e in root if _is_dir(e) and not e.path.startswith("."))
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  _TASKS_CACHE[key] = (ids, now)
112
  return ids
 
105
  sub = api.list_repo_tree(dataset_id, "tasks", repo_type="dataset", revision=revision, recursive=False)
106
  ids = sorted(e.path.split("/")[-1] for e in sub if _is_dir(e))
107
  else:
108
+ # Flat layout: top-level folders MAY be tasks (skip dotfiles/README/etc.).
109
+ # But some datasets (e.g. TaskTrove) have top-level dirs that aren't Harbor
110
+ # tasks — they hold `tasks.parquet` or similar. Verify the layout by sampling
111
+ # the first few candidates for a `task.toml`; if none have one, this isn't a
112
+ # Harbor task-spec dataset and we return [] rather than listing random folders.
113
+ candidates = sorted(e.path for e in root if _is_dir(e) and not e.path.startswith("."))
114
+ ids = []
115
+ for sample in candidates[:3]:
116
+ try:
117
+ sub = list(api.list_repo_tree(dataset_id, sample, repo_type="dataset", revision=revision, recursive=False))
118
+ except Exception: # noqa: BLE001
119
+ continue
120
+ if any(getattr(e, "path", "").endswith("task.toml") for e in sub):
121
+ ids = candidates
122
+ break
123
 
124
  _TASKS_CACHE[key] = (ids, now)
125
  return ids