Spaces:
Running
Running
Don't list non-Harbor datasets as if they were task specs
Browse files
The TaskTrove chip surfaced 'no task' errors because its top-level dirs
hold tasks.parquet bundles, not Harbor task.toml specs. list_hf_tasks'
flat-layout fallback was listing every top-level folder regardless of
whether it actually contained a task.toml.
- Verify the layout by sampling the first ~3 candidate dirs for a
task.toml; if none qualify, return [] instead of listing random folders.
- Swap TaskTrove (parquet-based, not a Harbor task-spec dataset) for
AdithyaSK/dabstep-harbor (450 verified tasks) in the example chips.
- When a dataset has zero tasks, show a clear 'not a Harbor task-spec
format' message in the detail pane instead of a generic 'select a task'.
- static/app.js +7 -3
- viewer/hub.py +15 -2
static/app.js
CHANGED
|
@@ -49,9 +49,9 @@ function copyButton(text, cls = 'copy') {
|
|
| 49 |
/* ── curated example datasets (shown as bubbles) ──── */
|
| 50 |
const EXAMPLES = [
|
| 51 |
{ label: 'Terminal-Bench 2.0', uri: 'harborframework/terminal-bench-2.0' },
|
| 52 |
-
{ label: 'TaskTrove', uri: 'open-thoughts/TaskTrove' },
|
| 53 |
{ label: 'Repo2RLEnv · PR diffs', uri: 'AdithyaSK/repo2rlenv-v083-pr_diff' },
|
| 54 |
{ label: 'TitanBench', uri: 'billshockley/titanbench' },
|
|
|
|
| 55 |
{ label: 'Harbor tasks demo', uri: 'gh://adithya-s-k/harbor-tasks-demo' },
|
| 56 |
];
|
| 57 |
function srcTag(uri) {
|
|
@@ -368,8 +368,12 @@ async function renderWorkspace(params) {
|
|
| 368 |
document.getElementById('crumb-pos').textContent = '';
|
| 369 |
history.replaceState(null, '', '#' + `dataset?${qs({ uri })}`);
|
| 370 |
tree.innerHTML = '';
|
| 371 |
-
content.innerHTML =
|
| 372 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
}
|
| 374 |
|
| 375 |
// ── load one task's detail into the tree + content (no full re-render) ──
|
|
|
|
| 49 |
/* ── curated example datasets (shown as bubbles) ──── */
|
| 50 |
const EXAMPLES = [
|
| 51 |
{ label: 'Terminal-Bench 2.0', uri: 'harborframework/terminal-bench-2.0' },
|
|
|
|
| 52 |
{ label: 'Repo2RLEnv · PR diffs', uri: 'AdithyaSK/repo2rlenv-v083-pr_diff' },
|
| 53 |
{ label: 'TitanBench', uri: 'billshockley/titanbench' },
|
| 54 |
+
{ label: 'DABstep · Harbor', uri: 'AdithyaSK/dabstep-harbor' },
|
| 55 |
{ label: 'Harbor tasks demo', uri: 'gh://adithya-s-k/harbor-tasks-demo' },
|
| 56 |
];
|
| 57 |
function srcTag(uri) {
|
|
|
|
| 368 |
document.getElementById('crumb-pos').textContent = '';
|
| 369 |
history.replaceState(null, '', '#' + `dataset?${qs({ uri })}`);
|
| 370 |
tree.innerHTML = '';
|
| 371 |
+
content.innerHTML = siblings.length
|
| 372 |
+
? `<div class="emptysel"><div class="ic">${ICON.panel}</div>
|
| 373 |
+
<p>Select a task from the list to view its spec, files & run command.</p></div>`
|
| 374 |
+
: `<div class="emptysel"><div class="ic">${ICON.info}</div>
|
| 375 |
+
<p><strong style="color:var(--text)">No Harbor tasks found in this dataset.</strong><br>
|
| 376 |
+
The visualiser looks for <code>task.toml</code> files (either at the root or under <code>tasks/</code>). This dataset doesn't seem to follow the Harbor task-spec format.</p></div>`;
|
| 377 |
}
|
| 378 |
|
| 379 |
// ── load one task's detail into the tree + content (no full re-render) ──
|
viewer/hub.py
CHANGED
|
@@ -105,8 +105,21 @@ def list_hf_tasks(dataset_id: str, revision: str | None = None, *, ttl: float =
|
|
| 105 |
sub = api.list_repo_tree(dataset_id, "tasks", repo_type="dataset", revision=revision, recursive=False)
|
| 106 |
ids = sorted(e.path.split("/")[-1] for e in sub if _is_dir(e))
|
| 107 |
else:
|
| 108 |
-
#
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
_TASKS_CACHE[key] = (ids, now)
|
| 112 |
return ids
|
|
|
|
| 105 |
sub = api.list_repo_tree(dataset_id, "tasks", repo_type="dataset", revision=revision, recursive=False)
|
| 106 |
ids = sorted(e.path.split("/")[-1] for e in sub if _is_dir(e))
|
| 107 |
else:
|
| 108 |
+
# Flat layout: top-level folders MAY be tasks (skip dotfiles/README/etc.).
|
| 109 |
+
# But some datasets (e.g. TaskTrove) have top-level dirs that aren't Harbor
|
| 110 |
+
# tasks — they hold `tasks.parquet` or similar. Verify the layout by sampling
|
| 111 |
+
# the first few candidates for a `task.toml`; if none have one, this isn't a
|
| 112 |
+
# Harbor task-spec dataset and we return [] rather than listing random folders.
|
| 113 |
+
candidates = sorted(e.path for e in root if _is_dir(e) and not e.path.startswith("."))
|
| 114 |
+
ids = []
|
| 115 |
+
for sample in candidates[:3]:
|
| 116 |
+
try:
|
| 117 |
+
sub = list(api.list_repo_tree(dataset_id, sample, repo_type="dataset", revision=revision, recursive=False))
|
| 118 |
+
except Exception: # noqa: BLE001
|
| 119 |
+
continue
|
| 120 |
+
if any(getattr(e, "path", "").endswith("task.toml") for e in sub):
|
| 121 |
+
ids = candidates
|
| 122 |
+
break
|
| 123 |
|
| 124 |
_TASKS_CACHE[key] = (ids, now)
|
| 125 |
return ids
|