# dashboard/scripts/upload_browsecomp_plus.py
# timchen0618
# Add BrowseComp+ tab: dataset explorer with question, evidence docs, gold answer
# 5cc2a94
"""Upload browsecomp-plus benchmark to HuggingFace for the dashboard visualizer."""
import json
import sys
import os
# Put the key_handler package directory (resolved relative to this script's
# directory) at the front of sys.path so the import on the next line resolves.
# This must run before that import.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../packages/key_handler"))
from key_handler import KeyHandler
# NOTE(review): presumably loads API credentials (e.g. the HF_TOKEN read further
# down) into os.environ — confirm against the key_handler package. It is called
# before `datasets` is imported; keep that order unless verified safe to change.
KeyHandler.set_env_key()
from datasets import Dataset
# Directory holding this script; the data file is located relative to it.
_HERE = os.path.dirname(__file__)

# Decrypted BrowseComp-Plus benchmark file (read below as one JSON object per line).
_DATA_RELPATH = "../../../BrowseComp-Plus/data/browsecomp_plus_decrypted.jsonl"
DATA_PATH = os.path.join(_HERE, _DATA_RELPATH)

# Hugging Face Hub repository id the dataset is pushed to.
HF_REPO = "timchen0618/browsecomp-plus-benchmark"
def _to_row(example):
    """Flatten one parsed benchmark record into the row that gets uploaded.

    Args:
        example: Dict parsed from one JSONL line. Must contain ``query_id``,
            ``query`` and ``answer``; ``evidence_docs`` and ``gold_docs`` are
            optional and default to an empty list.

    Returns:
        A dict with ``query_id`` coerced to ``str``, ``query`` and ``answer``
        passed through unchanged, and the two document lists serialized to
        JSON strings.

    Raises:
        KeyError: If a required field is missing from ``example``.
    """
    return {
        "query_id": str(example["query_id"]),
        "query": example["query"],
        "answer": example["answer"],
        "evidence_docs": json.dumps(example.get("evidence_docs", [])),
        "gold_docs": json.dumps(example.get("gold_docs", [])),
    }


# Read the benchmark line by line (JSONL: one JSON object per line).
rows = []
# JSON text is UTF-8; pin the encoding so decoding never depends on the
# platform's locale default.
with open(DATA_PATH, encoding="utf-8") as f:
    for line in f:
        # Skip blank lines (e.g. a stray empty line at end of file), which
        # json.loads would otherwise reject with a JSONDecodeError.
        if not line.strip():
            continue
        rows.append(_to_row(json.loads(line)))
print(f"Loaded {len(rows)} examples")
# Build an in-memory dataset from the flattened rows and show its summary.
ds = Dataset.from_list(rows)
print("Dataset:", ds)

# Treat an unset or empty HF_TOKEN as "no explicit token" (None) rather than
# passing "" through: with token=None, push_to_hub falls back to the
# credentials saved by a local Hugging Face login instead of sending an
# empty token.
token = os.environ.get("HF_TOKEN") or None
ds.push_to_hub(HF_REPO, token=token)
print(f"Uploaded to {HF_REPO}")