Spaces:
Running
Running
"""Upload browsecomp-plus benchmark to HuggingFace for the dashboard visualizer."""
import json
import sys
import os

# Make the repo-local key_handler package importable before importing from it.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../packages/key_handler"))
from key_handler import KeyHandler

# NOTE(review): presumably exports credentials (e.g. HF_TOKEN) into os.environ;
# confirm against the key_handler package. Kept ahead of the `datasets` import,
# matching the original statement order.
KeyHandler.set_env_key()

from datasets import Dataset

DATA_PATH = os.path.join(
    os.path.dirname(__file__),
    "../../../BrowseComp-Plus/data/browsecomp_plus_decrypted.jsonl",
)
HF_REPO = "timchen0618/browsecomp-plus-benchmark"


def _load_rows(path: str) -> list:
    """Read the JSONL file at *path* and return one flat dict per example.

    Each returned dict has the keys ``query_id``, ``query``, ``answer``,
    ``evidence_docs`` and ``gold_docs``. ``query_id`` is coerced to ``str``;
    ``evidence_docs`` and ``gold_docs`` are serialized to JSON strings and
    default to ``"[]"`` when the key is absent from the input record.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks ``query_id``, ``query`` or ``answer``.
    """
    examples = []
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)
            examples.append({
                "query_id": str(record["query_id"]),
                "query": record["query"],
                "answer": record["answer"],
                "evidence_docs": json.dumps(record.get("evidence_docs", [])),
                "gold_docs": json.dumps(record.get("gold_docs", [])),
            })
    return examples


rows = _load_rows(DATA_PATH)
print(f"Loaded {len(rows)} examples")

ds = Dataset.from_list(rows)
print("Dataset:", ds)

# An unset or empty HF_TOKEN becomes None, so the hub client can fall back to
# locally stored credentials instead of sending an empty token.
token = os.environ.get("HF_TOKEN") or None
ds.push_to_hub(HF_REPO, token=token)
print(f"Uploaded to {HF_REPO}")