| from typing import Iterable, Dict |
| import gzip |
| import json |
| import os |
|
|
|
|
# Absolute path of the directory that contains this module.
ROOT = os.path.dirname(os.path.abspath(__file__))
# Default problem file: ``data/HumanEval.jsonl.gz`` in the directory one
# level above this module. The ".." segment is kept as-is (not normalised).
HUMAN_EVAL = os.path.join(ROOT, "..", "data", "HumanEval.jsonl.gz")
|
|
|
|
def read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]:
    """
    Loads every record of a jsonl problem file into a dictionary.

    Each record produced by ``stream_jsonl(evalset_file)`` is stored under
    its ``"task_id"`` value. If several records share a ``task_id``, the one
    appearing last in the file wins. A record without a ``"task_id"`` key
    raises ``KeyError``.
    """
    problems = {}
    for record in stream_jsonl(evalset_file):
        problems[record["task_id"]] = record
    return problems
|
|
|
|
def stream_jsonl(filename: str) -> Iterable[Dict]:
    """
    Parses each jsonl line and yields it as a dictionary

    A leading ``~`` in ``filename`` is expanded, mirroring ``write_jsonl``.
    Files whose name ends in ``.gz`` are decompressed with gzip on the fly;
    everything else is read as plain text. Both variants are decoded as
    UTF-8 regardless of the platform's default encoding.

    Lines that are empty or contain only whitespace are skipped. Every other
    line is passed to ``json.loads`` and the result is yielded (a dictionary
    for well-formed jsonl records).

    This is a generator: the file is opened on first iteration and closed
    when the generator is exhausted or closed.

    Raises:
        OSError: if the file cannot be opened or read.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    filename = os.path.expanduser(filename)
    if filename.endswith(".gz"):
        # Explicit encoding: gzip text mode otherwise falls back to the
        # locale's preferred encoding, which is not UTF-8 everywhere.
        fp = gzip.open(filename, "rt", encoding="utf-8")
    else:
        fp = open(filename, "r", encoding="utf-8")
    with fp:
        for line in fp:
            # strip() leaves an empty (falsy) string for blank lines.
            if line.strip():
                yield json.loads(line)
|
|
|
|
def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False):
    """
    Writes an iterable of dictionaries to jsonl

    Each item of ``data`` is serialised with ``json.dumps`` and written as
    one UTF-8 encoded line terminated by a newline character.

    A leading ``~`` in ``filename`` is expanded first. When ``append`` is
    true the file is opened for appending, otherwise any existing content is
    replaced. If the expanded name ends in ``.gz`` the lines are written
    through a gzip stream layered on the opened file, so appending adds a
    further gzip stream after the existing bytes.
    """
    file_mode = 'ab' if append else 'wb'
    target = os.path.expanduser(filename)
    with open(target, file_mode) as raw:
        if target.endswith(".gz"):
            # The compressor always runs in write mode; append-vs-truncate
            # was already decided by how the underlying file was opened.
            with gzip.GzipFile(fileobj=raw, mode='wb') as packed:
                for record in data:
                    packed.write((json.dumps(record) + "\n").encode('utf-8'))
        else:
            for record in data:
                raw.write((json.dumps(record) + "\n").encode('utf-8'))
|
|