"""Split a JSON annotation list into a fixed number of part files.

The source file is expected to contain a JSON array. It is cut into
``num_parts`` consecutive slices and each slice is written to
``annotations_part_<k>.json`` (1-based ``k``) inside ``anno_output_dir``.
"""
import json
import os
import math

anno_json_path = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
)
num_parts = 8
anno_output_dir = "../annotations/"


def _chunk_bounds(total, parts):
    """Return ``parts`` half-open ``(start, end)`` index pairs over ``range(total)``.

    Every chunk has ``ceil(total / parts)`` items except the trailing ones,
    which may be shorter or empty when ``total`` is not a multiple of
    ``parts``.

    Raises:
        ValueError: if ``parts`` is not positive.
    """
    if parts <= 0:
        raise ValueError(f"parts must be positive, got {parts}")
    size = math.ceil(total / parts)
    # Clamp both ends so trailing chunks degrade to empty slices instead of
    # pointing past the end of the data.
    return [
        (min(i * size, total), min((i + 1) * size, total))
        for i in range(parts)
    ]


def split_annotations(source_path, output_dir, parts):
    """Split the JSON list at ``source_path`` into ``parts`` files.

    Args:
        source_path: Path of a JSON file whose top-level value is a list.
        output_dir: Directory receiving ``annotations_part_<k>.json``;
            created (including parents) when missing.
        parts: Number of part files to write; must be positive.

    Returns:
        The number of annotations written to each part, in order.

    Raises:
        ValueError: if ``parts`` is not positive.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(source_path, "r", encoding="utf-8") as f:
        annotation_data = json.load(f)

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(output_dir, exist_ok=True)

    part_sizes = []
    bounds = _chunk_bounds(len(annotation_data), parts)
    for part_no, (start_idx, end_idx) in enumerate(bounds, start=1):
        annotations_subset = annotation_data[start_idx:end_idx]
        part_anno_json_path = os.path.join(
            output_dir, f"annotations_part_{part_no}.json"
        )
        with open(part_anno_json_path, "w", encoding="utf-8") as f:
            json.dump(annotations_subset, f)
        print(len(annotations_subset))
        part_sizes.append(len(annotations_subset))
    return part_sizes


if __name__ == "__main__":
    split_annotations(anno_json_path, anno_output_dir, num_parts)
    # Report the configured part count rather than a hard-coded number.
    print(f"标注已成功分成{num_parts}份,并保存到文件夹中。")