| import json |
| import random |
| import tqdm |
| from pycocotools.coco import COCO |
| import os |
| import numpy as np |
|
|
def ViPLLaVADataset_load_fn(data_path, repeat_time, **kwargs):
    """Load samples from a JSON file, filter them, and resample the result.

    A record is kept only when it has an ``'image'`` key and the part of its
    ``'id'`` before the first ``'-'`` is one of the accepted dataset names.
    The surviving list is then resampled according to ``repeat_time``:

    * ``repeat_time < 1``: a random subset of ``int(len * repeat_time)`` items.
    * ``repeat_time == 1``: the list is left untouched.
    * ``repeat_time > 1``: the list repeated ``int(repeat_time)`` times,
      followed by a random subset covering the fractional remainder.

    Args:
        data_path: Path of a JSON file containing a list of sample dicts.
        repeat_time: Resampling factor described above.
        **kwargs: Accepted but not used by this loader.

    Returns:
        A 2-tuple whose two entries are the same list object.
    """
    accepted_prefixes = (
        'refcocog', 'vcr', 'vg_rel', 'flickr30k', 'v7w', 'pointQA_twice')

    with open(data_path, 'r') as handle:
        records = json.load(handle)

    # The 'image' test comes first so records without an image are skipped
    # before their 'id' is ever inspected.
    samples = [
        record for record in records
        if 'image' in record
        and record['id'].split('-')[0] in accepted_prefixes
    ]

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        # The fractional top-up is drawn from the list *before* it is
        # repeated, so it never contains the same sample twice.
        top_up = (
            random.sample(samples, int(len(samples) * fraction))
            if fraction > 0 else []
        )
        samples = samples * whole_copies
        samples.extend(top_up)

    return samples, samples
|
|
|
|
def LLaVAInstructDataset_load_fn(data_path, repeat_time, **kwargs):
    """Load instruction records from a JSON or JSON Lines file and resample.

    The file is first parsed as one JSON document.  If that fails with a
    JSON syntax error it is re-read line by line as JSON Lines (blank lines
    are skipped).  Integer ``'id'`` values are converted to strings in place.
    The list is then resampled according to ``repeat_time``:

    * ``repeat_time < 1``: a random subset of ``int(len * repeat_time)`` items.
    * ``repeat_time == 1``: the list is left untouched.
    * ``repeat_time > 1``: the list repeated ``int(repeat_time)`` times,
      followed by a random subset covering the fractional remainder.

    Args:
        data_path: Path of the JSON / JSON Lines file.
        repeat_time: Resampling factor described above.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(None, records)`` where ``records`` is the resampled list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a line of the JSON Lines fallback is invalid.
    """
    try:
        # The context manager guarantees the handle is closed; only a JSON
        # syntax error triggers the fallback, so unrelated failures (missing
        # file, interrupt, ...) are no longer swallowed.
        with open(data_path, 'r', encoding="utf-8") as f:
            ret = json.load(f)
    except json.JSONDecodeError:
        ret = []
        with open(data_path, 'r', encoding="utf-8") as f:
            for line in tqdm.tqdm(f):
                if not line.strip():
                    continue
                ret.append(json.loads(line))

    for record in ret:
        if isinstance(record.get('id'), int):
            record['id'] = str(record['id'])

    if repeat_time < 1:
        ret = random.sample(ret, int(len(ret) * repeat_time))
    elif repeat_time > 1:
        int_repeat_time = int(repeat_time)
        remaining_repeat_time = repeat_time - int_repeat_time
        # Draw the fractional part before the list is repeated so the extra
        # samples are distinct.
        remaining_ret = []
        if remaining_repeat_time > 0:
            remaining_ret = random.sample(
                ret, int(len(ret) * remaining_repeat_time))
        ret = ret * int_repeat_time
        ret.extend(remaining_ret)

    return None, ret
|
|
| |
|
|
def RegionCaptionDataset_load_fn(data_path, repeat_time, **kwargs):
    """Load region-caption records from JSON, validate them, and resample.

    Every record gets an ``'image'`` key copied from ``'file_name'``.  A
    record is kept only when its ``'description'`` and ``'annotation'``
    entries have the same length; otherwise a message is printed and the
    record is dropped.  The kept list is resampled by ``repeat_time``
    (``< 1`` random subset, ``> 1`` repetition plus a random fractional part).

    Args:
        data_path: Path of a JSON file containing a list of record dicts.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where ``summaries`` holds one
        ``{"image", "description"}`` dict per entry of ``records``.
    """
    with open(data_path, 'r') as handle:
        entries = json.load(handle)

    samples = []
    for entry in entries:
        # The 'image' alias is written before validation, as in the source
        # data flow: rejected records are mutated too.
        entry['image'] = entry['file_name']
        if len(entry["description"]) == len(entry["annotation"]):
            samples.append(entry)
        else:
            print("The number of description is not equal to seg !!!")

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        extra = []
        if fraction > 0:
            # Sampled from the un-repeated list.
            extra = random.sample(samples, int(len(samples) * fraction))
        samples = samples * whole_copies + extra

    summaries = [
        {"image": sample["file_name"], "description": sample["description"]}
        for sample in samples
    ]

    return samples, summaries
|
|
|
|
def RegionConversationDataset_load_fn(data_path, repeat_time, **kwargs):
    """Load region-conversation records from JSON, filter, and resample.

    Records without a non-empty ``'annotation'`` entry are dropped (a message
    is printed for each).  Kept records receive two extra keys: ``'image'``
    (copied from ``'file_name'``) and ``'limit_str'``.  ``limit_str`` is a
    short-answer instruction when ``data_path`` contains ``'part_level'`` or
    ``'short_form'`` and the empty string otherwise.  The kept list is
    resampled by ``repeat_time`` (``< 1`` random subset, ``> 1`` repetition
    plus a random fractional part).

    Args:
        data_path: Path of a JSON file containing a list of record dicts.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where each summary holds ``'image'``,
        ``'conversations'`` and ``'num_regions'`` (the annotation count).
    """
    with open(data_path, 'r') as handle:
        entries = json.load(handle)

    wants_short_answer = any(
        marker in data_path for marker in ('part_level', 'short_form'))
    limit_str = (
        ' Answer the question using a single word or phrase.'
        if wants_short_answer else ''
    )

    samples = []
    for entry in entries:
        if 'annotation' in entry and len(entry['annotation']) > 0:
            entry['image'] = entry['file_name']
            entry['limit_str'] = limit_str
            samples.append(entry)
        else:
            print("The annotation is not valid, filter out!!!")

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        # The fractional part is sampled before the list is repeated.
        extra = (
            random.sample(samples, int(len(samples) * fraction))
            if fraction > 0 else []
        )
        samples = samples * whole_copies
        samples.extend(extra)

    summaries = [
        {
            'image': sample['file_name'],
            'conversations': sample["conversations"],
            'num_regions': len(sample['annotation']),
        }
        for sample in samples
    ]

    return samples, summaries
|
|
def RegionShortCapVGDataset_load_fn(data_path, repeat_time, **kwargs):
    """Build per-image region/caption records from a COCO-format file.

    ``data_path`` is handed to ``COCO``.  Images without any annotation are
    skipped.  For the remaining images, annotations whose ``'ignore'`` flag
    is truthy are left out; each other annotation contributes its ``'bbox'``
    and ``'segmentation'`` to ``'annotation'`` and its ``'caption'`` to
    ``'description'``.  The list is resampled by ``repeat_time`` (``< 1``
    random subset, ``> 1`` repetition plus a random fractional part).

    Args:
        data_path: Annotation file passed to ``COCO``.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where each summary holds the ``'image'`` and
        ``'description'`` of the matching record.
    """
    coco_api = COCO(data_path)

    samples = []
    for image_id in coco_api.getImgIds():
        image_meta = coco_api.loadImgs([image_id])[0]
        anns = coco_api.loadAnns(coco_api.getAnnIds(imgIds=[image_id]))
        if len(anns) == 0:
            continue

        # An image whose annotations are all ignored is still emitted, with
        # empty 'description' and 'annotation' lists.
        usable = [ann for ann in anns if not ann.get('ignore', False)]
        samples.append(dict(
            image=image_meta['file_name'],
            description=[ann['caption'] for ann in usable],
            annotation=[
                {'bbox': ann['bbox'], 'segmentation': ann['segmentation']}
                for ann in usable
            ],
        ))

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        extra = []
        if fraction > 0:
            # Sampled from the un-repeated list.
            extra = random.sample(samples, int(len(samples) * fraction))
        samples = samples * whole_copies + extra

    summaries = [
        {"image": sample["image"], "description": sample["description"]}
        for sample in samples
    ]

    return samples, summaries
|
|
|
|
def CoCoRefClassificationDataset_load_fn(data_path, repeat_time, **kwargs):
    """Build per-image region/category records from a COCO-format file.

    ``data_path`` is handed to ``COCO``.  Images without annotations are
    skipped.  For every annotation of a kept image, its ``'bbox'`` and
    ``'segmentation'`` go into ``'annotation'`` and the name of its category
    (looked up with ``loadCats``) goes into ``'categories'``.  The list is
    resampled by ``repeat_time`` (``< 1`` random subset, ``> 1`` repetition
    plus a random fractional part).

    Args:
        data_path: Annotation file passed to ``COCO``.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where each summary holds the ``'image'`` and
        ``'categories'`` of the matching record.
    """
    coco_api = COCO(data_path)

    samples = []
    for image_id in coco_api.getImgIds():
        # The file name is read before the annotations are fetched.
        file_name = coco_api.loadImgs([image_id])[0]['file_name']

        anns = coco_api.loadAnns(coco_api.getAnnIds(imgIds=[image_id]))
        if len(anns) == 0:
            continue

        regions, names = [], []
        for ann in anns:
            regions.append(
                {'bbox': ann['bbox'], 'segmentation': ann['segmentation']})
            names.append(coco_api.loadCats(ann['category_id'])[0]['name'])

        samples.append(dict(
            image=file_name,
            categories=names,
            annotation=regions,
        ))

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        # The fractional part is sampled before the list is repeated.
        extra = (
            random.sample(samples, int(len(samples) * fraction))
            if fraction > 0 else []
        )
        samples = samples * whole_copies
        samples.extend(extra)

    summaries = [
        {'image': sample["image"], 'categories': sample["categories"]}
        for sample in samples
    ]

    return samples, summaries
|
|
|
|
def RefCOCOShortCaptionDataset_load_fn(data_path, repeat_time, **kwargs):
    """Build per-image caption/region records from a COCO-format file.

    ``data_path`` is handed to ``COCO``.  Each image entry must carry a
    ``'caption'``, which becomes the single element of ``'description'``.
    Images without annotations are skipped; for the others every
    annotation's ``'bbox'`` and ``'segmentation'`` are collected under
    ``'annotation'``.  The list is resampled by ``repeat_time`` (``< 1``
    random subset, ``> 1`` repetition plus a random fractional part).

    Args:
        data_path: Annotation file passed to ``COCO``.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where each summary holds the ``'image'`` and
        ``'description'`` of the matching record.
    """
    coco_api = COCO(data_path)

    samples = []
    for image_id in coco_api.getImgIds():
        image_meta = coco_api.loadImgs([image_id])[0]
        # Both fields are read up front, before the annotation lookup.
        file_name = image_meta['file_name']
        caption = image_meta['caption']

        anns = coco_api.loadAnns(coco_api.getAnnIds(imgIds=[image_id]))
        if len(anns) == 0:
            continue

        samples.append(dict(
            image=file_name,
            description=[caption],
            annotation=[
                {'bbox': ann['bbox'], 'segmentation': ann['segmentation']}
                for ann in anns
            ],
        ))

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        extra = []
        if fraction > 0:
            # Sampled from the un-repeated list.
            extra = random.sample(samples, int(len(samples) * fraction))
        samples = samples * whole_copies + extra

    summaries = []
    for sample in samples:
        summaries.append(
            {"image": sample["image"], "description": sample["description"]})

    return samples, summaries
|
|
|
|
def PartClassificationDataset_load_fn(data_path, repeat_time, **kwargs):
    """Build per-image category/region records from a COCO-format file.

    ``data_path`` is handed to ``COCO``.  Images without annotations are
    skipped.  For each annotation of a kept image, the name of its category
    (looked up with ``loadCats``) is added to ``'categories'`` and its
    ``'bbox'`` / ``'segmentation'`` pair to ``'annotation'``.  The list is
    resampled by ``repeat_time`` (``< 1`` random subset, ``> 1`` repetition
    plus a random fractional part).

    Args:
        data_path: Annotation file passed to ``COCO``.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where each summary holds the ``'image'`` and
        ``'categories'`` of the matching record.
    """
    coco_api = COCO(data_path)

    def _region(ann):
        # Keep only the geometric fields of an annotation.
        return {'bbox': ann['bbox'], 'segmentation': ann['segmentation']}

    samples = []
    for image_id in coco_api.getImgIds():
        # The file name is read before the annotations are fetched.
        file_name = coco_api.loadImgs([image_id])[0]['file_name']

        ann_ids = coco_api.getAnnIds(imgIds=[image_id])
        anns = coco_api.loadAnns(ann_ids)
        if len(anns) == 0:
            continue

        record = dict(image=file_name, categories=[], annotation=[])
        for ann in anns:
            category = coco_api.loadCats(ann['category_id'])[0]
            record['categories'].append(category['name'])
            record['annotation'].append(_region(ann))
        samples.append(record)

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        # The fractional part is sampled before the list is repeated.
        extra = (
            random.sample(samples, int(len(samples) * fraction))
            if fraction > 0 else []
        )
        samples = samples * whole_copies
        samples.extend(extra)

    summaries = [
        {"image": sample["image"], "categories": sample["categories"]}
        for sample in samples
    ]

    return samples, summaries
|
|
|
|
def MDPVPointConversationDataset_load_fn(data_path, repeat_time, **kwargs):
    """Load point-prompted conversation records from JSON and resample them.

    For every source record the image path is reduced to its last
    ``'/'``-separated component, the conversations are carried over, and
    each entry of ``'points'`` becomes one ``{'point': [point]}`` annotation.
    The list is resampled by ``repeat_time`` (``< 1`` random subset, ``> 1``
    repetition plus a random fractional part).

    Args:
        data_path: Path of a JSON file containing a list of record dicts.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where each summary holds ``'image'``,
        ``'num_regions'`` (the annotation count) and ``'conversations'``.
    """
    with open(data_path, 'r') as handle:
        sources = json.load(handle)

    samples = [
        dict(
            image=source['image'].split('/')[-1],
            conversations=source['conversations'],
            # Each point is wrapped in its own single-element list.
            annotation=[{'point': [point]} for point in source['points']],
        )
        for source in sources
    ]

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        extra = []
        if fraction > 0:
            # Sampled from the un-repeated list.
            extra = random.sample(samples, int(len(samples) * fraction))
        samples = samples * whole_copies + extra

    summaries = [
        {
            "image": sample['image'],
            "num_regions": len(sample['annotation']),
            "conversations": sample['conversations'],
        }
        for sample in samples
    ]

    return samples, summaries
|
|
|
|
def MDPVBoxConversationDataset_load_fn(data_path, repeat_time, **kwargs):
    """Load box-prompted conversation records from JSON files and resample.

    Records from all files are concatenated.  When ``repeat_time < 1`` the
    raw records are randomly sub-sampled *before* filtering.  A record is
    kept only if its image (the last ``'/'``-separated component of
    ``'image'``) exists inside ``image_folder``.  Each box ``(x0, y0, w, h)``
    is stored as ``[x0, y0, x0 + w, y0 + h]``.  When ``repeat_time > 1`` the
    kept list is repeated ``int(repeat_time)`` times and topped up with a
    random subset covering the fractional remainder.

    Args:
        data_path: Iterable of JSON file paths; a single path is accepted too.
        repeat_time: Resampling factor described above.
        **kwargs: Must contain ``image_folder``, the directory used for the
            image existence check.

    Returns:
        ``(records, summaries)`` where each summary holds ``'image'``,
        ``'num_regions'`` (the annotation count) and ``'conversations'``.

    Raises:
        KeyError: If ``image_folder`` is not supplied.
    """
    image_folder = kwargs['image_folder']

    # A bare path would otherwise be iterated character by character.
    if isinstance(data_path, (str, os.PathLike)):
        data_path = [data_path]

    json_data = []
    for source_file in data_path:
        with open(source_file, 'r') as f:
            json_data.extend(json.load(f))

    if repeat_time < 1:
        # Sub-sample first so the file-system check below runs on fewer items.
        json_data = random.sample(json_data, int(len(json_data) * repeat_time))

    ret = []
    for source in json_data:
        image_name = source['image'].split('/')[-1]
        # os.path.join works whether or not image_folder ends with a
        # separator; plain concatenation silently dropped every sample when
        # the separator was missing.
        if not os.path.exists(os.path.join(image_folder, image_name)):
            continue

        ret.append(dict(
            image=image_name,
            conversations=source['conversations'],
            annotation=[
                {'bbox': [x0, y0, x0 + w, y0 + h]}
                for x0, y0, w, h in source['bbox']
            ],
        ))

    if repeat_time > 1:
        int_repeat_time = int(repeat_time)
        remaining_repeat_time = repeat_time - int_repeat_time
        # Draw the fractional part before the list is repeated so the extra
        # samples are distinct.
        remaining_ret = []
        if remaining_repeat_time > 0:
            remaining_ret = random.sample(
                ret, int(len(ret) * remaining_repeat_time))
        ret = ret * int_repeat_time
        ret.extend(remaining_ret)

    hf_ret = [
        {
            "image": item['image'],
            "num_regions": len(item['annotation']),
            "conversations": item['conversations'],
        }
        for item in ret
    ]

    return ret, hf_ret
|
|
def MDPVBoxOCRDataset_load_fn(data_path, repeat_time, **kwargs):
    """Load box-prompted OCR conversation records from JSON files and resample.

    Records from all files in ``data_path`` are concatenated.  A record is
    kept only if ``image_folder + record['image']`` exists on disk (the
    image path is used as given).  Each box ``(x0, y0, w, h)`` is stored as
    ``[x0, y0, x0 + w, y0 + h]``.  The kept list is resampled by
    ``repeat_time`` (``< 1`` random subset, ``> 1`` repetition plus a random
    fractional part).

    Args:
        data_path: Iterable of JSON file paths.
        repeat_time: Resampling factor.
        **kwargs: Must contain ``image_folder``, the prefix used for the
            image existence check.

    Returns:
        ``(records, summaries)`` where each summary holds ``'image'``,
        ``'num_regions'`` (the annotation count) and ``'conversations'``.
    """
    image_folder = kwargs['image_folder']

    sources = []
    for source_file in data_path:
        with open(source_file, 'r') as handle:
            sources.extend(json.load(handle))

    samples = []
    for source in sources:
        image = source['image']
        conversations = source['conversations']
        # NOTE(review): the path is built by plain string concatenation, so
        # image_folder presumably has to end with a path separator - confirm
        # against the callers.
        if not os.path.exists(image_folder + image):
            continue

        boxes = [
            {'bbox': [x0, y0, x0 + w, y0 + h]}
            for x0, y0, w, h in source['bbox']
        ]
        samples.append(dict(
            image=image,
            conversations=conversations,
            annotation=boxes,
        ))

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        # The fractional part is sampled before the list is repeated.
        extra = (
            random.sample(samples, int(len(samples) * fraction))
            if fraction > 0 else []
        )
        samples = samples * whole_copies
        samples.extend(extra)

    summaries = [
        {
            "image": sample['image'],
            "num_regions": len(sample['annotation']),
            "conversations": sample['conversations'],
        }
        for sample in samples
    ]

    return samples, summaries
|
|
|
|
def MatchDataset_load_fn(data_path, repeat_time, **kwargs):
    """Load multi-image records from JSON, normalise paths, and resample.

    If the first entry of a record's ``'file_names'`` does not start with
    ``'./data/'``, every entry is rewritten to ``'./data/'`` followed by the
    entry minus its first two characters.  Records whose first (possibly
    rewritten) file name contains ``'AVA'`` or ``'HACS'`` are dropped.  Kept
    records get an ``'image'`` key pointing at their ``'file_names'`` list.
    The list is resampled by ``repeat_time`` (``< 1`` random subset, ``> 1``
    repetition plus a random fractional part).

    Args:
        data_path: Path of a JSON file containing a list of record dicts.
        repeat_time: Resampling factor.
        **kwargs: Accepted but not used by this loader.

    Returns:
        ``(records, summaries)`` where each summary holds ``'image'`` and,
        when the record has one, ``'description'``.
    """
    excluded_markers = ('AVA', 'HACS')

    with open(data_path, 'r') as handle:
        entries = json.load(handle)

    samples = []
    for entry in entries:
        if not entry['file_names'][0].startswith('./data/'):
            # NOTE(review): dropping two characters presumably removes a
            # leading './' - confirm against the data.
            entry['file_names'] = [
                './data/' + name[2:] for name in entry['file_names']]

        first_name = entry['file_names'][0]
        if any(marker in first_name for marker in excluded_markers):
            continue

        entry['image'] = entry['file_names']
        samples.append(entry)

    if repeat_time < 1:
        samples = random.sample(samples, int(len(samples) * repeat_time))
    elif repeat_time > 1:
        whole_copies = int(repeat_time)
        fraction = repeat_time - whole_copies
        extra = []
        if fraction > 0:
            # Sampled from the un-repeated list.
            extra = random.sample(samples, int(len(samples) * fraction))
        samples = samples * whole_copies + extra

    summaries = []
    for sample in samples:
        summary = {"image": sample["file_names"]}
        if "description" in sample:
            summary["description"] = sample["description"]
        summaries.append(summary)

    return samples, summaries