"""Report per-category and overall accuracy for saved ScreenSpot predictions.

For every task in ``tasks`` the script loads the annotation JSON (entries
with ``bbox`` and ``data_type`` keys), then walks the matching prediction
JSONL file and attributes each prediction line to a ``<task>_<data_type>``
category via the line's ``gt_bbox`` field.
"""

import json
import os
import re
from collections import Counter

result_model = '/proj/cvl/users/x_fahkh2/UI-R1/ckpt/Qwen2.5-VL-3B-Instruct-Trained'

tasks = ['mobile', 'desktop', 'web']

types = ['icon', 'text']

# Captures the text between the brackets of the "gt_bbox" field of one
# prediction line; compiled once because it is applied to every line.
GT_BBOX_PATTERN = re.compile(r'"gt_bbox": \[(.*?)\]')


def build_bbox_type_map(annotations):
    """Map each annotation's bbox text to its ``data_type``.

    The key is ``str(bbox)`` with the surrounding brackets stripped, which is
    the same text the ``gt_bbox`` regex captures from a prediction line.
    If two annotations share a bbox, the later one wins.
    """
    return {
        str(entry['bbox']).strip('[]'): entry['data_type']
        for entry in annotations
    }


def tally_predictions(lines, bbox_type, task):
    """Count correct and total predictions per ``<task>_<data_type>`` key.

    Args:
        lines: iterable of raw prediction lines (e.g. an open JSONL file).
        bbox_type: mapping produced by :func:`build_bbox_type_map`.
        task: task name used as the prefix of every category key.

    Returns:
        A ``(correct, total)`` pair of ``Counter`` objects.

    Raises:
        ValueError: a non-blank line has no ``gt_bbox`` field.
        KeyError: a line's ``gt_bbox`` is not present in ``bbox_type``.
    """
    correct = Counter()
    total = Counter()
    for line_no, line in enumerate(lines, start=1):
        if not line.strip():
            # A stray blank line carries no prediction; skip it.
            continue
        match = GT_BBOX_PATTERN.search(line)
        if match is None:
            raise ValueError(
                f'{task}: line {line_no} has no "gt_bbox" field: {line!r}'
            )
        gt_bbox = match.group(1)
        try:
            data_type = bbox_type[gt_bbox]
        except KeyError:
            raise KeyError(
                f'{task}: line {line_no} has gt_bbox [{gt_bbox}] '
                'that is not in the annotations'
            ) from None

        key = f'{task}_{data_type}'
        total[key] += 1
        # NOTE(review): this is a substring test over the whole line, so a
        # "true" appearing in any other field also counts as correct. The
        # name of the correctness field is not visible here -- confirm and
        # parse that field instead if stricter matching is wanted.
        if "true" in line or "True" in line:
            correct[key] += 1
    return correct, total


def summarize(correct, total):
    """Turn the counters into accuracies.

    Returns:
        ``(per_category, overall)`` where ``per_category`` maps every key of
        ``total`` to ``correct / total`` (0.0 when nothing was correct for
        that key) and ``overall`` is the pooled accuracy, or ``None`` when
        ``total`` holds no samples.
    """
    per_category = {
        key: correct.get(key, 0) / count for key, count in total.items()
    }
    sample_count = sum(total.values())
    if sample_count == 0:
        return per_category, None
    return per_category, sum(correct.values()) / sample_count


def main():
    """Evaluate every task in ``tasks`` and print the accuracies."""
    result = Counter()
    total = Counter()

    for task in tasks:
        ref_json = f'/proj/cvl/users/x_fahkh2/UI-R1/data/ScreenSpot/annotations/screenspot_{task}.json'
        with open(ref_json, 'r', encoding='utf-8') as f:
            data = json.load(f)
        bbox_type = build_bbox_type_map(data)

        result_jsonl = os.path.join(result_model, f'infer/prediction_results_screenspot_{task}.jsonl')
        with open(result_jsonl, 'r', encoding='utf-8') as f:
            task_correct, task_total = tally_predictions(f, bbox_type, task)
        result.update(task_correct)
        total.update(task_total)

    per_category, overall = summarize(result, total)
    for key, accuracy in per_category.items():
        print(key, accuracy)
    if overall is None:
        print('no predictions found')
    else:
        print(overall)


if __name__ == '__main__':
    main()