"""Join model relevance scores with the source-code dataset.

``load_score`` scans a CSV of model responses, pulls the JSON array embedded
in the second column of each row and remembers, in ``score_dict``, every row
that contains at least one sufficiently relevant entry.  ``load_code_file``
then looks those row keys up in the code dataset CSV.
"""
import csv
import json

# Raise the csv module's per-field size cap so very large cells can be read.
# The requested value is 10 GiB.  It does not fit in a 32-bit C long (e.g. on
# Windows), where csv.field_size_limit raises OverflowError, so fall back to
# the largest value such a platform accepts.
_FIELD_SIZE_LIMIT = 10 * 1024 * 1024 * 1024  # 10 GiB
try:
    csv.field_size_limit(_FIELD_SIZE_LIMIT)
except OverflowError:
    csv.field_size_limit(2 ** 31 - 1)

DEFAULT_SCORE_PATH = 'res2.csv'
DEFAULT_DATASET_PATH = '/home/weifengsun/tangou1/domain_code/src/datasets/data_merged/dataset_all.csv'

# Maps the first column of a score row to its parsed JSON list.  After
# load_code_file() a matched key maps to {'code_file': row, 'score_json': list}.
score_dict = {}

_JSON_DECODER = json.JSONDecoder()


def _extract_json_list(text):
    """Return the JSON array that starts at the first '[' in *text*, or None.

    ``raw_decode`` parses exactly one JSON value and ignores whatever follows
    it, so nested arrays and trailing text after the array are both handled.
    """
    start = text.find('[')
    if start == -1:
        return None
    try:
        value, _end = _JSON_DECODER.raw_decode(text, start)
    except (ValueError, RecursionError):
        # Not valid JSON (JSONDecodeError is a ValueError) or absurdly nested.
        return None
    return value if isinstance(value, list) else None


def _is_relevant(entry):
    """Return True if *entry* has relevance_score > 1 and function_start_line > 1."""
    if not isinstance(entry, dict):
        return False
    score = entry.get('relevance_score')
    start_line = entry.get('function_start_line')
    if score is None or start_line is None:
        return False
    try:
        return int(score) > 1 and int(start_line) > 1
    except (TypeError, ValueError, OverflowError):
        # Non-numeric or non-finite values are treated as "not relevant".
        return False


def load_score(path=DEFAULT_SCORE_PATH):
    """Populate ``score_dict`` from the score CSV at *path*.

    Each row is expected to hold a key in column 0 and free text containing a
    JSON array in column 1.  Rows that are too short, or whose text holds no
    parseable JSON array, are skipped.  A row is stored when at least one
    array entry passes ``_is_relevant``.

    Args:
        path: Location of the score CSV.

    Returns:
        The number of relevant entries found across all rows (also printed).
    """
    amount = 0
    # newline='' lets the csv module handle embedded line breaks itself.
    with open(path, 'r', newline='') as f:
        for row in csv.reader(f):
            if len(row) < 2:
                continue
            entries = _extract_json_list(row[1])
            if entries is None:
                continue
            hits = sum(1 for entry in entries if _is_relevant(entry))
            if hits:
                amount += hits
                score_dict[row[0]] = entries
    print(amount)
    return amount


def load_code_file(path=DEFAULT_DATASET_PATH):
    """Attach the first dataset row whose key is in ``score_dict``.

    The matched ``score_dict`` value is replaced by
    ``{'code_file': row, 'score_json': <previous value>}`` and printed.
    Scanning stops after the first match.

    Args:
        path: Location of the code dataset CSV (UTF-8).

    Returns:
        The merged record for the first matching row, or None if no row matched.
    """
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f):
            # Blank lines come back as empty lists; they have no key to match.
            if not row or row[0] not in score_dict:
                continue
            merged = {'code_file': row, 'score_json': score_dict[row[0]]}
            score_dict[row[0]] = merged
            print(merged)
            return merged
    return None


if __name__ == '__main__':
    load_score()
    load_code_file()