| | import os |
| | import jsonlines |
| | import json |
| |
|
def get_function_scores(dir):
    """Collect the README-weighted score of every scored function under ``dir``.

    Every immediate subdirectory of ``dir`` is visited in sorted name order.
    The ``score`` stored in its ``readme_summary.json`` is multiplied with the
    ``score`` of each record in its ``functions.jsonl``; records that carry no
    ``score`` key are skipped.

    Returns:
        A flat list of the products, in traversal order.
    """
    weighted = []
    for name in sorted(os.listdir(dir)):
        project_dir = os.path.join(dir, name)
        # Only directories are treated as projects; plain files are ignored.
        if not os.path.isdir(project_dir):
            continue

        summary_path = os.path.join(project_dir, 'readme_summary.json')
        with open(summary_path, 'r', encoding='utf-8', errors='ignore') as summary_file:
            readme_score = json.load(summary_file)['score']

        functions_path = os.path.join(project_dir, 'functions.jsonl')
        with jsonlines.open(functions_path) as records:
            weighted.extend(
                record['score'] * readme_score
                for record in records
                if 'score' in record
            )
    return weighted
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
def output_scores(dir: str, output_path: str, score: float) -> None:
    """Append functions whose README-weighted score exceeds ``score`` to a JSONL file.

    Every immediate subdirectory of ``dir`` is visited in sorted name order.
    For each one, ``readme_summary.json`` provides ``readme_summary`` and
    ``score``; every record in ``functions.jsonl`` that has a ``score`` key and
    whose ``record['score'] * readme score`` is strictly greater than ``score``
    is enriched with:

    * ``md_summary``   - the README summary of the subdirectory,
    * ``md_score``     - the README score of the subdirectory,
    * ``final_score``  - the product of the two scores,
    * ``code_content`` - lines ``start_line``..``end_line`` of ``record['file']``
      (sliced as ``[start_line - 1:end_line]``).

    The selected records of each subdirectory are appended to ``output_path``.

    Args:
        dir: Root directory whose subdirectories are scanned.
        output_path: JSONL file opened in append mode; existing content is kept.
        score: Exclusive lower bound for the combined score.

    Raises:
        OSError: If a summary, functions, source or output file cannot be opened.
        KeyError: If a summary or a selected record lacks an expected key.
    """
    subdirs = sorted(d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d)))
    for subdir in subdirs:
        md_path = os.path.join(dir, subdir, 'readme_summary.json')
        with open(md_path, 'r', encoding='utf-8', errors='ignore') as md_file:
            data = json.load(md_file)
        md_summary = data['readme_summary']
        md_score = data['score']

        # Several records usually point at the same source file; read each
        # file once per subdirectory instead of once per selected record.
        lines_by_file = {}

        json_path = os.path.join(dir, subdir, 'functions.jsonl')
        contents = []
        with jsonlines.open(json_path) as reader:
            for obj in reader:
                if 'score' not in obj:
                    continue
                final_score = obj['score'] * md_score
                if not (final_score > score):
                    continue

                obj['md_summary'] = md_summary
                obj['md_score'] = md_score
                obj['final_score'] = final_score

                source_path = obj['file']
                if source_path not in lines_by_file:
                    with open(source_path, 'r', encoding='utf-8', errors='ignore') as source_file:
                        lines_by_file[source_path] = source_file.readlines()
                source_lines = lines_by_file[source_path]

                obj['code_content'] = ''.join(
                    source_lines[obj['start_line'] - 1:obj['end_line']]
                )
                contents.append(obj)

        # Append per subdirectory so earlier results are already on disk if a
        # later subdirectory fails.
        with jsonlines.open(output_path, 'a', flush=True) as writer:
            writer.write_all(contents)
| |
|
# Run the filter over the step22 dataset; the third argument is the
# exclusive score cut-off passed to output_scores.
output_scores(
    dir='/home/weifengsun/tangou1/step2/step22/dataset',
    output_path='/home/weifengsun/tangou1/step2/step22/output/function_filtered_scores.jsonl',
    score=0.1282891692796717,
)
| | |
| | |
| | |
| |
|
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|
| | |
| | |
| |
|