import jsonlines

# Path of the JSON Lines file that is read record by record.
input_file = "data/thirdStep_file.jsonl"

# Path of the JSON Lines file that receives the converted records.
output_file = "data/train4465"

# A label is accepted only when its score is strictly greater than this value.
threshold = 0.5

# Choice options copied into every output record. Each entry carries an
# "id", a display "text" and a "meta" string (always "0.00" here).
options = [
    {"id": "CapitalRequirements", "text": "Capital Requirements", "meta": "0.00"},
    {"id": "ConsumerProtection", "text": "Consumer Protection", "meta": "0.00"},
    {"id": "RiskManagement", "text": "Risk Management", "meta": "0.00"},
    {"id": "ReportingAndCompliance", "text": "Reporting And Compliance", "meta": "0.00"},
    {"id": "CorporateGovernance", "text": "Corporate Governance", "meta": "0.00"},
]


def process_record(record, *, min_score=None, choices=None):
    """Convert one prediction record into an annotation-style record.

    Args:
        record: Mapping with a ``"text"`` entry and a ``"predicted_labels"``
            entry; the latter maps each label name to a numeric score.
        min_score: Optional cutoff override. When ``None`` (the default) the
            module-level ``threshold`` is used.
        choices: Optional override for the option list, an iterable of
            mappings with ``"id"``, ``"text"`` and ``"meta"`` keys. When
            ``None`` (the default) the module-level ``options`` is used.

    Returns:
        A dict with the keys ``"text"``, ``"cats"`` (the scores, unchanged),
        ``"accept"`` (labels scoring strictly above the cutoff, in the
        mapping's iteration order), ``"answer"`` (``"accept"`` when at least
        one label passed, otherwise ``"reject"``) and ``"options"``.

    Raises:
        KeyError: If ``record`` lacks ``"text"`` or ``"predicted_labels"``, or
            an option lacks one of its three keys.
    """
    # Compare against None explicitly so a cutoff of 0 or an empty option
    # list is honoured instead of silently falling back to the globals.
    cutoff = threshold if min_score is None else min_score
    available = options if choices is None else choices

    text = record["text"]
    predicted_labels = record["predicted_labels"]

    # Strictly greater than: a score equal to the cutoff is not accepted.
    accepted_categories = [
        label for label, score in predicted_labels.items() if score > cutoff
    ]
    answer = "accept" if accepted_categories else "reject"

    # Copy each option so output records never share dicts with the
    # template list; only the three known keys are carried over.
    options_with_meta = [
        {"id": option["id"], "text": option["text"], "meta": option["meta"]}
        for option in available
    ]

    return {
        "text": text,
        "cats": predicted_labels,
        "accept": accepted_categories,
        "answer": answer,
        "options": options_with_meta,
    }


# Stream the input file one record at a time, convert each record and write
# it straight to the output file; both files are closed when the block exits.
with jsonlines.open(input_file, "r") as infile, jsonlines.open(output_file, "w") as outfile:
    for record in infile:
        output_record = process_record(record)
        outfile.write(output_record)