louisguthmann's picture
Update 2B adapter with repair_v3b full run
432b303 verified
{
"components": {
"any_exact_rate": 0.25,
"ask_rate": 0.75,
"cannot_rate": 1.0,
"command_rate": 0.7604,
"ok_rate": 0.775,
"parse_ok_rate": 0.98,
"weak_category_rate": 0.7678571428571429
},
"exact_summary": "/root/bitnet-nl2sh/output/autoresearch_proxy_qwen35_2b/repair_v3b_full_v1/qwen35_2b_batch8_repair_v3b_full_v1/eval_exact/summary.json",
"score": 276.5033,
"score_0_to_1": 2.765033,
"verifier_summary": "/root/bitnet-nl2sh/output/autoresearch_proxy_qwen35_2b/repair_v3b_full_v1/qwen35_2b_batch8_repair_v3b_full_v1/eval_verifier/summary.json",
"weak_categories": [
"ambiguous_delete",
"ambiguous_secret",
"create_archive",
"enabled_services",
"find_jpgs",
"json_query",
"top_ips"
],
"weak_category_rates": {
"ambiguous_delete": 0.5,
"ambiguous_secret": 1.0,
"create_archive": 1.0,
"enabled_services": 1.0,
"find_jpgs": 0.25,
"json_query": 0.625,
"top_ips": 1.0
},
"weights": {
"any_exact_rate": 0.2,
"ask_rate": 1.0,
"cannot_rate": 0.25,
"command_rate": 0.35,
"ok_rate": 1.0,
"parse_ok_rate": 0.1,
"weak_category_rate": 0.75
}
}