{
  "mas_shape": "deliberation",
  "mas_task": "math",
  "non_last_loss_weight": 0.1,
  "num_recursive_rounds": 3,
  "outer_rt_in_dim": 2560,
  "outer_rt_out_dim": 2560,
  "outer_rt_type": "outer_ln_res_adapter",
  "outer_tr_in_dim": 2560,
  "outer_tr_out_dim": 2560,
  "outer_tr_type": "outer_ln_res_adapter",
  "preserve_inner_input_grad": 1,
  "reflector_inner_aligner_path": "checkpoints/innerloop_deliberation/deliberation_arpo_v1_reflector_qwen35_4b_ln_res/checkpoint-10000",
  "reflector_model_name_or_path": "Qwen/Qwen3.5-4B",
  "supervise_final_only": 0,
  "toolcaller_inner_aligner_path": "checkpoints/innerloop_deliberation/deliberation_arpo_v1_toolcaller_qwen35_4b_ln_res/checkpoint-10000",
  "toolcaller_model_name_or_path": "Qwen/Qwen3.5-4B"
}