{ "mas_shape": "deliberation", "mas_task": "math", "non_last_loss_weight": 0.1, "num_recursive_rounds": 3, "outer_rt_in_dim": 2560, "outer_rt_out_dim": 2560, "outer_rt_type": "outer_ln_res_adapter", "outer_tr_in_dim": 2560, "outer_tr_out_dim": 2560, "outer_tr_type": "outer_ln_res_adapter", "preserve_inner_input_grad": 1, "reflector_inner_aligner_path": "checkpoints/innerloop_deliberation/deliberation_arpo_v1_reflector_qwen35_4b_ln_res/checkpoint-10000", "reflector_model_name_or_path": "Qwen/Qwen3.5-4B", "supervise_final_only": 0, "toolcaller_inner_aligner_path": "checkpoints/innerloop_deliberation/deliberation_arpo_v1_toolcaller_qwen35_4b_ln_res/checkpoint-10000", "toolcaller_model_name_or_path": "Qwen/Qwen3.5-4B" }