File size: 757 Bytes
3f09a92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
{
  "mas_shape": "deliberation",
  "mas_task": "math",
  "non_last_loss_weight": 0.1,
  "num_recursive_rounds": 3,
  "outer_rt_in_dim": 2560,
  "outer_rt_out_dim": 2560,
  "outer_rt_type": "outer_ln_res_adapter",
  "outer_tr_in_dim": 2560,
  "outer_tr_out_dim": 2560,
  "outer_tr_type": "outer_ln_res_adapter",
  "preserve_inner_input_grad": 1,
  "reflector_inner_aligner_path": "checkpoints/innerloop_deliberation/deliberation_arpo_v1_reflector_qwen35_4b_ln_res/checkpoint-10000",
  "reflector_model_name_or_path": "Qwen/Qwen3.5-4B",
  "supervise_final_only": 0,
  "toolcaller_inner_aligner_path": "checkpoints/innerloop_deliberation/deliberation_arpo_v1_toolcaller_qwen35_4b_ln_res/checkpoint-10000",
  "toolcaller_model_name_or_path": "Qwen/Qwen3.5-4B"
}