| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8076923076923077, |
| "eval_steps": 500, |
| "global_step": 84, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0229166672565043, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3560.9, |
| "completions/mean_length": 522.7101684570313, |
| "completions/mean_terminated_length": 438.9754211425781, |
| "completions/min_length": 75.0, |
| "completions/min_terminated_length": 75.0, |
| "entropy": 0.23113284446299076, |
| "epoch": 0.09615384615384616, |
| "frac_reward_zero_std": 0.02291666707023978, |
| "grad_norm": 0.0001292548916491907, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": -0.0001, |
| "num_tokens": 7139751.0, |
| "reward": -0.3463470071554184, |
| "reward_std": 0.19510238319635392, |
| "rewards/grpo_reward_function/mean": -0.34634698629379274, |
| "rewards/grpo_reward_function/std": 0.45176688134670256, |
| "sampling/importance_sampling_ratio/max": 0.2500600881874561, |
| "sampling/importance_sampling_ratio/mean": 0.003153010329697281, |
| "sampling/importance_sampling_ratio/min": 4.422263656305887e-39, |
| "sampling/sampling_logp_difference/max": 1.7915182709693909, |
| "sampling/sampling_logp_difference/mean": 0.04889127053320408, |
| "step": 10, |
| "step_time": 190.44577815867962 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.023697917722165583, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3451.8, |
| "completions/mean_length": 534.3229278564453, |
| "completions/mean_terminated_length": 448.0358642578125, |
| "completions/min_length": 88.7, |
| "completions/min_terminated_length": 88.7, |
| "entropy": 0.2307925283908844, |
| "epoch": 0.19230769230769232, |
| "frac_reward_zero_std": 0.030208333767950533, |
| "grad_norm": 7.460668293122211e-05, |
| "learning_rate": 9.047619047619049e-06, |
| "loss": -0.0002, |
| "num_tokens": 14396583.0, |
| "reward": -0.37422587275505065, |
| "reward_std": 0.20355914533138275, |
| "rewards/grpo_reward_function/mean": -0.3742258608341217, |
| "rewards/grpo_reward_function/std": 0.44767938256263734, |
| "sampling/importance_sampling_ratio/max": 0.19214814556762577, |
| "sampling/importance_sampling_ratio/mean": 0.0025246856122976167, |
| "sampling/importance_sampling_ratio/min": 5.423524047837388e-32, |
| "sampling/sampling_logp_difference/max": 1.7129023194313049, |
| "sampling/sampling_logp_difference/mean": 0.04906127564609051, |
| "step": 20, |
| "step_time": 190.18596366122364 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.021354167256504297, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3362.2, |
| "completions/mean_length": 523.9229278564453, |
| "completions/mean_terminated_length": 446.04356384277344, |
| "completions/min_length": 94.6, |
| "completions/min_terminated_length": 94.6, |
| "entropy": 0.22211343944072723, |
| "epoch": 0.28846153846153844, |
| "frac_reward_zero_std": 0.027083334140479566, |
| "grad_norm": 0.00013486736422447032, |
| "learning_rate": 9.772520313857777e-06, |
| "loss": -0.0001, |
| "num_tokens": 21575715.0, |
| "reward": -0.3505605816841125, |
| "reward_std": 0.18663191050291061, |
| "rewards/grpo_reward_function/mean": -0.35056057274341584, |
| "rewards/grpo_reward_function/std": 0.43719949424266813, |
| "sampling/importance_sampling_ratio/max": 0.27146717831492423, |
| "sampling/importance_sampling_ratio/mean": 0.0029611586302053182, |
| "sampling/importance_sampling_ratio/min": 3.801722733713229e-43, |
| "sampling/sampling_logp_difference/max": 1.808135986328125, |
| "sampling/sampling_logp_difference/mean": 0.04732150360941887, |
| "step": 30, |
| "step_time": 188.15189309306442 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01718750074505806, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3561.5, |
| "completions/mean_length": 516.6395965576172, |
| "completions/mean_terminated_length": 454.0846740722656, |
| "completions/min_length": 95.4, |
| "completions/min_terminated_length": 95.4, |
| "entropy": 0.23415493555366992, |
| "epoch": 0.38461538461538464, |
| "frac_reward_zero_std": 0.03125000027939677, |
| "grad_norm": 0.000136481436864881, |
| "learning_rate": 8.883744025880429e-06, |
| "loss": -0.0001, |
| "num_tokens": 28670267.0, |
| "reward": -0.3659424602985382, |
| "reward_std": 0.17396533936262132, |
| "rewards/grpo_reward_function/mean": -0.36594244837760925, |
| "rewards/grpo_reward_function/std": 0.4192140996456146, |
| "sampling/importance_sampling_ratio/max": 0.15043866001069545, |
| "sampling/importance_sampling_ratio/mean": 0.0019588641240261494, |
| "sampling/importance_sampling_ratio/min": 4.154415544199142e-41, |
| "sampling/sampling_logp_difference/max": 1.778145396709442, |
| "sampling/sampling_logp_difference/mean": 0.050116677582263944, |
| "step": 40, |
| "step_time": 187.47059447690845 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.02057291716337204, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3445.9, |
| "completions/mean_length": 533.0474151611328, |
| "completions/mean_terminated_length": 458.3273406982422, |
| "completions/min_length": 89.6, |
| "completions/min_terminated_length": 89.6, |
| "entropy": 0.21524659655988215, |
| "epoch": 0.4807692307692308, |
| "frac_reward_zero_std": 0.03437500102445483, |
| "grad_norm": 0.0005527222513477307, |
| "learning_rate": 7.445169960349167e-06, |
| "loss": 0.0001, |
| "num_tokens": 35851893.0, |
| "reward": -0.3463290870189667, |
| "reward_std": 0.19029797241091728, |
| "rewards/grpo_reward_function/mean": -0.34632907509803773, |
| "rewards/grpo_reward_function/std": 0.468786346912384, |
| "sampling/importance_sampling_ratio/max": 0.3124311394989491, |
| "sampling/importance_sampling_ratio/mean": 0.003457725653424859, |
| "sampling/importance_sampling_ratio/min": 6.522424396341872e-34, |
| "sampling/sampling_logp_difference/max": 1.7508255124092102, |
| "sampling/sampling_logp_difference/mean": 0.04619445130228996, |
| "step": 50, |
| "step_time": 187.6461409341544 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.019531250465661287, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3405.2, |
| "completions/mean_length": 567.5502746582031, |
| "completions/mean_terminated_length": 497.2075164794922, |
| "completions/min_length": 101.0, |
| "completions/min_terminated_length": 101.0, |
| "entropy": 0.21499812975525856, |
| "epoch": 0.5769230769230769, |
| "frac_reward_zero_std": 0.023958333674818276, |
| "grad_norm": 0.0002768893576569048, |
| "learning_rate": 5.660448208208513e-06, |
| "loss": -0.0, |
| "num_tokens": 43292650.0, |
| "reward": -0.3658351093530655, |
| "reward_std": 0.2153971642255783, |
| "rewards/grpo_reward_function/mean": -0.36583509147167204, |
| "rewards/grpo_reward_function/std": 0.46224400103092195, |
| "sampling/importance_sampling_ratio/max": 0.2361154653131962, |
| "sampling/importance_sampling_ratio/mean": 0.0022639297298155726, |
| "sampling/importance_sampling_ratio/min": 3.015787716454037e-38, |
| "sampling/sampling_logp_difference/max": 1.769980216026306, |
| "sampling/sampling_logp_difference/mean": 0.04610508680343628, |
| "step": 60, |
| "step_time": 187.66385944783687 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.027604167629033328, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3738.3, |
| "completions/mean_length": 598.5659057617188, |
| "completions/mean_terminated_length": 499.3739959716797, |
| "completions/min_length": 111.5, |
| "completions/min_terminated_length": 111.5, |
| "entropy": 0.21177774332463742, |
| "epoch": 0.6730769230769231, |
| "frac_reward_zero_std": 0.025000000931322576, |
| "grad_norm": 7.337733368660454e-05, |
| "learning_rate": 3.782230861445041e-06, |
| "loss": 0.0, |
| "num_tokens": 50694323.0, |
| "reward": -0.4116409093141556, |
| "reward_std": 0.22881191819906235, |
| "rewards/grpo_reward_function/mean": -0.4116408973932266, |
| "rewards/grpo_reward_function/std": 0.4689377576112747, |
| "sampling/importance_sampling_ratio/max": 0.12385215684771538, |
| "sampling/importance_sampling_ratio/mean": 0.0014503307553241029, |
| "sampling/importance_sampling_ratio/min": 5.254869241218064e-41, |
| "sampling/sampling_logp_difference/max": 1.805948269367218, |
| "sampling/sampling_logp_difference/mean": 0.04531049989163875, |
| "step": 70, |
| "step_time": 189.22959316521883 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.02161458395421505, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3813.9, |
| "completions/mean_length": 575.7130401611328, |
| "completions/mean_terminated_length": 497.99981689453125, |
| "completions/min_length": 90.0, |
| "completions/min_terminated_length": 90.0, |
| "entropy": 0.211932235956192, |
| "epoch": 0.7692307692307693, |
| "frac_reward_zero_std": 0.03229166744276881, |
| "grad_norm": 6.826014804673302e-05, |
| "learning_rate": 2.0764056088797646e-06, |
| "loss": -0.0001, |
| "num_tokens": 58011305.0, |
| "reward": -0.3900724709033966, |
| "reward_std": 0.21758214086294175, |
| "rewards/grpo_reward_function/mean": -0.3900724589824677, |
| "rewards/grpo_reward_function/std": 0.4689822793006897, |
| "sampling/importance_sampling_ratio/max": 0.2649656251072884, |
| "sampling/importance_sampling_ratio/mean": 0.0030477925203740595, |
| "sampling/importance_sampling_ratio/min": 1.6902078646393643e-36, |
| "sampling/sampling_logp_difference/max": 1.7864648818969726, |
| "sampling/sampling_logp_difference/mean": 0.04543661251664162, |
| "step": 80, |
| "step_time": 187.76453976780175 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 104, |
| "num_input_tokens_seen": 60954036, |
| "num_train_epochs": 1, |
| "save_steps": 21, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|