| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.6153846153846154, |
| "eval_steps": 500, |
| "global_step": 48, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0943359375, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3681.1, |
| "completions/mean_length": 1021.4728515625, |
| "completions/mean_terminated_length": 701.1630737304688, |
| "completions/min_length": 103.7, |
| "completions/min_terminated_length": 103.7, |
| "entropy": 0.17240249924361706, |
| "epoch": 0.1282051282051282, |
| "frac_reward_zero_std": 0.0515625, |
| "grad_norm": 0.00012006821180025731, |
| "learning_rate": 5.625e-06, |
| "loss": -0.0001, |
| "num_tokens": 10714829.0, |
| "reward": -0.35699094235897066, |
| "reward_std": 0.3353965669870377, |
| "rewards/grpo_reward_function/mean": -0.35699094235897066, |
| "rewards/grpo_reward_function/std": 0.7091993272304535, |
| "sampling/importance_sampling_ratio/max": 0.1970108050853014, |
| "sampling/importance_sampling_ratio/mean": 0.0012754969735397025, |
| "sampling/importance_sampling_ratio/min": 0.0, |
| "sampling/sampling_logp_difference/max": 1.831837797164917, |
| "sampling/sampling_logp_difference/mean": 0.0368979848921299, |
| "step": 10, |
| "step_time": 179.07797421738505 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.078125, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3652.6, |
| "completions/mean_length": 961.977734375, |
| "completions/mean_terminated_length": 696.3083312988281, |
| "completions/min_length": 86.3, |
| "completions/min_terminated_length": 86.3, |
| "entropy": 0.17260038182139398, |
| "epoch": 0.2564102564102564, |
| "frac_reward_zero_std": 0.04453125, |
| "grad_norm": 0.00011115181306767518, |
| "learning_rate": 9.942341621640558e-06, |
| "loss": -0.0003, |
| "num_tokens": 21190031.0, |
| "reward": -0.3344299763441086, |
| "reward_std": 0.32805776298046113, |
| "rewards/grpo_reward_function/mean": -0.3344299763441086, |
| "rewards/grpo_reward_function/std": 0.7096675753593444, |
| "sampling/importance_sampling_ratio/max": 0.21866981824859977, |
| "sampling/importance_sampling_ratio/mean": 0.0013687322207260877, |
| "sampling/importance_sampling_ratio/min": 0.0, |
| "sampling/sampling_logp_difference/max": 1.826985728740692, |
| "sampling/sampling_logp_difference/mean": 0.03691292852163315, |
| "step": 20, |
| "step_time": 178.9407274030149 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0720703125, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3748.0, |
| "completions/mean_length": 916.4140625, |
| "completions/mean_terminated_length": 670.6310546875, |
| "completions/min_length": 91.7, |
| "completions/min_terminated_length": 91.7, |
| "entropy": 0.17158033028244973, |
| "epoch": 0.38461538461538464, |
| "frac_reward_zero_std": 0.046875, |
| "grad_norm": 0.00029510410537975974, |
| "learning_rate": 8.953878684688492e-06, |
| "loss": -0.0, |
| "num_tokens": 31354631.0, |
| "reward": -0.3493777960538864, |
| "reward_std": 0.3407163262367249, |
| "rewards/grpo_reward_function/mean": -0.34937779903411864, |
| "rewards/grpo_reward_function/std": 0.7120605766773224, |
| "sampling/importance_sampling_ratio/max": 0.3080031927675009, |
| "sampling/importance_sampling_ratio/mean": 0.002376599394483492, |
| "sampling/importance_sampling_ratio/min": 0.0, |
| "sampling/sampling_logp_difference/max": 2.0400707840919496, |
| "sampling/sampling_logp_difference/mean": 0.03684176206588745, |
| "step": 30, |
| "step_time": 171.81590754203498 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0662109375, |
| "completions/max_length": 4096.0, |
| "completions/max_terminated_length": 3509.1, |
| "completions/mean_length": 912.234375, |
| "completions/mean_terminated_length": 686.69970703125, |
| "completions/min_length": 83.6, |
| "completions/min_terminated_length": 83.6, |
| "entropy": 0.16810744479298592, |
| "epoch": 0.5128205128205128, |
| "frac_reward_zero_std": 0.06328125, |
| "grad_norm": 1.6541331856452166e-05, |
| "learning_rate": 6.971779275566593e-06, |
| "loss": -0.0001, |
| "num_tokens": 41526511.0, |
| "reward": -0.3015082895755768, |
| "reward_std": 0.3079641282558441, |
| "rewards/grpo_reward_function/mean": -0.301508292555809, |
| "rewards/grpo_reward_function/std": 0.729301530122757, |
| "sampling/importance_sampling_ratio/max": 0.3999401656910777, |
| "sampling/importance_sampling_ratio/mean": 0.003650264849420637, |
| "sampling/importance_sampling_ratio/min": 1.04037442665946e-40, |
| "sampling/sampling_logp_difference/max": 1.7756866335868835, |
| "sampling/sampling_logp_difference/mean": 0.0364519115537405, |
| "step": 40, |
| "step_time": 178.55749775655568 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 78, |
| "num_input_tokens_seen": 49637699, |
| "num_train_epochs": 1, |
| "save_steps": 16, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|