| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.20048019207683074, |
| "eval_steps": 500, |
| "global_step": 167, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.012500000186264515, |
| "completions/max_length": 2954.7, |
| "completions/max_terminated_length": 1951.6, |
| "completions/mean_length": 545.102099609375, |
| "completions/mean_terminated_length": 500.0404846191406, |
| "completions/min_length": 117.8, |
| "completions/min_terminated_length": 117.8, |
| "entropy": 0.1633674878627062, |
| "epoch": 0.012004801920768308, |
| "frac_reward_zero_std": 0.05000000149011612, |
| "grad_norm": 0.0819886103272438, |
| "learning_rate": 5.389221556886228e-07, |
| "loss": 0.0538, |
| "num_tokens": 1060997.0, |
| "reward": -0.349642014503479, |
| "reward_std": 0.18912948295474052, |
| "rewards/grpo_reward_function/mean": -0.3496420085430145, |
| "rewards/grpo_reward_function/std": 0.4486300081014633, |
| "sampling/importance_sampling_ratio/max": 2.3219674229621887, |
| "sampling/importance_sampling_ratio/mean": 0.3698740124702454, |
| "sampling/importance_sampling_ratio/min": 1.1996005980563495e-06, |
| "sampling/sampling_logp_difference/max": 2.5826863408088685, |
| "sampling/sampling_logp_difference/mean": 0.019079525023698807, |
| "step": 10, |
| "step_time": 591.6221411965787 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01250000037252903, |
| "completions/max_length": 2285.6, |
| "completions/max_terminated_length": 1834.2, |
| "completions/mean_length": 525.1979339599609, |
| "completions/mean_terminated_length": 481.30433044433596, |
| "completions/min_length": 111.4, |
| "completions/min_terminated_length": 111.4, |
| "entropy": 0.15604820642620326, |
| "epoch": 0.024009603841536616, |
| "frac_reward_zero_std": 0.0416666679084301, |
| "grad_norm": 0.09551126509904861, |
| "learning_rate": 1.1377245508982037e-06, |
| "loss": -0.0139, |
| "num_tokens": 2123212.0, |
| "reward": -0.298923921585083, |
| "reward_std": 0.21090517602860928, |
| "rewards/grpo_reward_function/mean": -0.2989239178597927, |
| "rewards/grpo_reward_function/std": 0.4665490254759789, |
| "sampling/importance_sampling_ratio/max": 2.135484504699707, |
| "sampling/importance_sampling_ratio/mean": 0.40152732133865354, |
| "sampling/importance_sampling_ratio/min": 2.7301041336613706e-05, |
| "sampling/sampling_logp_difference/max": 2.5806180000305177, |
| "sampling/sampling_logp_difference/mean": 0.019163084402680396, |
| "step": 20, |
| "step_time": 554.5896356501617 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01666666716337204, |
| "completions/max_length": 3269.3, |
| "completions/max_terminated_length": 2072.6, |
| "completions/mean_length": 556.327099609375, |
| "completions/mean_terminated_length": 496.14097595214844, |
| "completions/min_length": 132.3, |
| "completions/min_terminated_length": 132.3, |
| "entropy": 0.18481314480304717, |
| "epoch": 0.03601440576230492, |
| "frac_reward_zero_std": 0.05000000074505806, |
| "grad_norm": 0.11139781028032303, |
| "learning_rate": 1.7365269461077847e-06, |
| "loss": 0.0256, |
| "num_tokens": 3227189.0, |
| "reward": -0.409818297624588, |
| "reward_std": 0.22780176997184753, |
| "rewards/grpo_reward_function/mean": -0.4098182961344719, |
| "rewards/grpo_reward_function/std": 0.5334153980016708, |
| "sampling/importance_sampling_ratio/max": 2.277985179424286, |
| "sampling/importance_sampling_ratio/mean": 0.3370798110961914, |
| "sampling/importance_sampling_ratio/min": 2.6906073216806556e-05, |
| "sampling/sampling_logp_difference/max": 2.5000483632087707, |
| "sampling/sampling_logp_difference/mean": 0.02049510907381773, |
| "step": 30, |
| "step_time": 555.5971007851883 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01041666679084301, |
| "completions/max_length": 2135.6, |
| "completions/max_terminated_length": 1478.5, |
| "completions/mean_length": 508.5041778564453, |
| "completions/mean_terminated_length": 472.1120574951172, |
| "completions/min_length": 147.4, |
| "completions/min_terminated_length": 147.4, |
| "entropy": 0.1670758031308651, |
| "epoch": 0.04801920768307323, |
| "frac_reward_zero_std": 0.0416666679084301, |
| "grad_norm": 0.06704321503639221, |
| "learning_rate": 2.3353293413173654e-06, |
| "loss": -0.0127, |
| "num_tokens": 4318559.0, |
| "reward": -0.2258751168847084, |
| "reward_std": 0.16097248084843158, |
| "rewards/grpo_reward_function/mean": -0.22587510757148266, |
| "rewards/grpo_reward_function/std": 0.49552616477012634, |
| "sampling/importance_sampling_ratio/max": 2.3126969814300535, |
| "sampling/importance_sampling_ratio/mean": 0.35644740611314774, |
| "sampling/importance_sampling_ratio/min": 2.7391963689638034e-05, |
| "sampling/sampling_logp_difference/max": 2.8060175657272337, |
| "sampling/sampling_logp_difference/mean": 0.01994446888566017, |
| "step": 40, |
| "step_time": 554.565231207572 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00416666679084301, |
| "completions/max_length": 1859.8, |
| "completions/max_terminated_length": 1757.3, |
| "completions/mean_length": 513.3979309082031, |
| "completions/mean_terminated_length": 499.66423950195315, |
| "completions/min_length": 117.0, |
| "completions/min_terminated_length": 117.0, |
| "entropy": 0.17271990440785884, |
| "epoch": 0.060024009603841535, |
| "frac_reward_zero_std": 0.06666666865348816, |
| "grad_norm": 0.09258268028497696, |
| "learning_rate": 2.9341317365269463e-06, |
| "loss": 0.0015, |
| "num_tokens": 5370018.0, |
| "reward": -0.3178896278142929, |
| "reward_std": 0.1635244082659483, |
| "rewards/grpo_reward_function/mean": -0.3178896352648735, |
| "rewards/grpo_reward_function/std": 0.45689679607748984, |
| "sampling/importance_sampling_ratio/max": 2.195555794239044, |
| "sampling/importance_sampling_ratio/mean": 0.3522403955459595, |
| "sampling/importance_sampling_ratio/min": 7.164277021729504e-05, |
| "sampling/sampling_logp_difference/max": 2.4301879167556764, |
| "sampling/sampling_logp_difference/mean": 0.02071673283353448, |
| "step": 50, |
| "step_time": 548.9367319711484 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00416666679084301, |
| "completions/max_length": 2202.6, |
| "completions/max_terminated_length": 1812.6, |
| "completions/mean_length": 506.99168395996094, |
| "completions/mean_terminated_length": 492.1666778564453, |
| "completions/min_length": 127.8, |
| "completions/min_terminated_length": 127.8, |
| "entropy": 0.160004629381001, |
| "epoch": 0.07202881152460984, |
| "frac_reward_zero_std": 0.0416666679084301, |
| "grad_norm": 0.05922295153141022, |
| "learning_rate": 3.5329341317365273e-06, |
| "loss": -0.0033, |
| "num_tokens": 6466162.0, |
| "reward": -0.34021527171134947, |
| "reward_std": 0.18639734461903573, |
| "rewards/grpo_reward_function/mean": -0.34021526128053664, |
| "rewards/grpo_reward_function/std": 0.5187867254018783, |
| "sampling/importance_sampling_ratio/max": 2.010967791080475, |
| "sampling/importance_sampling_ratio/mean": 0.30313637256622317, |
| "sampling/importance_sampling_ratio/min": 3.7146345167826667e-06, |
| "sampling/sampling_logp_difference/max": 2.6932833194732666, |
| "sampling/sampling_logp_difference/mean": 0.02041825857013464, |
| "step": 60, |
| "step_time": 530.4414538932033 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.016666666977107523, |
| "completions/max_length": 2579.7, |
| "completions/max_terminated_length": 2077.7, |
| "completions/mean_length": 557.7854309082031, |
| "completions/mean_terminated_length": 499.6593933105469, |
| "completions/min_length": 121.5, |
| "completions/min_terminated_length": 121.5, |
| "entropy": 0.16313071362674236, |
| "epoch": 0.08403361344537816, |
| "frac_reward_zero_std": 0.10000000223517418, |
| "grad_norm": 0.014785214327275753, |
| "learning_rate": 4.131736526946108e-06, |
| "loss": 0.0424, |
| "num_tokens": 7608683.0, |
| "reward": -0.33393135815858843, |
| "reward_std": 0.19106332510709761, |
| "rewards/grpo_reward_function/mean": -0.33393134772777555, |
| "rewards/grpo_reward_function/std": 0.5677398703992367, |
| "sampling/importance_sampling_ratio/max": 1.9730541229248046, |
| "sampling/importance_sampling_ratio/mean": 0.3427995890378952, |
| "sampling/importance_sampling_ratio/min": 1.0688633483368904e-05, |
| "sampling/sampling_logp_difference/max": 2.8619158267974854, |
| "sampling/sampling_logp_difference/mean": 0.0201931843534112, |
| "step": 70, |
| "step_time": 541.6015901661478 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01666666716337204, |
| "completions/max_length": 3292.7, |
| "completions/max_terminated_length": 2336.3, |
| "completions/mean_length": 605.0791870117188, |
| "completions/mean_terminated_length": 546.5141571044921, |
| "completions/min_length": 142.3, |
| "completions/min_terminated_length": 142.3, |
| "entropy": 0.16853776723146438, |
| "epoch": 0.09603841536614646, |
| "frac_reward_zero_std": 0.10000000298023223, |
| "grad_norm": 0.0669359341263771, |
| "learning_rate": 4.730538922155689e-06, |
| "loss": 0.003, |
| "num_tokens": 8693089.0, |
| "reward": -0.36407424658536913, |
| "reward_std": 0.15138040184974672, |
| "rewards/grpo_reward_function/mean": -0.364074233174324, |
| "rewards/grpo_reward_function/std": 0.4984104484319687, |
| "sampling/importance_sampling_ratio/max": 2.417657721042633, |
| "sampling/importance_sampling_ratio/mean": 0.3272848010063171, |
| "sampling/importance_sampling_ratio/min": 6.19425904005766e-05, |
| "sampling/sampling_logp_difference/max": 2.832179582118988, |
| "sampling/sampling_logp_difference/mean": 0.019849142245948314, |
| "step": 80, |
| "step_time": 559.4207322074101 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.018750000558793545, |
| "completions/max_length": 3217.2, |
| "completions/max_terminated_length": 2202.3, |
| "completions/mean_length": 652.0937744140625, |
| "completions/mean_terminated_length": 587.0378173828125, |
| "completions/min_length": 164.3, |
| "completions/min_terminated_length": 164.3, |
| "entropy": 0.16202539429068566, |
| "epoch": 0.10804321728691477, |
| "frac_reward_zero_std": 0.10000000223517418, |
| "grad_norm": 0.03904345631599426, |
| "learning_rate": 5.32934131736527e-06, |
| "loss": -0.0041, |
| "num_tokens": 9849890.0, |
| "reward": -0.3628114402294159, |
| "reward_std": 0.2544385172426701, |
| "rewards/grpo_reward_function/mean": -0.3628114327788353, |
| "rewards/grpo_reward_function/std": 0.6255016416311264, |
| "sampling/importance_sampling_ratio/max": 1.7914996325969696, |
| "sampling/importance_sampling_ratio/mean": 0.3064177379012108, |
| "sampling/importance_sampling_ratio/min": 1.5591655392199753e-05, |
| "sampling/sampling_logp_difference/max": 2.840235471725464, |
| "sampling/sampling_logp_difference/mean": 0.01814730800688267, |
| "step": 90, |
| "step_time": 555.9057860235683 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.010416666977107525, |
| "completions/max_length": 2567.4, |
| "completions/max_terminated_length": 2092.1, |
| "completions/mean_length": 548.7250213623047, |
| "completions/mean_terminated_length": 511.10838317871094, |
| "completions/min_length": 116.3, |
| "completions/min_terminated_length": 116.3, |
| "entropy": 0.16799122765660285, |
| "epoch": 0.12004801920768307, |
| "frac_reward_zero_std": 0.10000000223517418, |
| "grad_norm": 0.07849112898111343, |
| "learning_rate": 5.928143712574851e-06, |
| "loss": -0.0009, |
| "num_tokens": 10951862.0, |
| "reward": -0.37651871144771576, |
| "reward_std": 0.17827629819512367, |
| "rewards/grpo_reward_function/mean": -0.3765186980366707, |
| "rewards/grpo_reward_function/std": 0.4988637834787369, |
| "sampling/importance_sampling_ratio/max": 2.2177215456962585, |
| "sampling/importance_sampling_ratio/mean": 0.3616850808262825, |
| "sampling/importance_sampling_ratio/min": 0.0, |
| "sampling/sampling_logp_difference/max": 2.7038833022117617, |
| "sampling/sampling_logp_difference/mean": 0.019389390759170056, |
| "step": 100, |
| "step_time": 539.0174913492053 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.012500000186264515, |
| "completions/max_length": 2678.3, |
| "completions/max_terminated_length": 1888.8, |
| "completions/mean_length": 535.3271057128907, |
| "completions/mean_terminated_length": 491.43089904785154, |
| "completions/min_length": 127.5, |
| "completions/min_terminated_length": 127.5, |
| "entropy": 0.1744688918814063, |
| "epoch": 0.13205282112845138, |
| "frac_reward_zero_std": 0.0833333358168602, |
| "grad_norm": 0.043163955211639404, |
| "learning_rate": 6.526946107784432e-06, |
| "loss": -0.0173, |
| "num_tokens": 12060171.0, |
| "reward": -0.2666824638843536, |
| "reward_std": 0.12587157338857652, |
| "rewards/grpo_reward_function/mean": -0.26668245121836665, |
| "rewards/grpo_reward_function/std": 0.40698017328977587, |
| "sampling/importance_sampling_ratio/max": 1.899654006958008, |
| "sampling/importance_sampling_ratio/mean": 0.37773958817124365, |
| "sampling/importance_sampling_ratio/min": 3.46376573256979e-14, |
| "sampling/sampling_logp_difference/max": 2.272650396823883, |
| "sampling/sampling_logp_difference/mean": 0.01878545032814145, |
| "step": 110, |
| "step_time": 554.1334780954755 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01666666716337204, |
| "completions/max_length": 3077.6, |
| "completions/max_terminated_length": 1915.6, |
| "completions/mean_length": 565.0541870117188, |
| "completions/mean_terminated_length": 504.5126983642578, |
| "completions/min_length": 138.5, |
| "completions/min_terminated_length": 138.5, |
| "entropy": 0.17697170842438936, |
| "epoch": 0.14405762304921968, |
| "frac_reward_zero_std": 0.0833333358168602, |
| "grad_norm": 0.07391675561666489, |
| "learning_rate": 7.125748502994012e-06, |
| "loss": 0.042, |
| "num_tokens": 13168921.0, |
| "reward": -0.3262443482875824, |
| "reward_std": 0.20909521877765655, |
| "rewards/grpo_reward_function/mean": -0.3262443423271179, |
| "rewards/grpo_reward_function/std": 0.5003126785159111, |
| "sampling/importance_sampling_ratio/max": 2.301289737224579, |
| "sampling/importance_sampling_ratio/mean": 0.38998747766017916, |
| "sampling/importance_sampling_ratio/min": 7.411608444201079e-05, |
| "sampling/sampling_logp_difference/max": 2.7557363152503966, |
| "sampling/sampling_logp_difference/mean": 0.01885297931730747, |
| "step": 120, |
| "step_time": 549.9578140962869 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00416666679084301, |
| "completions/max_length": 2290.1, |
| "completions/max_terminated_length": 1886.2, |
| "completions/mean_length": 499.789599609375, |
| "completions/mean_terminated_length": 484.77695617675784, |
| "completions/min_length": 127.0, |
| "completions/min_terminated_length": 127.0, |
| "entropy": 0.19473983831703662, |
| "epoch": 0.15606242496998798, |
| "frac_reward_zero_std": 0.03333333432674408, |
| "grad_norm": 0.06443756073713303, |
| "learning_rate": 7.724550898203594e-06, |
| "loss": -0.0203, |
| "num_tokens": 14212700.0, |
| "reward": -0.26288305670022966, |
| "reward_std": 0.1738448791205883, |
| "rewards/grpo_reward_function/mean": -0.26288305073976515, |
| "rewards/grpo_reward_function/std": 0.48268924951553344, |
| "sampling/importance_sampling_ratio/max": 2.1470563650131225, |
| "sampling/importance_sampling_ratio/mean": 0.35831653475761416, |
| "sampling/importance_sampling_ratio/min": 1.968140890369341e-05, |
| "sampling/sampling_logp_difference/max": 2.0650948524475097, |
| "sampling/sampling_logp_difference/mean": 0.01968124657869339, |
| "step": 130, |
| "step_time": 535.0768479405903 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01250000037252903, |
| "completions/max_length": 3015.9, |
| "completions/max_terminated_length": 1749.5, |
| "completions/mean_length": 492.37709045410156, |
| "completions/mean_terminated_length": 446.93010864257815, |
| "completions/min_length": 114.7, |
| "completions/min_terminated_length": 114.7, |
| "entropy": 0.21128173358738422, |
| "epoch": 0.16806722689075632, |
| "frac_reward_zero_std": 0.05833333432674408, |
| "grad_norm": 0.09337731450796127, |
| "learning_rate": 8.323353293413174e-06, |
| "loss": 0.0277, |
| "num_tokens": 15293077.0, |
| "reward": -0.26287811398506167, |
| "reward_std": 0.14675465896725653, |
| "rewards/grpo_reward_function/mean": -0.26287810802459716, |
| "rewards/grpo_reward_function/std": 0.37649901360273363, |
| "sampling/importance_sampling_ratio/max": 2.3814776659011843, |
| "sampling/importance_sampling_ratio/mean": 0.46577124297618866, |
| "sampling/importance_sampling_ratio/min": 1.3427511260960534e-06, |
| "sampling/sampling_logp_difference/max": 2.155888545513153, |
| "sampling/sampling_logp_difference/mean": 0.019245322328060865, |
| "step": 140, |
| "step_time": 552.9976656335406 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 4.9971032422035935e-05, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 4.9971032422035935e-05, |
| "completions/clipped_ratio": 0.01041666679084301, |
| "completions/max_length": 2447.6, |
| "completions/max_terminated_length": 1785.5, |
| "completions/mean_length": 552.295849609375, |
| "completions/mean_terminated_length": 515.5673645019531, |
| "completions/min_length": 144.7, |
| "completions/min_terminated_length": 144.7, |
| "entropy": 0.2773955374956131, |
| "epoch": 0.18007202881152462, |
| "frac_reward_zero_std": 0.02500000074505806, |
| "grad_norm": 0.1007687970995903, |
| "learning_rate": 8.922155688622756e-06, |
| "loss": -0.0141, |
| "num_tokens": 16425531.0, |
| "reward": -0.26935882605612277, |
| "reward_std": 0.12829533144831656, |
| "rewards/grpo_reward_function/mean": -0.2693588202819228, |
| "rewards/grpo_reward_function/std": 0.33063299730420115, |
| "sampling/importance_sampling_ratio/max": 2.1382891178131103, |
| "sampling/importance_sampling_ratio/mean": 0.4445547193288803, |
| "sampling/importance_sampling_ratio/min": 7.412413807410812e-05, |
| "sampling/sampling_logp_difference/max": 1.7894923090934753, |
| "sampling/sampling_logp_difference/mean": 0.019091704115271568, |
| "step": 150, |
| "step_time": 547.844954107888 |
| }, |
| { |
| "clip_ratio/high_max": 4.80769231216982e-05, |
| "clip_ratio/high_mean": 8.012820762814954e-06, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 8.012820762814954e-06, |
| "completions/clipped_ratio": 0.010416666977107525, |
| "completions/max_length": 2388.9, |
| "completions/max_terminated_length": 1982.4, |
| "completions/mean_length": 544.4896026611328, |
| "completions/mean_terminated_length": 507.44931945800784, |
| "completions/min_length": 122.8, |
| "completions/min_terminated_length": 122.8, |
| "entropy": 0.3212181769311428, |
| "epoch": 0.19207683073229292, |
| "frac_reward_zero_std": 0.06666666865348816, |
| "grad_norm": 0.06833557039499283, |
| "learning_rate": 9.520958083832336e-06, |
| "loss": 0.0134, |
| "num_tokens": 17564686.0, |
| "reward": -0.28017824441194533, |
| "reward_std": 0.20728585943579675, |
| "rewards/grpo_reward_function/mean": -0.28017824441194533, |
| "rewards/grpo_reward_function/std": 0.5502792000770569, |
| "sampling/importance_sampling_ratio/max": 2.1408735513687134, |
| "sampling/importance_sampling_ratio/mean": 0.5016659319400787, |
| "sampling/importance_sampling_ratio/min": 0.000778414961314411, |
| "sampling/sampling_logp_difference/max": 1.89249027967453, |
| "sampling/sampling_logp_difference/mean": 0.01902961954474449, |
| "step": 160, |
| "step_time": 536.2867619435303 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 833, |
| "num_input_tokens_seen": 18381921, |
| "num_train_epochs": 1, |
| "save_steps": 167, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|