| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.20048019207683074, |
| "eval_steps": 500, |
| "global_step": 167, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01041666679084301, |
| "completions/max_length": 2827.7, |
| "completions/max_terminated_length": 2365.2, |
| "completions/mean_length": 630.6958435058593, |
| "completions/mean_terminated_length": 594.2115203857422, |
| "completions/min_length": 132.0, |
| "completions/min_terminated_length": 132.0, |
| "entropy": 0.11724737156182527, |
| "epoch": 0.012004801920768308, |
| "frac_reward_zero_std": 0.13333333507180214, |
| "grad_norm": 0.038755763322114944, |
| "learning_rate": 5.389221556886228e-07, |
| "loss": 0.0113, |
| "num_tokens": 799206.0, |
| "reward": -0.18518302096053957, |
| "reward_std": 0.20015475898981094, |
| "rewards/grpo_reward_function/mean": -0.18518302938900888, |
| "rewards/grpo_reward_function/std": 0.6885311886668205, |
| "sampling/importance_sampling_ratio/max": 2.1990586280822755, |
| "sampling/importance_sampling_ratio/mean": 0.4647279143333435, |
| "sampling/importance_sampling_ratio/min": 0.0005068443759228102, |
| "sampling/sampling_logp_difference/max": 2.5390082478523253, |
| "sampling/sampling_logp_difference/mean": 0.013516949955374002, |
| "step": 10, |
| "step_time": 569.2043406252749 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.010416666977107525, |
| "completions/max_length": 2742.3, |
| "completions/max_terminated_length": 2382.7, |
| "completions/mean_length": 646.3521057128906, |
| "completions/mean_terminated_length": 610.6208740234375, |
| "completions/min_length": 131.6, |
| "completions/min_terminated_length": 131.6, |
| "entropy": 0.1267015876248479, |
| "epoch": 0.024009603841536616, |
| "frac_reward_zero_std": 0.1083333358168602, |
| "grad_norm": 0.10266362875699997, |
| "learning_rate": 1.1377245508982037e-06, |
| "loss": -0.0225, |
| "num_tokens": 1617099.0, |
| "reward": 0.01770310625433922, |
| "reward_std": 0.23654931634664536, |
| "rewards/grpo_reward_function/mean": 0.0177031047642231, |
| "rewards/grpo_reward_function/std": 0.8463600814342499, |
| "sampling/importance_sampling_ratio/max": 1.9834328293800354, |
| "sampling/importance_sampling_ratio/mean": 0.40317725837230683, |
| "sampling/importance_sampling_ratio/min": 0.0032052009667828283, |
| "sampling/sampling_logp_difference/max": 2.1022926926612855, |
| "sampling/sampling_logp_difference/mean": 0.01352061601355672, |
| "step": 20, |
| "step_time": 548.7977518392727 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.014583333767950535, |
| "completions/max_length": 3068.7, |
| "completions/max_terminated_length": 2151.3, |
| "completions/mean_length": 682.6271087646485, |
| "completions/mean_terminated_length": 632.6335662841797, |
| "completions/min_length": 188.7, |
| "completions/min_terminated_length": 188.7, |
| "entropy": 0.125905223749578, |
| "epoch": 0.03601440576230492, |
| "frac_reward_zero_std": 0.11666666939854622, |
| "grad_norm": 0.06394433230161667, |
| "learning_rate": 1.7365269461077847e-06, |
| "loss": 0.0229, |
| "num_tokens": 2465988.0, |
| "reward": -0.18962360136210918, |
| "reward_std": 0.19849726594984532, |
| "rewards/grpo_reward_function/mean": -0.18962358720600606, |
| "rewards/grpo_reward_function/std": 0.6894359931349754, |
| "sampling/importance_sampling_ratio/max": 2.460964298248291, |
| "sampling/importance_sampling_ratio/mean": 0.4253284126520157, |
| "sampling/importance_sampling_ratio/min": 1.070212653598215e-05, |
| "sampling/sampling_logp_difference/max": 2.8448187589645384, |
| "sampling/sampling_logp_difference/mean": 0.013953791093081236, |
| "step": 30, |
| "step_time": 554.4699578347615 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01458333358168602, |
| "completions/max_length": 2789.3, |
| "completions/max_terminated_length": 1984.1, |
| "completions/mean_length": 639.958349609375, |
| "completions/mean_terminated_length": 589.7686553955078, |
| "completions/min_length": 163.7, |
| "completions/min_terminated_length": 163.7, |
| "entropy": 0.11666738856583833, |
| "epoch": 0.04801920768307323, |
| "frac_reward_zero_std": 0.1166666679084301, |
| "grad_norm": 0.08781701326370239, |
| "learning_rate": 2.3353293413173654e-06, |
| "loss": -0.0064, |
| "num_tokens": 3297428.0, |
| "reward": -0.03914917185902596, |
| "reward_std": 0.21894535794854164, |
| "rewards/grpo_reward_function/mean": -0.03914917148649692, |
| "rewards/grpo_reward_function/std": 0.8605277180671692, |
| "sampling/importance_sampling_ratio/max": 2.0170334696769716, |
| "sampling/importance_sampling_ratio/mean": 0.4818507760763168, |
| "sampling/importance_sampling_ratio/min": 0.0015486635098906688, |
| "sampling/sampling_logp_difference/max": 2.52269823551178, |
| "sampling/sampling_logp_difference/mean": 0.012881174683570862, |
| "step": 40, |
| "step_time": 541.3490906376392 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 4.2163060425082224e-05, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 4.2163060425082224e-05, |
| "completions/clipped_ratio": 0.016666667349636555, |
| "completions/max_length": 2619.1, |
| "completions/max_terminated_length": 2081.8, |
| "completions/mean_length": 660.1000122070312, |
| "completions/mean_terminated_length": 602.3141662597657, |
| "completions/min_length": 205.5, |
| "completions/min_terminated_length": 205.5, |
| "entropy": 0.12803181819617748, |
| "epoch": 0.060024009603841535, |
| "frac_reward_zero_std": 0.1416666701436043, |
| "grad_norm": 0.03966222703456879, |
| "learning_rate": 2.9341317365269463e-06, |
| "loss": 0.0112, |
| "num_tokens": 4129824.0, |
| "reward": -0.11274411627091467, |
| "reward_std": 0.2275936236605048, |
| "rewards/grpo_reward_function/mean": -0.1127441140357405, |
| "rewards/grpo_reward_function/std": 0.8841595828533173, |
| "sampling/importance_sampling_ratio/max": 2.008124852180481, |
| "sampling/importance_sampling_ratio/mean": 0.46366433799266815, |
| "sampling/importance_sampling_ratio/min": 0.0006394427657710367, |
| "sampling/sampling_logp_difference/max": 2.58479106426239, |
| "sampling/sampling_logp_difference/mean": 0.013615725003182888, |
| "step": 50, |
| "step_time": 545.8774313618429 |
| }, |
| { |
| "clip_ratio/high_max": 4.673766961786896e-05, |
| "clip_ratio/high_mean": 7.7896114817122e-06, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 7.7896114817122e-06, |
| "completions/clipped_ratio": 0.01250000037252903, |
| "completions/max_length": 2304.9, |
| "completions/max_terminated_length": 1602.8, |
| "completions/mean_length": 596.5750213623047, |
| "completions/mean_terminated_length": 552.8821624755859, |
| "completions/min_length": 150.0, |
| "completions/min_terminated_length": 150.0, |
| "entropy": 0.12017892487347126, |
| "epoch": 0.07202881152460984, |
| "frac_reward_zero_std": 0.14166666865348815, |
| "grad_norm": 0.09313877671957016, |
| "learning_rate": 3.5329341317365273e-06, |
| "loss": -0.0307, |
| "num_tokens": 4936176.0, |
| "reward": -0.03057028874754906, |
| "reward_std": 0.2686158835887909, |
| "rewards/grpo_reward_function/mean": -0.03057028613984585, |
| "rewards/grpo_reward_function/std": 0.8661522060632706, |
| "sampling/importance_sampling_ratio/max": 2.2043559432029722, |
| "sampling/importance_sampling_ratio/mean": 0.4847503274679184, |
| "sampling/importance_sampling_ratio/min": 7.13271651690217e-05, |
| "sampling/sampling_logp_difference/max": 2.4851160287857055, |
| "sampling/sampling_logp_difference/mean": 0.013510057888925075, |
| "step": 60, |
| "step_time": 546.4311281181872 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 6.860105058876797e-05, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 6.860105058876797e-05, |
| "completions/clipped_ratio": 0.018750000558793545, |
| "completions/max_length": 3205.2, |
| "completions/max_terminated_length": 2270.1, |
| "completions/mean_length": 702.3604431152344, |
| "completions/mean_terminated_length": 636.497216796875, |
| "completions/min_length": 131.8, |
| "completions/min_terminated_length": 131.8, |
| "entropy": 0.11768119670450687, |
| "epoch": 0.08403361344537816, |
| "frac_reward_zero_std": 0.1083333358168602, |
| "grad_norm": 0.0433771014213562, |
| "learning_rate": 4.131736526946108e-06, |
| "loss": 0.0553, |
| "num_tokens": 5841149.0, |
| "reward": -0.0784481130540371, |
| "reward_std": 0.23132488708943127, |
| "rewards/grpo_reward_function/mean": -0.07844811640679836, |
| "rewards/grpo_reward_function/std": 0.8492624998092652, |
| "sampling/importance_sampling_ratio/max": 2.233333742618561, |
| "sampling/importance_sampling_ratio/mean": 0.4808308959007263, |
| "sampling/importance_sampling_ratio/min": 0.0008302704439188347, |
| "sampling/sampling_logp_difference/max": 2.9827078700065615, |
| "sampling/sampling_logp_difference/mean": 0.012630783580243587, |
| "step": 70, |
| "step_time": 561.5568902881816 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 8.41788569232449e-05, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 8.41788569232449e-05, |
| "completions/clipped_ratio": 0.01250000037252903, |
| "completions/max_length": 2541.9, |
| "completions/max_terminated_length": 2033.2, |
| "completions/mean_length": 612.5875213623046, |
| "completions/mean_terminated_length": 569.556509399414, |
| "completions/min_length": 149.1, |
| "completions/min_terminated_length": 149.1, |
| "entropy": 0.13153507560491562, |
| "epoch": 0.09603841536614646, |
| "frac_reward_zero_std": 0.10833333656191826, |
| "grad_norm": 0.08665835857391357, |
| "learning_rate": 4.730538922155689e-06, |
| "loss": 0.0701, |
| "num_tokens": 6606395.0, |
| "reward": -0.011540251970291137, |
| "reward_std": 0.19073452726006507, |
| "rewards/grpo_reward_function/mean": -0.011540257930755615, |
| "rewards/grpo_reward_function/std": 0.784630474448204, |
| "sampling/importance_sampling_ratio/max": 2.1984647274017335, |
| "sampling/importance_sampling_ratio/mean": 0.5050391256809235, |
| "sampling/importance_sampling_ratio/min": 0.00014755414913452113, |
| "sampling/sampling_logp_difference/max": 1.8997669577598573, |
| "sampling/sampling_logp_difference/mean": 0.013426258694380522, |
| "step": 80, |
| "step_time": 551.2647462010384 |
| }, |
| { |
| "clip_ratio/high_max": 2.2563176753465086e-05, |
| "clip_ratio/high_mean": 3.760529580176808e-06, |
| "clip_ratio/low_mean": 1.3224284339230508e-05, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 1.6984813919407314e-05, |
| "completions/clipped_ratio": 0.01041666679084301, |
| "completions/max_length": 2347.6, |
| "completions/max_terminated_length": 2078.4, |
| "completions/mean_length": 621.4354370117187, |
| "completions/mean_terminated_length": 586.118701171875, |
| "completions/min_length": 163.2, |
| "completions/min_terminated_length": 163.2, |
| "entropy": 0.1206895818002522, |
| "epoch": 0.10804321728691477, |
| "frac_reward_zero_std": 0.1083333358168602, |
| "grad_norm": 0.034579165279865265, |
| "learning_rate": 5.32934131736527e-06, |
| "loss": 0.0011, |
| "num_tokens": 7424828.0, |
| "reward": 0.02708094713743776, |
| "reward_std": 0.23181376457214356, |
| "rewards/grpo_reward_function/mean": 0.027080959058366716, |
| "rewards/grpo_reward_function/std": 0.8183064997196198, |
| "sampling/importance_sampling_ratio/max": 2.499999237060547, |
| "sampling/importance_sampling_ratio/mean": 0.486982923746109, |
| "sampling/importance_sampling_ratio/min": 0.000999147113179788, |
| "sampling/sampling_logp_difference/max": 2.079863798618317, |
| "sampling/sampling_logp_difference/mean": 0.012986462097615004, |
| "step": 90, |
| "step_time": 550.7672496054322 |
| }, |
| { |
| "clip_ratio/high_max": 0.00031043787457747386, |
| "clip_ratio/high_mean": 5.173964618734317e-05, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 5.173964618734317e-05, |
| "completions/clipped_ratio": 0.01666666716337204, |
| "completions/max_length": 2765.9, |
| "completions/max_terminated_length": 1842.8, |
| "completions/mean_length": 675.4458435058593, |
| "completions/mean_terminated_length": 618.1135345458985, |
| "completions/min_length": 178.2, |
| "completions/min_terminated_length": 178.2, |
| "entropy": 0.149181258212775, |
| "epoch": 0.12004801920768307, |
| "frac_reward_zero_std": 0.1416666701436043, |
| "grad_norm": 0.13291294872760773, |
| "learning_rate": 5.928143712574851e-06, |
| "loss": 0.0212, |
| "num_tokens": 8278282.0, |
| "reward": 0.0703774506226182, |
| "reward_std": 0.2336222641170025, |
| "rewards/grpo_reward_function/mean": 0.07037745183333755, |
| "rewards/grpo_reward_function/std": 0.8314530551433563, |
| "sampling/importance_sampling_ratio/max": 2.2870466232299806, |
| "sampling/importance_sampling_ratio/mean": 0.4643064886331558, |
| "sampling/importance_sampling_ratio/min": 2.921815394074656e-05, |
| "sampling/sampling_logp_difference/max": 1.9242668151855469, |
| "sampling/sampling_logp_difference/mean": 0.014198462665081023, |
| "step": 100, |
| "step_time": 547.0480061549694 |
| }, |
| { |
| "clip_ratio/high_max": 0.0003238706885895226, |
| "clip_ratio/high_mean": 5.397844997787615e-05, |
| "clip_ratio/low_mean": 7.069677012623287e-05, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.00012467522010410902, |
| "completions/clipped_ratio": 0.020833334513008596, |
| "completions/max_length": 2925.3, |
| "completions/max_terminated_length": 2297.9, |
| "completions/mean_length": 680.045849609375, |
| "completions/mean_terminated_length": 605.8596954345703, |
| "completions/min_length": 186.9, |
| "completions/min_terminated_length": 186.9, |
| "entropy": 0.1511568833142519, |
| "epoch": 0.13205282112845138, |
| "frac_reward_zero_std": 0.08333333507180214, |
| "grad_norm": 0.0421764962375164, |
| "learning_rate": 6.526946107784432e-06, |
| "loss": -0.0031, |
| "num_tokens": 9165800.0, |
| "reward": 0.04289367534220219, |
| "reward_std": 0.24053554534912108, |
| "rewards/grpo_reward_function/mean": 0.042893677949905396, |
| "rewards/grpo_reward_function/std": 0.835248938202858, |
| "sampling/importance_sampling_ratio/max": 2.332168984413147, |
| "sampling/importance_sampling_ratio/mean": 0.4403663039207458, |
| "sampling/importance_sampling_ratio/min": 0.0001706225667930994, |
| "sampling/sampling_logp_difference/max": 2.4483426809310913, |
| "sampling/sampling_logp_difference/mean": 0.014532316662371158, |
| "step": 110, |
| "step_time": 548.8019280240871 |
| }, |
| { |
| "clip_ratio/high_max": 0.00012998266611248256, |
| "clip_ratio/high_mean": 2.16637781704776e-05, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 2.16637781704776e-05, |
| "completions/clipped_ratio": 0.002083333395421505, |
| "completions/max_length": 1812.6, |
| "completions/max_terminated_length": 1592.9, |
| "completions/mean_length": 602.1333526611328, |
| "completions/mean_terminated_length": 594.61220703125, |
| "completions/min_length": 179.7, |
| "completions/min_terminated_length": 179.7, |
| "entropy": 0.16710406728088856, |
| "epoch": 0.14405762304921968, |
| "frac_reward_zero_std": 0.0833333358168602, |
| "grad_norm": 0.07664494961500168, |
| "learning_rate": 7.125748502994012e-06, |
| "loss": -0.0309, |
| "num_tokens": 9975204.0, |
| "reward": 0.0826782912015915, |
| "reward_std": 0.23934805542230606, |
| "rewards/grpo_reward_function/mean": 0.0826782874763012, |
| "rewards/grpo_reward_function/std": 0.8862796187400818, |
| "sampling/importance_sampling_ratio/max": 2.2503564238548277, |
| "sampling/importance_sampling_ratio/mean": 0.4635925680398941, |
| "sampling/importance_sampling_ratio/min": 0.0009416027547558823, |
| "sampling/sampling_logp_difference/max": 2.073215699195862, |
| "sampling/sampling_logp_difference/mean": 0.01480921907350421, |
| "step": 120, |
| "step_time": 539.0031213279814 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 1.9831826648442074e-06, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 1.9831826648442074e-06, |
| "completions/clipped_ratio": 0.006250000186264515, |
| "completions/max_length": 2290.7, |
| "completions/max_terminated_length": 1880.6, |
| "completions/mean_length": 599.5916900634766, |
| "completions/mean_terminated_length": 578.0104217529297, |
| "completions/min_length": 168.6, |
| "completions/min_terminated_length": 168.6, |
| "entropy": 0.16012020353227854, |
| "epoch": 0.15606242496998798, |
| "frac_reward_zero_std": 0.11666667014360428, |
| "grad_norm": 0.05220530927181244, |
| "learning_rate": 7.724550898203594e-06, |
| "loss": -0.0377, |
| "num_tokens": 10768324.0, |
| "reward": -0.0507307555526495, |
| "reward_std": 0.18351687043905257, |
| "rewards/grpo_reward_function/mean": -0.05073075201362372, |
| "rewards/grpo_reward_function/std": 0.7544578343629837, |
| "sampling/importance_sampling_ratio/max": 2.375898337364197, |
| "sampling/importance_sampling_ratio/mean": 0.524286350607872, |
| "sampling/importance_sampling_ratio/min": 0.00018855740054277704, |
| "sampling/sampling_logp_difference/max": 2.009746181964874, |
| "sampling/sampling_logp_difference/mean": 0.01376222250983119, |
| "step": 130, |
| "step_time": 548.6409472068772 |
| }, |
| { |
| "clip_ratio/high_max": 0.00017211703816428782, |
| "clip_ratio/high_mean": 2.86861730273813e-05, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 2.86861730273813e-05, |
| "completions/clipped_ratio": 0.00833333358168602, |
| "completions/max_length": 3136.9, |
| "completions/max_terminated_length": 2221.0, |
| "completions/mean_length": 684.8500244140625, |
| "completions/mean_terminated_length": 655.5515625, |
| "completions/min_length": 168.7, |
| "completions/min_terminated_length": 168.7, |
| "entropy": 0.12020768839865922, |
| "epoch": 0.16806722689075632, |
| "frac_reward_zero_std": 0.09166666939854622, |
| "grad_norm": 0.058197326958179474, |
| "learning_rate": 8.323353293413174e-06, |
| "loss": -0.0342, |
| "num_tokens": 11642436.0, |
| "reward": 0.04102597634773701, |
| "reward_std": 0.2864942252635956, |
| "rewards/grpo_reward_function/mean": 0.041025977826211604, |
| "rewards/grpo_reward_function/std": 0.8844284832477569, |
| "sampling/importance_sampling_ratio/max": 2.37525737285614, |
| "sampling/importance_sampling_ratio/mean": 0.46323378682136535, |
| "sampling/importance_sampling_ratio/min": 2.6157076149502247e-08, |
| "sampling/sampling_logp_difference/max": 2.5657184720039368, |
| "sampling/sampling_logp_difference/mean": 0.012760929018259048, |
| "step": 140, |
| "step_time": 550.6772611703724 |
| }, |
| { |
| "clip_ratio/high_max": 0.00029233113455120476, |
| "clip_ratio/high_mean": 4.872185563726816e-05, |
| "clip_ratio/low_mean": 4.4254150270717216e-05, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 9.297600590798538e-05, |
| "completions/clipped_ratio": 0.01041666679084301, |
| "completions/max_length": 2211.6, |
| "completions/max_terminated_length": 1961.7, |
| "completions/mean_length": 601.2291839599609, |
| "completions/mean_terminated_length": 565.5705291748047, |
| "completions/min_length": 143.6, |
| "completions/min_terminated_length": 143.6, |
| "entropy": 0.10819828314706684, |
| "epoch": 0.18007202881152462, |
| "frac_reward_zero_std": 0.10833333656191826, |
| "grad_norm": 0.04749957472085953, |
| "learning_rate": 8.922155688622756e-06, |
| "loss": -0.0236, |
| "num_tokens": 12486318.0, |
| "reward": 0.03778684511780739, |
| "reward_std": 0.25178585574030876, |
| "rewards/grpo_reward_function/mean": 0.03778683394193649, |
| "rewards/grpo_reward_function/std": 0.7447861909866333, |
| "sampling/importance_sampling_ratio/max": 2.481464517116547, |
| "sampling/importance_sampling_ratio/mean": 0.5163449585437775, |
| "sampling/importance_sampling_ratio/min": 3.668112331070006e-05, |
| "sampling/sampling_logp_difference/max": 2.379316544532776, |
| "sampling/sampling_logp_difference/mean": 0.012185737490653992, |
| "step": 150, |
| "step_time": 551.4488250606694 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 5.082125426270068e-06, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 5.082125426270068e-06, |
| "completions/clipped_ratio": 0.002083333395421505, |
| "completions/max_length": 2301.5, |
| "completions/max_terminated_length": 2174.7, |
| "completions/mean_length": 625.8979309082031, |
| "completions/mean_terminated_length": 618.7978820800781, |
| "completions/min_length": 170.2, |
| "completions/min_terminated_length": 170.2, |
| "entropy": 0.10213978644460439, |
| "epoch": 0.19207683073229292, |
| "frac_reward_zero_std": 0.08333333507180214, |
| "grad_norm": 0.05615560710430145, |
| "learning_rate": 9.520958083832336e-06, |
| "loss": 0.0043, |
| "num_tokens": 13325121.0, |
| "reward": 0.06630225274711847, |
| "reward_std": 0.18489644899964333, |
| "rewards/grpo_reward_function/mean": 0.0663022572407499, |
| "rewards/grpo_reward_function/std": 0.7666326016187668, |
| "sampling/importance_sampling_ratio/max": 2.1301008343696592, |
| "sampling/importance_sampling_ratio/mean": 0.44916791915893556, |
| "sampling/importance_sampling_ratio/min": 7.07070047610614e-05, |
| "sampling/sampling_logp_difference/max": 2.524372959136963, |
| "sampling/sampling_logp_difference/mean": 0.013301923777908087, |
| "step": 160, |
| "step_time": 538.0738848904148 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 833, |
| "num_input_tokens_seen": 13935881, |
| "num_train_epochs": 1, |
| "save_steps": 167, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|