AutoDecompiler-30B-pscode / trainer_state.json
AutoDecompiler's picture
Upload 21 files
bee38f2 verified
raw
history blame
23.9 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.20048019207683074,
"eval_steps": 500,
"global_step": 167,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01041666679084301,
"completions/max_length": 2827.7,
"completions/max_terminated_length": 2365.2,
"completions/mean_length": 630.6958435058593,
"completions/mean_terminated_length": 594.2115203857422,
"completions/min_length": 132.0,
"completions/min_terminated_length": 132.0,
"entropy": 0.11724737156182527,
"epoch": 0.012004801920768308,
"frac_reward_zero_std": 0.13333333507180214,
"grad_norm": 0.038755763322114944,
"learning_rate": 5.389221556886228e-07,
"loss": 0.0113,
"num_tokens": 799206.0,
"reward": -0.18518302096053957,
"reward_std": 0.20015475898981094,
"rewards/grpo_reward_function/mean": -0.18518302938900888,
"rewards/grpo_reward_function/std": 0.6885311886668205,
"sampling/importance_sampling_ratio/max": 2.1990586280822755,
"sampling/importance_sampling_ratio/mean": 0.4647279143333435,
"sampling/importance_sampling_ratio/min": 0.0005068443759228102,
"sampling/sampling_logp_difference/max": 2.5390082478523253,
"sampling/sampling_logp_difference/mean": 0.013516949955374002,
"step": 10,
"step_time": 569.2043406252749
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666977107525,
"completions/max_length": 2742.3,
"completions/max_terminated_length": 2382.7,
"completions/mean_length": 646.3521057128906,
"completions/mean_terminated_length": 610.6208740234375,
"completions/min_length": 131.6,
"completions/min_terminated_length": 131.6,
"entropy": 0.1267015876248479,
"epoch": 0.024009603841536616,
"frac_reward_zero_std": 0.1083333358168602,
"grad_norm": 0.10266362875699997,
"learning_rate": 1.1377245508982037e-06,
"loss": -0.0225,
"num_tokens": 1617099.0,
"reward": 0.01770310625433922,
"reward_std": 0.23654931634664536,
"rewards/grpo_reward_function/mean": 0.0177031047642231,
"rewards/grpo_reward_function/std": 0.8463600814342499,
"sampling/importance_sampling_ratio/max": 1.9834328293800354,
"sampling/importance_sampling_ratio/mean": 0.40317725837230683,
"sampling/importance_sampling_ratio/min": 0.0032052009667828283,
"sampling/sampling_logp_difference/max": 2.1022926926612855,
"sampling/sampling_logp_difference/mean": 0.01352061601355672,
"step": 20,
"step_time": 548.7977518392727
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.014583333767950535,
"completions/max_length": 3068.7,
"completions/max_terminated_length": 2151.3,
"completions/mean_length": 682.6271087646485,
"completions/mean_terminated_length": 632.6335662841797,
"completions/min_length": 188.7,
"completions/min_terminated_length": 188.7,
"entropy": 0.125905223749578,
"epoch": 0.03601440576230492,
"frac_reward_zero_std": 0.11666666939854622,
"grad_norm": 0.06394433230161667,
"learning_rate": 1.7365269461077847e-06,
"loss": 0.0229,
"num_tokens": 2465988.0,
"reward": -0.18962360136210918,
"reward_std": 0.19849726594984532,
"rewards/grpo_reward_function/mean": -0.18962358720600606,
"rewards/grpo_reward_function/std": 0.6894359931349754,
"sampling/importance_sampling_ratio/max": 2.460964298248291,
"sampling/importance_sampling_ratio/mean": 0.4253284126520157,
"sampling/importance_sampling_ratio/min": 1.070212653598215e-05,
"sampling/sampling_logp_difference/max": 2.8448187589645384,
"sampling/sampling_logp_difference/mean": 0.013953791093081236,
"step": 30,
"step_time": 554.4699578347615
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01458333358168602,
"completions/max_length": 2789.3,
"completions/max_terminated_length": 1984.1,
"completions/mean_length": 639.958349609375,
"completions/mean_terminated_length": 589.7686553955078,
"completions/min_length": 163.7,
"completions/min_terminated_length": 163.7,
"entropy": 0.11666738856583833,
"epoch": 0.04801920768307323,
"frac_reward_zero_std": 0.1166666679084301,
"grad_norm": 0.08781701326370239,
"learning_rate": 2.3353293413173654e-06,
"loss": -0.0064,
"num_tokens": 3297428.0,
"reward": -0.03914917185902596,
"reward_std": 0.21894535794854164,
"rewards/grpo_reward_function/mean": -0.03914917148649692,
"rewards/grpo_reward_function/std": 0.8605277180671692,
"sampling/importance_sampling_ratio/max": 2.0170334696769716,
"sampling/importance_sampling_ratio/mean": 0.4818507760763168,
"sampling/importance_sampling_ratio/min": 0.0015486635098906688,
"sampling/sampling_logp_difference/max": 2.52269823551178,
"sampling/sampling_logp_difference/mean": 0.012881174683570862,
"step": 40,
"step_time": 541.3490906376392
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 4.2163060425082224e-05,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 4.2163060425082224e-05,
"completions/clipped_ratio": 0.016666667349636555,
"completions/max_length": 2619.1,
"completions/max_terminated_length": 2081.8,
"completions/mean_length": 660.1000122070312,
"completions/mean_terminated_length": 602.3141662597657,
"completions/min_length": 205.5,
"completions/min_terminated_length": 205.5,
"entropy": 0.12803181819617748,
"epoch": 0.060024009603841535,
"frac_reward_zero_std": 0.1416666701436043,
"grad_norm": 0.03966222703456879,
"learning_rate": 2.9341317365269463e-06,
"loss": 0.0112,
"num_tokens": 4129824.0,
"reward": -0.11274411627091467,
"reward_std": 0.2275936236605048,
"rewards/grpo_reward_function/mean": -0.1127441140357405,
"rewards/grpo_reward_function/std": 0.8841595828533173,
"sampling/importance_sampling_ratio/max": 2.008124852180481,
"sampling/importance_sampling_ratio/mean": 0.46366433799266815,
"sampling/importance_sampling_ratio/min": 0.0006394427657710367,
"sampling/sampling_logp_difference/max": 2.58479106426239,
"sampling/sampling_logp_difference/mean": 0.013615725003182888,
"step": 50,
"step_time": 545.8774313618429
},
{
"clip_ratio/high_max": 4.673766961786896e-05,
"clip_ratio/high_mean": 7.7896114817122e-06,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 7.7896114817122e-06,
"completions/clipped_ratio": 0.01250000037252903,
"completions/max_length": 2304.9,
"completions/max_terminated_length": 1602.8,
"completions/mean_length": 596.5750213623047,
"completions/mean_terminated_length": 552.8821624755859,
"completions/min_length": 150.0,
"completions/min_terminated_length": 150.0,
"entropy": 0.12017892487347126,
"epoch": 0.07202881152460984,
"frac_reward_zero_std": 0.14166666865348815,
"grad_norm": 0.09313877671957016,
"learning_rate": 3.5329341317365273e-06,
"loss": -0.0307,
"num_tokens": 4936176.0,
"reward": -0.03057028874754906,
"reward_std": 0.2686158835887909,
"rewards/grpo_reward_function/mean": -0.03057028613984585,
"rewards/grpo_reward_function/std": 0.8661522060632706,
"sampling/importance_sampling_ratio/max": 2.2043559432029722,
"sampling/importance_sampling_ratio/mean": 0.4847503274679184,
"sampling/importance_sampling_ratio/min": 7.13271651690217e-05,
"sampling/sampling_logp_difference/max": 2.4851160287857055,
"sampling/sampling_logp_difference/mean": 0.013510057888925075,
"step": 60,
"step_time": 546.4311281181872
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 6.860105058876797e-05,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 6.860105058876797e-05,
"completions/clipped_ratio": 0.018750000558793545,
"completions/max_length": 3205.2,
"completions/max_terminated_length": 2270.1,
"completions/mean_length": 702.3604431152344,
"completions/mean_terminated_length": 636.497216796875,
"completions/min_length": 131.8,
"completions/min_terminated_length": 131.8,
"entropy": 0.11768119670450687,
"epoch": 0.08403361344537816,
"frac_reward_zero_std": 0.1083333358168602,
"grad_norm": 0.0433771014213562,
"learning_rate": 4.131736526946108e-06,
"loss": 0.0553,
"num_tokens": 5841149.0,
"reward": -0.0784481130540371,
"reward_std": 0.23132488708943127,
"rewards/grpo_reward_function/mean": -0.07844811640679836,
"rewards/grpo_reward_function/std": 0.8492624998092652,
"sampling/importance_sampling_ratio/max": 2.233333742618561,
"sampling/importance_sampling_ratio/mean": 0.4808308959007263,
"sampling/importance_sampling_ratio/min": 0.0008302704439188347,
"sampling/sampling_logp_difference/max": 2.9827078700065615,
"sampling/sampling_logp_difference/mean": 0.012630783580243587,
"step": 70,
"step_time": 561.5568902881816
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 8.41788569232449e-05,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 8.41788569232449e-05,
"completions/clipped_ratio": 0.01250000037252903,
"completions/max_length": 2541.9,
"completions/max_terminated_length": 2033.2,
"completions/mean_length": 612.5875213623046,
"completions/mean_terminated_length": 569.556509399414,
"completions/min_length": 149.1,
"completions/min_terminated_length": 149.1,
"entropy": 0.13153507560491562,
"epoch": 0.09603841536614646,
"frac_reward_zero_std": 0.10833333656191826,
"grad_norm": 0.08665835857391357,
"learning_rate": 4.730538922155689e-06,
"loss": 0.0701,
"num_tokens": 6606395.0,
"reward": -0.011540251970291137,
"reward_std": 0.19073452726006507,
"rewards/grpo_reward_function/mean": -0.011540257930755615,
"rewards/grpo_reward_function/std": 0.784630474448204,
"sampling/importance_sampling_ratio/max": 2.1984647274017335,
"sampling/importance_sampling_ratio/mean": 0.5050391256809235,
"sampling/importance_sampling_ratio/min": 0.00014755414913452113,
"sampling/sampling_logp_difference/max": 1.8997669577598573,
"sampling/sampling_logp_difference/mean": 0.013426258694380522,
"step": 80,
"step_time": 551.2647462010384
},
{
"clip_ratio/high_max": 2.2563176753465086e-05,
"clip_ratio/high_mean": 3.760529580176808e-06,
"clip_ratio/low_mean": 1.3224284339230508e-05,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 1.6984813919407314e-05,
"completions/clipped_ratio": 0.01041666679084301,
"completions/max_length": 2347.6,
"completions/max_terminated_length": 2078.4,
"completions/mean_length": 621.4354370117187,
"completions/mean_terminated_length": 586.118701171875,
"completions/min_length": 163.2,
"completions/min_terminated_length": 163.2,
"entropy": 0.1206895818002522,
"epoch": 0.10804321728691477,
"frac_reward_zero_std": 0.1083333358168602,
"grad_norm": 0.034579165279865265,
"learning_rate": 5.32934131736527e-06,
"loss": 0.0011,
"num_tokens": 7424828.0,
"reward": 0.02708094713743776,
"reward_std": 0.23181376457214356,
"rewards/grpo_reward_function/mean": 0.027080959058366716,
"rewards/grpo_reward_function/std": 0.8183064997196198,
"sampling/importance_sampling_ratio/max": 2.499999237060547,
"sampling/importance_sampling_ratio/mean": 0.486982923746109,
"sampling/importance_sampling_ratio/min": 0.000999147113179788,
"sampling/sampling_logp_difference/max": 2.079863798618317,
"sampling/sampling_logp_difference/mean": 0.012986462097615004,
"step": 90,
"step_time": 550.7672496054322
},
{
"clip_ratio/high_max": 0.00031043787457747386,
"clip_ratio/high_mean": 5.173964618734317e-05,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 5.173964618734317e-05,
"completions/clipped_ratio": 0.01666666716337204,
"completions/max_length": 2765.9,
"completions/max_terminated_length": 1842.8,
"completions/mean_length": 675.4458435058593,
"completions/mean_terminated_length": 618.1135345458985,
"completions/min_length": 178.2,
"completions/min_terminated_length": 178.2,
"entropy": 0.149181258212775,
"epoch": 0.12004801920768307,
"frac_reward_zero_std": 0.1416666701436043,
"grad_norm": 0.13291294872760773,
"learning_rate": 5.928143712574851e-06,
"loss": 0.0212,
"num_tokens": 8278282.0,
"reward": 0.0703774506226182,
"reward_std": 0.2336222641170025,
"rewards/grpo_reward_function/mean": 0.07037745183333755,
"rewards/grpo_reward_function/std": 0.8314530551433563,
"sampling/importance_sampling_ratio/max": 2.2870466232299806,
"sampling/importance_sampling_ratio/mean": 0.4643064886331558,
"sampling/importance_sampling_ratio/min": 2.921815394074656e-05,
"sampling/sampling_logp_difference/max": 1.9242668151855469,
"sampling/sampling_logp_difference/mean": 0.014198462665081023,
"step": 100,
"step_time": 547.0480061549694
},
{
"clip_ratio/high_max": 0.0003238706885895226,
"clip_ratio/high_mean": 5.397844997787615e-05,
"clip_ratio/low_mean": 7.069677012623287e-05,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.00012467522010410902,
"completions/clipped_ratio": 0.020833334513008596,
"completions/max_length": 2925.3,
"completions/max_terminated_length": 2297.9,
"completions/mean_length": 680.045849609375,
"completions/mean_terminated_length": 605.8596954345703,
"completions/min_length": 186.9,
"completions/min_terminated_length": 186.9,
"entropy": 0.1511568833142519,
"epoch": 0.13205282112845138,
"frac_reward_zero_std": 0.08333333507180214,
"grad_norm": 0.0421764962375164,
"learning_rate": 6.526946107784432e-06,
"loss": -0.0031,
"num_tokens": 9165800.0,
"reward": 0.04289367534220219,
"reward_std": 0.24053554534912108,
"rewards/grpo_reward_function/mean": 0.042893677949905396,
"rewards/grpo_reward_function/std": 0.835248938202858,
"sampling/importance_sampling_ratio/max": 2.332168984413147,
"sampling/importance_sampling_ratio/mean": 0.4403663039207458,
"sampling/importance_sampling_ratio/min": 0.0001706225667930994,
"sampling/sampling_logp_difference/max": 2.4483426809310913,
"sampling/sampling_logp_difference/mean": 0.014532316662371158,
"step": 110,
"step_time": 548.8019280240871
},
{
"clip_ratio/high_max": 0.00012998266611248256,
"clip_ratio/high_mean": 2.16637781704776e-05,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 2.16637781704776e-05,
"completions/clipped_ratio": 0.002083333395421505,
"completions/max_length": 1812.6,
"completions/max_terminated_length": 1592.9,
"completions/mean_length": 602.1333526611328,
"completions/mean_terminated_length": 594.61220703125,
"completions/min_length": 179.7,
"completions/min_terminated_length": 179.7,
"entropy": 0.16710406728088856,
"epoch": 0.14405762304921968,
"frac_reward_zero_std": 0.0833333358168602,
"grad_norm": 0.07664494961500168,
"learning_rate": 7.125748502994012e-06,
"loss": -0.0309,
"num_tokens": 9975204.0,
"reward": 0.0826782912015915,
"reward_std": 0.23934805542230606,
"rewards/grpo_reward_function/mean": 0.0826782874763012,
"rewards/grpo_reward_function/std": 0.8862796187400818,
"sampling/importance_sampling_ratio/max": 2.2503564238548277,
"sampling/importance_sampling_ratio/mean": 0.4635925680398941,
"sampling/importance_sampling_ratio/min": 0.0009416027547558823,
"sampling/sampling_logp_difference/max": 2.073215699195862,
"sampling/sampling_logp_difference/mean": 0.01480921907350421,
"step": 120,
"step_time": 539.0031213279814
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 1.9831826648442074e-06,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 1.9831826648442074e-06,
"completions/clipped_ratio": 0.006250000186264515,
"completions/max_length": 2290.7,
"completions/max_terminated_length": 1880.6,
"completions/mean_length": 599.5916900634766,
"completions/mean_terminated_length": 578.0104217529297,
"completions/min_length": 168.6,
"completions/min_terminated_length": 168.6,
"entropy": 0.16012020353227854,
"epoch": 0.15606242496998798,
"frac_reward_zero_std": 0.11666667014360428,
"grad_norm": 0.05220530927181244,
"learning_rate": 7.724550898203594e-06,
"loss": -0.0377,
"num_tokens": 10768324.0,
"reward": -0.0507307555526495,
"reward_std": 0.18351687043905257,
"rewards/grpo_reward_function/mean": -0.05073075201362372,
"rewards/grpo_reward_function/std": 0.7544578343629837,
"sampling/importance_sampling_ratio/max": 2.375898337364197,
"sampling/importance_sampling_ratio/mean": 0.524286350607872,
"sampling/importance_sampling_ratio/min": 0.00018855740054277704,
"sampling/sampling_logp_difference/max": 2.009746181964874,
"sampling/sampling_logp_difference/mean": 0.01376222250983119,
"step": 130,
"step_time": 548.6409472068772
},
{
"clip_ratio/high_max": 0.00017211703816428782,
"clip_ratio/high_mean": 2.86861730273813e-05,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 2.86861730273813e-05,
"completions/clipped_ratio": 0.00833333358168602,
"completions/max_length": 3136.9,
"completions/max_terminated_length": 2221.0,
"completions/mean_length": 684.8500244140625,
"completions/mean_terminated_length": 655.5515625,
"completions/min_length": 168.7,
"completions/min_terminated_length": 168.7,
"entropy": 0.12020768839865922,
"epoch": 0.16806722689075632,
"frac_reward_zero_std": 0.09166666939854622,
"grad_norm": 0.058197326958179474,
"learning_rate": 8.323353293413174e-06,
"loss": -0.0342,
"num_tokens": 11642436.0,
"reward": 0.04102597634773701,
"reward_std": 0.2864942252635956,
"rewards/grpo_reward_function/mean": 0.041025977826211604,
"rewards/grpo_reward_function/std": 0.8844284832477569,
"sampling/importance_sampling_ratio/max": 2.37525737285614,
"sampling/importance_sampling_ratio/mean": 0.46323378682136535,
"sampling/importance_sampling_ratio/min": 2.6157076149502247e-08,
"sampling/sampling_logp_difference/max": 2.5657184720039368,
"sampling/sampling_logp_difference/mean": 0.012760929018259048,
"step": 140,
"step_time": 550.6772611703724
},
{
"clip_ratio/high_max": 0.00029233113455120476,
"clip_ratio/high_mean": 4.872185563726816e-05,
"clip_ratio/low_mean": 4.4254150270717216e-05,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 9.297600590798538e-05,
"completions/clipped_ratio": 0.01041666679084301,
"completions/max_length": 2211.6,
"completions/max_terminated_length": 1961.7,
"completions/mean_length": 601.2291839599609,
"completions/mean_terminated_length": 565.5705291748047,
"completions/min_length": 143.6,
"completions/min_terminated_length": 143.6,
"entropy": 0.10819828314706684,
"epoch": 0.18007202881152462,
"frac_reward_zero_std": 0.10833333656191826,
"grad_norm": 0.04749957472085953,
"learning_rate": 8.922155688622756e-06,
"loss": -0.0236,
"num_tokens": 12486318.0,
"reward": 0.03778684511780739,
"reward_std": 0.25178585574030876,
"rewards/grpo_reward_function/mean": 0.03778683394193649,
"rewards/grpo_reward_function/std": 0.7447861909866333,
"sampling/importance_sampling_ratio/max": 2.481464517116547,
"sampling/importance_sampling_ratio/mean": 0.5163449585437775,
"sampling/importance_sampling_ratio/min": 3.668112331070006e-05,
"sampling/sampling_logp_difference/max": 2.379316544532776,
"sampling/sampling_logp_difference/mean": 0.012185737490653992,
"step": 150,
"step_time": 551.4488250606694
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 5.082125426270068e-06,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 5.082125426270068e-06,
"completions/clipped_ratio": 0.002083333395421505,
"completions/max_length": 2301.5,
"completions/max_terminated_length": 2174.7,
"completions/mean_length": 625.8979309082031,
"completions/mean_terminated_length": 618.7978820800781,
"completions/min_length": 170.2,
"completions/min_terminated_length": 170.2,
"entropy": 0.10213978644460439,
"epoch": 0.19207683073229292,
"frac_reward_zero_std": 0.08333333507180214,
"grad_norm": 0.05615560710430145,
"learning_rate": 9.520958083832336e-06,
"loss": 0.0043,
"num_tokens": 13325121.0,
"reward": 0.06630225274711847,
"reward_std": 0.18489644899964333,
"rewards/grpo_reward_function/mean": 0.0663022572407499,
"rewards/grpo_reward_function/std": 0.7666326016187668,
"sampling/importance_sampling_ratio/max": 2.1301008343696592,
"sampling/importance_sampling_ratio/mean": 0.44916791915893556,
"sampling/importance_sampling_ratio/min": 7.07070047610614e-05,
"sampling/sampling_logp_difference/max": 2.524372959136963,
"sampling/sampling_logp_difference/mean": 0.013301923777908087,
"step": 160,
"step_time": 538.0738848904148
}
],
"logging_steps": 10,
"max_steps": 833,
"num_input_tokens_seen": 13935881,
"num_train_epochs": 1,
"save_steps": 167,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}