AutoDecompiler-30B-e2e / trainer_state.json
AutoDecompiler's picture
Upload 22 files
4f94f6f verified
raw
history blame
23.4 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.20048019207683074,
"eval_steps": 500,
"global_step": 167,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012500000186264515,
"completions/max_length": 2954.7,
"completions/max_terminated_length": 1951.6,
"completions/mean_length": 545.102099609375,
"completions/mean_terminated_length": 500.0404846191406,
"completions/min_length": 117.8,
"completions/min_terminated_length": 117.8,
"entropy": 0.1633674878627062,
"epoch": 0.012004801920768308,
"frac_reward_zero_std": 0.05000000149011612,
"grad_norm": 0.0819886103272438,
"learning_rate": 5.389221556886228e-07,
"loss": 0.0538,
"num_tokens": 1060997.0,
"reward": -0.349642014503479,
"reward_std": 0.18912948295474052,
"rewards/grpo_reward_function/mean": -0.3496420085430145,
"rewards/grpo_reward_function/std": 0.4486300081014633,
"sampling/importance_sampling_ratio/max": 2.3219674229621887,
"sampling/importance_sampling_ratio/mean": 0.3698740124702454,
"sampling/importance_sampling_ratio/min": 1.1996005980563495e-06,
"sampling/sampling_logp_difference/max": 2.5826863408088685,
"sampling/sampling_logp_difference/mean": 0.019079525023698807,
"step": 10,
"step_time": 591.6221411965787
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01250000037252903,
"completions/max_length": 2285.6,
"completions/max_terminated_length": 1834.2,
"completions/mean_length": 525.1979339599609,
"completions/mean_terminated_length": 481.30433044433596,
"completions/min_length": 111.4,
"completions/min_terminated_length": 111.4,
"entropy": 0.15604820642620326,
"epoch": 0.024009603841536616,
"frac_reward_zero_std": 0.0416666679084301,
"grad_norm": 0.09551126509904861,
"learning_rate": 1.1377245508982037e-06,
"loss": -0.0139,
"num_tokens": 2123212.0,
"reward": -0.298923921585083,
"reward_std": 0.21090517602860928,
"rewards/grpo_reward_function/mean": -0.2989239178597927,
"rewards/grpo_reward_function/std": 0.4665490254759789,
"sampling/importance_sampling_ratio/max": 2.135484504699707,
"sampling/importance_sampling_ratio/mean": 0.40152732133865354,
"sampling/importance_sampling_ratio/min": 2.7301041336613706e-05,
"sampling/sampling_logp_difference/max": 2.5806180000305177,
"sampling/sampling_logp_difference/mean": 0.019163084402680396,
"step": 20,
"step_time": 554.5896356501617
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01666666716337204,
"completions/max_length": 3269.3,
"completions/max_terminated_length": 2072.6,
"completions/mean_length": 556.327099609375,
"completions/mean_terminated_length": 496.14097595214844,
"completions/min_length": 132.3,
"completions/min_terminated_length": 132.3,
"entropy": 0.18481314480304717,
"epoch": 0.03601440576230492,
"frac_reward_zero_std": 0.05000000074505806,
"grad_norm": 0.11139781028032303,
"learning_rate": 1.7365269461077847e-06,
"loss": 0.0256,
"num_tokens": 3227189.0,
"reward": -0.409818297624588,
"reward_std": 0.22780176997184753,
"rewards/grpo_reward_function/mean": -0.4098182961344719,
"rewards/grpo_reward_function/std": 0.5334153980016708,
"sampling/importance_sampling_ratio/max": 2.277985179424286,
"sampling/importance_sampling_ratio/mean": 0.3370798110961914,
"sampling/importance_sampling_ratio/min": 2.6906073216806556e-05,
"sampling/sampling_logp_difference/max": 2.5000483632087707,
"sampling/sampling_logp_difference/mean": 0.02049510907381773,
"step": 30,
"step_time": 555.5971007851883
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01041666679084301,
"completions/max_length": 2135.6,
"completions/max_terminated_length": 1478.5,
"completions/mean_length": 508.5041778564453,
"completions/mean_terminated_length": 472.1120574951172,
"completions/min_length": 147.4,
"completions/min_terminated_length": 147.4,
"entropy": 0.1670758031308651,
"epoch": 0.04801920768307323,
"frac_reward_zero_std": 0.0416666679084301,
"grad_norm": 0.06704321503639221,
"learning_rate": 2.3353293413173654e-06,
"loss": -0.0127,
"num_tokens": 4318559.0,
"reward": -0.2258751168847084,
"reward_std": 0.16097248084843158,
"rewards/grpo_reward_function/mean": -0.22587510757148266,
"rewards/grpo_reward_function/std": 0.49552616477012634,
"sampling/importance_sampling_ratio/max": 2.3126969814300535,
"sampling/importance_sampling_ratio/mean": 0.35644740611314774,
"sampling/importance_sampling_ratio/min": 2.7391963689638034e-05,
"sampling/sampling_logp_difference/max": 2.8060175657272337,
"sampling/sampling_logp_difference/mean": 0.01994446888566017,
"step": 40,
"step_time": 554.565231207572
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00416666679084301,
"completions/max_length": 1859.8,
"completions/max_terminated_length": 1757.3,
"completions/mean_length": 513.3979309082031,
"completions/mean_terminated_length": 499.66423950195315,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"entropy": 0.17271990440785884,
"epoch": 0.060024009603841535,
"frac_reward_zero_std": 0.06666666865348816,
"grad_norm": 0.09258268028497696,
"learning_rate": 2.9341317365269463e-06,
"loss": 0.0015,
"num_tokens": 5370018.0,
"reward": -0.3178896278142929,
"reward_std": 0.1635244082659483,
"rewards/grpo_reward_function/mean": -0.3178896352648735,
"rewards/grpo_reward_function/std": 0.45689679607748984,
"sampling/importance_sampling_ratio/max": 2.195555794239044,
"sampling/importance_sampling_ratio/mean": 0.3522403955459595,
"sampling/importance_sampling_ratio/min": 7.164277021729504e-05,
"sampling/sampling_logp_difference/max": 2.4301879167556764,
"sampling/sampling_logp_difference/mean": 0.02071673283353448,
"step": 50,
"step_time": 548.9367319711484
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00416666679084301,
"completions/max_length": 2202.6,
"completions/max_terminated_length": 1812.6,
"completions/mean_length": 506.99168395996094,
"completions/mean_terminated_length": 492.1666778564453,
"completions/min_length": 127.8,
"completions/min_terminated_length": 127.8,
"entropy": 0.160004629381001,
"epoch": 0.07202881152460984,
"frac_reward_zero_std": 0.0416666679084301,
"grad_norm": 0.05922295153141022,
"learning_rate": 3.5329341317365273e-06,
"loss": -0.0033,
"num_tokens": 6466162.0,
"reward": -0.34021527171134947,
"reward_std": 0.18639734461903573,
"rewards/grpo_reward_function/mean": -0.34021526128053664,
"rewards/grpo_reward_function/std": 0.5187867254018783,
"sampling/importance_sampling_ratio/max": 2.010967791080475,
"sampling/importance_sampling_ratio/mean": 0.30313637256622317,
"sampling/importance_sampling_ratio/min": 3.7146345167826667e-06,
"sampling/sampling_logp_difference/max": 2.6932833194732666,
"sampling/sampling_logp_difference/mean": 0.02041825857013464,
"step": 60,
"step_time": 530.4414538932033
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.016666666977107523,
"completions/max_length": 2579.7,
"completions/max_terminated_length": 2077.7,
"completions/mean_length": 557.7854309082031,
"completions/mean_terminated_length": 499.6593933105469,
"completions/min_length": 121.5,
"completions/min_terminated_length": 121.5,
"entropy": 0.16313071362674236,
"epoch": 0.08403361344537816,
"frac_reward_zero_std": 0.10000000223517418,
"grad_norm": 0.014785214327275753,
"learning_rate": 4.131736526946108e-06,
"loss": 0.0424,
"num_tokens": 7608683.0,
"reward": -0.33393135815858843,
"reward_std": 0.19106332510709761,
"rewards/grpo_reward_function/mean": -0.33393134772777555,
"rewards/grpo_reward_function/std": 0.5677398703992367,
"sampling/importance_sampling_ratio/max": 1.9730541229248046,
"sampling/importance_sampling_ratio/mean": 0.3427995890378952,
"sampling/importance_sampling_ratio/min": 1.0688633483368904e-05,
"sampling/sampling_logp_difference/max": 2.8619158267974854,
"sampling/sampling_logp_difference/mean": 0.0201931843534112,
"step": 70,
"step_time": 541.6015901661478
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01666666716337204,
"completions/max_length": 3292.7,
"completions/max_terminated_length": 2336.3,
"completions/mean_length": 605.0791870117188,
"completions/mean_terminated_length": 546.5141571044921,
"completions/min_length": 142.3,
"completions/min_terminated_length": 142.3,
"entropy": 0.16853776723146438,
"epoch": 0.09603841536614646,
"frac_reward_zero_std": 0.10000000298023223,
"grad_norm": 0.0669359341263771,
"learning_rate": 4.730538922155689e-06,
"loss": 0.003,
"num_tokens": 8693089.0,
"reward": -0.36407424658536913,
"reward_std": 0.15138040184974672,
"rewards/grpo_reward_function/mean": -0.364074233174324,
"rewards/grpo_reward_function/std": 0.4984104484319687,
"sampling/importance_sampling_ratio/max": 2.417657721042633,
"sampling/importance_sampling_ratio/mean": 0.3272848010063171,
"sampling/importance_sampling_ratio/min": 6.19425904005766e-05,
"sampling/sampling_logp_difference/max": 2.832179582118988,
"sampling/sampling_logp_difference/mean": 0.019849142245948314,
"step": 80,
"step_time": 559.4207322074101
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.018750000558793545,
"completions/max_length": 3217.2,
"completions/max_terminated_length": 2202.3,
"completions/mean_length": 652.0937744140625,
"completions/mean_terminated_length": 587.0378173828125,
"completions/min_length": 164.3,
"completions/min_terminated_length": 164.3,
"entropy": 0.16202539429068566,
"epoch": 0.10804321728691477,
"frac_reward_zero_std": 0.10000000223517418,
"grad_norm": 0.03904345631599426,
"learning_rate": 5.32934131736527e-06,
"loss": -0.0041,
"num_tokens": 9849890.0,
"reward": -0.3628114402294159,
"reward_std": 0.2544385172426701,
"rewards/grpo_reward_function/mean": -0.3628114327788353,
"rewards/grpo_reward_function/std": 0.6255016416311264,
"sampling/importance_sampling_ratio/max": 1.7914996325969696,
"sampling/importance_sampling_ratio/mean": 0.3064177379012108,
"sampling/importance_sampling_ratio/min": 1.5591655392199753e-05,
"sampling/sampling_logp_difference/max": 2.840235471725464,
"sampling/sampling_logp_difference/mean": 0.01814730800688267,
"step": 90,
"step_time": 555.9057860235683
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.010416666977107525,
"completions/max_length": 2567.4,
"completions/max_terminated_length": 2092.1,
"completions/mean_length": 548.7250213623047,
"completions/mean_terminated_length": 511.10838317871094,
"completions/min_length": 116.3,
"completions/min_terminated_length": 116.3,
"entropy": 0.16799122765660285,
"epoch": 0.12004801920768307,
"frac_reward_zero_std": 0.10000000223517418,
"grad_norm": 0.07849112898111343,
"learning_rate": 5.928143712574851e-06,
"loss": -0.0009,
"num_tokens": 10951862.0,
"reward": -0.37651871144771576,
"reward_std": 0.17827629819512367,
"rewards/grpo_reward_function/mean": -0.3765186980366707,
"rewards/grpo_reward_function/std": 0.4988637834787369,
"sampling/importance_sampling_ratio/max": 2.2177215456962585,
"sampling/importance_sampling_ratio/mean": 0.3616850808262825,
"sampling/importance_sampling_ratio/min": 0.0,
"sampling/sampling_logp_difference/max": 2.7038833022117617,
"sampling/sampling_logp_difference/mean": 0.019389390759170056,
"step": 100,
"step_time": 539.0174913492053
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.012500000186264515,
"completions/max_length": 2678.3,
"completions/max_terminated_length": 1888.8,
"completions/mean_length": 535.3271057128907,
"completions/mean_terminated_length": 491.43089904785154,
"completions/min_length": 127.5,
"completions/min_terminated_length": 127.5,
"entropy": 0.1744688918814063,
"epoch": 0.13205282112845138,
"frac_reward_zero_std": 0.0833333358168602,
"grad_norm": 0.043163955211639404,
"learning_rate": 6.526946107784432e-06,
"loss": -0.0173,
"num_tokens": 12060171.0,
"reward": -0.2666824638843536,
"reward_std": 0.12587157338857652,
"rewards/grpo_reward_function/mean": -0.26668245121836665,
"rewards/grpo_reward_function/std": 0.40698017328977587,
"sampling/importance_sampling_ratio/max": 1.899654006958008,
"sampling/importance_sampling_ratio/mean": 0.37773958817124365,
"sampling/importance_sampling_ratio/min": 3.46376573256979e-14,
"sampling/sampling_logp_difference/max": 2.272650396823883,
"sampling/sampling_logp_difference/mean": 0.01878545032814145,
"step": 110,
"step_time": 554.1334780954755
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01666666716337204,
"completions/max_length": 3077.6,
"completions/max_terminated_length": 1915.6,
"completions/mean_length": 565.0541870117188,
"completions/mean_terminated_length": 504.5126983642578,
"completions/min_length": 138.5,
"completions/min_terminated_length": 138.5,
"entropy": 0.17697170842438936,
"epoch": 0.14405762304921968,
"frac_reward_zero_std": 0.0833333358168602,
"grad_norm": 0.07391675561666489,
"learning_rate": 7.125748502994012e-06,
"loss": 0.042,
"num_tokens": 13168921.0,
"reward": -0.3262443482875824,
"reward_std": 0.20909521877765655,
"rewards/grpo_reward_function/mean": -0.3262443423271179,
"rewards/grpo_reward_function/std": 0.5003126785159111,
"sampling/importance_sampling_ratio/max": 2.301289737224579,
"sampling/importance_sampling_ratio/mean": 0.38998747766017916,
"sampling/importance_sampling_ratio/min": 7.411608444201079e-05,
"sampling/sampling_logp_difference/max": 2.7557363152503966,
"sampling/sampling_logp_difference/mean": 0.01885297931730747,
"step": 120,
"step_time": 549.9578140962869
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00416666679084301,
"completions/max_length": 2290.1,
"completions/max_terminated_length": 1886.2,
"completions/mean_length": 499.789599609375,
"completions/mean_terminated_length": 484.77695617675784,
"completions/min_length": 127.0,
"completions/min_terminated_length": 127.0,
"entropy": 0.19473983831703662,
"epoch": 0.15606242496998798,
"frac_reward_zero_std": 0.03333333432674408,
"grad_norm": 0.06443756073713303,
"learning_rate": 7.724550898203594e-06,
"loss": -0.0203,
"num_tokens": 14212700.0,
"reward": -0.26288305670022966,
"reward_std": 0.1738448791205883,
"rewards/grpo_reward_function/mean": -0.26288305073976515,
"rewards/grpo_reward_function/std": 0.48268924951553344,
"sampling/importance_sampling_ratio/max": 2.1470563650131225,
"sampling/importance_sampling_ratio/mean": 0.35831653475761416,
"sampling/importance_sampling_ratio/min": 1.968140890369341e-05,
"sampling/sampling_logp_difference/max": 2.0650948524475097,
"sampling/sampling_logp_difference/mean": 0.01968124657869339,
"step": 130,
"step_time": 535.0768479405903
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01250000037252903,
"completions/max_length": 3015.9,
"completions/max_terminated_length": 1749.5,
"completions/mean_length": 492.37709045410156,
"completions/mean_terminated_length": 446.93010864257815,
"completions/min_length": 114.7,
"completions/min_terminated_length": 114.7,
"entropy": 0.21128173358738422,
"epoch": 0.16806722689075632,
"frac_reward_zero_std": 0.05833333432674408,
"grad_norm": 0.09337731450796127,
"learning_rate": 8.323353293413174e-06,
"loss": 0.0277,
"num_tokens": 15293077.0,
"reward": -0.26287811398506167,
"reward_std": 0.14675465896725653,
"rewards/grpo_reward_function/mean": -0.26287810802459716,
"rewards/grpo_reward_function/std": 0.37649901360273363,
"sampling/importance_sampling_ratio/max": 2.3814776659011843,
"sampling/importance_sampling_ratio/mean": 0.46577124297618866,
"sampling/importance_sampling_ratio/min": 1.3427511260960534e-06,
"sampling/sampling_logp_difference/max": 2.155888545513153,
"sampling/sampling_logp_difference/mean": 0.019245322328060865,
"step": 140,
"step_time": 552.9976656335406
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 4.9971032422035935e-05,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 4.9971032422035935e-05,
"completions/clipped_ratio": 0.01041666679084301,
"completions/max_length": 2447.6,
"completions/max_terminated_length": 1785.5,
"completions/mean_length": 552.295849609375,
"completions/mean_terminated_length": 515.5673645019531,
"completions/min_length": 144.7,
"completions/min_terminated_length": 144.7,
"entropy": 0.2773955374956131,
"epoch": 0.18007202881152462,
"frac_reward_zero_std": 0.02500000074505806,
"grad_norm": 0.1007687970995903,
"learning_rate": 8.922155688622756e-06,
"loss": -0.0141,
"num_tokens": 16425531.0,
"reward": -0.26935882605612277,
"reward_std": 0.12829533144831656,
"rewards/grpo_reward_function/mean": -0.2693588202819228,
"rewards/grpo_reward_function/std": 0.33063299730420115,
"sampling/importance_sampling_ratio/max": 2.1382891178131103,
"sampling/importance_sampling_ratio/mean": 0.4445547193288803,
"sampling/importance_sampling_ratio/min": 7.412413807410812e-05,
"sampling/sampling_logp_difference/max": 1.7894923090934753,
"sampling/sampling_logp_difference/mean": 0.019091704115271568,
"step": 150,
"step_time": 547.844954107888
},
{
"clip_ratio/high_max": 4.80769231216982e-05,
"clip_ratio/high_mean": 8.012820762814954e-06,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 8.012820762814954e-06,
"completions/clipped_ratio": 0.010416666977107525,
"completions/max_length": 2388.9,
"completions/max_terminated_length": 1982.4,
"completions/mean_length": 544.4896026611328,
"completions/mean_terminated_length": 507.44931945800784,
"completions/min_length": 122.8,
"completions/min_terminated_length": 122.8,
"entropy": 0.3212181769311428,
"epoch": 0.19207683073229292,
"frac_reward_zero_std": 0.06666666865348816,
"grad_norm": 0.06833557039499283,
"learning_rate": 9.520958083832336e-06,
"loss": 0.0134,
"num_tokens": 17564686.0,
"reward": -0.28017824441194533,
"reward_std": 0.20728585943579675,
"rewards/grpo_reward_function/mean": -0.28017824441194533,
"rewards/grpo_reward_function/std": 0.5502792000770569,
"sampling/importance_sampling_ratio/max": 2.1408735513687134,
"sampling/importance_sampling_ratio/mean": 0.5016659319400787,
"sampling/importance_sampling_ratio/min": 0.000778414961314411,
"sampling/sampling_logp_difference/max": 1.89249027967453,
"sampling/sampling_logp_difference/mean": 0.01902961954474449,
"step": 160,
"step_time": 536.2867619435303
}
],
"logging_steps": 10,
"max_steps": 833,
"num_input_tokens_seen": 18381921,
"num_train_epochs": 1,
"save_steps": 167,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}