AutoDecompiler-6.7B-pscode / trainer_state.json
AutoDecompiler's picture
Upload 23 files
e844d4d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.40384615384615385,
"eval_steps": 500,
"global_step": 126,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2993.0,
"completions/mean_length": 842.1671875,
"completions/mean_terminated_length": 625.3808288574219,
"completions/min_length": 143.7,
"completions/min_terminated_length": 143.7,
"entropy": 0.18502369821071624,
"epoch": 0.03205128205128205,
"frac_reward_zero_std": 0.046875,
"grad_norm": 0.00025154745623815123,
"learning_rate": 1.4285714285714286e-06,
"loss": -0.0002,
"num_tokens": 2443746.0,
"reward": -0.22845993265509607,
"reward_std": 0.2702211543917656,
"rewards/grpo_reward_function/mean": -0.2284599334001541,
"rewards/grpo_reward_function/std": 0.7218511283397675,
"sampling/importance_sampling_ratio/max": 0.12355739073827862,
"sampling/importance_sampling_ratio/mean": 0.0029692337644519284,
"sampling/importance_sampling_ratio/min": 5.253219159649809e-30,
"sampling/sampling_logp_difference/max": 1.5544446587562561,
"sampling/sampling_logp_difference/mean": 0.03719679862260818,
"step": 10,
"step_time": 152.91457959786058
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05625,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2889.4,
"completions/mean_length": 863.446875,
"completions/mean_terminated_length": 671.0787475585937,
"completions/min_length": 164.7,
"completions/min_terminated_length": 164.7,
"entropy": 0.1817332101985812,
"epoch": 0.0641025641025641,
"frac_reward_zero_std": 0.05,
"grad_norm": 1.9538725158444555e-05,
"learning_rate": 3.015873015873016e-06,
"loss": -0.0001,
"num_tokens": 4916654.0,
"reward": -0.18899608142673968,
"reward_std": 0.3027258589863777,
"rewards/grpo_reward_function/mean": -0.18899608589708805,
"rewards/grpo_reward_function/std": 0.7454370498657227,
"sampling/importance_sampling_ratio/max": 0.0923810960026458,
"sampling/importance_sampling_ratio/mean": 0.0023226177279866535,
"sampling/importance_sampling_ratio/min": 6.460355799993298e-36,
"sampling/sampling_logp_difference/max": 1.6125647306442261,
"sampling/sampling_logp_difference/mean": 0.03694682139903307,
"step": 20,
"step_time": 159.88802388124168
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2620.1,
"completions/mean_length": 832.69296875,
"completions/mean_terminated_length": 672.2784484863281,
"completions/min_length": 167.4,
"completions/min_terminated_length": 167.4,
"entropy": 0.19116276763379575,
"epoch": 0.09615384615384616,
"frac_reward_zero_std": 0.03125,
"grad_norm": 0.00031796028415218583,
"learning_rate": 4.603174603174604e-06,
"loss": -0.0005,
"num_tokens": 7342561.0,
"reward": -0.20435776934027672,
"reward_std": 0.2867868050932884,
"rewards/grpo_reward_function/mean": -0.20435777083039283,
"rewards/grpo_reward_function/std": 0.7296508550643921,
"sampling/importance_sampling_ratio/max": 0.06920737095642834,
"sampling/importance_sampling_ratio/mean": 0.0011224002329981886,
"sampling/importance_sampling_ratio/min": 5.1338851255929605e-34,
"sampling/sampling_logp_difference/max": 1.7805282711982726,
"sampling/sampling_logp_difference/mean": 0.03969721123576164,
"step": 30,
"step_time": 153.14357568509877
},
{
"clip_ratio/high_max": 5.1770552090602e-06,
"clip_ratio/high_mean": 6.47131901132525e-07,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 6.47131901132525e-07,
"completions/clipped_ratio": 0.075,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2996.0,
"completions/mean_length": 908.88203125,
"completions/mean_terminated_length": 650.461572265625,
"completions/min_length": 172.5,
"completions/min_terminated_length": 172.5,
"entropy": 0.18940101601183415,
"epoch": 0.1282051282051282,
"frac_reward_zero_std": 0.0375,
"grad_norm": 0.00046653815964065254,
"learning_rate": 6.1904761904761914e-06,
"loss": -0.0007,
"num_tokens": 9897290.0,
"reward": -0.20948485173285009,
"reward_std": 0.3274365648627281,
"rewards/grpo_reward_function/mean": -0.20948485508561135,
"rewards/grpo_reward_function/std": 0.7691417872905731,
"sampling/importance_sampling_ratio/max": 0.12555439178831876,
"sampling/importance_sampling_ratio/mean": 0.0029983414759044537,
"sampling/importance_sampling_ratio/min": 7.312246668686746e-35,
"sampling/sampling_logp_difference/max": 1.8727038741111754,
"sampling/sampling_logp_difference/mean": 0.03728015031665564,
"step": 40,
"step_time": 146.35395600683987
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0515625,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2913.9,
"completions/mean_length": 806.88984375,
"completions/mean_terminated_length": 627.2792419433594,
"completions/min_length": 123.4,
"completions/min_terminated_length": 123.4,
"entropy": 0.17744111455976963,
"epoch": 0.16025641025641027,
"frac_reward_zero_std": 0.05625,
"grad_norm": 9.61458607191603e-05,
"learning_rate": 7.77777777777778e-06,
"loss": 0.0003,
"num_tokens": 12302889.0,
"reward": -0.18009744100272657,
"reward_std": 0.28012245148420334,
"rewards/grpo_reward_function/mean": -0.18009743914008142,
"rewards/grpo_reward_function/std": 0.7488289535045624,
"sampling/importance_sampling_ratio/max": 0.17218044362962245,
"sampling/importance_sampling_ratio/mean": 0.004992885573301464,
"sampling/importance_sampling_ratio/min": 1.0306885153000913e-30,
"sampling/sampling_logp_difference/max": 1.6258580565452576,
"sampling/sampling_logp_difference/mean": 0.03607845064252615,
"step": 50,
"step_time": 147.80184614248574
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03984375,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2814.3,
"completions/mean_length": 809.65078125,
"completions/mean_terminated_length": 673.7666534423828,
"completions/min_length": 141.0,
"completions/min_terminated_length": 141.0,
"entropy": 0.18477382734417916,
"epoch": 0.19230769230769232,
"frac_reward_zero_std": 0.034375,
"grad_norm": 0.0005422528630453429,
"learning_rate": 9.365079365079366e-06,
"loss": -0.0002,
"num_tokens": 14762678.0,
"reward": -0.18410459933802487,
"reward_std": 0.3047805741429329,
"rewards/grpo_reward_function/mean": -0.18410460744053125,
"rewards/grpo_reward_function/std": 0.7665571451187134,
"sampling/importance_sampling_ratio/max": 0.10517687269020826,
"sampling/importance_sampling_ratio/mean": 0.0030420748858887236,
"sampling/importance_sampling_ratio/min": 3.9674455768173534e-27,
"sampling/sampling_logp_difference/max": 1.571552813053131,
"sampling/sampling_logp_difference/mean": 0.03778362385928631,
"step": 60,
"step_time": 144.8840892329812
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.059375,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2730.5,
"completions/mean_length": 846.08828125,
"completions/mean_terminated_length": 641.0483703613281,
"completions/min_length": 134.2,
"completions/min_terminated_length": 134.2,
"entropy": 0.18578412756323814,
"epoch": 0.22435897435897437,
"frac_reward_zero_std": 0.03125,
"grad_norm": 0.00012190250477276098,
"learning_rate": 9.985680226398261e-06,
"loss": -0.0002,
"num_tokens": 17196835.0,
"reward": -0.23204001784324646,
"reward_std": 0.3367327839136124,
"rewards/grpo_reward_function/mean": -0.23204001784324646,
"rewards/grpo_reward_function/std": 0.7099093854427337,
"sampling/importance_sampling_ratio/max": 0.04683403689414263,
"sampling/importance_sampling_ratio/mean": 0.0009577752702170983,
"sampling/importance_sampling_ratio/min": 1.4199457666368224e-36,
"sampling/sampling_logp_difference/max": 1.5709129929542542,
"sampling/sampling_logp_difference/mean": 0.037703079357743266,
"step": 70,
"step_time": 156.71906763464213
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0421875,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2464.4,
"completions/mean_length": 817.75546875,
"completions/mean_terminated_length": 674.2539978027344,
"completions/min_length": 149.3,
"completions/min_terminated_length": 149.3,
"entropy": 0.18018180541694165,
"epoch": 0.2564102564102564,
"frac_reward_zero_std": 0.05,
"grad_norm": 5.762501312773685e-05,
"learning_rate": 9.89846735808731e-06,
"loss": -0.0,
"num_tokens": 19646058.0,
"reward": -0.1727930422872305,
"reward_std": 0.31547281742095945,
"rewards/grpo_reward_function/mean": -0.1727930411696434,
"rewards/grpo_reward_function/std": 0.7881518006324768,
"sampling/importance_sampling_ratio/max": 0.13909520097076894,
"sampling/importance_sampling_ratio/mean": 0.004160506054176949,
"sampling/importance_sampling_ratio/min": 6.660712543136218e-25,
"sampling/sampling_logp_difference/max": 1.5812078952789306,
"sampling/sampling_logp_difference/mean": 0.03748476393520832,
"step": 80,
"step_time": 151.56825386658312
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04609375,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2902.4,
"completions/mean_length": 803.97109375,
"completions/mean_terminated_length": 644.9527221679688,
"completions/min_length": 159.1,
"completions/min_terminated_length": 159.1,
"entropy": 0.18126422837376593,
"epoch": 0.28846153846153844,
"frac_reward_zero_std": 0.071875,
"grad_norm": 1.7400093375084389e-06,
"learning_rate": 9.733381816303395e-06,
"loss": 0.0001,
"num_tokens": 22062549.0,
"reward": -0.1791908085346222,
"reward_std": 0.283918160200119,
"rewards/grpo_reward_function/mean": -0.1791908085346222,
"rewards/grpo_reward_function/std": 0.7836591958999634,
"sampling/importance_sampling_ratio/max": 0.15381892090663313,
"sampling/importance_sampling_ratio/mean": 0.004421875764819561,
"sampling/importance_sampling_ratio/min": 3.314493692446194e-30,
"sampling/sampling_logp_difference/max": 1.59914892911911,
"sampling/sampling_logp_difference/mean": 0.03814994990825653,
"step": 90,
"step_time": 143.37383015677332
},
{
"clip_ratio/high_max": 2.9168124456191437e-06,
"clip_ratio/high_mean": 3.6460155570239296e-07,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 3.6460155570239296e-07,
"completions/clipped_ratio": 0.06015625,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3118.0,
"completions/mean_length": 833.84453125,
"completions/mean_terminated_length": 625.6610107421875,
"completions/min_length": 146.9,
"completions/min_terminated_length": 146.9,
"entropy": 0.17976893596351146,
"epoch": 0.32051282051282054,
"frac_reward_zero_std": 0.053125,
"grad_norm": 1.5939340167787748e-05,
"learning_rate": 9.493048024473413e-06,
"loss": 0.0002,
"num_tokens": 24458486.0,
"reward": -0.188871893286705,
"reward_std": 0.31348400712013247,
"rewards/grpo_reward_function/mean": -0.18887189254164696,
"rewards/grpo_reward_function/std": 0.7829755127429963,
"sampling/importance_sampling_ratio/max": 0.16449878164567053,
"sampling/importance_sampling_ratio/mean": 0.005532666263025021,
"sampling/importance_sampling_ratio/min": 3.4595329788052715e-35,
"sampling/sampling_logp_difference/max": 1.61622656583786,
"sampling/sampling_logp_difference/mean": 0.036300059966742994,
"step": 100,
"step_time": 150.7187235403806
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2412.8,
"completions/mean_length": 794.35859375,
"completions/mean_terminated_length": 620.5320068359375,
"completions/min_length": 128.8,
"completions/min_terminated_length": 128.8,
"entropy": 0.1772497620433569,
"epoch": 0.3525641025641026,
"frac_reward_zero_std": 0.046875,
"grad_norm": 4.817197293953718e-06,
"learning_rate": 9.18128665415186e-06,
"loss": -0.0001,
"num_tokens": 26866049.0,
"reward": -0.22932868972420692,
"reward_std": 0.31401748955249786,
"rewards/grpo_reward_function/mean": -0.22932869493961333,
"rewards/grpo_reward_function/std": 0.7706308960914612,
"sampling/importance_sampling_ratio/max": 0.1050915487576276,
"sampling/importance_sampling_ratio/mean": 0.0028572272043675185,
"sampling/importance_sampling_ratio/min": 5.94164918536593e-23,
"sampling/sampling_logp_difference/max": 1.725569200515747,
"sampling/sampling_logp_difference/mean": 0.03586800619959831,
"step": 110,
"step_time": 141.95229457244278
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04921875,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 2462.2,
"completions/mean_length": 793.1078125,
"completions/mean_terminated_length": 622.2306030273437,
"completions/min_length": 117.5,
"completions/min_terminated_length": 117.5,
"entropy": 0.17735922262072562,
"epoch": 0.38461538461538464,
"frac_reward_zero_std": 0.053125,
"grad_norm": 9.213808267741939e-05,
"learning_rate": 8.803053886449644e-06,
"loss": -0.0001,
"num_tokens": 29246979.0,
"reward": -0.2611034892499447,
"reward_std": 0.28704351782798765,
"rewards/grpo_reward_function/mean": -0.26110349521040915,
"rewards/grpo_reward_function/std": 0.7350896656513214,
"sampling/importance_sampling_ratio/max": 0.084448746079579,
"sampling/importance_sampling_ratio/mean": 0.001967475106357597,
"sampling/importance_sampling_ratio/min": 3.0353923760337493e-31,
"sampling/sampling_logp_difference/max": 1.6334303975105287,
"sampling/sampling_logp_difference/mean": 0.03619050718843937,
"step": 120,
"step_time": 144.3923004835844
}
],
"logging_steps": 10,
"max_steps": 312,
"num_input_tokens_seen": 30685228,
"num_train_epochs": 1,
"save_steps": 63,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}