AutoDecompiler-1.3B-e2e / trainer_state.json
AutoDecompiler's picture
Upload 16 files
361b2bd verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8076923076923077,
"eval_steps": 500,
"global_step": 84,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0229166672565043,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3560.9,
"completions/mean_length": 522.7101684570313,
"completions/mean_terminated_length": 438.9754211425781,
"completions/min_length": 75.0,
"completions/min_terminated_length": 75.0,
"entropy": 0.23113284446299076,
"epoch": 0.09615384615384616,
"frac_reward_zero_std": 0.02291666707023978,
"grad_norm": 0.0001292548916491907,
"learning_rate": 4.2857142857142855e-06,
"loss": -0.0001,
"num_tokens": 7139751.0,
"reward": -0.3463470071554184,
"reward_std": 0.19510238319635392,
"rewards/grpo_reward_function/mean": -0.34634698629379274,
"rewards/grpo_reward_function/std": 0.45176688134670256,
"sampling/importance_sampling_ratio/max": 0.2500600881874561,
"sampling/importance_sampling_ratio/mean": 0.003153010329697281,
"sampling/importance_sampling_ratio/min": 4.422263656305887e-39,
"sampling/sampling_logp_difference/max": 1.7915182709693909,
"sampling/sampling_logp_difference/mean": 0.04889127053320408,
"step": 10,
"step_time": 190.44577815867962
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.023697917722165583,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3451.8,
"completions/mean_length": 534.3229278564453,
"completions/mean_terminated_length": 448.0358642578125,
"completions/min_length": 88.7,
"completions/min_terminated_length": 88.7,
"entropy": 0.2307925283908844,
"epoch": 0.19230769230769232,
"frac_reward_zero_std": 0.030208333767950533,
"grad_norm": 7.460668293122211e-05,
"learning_rate": 9.047619047619049e-06,
"loss": -0.0002,
"num_tokens": 14396583.0,
"reward": -0.37422587275505065,
"reward_std": 0.20355914533138275,
"rewards/grpo_reward_function/mean": -0.3742258608341217,
"rewards/grpo_reward_function/std": 0.44767938256263734,
"sampling/importance_sampling_ratio/max": 0.19214814556762577,
"sampling/importance_sampling_ratio/mean": 0.0025246856122976167,
"sampling/importance_sampling_ratio/min": 5.423524047837388e-32,
"sampling/sampling_logp_difference/max": 1.7129023194313049,
"sampling/sampling_logp_difference/mean": 0.04906127564609051,
"step": 20,
"step_time": 190.18596366122364
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.021354167256504297,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3362.2,
"completions/mean_length": 523.9229278564453,
"completions/mean_terminated_length": 446.04356384277344,
"completions/min_length": 94.6,
"completions/min_terminated_length": 94.6,
"entropy": 0.22211343944072723,
"epoch": 0.28846153846153844,
"frac_reward_zero_std": 0.027083334140479566,
"grad_norm": 0.00013486736422447032,
"learning_rate": 9.772520313857777e-06,
"loss": -0.0001,
"num_tokens": 21575715.0,
"reward": -0.3505605816841125,
"reward_std": 0.18663191050291061,
"rewards/grpo_reward_function/mean": -0.35056057274341584,
"rewards/grpo_reward_function/std": 0.43719949424266813,
"sampling/importance_sampling_ratio/max": 0.27146717831492423,
"sampling/importance_sampling_ratio/mean": 0.0029611586302053182,
"sampling/importance_sampling_ratio/min": 3.801722733713229e-43,
"sampling/sampling_logp_difference/max": 1.808135986328125,
"sampling/sampling_logp_difference/mean": 0.04732150360941887,
"step": 30,
"step_time": 188.15189309306442
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01718750074505806,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3561.5,
"completions/mean_length": 516.6395965576172,
"completions/mean_terminated_length": 454.0846740722656,
"completions/min_length": 95.4,
"completions/min_terminated_length": 95.4,
"entropy": 0.23415493555366992,
"epoch": 0.38461538461538464,
"frac_reward_zero_std": 0.03125000027939677,
"grad_norm": 0.000136481436864881,
"learning_rate": 8.883744025880429e-06,
"loss": -0.0001,
"num_tokens": 28670267.0,
"reward": -0.3659424602985382,
"reward_std": 0.17396533936262132,
"rewards/grpo_reward_function/mean": -0.36594244837760925,
"rewards/grpo_reward_function/std": 0.4192140996456146,
"sampling/importance_sampling_ratio/max": 0.15043866001069545,
"sampling/importance_sampling_ratio/mean": 0.0019588641240261494,
"sampling/importance_sampling_ratio/min": 4.154415544199142e-41,
"sampling/sampling_logp_difference/max": 1.778145396709442,
"sampling/sampling_logp_difference/mean": 0.050116677582263944,
"step": 40,
"step_time": 187.47059447690845
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02057291716337204,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3445.9,
"completions/mean_length": 533.0474151611328,
"completions/mean_terminated_length": 458.3273406982422,
"completions/min_length": 89.6,
"completions/min_terminated_length": 89.6,
"entropy": 0.21524659655988215,
"epoch": 0.4807692307692308,
"frac_reward_zero_std": 0.03437500102445483,
"grad_norm": 0.0005527222513477307,
"learning_rate": 7.445169960349167e-06,
"loss": 0.0001,
"num_tokens": 35851893.0,
"reward": -0.3463290870189667,
"reward_std": 0.19029797241091728,
"rewards/grpo_reward_function/mean": -0.34632907509803773,
"rewards/grpo_reward_function/std": 0.468786346912384,
"sampling/importance_sampling_ratio/max": 0.3124311394989491,
"sampling/importance_sampling_ratio/mean": 0.003457725653424859,
"sampling/importance_sampling_ratio/min": 6.522424396341872e-34,
"sampling/sampling_logp_difference/max": 1.7508255124092102,
"sampling/sampling_logp_difference/mean": 0.04619445130228996,
"step": 50,
"step_time": 187.6461409341544
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.019531250465661287,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3405.2,
"completions/mean_length": 567.5502746582031,
"completions/mean_terminated_length": 497.2075164794922,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"entropy": 0.21499812975525856,
"epoch": 0.5769230769230769,
"frac_reward_zero_std": 0.023958333674818276,
"grad_norm": 0.0002768893576569048,
"learning_rate": 5.660448208208513e-06,
"loss": -0.0,
"num_tokens": 43292650.0,
"reward": -0.3658351093530655,
"reward_std": 0.2153971642255783,
"rewards/grpo_reward_function/mean": -0.36583509147167204,
"rewards/grpo_reward_function/std": 0.46224400103092195,
"sampling/importance_sampling_ratio/max": 0.2361154653131962,
"sampling/importance_sampling_ratio/mean": 0.0022639297298155726,
"sampling/importance_sampling_ratio/min": 3.015787716454037e-38,
"sampling/sampling_logp_difference/max": 1.769980216026306,
"sampling/sampling_logp_difference/mean": 0.04610508680343628,
"step": 60,
"step_time": 187.66385944783687
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.027604167629033328,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3738.3,
"completions/mean_length": 598.5659057617188,
"completions/mean_terminated_length": 499.3739959716797,
"completions/min_length": 111.5,
"completions/min_terminated_length": 111.5,
"entropy": 0.21177774332463742,
"epoch": 0.6730769230769231,
"frac_reward_zero_std": 0.025000000931322576,
"grad_norm": 7.337733368660454e-05,
"learning_rate": 3.782230861445041e-06,
"loss": 0.0,
"num_tokens": 50694323.0,
"reward": -0.4116409093141556,
"reward_std": 0.22881191819906235,
"rewards/grpo_reward_function/mean": -0.4116408973932266,
"rewards/grpo_reward_function/std": 0.4689377576112747,
"sampling/importance_sampling_ratio/max": 0.12385215684771538,
"sampling/importance_sampling_ratio/mean": 0.0014503307553241029,
"sampling/importance_sampling_ratio/min": 5.254869241218064e-41,
"sampling/sampling_logp_difference/max": 1.805948269367218,
"sampling/sampling_logp_difference/mean": 0.04531049989163875,
"step": 70,
"step_time": 189.22959316521883
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02161458395421505,
"completions/max_length": 4096.0,
"completions/max_terminated_length": 3813.9,
"completions/mean_length": 575.7130401611328,
"completions/mean_terminated_length": 497.99981689453125,
"completions/min_length": 90.0,
"completions/min_terminated_length": 90.0,
"entropy": 0.211932235956192,
"epoch": 0.7692307692307693,
"frac_reward_zero_std": 0.03229166744276881,
"grad_norm": 6.826014804673302e-05,
"learning_rate": 2.0764056088797646e-06,
"loss": -0.0001,
"num_tokens": 58011305.0,
"reward": -0.3900724709033966,
"reward_std": 0.21758214086294175,
"rewards/grpo_reward_function/mean": -0.3900724589824677,
"rewards/grpo_reward_function/std": 0.4689822793006897,
"sampling/importance_sampling_ratio/max": 0.2649656251072884,
"sampling/importance_sampling_ratio/mean": 0.0030477925203740595,
"sampling/importance_sampling_ratio/min": 1.6902078646393643e-36,
"sampling/sampling_logp_difference/max": 1.7864648818969726,
"sampling/sampling_logp_difference/mean": 0.04543661251664162,
"step": 80,
"step_time": 187.76453976780175
}
],
"logging_steps": 10,
"max_steps": 104,
"num_input_tokens_seen": 60954036,
"num_train_epochs": 1,
"save_steps": 21,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}