HealthJudge / trainer_state.json
Maekami
upload model
ab56b09
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03537214443625645,
"grad_norm": 3483.533935546875,
"learning_rate": 7.692307692307694e-07,
"loss": 51.896,
"step": 3
},
{
"epoch": 0.0707442888725129,
"grad_norm": 2646.85693359375,
"learning_rate": 1.9230769230769234e-06,
"loss": 35.9263,
"step": 6
},
{
"epoch": 0.10611643330876934,
"grad_norm": 452.2362365722656,
"learning_rate": 3.0769230769230774e-06,
"loss": 6.4672,
"step": 9
},
{
"epoch": 0.1414885777450258,
"grad_norm": 224.29888916015625,
"learning_rate": 4.230769230769231e-06,
"loss": 2.9289,
"step": 12
},
{
"epoch": 0.17686072218128224,
"grad_norm": 343.0766296386719,
"learning_rate": 5.384615384615385e-06,
"loss": 2.6691,
"step": 15
},
{
"epoch": 0.21223286661753868,
"grad_norm": 241.66685485839844,
"learning_rate": 6.538461538461539e-06,
"loss": 2.4547,
"step": 18
},
{
"epoch": 0.24760501105379515,
"grad_norm": 266.5543212890625,
"learning_rate": 7.692307692307694e-06,
"loss": 2.2923,
"step": 21
},
{
"epoch": 0.2829771554900516,
"grad_norm": 200.54481506347656,
"learning_rate": 8.846153846153847e-06,
"loss": 3.5719,
"step": 24
},
{
"epoch": 0.318349299926308,
"grad_norm": 223.88656616210938,
"learning_rate": 1e-05,
"loss": 3.1235,
"step": 27
},
{
"epoch": 0.3537214443625645,
"grad_norm": 289.29449462890625,
"learning_rate": 9.999672943258572e-06,
"loss": 2.9122,
"step": 30
},
{
"epoch": 0.38909358879882094,
"grad_norm": 152.24334716796875,
"learning_rate": 9.998691815820732e-06,
"loss": 2.7438,
"step": 33
},
{
"epoch": 0.42446573323507736,
"grad_norm": 43.65974426269531,
"learning_rate": 9.997056746040215e-06,
"loss": 1.9228,
"step": 36
},
{
"epoch": 0.45983787767133383,
"grad_norm": 115.5759048461914,
"learning_rate": 9.994767947821261e-06,
"loss": 2.8869,
"step": 39
},
{
"epoch": 0.4952100221075903,
"grad_norm": 72.44747924804688,
"learning_rate": 9.991825720590627e-06,
"loss": 1.8182,
"step": 42
},
{
"epoch": 0.5305821665438467,
"grad_norm": 49.48904800415039,
"learning_rate": 9.988230449258409e-06,
"loss": 2.5766,
"step": 45
},
{
"epoch": 0.5659543109801032,
"grad_norm": 53.719970703125,
"learning_rate": 9.983982604167699e-06,
"loss": 2.5584,
"step": 48
},
{
"epoch": 0.6013264554163597,
"grad_norm": 72.06404113769531,
"learning_rate": 9.979082741033047e-06,
"loss": 2.0214,
"step": 51
},
{
"epoch": 0.636698599852616,
"grad_norm": 197.82850646972656,
"learning_rate": 9.973531500867761e-06,
"loss": 2.4626,
"step": 54
},
{
"epoch": 0.6720707442888725,
"grad_norm": 79.25627899169922,
"learning_rate": 9.96732960990005e-06,
"loss": 1.9808,
"step": 57
},
{
"epoch": 0.707442888725129,
"grad_norm": 38.53336715698242,
"learning_rate": 9.96047787947801e-06,
"loss": 2.0431,
"step": 60
},
{
"epoch": 0.7428150331613854,
"grad_norm": 106.73336029052734,
"learning_rate": 9.952977205963496e-06,
"loss": 2.3707,
"step": 63
},
{
"epoch": 0.7781871775976419,
"grad_norm": 106.87248229980469,
"learning_rate": 9.94482857061484e-06,
"loss": 1.9355,
"step": 66
},
{
"epoch": 0.8135593220338984,
"grad_norm": 30.651411056518555,
"learning_rate": 9.936033039458494e-06,
"loss": 1.9553,
"step": 69
},
{
"epoch": 0.8489314664701547,
"grad_norm": 50.57050323486328,
"learning_rate": 9.92659176314956e-06,
"loss": 1.85,
"step": 72
},
{
"epoch": 0.8843036109064112,
"grad_norm": 252.79238891601562,
"learning_rate": 9.916505976821262e-06,
"loss": 2.2428,
"step": 75
},
{
"epoch": 0.9196757553426677,
"grad_norm": 159.39630126953125,
"learning_rate": 9.905776999923369e-06,
"loss": 2.4031,
"step": 78
},
{
"epoch": 0.9550478997789241,
"grad_norm": 50.61412048339844,
"learning_rate": 9.894406236049569e-06,
"loss": 1.9067,
"step": 81
},
{
"epoch": 0.9904200442151806,
"grad_norm": 55.9256706237793,
"learning_rate": 9.882395172753852e-06,
"loss": 1.8086,
"step": 84
},
{
"epoch": 1.0,
"eval_loss": 0.05901043117046356,
"eval_runtime": 22.6695,
"eval_samples_per_second": 44.112,
"eval_steps_per_second": 22.056,
"step": 85
},
{
"epoch": 1.023581429624171,
"grad_norm": 27.12094497680664,
"learning_rate": 9.869745381355906e-06,
"loss": 1.3243,
"step": 87
},
{
"epoch": 1.0589535740604274,
"grad_norm": 37.28285217285156,
"learning_rate": 9.856458516735558e-06,
"loss": 1.1639,
"step": 90
},
{
"epoch": 1.094325718496684,
"grad_norm": 89.23674011230469,
"learning_rate": 9.842536317116262e-06,
"loss": 1.0754,
"step": 93
},
{
"epoch": 1.1296978629329404,
"grad_norm": 98.14556121826172,
"learning_rate": 9.827980603837715e-06,
"loss": 1.0517,
"step": 96
},
{
"epoch": 1.1650700073691969,
"grad_norm": 134.01821899414062,
"learning_rate": 9.81279328111758e-06,
"loss": 0.9979,
"step": 99
},
{
"epoch": 1.2004421518054533,
"grad_norm": 37.08598709106445,
"learning_rate": 9.796976335802369e-06,
"loss": 1.2897,
"step": 102
},
{
"epoch": 1.2358142962417096,
"grad_norm": 138.39120483398438,
"learning_rate": 9.780531837107519e-06,
"loss": 1.2761,
"step": 105
},
{
"epoch": 1.271186440677966,
"grad_norm": 99.29012298583984,
"learning_rate": 9.763461936346694e-06,
"loss": 1.7901,
"step": 108
},
{
"epoch": 1.3065585851142225,
"grad_norm": 117.44629669189453,
"learning_rate": 9.745768866650339e-06,
"loss": 1.2258,
"step": 111
},
{
"epoch": 1.341930729550479,
"grad_norm": 48.49049377441406,
"learning_rate": 9.727454942673544e-06,
"loss": 1.4136,
"step": 114
},
{
"epoch": 1.3773028739867355,
"grad_norm": 50.829097747802734,
"learning_rate": 9.70852256029323e-06,
"loss": 0.8894,
"step": 117
},
{
"epoch": 1.412675018422992,
"grad_norm": 113.78025817871094,
"learning_rate": 9.68897419629471e-06,
"loss": 1.6923,
"step": 120
},
{
"epoch": 1.4480471628592484,
"grad_norm": 80.82630157470703,
"learning_rate": 9.66881240804768e-06,
"loss": 1.0206,
"step": 123
},
{
"epoch": 1.4834193072955049,
"grad_norm": 162.958984375,
"learning_rate": 9.648039833171639e-06,
"loss": 1.9516,
"step": 126
},
{
"epoch": 1.518791451731761,
"grad_norm": 36.6719856262207,
"learning_rate": 9.626659189190852e-06,
"loss": 0.749,
"step": 129
},
{
"epoch": 1.5541635961680176,
"grad_norm": 94.12837219238281,
"learning_rate": 9.60467327317882e-06,
"loss": 1.639,
"step": 132
},
{
"epoch": 1.589535740604274,
"grad_norm": 190.69659423828125,
"learning_rate": 9.582084961392358e-06,
"loss": 1.209,
"step": 135
},
{
"epoch": 1.6249078850405305,
"grad_norm": 52.122528076171875,
"learning_rate": 9.55889720889533e-06,
"loss": 1.2829,
"step": 138
},
{
"epoch": 1.660280029476787,
"grad_norm": 92.39747619628906,
"learning_rate": 9.53511304917204e-06,
"loss": 1.3792,
"step": 141
},
{
"epoch": 1.6956521739130435,
"grad_norm": 174.52503967285156,
"learning_rate": 9.510735593730402e-06,
"loss": 1.9879,
"step": 144
},
{
"epoch": 1.7310243183493,
"grad_norm": 69.46823120117188,
"learning_rate": 9.485768031694872e-06,
"loss": 1.1063,
"step": 147
},
{
"epoch": 1.7663964627855564,
"grad_norm": 56.77116775512695,
"learning_rate": 9.460213629389241e-06,
"loss": 1.3528,
"step": 150
},
{
"epoch": 1.8017686072218129,
"grad_norm": 49.163902282714844,
"learning_rate": 9.43407572990933e-06,
"loss": 1.4749,
"step": 153
},
{
"epoch": 1.8371407516580693,
"grad_norm": 96.97235870361328,
"learning_rate": 9.407357752685628e-06,
"loss": 1.3238,
"step": 156
},
{
"epoch": 1.8725128960943258,
"grad_norm": 57.185482025146484,
"learning_rate": 9.380063193035968e-06,
"loss": 1.2848,
"step": 159
},
{
"epoch": 1.9078850405305823,
"grad_norm": 55.79048156738281,
"learning_rate": 9.352195621708239e-06,
"loss": 1.1498,
"step": 162
},
{
"epoch": 1.9432571849668387,
"grad_norm": 30.209585189819336,
"learning_rate": 9.323758684413272e-06,
"loss": 1.0718,
"step": 165
},
{
"epoch": 1.9786293294030952,
"grad_norm": 37.25614547729492,
"learning_rate": 9.294756101347888e-06,
"loss": 0.647,
"step": 168
},
{
"epoch": 2.0,
"eval_loss": 0.046793434768915176,
"eval_runtime": 21.4672,
"eval_samples_per_second": 46.583,
"eval_steps_per_second": 23.291,
"step": 170
}
],
"logging_steps": 3,
"max_steps": 850,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.100963244849234e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}