efficientrag-filter / trainer_state.json
Necent's picture
EfficientRAG bilingual (en+ru) — trained on Necent/efficientrag-*-training-data
dcd620f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 200,
"global_step": 878,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04555808656036447,
"grad_norm": 14.260233879089355,
"learning_rate": 5.000000000000001e-07,
"loss": 0.6404,
"step": 20
},
{
"epoch": 0.09111617312072894,
"grad_norm": 13.856605529785156,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.6072,
"step": 40
},
{
"epoch": 0.1366742596810934,
"grad_norm": 10.69395637512207,
"learning_rate": 1.5e-06,
"loss": 0.4863,
"step": 60
},
{
"epoch": 0.18223234624145787,
"grad_norm": 6.665396213531494,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.3266,
"step": 80
},
{
"epoch": 0.22779043280182232,
"grad_norm": 2.674818754196167,
"learning_rate": 2.5e-06,
"loss": 0.1972,
"step": 100
},
{
"epoch": 0.2733485193621868,
"grad_norm": 0.46903184056282043,
"learning_rate": 3e-06,
"loss": 0.0999,
"step": 120
},
{
"epoch": 0.31890660592255127,
"grad_norm": 0.6415174603462219,
"learning_rate": 3.5e-06,
"loss": 0.0698,
"step": 140
},
{
"epoch": 0.36446469248291574,
"grad_norm": 0.3701508343219757,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0656,
"step": 160
},
{
"epoch": 0.41002277904328016,
"grad_norm": 0.4025450348854065,
"learning_rate": 4.5e-06,
"loss": 0.0574,
"step": 180
},
{
"epoch": 0.45558086560364464,
"grad_norm": 0.5695033073425293,
"learning_rate": 5e-06,
"loss": 0.0562,
"step": 200
},
{
"epoch": 0.45558086560364464,
"eval_accuracy": 0.8391241361293846,
"eval_f1": 0.8391241361293846,
"eval_f1_marco": 0.8249262659790968,
"eval_loss": 0.056919749826192856,
"eval_negative_f1": 0.8747828015823136,
"eval_positive_f1": 0.7750697303758799,
"eval_precision": 0.8391241361293846,
"eval_recall": 0.8391241361293846,
"eval_runtime": 9.3166,
"eval_samples_per_second": 79.106,
"eval_steps_per_second": 1.288,
"step": 200
},
{
"epoch": 0.5011389521640092,
"grad_norm": 0.527057945728302,
"learning_rate": 4.852507374631269e-06,
"loss": 0.0622,
"step": 220
},
{
"epoch": 0.5466970387243736,
"grad_norm": 0.47804608941078186,
"learning_rate": 4.705014749262537e-06,
"loss": 0.0527,
"step": 240
},
{
"epoch": 0.592255125284738,
"grad_norm": 0.3202660381793976,
"learning_rate": 4.557522123893805e-06,
"loss": 0.0555,
"step": 260
},
{
"epoch": 0.6378132118451025,
"grad_norm": 0.40933147072792053,
"learning_rate": 4.410029498525074e-06,
"loss": 0.0563,
"step": 280
},
{
"epoch": 0.683371298405467,
"grad_norm": 0.4464198648929596,
"learning_rate": 4.2625368731563425e-06,
"loss": 0.054,
"step": 300
},
{
"epoch": 0.7289293849658315,
"grad_norm": 0.5946183204650879,
"learning_rate": 4.115044247787611e-06,
"loss": 0.0543,
"step": 320
},
{
"epoch": 0.7744874715261959,
"grad_norm": 0.8823751211166382,
"learning_rate": 3.967551622418879e-06,
"loss": 0.0552,
"step": 340
},
{
"epoch": 0.8200455580865603,
"grad_norm": 0.29086050391197205,
"learning_rate": 3.820058997050148e-06,
"loss": 0.0556,
"step": 360
},
{
"epoch": 0.8656036446469249,
"grad_norm": 0.36109957098960876,
"learning_rate": 3.6725663716814163e-06,
"loss": 0.0547,
"step": 380
},
{
"epoch": 0.9111617312072893,
"grad_norm": 0.357105553150177,
"learning_rate": 3.5250737463126845e-06,
"loss": 0.054,
"step": 400
},
{
"epoch": 0.9111617312072893,
"eval_accuracy": 0.857600873963949,
"eval_f1": 0.857600873963949,
"eval_f1_marco": 0.8454311469742184,
"eval_loss": 0.05031890422105789,
"eval_negative_f1": 0.8888023441267016,
"eval_positive_f1": 0.8020599498217351,
"eval_precision": 0.857600873963949,
"eval_recall": 0.857600873963949,
"eval_runtime": 9.081,
"eval_samples_per_second": 81.158,
"eval_steps_per_second": 1.321,
"step": 400
},
{
"epoch": 0.9567198177676538,
"grad_norm": 0.3978097438812256,
"learning_rate": 3.3775811209439528e-06,
"loss": 0.0513,
"step": 420
},
{
"epoch": 1.0022779043280183,
"grad_norm": 0.327373206615448,
"learning_rate": 3.2300884955752214e-06,
"loss": 0.0527,
"step": 440
},
{
"epoch": 1.0478359908883828,
"grad_norm": 0.39979490637779236,
"learning_rate": 3.08259587020649e-06,
"loss": 0.0474,
"step": 460
},
{
"epoch": 1.0933940774487472,
"grad_norm": 0.37922340631484985,
"learning_rate": 2.935103244837758e-06,
"loss": 0.0501,
"step": 480
},
{
"epoch": 1.1389521640091116,
"grad_norm": 0.4099065363407135,
"learning_rate": 2.7876106194690266e-06,
"loss": 0.0461,
"step": 500
},
{
"epoch": 1.184510250569476,
"grad_norm": 0.3328123390674591,
"learning_rate": 2.6401179941002952e-06,
"loss": 0.048,
"step": 520
},
{
"epoch": 1.2300683371298406,
"grad_norm": 0.647693932056427,
"learning_rate": 2.4926253687315635e-06,
"loss": 0.0495,
"step": 540
},
{
"epoch": 1.275626423690205,
"grad_norm": 0.6742229461669922,
"learning_rate": 2.345132743362832e-06,
"loss": 0.0496,
"step": 560
},
{
"epoch": 1.3211845102505695,
"grad_norm": 0.2932397425174713,
"learning_rate": 2.1976401179941004e-06,
"loss": 0.0479,
"step": 580
},
{
"epoch": 1.366742596810934,
"grad_norm": 0.32655268907546997,
"learning_rate": 2.050147492625369e-06,
"loss": 0.0472,
"step": 600
},
{
"epoch": 1.366742596810934,
"eval_accuracy": 0.8680504429192296,
"eval_f1": 0.8680504429192296,
"eval_f1_marco": 0.853667569896885,
"eval_loss": 0.04778573289513588,
"eval_negative_f1": 0.8995443697114341,
"eval_positive_f1": 0.8077907700823358,
"eval_precision": 0.8680504429192296,
"eval_recall": 0.8680504429192296,
"eval_runtime": 9.6373,
"eval_samples_per_second": 76.473,
"eval_steps_per_second": 1.245,
"step": 600
},
{
"epoch": 1.4123006833712983,
"grad_norm": 0.3308158814907074,
"learning_rate": 1.9026548672566373e-06,
"loss": 0.0455,
"step": 620
},
{
"epoch": 1.4578587699316627,
"grad_norm": 0.4326237738132477,
"learning_rate": 1.7551622418879058e-06,
"loss": 0.0476,
"step": 640
},
{
"epoch": 1.5034168564920274,
"grad_norm": 0.9873289465904236,
"learning_rate": 1.607669616519174e-06,
"loss": 0.0469,
"step": 660
},
{
"epoch": 1.5489749430523918,
"grad_norm": 0.4288870096206665,
"learning_rate": 1.4601769911504427e-06,
"loss": 0.0459,
"step": 680
},
{
"epoch": 1.5945330296127562,
"grad_norm": 0.4720146358013153,
"learning_rate": 1.312684365781711e-06,
"loss": 0.0494,
"step": 700
},
{
"epoch": 1.6400911161731209,
"grad_norm": 0.6934795379638672,
"learning_rate": 1.1651917404129796e-06,
"loss": 0.05,
"step": 720
},
{
"epoch": 1.6856492027334853,
"grad_norm": 0.3552420735359192,
"learning_rate": 1.017699115044248e-06,
"loss": 0.0499,
"step": 740
},
{
"epoch": 1.7312072892938497,
"grad_norm": 0.32909882068634033,
"learning_rate": 8.702064896755164e-07,
"loss": 0.0485,
"step": 760
},
{
"epoch": 1.7767653758542141,
"grad_norm": 0.2789745628833771,
"learning_rate": 7.227138643067848e-07,
"loss": 0.0468,
"step": 780
},
{
"epoch": 1.8223234624145785,
"grad_norm": 0.3232771158218384,
"learning_rate": 5.752212389380532e-07,
"loss": 0.0481,
"step": 800
},
{
"epoch": 1.8223234624145785,
"eval_accuracy": 0.8677417056546417,
"eval_f1": 0.8677417056546417,
"eval_f1_marco": 0.8536354633333805,
"eval_loss": 0.04745788872241974,
"eval_negative_f1": 0.8990739230504359,
"eval_positive_f1": 0.8081970036163251,
"eval_precision": 0.8677417056546417,
"eval_recall": 0.8677417056546417,
"eval_runtime": 8.8278,
"eval_samples_per_second": 83.487,
"eval_steps_per_second": 1.359,
"step": 800
},
{
"epoch": 1.867881548974943,
"grad_norm": 0.4403178095817566,
"learning_rate": 4.277286135693216e-07,
"loss": 0.0502,
"step": 820
},
{
"epoch": 1.9134396355353074,
"grad_norm": 0.4974011182785034,
"learning_rate": 2.8023598820059e-07,
"loss": 0.0453,
"step": 840
},
{
"epoch": 1.958997722095672,
"grad_norm": 0.3315702974796295,
"learning_rate": 1.327433628318584e-07,
"loss": 0.0494,
"step": 860
}
],
"logging_steps": 20,
"max_steps": 878,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 1.952805421392077e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}