| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 200, |
| "global_step": 878, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04555808656036447, |
| "grad_norm": 14.260233879089355, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.6404, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09111617312072894, |
| "grad_norm": 13.856605529785156, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.6072, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1366742596810934, |
| "grad_norm": 10.69395637512207, |
| "learning_rate": 1.5e-06, |
| "loss": 0.4863, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18223234624145787, |
| "grad_norm": 6.665396213531494, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.3266, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22779043280182232, |
| "grad_norm": 2.674818754196167, |
| "learning_rate": 2.5e-06, |
| "loss": 0.1972, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2733485193621868, |
| "grad_norm": 0.46903184056282043, |
| "learning_rate": 3e-06, |
| "loss": 0.0999, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.31890660592255127, |
| "grad_norm": 0.6415174603462219, |
| "learning_rate": 3.5e-06, |
| "loss": 0.0698, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.36446469248291574, |
| "grad_norm": 0.3701508343219757, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0656, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.41002277904328016, |
| "grad_norm": 0.4025450348854065, |
| "learning_rate": 4.5e-06, |
| "loss": 0.0574, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.45558086560364464, |
| "grad_norm": 0.5695033073425293, |
| "learning_rate": 5e-06, |
| "loss": 0.0562, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.45558086560364464, |
| "eval_accuracy": 0.8391241361293846, |
| "eval_f1": 0.8391241361293846, |
| "eval_f1_marco": 0.8249262659790968, |
| "eval_loss": 0.056919749826192856, |
| "eval_negative_f1": 0.8747828015823136, |
| "eval_positive_f1": 0.7750697303758799, |
| "eval_precision": 0.8391241361293846, |
| "eval_recall": 0.8391241361293846, |
| "eval_runtime": 9.3166, |
| "eval_samples_per_second": 79.106, |
| "eval_steps_per_second": 1.288, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5011389521640092, |
| "grad_norm": 0.527057945728302, |
| "learning_rate": 4.852507374631269e-06, |
| "loss": 0.0622, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5466970387243736, |
| "grad_norm": 0.47804608941078186, |
| "learning_rate": 4.705014749262537e-06, |
| "loss": 0.0527, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.592255125284738, |
| "grad_norm": 0.3202660381793976, |
| "learning_rate": 4.557522123893805e-06, |
| "loss": 0.0555, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6378132118451025, |
| "grad_norm": 0.40933147072792053, |
| "learning_rate": 4.410029498525074e-06, |
| "loss": 0.0563, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.683371298405467, |
| "grad_norm": 0.4464198648929596, |
| "learning_rate": 4.2625368731563425e-06, |
| "loss": 0.054, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7289293849658315, |
| "grad_norm": 0.5946183204650879, |
| "learning_rate": 4.115044247787611e-06, |
| "loss": 0.0543, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7744874715261959, |
| "grad_norm": 0.8823751211166382, |
| "learning_rate": 3.967551622418879e-06, |
| "loss": 0.0552, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8200455580865603, |
| "grad_norm": 0.29086050391197205, |
| "learning_rate": 3.820058997050148e-06, |
| "loss": 0.0556, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8656036446469249, |
| "grad_norm": 0.36109957098960876, |
| "learning_rate": 3.6725663716814163e-06, |
| "loss": 0.0547, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9111617312072893, |
| "grad_norm": 0.357105553150177, |
| "learning_rate": 3.5250737463126845e-06, |
| "loss": 0.054, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9111617312072893, |
| "eval_accuracy": 0.857600873963949, |
| "eval_f1": 0.857600873963949, |
| "eval_f1_marco": 0.8454311469742184, |
| "eval_loss": 0.05031890422105789, |
| "eval_negative_f1": 0.8888023441267016, |
| "eval_positive_f1": 0.8020599498217351, |
| "eval_precision": 0.857600873963949, |
| "eval_recall": 0.857600873963949, |
| "eval_runtime": 9.081, |
| "eval_samples_per_second": 81.158, |
| "eval_steps_per_second": 1.321, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9567198177676538, |
| "grad_norm": 0.3978097438812256, |
| "learning_rate": 3.3775811209439528e-06, |
| "loss": 0.0513, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0022779043280183, |
| "grad_norm": 0.327373206615448, |
| "learning_rate": 3.2300884955752214e-06, |
| "loss": 0.0527, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.0478359908883828, |
| "grad_norm": 0.39979490637779236, |
| "learning_rate": 3.08259587020649e-06, |
| "loss": 0.0474, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0933940774487472, |
| "grad_norm": 0.37922340631484985, |
| "learning_rate": 2.935103244837758e-06, |
| "loss": 0.0501, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.1389521640091116, |
| "grad_norm": 0.4099065363407135, |
| "learning_rate": 2.7876106194690266e-06, |
| "loss": 0.0461, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.184510250569476, |
| "grad_norm": 0.3328123390674591, |
| "learning_rate": 2.6401179941002952e-06, |
| "loss": 0.048, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.2300683371298406, |
| "grad_norm": 0.647693932056427, |
| "learning_rate": 2.4926253687315635e-06, |
| "loss": 0.0495, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.275626423690205, |
| "grad_norm": 0.6742229461669922, |
| "learning_rate": 2.345132743362832e-06, |
| "loss": 0.0496, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.3211845102505695, |
| "grad_norm": 0.2932397425174713, |
| "learning_rate": 2.1976401179941004e-06, |
| "loss": 0.0479, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.366742596810934, |
| "grad_norm": 0.32655268907546997, |
| "learning_rate": 2.050147492625369e-06, |
| "loss": 0.0472, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.366742596810934, |
| "eval_accuracy": 0.8680504429192296, |
| "eval_f1": 0.8680504429192296, |
| "eval_f1_marco": 0.853667569896885, |
| "eval_loss": 0.04778573289513588, |
| "eval_negative_f1": 0.8995443697114341, |
| "eval_positive_f1": 0.8077907700823358, |
| "eval_precision": 0.8680504429192296, |
| "eval_recall": 0.8680504429192296, |
| "eval_runtime": 9.6373, |
| "eval_samples_per_second": 76.473, |
| "eval_steps_per_second": 1.245, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4123006833712983, |
| "grad_norm": 0.3308158814907074, |
| "learning_rate": 1.9026548672566373e-06, |
| "loss": 0.0455, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4578587699316627, |
| "grad_norm": 0.4326237738132477, |
| "learning_rate": 1.7551622418879058e-06, |
| "loss": 0.0476, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.5034168564920274, |
| "grad_norm": 0.9873289465904236, |
| "learning_rate": 1.607669616519174e-06, |
| "loss": 0.0469, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.5489749430523918, |
| "grad_norm": 0.4288870096206665, |
| "learning_rate": 1.4601769911504427e-06, |
| "loss": 0.0459, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5945330296127562, |
| "grad_norm": 0.4720146358013153, |
| "learning_rate": 1.312684365781711e-06, |
| "loss": 0.0494, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.6400911161731209, |
| "grad_norm": 0.6934795379638672, |
| "learning_rate": 1.1651917404129796e-06, |
| "loss": 0.05, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6856492027334853, |
| "grad_norm": 0.3552420735359192, |
| "learning_rate": 1.017699115044248e-06, |
| "loss": 0.0499, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.7312072892938497, |
| "grad_norm": 0.32909882068634033, |
| "learning_rate": 8.702064896755164e-07, |
| "loss": 0.0485, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.7767653758542141, |
| "grad_norm": 0.2789745628833771, |
| "learning_rate": 7.227138643067848e-07, |
| "loss": 0.0468, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.8223234624145785, |
| "grad_norm": 0.3232771158218384, |
| "learning_rate": 5.752212389380532e-07, |
| "loss": 0.0481, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8223234624145785, |
| "eval_accuracy": 0.8677417056546417, |
| "eval_f1": 0.8677417056546417, |
| "eval_f1_marco": 0.8536354633333805, |
| "eval_loss": 0.04745788872241974, |
| "eval_negative_f1": 0.8990739230504359, |
| "eval_positive_f1": 0.8081970036163251, |
| "eval_precision": 0.8677417056546417, |
| "eval_recall": 0.8677417056546417, |
| "eval_runtime": 8.8278, |
| "eval_samples_per_second": 83.487, |
| "eval_steps_per_second": 1.359, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.867881548974943, |
| "grad_norm": 0.4403178095817566, |
| "learning_rate": 4.277286135693216e-07, |
| "loss": 0.0502, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.9134396355353074, |
| "grad_norm": 0.4974011182785034, |
| "learning_rate": 2.8023598820059e-07, |
| "loss": 0.0453, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.958997722095672, |
| "grad_norm": 0.3315702974796295, |
| "learning_rate": 1.327433628318584e-07, |
| "loss": 0.0494, |
| "step": 860 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 878, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "total_flos": 1.952805421392077e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|