{ "best_global_step": 500, "best_metric": 0.8730387325792941, "best_model_checkpoint": "taskA-unixcoder-focal/checkpoint-500", "epoch": 0.030588523186100575, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006117704637220115, "grad_norm": 5.2196149826049805, "learning_rate": 9.178990311065784e-08, "loss": 0.2114, "step": 10 }, { "epoch": 0.001223540927444023, "grad_norm": 2.4952661991119385, "learning_rate": 1.937786843447221e-07, "loss": 0.179, "step": 20 }, { "epoch": 0.0018353113911660345, "grad_norm": 3.8488683700561523, "learning_rate": 2.9576746557878637e-07, "loss": 0.1949, "step": 30 }, { "epoch": 0.002447081854888046, "grad_norm": 3.7437973022460938, "learning_rate": 3.9775624681285064e-07, "loss": 0.2007, "step": 40 }, { "epoch": 0.0030588523186100575, "grad_norm": 3.8342156410217285, "learning_rate": 4.997450280469149e-07, "loss": 0.1828, "step": 50 }, { "epoch": 0.003670622782332069, "grad_norm": 3.510793685913086, "learning_rate": 6.017338092809792e-07, "loss": 0.1875, "step": 60 }, { "epoch": 0.0042823932460540805, "grad_norm": 3.223860740661621, "learning_rate": 7.037225905150434e-07, "loss": 0.1661, "step": 70 }, { "epoch": 0.004894163709776092, "grad_norm": 3.791808605194092, "learning_rate": 8.057113717491076e-07, "loss": 0.1762, "step": 80 }, { "epoch": 0.0055059341734981036, "grad_norm": 3.8649113178253174, "learning_rate": 9.077001529831719e-07, "loss": 0.1774, "step": 90 }, { "epoch": 0.006117704637220115, "grad_norm": 2.9645302295684814, "learning_rate": 1.0096889342172361e-06, "loss": 0.1741, "step": 100 }, { "epoch": 0.006729475100942127, "grad_norm": 2.9103174209594727, "learning_rate": 1.1116777154513005e-06, "loss": 0.1808, "step": 110 }, { "epoch": 0.007341245564664138, "grad_norm": 2.3954272270202637, "learning_rate": 1.2136664966853646e-06, "loss": 0.161, "step": 120 }, { "epoch": 0.00795301602838615, "grad_norm": 3.030513048171997, "learning_rate": 1.3156552779194289e-06, "loss": 0.1689, "step": 130 }, { "epoch": 0.008564786492108161, "grad_norm": 1.7593587636947632, "learning_rate": 1.4176440591534932e-06, "loss": 0.1595, "step": 140 }, { "epoch": 0.009176556955830173, "grad_norm": 1.9787609577178955, "learning_rate": 1.5196328403875573e-06, "loss": 0.1429, "step": 150 }, { "epoch": 0.009788327419552184, "grad_norm": 2.549748659133911, "learning_rate": 1.6216216216216219e-06, "loss": 0.154, "step": 160 }, { "epoch": 0.010400097883274196, "grad_norm": 2.4056687355041504, "learning_rate": 1.723610402855686e-06, "loss": 0.1582, "step": 170 }, { "epoch": 0.011011868346996207, "grad_norm": 3.3082435131073, "learning_rate": 1.8255991840897503e-06, "loss": 0.1502, "step": 180 }, { "epoch": 0.011623638810718219, "grad_norm": 3.446185350418091, "learning_rate": 1.9275879653238146e-06, "loss": 0.1452, "step": 190 }, { "epoch": 0.01223540927444023, "grad_norm": 2.6021993160247803, "learning_rate": 2.029576746557879e-06, "loss": 0.1404, "step": 200 }, { "epoch": 0.012847179738162242, "grad_norm": 2.1967263221740723, "learning_rate": 2.1315655277919432e-06, "loss": 0.1421, "step": 210 }, { "epoch": 0.013458950201884253, "grad_norm": 1.9831513166427612, "learning_rate": 2.233554309026007e-06, "loss": 0.1431, "step": 220 }, { "epoch": 0.014070720665606265, "grad_norm": 1.7445921897888184, "learning_rate": 2.3355430902600715e-06, "loss": 0.1242, "step": 230 }, { "epoch": 0.014682491129328276, "grad_norm": 2.7725071907043457, "learning_rate": 2.4375318714941358e-06, "loss": 0.1346, "step": 240 }, { "epoch": 0.015294261593050288, "grad_norm": 1.7258802652359009, "learning_rate": 2.5395206527282e-06, "loss": 0.1371, "step": 250 }, { "epoch": 0.0159060320567723, "grad_norm": 2.1158506870269775, "learning_rate": 2.6415094339622644e-06, "loss": 0.1271, "step": 260 }, { "epoch": 0.01651780252049431, "grad_norm": 2.0771985054016113, "learning_rate": 2.7434982151963283e-06, "loss": 0.1214, "step": 270 }, { "epoch": 0.017129572984216322, "grad_norm": 2.924894332885742, "learning_rate": 2.8454869964303926e-06, "loss": 0.1281, "step": 280 }, { "epoch": 0.017741343447938332, "grad_norm": 2.122687578201294, "learning_rate": 2.947475777664457e-06, "loss": 0.132, "step": 290 }, { "epoch": 0.018353113911660345, "grad_norm": 3.2565481662750244, "learning_rate": 3.0494645588985217e-06, "loss": 0.1336, "step": 300 }, { "epoch": 0.018964884375382355, "grad_norm": 1.2805263996124268, "learning_rate": 3.151453340132586e-06, "loss": 0.1183, "step": 310 }, { "epoch": 0.019576654839104368, "grad_norm": 2.420628786087036, "learning_rate": 3.25344212136665e-06, "loss": 0.1125, "step": 320 }, { "epoch": 0.020188425302826378, "grad_norm": 2.887117862701416, "learning_rate": 3.3554309026007142e-06, "loss": 0.1041, "step": 330 }, { "epoch": 0.02080019576654839, "grad_norm": 1.6021567583084106, "learning_rate": 3.4574196838347786e-06, "loss": 0.1221, "step": 340 }, { "epoch": 0.0214119662302704, "grad_norm": 2.9132304191589355, "learning_rate": 3.5594084650688425e-06, "loss": 0.1094, "step": 350 }, { "epoch": 0.022023736693992414, "grad_norm": 2.5460691452026367, "learning_rate": 3.6613972463029068e-06, "loss": 0.1096, "step": 360 }, { "epoch": 0.022635507157714424, "grad_norm": 2.2170896530151367, "learning_rate": 3.763386027536971e-06, "loss": 0.0933, "step": 370 }, { "epoch": 0.023247277621436437, "grad_norm": 3.799591541290283, "learning_rate": 3.865374808771036e-06, "loss": 0.1142, "step": 380 }, { "epoch": 0.023859048085158447, "grad_norm": 4.609161376953125, "learning_rate": 3.9673635900051e-06, "loss": 0.0948, "step": 390 }, { "epoch": 0.02447081854888046, "grad_norm": 3.329650402069092, "learning_rate": 4.0693523712391645e-06, "loss": 0.0944, "step": 400 }, { "epoch": 0.02508258901260247, "grad_norm": 2.135805368423462, "learning_rate": 4.171341152473228e-06, "loss": 0.0847, "step": 410 }, { "epoch": 0.025694359476324483, "grad_norm": 2.006049156188965, "learning_rate": 4.273329933707292e-06, "loss": 0.1077, "step": 420 }, { "epoch": 0.026306129940046493, "grad_norm": 5.709030628204346, "learning_rate": 4.375318714941357e-06, "loss": 0.099, "step": 430 }, { "epoch": 0.026917900403768506, "grad_norm": 2.466867446899414, "learning_rate": 4.477307496175421e-06, "loss": 0.0771, "step": 440 }, { "epoch": 0.027529670867490516, "grad_norm": 2.3829548358917236, "learning_rate": 4.579296277409485e-06, "loss": 0.0802, "step": 450 }, { "epoch": 0.02814144133121253, "grad_norm": 2.990241050720215, "learning_rate": 4.6812850586435496e-06, "loss": 0.1048, "step": 460 }, { "epoch": 0.02875321179493454, "grad_norm": 3.7126529216766357, "learning_rate": 4.783273839877614e-06, "loss": 0.0879, "step": 470 }, { "epoch": 0.029364982258656552, "grad_norm": 2.6238865852355957, "learning_rate": 4.885262621111678e-06, "loss": 0.0825, "step": 480 }, { "epoch": 0.029976752722378562, "grad_norm": 2.1882681846618652, "learning_rate": 4.987251402345742e-06, "loss": 0.0821, "step": 490 }, { "epoch": 0.030588523186100575, "grad_norm": 2.1399929523468018, "learning_rate": 5.089240183579807e-06, "loss": 0.0926, "step": 500 }, { "epoch": 0.030588523186100575, "eval_accuracy": 0.8732, "eval_f1_weighted": 0.8728301347090768, "eval_loss": 0.08081420511007309, "eval_macro_f1": 0.8730387325792941, "eval_precision": 0.8842823474890878, "eval_recall": 0.8732, "eval_runtime": 1731.362, "eval_samples_per_second": 57.758, "eval_steps_per_second": 0.903, "step": 500 } ], "logging_steps": 10, "max_steps": 16346, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4207556886230400.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }