| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 24.928092042186, |
| "eval_steps": 500, |
| "global_step": 26000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.009587727708533078, |
| "grad_norm": 16.952646255493164, |
| "learning_rate": 6.923076923076923e-07, |
| "loss": 1.0981, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019175455417066157, |
| "grad_norm": 10.460495948791504, |
| "learning_rate": 1.4615384615384616e-06, |
| "loss": 1.0812, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.028763183125599234, |
| "grad_norm": 5.855829238891602, |
| "learning_rate": 2.2307692307692307e-06, |
| "loss": 0.8546, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.038350910834132314, |
| "grad_norm": 3.5648763179779053, |
| "learning_rate": 3e-06, |
| "loss": 0.4892, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04793863854266539, |
| "grad_norm": 1.662581205368042, |
| "learning_rate": 3.7692307692307694e-06, |
| "loss": 0.3639, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05752636625119847, |
| "grad_norm": 1.4345495700836182, |
| "learning_rate": 4.538461538461539e-06, |
| "loss": 0.2922, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06711409395973154, |
| "grad_norm": 0.9792284369468689, |
| "learning_rate": 5.307692307692308e-06, |
| "loss": 0.2074, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.07670182166826463, |
| "grad_norm": 0.9946898818016052, |
| "learning_rate": 6.0769230769230775e-06, |
| "loss": 0.2146, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0862895493767977, |
| "grad_norm": 1.3608415126800537, |
| "learning_rate": 6.846153846153847e-06, |
| "loss": 0.1745, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.09587727708533078, |
| "grad_norm": 1.0509544610977173, |
| "learning_rate": 7.615384615384616e-06, |
| "loss": 0.1748, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.10546500479386385, |
| "grad_norm": 0.9403872489929199, |
| "learning_rate": 8.384615384615385e-06, |
| "loss": 0.1564, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.11505273250239693, |
| "grad_norm": 0.9270913004875183, |
| "learning_rate": 9.153846153846155e-06, |
| "loss": 0.1696, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12464046021093, |
| "grad_norm": 1.0389190912246704, |
| "learning_rate": 9.923076923076923e-06, |
| "loss": 0.1392, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1342281879194631, |
| "grad_norm": 0.9624547958374023, |
| "learning_rate": 1.0692307692307694e-05, |
| "loss": 0.1311, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.14381591562799617, |
| "grad_norm": 1.0129961967468262, |
| "learning_rate": 1.1461538461538462e-05, |
| "loss": 0.1212, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.15340364333652926, |
| "grad_norm": 1.2572994232177734, |
| "learning_rate": 1.2230769230769232e-05, |
| "loss": 0.1267, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1629913710450623, |
| "grad_norm": 1.06370210647583, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.1241, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1725790987535954, |
| "grad_norm": 1.2056634426116943, |
| "learning_rate": 1.3769230769230771e-05, |
| "loss": 0.1207, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.18216682646212848, |
| "grad_norm": 1.5257799625396729, |
| "learning_rate": 1.453846153846154e-05, |
| "loss": 0.1078, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.19175455417066156, |
| "grad_norm": 1.0377438068389893, |
| "learning_rate": 1.5307692307692308e-05, |
| "loss": 0.1191, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.20134228187919462, |
| "grad_norm": 1.1318110227584839, |
| "learning_rate": 1.607692307692308e-05, |
| "loss": 0.1211, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2109300095877277, |
| "grad_norm": 0.8394863605499268, |
| "learning_rate": 1.684615384615385e-05, |
| "loss": 0.1103, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.22051773729626079, |
| "grad_norm": 0.6863590478897095, |
| "learning_rate": 1.7615384615384615e-05, |
| "loss": 0.101, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.23010546500479387, |
| "grad_norm": 0.9169079661369324, |
| "learning_rate": 1.8384615384615386e-05, |
| "loss": 0.1034, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.23969319271332695, |
| "grad_norm": 1.088216781616211, |
| "learning_rate": 1.9153846153846156e-05, |
| "loss": 0.0964, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.24928092042186, |
| "grad_norm": 0.8121523261070251, |
| "learning_rate": 1.9923076923076926e-05, |
| "loss": 0.0959, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2588686481303931, |
| "grad_norm": 1.408576250076294, |
| "learning_rate": 2.0692307692307693e-05, |
| "loss": 0.1017, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2684563758389262, |
| "grad_norm": 1.0147638320922852, |
| "learning_rate": 2.1461538461538463e-05, |
| "loss": 0.0935, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.27804410354745923, |
| "grad_norm": 1.0343986749649048, |
| "learning_rate": 2.2230769230769233e-05, |
| "loss": 0.1039, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.28763183125599234, |
| "grad_norm": 1.32474684715271, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.0926, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2972195589645254, |
| "grad_norm": 0.9797191619873047, |
| "learning_rate": 2.376923076923077e-05, |
| "loss": 0.0967, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3068072866730585, |
| "grad_norm": 0.6505740880966187, |
| "learning_rate": 2.453846153846154e-05, |
| "loss": 0.0792, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.31639501438159157, |
| "grad_norm": 0.9509547352790833, |
| "learning_rate": 2.530769230769231e-05, |
| "loss": 0.0827, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3259827420901246, |
| "grad_norm": 1.0206745862960815, |
| "learning_rate": 2.6076923076923077e-05, |
| "loss": 0.0771, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.33557046979865773, |
| "grad_norm": 1.0349384546279907, |
| "learning_rate": 2.6846153846153848e-05, |
| "loss": 0.0799, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.3451581975071908, |
| "grad_norm": 0.9108182191848755, |
| "learning_rate": 2.7615384615384614e-05, |
| "loss": 0.0873, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3547459252157239, |
| "grad_norm": 0.5712908506393433, |
| "learning_rate": 2.8384615384615388e-05, |
| "loss": 0.0831, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.36433365292425696, |
| "grad_norm": 0.9796934127807617, |
| "learning_rate": 2.9153846153846155e-05, |
| "loss": 0.0719, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.37392138063279, |
| "grad_norm": 1.1480381488800049, |
| "learning_rate": 2.9923076923076925e-05, |
| "loss": 0.0828, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3835091083413231, |
| "grad_norm": 0.5862910151481628, |
| "learning_rate": 3.069230769230769e-05, |
| "loss": 0.0709, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3930968360498562, |
| "grad_norm": 0.7163400650024414, |
| "learning_rate": 3.146153846153846e-05, |
| "loss": 0.0721, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.40268456375838924, |
| "grad_norm": 0.7817345261573792, |
| "learning_rate": 3.223076923076923e-05, |
| "loss": 0.0717, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.41227229146692235, |
| "grad_norm": 0.6121333837509155, |
| "learning_rate": 3.3e-05, |
| "loss": 0.0665, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4218600191754554, |
| "grad_norm": 0.774795413017273, |
| "learning_rate": 3.376923076923077e-05, |
| "loss": 0.0691, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.4314477468839885, |
| "grad_norm": 0.898847222328186, |
| "learning_rate": 3.453846153846154e-05, |
| "loss": 0.0741, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.44103547459252157, |
| "grad_norm": 0.7293726801872253, |
| "learning_rate": 3.5307692307692306e-05, |
| "loss": 0.0645, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.4506232023010546, |
| "grad_norm": 0.5674548149108887, |
| "learning_rate": 3.607692307692308e-05, |
| "loss": 0.0629, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.46021093000958774, |
| "grad_norm": 0.7961140275001526, |
| "learning_rate": 3.684615384615385e-05, |
| "loss": 0.0628, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4697986577181208, |
| "grad_norm": 0.6253398656845093, |
| "learning_rate": 3.761538461538462e-05, |
| "loss": 0.0645, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4793863854266539, |
| "grad_norm": 0.9273212552070618, |
| "learning_rate": 3.838461538461539e-05, |
| "loss": 0.062, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.48897411313518696, |
| "grad_norm": 0.6789622902870178, |
| "learning_rate": 3.915384615384616e-05, |
| "loss": 0.0654, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.49856184084372, |
| "grad_norm": 0.8248144388198853, |
| "learning_rate": 3.992307692307692e-05, |
| "loss": 0.0665, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5081495685522531, |
| "grad_norm": 0.8695188164710999, |
| "learning_rate": 4.06923076923077e-05, |
| "loss": 0.0717, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5177372962607862, |
| "grad_norm": 0.6595909595489502, |
| "learning_rate": 4.146153846153846e-05, |
| "loss": 0.0628, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.5273250239693192, |
| "grad_norm": 0.7226746678352356, |
| "learning_rate": 4.223076923076924e-05, |
| "loss": 0.0657, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.5369127516778524, |
| "grad_norm": 0.6370866298675537, |
| "learning_rate": 4.3e-05, |
| "loss": 0.0581, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5465004793863855, |
| "grad_norm": 0.47755175828933716, |
| "learning_rate": 4.376923076923077e-05, |
| "loss": 0.052, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5560882070949185, |
| "grad_norm": 0.7424858808517456, |
| "learning_rate": 4.453846153846154e-05, |
| "loss": 0.0606, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.5656759348034516, |
| "grad_norm": 0.4627436399459839, |
| "learning_rate": 4.530769230769231e-05, |
| "loss": 0.0618, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5752636625119847, |
| "grad_norm": 0.5372833609580994, |
| "learning_rate": 4.6076923076923076e-05, |
| "loss": 0.0616, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5848513902205177, |
| "grad_norm": 0.8923951387405396, |
| "learning_rate": 4.684615384615385e-05, |
| "loss": 0.0659, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5944391179290508, |
| "grad_norm": 0.9428364038467407, |
| "learning_rate": 4.7615384615384616e-05, |
| "loss": 0.0707, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.6040268456375839, |
| "grad_norm": 0.7362667322158813, |
| "learning_rate": 4.8384615384615386e-05, |
| "loss": 0.062, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.613614573346117, |
| "grad_norm": 0.7807226181030273, |
| "learning_rate": 4.9153846153846157e-05, |
| "loss": 0.0662, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.62320230105465, |
| "grad_norm": 0.5898621678352356, |
| "learning_rate": 4.992307692307693e-05, |
| "loss": 0.0594, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.6327900287631831, |
| "grad_norm": 0.4694168269634247, |
| "learning_rate": 5.06923076923077e-05, |
| "loss": 0.0572, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.6423777564717162, |
| "grad_norm": 0.6720401048660278, |
| "learning_rate": 5.146153846153846e-05, |
| "loss": 0.0697, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6519654841802492, |
| "grad_norm": 0.5371865034103394, |
| "learning_rate": 5.223076923076924e-05, |
| "loss": 0.059, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6615532118887824, |
| "grad_norm": 0.6751993894577026, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 0.0566, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6711409395973155, |
| "grad_norm": 0.7496346831321716, |
| "learning_rate": 5.376923076923077e-05, |
| "loss": 0.0592, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6807286673058485, |
| "grad_norm": 0.7620933055877686, |
| "learning_rate": 5.453846153846154e-05, |
| "loss": 0.0645, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6903163950143816, |
| "grad_norm": 0.9095701575279236, |
| "learning_rate": 5.5307692307692305e-05, |
| "loss": 0.0568, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6999041227229147, |
| "grad_norm": 0.7606950998306274, |
| "learning_rate": 5.607692307692308e-05, |
| "loss": 0.0624, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.7094918504314478, |
| "grad_norm": 1.0387766361236572, |
| "learning_rate": 5.684615384615385e-05, |
| "loss": 0.0584, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.7190795781399808, |
| "grad_norm": 0.7113978862762451, |
| "learning_rate": 5.7615384615384615e-05, |
| "loss": 0.0652, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.7286673058485139, |
| "grad_norm": 0.604448139667511, |
| "learning_rate": 5.838461538461538e-05, |
| "loss": 0.0654, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.738255033557047, |
| "grad_norm": 0.8723410367965698, |
| "learning_rate": 5.915384615384616e-05, |
| "loss": 0.0531, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.74784276126558, |
| "grad_norm": 0.5730307102203369, |
| "learning_rate": 5.9923076923076926e-05, |
| "loss": 0.0559, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.7574304889741131, |
| "grad_norm": 0.7451117634773254, |
| "learning_rate": 6.0692307692307696e-05, |
| "loss": 0.0643, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7670182166826462, |
| "grad_norm": 0.3902491331100464, |
| "learning_rate": 6.146153846153846e-05, |
| "loss": 0.0611, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7766059443911792, |
| "grad_norm": 0.6148221492767334, |
| "learning_rate": 6.223076923076924e-05, |
| "loss": 0.0549, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7861936720997124, |
| "grad_norm": 0.5791975259780884, |
| "learning_rate": 6.3e-05, |
| "loss": 0.0589, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7957813998082455, |
| "grad_norm": 0.5318537950515747, |
| "learning_rate": 6.376923076923077e-05, |
| "loss": 0.0618, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.8053691275167785, |
| "grad_norm": 0.8901371359825134, |
| "learning_rate": 6.453846153846154e-05, |
| "loss": 0.0563, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.8149568552253116, |
| "grad_norm": 0.8964536786079407, |
| "learning_rate": 6.530769230769231e-05, |
| "loss": 0.0656, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.8245445829338447, |
| "grad_norm": 0.5159094929695129, |
| "learning_rate": 6.607692307692308e-05, |
| "loss": 0.0582, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.8341323106423778, |
| "grad_norm": 0.6684253811836243, |
| "learning_rate": 6.684615384615385e-05, |
| "loss": 0.0569, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.8437200383509108, |
| "grad_norm": 0.5698950290679932, |
| "learning_rate": 6.761538461538461e-05, |
| "loss": 0.0549, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.8533077660594439, |
| "grad_norm": 0.44796323776245117, |
| "learning_rate": 6.838461538461539e-05, |
| "loss": 0.0557, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.862895493767977, |
| "grad_norm": 0.7032187581062317, |
| "learning_rate": 6.915384615384616e-05, |
| "loss": 0.069, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.87248322147651, |
| "grad_norm": 0.538271963596344, |
| "learning_rate": 6.992307692307692e-05, |
| "loss": 0.0568, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8820709491850431, |
| "grad_norm": 0.46786853671073914, |
| "learning_rate": 7.069230769230769e-05, |
| "loss": 0.0623, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8916586768935763, |
| "grad_norm": 0.6529656052589417, |
| "learning_rate": 7.146153846153847e-05, |
| "loss": 0.064, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.9012464046021093, |
| "grad_norm": 0.9618151187896729, |
| "learning_rate": 7.223076923076923e-05, |
| "loss": 0.0557, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.9108341323106424, |
| "grad_norm": 0.5643552541732788, |
| "learning_rate": 7.3e-05, |
| "loss": 0.0651, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.9204218600191755, |
| "grad_norm": 0.7007706761360168, |
| "learning_rate": 7.376923076923077e-05, |
| "loss": 0.0514, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.9300095877277086, |
| "grad_norm": 0.4530331492424011, |
| "learning_rate": 7.453846153846154e-05, |
| "loss": 0.0563, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.9395973154362416, |
| "grad_norm": 0.6113521456718445, |
| "learning_rate": 7.530769230769231e-05, |
| "loss": 0.0606, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.9491850431447747, |
| "grad_norm": 0.5007736682891846, |
| "learning_rate": 7.607692307692308e-05, |
| "loss": 0.0561, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.9587727708533078, |
| "grad_norm": 0.49903005361557007, |
| "learning_rate": 7.684615384615385e-05, |
| "loss": 0.0578, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9683604985618408, |
| "grad_norm": 0.629622220993042, |
| "learning_rate": 7.761538461538462e-05, |
| "loss": 0.0572, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9779482262703739, |
| "grad_norm": 0.5830038785934448, |
| "learning_rate": 7.838461538461539e-05, |
| "loss": 0.0586, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.987535953978907, |
| "grad_norm": 0.502075731754303, |
| "learning_rate": 7.915384615384616e-05, |
| "loss": 0.052, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.99712368168744, |
| "grad_norm": 0.6076005101203918, |
| "learning_rate": 7.992307692307692e-05, |
| "loss": 0.0536, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.0067114093959733, |
| "grad_norm": 0.6297442317008972, |
| "learning_rate": 8.06923076923077e-05, |
| "loss": 0.0565, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.0162991371045063, |
| "grad_norm": 0.6776733994483948, |
| "learning_rate": 8.146153846153847e-05, |
| "loss": 0.0556, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.0258868648130393, |
| "grad_norm": 0.5691619515419006, |
| "learning_rate": 8.223076923076923e-05, |
| "loss": 0.0528, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.0354745925215725, |
| "grad_norm": 0.7027555108070374, |
| "learning_rate": 8.3e-05, |
| "loss": 0.0614, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.0450623202301055, |
| "grad_norm": 0.7508878111839294, |
| "learning_rate": 8.376923076923078e-05, |
| "loss": 0.0496, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.0546500479386385, |
| "grad_norm": 0.6663224101066589, |
| "learning_rate": 8.453846153846154e-05, |
| "loss": 0.0507, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0642377756471717, |
| "grad_norm": 0.5372412204742432, |
| "learning_rate": 8.530769230769231e-05, |
| "loss": 0.0547, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.0738255033557047, |
| "grad_norm": 0.6460400223731995, |
| "learning_rate": 8.607692307692308e-05, |
| "loss": 0.0598, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.0834132310642377, |
| "grad_norm": 0.5155197381973267, |
| "learning_rate": 8.684615384615385e-05, |
| "loss": 0.0601, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.093000958772771, |
| "grad_norm": 0.42931079864501953, |
| "learning_rate": 8.761538461538462e-05, |
| "loss": 0.0602, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.102588686481304, |
| "grad_norm": 0.5317569971084595, |
| "learning_rate": 8.838461538461539e-05, |
| "loss": 0.0577, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.112176414189837, |
| "grad_norm": 0.6564596891403198, |
| "learning_rate": 8.915384615384616e-05, |
| "loss": 0.0596, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.1217641418983701, |
| "grad_norm": 0.43666043877601624, |
| "learning_rate": 8.992307692307693e-05, |
| "loss": 0.0549, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.1313518696069031, |
| "grad_norm": 0.6105823516845703, |
| "learning_rate": 9.06923076923077e-05, |
| "loss": 0.0641, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.1409395973154361, |
| "grad_norm": 0.5657874345779419, |
| "learning_rate": 9.146153846153847e-05, |
| "loss": 0.0591, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.1505273250239694, |
| "grad_norm": 0.5609491467475891, |
| "learning_rate": 9.223076923076923e-05, |
| "loss": 0.0622, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.1601150527325024, |
| "grad_norm": 0.6493374705314636, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 0.0589, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.1697027804410354, |
| "grad_norm": 0.7406426072120667, |
| "learning_rate": 9.376923076923078e-05, |
| "loss": 0.0579, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.1792905081495686, |
| "grad_norm": 0.6438266634941101, |
| "learning_rate": 9.453846153846154e-05, |
| "loss": 0.058, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.1888782358581016, |
| "grad_norm": 0.49737435579299927, |
| "learning_rate": 9.530769230769231e-05, |
| "loss": 0.0599, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.1984659635666346, |
| "grad_norm": 0.5221342444419861, |
| "learning_rate": 9.607692307692309e-05, |
| "loss": 0.0613, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.2080536912751678, |
| "grad_norm": 0.5636175870895386, |
| "learning_rate": 9.684615384615385e-05, |
| "loss": 0.054, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.2176414189837008, |
| "grad_norm": 0.6858579516410828, |
| "learning_rate": 9.761538461538462e-05, |
| "loss": 0.0633, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.2272291466922338, |
| "grad_norm": 0.5884243845939636, |
| "learning_rate": 9.838461538461539e-05, |
| "loss": 0.0576, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.236816874400767, |
| "grad_norm": 0.753278374671936, |
| "learning_rate": 9.915384615384616e-05, |
| "loss": 0.0624, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.2464046021093, |
| "grad_norm": 0.5968719720840454, |
| "learning_rate": 9.992307692307693e-05, |
| "loss": 0.0615, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.255992329817833, |
| "grad_norm": 0.4386919140815735, |
| "learning_rate": 9.99999672409862e-05, |
| "loss": 0.0612, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.2655800575263663, |
| "grad_norm": 0.7106592655181885, |
| "learning_rate": 9.999985400000595e-05, |
| "loss": 0.0582, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.2751677852348993, |
| "grad_norm": 0.642195463180542, |
| "learning_rate": 9.999965987281012e-05, |
| "loss": 0.0539, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.2847555129434325, |
| "grad_norm": 0.8102270364761353, |
| "learning_rate": 9.999938485971279e-05, |
| "loss": 0.0571, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.2943432406519655, |
| "grad_norm": 0.5724937319755554, |
| "learning_rate": 9.999902896115882e-05, |
| "loss": 0.059, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.3039309683604985, |
| "grad_norm": 0.5850300788879395, |
| "learning_rate": 9.999859217772396e-05, |
| "loss": 0.0546, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.3135186960690317, |
| "grad_norm": 0.5836851000785828, |
| "learning_rate": 9.999807451011483e-05, |
| "loss": 0.0574, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.3231064237775647, |
| "grad_norm": 0.4875651001930237, |
| "learning_rate": 9.999747595916886e-05, |
| "loss": 0.0584, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.332694151486098, |
| "grad_norm": 0.6385061144828796, |
| "learning_rate": 9.999679652585436e-05, |
| "loss": 0.0551, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.342281879194631, |
| "grad_norm": 0.6868314743041992, |
| "learning_rate": 9.999603621127043e-05, |
| "loss": 0.0644, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.351869606903164, |
| "grad_norm": 0.879398763179779, |
| "learning_rate": 9.99951950166471e-05, |
| "loss": 0.0556, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.3614573346116972, |
| "grad_norm": 0.5804061889648438, |
| "learning_rate": 9.999427294334516e-05, |
| "loss": 0.066, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.3710450623202302, |
| "grad_norm": 0.8581869602203369, |
| "learning_rate": 9.999326999285628e-05, |
| "loss": 0.0604, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.3806327900287632, |
| "grad_norm": 0.5264695882797241, |
| "learning_rate": 9.999218616680299e-05, |
| "loss": 0.0616, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.3902205177372964, |
| "grad_norm": 0.9933851957321167, |
| "learning_rate": 9.999102146693859e-05, |
| "loss": 0.0593, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.3998082454458294, |
| "grad_norm": 0.4718506932258606, |
| "learning_rate": 9.998977589514729e-05, |
| "loss": 0.056, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.4093959731543624, |
| "grad_norm": 0.46576133370399475, |
| "learning_rate": 9.998844945344405e-05, |
| "loss": 0.0547, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.4189837008628956, |
| "grad_norm": 0.8062454462051392, |
| "learning_rate": 9.99870421439747e-05, |
| "loss": 0.0624, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.6128931641578674, |
| "learning_rate": 9.99855539690159e-05, |
| "loss": 0.054, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.4381591562799616, |
| "grad_norm": 0.781894326210022, |
| "learning_rate": 9.998398493097511e-05, |
| "loss": 0.0593, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.4477468839884948, |
| "grad_norm": 0.4165836572647095, |
| "learning_rate": 9.998233503239059e-05, |
| "loss": 0.0467, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.4573346116970278, |
| "grad_norm": 0.2851610779762268, |
| "learning_rate": 9.998060427593146e-05, |
| "loss": 0.0544, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.4669223394055608, |
| "grad_norm": 0.578106164932251, |
| "learning_rate": 9.997879266439758e-05, |
| "loss": 0.0518, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.476510067114094, |
| "grad_norm": 0.5424726009368896, |
| "learning_rate": 9.997690020071968e-05, |
| "loss": 0.0589, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.486097794822627, |
| "grad_norm": 0.3104839622974396, |
| "learning_rate": 9.997492688795924e-05, |
| "loss": 0.0675, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.49568552253116, |
| "grad_norm": 0.3031683564186096, |
| "learning_rate": 9.997287272930854e-05, |
| "loss": 0.0568, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.5052732502396933, |
| "grad_norm": 0.3921635150909424, |
| "learning_rate": 9.997073772809065e-05, |
| "loss": 0.0475, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.5148609779482263, |
| "grad_norm": 0.32904309034347534, |
| "learning_rate": 9.996852188775942e-05, |
| "loss": 0.0475, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.5244487056567593, |
| "grad_norm": 0.5768727660179138, |
| "learning_rate": 9.996622521189952e-05, |
| "loss": 0.0471, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.5340364333652925, |
| "grad_norm": 0.5629034042358398, |
| "learning_rate": 9.996384770422629e-05, |
| "loss": 0.0498, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.5436241610738255, |
| "grad_norm": 0.5300479531288147, |
| "learning_rate": 9.996138936858593e-05, |
| "loss": 0.0498, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.5532118887823585, |
| "grad_norm": 0.5257939696311951, |
| "learning_rate": 9.995885020895536e-05, |
| "loss": 0.0512, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.5627996164908917, |
| "grad_norm": 0.5948016047477722, |
| "learning_rate": 9.995623022944223e-05, |
| "loss": 0.0512, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.5723873441994247, |
| "grad_norm": 0.5377443432807922, |
| "learning_rate": 9.995352943428497e-05, |
| "loss": 0.0563, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.5819750719079577, |
| "grad_norm": 0.658616840839386, |
| "learning_rate": 9.995074782785275e-05, |
| "loss": 0.0591, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.591562799616491, |
| "grad_norm": 0.5048621296882629, |
| "learning_rate": 9.994788541464543e-05, |
| "loss": 0.0461, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.601150527325024, |
| "grad_norm": 0.30649903416633606, |
| "learning_rate": 9.994494219929365e-05, |
| "loss": 0.0472, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.610738255033557, |
| "grad_norm": 0.5432398319244385, |
| "learning_rate": 9.99419181865587e-05, |
| "loss": 0.0516, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.6203259827420902, |
| "grad_norm": 0.458732545375824, |
| "learning_rate": 9.993881338133261e-05, |
| "loss": 0.0471, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.6299137104506232, |
| "grad_norm": 0.4103093445301056, |
| "learning_rate": 9.993562778863817e-05, |
| "loss": 0.0533, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.6395014381591562, |
| "grad_norm": 0.7458987832069397, |
| "learning_rate": 9.993236141362874e-05, |
| "loss": 0.0533, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.6490891658676894, |
| "grad_norm": 0.4409146010875702, |
| "learning_rate": 9.992901426158848e-05, |
| "loss": 0.0574, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.6586768935762224, |
| "grad_norm": 0.43476009368896484, |
| "learning_rate": 9.992558633793212e-05, |
| "loss": 0.0551, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.6682646212847554, |
| "grad_norm": 0.5552487373352051, |
| "learning_rate": 9.992207764820516e-05, |
| "loss": 0.0544, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.6778523489932886, |
| "grad_norm": 0.3948347270488739, |
| "learning_rate": 9.99184881980837e-05, |
| "loss": 0.0549, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.6874400767018218, |
| "grad_norm": 0.36312541365623474, |
| "learning_rate": 9.991481799337448e-05, |
| "loss": 0.0628, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.6970278044103546, |
| "grad_norm": 0.48039504885673523, |
| "learning_rate": 9.991106704001491e-05, |
| "loss": 0.0518, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.7066155321188878, |
| "grad_norm": 0.43102404475212097, |
| "learning_rate": 9.990723534407302e-05, |
| "loss": 0.0531, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.716203259827421, |
| "grad_norm": 0.635412335395813, |
| "learning_rate": 9.990332291174747e-05, |
| "loss": 0.0623, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.7257909875359538, |
| "grad_norm": 0.41768330335617065, |
| "learning_rate": 9.989932974936746e-05, |
| "loss": 0.0489, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.735378715244487, |
| "grad_norm": 0.4321722984313965, |
| "learning_rate": 9.98952558633929e-05, |
| "loss": 0.0578, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.7449664429530203, |
| "grad_norm": 0.5160396099090576, |
| "learning_rate": 9.98911012604142e-05, |
| "loss": 0.0538, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.754554170661553, |
| "grad_norm": 0.5091599822044373, |
| "learning_rate": 9.98868659471524e-05, |
| "loss": 0.062, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.7641418983700863, |
| "grad_norm": 0.289798378944397, |
| "learning_rate": 9.988254993045908e-05, |
| "loss": 0.0561, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.7737296260786195, |
| "grad_norm": 0.6626523733139038, |
| "learning_rate": 9.98781532173164e-05, |
| "loss": 0.0584, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.7833173537871523, |
| "grad_norm": 0.4821811020374298, |
| "learning_rate": 9.987367581483705e-05, |
| "loss": 0.0597, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.7929050814956855, |
| "grad_norm": 0.45109039545059204, |
| "learning_rate": 9.986911773026422e-05, |
| "loss": 0.0618, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.8024928092042187, |
| "grad_norm": 0.5203428864479065, |
| "learning_rate": 9.98644789709717e-05, |
| "loss": 0.054, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.8120805369127517, |
| "grad_norm": 0.3689659833908081, |
| "learning_rate": 9.985975954446372e-05, |
| "loss": 0.0506, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.8216682646212847, |
| "grad_norm": 0.5378998517990112, |
| "learning_rate": 9.985495945837504e-05, |
| "loss": 0.0527, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.831255992329818, |
| "grad_norm": 0.36838144063949585, |
| "learning_rate": 9.985007872047088e-05, |
| "loss": 0.0484, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.840843720038351, |
| "grad_norm": 0.3217353820800781, |
| "learning_rate": 9.984511733864698e-05, |
| "loss": 0.0495, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.850431447746884, |
| "grad_norm": 0.5914832353591919, |
| "learning_rate": 9.984007532092951e-05, |
| "loss": 0.0562, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.8600191754554172, |
| "grad_norm": 0.44079649448394775, |
| "learning_rate": 9.983495267547508e-05, |
| "loss": 0.0515, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.8696069031639502, |
| "grad_norm": 0.5204843878746033, |
| "learning_rate": 9.982974941057073e-05, |
| "loss": 0.0547, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.8791946308724832, |
| "grad_norm": 0.505711555480957, |
| "learning_rate": 9.982446553463397e-05, |
| "loss": 0.0445, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.8887823585810164, |
| "grad_norm": 0.3592546582221985, |
| "learning_rate": 9.981910105621262e-05, |
| "loss": 0.0586, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.8983700862895494, |
| "grad_norm": 0.3347618281841278, |
| "learning_rate": 9.9813655983985e-05, |
| "loss": 0.0616, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.9079578139980824, |
| "grad_norm": 0.6229729056358337, |
| "learning_rate": 9.980813032675974e-05, |
| "loss": 0.0486, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.9175455417066156, |
| "grad_norm": 0.4660274386405945, |
| "learning_rate": 9.980252409347588e-05, |
| "loss": 0.0481, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.9271332694151486, |
| "grad_norm": 0.2956122159957886, |
| "learning_rate": 9.979683729320275e-05, |
| "loss": 0.0511, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.9367209971236816, |
| "grad_norm": 0.45697900652885437, |
| "learning_rate": 9.97910699351401e-05, |
| "loss": 0.0519, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.9463087248322148, |
| "grad_norm": 0.5107268691062927, |
| "learning_rate": 9.97852220286179e-05, |
| "loss": 0.0563, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.9558964525407478, |
| "grad_norm": 0.3761272728443146, |
| "learning_rate": 9.97792935830965e-05, |
| "loss": 0.0532, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.9654841802492808, |
| "grad_norm": 0.4759978950023651, |
| "learning_rate": 9.977328460816654e-05, |
| "loss": 0.0588, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.975071907957814, |
| "grad_norm": 0.4457103908061981, |
| "learning_rate": 9.976719511354889e-05, |
| "loss": 0.0459, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.984659635666347, |
| "grad_norm": 0.31241118907928467, |
| "learning_rate": 9.976102510909469e-05, |
| "loss": 0.0521, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.99424736337488, |
| "grad_norm": 0.5308888554573059, |
| "learning_rate": 9.975477460478538e-05, |
| "loss": 0.0514, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.0038350910834133, |
| "grad_norm": 0.35070937871932983, |
| "learning_rate": 9.974844361073252e-05, |
| "loss": 0.0524, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.0134228187919465, |
| "grad_norm": 0.47052425146102905, |
| "learning_rate": 9.9742032137178e-05, |
| "loss": 0.0476, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.0230105465004793, |
| "grad_norm": 0.6150134205818176, |
| "learning_rate": 9.973554019449383e-05, |
| "loss": 0.0412, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.0325982742090125, |
| "grad_norm": 0.5497679114341736, |
| "learning_rate": 9.972896779318219e-05, |
| "loss": 0.0592, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.0421860019175457, |
| "grad_norm": 0.5127347111701965, |
| "learning_rate": 9.972231494387547e-05, |
| "loss": 0.0468, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.0517737296260785, |
| "grad_norm": 0.43948736786842346, |
| "learning_rate": 9.971558165733619e-05, |
| "loss": 0.0484, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.0613614573346117, |
| "grad_norm": 0.47324222326278687, |
| "learning_rate": 9.970876794445694e-05, |
| "loss": 0.0517, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.070949185043145, |
| "grad_norm": 0.34907156229019165, |
| "learning_rate": 9.970187381626048e-05, |
| "loss": 0.0566, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.0805369127516777, |
| "grad_norm": 0.51346355676651, |
| "learning_rate": 9.969489928389965e-05, |
| "loss": 0.0409, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.090124640460211, |
| "grad_norm": 0.34040042757987976, |
| "learning_rate": 9.968784435865737e-05, |
| "loss": 0.0462, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.099712368168744, |
| "grad_norm": 0.4003884792327881, |
| "learning_rate": 9.968070905194656e-05, |
| "loss": 0.0434, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.109300095877277, |
| "grad_norm": 0.4381425380706787, |
| "learning_rate": 9.967349337531023e-05, |
| "loss": 0.0438, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.11888782358581, |
| "grad_norm": 0.5975500345230103, |
| "learning_rate": 9.966619734042139e-05, |
| "loss": 0.0441, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.1284755512943434, |
| "grad_norm": 0.39649492502212524, |
| "learning_rate": 9.965882095908305e-05, |
| "loss": 0.0485, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.138063279002876, |
| "grad_norm": 0.5102829337120056, |
| "learning_rate": 9.96513642432282e-05, |
| "loss": 0.0462, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.1476510067114094, |
| "grad_norm": 0.5115483999252319, |
| "learning_rate": 9.964382720491976e-05, |
| "loss": 0.0539, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.1572387344199426, |
| "grad_norm": 0.4768059551715851, |
| "learning_rate": 9.963620985635065e-05, |
| "loss": 0.0521, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.1668264621284754, |
| "grad_norm": 0.4891989827156067, |
| "learning_rate": 9.962851220984366e-05, |
| "loss": 0.0486, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.1764141898370086, |
| "grad_norm": 0.5893239974975586, |
| "learning_rate": 9.962073427785149e-05, |
| "loss": 0.053, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.186001917545542, |
| "grad_norm": 0.640600323677063, |
| "learning_rate": 9.961287607295673e-05, |
| "loss": 0.0516, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.1955896452540746, |
| "grad_norm": 0.5314393639564514, |
| "learning_rate": 9.960493760787184e-05, |
| "loss": 0.0552, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.205177372962608, |
| "grad_norm": 0.4695710241794586, |
| "learning_rate": 9.95969188954391e-05, |
| "loss": 0.0488, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.214765100671141, |
| "grad_norm": 0.41498687863349915, |
| "learning_rate": 9.958881994863058e-05, |
| "loss": 0.0554, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.224352828379674, |
| "grad_norm": 0.3587738573551178, |
| "learning_rate": 9.958064078054823e-05, |
| "loss": 0.0415, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.233940556088207, |
| "grad_norm": 0.3993861973285675, |
| "learning_rate": 9.957238140442371e-05, |
| "loss": 0.0529, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.2435282837967403, |
| "grad_norm": 0.4770705997943878, |
| "learning_rate": 9.956404183361845e-05, |
| "loss": 0.0521, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.253116011505273, |
| "grad_norm": 0.5887109041213989, |
| "learning_rate": 9.955562208162362e-05, |
| "loss": 0.0632, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.2627037392138063, |
| "grad_norm": 0.6732892990112305, |
| "learning_rate": 9.954712216206008e-05, |
| "loss": 0.06, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.2722914669223395, |
| "grad_norm": 0.37186869978904724, |
| "learning_rate": 9.953854208867841e-05, |
| "loss": 0.0572, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.2818791946308723, |
| "grad_norm": 0.3546556234359741, |
| "learning_rate": 9.952988187535886e-05, |
| "loss": 0.0495, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.2914669223394055, |
| "grad_norm": 0.23416608572006226, |
| "learning_rate": 9.952114153611128e-05, |
| "loss": 0.0463, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.3010546500479387, |
| "grad_norm": 0.5339412689208984, |
| "learning_rate": 9.951232108507517e-05, |
| "loss": 0.0503, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.310642377756472, |
| "grad_norm": 0.34483078122138977, |
| "learning_rate": 9.950342053651967e-05, |
| "loss": 0.0428, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.3202301054650047, |
| "grad_norm": 0.449236124753952, |
| "learning_rate": 9.949443990484342e-05, |
| "loss": 0.0495, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.329817833173538, |
| "grad_norm": 0.40906885266304016, |
| "learning_rate": 9.948537920457466e-05, |
| "loss": 0.0442, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.3394055608820707, |
| "grad_norm": 0.3320155143737793, |
| "learning_rate": 9.947623845037112e-05, |
| "loss": 0.0469, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.348993288590604, |
| "grad_norm": 0.3933449387550354, |
| "learning_rate": 9.946701765702012e-05, |
| "loss": 0.0499, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.358581016299137, |
| "grad_norm": 0.42711353302001953, |
| "learning_rate": 9.945771683943836e-05, |
| "loss": 0.0465, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.3681687440076704, |
| "grad_norm": 0.3379175364971161, |
| "learning_rate": 9.944833601267207e-05, |
| "loss": 0.0446, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.377756471716203, |
| "grad_norm": 0.2655797302722931, |
| "learning_rate": 9.943887519189685e-05, |
| "loss": 0.0457, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.3873441994247364, |
| "grad_norm": 0.534376859664917, |
| "learning_rate": 9.94293343924178e-05, |
| "loss": 0.0386, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.396931927133269, |
| "grad_norm": 0.5116010904312134, |
| "learning_rate": 9.941971362966929e-05, |
| "loss": 0.0488, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.4065196548418024, |
| "grad_norm": 0.33155950903892517, |
| "learning_rate": 9.941001291921512e-05, |
| "loss": 0.0561, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.4161073825503356, |
| "grad_norm": 0.4785441756248474, |
| "learning_rate": 9.940023227674844e-05, |
| "loss": 0.055, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.425695110258869, |
| "grad_norm": 0.4031260907649994, |
| "learning_rate": 9.939037171809167e-05, |
| "loss": 0.0489, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.4352828379674016, |
| "grad_norm": 0.4069255590438843, |
| "learning_rate": 9.93804312591965e-05, |
| "loss": 0.0499, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.444870565675935, |
| "grad_norm": 0.4854568541049957, |
| "learning_rate": 9.937041091614392e-05, |
| "loss": 0.0508, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.4544582933844676, |
| "grad_norm": 0.42022451758384705, |
| "learning_rate": 9.936031070514413e-05, |
| "loss": 0.0533, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.464046021093001, |
| "grad_norm": 0.3417539894580841, |
| "learning_rate": 9.935013064253652e-05, |
| "loss": 0.0487, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.473633748801534, |
| "grad_norm": 0.7130690813064575, |
| "learning_rate": 9.933987074478969e-05, |
| "loss": 0.0482, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.4832214765100673, |
| "grad_norm": 0.328921914100647, |
| "learning_rate": 9.932953102850136e-05, |
| "loss": 0.0462, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.4928092042186, |
| "grad_norm": 0.27391597628593445, |
| "learning_rate": 9.931911151039838e-05, |
| "loss": 0.0543, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.5023969319271333, |
| "grad_norm": 0.3968970775604248, |
| "learning_rate": 9.930861220733674e-05, |
| "loss": 0.0446, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.511984659635666, |
| "grad_norm": 0.31161823868751526, |
| "learning_rate": 9.929803313630145e-05, |
| "loss": 0.0542, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.5215723873441993, |
| "grad_norm": 0.49789026379585266, |
| "learning_rate": 9.928737431440658e-05, |
| "loss": 0.0496, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.5311601150527325, |
| "grad_norm": 0.3426557779312134, |
| "learning_rate": 9.927663575889521e-05, |
| "loss": 0.0451, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.5407478427612658, |
| "grad_norm": 0.35124093294143677, |
| "learning_rate": 9.926581748713942e-05, |
| "loss": 0.0469, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.5503355704697985, |
| "grad_norm": 0.5212651491165161, |
| "learning_rate": 9.925491951664023e-05, |
| "loss": 0.0574, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.5599232981783318, |
| "grad_norm": 0.5474659204483032, |
| "learning_rate": 9.92439418650276e-05, |
| "loss": 0.0592, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.569511025886865, |
| "grad_norm": 0.36428266763687134, |
| "learning_rate": 9.923288455006045e-05, |
| "loss": 0.0534, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.5790987535953978, |
| "grad_norm": 0.3940581977367401, |
| "learning_rate": 9.922174758962645e-05, |
| "loss": 0.0493, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.588686481303931, |
| "grad_norm": 0.32265448570251465, |
| "learning_rate": 9.921053100174223e-05, |
| "loss": 0.0465, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.598274209012464, |
| "grad_norm": 0.35290199518203735, |
| "learning_rate": 9.919923480455317e-05, |
| "loss": 0.048, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.607861936720997, |
| "grad_norm": 0.4928702712059021, |
| "learning_rate": 9.918785901633345e-05, |
| "loss": 0.0463, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.61744966442953, |
| "grad_norm": 0.39868831634521484, |
| "learning_rate": 9.917640365548604e-05, |
| "loss": 0.0478, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.6270373921380634, |
| "grad_norm": 0.48915326595306396, |
| "learning_rate": 9.916486874054259e-05, |
| "loss": 0.0452, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.636625119846596, |
| "grad_norm": 0.3415433466434479, |
| "learning_rate": 9.915325429016345e-05, |
| "loss": 0.0399, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.6462128475551294, |
| "grad_norm": 0.4320572316646576, |
| "learning_rate": 9.914156032313768e-05, |
| "loss": 0.052, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.6558005752636626, |
| "grad_norm": 0.5043158531188965, |
| "learning_rate": 9.912978685838294e-05, |
| "loss": 0.05, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.665388302972196, |
| "grad_norm": 0.3065243363380432, |
| "learning_rate": 9.911793391494552e-05, |
| "loss": 0.0449, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.6749760306807286, |
| "grad_norm": 0.42839324474334717, |
| "learning_rate": 9.910600151200025e-05, |
| "loss": 0.0506, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.684563758389262, |
| "grad_norm": 0.32670149207115173, |
| "learning_rate": 9.909398966885053e-05, |
| "loss": 0.0482, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.6941514860977946, |
| "grad_norm": 0.49310222268104553, |
| "learning_rate": 9.908189840492827e-05, |
| "loss": 0.0457, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.703739213806328, |
| "grad_norm": 0.43462368845939636, |
| "learning_rate": 9.906972773979388e-05, |
| "loss": 0.0494, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.713326941514861, |
| "grad_norm": 0.3611735701560974, |
| "learning_rate": 9.905747769313616e-05, |
| "loss": 0.0472, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.7229146692233943, |
| "grad_norm": 0.3046175539493561, |
| "learning_rate": 9.90451482847724e-05, |
| "loss": 0.046, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.732502396931927, |
| "grad_norm": 0.5815914869308472, |
| "learning_rate": 9.903273953464821e-05, |
| "loss": 0.0505, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.7420901246404603, |
| "grad_norm": 0.4920728802680969, |
| "learning_rate": 9.902025146283761e-05, |
| "loss": 0.0475, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.751677852348993, |
| "grad_norm": 0.3602769374847412, |
| "learning_rate": 9.90076840895429e-05, |
| "loss": 0.0425, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.7612655800575263, |
| "grad_norm": 0.580506443977356, |
| "learning_rate": 9.899503743509471e-05, |
| "loss": 0.0493, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.7708533077660595, |
| "grad_norm": 0.4402373135089874, |
| "learning_rate": 9.898231151995187e-05, |
| "loss": 0.0468, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.7804410354745928, |
| "grad_norm": 0.5210007429122925, |
| "learning_rate": 9.896950636470147e-05, |
| "loss": 0.0461, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.7900287631831255, |
| "grad_norm": 0.4113840162754059, |
| "learning_rate": 9.89566219900588e-05, |
| "loss": 0.0561, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.7996164908916588, |
| "grad_norm": 0.4887576699256897, |
| "learning_rate": 9.894365841686726e-05, |
| "loss": 0.0484, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.8092042186001915, |
| "grad_norm": 0.3261569142341614, |
| "learning_rate": 9.893061566609843e-05, |
| "loss": 0.0457, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.8187919463087248, |
| "grad_norm": 0.3729310631752014, |
| "learning_rate": 9.891749375885191e-05, |
| "loss": 0.0459, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.828379674017258, |
| "grad_norm": 0.4186583459377289, |
| "learning_rate": 9.890429271635541e-05, |
| "loss": 0.0448, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.837967401725791, |
| "grad_norm": 0.4808233380317688, |
| "learning_rate": 9.889101255996466e-05, |
| "loss": 0.0513, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.847555129434324, |
| "grad_norm": 0.24302266538143158, |
| "learning_rate": 9.887765331116331e-05, |
| "loss": 0.0439, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.28988000750541687, |
| "learning_rate": 9.886421499156305e-05, |
| "loss": 0.0448, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.86673058485139, |
| "grad_norm": 0.408470094203949, |
| "learning_rate": 9.88506976229034e-05, |
| "loss": 0.0457, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.876318312559923, |
| "grad_norm": 0.279012531042099, |
| "learning_rate": 9.883710122705184e-05, |
| "loss": 0.0521, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.8859060402684564, |
| "grad_norm": 0.3104060888290405, |
| "learning_rate": 9.882342582600361e-05, |
| "loss": 0.0479, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.8954937679769897, |
| "grad_norm": 0.36359190940856934, |
| "learning_rate": 9.880967144188184e-05, |
| "loss": 0.0545, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.9050814956855224, |
| "grad_norm": 0.3486534059047699, |
| "learning_rate": 9.879583809693738e-05, |
| "loss": 0.0469, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.9146692233940557, |
| "grad_norm": 0.35138458013534546, |
| "learning_rate": 9.878192581354883e-05, |
| "loss": 0.0486, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.9242569511025884, |
| "grad_norm": 0.3004566431045532, |
| "learning_rate": 9.87679346142225e-05, |
| "loss": 0.048, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.9338446788111217, |
| "grad_norm": 0.4111393988132477, |
| "learning_rate": 9.875386452159237e-05, |
| "loss": 0.0526, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.943432406519655, |
| "grad_norm": 0.5051720142364502, |
| "learning_rate": 9.873971555842e-05, |
| "loss": 0.0521, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.953020134228188, |
| "grad_norm": 0.3800508677959442, |
| "learning_rate": 9.872548774759465e-05, |
| "loss": 0.0509, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.962607861936721, |
| "grad_norm": 0.3587114214897156, |
| "learning_rate": 9.871118111213299e-05, |
| "loss": 0.0463, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.972195589645254, |
| "grad_norm": 0.3234626352787018, |
| "learning_rate": 9.869679567517931e-05, |
| "loss": 0.0421, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.981783317353787, |
| "grad_norm": 0.37090590596199036, |
| "learning_rate": 9.868233146000535e-05, |
| "loss": 0.0497, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.99137104506232, |
| "grad_norm": 0.274735689163208, |
| "learning_rate": 9.86677884900103e-05, |
| "loss": 0.0492, |
| "step": 3120 |
| }, |
| { |
| "epoch": 3.0009587727708533, |
| "grad_norm": 0.35899800062179565, |
| "learning_rate": 9.865316678872073e-05, |
| "loss": 0.0436, |
| "step": 3130 |
| }, |
| { |
| "epoch": 3.0105465004793865, |
| "grad_norm": 0.3196784555912018, |
| "learning_rate": 9.863846637979057e-05, |
| "loss": 0.0472, |
| "step": 3140 |
| }, |
| { |
| "epoch": 3.0201342281879193, |
| "grad_norm": 0.3077564537525177, |
| "learning_rate": 9.862368728700115e-05, |
| "loss": 0.0527, |
| "step": 3150 |
| }, |
| { |
| "epoch": 3.0297219558964525, |
| "grad_norm": 0.40086090564727783, |
| "learning_rate": 9.860882953426099e-05, |
| "loss": 0.0507, |
| "step": 3160 |
| }, |
| { |
| "epoch": 3.0393096836049858, |
| "grad_norm": 0.6561570763587952, |
| "learning_rate": 9.859389314560595e-05, |
| "loss": 0.0545, |
| "step": 3170 |
| }, |
| { |
| "epoch": 3.0488974113135185, |
| "grad_norm": 0.34068262577056885, |
| "learning_rate": 9.857887814519902e-05, |
| "loss": 0.0458, |
| "step": 3180 |
| }, |
| { |
| "epoch": 3.0584851390220518, |
| "grad_norm": 0.31878864765167236, |
| "learning_rate": 9.856378455733042e-05, |
| "loss": 0.0399, |
| "step": 3190 |
| }, |
| { |
| "epoch": 3.068072866730585, |
| "grad_norm": 0.41648054122924805, |
| "learning_rate": 9.854861240641748e-05, |
| "loss": 0.0452, |
| "step": 3200 |
| }, |
| { |
| "epoch": 3.0776605944391178, |
| "grad_norm": 0.35710304975509644, |
| "learning_rate": 9.853336171700464e-05, |
| "loss": 0.0509, |
| "step": 3210 |
| }, |
| { |
| "epoch": 3.087248322147651, |
| "grad_norm": 0.3782924711704254, |
| "learning_rate": 9.851803251376336e-05, |
| "loss": 0.0445, |
| "step": 3220 |
| }, |
| { |
| "epoch": 3.096836049856184, |
| "grad_norm": 0.49359890818595886, |
| "learning_rate": 9.85026248214922e-05, |
| "loss": 0.0515, |
| "step": 3230 |
| }, |
| { |
| "epoch": 3.106423777564717, |
| "grad_norm": 0.4491162598133087, |
| "learning_rate": 9.848713866511655e-05, |
| "loss": 0.0444, |
| "step": 3240 |
| }, |
| { |
| "epoch": 3.11601150527325, |
| "grad_norm": 0.3755772113800049, |
| "learning_rate": 9.847157406968885e-05, |
| "loss": 0.0417, |
| "step": 3250 |
| }, |
| { |
| "epoch": 3.1255992329817834, |
| "grad_norm": 0.32591307163238525, |
| "learning_rate": 9.84559310603884e-05, |
| "loss": 0.0571, |
| "step": 3260 |
| }, |
| { |
| "epoch": 3.135186960690316, |
| "grad_norm": 0.4025377333164215, |
| "learning_rate": 9.844020966252137e-05, |
| "loss": 0.0479, |
| "step": 3270 |
| }, |
| { |
| "epoch": 3.1447746883988494, |
| "grad_norm": 0.3106444478034973, |
| "learning_rate": 9.842440990152068e-05, |
| "loss": 0.0472, |
| "step": 3280 |
| }, |
| { |
| "epoch": 3.1543624161073827, |
| "grad_norm": 0.3832003176212311, |
| "learning_rate": 9.840853180294608e-05, |
| "loss": 0.0566, |
| "step": 3290 |
| }, |
| { |
| "epoch": 3.1639501438159154, |
| "grad_norm": 0.2815271019935608, |
| "learning_rate": 9.839257539248403e-05, |
| "loss": 0.0396, |
| "step": 3300 |
| }, |
| { |
| "epoch": 3.1735378715244487, |
| "grad_norm": 0.38503342866897583, |
| "learning_rate": 9.83765406959477e-05, |
| "loss": 0.048, |
| "step": 3310 |
| }, |
| { |
| "epoch": 3.183125599232982, |
| "grad_norm": 0.31450656056404114, |
| "learning_rate": 9.836042773927685e-05, |
| "loss": 0.0383, |
| "step": 3320 |
| }, |
| { |
| "epoch": 3.1927133269415147, |
| "grad_norm": 0.39521682262420654, |
| "learning_rate": 9.834423654853791e-05, |
| "loss": 0.0449, |
| "step": 3330 |
| }, |
| { |
| "epoch": 3.202301054650048, |
| "grad_norm": 0.4725668728351593, |
| "learning_rate": 9.832796714992381e-05, |
| "loss": 0.0436, |
| "step": 3340 |
| }, |
| { |
| "epoch": 3.211888782358581, |
| "grad_norm": 0.43373286724090576, |
| "learning_rate": 9.831161956975405e-05, |
| "loss": 0.0502, |
| "step": 3350 |
| }, |
| { |
| "epoch": 3.221476510067114, |
| "grad_norm": 0.30628758668899536, |
| "learning_rate": 9.829519383447456e-05, |
| "loss": 0.0454, |
| "step": 3360 |
| }, |
| { |
| "epoch": 3.231064237775647, |
| "grad_norm": 0.6050196290016174, |
| "learning_rate": 9.827868997065777e-05, |
| "loss": 0.0528, |
| "step": 3370 |
| }, |
| { |
| "epoch": 3.2406519654841803, |
| "grad_norm": 0.36287015676498413, |
| "learning_rate": 9.826210800500242e-05, |
| "loss": 0.0529, |
| "step": 3380 |
| }, |
| { |
| "epoch": 3.2502396931927136, |
| "grad_norm": 0.41856274008750916, |
| "learning_rate": 9.824544796433366e-05, |
| "loss": 0.0489, |
| "step": 3390 |
| }, |
| { |
| "epoch": 3.2598274209012463, |
| "grad_norm": 0.35269007086753845, |
| "learning_rate": 9.82287098756029e-05, |
| "loss": 0.049, |
| "step": 3400 |
| }, |
| { |
| "epoch": 3.2694151486097796, |
| "grad_norm": 0.35962244868278503, |
| "learning_rate": 9.821189376588786e-05, |
| "loss": 0.0471, |
| "step": 3410 |
| }, |
| { |
| "epoch": 3.2790028763183123, |
| "grad_norm": 0.5149263739585876, |
| "learning_rate": 9.819499966239243e-05, |
| "loss": 0.0533, |
| "step": 3420 |
| }, |
| { |
| "epoch": 3.2885906040268456, |
| "grad_norm": 0.3651978075504303, |
| "learning_rate": 9.81780275924467e-05, |
| "loss": 0.0428, |
| "step": 3430 |
| }, |
| { |
| "epoch": 3.2981783317353788, |
| "grad_norm": 0.377916157245636, |
| "learning_rate": 9.816097758350688e-05, |
| "loss": 0.0527, |
| "step": 3440 |
| }, |
| { |
| "epoch": 3.307766059443912, |
| "grad_norm": 0.39240193367004395, |
| "learning_rate": 9.814384966315526e-05, |
| "loss": 0.0498, |
| "step": 3450 |
| }, |
| { |
| "epoch": 3.3173537871524448, |
| "grad_norm": 0.4727850556373596, |
| "learning_rate": 9.812664385910018e-05, |
| "loss": 0.0519, |
| "step": 3460 |
| }, |
| { |
| "epoch": 3.326941514860978, |
| "grad_norm": 0.3471921980381012, |
| "learning_rate": 9.810936019917595e-05, |
| "loss": 0.043, |
| "step": 3470 |
| }, |
| { |
| "epoch": 3.336529242569511, |
| "grad_norm": 0.3818338215351105, |
| "learning_rate": 9.809199871134287e-05, |
| "loss": 0.0427, |
| "step": 3480 |
| }, |
| { |
| "epoch": 3.346116970278044, |
| "grad_norm": 0.34183284640312195, |
| "learning_rate": 9.807455942368711e-05, |
| "loss": 0.0414, |
| "step": 3490 |
| }, |
| { |
| "epoch": 3.3557046979865772, |
| "grad_norm": 0.3725120425224304, |
| "learning_rate": 9.805704236442073e-05, |
| "loss": 0.0493, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.3652924256951104, |
| "grad_norm": 0.4457106590270996, |
| "learning_rate": 9.803944756188157e-05, |
| "loss": 0.0423, |
| "step": 3510 |
| }, |
| { |
| "epoch": 3.3748801534036432, |
| "grad_norm": 0.3035670220851898, |
| "learning_rate": 9.802177504453326e-05, |
| "loss": 0.0431, |
| "step": 3520 |
| }, |
| { |
| "epoch": 3.3844678811121764, |
| "grad_norm": 0.36193615198135376, |
| "learning_rate": 9.800402484096513e-05, |
| "loss": 0.0461, |
| "step": 3530 |
| }, |
| { |
| "epoch": 3.3940556088207097, |
| "grad_norm": 0.39786848425865173, |
| "learning_rate": 9.798619697989222e-05, |
| "loss": 0.0558, |
| "step": 3540 |
| }, |
| { |
| "epoch": 3.4036433365292424, |
| "grad_norm": 0.3743523061275482, |
| "learning_rate": 9.796829149015517e-05, |
| "loss": 0.0439, |
| "step": 3550 |
| }, |
| { |
| "epoch": 3.4132310642377757, |
| "grad_norm": 0.36101034283638, |
| "learning_rate": 9.79503084007202e-05, |
| "loss": 0.0483, |
| "step": 3560 |
| }, |
| { |
| "epoch": 3.422818791946309, |
| "grad_norm": 0.3148845434188843, |
| "learning_rate": 9.79322477406791e-05, |
| "loss": 0.043, |
| "step": 3570 |
| }, |
| { |
| "epoch": 3.4324065196548417, |
| "grad_norm": 0.45851582288742065, |
| "learning_rate": 9.79141095392491e-05, |
| "loss": 0.0492, |
| "step": 3580 |
| }, |
| { |
| "epoch": 3.441994247363375, |
| "grad_norm": 0.6849660277366638, |
| "learning_rate": 9.789589382577291e-05, |
| "loss": 0.0434, |
| "step": 3590 |
| }, |
| { |
| "epoch": 3.451581975071908, |
| "grad_norm": 0.5036081671714783, |
| "learning_rate": 9.787760062971861e-05, |
| "loss": 0.0525, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.461169702780441, |
| "grad_norm": 0.46620407700538635, |
| "learning_rate": 9.785922998067963e-05, |
| "loss": 0.0491, |
| "step": 3610 |
| }, |
| { |
| "epoch": 3.470757430488974, |
| "grad_norm": 0.5096569657325745, |
| "learning_rate": 9.784078190837472e-05, |
| "loss": 0.0514, |
| "step": 3620 |
| }, |
| { |
| "epoch": 3.4803451581975073, |
| "grad_norm": 0.2947571873664856, |
| "learning_rate": 9.782225644264784e-05, |
| "loss": 0.0457, |
| "step": 3630 |
| }, |
| { |
| "epoch": 3.48993288590604, |
| "grad_norm": 0.4548271894454956, |
| "learning_rate": 9.780365361346821e-05, |
| "loss": 0.0448, |
| "step": 3640 |
| }, |
| { |
| "epoch": 3.4995206136145733, |
| "grad_norm": 0.5976017713546753, |
| "learning_rate": 9.778497345093013e-05, |
| "loss": 0.0495, |
| "step": 3650 |
| }, |
| { |
| "epoch": 3.5091083413231066, |
| "grad_norm": 0.3194081783294678, |
| "learning_rate": 9.776621598525305e-05, |
| "loss": 0.0473, |
| "step": 3660 |
| }, |
| { |
| "epoch": 3.5186960690316393, |
| "grad_norm": 0.2841929793357849, |
| "learning_rate": 9.774738124678148e-05, |
| "loss": 0.0429, |
| "step": 3670 |
| }, |
| { |
| "epoch": 3.5282837967401726, |
| "grad_norm": 0.2357761263847351, |
| "learning_rate": 9.772846926598491e-05, |
| "loss": 0.0494, |
| "step": 3680 |
| }, |
| { |
| "epoch": 3.537871524448706, |
| "grad_norm": 0.893323540687561, |
| "learning_rate": 9.770948007345779e-05, |
| "loss": 0.0497, |
| "step": 3690 |
| }, |
| { |
| "epoch": 3.547459252157239, |
| "grad_norm": 0.23153287172317505, |
| "learning_rate": 9.769041369991953e-05, |
| "loss": 0.0457, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.557046979865772, |
| "grad_norm": 0.36270731687545776, |
| "learning_rate": 9.767127017621431e-05, |
| "loss": 0.0535, |
| "step": 3710 |
| }, |
| { |
| "epoch": 3.566634707574305, |
| "grad_norm": 0.39080706238746643, |
| "learning_rate": 9.76520495333112e-05, |
| "loss": 0.0462, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.576222435282838, |
| "grad_norm": 0.5226278901100159, |
| "learning_rate": 9.763275180230395e-05, |
| "loss": 0.0486, |
| "step": 3730 |
| }, |
| { |
| "epoch": 3.585810162991371, |
| "grad_norm": 0.2358178198337555, |
| "learning_rate": 9.761337701441111e-05, |
| "loss": 0.0452, |
| "step": 3740 |
| }, |
| { |
| "epoch": 3.5953978906999042, |
| "grad_norm": 0.47069254517555237, |
| "learning_rate": 9.759392520097581e-05, |
| "loss": 0.049, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.6049856184084375, |
| "grad_norm": 0.327800452709198, |
| "learning_rate": 9.75743963934658e-05, |
| "loss": 0.0411, |
| "step": 3760 |
| }, |
| { |
| "epoch": 3.6145733461169702, |
| "grad_norm": 0.4534970819950104, |
| "learning_rate": 9.755479062347344e-05, |
| "loss": 0.0472, |
| "step": 3770 |
| }, |
| { |
| "epoch": 3.6241610738255035, |
| "grad_norm": 0.2962513566017151, |
| "learning_rate": 9.753510792271549e-05, |
| "loss": 0.0523, |
| "step": 3780 |
| }, |
| { |
| "epoch": 3.6337488015340362, |
| "grad_norm": 0.46883541345596313, |
| "learning_rate": 9.75153483230333e-05, |
| "loss": 0.0468, |
| "step": 3790 |
| }, |
| { |
| "epoch": 3.6433365292425695, |
| "grad_norm": 0.2845245599746704, |
| "learning_rate": 9.749551185639249e-05, |
| "loss": 0.0438, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.6529242569511027, |
| "grad_norm": 0.2763413190841675, |
| "learning_rate": 9.747559855488313e-05, |
| "loss": 0.0472, |
| "step": 3810 |
| }, |
| { |
| "epoch": 3.662511984659636, |
| "grad_norm": 0.27591028809547424, |
| "learning_rate": 9.74556084507195e-05, |
| "loss": 0.0457, |
| "step": 3820 |
| }, |
| { |
| "epoch": 3.6720997123681687, |
| "grad_norm": 0.36455026268959045, |
| "learning_rate": 9.743554157624023e-05, |
| "loss": 0.0453, |
| "step": 3830 |
| }, |
| { |
| "epoch": 3.681687440076702, |
| "grad_norm": 0.4757814407348633, |
| "learning_rate": 9.741539796390804e-05, |
| "loss": 0.0496, |
| "step": 3840 |
| }, |
| { |
| "epoch": 3.6912751677852347, |
| "grad_norm": 0.3472752869129181, |
| "learning_rate": 9.739517764630984e-05, |
| "loss": 0.0438, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.700862895493768, |
| "grad_norm": 0.39700034260749817, |
| "learning_rate": 9.737488065615665e-05, |
| "loss": 0.045, |
| "step": 3860 |
| }, |
| { |
| "epoch": 3.710450623202301, |
| "grad_norm": 0.2766479551792145, |
| "learning_rate": 9.735450702628348e-05, |
| "loss": 0.0361, |
| "step": 3870 |
| }, |
| { |
| "epoch": 3.7200383509108343, |
| "grad_norm": 0.3525460660457611, |
| "learning_rate": 9.733405678964935e-05, |
| "loss": 0.044, |
| "step": 3880 |
| }, |
| { |
| "epoch": 3.729626078619367, |
| "grad_norm": 0.35298100113868713, |
| "learning_rate": 9.731352997933718e-05, |
| "loss": 0.0392, |
| "step": 3890 |
| }, |
| { |
| "epoch": 3.7392138063279003, |
| "grad_norm": 0.32511138916015625, |
| "learning_rate": 9.729292662855383e-05, |
| "loss": 0.0463, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.748801534036433, |
| "grad_norm": 0.33208218216896057, |
| "learning_rate": 9.727224677062992e-05, |
| "loss": 0.0479, |
| "step": 3910 |
| }, |
| { |
| "epoch": 3.7583892617449663, |
| "grad_norm": 0.43648335337638855, |
| "learning_rate": 9.725149043901985e-05, |
| "loss": 0.0459, |
| "step": 3920 |
| }, |
| { |
| "epoch": 3.7679769894534996, |
| "grad_norm": 0.3617904782295227, |
| "learning_rate": 9.723065766730172e-05, |
| "loss": 0.0545, |
| "step": 3930 |
| }, |
| { |
| "epoch": 3.777564717162033, |
| "grad_norm": 0.34762272238731384, |
| "learning_rate": 9.720974848917735e-05, |
| "loss": 0.0433, |
| "step": 3940 |
| }, |
| { |
| "epoch": 3.7871524448705656, |
| "grad_norm": 0.3334721028804779, |
| "learning_rate": 9.71887629384721e-05, |
| "loss": 0.0445, |
| "step": 3950 |
| }, |
| { |
| "epoch": 3.796740172579099, |
| "grad_norm": 0.4064335823059082, |
| "learning_rate": 9.716770104913492e-05, |
| "loss": 0.0436, |
| "step": 3960 |
| }, |
| { |
| "epoch": 3.8063279002876316, |
| "grad_norm": 0.4279939532279968, |
| "learning_rate": 9.714656285523821e-05, |
| "loss": 0.0534, |
| "step": 3970 |
| }, |
| { |
| "epoch": 3.815915627996165, |
| "grad_norm": 0.28922349214553833, |
| "learning_rate": 9.71253483909779e-05, |
| "loss": 0.0488, |
| "step": 3980 |
| }, |
| { |
| "epoch": 3.825503355704698, |
| "grad_norm": 0.701637327671051, |
| "learning_rate": 9.710405769067317e-05, |
| "loss": 0.0465, |
| "step": 3990 |
| }, |
| { |
| "epoch": 3.8350910834132312, |
| "grad_norm": 0.3132900595664978, |
| "learning_rate": 9.708269078876666e-05, |
| "loss": 0.046, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.844678811121764, |
| "grad_norm": 0.2793468236923218, |
| "learning_rate": 9.706124771982421e-05, |
| "loss": 0.0382, |
| "step": 4010 |
| }, |
| { |
| "epoch": 3.8542665388302972, |
| "grad_norm": 0.32509496808052063, |
| "learning_rate": 9.703972851853488e-05, |
| "loss": 0.0419, |
| "step": 4020 |
| }, |
| { |
| "epoch": 3.8638542665388305, |
| "grad_norm": 0.5768635869026184, |
| "learning_rate": 9.701813321971091e-05, |
| "loss": 0.0513, |
| "step": 4030 |
| }, |
| { |
| "epoch": 3.8734419942473632, |
| "grad_norm": 0.37095797061920166, |
| "learning_rate": 9.699646185828768e-05, |
| "loss": 0.0493, |
| "step": 4040 |
| }, |
| { |
| "epoch": 3.8830297219558965, |
| "grad_norm": 0.4116993844509125, |
| "learning_rate": 9.697471446932353e-05, |
| "loss": 0.0481, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.8926174496644297, |
| "grad_norm": 0.4631316363811493, |
| "learning_rate": 9.695289108799989e-05, |
| "loss": 0.057, |
| "step": 4060 |
| }, |
| { |
| "epoch": 3.9022051773729625, |
| "grad_norm": 0.5926663279533386, |
| "learning_rate": 9.693099174962103e-05, |
| "loss": 0.0541, |
| "step": 4070 |
| }, |
| { |
| "epoch": 3.9117929050814957, |
| "grad_norm": 0.4884685277938843, |
| "learning_rate": 9.690901648961418e-05, |
| "loss": 0.0444, |
| "step": 4080 |
| }, |
| { |
| "epoch": 3.921380632790029, |
| "grad_norm": 0.5205138921737671, |
| "learning_rate": 9.688696534352935e-05, |
| "loss": 0.0469, |
| "step": 4090 |
| }, |
| { |
| "epoch": 3.9309683604985617, |
| "grad_norm": 0.3476182222366333, |
| "learning_rate": 9.68648383470393e-05, |
| "loss": 0.0506, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.940556088207095, |
| "grad_norm": 0.310553640127182, |
| "learning_rate": 9.684263553593953e-05, |
| "loss": 0.0454, |
| "step": 4110 |
| }, |
| { |
| "epoch": 3.950143815915628, |
| "grad_norm": 0.2743299603462219, |
| "learning_rate": 9.682035694614817e-05, |
| "loss": 0.0517, |
| "step": 4120 |
| }, |
| { |
| "epoch": 3.959731543624161, |
| "grad_norm": 0.33413469791412354, |
| "learning_rate": 9.679800261370594e-05, |
| "loss": 0.0428, |
| "step": 4130 |
| }, |
| { |
| "epoch": 3.969319271332694, |
| "grad_norm": 0.4639144837856293, |
| "learning_rate": 9.677557257477609e-05, |
| "loss": 0.0444, |
| "step": 4140 |
| }, |
| { |
| "epoch": 3.9789069990412274, |
| "grad_norm": 0.33329275250434875, |
| "learning_rate": 9.675306686564437e-05, |
| "loss": 0.0472, |
| "step": 4150 |
| }, |
| { |
| "epoch": 3.98849472674976, |
| "grad_norm": 0.4461182653903961, |
| "learning_rate": 9.673048552271889e-05, |
| "loss": 0.0375, |
| "step": 4160 |
| }, |
| { |
| "epoch": 3.9980824544582934, |
| "grad_norm": 0.26508504152297974, |
| "learning_rate": 9.670782858253015e-05, |
| "loss": 0.0468, |
| "step": 4170 |
| }, |
| { |
| "epoch": 4.007670182166827, |
| "grad_norm": 0.4112192690372467, |
| "learning_rate": 9.668509608173094e-05, |
| "loss": 0.0419, |
| "step": 4180 |
| }, |
| { |
| "epoch": 4.01725790987536, |
| "grad_norm": 0.3724784255027771, |
| "learning_rate": 9.66622880570963e-05, |
| "loss": 0.0526, |
| "step": 4190 |
| }, |
| { |
| "epoch": 4.026845637583893, |
| "grad_norm": 0.43858179450035095, |
| "learning_rate": 9.663940454552342e-05, |
| "loss": 0.0481, |
| "step": 4200 |
| }, |
| { |
| "epoch": 4.036433365292425, |
| "grad_norm": 0.27318644523620605, |
| "learning_rate": 9.661644558403162e-05, |
| "loss": 0.0372, |
| "step": 4210 |
| }, |
| { |
| "epoch": 4.046021093000959, |
| "grad_norm": 0.36369022727012634, |
| "learning_rate": 9.659341120976229e-05, |
| "loss": 0.0421, |
| "step": 4220 |
| }, |
| { |
| "epoch": 4.055608820709492, |
| "grad_norm": 0.3167804479598999, |
| "learning_rate": 9.657030145997878e-05, |
| "loss": 0.0437, |
| "step": 4230 |
| }, |
| { |
| "epoch": 4.065196548418025, |
| "grad_norm": 0.37195485830307007, |
| "learning_rate": 9.654711637206644e-05, |
| "loss": 0.0391, |
| "step": 4240 |
| }, |
| { |
| "epoch": 4.074784276126558, |
| "grad_norm": 0.26798343658447266, |
| "learning_rate": 9.652385598353244e-05, |
| "loss": 0.0424, |
| "step": 4250 |
| }, |
| { |
| "epoch": 4.0843720038350915, |
| "grad_norm": 0.38160890340805054, |
| "learning_rate": 9.650052033200578e-05, |
| "loss": 0.0473, |
| "step": 4260 |
| }, |
| { |
| "epoch": 4.093959731543624, |
| "grad_norm": 0.3133178651332855, |
| "learning_rate": 9.647710945523725e-05, |
| "loss": 0.0446, |
| "step": 4270 |
| }, |
| { |
| "epoch": 4.103547459252157, |
| "grad_norm": 0.314330130815506, |
| "learning_rate": 9.645362339109927e-05, |
| "loss": 0.0402, |
| "step": 4280 |
| }, |
| { |
| "epoch": 4.11313518696069, |
| "grad_norm": 0.6541547775268555, |
| "learning_rate": 9.643006217758594e-05, |
| "loss": 0.0417, |
| "step": 4290 |
| }, |
| { |
| "epoch": 4.1227229146692235, |
| "grad_norm": 0.3850661814212799, |
| "learning_rate": 9.640642585281292e-05, |
| "loss": 0.0483, |
| "step": 4300 |
| }, |
| { |
| "epoch": 4.132310642377757, |
| "grad_norm": 0.4512180984020233, |
| "learning_rate": 9.638271445501739e-05, |
| "loss": 0.0382, |
| "step": 4310 |
| }, |
| { |
| "epoch": 4.14189837008629, |
| "grad_norm": 0.277205228805542, |
| "learning_rate": 9.635892802255794e-05, |
| "loss": 0.0456, |
| "step": 4320 |
| }, |
| { |
| "epoch": 4.151486097794822, |
| "grad_norm": 0.39535996317863464, |
| "learning_rate": 9.63350665939146e-05, |
| "loss": 0.0402, |
| "step": 4330 |
| }, |
| { |
| "epoch": 4.1610738255033555, |
| "grad_norm": 0.3694916069507599, |
| "learning_rate": 9.63111302076887e-05, |
| "loss": 0.0414, |
| "step": 4340 |
| }, |
| { |
| "epoch": 4.170661553211889, |
| "grad_norm": 0.4235345423221588, |
| "learning_rate": 9.628711890260279e-05, |
| "loss": 0.0475, |
| "step": 4350 |
| }, |
| { |
| "epoch": 4.180249280920422, |
| "grad_norm": 0.46871331334114075, |
| "learning_rate": 9.626303271750069e-05, |
| "loss": 0.044, |
| "step": 4360 |
| }, |
| { |
| "epoch": 4.189837008628955, |
| "grad_norm": 0.33372747898101807, |
| "learning_rate": 9.623887169134731e-05, |
| "loss": 0.0479, |
| "step": 4370 |
| }, |
| { |
| "epoch": 4.199424736337488, |
| "grad_norm": 0.29731622338294983, |
| "learning_rate": 9.621463586322863e-05, |
| "loss": 0.0477, |
| "step": 4380 |
| }, |
| { |
| "epoch": 4.209012464046021, |
| "grad_norm": 0.3897842466831207, |
| "learning_rate": 9.619032527235168e-05, |
| "loss": 0.0449, |
| "step": 4390 |
| }, |
| { |
| "epoch": 4.218600191754554, |
| "grad_norm": 0.375775009393692, |
| "learning_rate": 9.616593995804437e-05, |
| "loss": 0.0527, |
| "step": 4400 |
| }, |
| { |
| "epoch": 4.228187919463087, |
| "grad_norm": 0.44136837124824524, |
| "learning_rate": 9.614147995975557e-05, |
| "loss": 0.0465, |
| "step": 4410 |
| }, |
| { |
| "epoch": 4.23777564717162, |
| "grad_norm": 0.49286016821861267, |
| "learning_rate": 9.611694531705493e-05, |
| "loss": 0.0478, |
| "step": 4420 |
| }, |
| { |
| "epoch": 4.247363374880154, |
| "grad_norm": 0.36331725120544434, |
| "learning_rate": 9.609233606963282e-05, |
| "loss": 0.0453, |
| "step": 4430 |
| }, |
| { |
| "epoch": 4.256951102588687, |
| "grad_norm": 0.4064538776874542, |
| "learning_rate": 9.606765225730035e-05, |
| "loss": 0.0512, |
| "step": 4440 |
| }, |
| { |
| "epoch": 4.26653883029722, |
| "grad_norm": 0.3883167505264282, |
| "learning_rate": 9.604289391998925e-05, |
| "loss": 0.0416, |
| "step": 4450 |
| }, |
| { |
| "epoch": 4.276126558005752, |
| "grad_norm": 0.364762544631958, |
| "learning_rate": 9.601806109775179e-05, |
| "loss": 0.0483, |
| "step": 4460 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.3354509472846985, |
| "learning_rate": 9.599315383076075e-05, |
| "loss": 0.0475, |
| "step": 4470 |
| }, |
| { |
| "epoch": 4.295302013422819, |
| "grad_norm": 0.3480011224746704, |
| "learning_rate": 9.596817215930934e-05, |
| "loss": 0.0441, |
| "step": 4480 |
| }, |
| { |
| "epoch": 4.304889741131352, |
| "grad_norm": 0.37383604049682617, |
| "learning_rate": 9.594311612381114e-05, |
| "loss": 0.0566, |
| "step": 4490 |
| }, |
| { |
| "epoch": 4.314477468839885, |
| "grad_norm": 0.5128716826438904, |
| "learning_rate": 9.591798576480001e-05, |
| "loss": 0.0452, |
| "step": 4500 |
| }, |
| { |
| "epoch": 4.324065196548418, |
| "grad_norm": 0.2675018310546875, |
| "learning_rate": 9.589278112293007e-05, |
| "loss": 0.0453, |
| "step": 4510 |
| }, |
| { |
| "epoch": 4.333652924256951, |
| "grad_norm": 0.39544346928596497, |
| "learning_rate": 9.586750223897562e-05, |
| "loss": 0.0479, |
| "step": 4520 |
| }, |
| { |
| "epoch": 4.343240651965484, |
| "grad_norm": 0.7438755631446838, |
| "learning_rate": 9.584214915383103e-05, |
| "loss": 0.0432, |
| "step": 4530 |
| }, |
| { |
| "epoch": 4.352828379674017, |
| "grad_norm": 0.29193535447120667, |
| "learning_rate": 9.58167219085107e-05, |
| "loss": 0.0467, |
| "step": 4540 |
| }, |
| { |
| "epoch": 4.3624161073825505, |
| "grad_norm": 0.34703853726387024, |
| "learning_rate": 9.579122054414907e-05, |
| "loss": 0.0435, |
| "step": 4550 |
| }, |
| { |
| "epoch": 4.372003835091084, |
| "grad_norm": 0.408741295337677, |
| "learning_rate": 9.576564510200038e-05, |
| "loss": 0.0433, |
| "step": 4560 |
| }, |
| { |
| "epoch": 4.381591562799617, |
| "grad_norm": 0.4278319478034973, |
| "learning_rate": 9.573999562343882e-05, |
| "loss": 0.047, |
| "step": 4570 |
| }, |
| { |
| "epoch": 4.391179290508149, |
| "grad_norm": 0.2754301428794861, |
| "learning_rate": 9.571427214995826e-05, |
| "loss": 0.0423, |
| "step": 4580 |
| }, |
| { |
| "epoch": 4.4007670182166825, |
| "grad_norm": 0.35973575711250305, |
| "learning_rate": 9.568847472317232e-05, |
| "loss": 0.0404, |
| "step": 4590 |
| }, |
| { |
| "epoch": 4.410354745925216, |
| "grad_norm": 0.31195884943008423, |
| "learning_rate": 9.566260338481425e-05, |
| "loss": 0.0476, |
| "step": 4600 |
| }, |
| { |
| "epoch": 4.419942473633749, |
| "grad_norm": 0.32485419511795044, |
| "learning_rate": 9.563665817673688e-05, |
| "loss": 0.0434, |
| "step": 4610 |
| }, |
| { |
| "epoch": 4.429530201342282, |
| "grad_norm": 0.6262250542640686, |
| "learning_rate": 9.56106391409125e-05, |
| "loss": 0.0523, |
| "step": 4620 |
| }, |
| { |
| "epoch": 4.439117929050815, |
| "grad_norm": 0.41280031204223633, |
| "learning_rate": 9.558454631943286e-05, |
| "loss": 0.0446, |
| "step": 4630 |
| }, |
| { |
| "epoch": 4.448705656759348, |
| "grad_norm": 0.32726627588272095, |
| "learning_rate": 9.55583797545091e-05, |
| "loss": 0.0424, |
| "step": 4640 |
| }, |
| { |
| "epoch": 4.458293384467881, |
| "grad_norm": 0.49836626648902893, |
| "learning_rate": 9.55321394884716e-05, |
| "loss": 0.0449, |
| "step": 4650 |
| }, |
| { |
| "epoch": 4.467881112176414, |
| "grad_norm": 0.24192816019058228, |
| "learning_rate": 9.550582556377003e-05, |
| "loss": 0.0533, |
| "step": 4660 |
| }, |
| { |
| "epoch": 4.477468839884947, |
| "grad_norm": 0.455990731716156, |
| "learning_rate": 9.547943802297317e-05, |
| "loss": 0.0473, |
| "step": 4670 |
| }, |
| { |
| "epoch": 4.487056567593481, |
| "grad_norm": 0.37101179361343384, |
| "learning_rate": 9.545297690876893e-05, |
| "loss": 0.0362, |
| "step": 4680 |
| }, |
| { |
| "epoch": 4.496644295302014, |
| "grad_norm": 0.5495269894599915, |
| "learning_rate": 9.54264422639642e-05, |
| "loss": 0.0498, |
| "step": 4690 |
| }, |
| { |
| "epoch": 4.506232023010546, |
| "grad_norm": 0.2883033752441406, |
| "learning_rate": 9.539983413148486e-05, |
| "loss": 0.0487, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.515819750719079, |
| "grad_norm": 0.36912739276885986, |
| "learning_rate": 9.537315255437565e-05, |
| "loss": 0.0388, |
| "step": 4710 |
| }, |
| { |
| "epoch": 4.525407478427613, |
| "grad_norm": 0.4408855438232422, |
| "learning_rate": 9.534639757580013e-05, |
| "loss": 0.0439, |
| "step": 4720 |
| }, |
| { |
| "epoch": 4.534995206136146, |
| "grad_norm": 0.4027664363384247, |
| "learning_rate": 9.531956923904062e-05, |
| "loss": 0.0443, |
| "step": 4730 |
| }, |
| { |
| "epoch": 4.544582933844679, |
| "grad_norm": 0.41703498363494873, |
| "learning_rate": 9.52926675874981e-05, |
| "loss": 0.0405, |
| "step": 4740 |
| }, |
| { |
| "epoch": 4.554170661553212, |
| "grad_norm": 0.5367491245269775, |
| "learning_rate": 9.526569266469213e-05, |
| "loss": 0.0518, |
| "step": 4750 |
| }, |
| { |
| "epoch": 4.563758389261745, |
| "grad_norm": 0.5591031312942505, |
| "learning_rate": 9.523864451426086e-05, |
| "loss": 0.0436, |
| "step": 4760 |
| }, |
| { |
| "epoch": 4.573346116970278, |
| "grad_norm": 0.6005666851997375, |
| "learning_rate": 9.521152317996083e-05, |
| "loss": 0.0408, |
| "step": 4770 |
| }, |
| { |
| "epoch": 4.582933844678811, |
| "grad_norm": 0.36557164788246155, |
| "learning_rate": 9.518432870566703e-05, |
| "loss": 0.0441, |
| "step": 4780 |
| }, |
| { |
| "epoch": 4.592521572387344, |
| "grad_norm": 0.3382973372936249, |
| "learning_rate": 9.515706113537275e-05, |
| "loss": 0.0448, |
| "step": 4790 |
| }, |
| { |
| "epoch": 4.6021093000958775, |
| "grad_norm": 0.36412662267684937, |
| "learning_rate": 9.512972051318952e-05, |
| "loss": 0.0447, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.611697027804411, |
| "grad_norm": 0.28740257024765015, |
| "learning_rate": 9.510230688334709e-05, |
| "loss": 0.0489, |
| "step": 4810 |
| }, |
| { |
| "epoch": 4.621284755512944, |
| "grad_norm": 0.37100985646247864, |
| "learning_rate": 9.507482029019324e-05, |
| "loss": 0.0417, |
| "step": 4820 |
| }, |
| { |
| "epoch": 4.630872483221476, |
| "grad_norm": 0.4594654142856598, |
| "learning_rate": 9.504726077819387e-05, |
| "loss": 0.0426, |
| "step": 4830 |
| }, |
| { |
| "epoch": 4.6404602109300095, |
| "grad_norm": 0.37358155846595764, |
| "learning_rate": 9.501962839193277e-05, |
| "loss": 0.0491, |
| "step": 4840 |
| }, |
| { |
| "epoch": 4.650047938638543, |
| "grad_norm": 0.31801337003707886, |
| "learning_rate": 9.499192317611167e-05, |
| "loss": 0.0444, |
| "step": 4850 |
| }, |
| { |
| "epoch": 4.659635666347076, |
| "grad_norm": 0.4786074161529541, |
| "learning_rate": 9.496414517555012e-05, |
| "loss": 0.0426, |
| "step": 4860 |
| }, |
| { |
| "epoch": 4.669223394055609, |
| "grad_norm": 0.23249605298042297, |
| "learning_rate": 9.493629443518537e-05, |
| "loss": 0.0495, |
| "step": 4870 |
| }, |
| { |
| "epoch": 4.6788111217641415, |
| "grad_norm": 0.4823112487792969, |
| "learning_rate": 9.490837100007237e-05, |
| "loss": 0.0505, |
| "step": 4880 |
| }, |
| { |
| "epoch": 4.688398849472675, |
| "grad_norm": 0.38305050134658813, |
| "learning_rate": 9.488037491538369e-05, |
| "loss": 0.0441, |
| "step": 4890 |
| }, |
| { |
| "epoch": 4.697986577181208, |
| "grad_norm": 0.4120224714279175, |
| "learning_rate": 9.485230622640939e-05, |
| "loss": 0.0464, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.707574304889741, |
| "grad_norm": 0.26718661189079285, |
| "learning_rate": 9.482416497855705e-05, |
| "loss": 0.0442, |
| "step": 4910 |
| }, |
| { |
| "epoch": 4.717162032598274, |
| "grad_norm": 0.35078132152557373, |
| "learning_rate": 9.47959512173515e-05, |
| "loss": 0.0402, |
| "step": 4920 |
| }, |
| { |
| "epoch": 4.726749760306808, |
| "grad_norm": 0.2796134948730469, |
| "learning_rate": 9.476766498843504e-05, |
| "loss": 0.0444, |
| "step": 4930 |
| }, |
| { |
| "epoch": 4.736337488015341, |
| "grad_norm": 0.37330299615859985, |
| "learning_rate": 9.473930633756706e-05, |
| "loss": 0.0433, |
| "step": 4940 |
| }, |
| { |
| "epoch": 4.745925215723873, |
| "grad_norm": 0.47067347168922424, |
| "learning_rate": 9.471087531062424e-05, |
| "loss": 0.0479, |
| "step": 4950 |
| }, |
| { |
| "epoch": 4.755512943432406, |
| "grad_norm": 0.3017641603946686, |
| "learning_rate": 9.468237195360023e-05, |
| "loss": 0.0427, |
| "step": 4960 |
| }, |
| { |
| "epoch": 4.76510067114094, |
| "grad_norm": 0.41320186853408813, |
| "learning_rate": 9.465379631260574e-05, |
| "loss": 0.052, |
| "step": 4970 |
| }, |
| { |
| "epoch": 4.774688398849473, |
| "grad_norm": 0.4640481173992157, |
| "learning_rate": 9.462514843386845e-05, |
| "loss": 0.0463, |
| "step": 4980 |
| }, |
| { |
| "epoch": 4.784276126558006, |
| "grad_norm": 0.2581227123737335, |
| "learning_rate": 9.459642836373282e-05, |
| "loss": 0.0371, |
| "step": 4990 |
| }, |
| { |
| "epoch": 4.793863854266538, |
| "grad_norm": 0.3752846121788025, |
| "learning_rate": 9.456763614866016e-05, |
| "loss": 0.0437, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.803451581975072, |
| "grad_norm": 0.27923133969306946, |
| "learning_rate": 9.453877183522848e-05, |
| "loss": 0.0442, |
| "step": 5010 |
| }, |
| { |
| "epoch": 4.813039309683605, |
| "grad_norm": 0.31683099269866943, |
| "learning_rate": 9.450983547013242e-05, |
| "loss": 0.0396, |
| "step": 5020 |
| }, |
| { |
| "epoch": 4.822627037392138, |
| "grad_norm": 0.5572241544723511, |
| "learning_rate": 9.448082710018317e-05, |
| "loss": 0.0464, |
| "step": 5030 |
| }, |
| { |
| "epoch": 4.832214765100671, |
| "grad_norm": 0.4878758490085602, |
| "learning_rate": 9.44517467723084e-05, |
| "loss": 0.0462, |
| "step": 5040 |
| }, |
| { |
| "epoch": 4.8418024928092045, |
| "grad_norm": 0.2646120488643646, |
| "learning_rate": 9.442259453355222e-05, |
| "loss": 0.0434, |
| "step": 5050 |
| }, |
| { |
| "epoch": 4.851390220517738, |
| "grad_norm": 0.23440934717655182, |
| "learning_rate": 9.439337043107506e-05, |
| "loss": 0.0454, |
| "step": 5060 |
| }, |
| { |
| "epoch": 4.86097794822627, |
| "grad_norm": 0.2339864820241928, |
| "learning_rate": 9.436407451215356e-05, |
| "loss": 0.0388, |
| "step": 5070 |
| }, |
| { |
| "epoch": 4.870565675934803, |
| "grad_norm": 0.3039968013763428, |
| "learning_rate": 9.433470682418061e-05, |
| "loss": 0.0466, |
| "step": 5080 |
| }, |
| { |
| "epoch": 4.8801534036433365, |
| "grad_norm": 0.29253584146499634, |
| "learning_rate": 9.430526741466519e-05, |
| "loss": 0.0421, |
| "step": 5090 |
| }, |
| { |
| "epoch": 4.88974113135187, |
| "grad_norm": 0.1914910078048706, |
| "learning_rate": 9.427575633123224e-05, |
| "loss": 0.0476, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.899328859060403, |
| "grad_norm": 0.2769542932510376, |
| "learning_rate": 9.424617362162271e-05, |
| "loss": 0.0498, |
| "step": 5110 |
| }, |
| { |
| "epoch": 4.908916586768935, |
| "grad_norm": 0.4235975444316864, |
| "learning_rate": 9.421651933369345e-05, |
| "loss": 0.0479, |
| "step": 5120 |
| }, |
| { |
| "epoch": 4.9185043144774685, |
| "grad_norm": 0.2977217733860016, |
| "learning_rate": 9.4186793515417e-05, |
| "loss": 0.0367, |
| "step": 5130 |
| }, |
| { |
| "epoch": 4.928092042186002, |
| "grad_norm": 0.4000433087348938, |
| "learning_rate": 9.415699621488172e-05, |
| "loss": 0.0452, |
| "step": 5140 |
| }, |
| { |
| "epoch": 4.937679769894535, |
| "grad_norm": 0.3901826739311218, |
| "learning_rate": 9.412712748029157e-05, |
| "loss": 0.0431, |
| "step": 5150 |
| }, |
| { |
| "epoch": 4.947267497603068, |
| "grad_norm": 0.45422032475471497, |
| "learning_rate": 9.409718735996605e-05, |
| "loss": 0.0419, |
| "step": 5160 |
| }, |
| { |
| "epoch": 4.956855225311601, |
| "grad_norm": 0.29559481143951416, |
| "learning_rate": 9.406717590234016e-05, |
| "loss": 0.0404, |
| "step": 5170 |
| }, |
| { |
| "epoch": 4.966442953020135, |
| "grad_norm": 0.39736929535865784, |
| "learning_rate": 9.403709315596431e-05, |
| "loss": 0.0409, |
| "step": 5180 |
| }, |
| { |
| "epoch": 4.976030680728667, |
| "grad_norm": 0.37043488025665283, |
| "learning_rate": 9.400693916950427e-05, |
| "loss": 0.0494, |
| "step": 5190 |
| }, |
| { |
| "epoch": 4.9856184084372, |
| "grad_norm": 0.35436293482780457, |
| "learning_rate": 9.397671399174096e-05, |
| "loss": 0.0505, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.995206136145733, |
| "grad_norm": 0.24993938207626343, |
| "learning_rate": 9.394641767157056e-05, |
| "loss": 0.0491, |
| "step": 5210 |
| }, |
| { |
| "epoch": 5.004793863854267, |
| "grad_norm": 0.3652108609676361, |
| "learning_rate": 9.391605025800431e-05, |
| "loss": 0.0474, |
| "step": 5220 |
| }, |
| { |
| "epoch": 5.0143815915628, |
| "grad_norm": 0.3362497389316559, |
| "learning_rate": 9.388561180016844e-05, |
| "loss": 0.0481, |
| "step": 5230 |
| }, |
| { |
| "epoch": 5.023969319271333, |
| "grad_norm": 0.25596174597740173, |
| "learning_rate": 9.385510234730415e-05, |
| "loss": 0.0475, |
| "step": 5240 |
| }, |
| { |
| "epoch": 5.033557046979865, |
| "grad_norm": 0.4541703760623932, |
| "learning_rate": 9.382452194876743e-05, |
| "loss": 0.0448, |
| "step": 5250 |
| }, |
| { |
| "epoch": 5.043144774688399, |
| "grad_norm": 0.5844725966453552, |
| "learning_rate": 9.379387065402911e-05, |
| "loss": 0.0531, |
| "step": 5260 |
| }, |
| { |
| "epoch": 5.052732502396932, |
| "grad_norm": 0.5136455297470093, |
| "learning_rate": 9.376314851267468e-05, |
| "loss": 0.0478, |
| "step": 5270 |
| }, |
| { |
| "epoch": 5.062320230105465, |
| "grad_norm": 0.36073240637779236, |
| "learning_rate": 9.373235557440423e-05, |
| "loss": 0.0413, |
| "step": 5280 |
| }, |
| { |
| "epoch": 5.071907957813998, |
| "grad_norm": 0.3564154803752899, |
| "learning_rate": 9.370149188903238e-05, |
| "loss": 0.0474, |
| "step": 5290 |
| }, |
| { |
| "epoch": 5.0814956855225315, |
| "grad_norm": 0.269563764333725, |
| "learning_rate": 9.367055750648823e-05, |
| "loss": 0.05, |
| "step": 5300 |
| }, |
| { |
| "epoch": 5.091083413231064, |
| "grad_norm": 0.32311663031578064, |
| "learning_rate": 9.363955247681522e-05, |
| "loss": 0.0443, |
| "step": 5310 |
| }, |
| { |
| "epoch": 5.100671140939597, |
| "grad_norm": 0.2627108097076416, |
| "learning_rate": 9.360847685017109e-05, |
| "loss": 0.0442, |
| "step": 5320 |
| }, |
| { |
| "epoch": 5.11025886864813, |
| "grad_norm": 0.34790635108947754, |
| "learning_rate": 9.357733067682777e-05, |
| "loss": 0.0441, |
| "step": 5330 |
| }, |
| { |
| "epoch": 5.1198465963566635, |
| "grad_norm": 0.22408638894557953, |
| "learning_rate": 9.354611400717135e-05, |
| "loss": 0.0415, |
| "step": 5340 |
| }, |
| { |
| "epoch": 5.129434324065197, |
| "grad_norm": 0.3347373306751251, |
| "learning_rate": 9.351482689170193e-05, |
| "loss": 0.0427, |
| "step": 5350 |
| }, |
| { |
| "epoch": 5.13902205177373, |
| "grad_norm": 0.30321311950683594, |
| "learning_rate": 9.348346938103359e-05, |
| "loss": 0.0434, |
| "step": 5360 |
| }, |
| { |
| "epoch": 5.148609779482262, |
| "grad_norm": 0.2402300387620926, |
| "learning_rate": 9.345204152589428e-05, |
| "loss": 0.0475, |
| "step": 5370 |
| }, |
| { |
| "epoch": 5.1581975071907955, |
| "grad_norm": 0.5249261856079102, |
| "learning_rate": 9.342054337712576e-05, |
| "loss": 0.0486, |
| "step": 5380 |
| }, |
| { |
| "epoch": 5.167785234899329, |
| "grad_norm": 0.3607705533504486, |
| "learning_rate": 9.338897498568349e-05, |
| "loss": 0.0417, |
| "step": 5390 |
| }, |
| { |
| "epoch": 5.177372962607862, |
| "grad_norm": 0.38747304677963257, |
| "learning_rate": 9.33573364026366e-05, |
| "loss": 0.0477, |
| "step": 5400 |
| }, |
| { |
| "epoch": 5.186960690316395, |
| "grad_norm": 0.36637309193611145, |
| "learning_rate": 9.332562767916771e-05, |
| "loss": 0.044, |
| "step": 5410 |
| }, |
| { |
| "epoch": 5.196548418024928, |
| "grad_norm": 0.31087052822113037, |
| "learning_rate": 9.329384886657296e-05, |
| "loss": 0.0373, |
| "step": 5420 |
| }, |
| { |
| "epoch": 5.206136145733462, |
| "grad_norm": 0.3998284935951233, |
| "learning_rate": 9.326200001626184e-05, |
| "loss": 0.036, |
| "step": 5430 |
| }, |
| { |
| "epoch": 5.215723873441994, |
| "grad_norm": 0.3035097122192383, |
| "learning_rate": 9.323008117975718e-05, |
| "loss": 0.0429, |
| "step": 5440 |
| }, |
| { |
| "epoch": 5.225311601150527, |
| "grad_norm": 0.3162848949432373, |
| "learning_rate": 9.319809240869502e-05, |
| "loss": 0.0524, |
| "step": 5450 |
| }, |
| { |
| "epoch": 5.23489932885906, |
| "grad_norm": 0.3142375946044922, |
| "learning_rate": 9.316603375482449e-05, |
| "loss": 0.0479, |
| "step": 5460 |
| }, |
| { |
| "epoch": 5.244487056567594, |
| "grad_norm": 0.3951794505119324, |
| "learning_rate": 9.313390527000783e-05, |
| "loss": 0.044, |
| "step": 5470 |
| }, |
| { |
| "epoch": 5.254074784276127, |
| "grad_norm": 0.26764142513275146, |
| "learning_rate": 9.310170700622021e-05, |
| "loss": 0.0409, |
| "step": 5480 |
| }, |
| { |
| "epoch": 5.263662511984659, |
| "grad_norm": 0.3293421268463135, |
| "learning_rate": 9.306943901554972e-05, |
| "loss": 0.0413, |
| "step": 5490 |
| }, |
| { |
| "epoch": 5.273250239693192, |
| "grad_norm": 0.39588311314582825, |
| "learning_rate": 9.30371013501972e-05, |
| "loss": 0.0475, |
| "step": 5500 |
| }, |
| { |
| "epoch": 5.282837967401726, |
| "grad_norm": 0.330180287361145, |
| "learning_rate": 9.300469406247621e-05, |
| "loss": 0.0409, |
| "step": 5510 |
| }, |
| { |
| "epoch": 5.292425695110259, |
| "grad_norm": 0.37915417551994324, |
| "learning_rate": 9.297221720481302e-05, |
| "loss": 0.0413, |
| "step": 5520 |
| }, |
| { |
| "epoch": 5.302013422818792, |
| "grad_norm": 0.3579411208629608, |
| "learning_rate": 9.293967082974632e-05, |
| "loss": 0.0476, |
| "step": 5530 |
| }, |
| { |
| "epoch": 5.311601150527325, |
| "grad_norm": 0.30744969844818115, |
| "learning_rate": 9.29070549899274e-05, |
| "loss": 0.0454, |
| "step": 5540 |
| }, |
| { |
| "epoch": 5.3211888782358585, |
| "grad_norm": 0.31515830755233765, |
| "learning_rate": 9.287436973811978e-05, |
| "loss": 0.0343, |
| "step": 5550 |
| }, |
| { |
| "epoch": 5.330776605944391, |
| "grad_norm": 0.26603803038597107, |
| "learning_rate": 9.284161512719938e-05, |
| "loss": 0.0402, |
| "step": 5560 |
| }, |
| { |
| "epoch": 5.340364333652924, |
| "grad_norm": 0.39798933267593384, |
| "learning_rate": 9.280879121015428e-05, |
| "loss": 0.0339, |
| "step": 5570 |
| }, |
| { |
| "epoch": 5.349952061361457, |
| "grad_norm": 0.35744068026542664, |
| "learning_rate": 9.277589804008467e-05, |
| "loss": 0.0442, |
| "step": 5580 |
| }, |
| { |
| "epoch": 5.3595397890699905, |
| "grad_norm": 0.426455557346344, |
| "learning_rate": 9.27429356702028e-05, |
| "loss": 0.0515, |
| "step": 5590 |
| }, |
| { |
| "epoch": 5.369127516778524, |
| "grad_norm": 0.5735211372375488, |
| "learning_rate": 9.270990415383285e-05, |
| "loss": 0.0447, |
| "step": 5600 |
| }, |
| { |
| "epoch": 5.378715244487057, |
| "grad_norm": 0.40888845920562744, |
| "learning_rate": 9.267680354441087e-05, |
| "loss": 0.0523, |
| "step": 5610 |
| }, |
| { |
| "epoch": 5.388302972195589, |
| "grad_norm": 0.5509734153747559, |
| "learning_rate": 9.264363389548465e-05, |
| "loss": 0.0475, |
| "step": 5620 |
| }, |
| { |
| "epoch": 5.3978906999041225, |
| "grad_norm": 0.4060477912425995, |
| "learning_rate": 9.261039526071374e-05, |
| "loss": 0.0456, |
| "step": 5630 |
| }, |
| { |
| "epoch": 5.407478427612656, |
| "grad_norm": 0.3927951157093048, |
| "learning_rate": 9.257708769386919e-05, |
| "loss": 0.0588, |
| "step": 5640 |
| }, |
| { |
| "epoch": 5.417066155321189, |
| "grad_norm": 0.2928200662136078, |
| "learning_rate": 9.254371124883366e-05, |
| "loss": 0.0412, |
| "step": 5650 |
| }, |
| { |
| "epoch": 5.426653883029722, |
| "grad_norm": 0.37971609830856323, |
| "learning_rate": 9.251026597960117e-05, |
| "loss": 0.0473, |
| "step": 5660 |
| }, |
| { |
| "epoch": 5.436241610738255, |
| "grad_norm": 0.3287939131259918, |
| "learning_rate": 9.247675194027712e-05, |
| "loss": 0.055, |
| "step": 5670 |
| }, |
| { |
| "epoch": 5.445829338446788, |
| "grad_norm": 0.2745339870452881, |
| "learning_rate": 9.244316918507813e-05, |
| "loss": 0.044, |
| "step": 5680 |
| }, |
| { |
| "epoch": 5.455417066155321, |
| "grad_norm": 0.2364960014820099, |
| "learning_rate": 9.240951776833202e-05, |
| "loss": 0.045, |
| "step": 5690 |
| }, |
| { |
| "epoch": 5.465004793863854, |
| "grad_norm": 0.5563991069793701, |
| "learning_rate": 9.237579774447765e-05, |
| "loss": 0.042, |
| "step": 5700 |
| }, |
| { |
| "epoch": 5.474592521572387, |
| "grad_norm": 0.3112446367740631, |
| "learning_rate": 9.234200916806486e-05, |
| "loss": 0.0488, |
| "step": 5710 |
| }, |
| { |
| "epoch": 5.484180249280921, |
| "grad_norm": 0.32364800572395325, |
| "learning_rate": 9.230815209375446e-05, |
| "loss": 0.039, |
| "step": 5720 |
| }, |
| { |
| "epoch": 5.493767976989454, |
| "grad_norm": 0.35172006487846375, |
| "learning_rate": 9.227422657631796e-05, |
| "loss": 0.0443, |
| "step": 5730 |
| }, |
| { |
| "epoch": 5.503355704697986, |
| "grad_norm": 0.3294823169708252, |
| "learning_rate": 9.22402326706377e-05, |
| "loss": 0.0466, |
| "step": 5740 |
| }, |
| { |
| "epoch": 5.512943432406519, |
| "grad_norm": 0.32464146614074707, |
| "learning_rate": 9.220617043170661e-05, |
| "loss": 0.0456, |
| "step": 5750 |
| }, |
| { |
| "epoch": 5.522531160115053, |
| "grad_norm": 0.36492130160331726, |
| "learning_rate": 9.217203991462815e-05, |
| "loss": 0.041, |
| "step": 5760 |
| }, |
| { |
| "epoch": 5.532118887823586, |
| "grad_norm": 0.35331547260284424, |
| "learning_rate": 9.213784117461624e-05, |
| "loss": 0.0377, |
| "step": 5770 |
| }, |
| { |
| "epoch": 5.541706615532119, |
| "grad_norm": 0.2622346580028534, |
| "learning_rate": 9.210357426699519e-05, |
| "loss": 0.0422, |
| "step": 5780 |
| }, |
| { |
| "epoch": 5.551294343240652, |
| "grad_norm": 0.4553088843822479, |
| "learning_rate": 9.206923924719955e-05, |
| "loss": 0.0467, |
| "step": 5790 |
| }, |
| { |
| "epoch": 5.5608820709491855, |
| "grad_norm": 0.38138529658317566, |
| "learning_rate": 9.203483617077411e-05, |
| "loss": 0.0397, |
| "step": 5800 |
| }, |
| { |
| "epoch": 5.570469798657718, |
| "grad_norm": 0.4665132761001587, |
| "learning_rate": 9.200036509337369e-05, |
| "loss": 0.0518, |
| "step": 5810 |
| }, |
| { |
| "epoch": 5.580057526366251, |
| "grad_norm": 0.27688702940940857, |
| "learning_rate": 9.196582607076319e-05, |
| "loss": 0.0469, |
| "step": 5820 |
| }, |
| { |
| "epoch": 5.589645254074784, |
| "grad_norm": 0.3505072295665741, |
| "learning_rate": 9.193121915881737e-05, |
| "loss": 0.0526, |
| "step": 5830 |
| }, |
| { |
| "epoch": 5.5992329817833175, |
| "grad_norm": 0.23712855577468872, |
| "learning_rate": 9.189654441352082e-05, |
| "loss": 0.0431, |
| "step": 5840 |
| }, |
| { |
| "epoch": 5.608820709491851, |
| "grad_norm": 0.41854333877563477, |
| "learning_rate": 9.186180189096791e-05, |
| "loss": 0.0505, |
| "step": 5850 |
| }, |
| { |
| "epoch": 5.618408437200383, |
| "grad_norm": 0.2426682859659195, |
| "learning_rate": 9.182699164736264e-05, |
| "loss": 0.0444, |
| "step": 5860 |
| }, |
| { |
| "epoch": 5.627996164908916, |
| "grad_norm": 0.6301522850990295, |
| "learning_rate": 9.17921137390185e-05, |
| "loss": 0.0495, |
| "step": 5870 |
| }, |
| { |
| "epoch": 5.6375838926174495, |
| "grad_norm": 0.3933928608894348, |
| "learning_rate": 9.175716822235854e-05, |
| "loss": 0.0524, |
| "step": 5880 |
| }, |
| { |
| "epoch": 5.647171620325983, |
| "grad_norm": 0.4735229015350342, |
| "learning_rate": 9.17221551539151e-05, |
| "loss": 0.0416, |
| "step": 5890 |
| }, |
| { |
| "epoch": 5.656759348034516, |
| "grad_norm": 0.23624800145626068, |
| "learning_rate": 9.168707459032988e-05, |
| "loss": 0.0436, |
| "step": 5900 |
| }, |
| { |
| "epoch": 5.666347075743049, |
| "grad_norm": 0.35046079754829407, |
| "learning_rate": 9.165192658835369e-05, |
| "loss": 0.043, |
| "step": 5910 |
| }, |
| { |
| "epoch": 5.675934803451582, |
| "grad_norm": 0.43765562772750854, |
| "learning_rate": 9.161671120484649e-05, |
| "loss": 0.0449, |
| "step": 5920 |
| }, |
| { |
| "epoch": 5.685522531160115, |
| "grad_norm": 0.2839658856391907, |
| "learning_rate": 9.158142849677723e-05, |
| "loss": 0.0429, |
| "step": 5930 |
| }, |
| { |
| "epoch": 5.695110258868648, |
| "grad_norm": 0.34485873579978943, |
| "learning_rate": 9.154607852122376e-05, |
| "loss": 0.0444, |
| "step": 5940 |
| }, |
| { |
| "epoch": 5.704697986577181, |
| "grad_norm": 1.9406903982162476, |
| "learning_rate": 9.15106613353728e-05, |
| "loss": 0.0491, |
| "step": 5950 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "grad_norm": 0.25078949332237244, |
| "learning_rate": 9.14751769965197e-05, |
| "loss": 0.0455, |
| "step": 5960 |
| }, |
| { |
| "epoch": 5.723873441994248, |
| "grad_norm": 0.342736154794693, |
| "learning_rate": 9.143962556206853e-05, |
| "loss": 0.0418, |
| "step": 5970 |
| }, |
| { |
| "epoch": 5.73346116970278, |
| "grad_norm": 0.36492887139320374, |
| "learning_rate": 9.140400708953189e-05, |
| "loss": 0.0457, |
| "step": 5980 |
| }, |
| { |
| "epoch": 5.743048897411313, |
| "grad_norm": 0.3328196108341217, |
| "learning_rate": 9.136832163653083e-05, |
| "loss": 0.0434, |
| "step": 5990 |
| }, |
| { |
| "epoch": 5.752636625119846, |
| "grad_norm": 0.31458353996276855, |
| "learning_rate": 9.13325692607947e-05, |
| "loss": 0.0426, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.76222435282838, |
| "grad_norm": 0.2388927936553955, |
| "learning_rate": 9.129675002016119e-05, |
| "loss": 0.0412, |
| "step": 6010 |
| }, |
| { |
| "epoch": 5.771812080536913, |
| "grad_norm": 0.29163026809692383, |
| "learning_rate": 9.126086397257612e-05, |
| "loss": 0.0464, |
| "step": 6020 |
| }, |
| { |
| "epoch": 5.781399808245446, |
| "grad_norm": 0.3460707664489746, |
| "learning_rate": 9.122491117609336e-05, |
| "loss": 0.0417, |
| "step": 6030 |
| }, |
| { |
| "epoch": 5.790987535953979, |
| "grad_norm": 0.17329041659832, |
| "learning_rate": 9.118889168887483e-05, |
| "loss": 0.0394, |
| "step": 6040 |
| }, |
| { |
| "epoch": 5.800575263662512, |
| "grad_norm": 0.2987213730812073, |
| "learning_rate": 9.11528055691903e-05, |
| "loss": 0.0399, |
| "step": 6050 |
| }, |
| { |
| "epoch": 5.810162991371045, |
| "grad_norm": 0.4310978353023529, |
| "learning_rate": 9.111665287541733e-05, |
| "loss": 0.0387, |
| "step": 6060 |
| }, |
| { |
| "epoch": 5.819750719079578, |
| "grad_norm": 0.3461402952671051, |
| "learning_rate": 9.108043366604115e-05, |
| "loss": 0.0388, |
| "step": 6070 |
| }, |
| { |
| "epoch": 5.829338446788111, |
| "grad_norm": 0.3460417091846466, |
| "learning_rate": 9.104414799965468e-05, |
| "loss": 0.0458, |
| "step": 6080 |
| }, |
| { |
| "epoch": 5.8389261744966445, |
| "grad_norm": 0.26389792561531067, |
| "learning_rate": 9.100779593495825e-05, |
| "loss": 0.0416, |
| "step": 6090 |
| }, |
| { |
| "epoch": 5.848513902205178, |
| "grad_norm": 0.39147645235061646, |
| "learning_rate": 9.097137753075966e-05, |
| "loss": 0.0392, |
| "step": 6100 |
| }, |
| { |
| "epoch": 5.85810162991371, |
| "grad_norm": 0.4331185221672058, |
| "learning_rate": 9.093489284597404e-05, |
| "loss": 0.0388, |
| "step": 6110 |
| }, |
| { |
| "epoch": 5.867689357622243, |
| "grad_norm": 0.1933136284351349, |
| "learning_rate": 9.089834193962372e-05, |
| "loss": 0.0408, |
| "step": 6120 |
| }, |
| { |
| "epoch": 5.8772770853307765, |
| "grad_norm": 0.29839614033699036, |
| "learning_rate": 9.086172487083815e-05, |
| "loss": 0.0374, |
| "step": 6130 |
| }, |
| { |
| "epoch": 5.88686481303931, |
| "grad_norm": 0.23067611455917358, |
| "learning_rate": 9.082504169885381e-05, |
| "loss": 0.044, |
| "step": 6140 |
| }, |
| { |
| "epoch": 5.896452540747843, |
| "grad_norm": 0.2817287743091583, |
| "learning_rate": 9.078829248301417e-05, |
| "loss": 0.036, |
| "step": 6150 |
| }, |
| { |
| "epoch": 5.906040268456376, |
| "grad_norm": 0.2695087790489197, |
| "learning_rate": 9.07514772827695e-05, |
| "loss": 0.0371, |
| "step": 6160 |
| }, |
| { |
| "epoch": 5.9156279961649085, |
| "grad_norm": 0.5207583904266357, |
| "learning_rate": 9.071459615767679e-05, |
| "loss": 0.0406, |
| "step": 6170 |
| }, |
| { |
| "epoch": 5.925215723873442, |
| "grad_norm": 0.3283056914806366, |
| "learning_rate": 9.067764916739971e-05, |
| "loss": 0.0421, |
| "step": 6180 |
| }, |
| { |
| "epoch": 5.934803451581975, |
| "grad_norm": 0.4326401352882385, |
| "learning_rate": 9.06406363717085e-05, |
| "loss": 0.0397, |
| "step": 6190 |
| }, |
| { |
| "epoch": 5.944391179290508, |
| "grad_norm": 0.3044590651988983, |
| "learning_rate": 9.060355783047982e-05, |
| "loss": 0.045, |
| "step": 6200 |
| }, |
| { |
| "epoch": 5.953978906999041, |
| "grad_norm": 0.2913448214530945, |
| "learning_rate": 9.056641360369672e-05, |
| "loss": 0.0364, |
| "step": 6210 |
| }, |
| { |
| "epoch": 5.963566634707575, |
| "grad_norm": 0.4203062057495117, |
| "learning_rate": 9.052920375144847e-05, |
| "loss": 0.0544, |
| "step": 6220 |
| }, |
| { |
| "epoch": 5.973154362416107, |
| "grad_norm": 0.34060561656951904, |
| "learning_rate": 9.049192833393055e-05, |
| "loss": 0.0428, |
| "step": 6230 |
| }, |
| { |
| "epoch": 5.98274209012464, |
| "grad_norm": 0.28594672679901123, |
| "learning_rate": 9.045458741144446e-05, |
| "loss": 0.0497, |
| "step": 6240 |
| }, |
| { |
| "epoch": 5.992329817833173, |
| "grad_norm": 0.4794290065765381, |
| "learning_rate": 9.041718104439772e-05, |
| "loss": 0.0462, |
| "step": 6250 |
| }, |
| { |
| "epoch": 6.001917545541707, |
| "grad_norm": 0.47997909784317017, |
| "learning_rate": 9.037970929330368e-05, |
| "loss": 0.0489, |
| "step": 6260 |
| }, |
| { |
| "epoch": 6.01150527325024, |
| "grad_norm": 0.37695473432540894, |
| "learning_rate": 9.03421722187815e-05, |
| "loss": 0.0409, |
| "step": 6270 |
| }, |
| { |
| "epoch": 6.021093000958773, |
| "grad_norm": 0.4723213315010071, |
| "learning_rate": 9.030456988155596e-05, |
| "loss": 0.0458, |
| "step": 6280 |
| }, |
| { |
| "epoch": 6.030680728667305, |
| "grad_norm": 0.4923066794872284, |
| "learning_rate": 9.026690234245749e-05, |
| "loss": 0.048, |
| "step": 6290 |
| }, |
| { |
| "epoch": 6.040268456375839, |
| "grad_norm": 0.3513863980770111, |
| "learning_rate": 9.022916966242192e-05, |
| "loss": 0.0414, |
| "step": 6300 |
| }, |
| { |
| "epoch": 6.049856184084372, |
| "grad_norm": 0.34284889698028564, |
| "learning_rate": 9.019137190249055e-05, |
| "loss": 0.0435, |
| "step": 6310 |
| }, |
| { |
| "epoch": 6.059443911792905, |
| "grad_norm": 0.33619949221611023, |
| "learning_rate": 9.015350912380989e-05, |
| "loss": 0.0428, |
| "step": 6320 |
| }, |
| { |
| "epoch": 6.069031639501438, |
| "grad_norm": 0.5763192176818848, |
| "learning_rate": 9.011558138763165e-05, |
| "loss": 0.0386, |
| "step": 6330 |
| }, |
| { |
| "epoch": 6.0786193672099715, |
| "grad_norm": 0.5095228552818298, |
| "learning_rate": 9.007758875531264e-05, |
| "loss": 0.041, |
| "step": 6340 |
| }, |
| { |
| "epoch": 6.088207094918504, |
| "grad_norm": 0.3965105414390564, |
| "learning_rate": 9.003953128831464e-05, |
| "loss": 0.0409, |
| "step": 6350 |
| }, |
| { |
| "epoch": 6.097794822627037, |
| "grad_norm": 0.3434533178806305, |
| "learning_rate": 9.000140904820432e-05, |
| "loss": 0.0393, |
| "step": 6360 |
| }, |
| { |
| "epoch": 6.10738255033557, |
| "grad_norm": 0.2840021252632141, |
| "learning_rate": 8.996322209665313e-05, |
| "loss": 0.0476, |
| "step": 6370 |
| }, |
| { |
| "epoch": 6.1169702780441035, |
| "grad_norm": 0.3020944893360138, |
| "learning_rate": 8.992497049543722e-05, |
| "loss": 0.042, |
| "step": 6380 |
| }, |
| { |
| "epoch": 6.126558005752637, |
| "grad_norm": 0.36320140957832336, |
| "learning_rate": 8.988665430643732e-05, |
| "loss": 0.0376, |
| "step": 6390 |
| }, |
| { |
| "epoch": 6.13614573346117, |
| "grad_norm": 0.34080708026885986, |
| "learning_rate": 8.984827359163863e-05, |
| "loss": 0.0428, |
| "step": 6400 |
| }, |
| { |
| "epoch": 6.145733461169703, |
| "grad_norm": 0.28345417976379395, |
| "learning_rate": 8.980982841313074e-05, |
| "loss": 0.041, |
| "step": 6410 |
| }, |
| { |
| "epoch": 6.1553211888782355, |
| "grad_norm": 0.37377986311912537, |
| "learning_rate": 8.977131883310757e-05, |
| "loss": 0.0429, |
| "step": 6420 |
| }, |
| { |
| "epoch": 6.164908916586769, |
| "grad_norm": 0.30983594059944153, |
| "learning_rate": 8.973274491386712e-05, |
| "loss": 0.0406, |
| "step": 6430 |
| }, |
| { |
| "epoch": 6.174496644295302, |
| "grad_norm": 0.32864126563072205, |
| "learning_rate": 8.96941067178116e-05, |
| "loss": 0.0414, |
| "step": 6440 |
| }, |
| { |
| "epoch": 6.184084372003835, |
| "grad_norm": 0.28770530223846436, |
| "learning_rate": 8.965540430744712e-05, |
| "loss": 0.0371, |
| "step": 6450 |
| }, |
| { |
| "epoch": 6.193672099712368, |
| "grad_norm": 0.36449259519577026, |
| "learning_rate": 8.961663774538367e-05, |
| "loss": 0.0337, |
| "step": 6460 |
| }, |
| { |
| "epoch": 6.203259827420902, |
| "grad_norm": 0.3418562412261963, |
| "learning_rate": 8.957780709433509e-05, |
| "loss": 0.0417, |
| "step": 6470 |
| }, |
| { |
| "epoch": 6.212847555129434, |
| "grad_norm": 0.2991498112678528, |
| "learning_rate": 8.95389124171188e-05, |
| "loss": 0.0402, |
| "step": 6480 |
| }, |
| { |
| "epoch": 6.222435282837967, |
| "grad_norm": 1.1912015676498413, |
| "learning_rate": 8.94999537766559e-05, |
| "loss": 0.045, |
| "step": 6490 |
| }, |
| { |
| "epoch": 6.2320230105465, |
| "grad_norm": 0.6341769695281982, |
| "learning_rate": 8.946093123597088e-05, |
| "loss": 0.0531, |
| "step": 6500 |
| }, |
| { |
| "epoch": 6.241610738255034, |
| "grad_norm": 0.34102702140808105, |
| "learning_rate": 8.942184485819162e-05, |
| "loss": 0.0432, |
| "step": 6510 |
| }, |
| { |
| "epoch": 6.251198465963567, |
| "grad_norm": 0.4004610776901245, |
| "learning_rate": 8.938269470654936e-05, |
| "loss": 0.0476, |
| "step": 6520 |
| }, |
| { |
| "epoch": 6.2607861936721, |
| "grad_norm": 0.4373878240585327, |
| "learning_rate": 8.934348084437835e-05, |
| "loss": 0.0428, |
| "step": 6530 |
| }, |
| { |
| "epoch": 6.270373921380632, |
| "grad_norm": 0.36895817518234253, |
| "learning_rate": 8.930420333511606e-05, |
| "loss": 0.0503, |
| "step": 6540 |
| }, |
| { |
| "epoch": 6.279961649089166, |
| "grad_norm": 0.4267611503601074, |
| "learning_rate": 8.926486224230282e-05, |
| "loss": 0.0429, |
| "step": 6550 |
| }, |
| { |
| "epoch": 6.289549376797699, |
| "grad_norm": 0.4211304485797882, |
| "learning_rate": 8.922545762958188e-05, |
| "loss": 0.0428, |
| "step": 6560 |
| }, |
| { |
| "epoch": 6.299137104506232, |
| "grad_norm": 0.41338953375816345, |
| "learning_rate": 8.918598956069919e-05, |
| "loss": 0.047, |
| "step": 6570 |
| }, |
| { |
| "epoch": 6.308724832214765, |
| "grad_norm": 0.35418424010276794, |
| "learning_rate": 8.914645809950344e-05, |
| "loss": 0.0508, |
| "step": 6580 |
| }, |
| { |
| "epoch": 6.3183125599232985, |
| "grad_norm": 0.5311810374259949, |
| "learning_rate": 8.91068633099458e-05, |
| "loss": 0.0469, |
| "step": 6590 |
| }, |
| { |
| "epoch": 6.327900287631831, |
| "grad_norm": 0.2737090587615967, |
| "learning_rate": 8.90672052560799e-05, |
| "loss": 0.0438, |
| "step": 6600 |
| }, |
| { |
| "epoch": 6.337488015340364, |
| "grad_norm": 0.2861912250518799, |
| "learning_rate": 8.902748400206174e-05, |
| "loss": 0.0498, |
| "step": 6610 |
| }, |
| { |
| "epoch": 6.347075743048897, |
| "grad_norm": 0.37531477212905884, |
| "learning_rate": 8.898769961214952e-05, |
| "loss": 0.0456, |
| "step": 6620 |
| }, |
| { |
| "epoch": 6.3566634707574305, |
| "grad_norm": 0.34681612253189087, |
| "learning_rate": 8.894785215070365e-05, |
| "loss": 0.0418, |
| "step": 6630 |
| }, |
| { |
| "epoch": 6.366251198465964, |
| "grad_norm": 0.28546613454818726, |
| "learning_rate": 8.890794168218649e-05, |
| "loss": 0.0428, |
| "step": 6640 |
| }, |
| { |
| "epoch": 6.375838926174497, |
| "grad_norm": 0.35588616132736206, |
| "learning_rate": 8.886796827116237e-05, |
| "loss": 0.0433, |
| "step": 6650 |
| }, |
| { |
| "epoch": 6.385426653883029, |
| "grad_norm": 0.362427294254303, |
| "learning_rate": 8.882793198229744e-05, |
| "loss": 0.0421, |
| "step": 6660 |
| }, |
| { |
| "epoch": 6.3950143815915625, |
| "grad_norm": 0.4580886960029602, |
| "learning_rate": 8.878783288035957e-05, |
| "loss": 0.034, |
| "step": 6670 |
| }, |
| { |
| "epoch": 6.404602109300096, |
| "grad_norm": 0.37446141242980957, |
| "learning_rate": 8.874767103021824e-05, |
| "loss": 0.0485, |
| "step": 6680 |
| }, |
| { |
| "epoch": 6.414189837008629, |
| "grad_norm": 0.2968175411224365, |
| "learning_rate": 8.870744649684444e-05, |
| "loss": 0.0354, |
| "step": 6690 |
| }, |
| { |
| "epoch": 6.423777564717162, |
| "grad_norm": 0.3441408574581146, |
| "learning_rate": 8.866715934531057e-05, |
| "loss": 0.0427, |
| "step": 6700 |
| }, |
| { |
| "epoch": 6.433365292425695, |
| "grad_norm": 0.3193801939487457, |
| "learning_rate": 8.862680964079031e-05, |
| "loss": 0.0377, |
| "step": 6710 |
| }, |
| { |
| "epoch": 6.442953020134228, |
| "grad_norm": 0.3228664696216583, |
| "learning_rate": 8.858639744855857e-05, |
| "loss": 0.0451, |
| "step": 6720 |
| }, |
| { |
| "epoch": 6.452540747842761, |
| "grad_norm": 0.4861704111099243, |
| "learning_rate": 8.85459228339913e-05, |
| "loss": 0.0426, |
| "step": 6730 |
| }, |
| { |
| "epoch": 6.462128475551294, |
| "grad_norm": 0.281361848115921, |
| "learning_rate": 8.85053858625655e-05, |
| "loss": 0.0411, |
| "step": 6740 |
| }, |
| { |
| "epoch": 6.471716203259827, |
| "grad_norm": 0.39643704891204834, |
| "learning_rate": 8.846478659985895e-05, |
| "loss": 0.0376, |
| "step": 6750 |
| }, |
| { |
| "epoch": 6.481303930968361, |
| "grad_norm": 0.269710510969162, |
| "learning_rate": 8.84241251115503e-05, |
| "loss": 0.0393, |
| "step": 6760 |
| }, |
| { |
| "epoch": 6.490891658676894, |
| "grad_norm": 0.31520572304725647, |
| "learning_rate": 8.838340146341881e-05, |
| "loss": 0.0486, |
| "step": 6770 |
| }, |
| { |
| "epoch": 6.500479386385427, |
| "grad_norm": 0.3355605900287628, |
| "learning_rate": 8.83426157213443e-05, |
| "loss": 0.045, |
| "step": 6780 |
| }, |
| { |
| "epoch": 6.510067114093959, |
| "grad_norm": 0.2806301712989807, |
| "learning_rate": 8.830176795130707e-05, |
| "loss": 0.0447, |
| "step": 6790 |
| }, |
| { |
| "epoch": 6.519654841802493, |
| "grad_norm": 0.27659860253334045, |
| "learning_rate": 8.82608582193877e-05, |
| "loss": 0.0426, |
| "step": 6800 |
| }, |
| { |
| "epoch": 6.529242569511026, |
| "grad_norm": 0.2935637831687927, |
| "learning_rate": 8.82198865917671e-05, |
| "loss": 0.0454, |
| "step": 6810 |
| }, |
| { |
| "epoch": 6.538830297219559, |
| "grad_norm": 0.3571741580963135, |
| "learning_rate": 8.817885313472623e-05, |
| "loss": 0.0454, |
| "step": 6820 |
| }, |
| { |
| "epoch": 6.548418024928092, |
| "grad_norm": 0.3467845022678375, |
| "learning_rate": 8.813775791464611e-05, |
| "loss": 0.043, |
| "step": 6830 |
| }, |
| { |
| "epoch": 6.558005752636625, |
| "grad_norm": 0.4052905738353729, |
| "learning_rate": 8.80966009980077e-05, |
| "loss": 0.0449, |
| "step": 6840 |
| }, |
| { |
| "epoch": 6.567593480345158, |
| "grad_norm": 0.3361055254936218, |
| "learning_rate": 8.805538245139169e-05, |
| "loss": 0.0464, |
| "step": 6850 |
| }, |
| { |
| "epoch": 6.577181208053691, |
| "grad_norm": 0.29235902428627014, |
| "learning_rate": 8.801410234147855e-05, |
| "loss": 0.0432, |
| "step": 6860 |
| }, |
| { |
| "epoch": 6.586768935762224, |
| "grad_norm": 0.4435720443725586, |
| "learning_rate": 8.797276073504832e-05, |
| "loss": 0.0483, |
| "step": 6870 |
| }, |
| { |
| "epoch": 6.5963566634707576, |
| "grad_norm": 0.36006295680999756, |
| "learning_rate": 8.793135769898048e-05, |
| "loss": 0.0389, |
| "step": 6880 |
| }, |
| { |
| "epoch": 6.605944391179291, |
| "grad_norm": 0.30433642864227295, |
| "learning_rate": 8.788989330025397e-05, |
| "loss": 0.0455, |
| "step": 6890 |
| }, |
| { |
| "epoch": 6.615532118887824, |
| "grad_norm": 0.2952471673488617, |
| "learning_rate": 8.784836760594692e-05, |
| "loss": 0.0373, |
| "step": 6900 |
| }, |
| { |
| "epoch": 6.625119846596356, |
| "grad_norm": 0.42555341124534607, |
| "learning_rate": 8.780678068323666e-05, |
| "loss": 0.041, |
| "step": 6910 |
| }, |
| { |
| "epoch": 6.6347075743048896, |
| "grad_norm": 0.3166603744029999, |
| "learning_rate": 8.776513259939957e-05, |
| "loss": 0.0441, |
| "step": 6920 |
| }, |
| { |
| "epoch": 6.644295302013423, |
| "grad_norm": 0.5082001686096191, |
| "learning_rate": 8.772342342181095e-05, |
| "loss": 0.0501, |
| "step": 6930 |
| }, |
| { |
| "epoch": 6.653883029721956, |
| "grad_norm": 0.32811877131462097, |
| "learning_rate": 8.768165321794496e-05, |
| "loss": 0.0449, |
| "step": 6940 |
| }, |
| { |
| "epoch": 6.663470757430489, |
| "grad_norm": 0.39213889837265015, |
| "learning_rate": 8.763982205537446e-05, |
| "loss": 0.0497, |
| "step": 6950 |
| }, |
| { |
| "epoch": 6.673058485139022, |
| "grad_norm": 0.33301976323127747, |
| "learning_rate": 8.759793000177094e-05, |
| "loss": 0.0466, |
| "step": 6960 |
| }, |
| { |
| "epoch": 6.682646212847555, |
| "grad_norm": 0.33493635058403015, |
| "learning_rate": 8.755597712490442e-05, |
| "loss": 0.0485, |
| "step": 6970 |
| }, |
| { |
| "epoch": 6.692233940556088, |
| "grad_norm": 0.43134915828704834, |
| "learning_rate": 8.751396349264324e-05, |
| "loss": 0.051, |
| "step": 6980 |
| }, |
| { |
| "epoch": 6.701821668264621, |
| "grad_norm": 0.3931342363357544, |
| "learning_rate": 8.747188917295409e-05, |
| "loss": 0.0436, |
| "step": 6990 |
| }, |
| { |
| "epoch": 6.7114093959731544, |
| "grad_norm": 0.3660528063774109, |
| "learning_rate": 8.742975423390183e-05, |
| "loss": 0.0393, |
| "step": 7000 |
| }, |
| { |
| "epoch": 6.720997123681688, |
| "grad_norm": 0.33165839314460754, |
| "learning_rate": 8.738755874364937e-05, |
| "loss": 0.0366, |
| "step": 7010 |
| }, |
| { |
| "epoch": 6.730584851390221, |
| "grad_norm": 0.3469119966030121, |
| "learning_rate": 8.734530277045759e-05, |
| "loss": 0.0378, |
| "step": 7020 |
| }, |
| { |
| "epoch": 6.740172579098753, |
| "grad_norm": 0.27698802947998047, |
| "learning_rate": 8.730298638268516e-05, |
| "loss": 0.0362, |
| "step": 7030 |
| }, |
| { |
| "epoch": 6.7497603068072864, |
| "grad_norm": 0.4078359603881836, |
| "learning_rate": 8.726060964878858e-05, |
| "loss": 0.046, |
| "step": 7040 |
| }, |
| { |
| "epoch": 6.75934803451582, |
| "grad_norm": 0.34536081552505493, |
| "learning_rate": 8.721817263732191e-05, |
| "loss": 0.0537, |
| "step": 7050 |
| }, |
| { |
| "epoch": 6.768935762224353, |
| "grad_norm": 0.7122533917427063, |
| "learning_rate": 8.717567541693673e-05, |
| "loss": 0.0466, |
| "step": 7060 |
| }, |
| { |
| "epoch": 6.778523489932886, |
| "grad_norm": 0.24024972319602966, |
| "learning_rate": 8.7133118056382e-05, |
| "loss": 0.0492, |
| "step": 7070 |
| }, |
| { |
| "epoch": 6.788111217641419, |
| "grad_norm": 0.41367456316947937, |
| "learning_rate": 8.709050062450403e-05, |
| "loss": 0.0424, |
| "step": 7080 |
| }, |
| { |
| "epoch": 6.797698945349952, |
| "grad_norm": 0.35695597529411316, |
| "learning_rate": 8.70478231902463e-05, |
| "loss": 0.0425, |
| "step": 7090 |
| }, |
| { |
| "epoch": 6.807286673058485, |
| "grad_norm": 0.38064390420913696, |
| "learning_rate": 8.700508582264928e-05, |
| "loss": 0.0488, |
| "step": 7100 |
| }, |
| { |
| "epoch": 6.816874400767018, |
| "grad_norm": 0.3264651894569397, |
| "learning_rate": 8.696228859085049e-05, |
| "loss": 0.0429, |
| "step": 7110 |
| }, |
| { |
| "epoch": 6.826462128475551, |
| "grad_norm": 0.36960527300834656, |
| "learning_rate": 8.691943156408425e-05, |
| "loss": 0.0465, |
| "step": 7120 |
| }, |
| { |
| "epoch": 6.836049856184085, |
| "grad_norm": 0.34985673427581787, |
| "learning_rate": 8.687651481168158e-05, |
| "loss": 0.0389, |
| "step": 7130 |
| }, |
| { |
| "epoch": 6.845637583892618, |
| "grad_norm": 0.308672696352005, |
| "learning_rate": 8.68335384030702e-05, |
| "loss": 0.0426, |
| "step": 7140 |
| }, |
| { |
| "epoch": 6.855225311601151, |
| "grad_norm": 0.3914170563220978, |
| "learning_rate": 8.679050240777427e-05, |
| "loss": 0.0421, |
| "step": 7150 |
| }, |
| { |
| "epoch": 6.864813039309683, |
| "grad_norm": 0.2807207703590393, |
| "learning_rate": 8.674740689541439e-05, |
| "loss": 0.0484, |
| "step": 7160 |
| }, |
| { |
| "epoch": 6.874400767018217, |
| "grad_norm": 0.31063312292099, |
| "learning_rate": 8.670425193570739e-05, |
| "loss": 0.0413, |
| "step": 7170 |
| }, |
| { |
| "epoch": 6.88398849472675, |
| "grad_norm": 0.3080969452857971, |
| "learning_rate": 8.666103759846634e-05, |
| "loss": 0.0438, |
| "step": 7180 |
| }, |
| { |
| "epoch": 6.893576222435283, |
| "grad_norm": 0.27219802141189575, |
| "learning_rate": 8.661776395360029e-05, |
| "loss": 0.045, |
| "step": 7190 |
| }, |
| { |
| "epoch": 6.903163950143816, |
| "grad_norm": 0.44108715653419495, |
| "learning_rate": 8.65744310711143e-05, |
| "loss": 0.0431, |
| "step": 7200 |
| }, |
| { |
| "epoch": 6.912751677852349, |
| "grad_norm": 0.34575361013412476, |
| "learning_rate": 8.653103902110922e-05, |
| "loss": 0.0419, |
| "step": 7210 |
| }, |
| { |
| "epoch": 6.922339405560882, |
| "grad_norm": 0.29534199833869934, |
| "learning_rate": 8.648758787378164e-05, |
| "loss": 0.0392, |
| "step": 7220 |
| }, |
| { |
| "epoch": 6.931927133269415, |
| "grad_norm": 0.3387232720851898, |
| "learning_rate": 8.644407769942373e-05, |
| "loss": 0.0354, |
| "step": 7230 |
| }, |
| { |
| "epoch": 6.941514860977948, |
| "grad_norm": 0.27211427688598633, |
| "learning_rate": 8.640050856842317e-05, |
| "loss": 0.0401, |
| "step": 7240 |
| }, |
| { |
| "epoch": 6.9511025886864815, |
| "grad_norm": 0.27033731341362, |
| "learning_rate": 8.635688055126299e-05, |
| "loss": 0.0389, |
| "step": 7250 |
| }, |
| { |
| "epoch": 6.960690316395015, |
| "grad_norm": 0.3898187279701233, |
| "learning_rate": 8.631319371852151e-05, |
| "loss": 0.0393, |
| "step": 7260 |
| }, |
| { |
| "epoch": 6.970278044103548, |
| "grad_norm": 0.2771322727203369, |
| "learning_rate": 8.626944814087221e-05, |
| "loss": 0.0463, |
| "step": 7270 |
| }, |
| { |
| "epoch": 6.97986577181208, |
| "grad_norm": 0.28191322088241577, |
| "learning_rate": 8.622564388908357e-05, |
| "loss": 0.0443, |
| "step": 7280 |
| }, |
| { |
| "epoch": 6.9894534995206135, |
| "grad_norm": 0.3647807240486145, |
| "learning_rate": 8.618178103401897e-05, |
| "loss": 0.044, |
| "step": 7290 |
| }, |
| { |
| "epoch": 6.999041227229147, |
| "grad_norm": 0.2619480490684509, |
| "learning_rate": 8.613785964663665e-05, |
| "loss": 0.0422, |
| "step": 7300 |
| }, |
| { |
| "epoch": 7.00862895493768, |
| "grad_norm": 0.2431744933128357, |
| "learning_rate": 8.609387979798952e-05, |
| "loss": 0.0458, |
| "step": 7310 |
| }, |
| { |
| "epoch": 7.018216682646213, |
| "grad_norm": 0.31808608770370483, |
| "learning_rate": 8.604984155922506e-05, |
| "loss": 0.0391, |
| "step": 7320 |
| }, |
| { |
| "epoch": 7.027804410354746, |
| "grad_norm": 0.41725489497184753, |
| "learning_rate": 8.600574500158518e-05, |
| "loss": 0.0395, |
| "step": 7330 |
| }, |
| { |
| "epoch": 7.037392138063279, |
| "grad_norm": 0.23228147625923157, |
| "learning_rate": 8.596159019640619e-05, |
| "loss": 0.0415, |
| "step": 7340 |
| }, |
| { |
| "epoch": 7.046979865771812, |
| "grad_norm": 0.25770825147628784, |
| "learning_rate": 8.59173772151186e-05, |
| "loss": 0.0428, |
| "step": 7350 |
| }, |
| { |
| "epoch": 7.056567593480345, |
| "grad_norm": 0.2742254436016083, |
| "learning_rate": 8.587310612924699e-05, |
| "loss": 0.0456, |
| "step": 7360 |
| }, |
| { |
| "epoch": 7.066155321188878, |
| "grad_norm": 0.34984004497528076, |
| "learning_rate": 8.582877701041004e-05, |
| "loss": 0.0304, |
| "step": 7370 |
| }, |
| { |
| "epoch": 7.075743048897412, |
| "grad_norm": 0.34064123034477234, |
| "learning_rate": 8.578438993032021e-05, |
| "loss": 0.038, |
| "step": 7380 |
| }, |
| { |
| "epoch": 7.085330776605945, |
| "grad_norm": 0.3359072506427765, |
| "learning_rate": 8.57399449607838e-05, |
| "loss": 0.0463, |
| "step": 7390 |
| }, |
| { |
| "epoch": 7.094918504314477, |
| "grad_norm": 0.330243855714798, |
| "learning_rate": 8.569544217370072e-05, |
| "loss": 0.0469, |
| "step": 7400 |
| }, |
| { |
| "epoch": 7.10450623202301, |
| "grad_norm": 0.23439550399780273, |
| "learning_rate": 8.565088164106439e-05, |
| "loss": 0.0388, |
| "step": 7410 |
| }, |
| { |
| "epoch": 7.114093959731544, |
| "grad_norm": 0.45976459980010986, |
| "learning_rate": 8.56062634349617e-05, |
| "loss": 0.0454, |
| "step": 7420 |
| }, |
| { |
| "epoch": 7.123681687440077, |
| "grad_norm": 0.310160368680954, |
| "learning_rate": 8.556158762757282e-05, |
| "loss": 0.0401, |
| "step": 7430 |
| }, |
| { |
| "epoch": 7.13326941514861, |
| "grad_norm": 0.4018678665161133, |
| "learning_rate": 8.551685429117111e-05, |
| "loss": 0.0512, |
| "step": 7440 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 0.3131730556488037, |
| "learning_rate": 8.547206349812298e-05, |
| "loss": 0.0421, |
| "step": 7450 |
| }, |
| { |
| "epoch": 7.152444870565676, |
| "grad_norm": 0.30326828360557556, |
| "learning_rate": 8.542721532088778e-05, |
| "loss": 0.0461, |
| "step": 7460 |
| }, |
| { |
| "epoch": 7.162032598274209, |
| "grad_norm": 0.3814712166786194, |
| "learning_rate": 8.538230983201771e-05, |
| "loss": 0.0446, |
| "step": 7470 |
| }, |
| { |
| "epoch": 7.171620325982742, |
| "grad_norm": 0.33048462867736816, |
| "learning_rate": 8.533734710415771e-05, |
| "loss": 0.048, |
| "step": 7480 |
| }, |
| { |
| "epoch": 7.181208053691275, |
| "grad_norm": 0.2931906580924988, |
| "learning_rate": 8.529232721004527e-05, |
| "loss": 0.0405, |
| "step": 7490 |
| }, |
| { |
| "epoch": 7.1907957813998085, |
| "grad_norm": 0.3595677614212036, |
| "learning_rate": 8.524725022251039e-05, |
| "loss": 0.0404, |
| "step": 7500 |
| }, |
| { |
| "epoch": 7.200383509108342, |
| "grad_norm": 0.37149250507354736, |
| "learning_rate": 8.520211621447541e-05, |
| "loss": 0.0382, |
| "step": 7510 |
| }, |
| { |
| "epoch": 7.209971236816874, |
| "grad_norm": 0.2645772099494934, |
| "learning_rate": 8.515692525895494e-05, |
| "loss": 0.0438, |
| "step": 7520 |
| }, |
| { |
| "epoch": 7.219558964525407, |
| "grad_norm": 0.3602275848388672, |
| "learning_rate": 8.511167742905569e-05, |
| "loss": 0.0421, |
| "step": 7530 |
| }, |
| { |
| "epoch": 7.2291466922339405, |
| "grad_norm": 0.27108579874038696, |
| "learning_rate": 8.506637279797638e-05, |
| "loss": 0.0406, |
| "step": 7540 |
| }, |
| { |
| "epoch": 7.238734419942474, |
| "grad_norm": 0.329333633184433, |
| "learning_rate": 8.502101143900764e-05, |
| "loss": 0.0357, |
| "step": 7550 |
| }, |
| { |
| "epoch": 7.248322147651007, |
| "grad_norm": 0.2549634873867035, |
| "learning_rate": 8.497559342553185e-05, |
| "loss": 0.0354, |
| "step": 7560 |
| }, |
| { |
| "epoch": 7.25790987535954, |
| "grad_norm": 0.3205493092536926, |
| "learning_rate": 8.493011883102307e-05, |
| "loss": 0.0373, |
| "step": 7570 |
| }, |
| { |
| "epoch": 7.2674976030680725, |
| "grad_norm": 0.2169693112373352, |
| "learning_rate": 8.488458772904684e-05, |
| "loss": 0.0394, |
| "step": 7580 |
| }, |
| { |
| "epoch": 7.277085330776606, |
| "grad_norm": 0.37165510654449463, |
| "learning_rate": 8.483900019326017e-05, |
| "loss": 0.0381, |
| "step": 7590 |
| }, |
| { |
| "epoch": 7.286673058485139, |
| "grad_norm": 0.26651856303215027, |
| "learning_rate": 8.479335629741133e-05, |
| "loss": 0.0422, |
| "step": 7600 |
| }, |
| { |
| "epoch": 7.296260786193672, |
| "grad_norm": 0.32148563861846924, |
| "learning_rate": 8.474765611533977e-05, |
| "loss": 0.0357, |
| "step": 7610 |
| }, |
| { |
| "epoch": 7.305848513902205, |
| "grad_norm": 0.26410454511642456, |
| "learning_rate": 8.470189972097601e-05, |
| "loss": 0.0362, |
| "step": 7620 |
| }, |
| { |
| "epoch": 7.315436241610739, |
| "grad_norm": 0.43451759219169617, |
| "learning_rate": 8.465608718834152e-05, |
| "loss": 0.0394, |
| "step": 7630 |
| }, |
| { |
| "epoch": 7.325023969319272, |
| "grad_norm": 0.39956948161125183, |
| "learning_rate": 8.461021859154851e-05, |
| "loss": 0.0467, |
| "step": 7640 |
| }, |
| { |
| "epoch": 7.334611697027804, |
| "grad_norm": 0.36985108256340027, |
| "learning_rate": 8.45642940048e-05, |
| "loss": 0.0426, |
| "step": 7650 |
| }, |
| { |
| "epoch": 7.344199424736337, |
| "grad_norm": 0.27028191089630127, |
| "learning_rate": 8.451831350238947e-05, |
| "loss": 0.0404, |
| "step": 7660 |
| }, |
| { |
| "epoch": 7.353787152444871, |
| "grad_norm": 0.3216499388217926, |
| "learning_rate": 8.447227715870097e-05, |
| "loss": 0.0389, |
| "step": 7670 |
| }, |
| { |
| "epoch": 7.363374880153404, |
| "grad_norm": 0.2922750413417816, |
| "learning_rate": 8.442618504820878e-05, |
| "loss": 0.0416, |
| "step": 7680 |
| }, |
| { |
| "epoch": 7.372962607861937, |
| "grad_norm": 0.32347607612609863, |
| "learning_rate": 8.438003724547747e-05, |
| "loss": 0.0371, |
| "step": 7690 |
| }, |
| { |
| "epoch": 7.382550335570469, |
| "grad_norm": 0.37498921155929565, |
| "learning_rate": 8.433383382516169e-05, |
| "loss": 0.0388, |
| "step": 7700 |
| }, |
| { |
| "epoch": 7.392138063279003, |
| "grad_norm": 0.41235196590423584, |
| "learning_rate": 8.428757486200603e-05, |
| "loss": 0.0382, |
| "step": 7710 |
| }, |
| { |
| "epoch": 7.401725790987536, |
| "grad_norm": 0.32482102513313293, |
| "learning_rate": 8.424126043084499e-05, |
| "loss": 0.0397, |
| "step": 7720 |
| }, |
| { |
| "epoch": 7.411313518696069, |
| "grad_norm": 0.3329836130142212, |
| "learning_rate": 8.419489060660272e-05, |
| "loss": 0.0381, |
| "step": 7730 |
| }, |
| { |
| "epoch": 7.420901246404602, |
| "grad_norm": 0.28950804471969604, |
| "learning_rate": 8.41484654642931e-05, |
| "loss": 0.037, |
| "step": 7740 |
| }, |
| { |
| "epoch": 7.4304889741131355, |
| "grad_norm": 0.43603238463401794, |
| "learning_rate": 8.410198507901936e-05, |
| "loss": 0.0465, |
| "step": 7750 |
| }, |
| { |
| "epoch": 7.440076701821669, |
| "grad_norm": 0.3902181386947632, |
| "learning_rate": 8.405544952597422e-05, |
| "loss": 0.0423, |
| "step": 7760 |
| }, |
| { |
| "epoch": 7.449664429530201, |
| "grad_norm": 0.4409140348434448, |
| "learning_rate": 8.400885888043956e-05, |
| "loss": 0.0384, |
| "step": 7770 |
| }, |
| { |
| "epoch": 7.459252157238734, |
| "grad_norm": 0.33337706327438354, |
| "learning_rate": 8.396221321778645e-05, |
| "loss": 0.0407, |
| "step": 7780 |
| }, |
| { |
| "epoch": 7.4688398849472675, |
| "grad_norm": 0.29487982392311096, |
| "learning_rate": 8.391551261347493e-05, |
| "loss": 0.0407, |
| "step": 7790 |
| }, |
| { |
| "epoch": 7.478427612655801, |
| "grad_norm": 0.2853257954120636, |
| "learning_rate": 8.38687571430539e-05, |
| "loss": 0.0412, |
| "step": 7800 |
| }, |
| { |
| "epoch": 7.488015340364334, |
| "grad_norm": 0.24586626887321472, |
| "learning_rate": 8.382194688216105e-05, |
| "loss": 0.0453, |
| "step": 7810 |
| }, |
| { |
| "epoch": 7.497603068072867, |
| "grad_norm": 0.24528749287128448, |
| "learning_rate": 8.377508190652272e-05, |
| "loss": 0.0435, |
| "step": 7820 |
| }, |
| { |
| "epoch": 7.5071907957813995, |
| "grad_norm": 0.21899107098579407, |
| "learning_rate": 8.37281622919537e-05, |
| "loss": 0.0516, |
| "step": 7830 |
| }, |
| { |
| "epoch": 7.516778523489933, |
| "grad_norm": 0.5243720412254333, |
| "learning_rate": 8.368118811435726e-05, |
| "loss": 0.0373, |
| "step": 7840 |
| }, |
| { |
| "epoch": 7.526366251198466, |
| "grad_norm": 0.24362969398498535, |
| "learning_rate": 8.363415944972487e-05, |
| "loss": 0.0452, |
| "step": 7850 |
| }, |
| { |
| "epoch": 7.535953978906999, |
| "grad_norm": 0.3614483177661896, |
| "learning_rate": 8.358707637413615e-05, |
| "loss": 0.0343, |
| "step": 7860 |
| }, |
| { |
| "epoch": 7.545541706615532, |
| "grad_norm": 0.3958549201488495, |
| "learning_rate": 8.353993896375878e-05, |
| "loss": 0.0454, |
| "step": 7870 |
| }, |
| { |
| "epoch": 7.555129434324066, |
| "grad_norm": 0.3544330596923828, |
| "learning_rate": 8.349274729484832e-05, |
| "loss": 0.0434, |
| "step": 7880 |
| }, |
| { |
| "epoch": 7.564717162032598, |
| "grad_norm": 0.3171081244945526, |
| "learning_rate": 8.344550144374808e-05, |
| "loss": 0.0423, |
| "step": 7890 |
| }, |
| { |
| "epoch": 7.574304889741131, |
| "grad_norm": 0.3729722797870636, |
| "learning_rate": 8.339820148688907e-05, |
| "loss": 0.0407, |
| "step": 7900 |
| }, |
| { |
| "epoch": 7.583892617449664, |
| "grad_norm": 0.3339761197566986, |
| "learning_rate": 8.335084750078978e-05, |
| "loss": 0.0452, |
| "step": 7910 |
| }, |
| { |
| "epoch": 7.593480345158198, |
| "grad_norm": 0.20363827049732208, |
| "learning_rate": 8.330343956205615e-05, |
| "loss": 0.0387, |
| "step": 7920 |
| }, |
| { |
| "epoch": 7.603068072866731, |
| "grad_norm": 0.3942667245864868, |
| "learning_rate": 8.325597774738137e-05, |
| "loss": 0.0407, |
| "step": 7930 |
| }, |
| { |
| "epoch": 7.612655800575264, |
| "grad_norm": 0.234974667429924, |
| "learning_rate": 8.32084621335458e-05, |
| "loss": 0.0417, |
| "step": 7940 |
| }, |
| { |
| "epoch": 7.622243528283796, |
| "grad_norm": 0.4611276388168335, |
| "learning_rate": 8.316089279741682e-05, |
| "loss": 0.0455, |
| "step": 7950 |
| }, |
| { |
| "epoch": 7.63183125599233, |
| "grad_norm": 0.31897857785224915, |
| "learning_rate": 8.311326981594872e-05, |
| "loss": 0.0489, |
| "step": 7960 |
| }, |
| { |
| "epoch": 7.641418983700863, |
| "grad_norm": 0.34105560183525085, |
| "learning_rate": 8.306559326618259e-05, |
| "loss": 0.0441, |
| "step": 7970 |
| }, |
| { |
| "epoch": 7.651006711409396, |
| "grad_norm": 0.35638663172721863, |
| "learning_rate": 8.301786322524619e-05, |
| "loss": 0.0443, |
| "step": 7980 |
| }, |
| { |
| "epoch": 7.660594439117929, |
| "grad_norm": 0.4538173973560333, |
| "learning_rate": 8.297007977035376e-05, |
| "loss": 0.0414, |
| "step": 7990 |
| }, |
| { |
| "epoch": 7.6701821668264625, |
| "grad_norm": 0.37664180994033813, |
| "learning_rate": 8.292224297880598e-05, |
| "loss": 0.0453, |
| "step": 8000 |
| }, |
| { |
| "epoch": 7.679769894534996, |
| "grad_norm": 0.2357359379529953, |
| "learning_rate": 8.287435292798984e-05, |
| "loss": 0.0424, |
| "step": 8010 |
| }, |
| { |
| "epoch": 7.689357622243528, |
| "grad_norm": 0.32804933190345764, |
| "learning_rate": 8.282640969537848e-05, |
| "loss": 0.0381, |
| "step": 8020 |
| }, |
| { |
| "epoch": 7.698945349952061, |
| "grad_norm": 0.45805230736732483, |
| "learning_rate": 8.277841335853101e-05, |
| "loss": 0.0346, |
| "step": 8030 |
| }, |
| { |
| "epoch": 7.7085330776605945, |
| "grad_norm": 0.2550659775733948, |
| "learning_rate": 8.273036399509253e-05, |
| "loss": 0.044, |
| "step": 8040 |
| }, |
| { |
| "epoch": 7.718120805369128, |
| "grad_norm": 0.3587624728679657, |
| "learning_rate": 8.268226168279389e-05, |
| "loss": 0.0396, |
| "step": 8050 |
| }, |
| { |
| "epoch": 7.727708533077661, |
| "grad_norm": 0.41126248240470886, |
| "learning_rate": 8.263410649945159e-05, |
| "loss": 0.0438, |
| "step": 8060 |
| }, |
| { |
| "epoch": 7.737296260786193, |
| "grad_norm": 0.542373538017273, |
| "learning_rate": 8.258589852296765e-05, |
| "loss": 0.0473, |
| "step": 8070 |
| }, |
| { |
| "epoch": 7.7468839884947265, |
| "grad_norm": 0.44072815775871277, |
| "learning_rate": 8.253763783132955e-05, |
| "loss": 0.0413, |
| "step": 8080 |
| }, |
| { |
| "epoch": 7.75647171620326, |
| "grad_norm": 0.3905545771121979, |
| "learning_rate": 8.248932450261e-05, |
| "loss": 0.0394, |
| "step": 8090 |
| }, |
| { |
| "epoch": 7.766059443911793, |
| "grad_norm": 0.3717019855976105, |
| "learning_rate": 8.244095861496686e-05, |
| "loss": 0.0391, |
| "step": 8100 |
| }, |
| { |
| "epoch": 7.775647171620326, |
| "grad_norm": 0.28803032636642456, |
| "learning_rate": 8.239254024664304e-05, |
| "loss": 0.0398, |
| "step": 8110 |
| }, |
| { |
| "epoch": 7.785234899328859, |
| "grad_norm": 0.26609280705451965, |
| "learning_rate": 8.234406947596633e-05, |
| "loss": 0.0399, |
| "step": 8120 |
| }, |
| { |
| "epoch": 7.794822627037393, |
| "grad_norm": 0.28858163952827454, |
| "learning_rate": 8.229554638134933e-05, |
| "loss": 0.0327, |
| "step": 8130 |
| }, |
| { |
| "epoch": 7.804410354745925, |
| "grad_norm": 0.3346012830734253, |
| "learning_rate": 8.224697104128925e-05, |
| "loss": 0.0372, |
| "step": 8140 |
| }, |
| { |
| "epoch": 7.813998082454458, |
| "grad_norm": 0.3210478723049164, |
| "learning_rate": 8.219834353436781e-05, |
| "loss": 0.0424, |
| "step": 8150 |
| }, |
| { |
| "epoch": 7.823585810162991, |
| "grad_norm": 0.2401236593723297, |
| "learning_rate": 8.214966393925115e-05, |
| "loss": 0.0366, |
| "step": 8160 |
| }, |
| { |
| "epoch": 7.833173537871525, |
| "grad_norm": 0.29601314663887024, |
| "learning_rate": 8.210093233468968e-05, |
| "loss": 0.0416, |
| "step": 8170 |
| }, |
| { |
| "epoch": 7.842761265580058, |
| "grad_norm": 0.29966652393341064, |
| "learning_rate": 8.20521487995179e-05, |
| "loss": 0.0349, |
| "step": 8180 |
| }, |
| { |
| "epoch": 7.85234899328859, |
| "grad_norm": 0.3385706841945648, |
| "learning_rate": 8.200331341265436e-05, |
| "loss": 0.0421, |
| "step": 8190 |
| }, |
| { |
| "epoch": 7.861936720997123, |
| "grad_norm": 0.4073570966720581, |
| "learning_rate": 8.19544262531015e-05, |
| "loss": 0.0416, |
| "step": 8200 |
| }, |
| { |
| "epoch": 7.871524448705657, |
| "grad_norm": 0.30653032660484314, |
| "learning_rate": 8.19054873999455e-05, |
| "loss": 0.04, |
| "step": 8210 |
| }, |
| { |
| "epoch": 7.88111217641419, |
| "grad_norm": 0.24951298534870148, |
| "learning_rate": 8.185649693235614e-05, |
| "loss": 0.0397, |
| "step": 8220 |
| }, |
| { |
| "epoch": 7.890699904122723, |
| "grad_norm": 0.24890607595443726, |
| "learning_rate": 8.180745492958674e-05, |
| "loss": 0.0396, |
| "step": 8230 |
| }, |
| { |
| "epoch": 7.900287631831256, |
| "grad_norm": 0.2634108066558838, |
| "learning_rate": 8.175836147097396e-05, |
| "loss": 0.0364, |
| "step": 8240 |
| }, |
| { |
| "epoch": 7.9098753595397895, |
| "grad_norm": 0.29432108998298645, |
| "learning_rate": 8.170921663593773e-05, |
| "loss": 0.0353, |
| "step": 8250 |
| }, |
| { |
| "epoch": 7.919463087248322, |
| "grad_norm": 0.3281777799129486, |
| "learning_rate": 8.166002050398106e-05, |
| "loss": 0.0429, |
| "step": 8260 |
| }, |
| { |
| "epoch": 7.929050814956855, |
| "grad_norm": 0.24084685742855072, |
| "learning_rate": 8.161077315468997e-05, |
| "loss": 0.0454, |
| "step": 8270 |
| }, |
| { |
| "epoch": 7.938638542665388, |
| "grad_norm": 0.290452241897583, |
| "learning_rate": 8.156147466773332e-05, |
| "loss": 0.0427, |
| "step": 8280 |
| }, |
| { |
| "epoch": 7.9482262703739215, |
| "grad_norm": 0.3068200945854187, |
| "learning_rate": 8.15121251228627e-05, |
| "loss": 0.0416, |
| "step": 8290 |
| }, |
| { |
| "epoch": 7.957813998082455, |
| "grad_norm": 0.5520877242088318, |
| "learning_rate": 8.146272459991233e-05, |
| "loss": 0.0369, |
| "step": 8300 |
| }, |
| { |
| "epoch": 7.967401725790987, |
| "grad_norm": 0.268451064825058, |
| "learning_rate": 8.141327317879884e-05, |
| "loss": 0.0419, |
| "step": 8310 |
| }, |
| { |
| "epoch": 7.97698945349952, |
| "grad_norm": 0.45414549112319946, |
| "learning_rate": 8.136377093952123e-05, |
| "loss": 0.0414, |
| "step": 8320 |
| }, |
| { |
| "epoch": 7.9865771812080535, |
| "grad_norm": 0.2249930500984192, |
| "learning_rate": 8.131421796216072e-05, |
| "loss": 0.0389, |
| "step": 8330 |
| }, |
| { |
| "epoch": 7.996164908916587, |
| "grad_norm": 0.28440603613853455, |
| "learning_rate": 8.126461432688061e-05, |
| "loss": 0.038, |
| "step": 8340 |
| }, |
| { |
| "epoch": 8.00575263662512, |
| "grad_norm": 0.26801931858062744, |
| "learning_rate": 8.121496011392613e-05, |
| "loss": 0.0382, |
| "step": 8350 |
| }, |
| { |
| "epoch": 8.015340364333653, |
| "grad_norm": 0.3116857409477234, |
| "learning_rate": 8.116525540362434e-05, |
| "loss": 0.0395, |
| "step": 8360 |
| }, |
| { |
| "epoch": 8.024928092042186, |
| "grad_norm": 0.37847548723220825, |
| "learning_rate": 8.1115500276384e-05, |
| "loss": 0.0395, |
| "step": 8370 |
| }, |
| { |
| "epoch": 8.03451581975072, |
| "grad_norm": 0.3358413279056549, |
| "learning_rate": 8.10656948126954e-05, |
| "loss": 0.0443, |
| "step": 8380 |
| }, |
| { |
| "epoch": 8.044103547459253, |
| "grad_norm": 0.3593525290489197, |
| "learning_rate": 8.101583909313033e-05, |
| "loss": 0.0393, |
| "step": 8390 |
| }, |
| { |
| "epoch": 8.053691275167786, |
| "grad_norm": 0.2807999551296234, |
| "learning_rate": 8.09659331983418e-05, |
| "loss": 0.0337, |
| "step": 8400 |
| }, |
| { |
| "epoch": 8.063279002876317, |
| "grad_norm": 0.24256014823913574, |
| "learning_rate": 8.091597720906403e-05, |
| "loss": 0.0383, |
| "step": 8410 |
| }, |
| { |
| "epoch": 8.07286673058485, |
| "grad_norm": 0.4359792172908783, |
| "learning_rate": 8.086597120611228e-05, |
| "loss": 0.0389, |
| "step": 8420 |
| }, |
| { |
| "epoch": 8.082454458293384, |
| "grad_norm": 0.3423149883747101, |
| "learning_rate": 8.081591527038271e-05, |
| "loss": 0.0401, |
| "step": 8430 |
| }, |
| { |
| "epoch": 8.092042186001917, |
| "grad_norm": 0.2202298790216446, |
| "learning_rate": 8.076580948285227e-05, |
| "loss": 0.0364, |
| "step": 8440 |
| }, |
| { |
| "epoch": 8.10162991371045, |
| "grad_norm": 0.36670511960983276, |
| "learning_rate": 8.071565392457852e-05, |
| "loss": 0.0379, |
| "step": 8450 |
| }, |
| { |
| "epoch": 8.111217641418984, |
| "grad_norm": 0.22374413907527924, |
| "learning_rate": 8.066544867669961e-05, |
| "loss": 0.0363, |
| "step": 8460 |
| }, |
| { |
| "epoch": 8.120805369127517, |
| "grad_norm": 0.43999022245407104, |
| "learning_rate": 8.061519382043399e-05, |
| "loss": 0.0385, |
| "step": 8470 |
| }, |
| { |
| "epoch": 8.13039309683605, |
| "grad_norm": 0.2890577018260956, |
| "learning_rate": 8.056488943708041e-05, |
| "loss": 0.0379, |
| "step": 8480 |
| }, |
| { |
| "epoch": 8.139980824544583, |
| "grad_norm": 0.3366747200489044, |
| "learning_rate": 8.051453560801772e-05, |
| "loss": 0.0417, |
| "step": 8490 |
| }, |
| { |
| "epoch": 8.149568552253116, |
| "grad_norm": 0.2634000778198242, |
| "learning_rate": 8.046413241470478e-05, |
| "loss": 0.0351, |
| "step": 8500 |
| }, |
| { |
| "epoch": 8.15915627996165, |
| "grad_norm": 0.21788382530212402, |
| "learning_rate": 8.041367993868031e-05, |
| "loss": 0.0391, |
| "step": 8510 |
| }, |
| { |
| "epoch": 8.168744007670183, |
| "grad_norm": 0.31453433632850647, |
| "learning_rate": 8.036317826156275e-05, |
| "loss": 0.0392, |
| "step": 8520 |
| }, |
| { |
| "epoch": 8.178331735378714, |
| "grad_norm": 0.2942139506340027, |
| "learning_rate": 8.031262746505012e-05, |
| "loss": 0.0443, |
| "step": 8530 |
| }, |
| { |
| "epoch": 8.187919463087248, |
| "grad_norm": 0.24110645055770874, |
| "learning_rate": 8.02620276309199e-05, |
| "loss": 0.038, |
| "step": 8540 |
| }, |
| { |
| "epoch": 8.19750719079578, |
| "grad_norm": 0.26143452525138855, |
| "learning_rate": 8.021137884102891e-05, |
| "loss": 0.0349, |
| "step": 8550 |
| }, |
| { |
| "epoch": 8.207094918504314, |
| "grad_norm": 0.23739804327487946, |
| "learning_rate": 8.016068117731318e-05, |
| "loss": 0.0367, |
| "step": 8560 |
| }, |
| { |
| "epoch": 8.216682646212847, |
| "grad_norm": 0.31131234765052795, |
| "learning_rate": 8.010993472178778e-05, |
| "loss": 0.0383, |
| "step": 8570 |
| }, |
| { |
| "epoch": 8.22627037392138, |
| "grad_norm": 0.301734060049057, |
| "learning_rate": 8.005913955654675e-05, |
| "loss": 0.0402, |
| "step": 8580 |
| }, |
| { |
| "epoch": 8.235858101629914, |
| "grad_norm": 0.2536526620388031, |
| "learning_rate": 8.000829576376288e-05, |
| "loss": 0.0324, |
| "step": 8590 |
| }, |
| { |
| "epoch": 8.245445829338447, |
| "grad_norm": 0.398578941822052, |
| "learning_rate": 7.995740342568767e-05, |
| "loss": 0.0382, |
| "step": 8600 |
| }, |
| { |
| "epoch": 8.25503355704698, |
| "grad_norm": 0.2876124083995819, |
| "learning_rate": 7.990646262465112e-05, |
| "loss": 0.038, |
| "step": 8610 |
| }, |
| { |
| "epoch": 8.264621284755513, |
| "grad_norm": 0.30959025025367737, |
| "learning_rate": 7.985547344306161e-05, |
| "loss": 0.0464, |
| "step": 8620 |
| }, |
| { |
| "epoch": 8.274209012464047, |
| "grad_norm": 0.327210396528244, |
| "learning_rate": 7.980443596340589e-05, |
| "loss": 0.0426, |
| "step": 8630 |
| }, |
| { |
| "epoch": 8.28379674017258, |
| "grad_norm": 0.23988771438598633, |
| "learning_rate": 7.975335026824873e-05, |
| "loss": 0.043, |
| "step": 8640 |
| }, |
| { |
| "epoch": 8.293384467881111, |
| "grad_norm": 0.2276514321565628, |
| "learning_rate": 7.970221644023293e-05, |
| "loss": 0.0407, |
| "step": 8650 |
| }, |
| { |
| "epoch": 8.302972195589644, |
| "grad_norm": 0.27630215883255005, |
| "learning_rate": 7.965103456207919e-05, |
| "loss": 0.0439, |
| "step": 8660 |
| }, |
| { |
| "epoch": 8.312559923298178, |
| "grad_norm": 0.1922815442085266, |
| "learning_rate": 7.959980471658592e-05, |
| "loss": 0.0396, |
| "step": 8670 |
| }, |
| { |
| "epoch": 8.322147651006711, |
| "grad_norm": 0.303406298160553, |
| "learning_rate": 7.954852698662913e-05, |
| "loss": 0.0363, |
| "step": 8680 |
| }, |
| { |
| "epoch": 8.331735378715244, |
| "grad_norm": 0.3184201717376709, |
| "learning_rate": 7.94972014551623e-05, |
| "loss": 0.0414, |
| "step": 8690 |
| }, |
| { |
| "epoch": 8.341323106423777, |
| "grad_norm": 0.31593239307403564, |
| "learning_rate": 7.94458282052162e-05, |
| "loss": 0.0431, |
| "step": 8700 |
| }, |
| { |
| "epoch": 8.35091083413231, |
| "grad_norm": 0.2461700290441513, |
| "learning_rate": 7.939440731989887e-05, |
| "loss": 0.0447, |
| "step": 8710 |
| }, |
| { |
| "epoch": 8.360498561840844, |
| "grad_norm": 0.5149932503700256, |
| "learning_rate": 7.934293888239532e-05, |
| "loss": 0.0377, |
| "step": 8720 |
| }, |
| { |
| "epoch": 8.370086289549377, |
| "grad_norm": 0.273589164018631, |
| "learning_rate": 7.929142297596756e-05, |
| "loss": 0.0436, |
| "step": 8730 |
| }, |
| { |
| "epoch": 8.37967401725791, |
| "grad_norm": 0.37680765986442566, |
| "learning_rate": 7.92398596839544e-05, |
| "loss": 0.0351, |
| "step": 8740 |
| }, |
| { |
| "epoch": 8.389261744966444, |
| "grad_norm": 0.3258054256439209, |
| "learning_rate": 7.918824908977123e-05, |
| "loss": 0.0387, |
| "step": 8750 |
| }, |
| { |
| "epoch": 8.398849472674977, |
| "grad_norm": 0.36646002531051636, |
| "learning_rate": 7.913659127691002e-05, |
| "loss": 0.0388, |
| "step": 8760 |
| }, |
| { |
| "epoch": 8.40843720038351, |
| "grad_norm": 0.31907573342323303, |
| "learning_rate": 7.908488632893913e-05, |
| "loss": 0.043, |
| "step": 8770 |
| }, |
| { |
| "epoch": 8.418024928092041, |
| "grad_norm": 0.3218369781970978, |
| "learning_rate": 7.903313432950313e-05, |
| "loss": 0.041, |
| "step": 8780 |
| }, |
| { |
| "epoch": 8.427612655800575, |
| "grad_norm": 0.2750600576400757, |
| "learning_rate": 7.898133536232275e-05, |
| "loss": 0.0372, |
| "step": 8790 |
| }, |
| { |
| "epoch": 8.437200383509108, |
| "grad_norm": 0.3370470106601715, |
| "learning_rate": 7.892948951119467e-05, |
| "loss": 0.0381, |
| "step": 8800 |
| }, |
| { |
| "epoch": 8.446788111217641, |
| "grad_norm": 0.30544212460517883, |
| "learning_rate": 7.887759685999143e-05, |
| "loss": 0.0511, |
| "step": 8810 |
| }, |
| { |
| "epoch": 8.456375838926174, |
| "grad_norm": 0.3022957742214203, |
| "learning_rate": 7.88256574926613e-05, |
| "loss": 0.0382, |
| "step": 8820 |
| }, |
| { |
| "epoch": 8.465963566634708, |
| "grad_norm": 0.4892277121543884, |
| "learning_rate": 7.877367149322807e-05, |
| "loss": 0.0471, |
| "step": 8830 |
| }, |
| { |
| "epoch": 8.47555129434324, |
| "grad_norm": 0.2292528748512268, |
| "learning_rate": 7.872163894579103e-05, |
| "loss": 0.0374, |
| "step": 8840 |
| }, |
| { |
| "epoch": 8.485139022051774, |
| "grad_norm": 0.4441846013069153, |
| "learning_rate": 7.866955993452473e-05, |
| "loss": 0.0396, |
| "step": 8850 |
| }, |
| { |
| "epoch": 8.494726749760307, |
| "grad_norm": 0.3326236605644226, |
| "learning_rate": 7.86174345436789e-05, |
| "loss": 0.0407, |
| "step": 8860 |
| }, |
| { |
| "epoch": 8.50431447746884, |
| "grad_norm": 0.3634801506996155, |
| "learning_rate": 7.856526285757829e-05, |
| "loss": 0.0343, |
| "step": 8870 |
| }, |
| { |
| "epoch": 8.513902205177374, |
| "grad_norm": 0.3255830705165863, |
| "learning_rate": 7.851304496062254e-05, |
| "loss": 0.0391, |
| "step": 8880 |
| }, |
| { |
| "epoch": 8.523489932885907, |
| "grad_norm": 0.2465457022190094, |
| "learning_rate": 7.846078093728611e-05, |
| "loss": 0.0418, |
| "step": 8890 |
| }, |
| { |
| "epoch": 8.53307766059444, |
| "grad_norm": 0.28741371631622314, |
| "learning_rate": 7.840847087211799e-05, |
| "loss": 0.0408, |
| "step": 8900 |
| }, |
| { |
| "epoch": 8.542665388302972, |
| "grad_norm": 0.5026047825813293, |
| "learning_rate": 7.835611484974169e-05, |
| "loss": 0.0425, |
| "step": 8910 |
| }, |
| { |
| "epoch": 8.552253116011505, |
| "grad_norm": 0.29450881481170654, |
| "learning_rate": 7.830371295485506e-05, |
| "loss": 0.0386, |
| "step": 8920 |
| }, |
| { |
| "epoch": 8.561840843720038, |
| "grad_norm": 0.37559008598327637, |
| "learning_rate": 7.82512652722302e-05, |
| "loss": 0.0346, |
| "step": 8930 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "grad_norm": 0.3274129033088684, |
| "learning_rate": 7.819877188671322e-05, |
| "loss": 0.0377, |
| "step": 8940 |
| }, |
| { |
| "epoch": 8.581016299137104, |
| "grad_norm": 1.9449902772903442, |
| "learning_rate": 7.81462328832242e-05, |
| "loss": 0.0422, |
| "step": 8950 |
| }, |
| { |
| "epoch": 8.590604026845638, |
| "grad_norm": 0.32859793305397034, |
| "learning_rate": 7.809364834675703e-05, |
| "loss": 0.0381, |
| "step": 8960 |
| }, |
| { |
| "epoch": 8.60019175455417, |
| "grad_norm": 0.41501474380493164, |
| "learning_rate": 7.804101836237921e-05, |
| "loss": 0.0413, |
| "step": 8970 |
| }, |
| { |
| "epoch": 8.609779482262704, |
| "grad_norm": 0.3548615574836731, |
| "learning_rate": 7.798834301523182e-05, |
| "loss": 0.0436, |
| "step": 8980 |
| }, |
| { |
| "epoch": 8.619367209971237, |
| "grad_norm": 0.3612217903137207, |
| "learning_rate": 7.793562239052928e-05, |
| "loss": 0.0364, |
| "step": 8990 |
| }, |
| { |
| "epoch": 8.62895493767977, |
| "grad_norm": 0.3534400761127472, |
| "learning_rate": 7.78828565735593e-05, |
| "loss": 0.0381, |
| "step": 9000 |
| }, |
| { |
| "epoch": 8.638542665388304, |
| "grad_norm": 0.34939974546432495, |
| "learning_rate": 7.783004564968263e-05, |
| "loss": 0.0405, |
| "step": 9010 |
| }, |
| { |
| "epoch": 8.648130393096835, |
| "grad_norm": 0.37234190106391907, |
| "learning_rate": 7.777718970433309e-05, |
| "loss": 0.0439, |
| "step": 9020 |
| }, |
| { |
| "epoch": 8.657718120805368, |
| "grad_norm": 0.40179571509361267, |
| "learning_rate": 7.772428882301724e-05, |
| "loss": 0.0428, |
| "step": 9030 |
| }, |
| { |
| "epoch": 8.667305848513902, |
| "grad_norm": 0.37865087389945984, |
| "learning_rate": 7.767134309131437e-05, |
| "loss": 0.0364, |
| "step": 9040 |
| }, |
| { |
| "epoch": 8.676893576222435, |
| "grad_norm": 0.32325470447540283, |
| "learning_rate": 7.761835259487635e-05, |
| "loss": 0.0387, |
| "step": 9050 |
| }, |
| { |
| "epoch": 8.686481303930968, |
| "grad_norm": 0.26749640703201294, |
| "learning_rate": 7.756531741942743e-05, |
| "loss": 0.048, |
| "step": 9060 |
| }, |
| { |
| "epoch": 8.696069031639501, |
| "grad_norm": 0.381815105676651, |
| "learning_rate": 7.751223765076418e-05, |
| "loss": 0.0337, |
| "step": 9070 |
| }, |
| { |
| "epoch": 8.705656759348035, |
| "grad_norm": 0.4329027533531189, |
| "learning_rate": 7.745911337475524e-05, |
| "loss": 0.0408, |
| "step": 9080 |
| }, |
| { |
| "epoch": 8.715244487056568, |
| "grad_norm": 0.4740753173828125, |
| "learning_rate": 7.740594467734131e-05, |
| "loss": 0.0368, |
| "step": 9090 |
| }, |
| { |
| "epoch": 8.724832214765101, |
| "grad_norm": 0.23423776030540466, |
| "learning_rate": 7.735273164453494e-05, |
| "loss": 0.0445, |
| "step": 9100 |
| }, |
| { |
| "epoch": 8.734419942473634, |
| "grad_norm": 0.35593661665916443, |
| "learning_rate": 7.72994743624204e-05, |
| "loss": 0.0415, |
| "step": 9110 |
| }, |
| { |
| "epoch": 8.744007670182167, |
| "grad_norm": 0.2637054920196533, |
| "learning_rate": 7.724617291715355e-05, |
| "loss": 0.0424, |
| "step": 9120 |
| }, |
| { |
| "epoch": 8.7535953978907, |
| "grad_norm": 0.25044816732406616, |
| "learning_rate": 7.719282739496167e-05, |
| "loss": 0.0384, |
| "step": 9130 |
| }, |
| { |
| "epoch": 8.763183125599234, |
| "grad_norm": 0.22907428443431854, |
| "learning_rate": 7.713943788214337e-05, |
| "loss": 0.0365, |
| "step": 9140 |
| }, |
| { |
| "epoch": 8.772770853307765, |
| "grad_norm": 0.4074908494949341, |
| "learning_rate": 7.70860044650684e-05, |
| "loss": 0.0481, |
| "step": 9150 |
| }, |
| { |
| "epoch": 8.782358581016299, |
| "grad_norm": 0.29292604327201843, |
| "learning_rate": 7.703252723017757e-05, |
| "loss": 0.0433, |
| "step": 9160 |
| }, |
| { |
| "epoch": 8.791946308724832, |
| "grad_norm": 0.2879285514354706, |
| "learning_rate": 7.697900626398255e-05, |
| "loss": 0.0388, |
| "step": 9170 |
| }, |
| { |
| "epoch": 8.801534036433365, |
| "grad_norm": 0.31987619400024414, |
| "learning_rate": 7.692544165306574e-05, |
| "loss": 0.0423, |
| "step": 9180 |
| }, |
| { |
| "epoch": 8.811121764141898, |
| "grad_norm": 0.3260093331336975, |
| "learning_rate": 7.687183348408018e-05, |
| "loss": 0.0342, |
| "step": 9190 |
| }, |
| { |
| "epoch": 8.820709491850431, |
| "grad_norm": 0.3373820185661316, |
| "learning_rate": 7.681818184374938e-05, |
| "loss": 0.0382, |
| "step": 9200 |
| }, |
| { |
| "epoch": 8.830297219558965, |
| "grad_norm": 0.17047972977161407, |
| "learning_rate": 7.676448681886715e-05, |
| "loss": 0.0375, |
| "step": 9210 |
| }, |
| { |
| "epoch": 8.839884947267498, |
| "grad_norm": 0.26559868454933167, |
| "learning_rate": 7.671074849629746e-05, |
| "loss": 0.0398, |
| "step": 9220 |
| }, |
| { |
| "epoch": 8.849472674976031, |
| "grad_norm": 0.30938103795051575, |
| "learning_rate": 7.665696696297439e-05, |
| "loss": 0.0437, |
| "step": 9230 |
| }, |
| { |
| "epoch": 8.859060402684564, |
| "grad_norm": 0.47756102681159973, |
| "learning_rate": 7.660314230590187e-05, |
| "loss": 0.0393, |
| "step": 9240 |
| }, |
| { |
| "epoch": 8.868648130393098, |
| "grad_norm": 0.3115938901901245, |
| "learning_rate": 7.654927461215362e-05, |
| "loss": 0.0389, |
| "step": 9250 |
| }, |
| { |
| "epoch": 8.87823585810163, |
| "grad_norm": 0.2378511130809784, |
| "learning_rate": 7.649536396887296e-05, |
| "loss": 0.0456, |
| "step": 9260 |
| }, |
| { |
| "epoch": 8.887823585810162, |
| "grad_norm": 0.27728554606437683, |
| "learning_rate": 7.644141046327271e-05, |
| "loss": 0.0445, |
| "step": 9270 |
| }, |
| { |
| "epoch": 8.897411313518695, |
| "grad_norm": 0.5434097051620483, |
| "learning_rate": 7.638741418263505e-05, |
| "loss": 0.0402, |
| "step": 9280 |
| }, |
| { |
| "epoch": 8.906999041227229, |
| "grad_norm": 0.23838652670383453, |
| "learning_rate": 7.633337521431127e-05, |
| "loss": 0.038, |
| "step": 9290 |
| }, |
| { |
| "epoch": 8.916586768935762, |
| "grad_norm": 0.2675243020057678, |
| "learning_rate": 7.627929364572184e-05, |
| "loss": 0.0409, |
| "step": 9300 |
| }, |
| { |
| "epoch": 8.926174496644295, |
| "grad_norm": 0.36112427711486816, |
| "learning_rate": 7.622516956435604e-05, |
| "loss": 0.038, |
| "step": 9310 |
| }, |
| { |
| "epoch": 8.935762224352828, |
| "grad_norm": 0.40189293026924133, |
| "learning_rate": 7.617100305777199e-05, |
| "loss": 0.0349, |
| "step": 9320 |
| }, |
| { |
| "epoch": 8.945349952061362, |
| "grad_norm": 0.32217565178871155, |
| "learning_rate": 7.611679421359639e-05, |
| "loss": 0.0414, |
| "step": 9330 |
| }, |
| { |
| "epoch": 8.954937679769895, |
| "grad_norm": 0.37468934059143066, |
| "learning_rate": 7.60625431195245e-05, |
| "loss": 0.0419, |
| "step": 9340 |
| }, |
| { |
| "epoch": 8.964525407478428, |
| "grad_norm": 0.25082099437713623, |
| "learning_rate": 7.600824986331989e-05, |
| "loss": 0.0361, |
| "step": 9350 |
| }, |
| { |
| "epoch": 8.974113135186961, |
| "grad_norm": 0.3598342835903168, |
| "learning_rate": 7.595391453281431e-05, |
| "loss": 0.034, |
| "step": 9360 |
| }, |
| { |
| "epoch": 8.983700862895494, |
| "grad_norm": 0.3254631459712982, |
| "learning_rate": 7.589953721590764e-05, |
| "loss": 0.0482, |
| "step": 9370 |
| }, |
| { |
| "epoch": 8.993288590604028, |
| "grad_norm": 0.3480936586856842, |
| "learning_rate": 7.584511800056759e-05, |
| "loss": 0.0359, |
| "step": 9380 |
| }, |
| { |
| "epoch": 9.002876318312559, |
| "grad_norm": 0.321119099855423, |
| "learning_rate": 7.579065697482974e-05, |
| "loss": 0.0397, |
| "step": 9390 |
| }, |
| { |
| "epoch": 9.012464046021092, |
| "grad_norm": 0.2790512144565582, |
| "learning_rate": 7.573615422679726e-05, |
| "loss": 0.0341, |
| "step": 9400 |
| }, |
| { |
| "epoch": 9.022051773729626, |
| "grad_norm": 0.6163461208343506, |
| "learning_rate": 7.568160984464083e-05, |
| "loss": 0.0361, |
| "step": 9410 |
| }, |
| { |
| "epoch": 9.031639501438159, |
| "grad_norm": 0.35653308033943176, |
| "learning_rate": 7.56270239165985e-05, |
| "loss": 0.0392, |
| "step": 9420 |
| }, |
| { |
| "epoch": 9.041227229146692, |
| "grad_norm": 0.2938978970050812, |
| "learning_rate": 7.55723965309755e-05, |
| "loss": 0.0326, |
| "step": 9430 |
| }, |
| { |
| "epoch": 9.050814956855225, |
| "grad_norm": 0.26529833674430847, |
| "learning_rate": 7.551772777614412e-05, |
| "loss": 0.0454, |
| "step": 9440 |
| }, |
| { |
| "epoch": 9.060402684563758, |
| "grad_norm": 0.351085364818573, |
| "learning_rate": 7.54630177405436e-05, |
| "loss": 0.0467, |
| "step": 9450 |
| }, |
| { |
| "epoch": 9.069990412272292, |
| "grad_norm": 0.23490998148918152, |
| "learning_rate": 7.540826651267999e-05, |
| "loss": 0.0405, |
| "step": 9460 |
| }, |
| { |
| "epoch": 9.079578139980825, |
| "grad_norm": 0.3685658276081085, |
| "learning_rate": 7.535347418112588e-05, |
| "loss": 0.0372, |
| "step": 9470 |
| }, |
| { |
| "epoch": 9.089165867689358, |
| "grad_norm": 0.36048129200935364, |
| "learning_rate": 7.529864083452046e-05, |
| "loss": 0.0378, |
| "step": 9480 |
| }, |
| { |
| "epoch": 9.098753595397891, |
| "grad_norm": 0.3054652810096741, |
| "learning_rate": 7.52437665615692e-05, |
| "loss": 0.0447, |
| "step": 9490 |
| }, |
| { |
| "epoch": 9.108341323106425, |
| "grad_norm": 0.2997536063194275, |
| "learning_rate": 7.518885145104381e-05, |
| "loss": 0.038, |
| "step": 9500 |
| }, |
| { |
| "epoch": 9.117929050814958, |
| "grad_norm": 0.5517327189445496, |
| "learning_rate": 7.513389559178209e-05, |
| "loss": 0.0472, |
| "step": 9510 |
| }, |
| { |
| "epoch": 9.12751677852349, |
| "grad_norm": 0.30378520488739014, |
| "learning_rate": 7.507889907268769e-05, |
| "loss": 0.0355, |
| "step": 9520 |
| }, |
| { |
| "epoch": 9.137104506232022, |
| "grad_norm": 0.46029695868492126, |
| "learning_rate": 7.50238619827301e-05, |
| "loss": 0.0358, |
| "step": 9530 |
| }, |
| { |
| "epoch": 9.146692233940556, |
| "grad_norm": 0.30406633019447327, |
| "learning_rate": 7.496878441094439e-05, |
| "loss": 0.0397, |
| "step": 9540 |
| }, |
| { |
| "epoch": 9.156279961649089, |
| "grad_norm": 0.4107452929019928, |
| "learning_rate": 7.491366644643118e-05, |
| "loss": 0.043, |
| "step": 9550 |
| }, |
| { |
| "epoch": 9.165867689357622, |
| "grad_norm": 0.6011910438537598, |
| "learning_rate": 7.485850817835639e-05, |
| "loss": 0.0459, |
| "step": 9560 |
| }, |
| { |
| "epoch": 9.175455417066155, |
| "grad_norm": 0.42862173914909363, |
| "learning_rate": 7.480330969595114e-05, |
| "loss": 0.0392, |
| "step": 9570 |
| }, |
| { |
| "epoch": 9.185043144774689, |
| "grad_norm": 0.3231380879878998, |
| "learning_rate": 7.474807108851163e-05, |
| "loss": 0.0379, |
| "step": 9580 |
| }, |
| { |
| "epoch": 9.194630872483222, |
| "grad_norm": 0.31742027401924133, |
| "learning_rate": 7.469279244539897e-05, |
| "loss": 0.0398, |
| "step": 9590 |
| }, |
| { |
| "epoch": 9.204218600191755, |
| "grad_norm": 0.34327855706214905, |
| "learning_rate": 7.463747385603899e-05, |
| "loss": 0.0365, |
| "step": 9600 |
| }, |
| { |
| "epoch": 9.213806327900288, |
| "grad_norm": 0.40726932883262634, |
| "learning_rate": 7.458211540992222e-05, |
| "loss": 0.0421, |
| "step": 9610 |
| }, |
| { |
| "epoch": 9.223394055608821, |
| "grad_norm": 0.28824758529663086, |
| "learning_rate": 7.452671719660359e-05, |
| "loss": 0.0392, |
| "step": 9620 |
| }, |
| { |
| "epoch": 9.232981783317355, |
| "grad_norm": 0.4298984408378601, |
| "learning_rate": 7.447127930570241e-05, |
| "loss": 0.0396, |
| "step": 9630 |
| }, |
| { |
| "epoch": 9.242569511025886, |
| "grad_norm": 0.3513946831226349, |
| "learning_rate": 7.441580182690218e-05, |
| "loss": 0.0344, |
| "step": 9640 |
| }, |
| { |
| "epoch": 9.25215723873442, |
| "grad_norm": 0.31864580512046814, |
| "learning_rate": 7.436028484995043e-05, |
| "loss": 0.0352, |
| "step": 9650 |
| }, |
| { |
| "epoch": 9.261744966442953, |
| "grad_norm": 0.42778101563453674, |
| "learning_rate": 7.430472846465856e-05, |
| "loss": 0.0345, |
| "step": 9660 |
| }, |
| { |
| "epoch": 9.271332694151486, |
| "grad_norm": 0.25153082609176636, |
| "learning_rate": 7.424913276090176e-05, |
| "loss": 0.0376, |
| "step": 9670 |
| }, |
| { |
| "epoch": 9.280920421860019, |
| "grad_norm": 0.30971595644950867, |
| "learning_rate": 7.419349782861882e-05, |
| "loss": 0.0402, |
| "step": 9680 |
| }, |
| { |
| "epoch": 9.290508149568552, |
| "grad_norm": 0.5045586228370667, |
| "learning_rate": 7.413782375781198e-05, |
| "loss": 0.0321, |
| "step": 9690 |
| }, |
| { |
| "epoch": 9.300095877277085, |
| "grad_norm": 0.26688501238822937, |
| "learning_rate": 7.40821106385468e-05, |
| "loss": 0.0405, |
| "step": 9700 |
| }, |
| { |
| "epoch": 9.309683604985619, |
| "grad_norm": 0.3186158537864685, |
| "learning_rate": 7.402635856095202e-05, |
| "loss": 0.039, |
| "step": 9710 |
| }, |
| { |
| "epoch": 9.319271332694152, |
| "grad_norm": 0.23956236243247986, |
| "learning_rate": 7.397056761521936e-05, |
| "loss": 0.0385, |
| "step": 9720 |
| }, |
| { |
| "epoch": 9.328859060402685, |
| "grad_norm": 0.35403645038604736, |
| "learning_rate": 7.391473789160352e-05, |
| "loss": 0.037, |
| "step": 9730 |
| }, |
| { |
| "epoch": 9.338446788111218, |
| "grad_norm": 0.30190348625183105, |
| "learning_rate": 7.38588694804218e-05, |
| "loss": 0.0402, |
| "step": 9740 |
| }, |
| { |
| "epoch": 9.348034515819752, |
| "grad_norm": 0.45342007279396057, |
| "learning_rate": 7.380296247205417e-05, |
| "loss": 0.0385, |
| "step": 9750 |
| }, |
| { |
| "epoch": 9.357622243528283, |
| "grad_norm": 0.355342835187912, |
| "learning_rate": 7.374701695694304e-05, |
| "loss": 0.0375, |
| "step": 9760 |
| }, |
| { |
| "epoch": 9.367209971236816, |
| "grad_norm": 0.3231160640716553, |
| "learning_rate": 7.369103302559308e-05, |
| "loss": 0.0353, |
| "step": 9770 |
| }, |
| { |
| "epoch": 9.37679769894535, |
| "grad_norm": 0.4163530766963959, |
| "learning_rate": 7.363501076857112e-05, |
| "loss": 0.0381, |
| "step": 9780 |
| }, |
| { |
| "epoch": 9.386385426653883, |
| "grad_norm": 0.24439620971679688, |
| "learning_rate": 7.357895027650598e-05, |
| "loss": 0.0347, |
| "step": 9790 |
| }, |
| { |
| "epoch": 9.395973154362416, |
| "grad_norm": 0.3847917318344116, |
| "learning_rate": 7.352285164008838e-05, |
| "loss": 0.0331, |
| "step": 9800 |
| }, |
| { |
| "epoch": 9.405560882070949, |
| "grad_norm": 0.3097192049026489, |
| "learning_rate": 7.346671495007068e-05, |
| "loss": 0.0405, |
| "step": 9810 |
| }, |
| { |
| "epoch": 9.415148609779482, |
| "grad_norm": 0.21436016261577606, |
| "learning_rate": 7.341054029726685e-05, |
| "loss": 0.0375, |
| "step": 9820 |
| }, |
| { |
| "epoch": 9.424736337488016, |
| "grad_norm": 0.41024893522262573, |
| "learning_rate": 7.335432777255225e-05, |
| "loss": 0.0463, |
| "step": 9830 |
| }, |
| { |
| "epoch": 9.434324065196549, |
| "grad_norm": 0.299177348613739, |
| "learning_rate": 7.329807746686352e-05, |
| "loss": 0.0418, |
| "step": 9840 |
| }, |
| { |
| "epoch": 9.443911792905082, |
| "grad_norm": 0.3526586890220642, |
| "learning_rate": 7.324178947119842e-05, |
| "loss": 0.0383, |
| "step": 9850 |
| }, |
| { |
| "epoch": 9.453499520613615, |
| "grad_norm": 0.277421772480011, |
| "learning_rate": 7.318546387661564e-05, |
| "loss": 0.0512, |
| "step": 9860 |
| }, |
| { |
| "epoch": 9.463087248322148, |
| "grad_norm": 0.24628502130508423, |
| "learning_rate": 7.312910077423477e-05, |
| "loss": 0.0367, |
| "step": 9870 |
| }, |
| { |
| "epoch": 9.47267497603068, |
| "grad_norm": 0.5568169951438904, |
| "learning_rate": 7.307270025523601e-05, |
| "loss": 0.0396, |
| "step": 9880 |
| }, |
| { |
| "epoch": 9.482262703739213, |
| "grad_norm": 0.30765804648399353, |
| "learning_rate": 7.301626241086012e-05, |
| "loss": 0.043, |
| "step": 9890 |
| }, |
| { |
| "epoch": 9.491850431447746, |
| "grad_norm": 0.32168257236480713, |
| "learning_rate": 7.295978733240827e-05, |
| "loss": 0.0385, |
| "step": 9900 |
| }, |
| { |
| "epoch": 9.50143815915628, |
| "grad_norm": 0.46826574206352234, |
| "learning_rate": 7.29032751112418e-05, |
| "loss": 0.0375, |
| "step": 9910 |
| }, |
| { |
| "epoch": 9.511025886864813, |
| "grad_norm": 0.19892945885658264, |
| "learning_rate": 7.284672583878219e-05, |
| "loss": 0.0432, |
| "step": 9920 |
| }, |
| { |
| "epoch": 9.520613614573346, |
| "grad_norm": 0.21767093241214752, |
| "learning_rate": 7.279013960651083e-05, |
| "loss": 0.0331, |
| "step": 9930 |
| }, |
| { |
| "epoch": 9.53020134228188, |
| "grad_norm": 0.32079631090164185, |
| "learning_rate": 7.273351650596889e-05, |
| "loss": 0.0355, |
| "step": 9940 |
| }, |
| { |
| "epoch": 9.539789069990412, |
| "grad_norm": 0.40111902356147766, |
| "learning_rate": 7.267685662875725e-05, |
| "loss": 0.0412, |
| "step": 9950 |
| }, |
| { |
| "epoch": 9.549376797698946, |
| "grad_norm": 0.58073490858078, |
| "learning_rate": 7.26201600665362e-05, |
| "loss": 0.0384, |
| "step": 9960 |
| }, |
| { |
| "epoch": 9.558964525407479, |
| "grad_norm": 0.20928962528705597, |
| "learning_rate": 7.256342691102545e-05, |
| "loss": 0.0334, |
| "step": 9970 |
| }, |
| { |
| "epoch": 9.568552253116012, |
| "grad_norm": 0.2809102535247803, |
| "learning_rate": 7.250665725400385e-05, |
| "loss": 0.0421, |
| "step": 9980 |
| }, |
| { |
| "epoch": 9.578139980824545, |
| "grad_norm": 0.2836989164352417, |
| "learning_rate": 7.244985118730933e-05, |
| "loss": 0.0394, |
| "step": 9990 |
| }, |
| { |
| "epoch": 9.587727708533077, |
| "grad_norm": 0.21493583917617798, |
| "learning_rate": 7.239300880283869e-05, |
| "loss": 0.0438, |
| "step": 10000 |
| }, |
| { |
| "epoch": 9.59731543624161, |
| "grad_norm": 0.3654724955558777, |
| "learning_rate": 7.233613019254755e-05, |
| "loss": 0.0398, |
| "step": 10010 |
| }, |
| { |
| "epoch": 9.606903163950143, |
| "grad_norm": 0.24901500344276428, |
| "learning_rate": 7.227921544845003e-05, |
| "loss": 0.0393, |
| "step": 10020 |
| }, |
| { |
| "epoch": 9.616490891658676, |
| "grad_norm": 0.21980980038642883, |
| "learning_rate": 7.222226466261883e-05, |
| "loss": 0.0386, |
| "step": 10030 |
| }, |
| { |
| "epoch": 9.62607861936721, |
| "grad_norm": 0.18104171752929688, |
| "learning_rate": 7.216527792718484e-05, |
| "loss": 0.0378, |
| "step": 10040 |
| }, |
| { |
| "epoch": 9.635666347075743, |
| "grad_norm": 0.33641284704208374, |
| "learning_rate": 7.210825533433719e-05, |
| "loss": 0.0418, |
| "step": 10050 |
| }, |
| { |
| "epoch": 9.645254074784276, |
| "grad_norm": 0.2590009570121765, |
| "learning_rate": 7.205119697632297e-05, |
| "loss": 0.0327, |
| "step": 10060 |
| }, |
| { |
| "epoch": 9.65484180249281, |
| "grad_norm": 0.40689241886138916, |
| "learning_rate": 7.199410294544713e-05, |
| "loss": 0.0542, |
| "step": 10070 |
| }, |
| { |
| "epoch": 9.664429530201343, |
| "grad_norm": 0.3199746310710907, |
| "learning_rate": 7.193697333407234e-05, |
| "loss": 0.0363, |
| "step": 10080 |
| }, |
| { |
| "epoch": 9.674017257909876, |
| "grad_norm": 0.49059638381004333, |
| "learning_rate": 7.187980823461887e-05, |
| "loss": 0.0377, |
| "step": 10090 |
| }, |
| { |
| "epoch": 9.683604985618409, |
| "grad_norm": 0.28129157423973083, |
| "learning_rate": 7.182260773956433e-05, |
| "loss": 0.0382, |
| "step": 10100 |
| }, |
| { |
| "epoch": 9.693192713326942, |
| "grad_norm": 0.3830220401287079, |
| "learning_rate": 7.176537194144362e-05, |
| "loss": 0.0349, |
| "step": 10110 |
| }, |
| { |
| "epoch": 9.702780441035475, |
| "grad_norm": 0.3658897578716278, |
| "learning_rate": 7.170810093284876e-05, |
| "loss": 0.0359, |
| "step": 10120 |
| }, |
| { |
| "epoch": 9.712368168744007, |
| "grad_norm": 0.31416580080986023, |
| "learning_rate": 7.165079480642873e-05, |
| "loss": 0.0343, |
| "step": 10130 |
| }, |
| { |
| "epoch": 9.72195589645254, |
| "grad_norm": 0.24944183230400085, |
| "learning_rate": 7.159345365488929e-05, |
| "loss": 0.0332, |
| "step": 10140 |
| }, |
| { |
| "epoch": 9.731543624161073, |
| "grad_norm": 0.2953116297721863, |
| "learning_rate": 7.153607757099292e-05, |
| "loss": 0.0354, |
| "step": 10150 |
| }, |
| { |
| "epoch": 9.741131351869607, |
| "grad_norm": 0.4103414714336395, |
| "learning_rate": 7.147866664755856e-05, |
| "loss": 0.036, |
| "step": 10160 |
| }, |
| { |
| "epoch": 9.75071907957814, |
| "grad_norm": 0.28444069623947144, |
| "learning_rate": 7.142122097746153e-05, |
| "loss": 0.0389, |
| "step": 10170 |
| }, |
| { |
| "epoch": 9.760306807286673, |
| "grad_norm": 0.2912525534629822, |
| "learning_rate": 7.136374065363334e-05, |
| "loss": 0.0345, |
| "step": 10180 |
| }, |
| { |
| "epoch": 9.769894534995206, |
| "grad_norm": 0.25480780005455017, |
| "learning_rate": 7.13062257690616e-05, |
| "loss": 0.0355, |
| "step": 10190 |
| }, |
| { |
| "epoch": 9.77948226270374, |
| "grad_norm": 0.305532306432724, |
| "learning_rate": 7.124867641678981e-05, |
| "loss": 0.0376, |
| "step": 10200 |
| }, |
| { |
| "epoch": 9.789069990412273, |
| "grad_norm": 0.32806769013404846, |
| "learning_rate": 7.119109268991723e-05, |
| "loss": 0.0357, |
| "step": 10210 |
| }, |
| { |
| "epoch": 9.798657718120806, |
| "grad_norm": 0.23281969130039215, |
| "learning_rate": 7.113347468159871e-05, |
| "loss": 0.0332, |
| "step": 10220 |
| }, |
| { |
| "epoch": 9.808245445829339, |
| "grad_norm": 0.3487169146537781, |
| "learning_rate": 7.107582248504458e-05, |
| "loss": 0.0397, |
| "step": 10230 |
| }, |
| { |
| "epoch": 9.817833173537872, |
| "grad_norm": 0.3124096989631653, |
| "learning_rate": 7.101813619352048e-05, |
| "loss": 0.0391, |
| "step": 10240 |
| }, |
| { |
| "epoch": 9.827420901246404, |
| "grad_norm": 0.39542460441589355, |
| "learning_rate": 7.09604159003472e-05, |
| "loss": 0.0361, |
| "step": 10250 |
| }, |
| { |
| "epoch": 9.837008628954937, |
| "grad_norm": 0.3044220209121704, |
| "learning_rate": 7.090266169890051e-05, |
| "loss": 0.0382, |
| "step": 10260 |
| }, |
| { |
| "epoch": 9.84659635666347, |
| "grad_norm": 0.3320329189300537, |
| "learning_rate": 7.08448736826111e-05, |
| "loss": 0.043, |
| "step": 10270 |
| }, |
| { |
| "epoch": 9.856184084372003, |
| "grad_norm": 0.25773710012435913, |
| "learning_rate": 7.078705194496429e-05, |
| "loss": 0.0363, |
| "step": 10280 |
| }, |
| { |
| "epoch": 9.865771812080537, |
| "grad_norm": 0.4256868064403534, |
| "learning_rate": 7.07291965795e-05, |
| "loss": 0.0388, |
| "step": 10290 |
| }, |
| { |
| "epoch": 9.87535953978907, |
| "grad_norm": 0.48361513018608093, |
| "learning_rate": 7.067130767981252e-05, |
| "loss": 0.0387, |
| "step": 10300 |
| }, |
| { |
| "epoch": 9.884947267497603, |
| "grad_norm": 0.3017280697822571, |
| "learning_rate": 7.061338533955043e-05, |
| "loss": 0.0334, |
| "step": 10310 |
| }, |
| { |
| "epoch": 9.894534995206136, |
| "grad_norm": 0.3394894599914551, |
| "learning_rate": 7.055542965241634e-05, |
| "loss": 0.0402, |
| "step": 10320 |
| }, |
| { |
| "epoch": 9.90412272291467, |
| "grad_norm": 0.3364240527153015, |
| "learning_rate": 7.049744071216687e-05, |
| "loss": 0.0332, |
| "step": 10330 |
| }, |
| { |
| "epoch": 9.913710450623203, |
| "grad_norm": 0.2847566604614258, |
| "learning_rate": 7.043941861261242e-05, |
| "loss": 0.0372, |
| "step": 10340 |
| }, |
| { |
| "epoch": 9.923298178331736, |
| "grad_norm": 0.6304646730422974, |
| "learning_rate": 7.038136344761703e-05, |
| "loss": 0.0338, |
| "step": 10350 |
| }, |
| { |
| "epoch": 9.93288590604027, |
| "grad_norm": 0.37469327449798584, |
| "learning_rate": 7.03232753110982e-05, |
| "loss": 0.0377, |
| "step": 10360 |
| }, |
| { |
| "epoch": 9.9424736337488, |
| "grad_norm": 0.3126644790172577, |
| "learning_rate": 7.026515429702682e-05, |
| "loss": 0.0313, |
| "step": 10370 |
| }, |
| { |
| "epoch": 9.952061361457334, |
| "grad_norm": 0.22097988426685333, |
| "learning_rate": 7.020700049942694e-05, |
| "loss": 0.037, |
| "step": 10380 |
| }, |
| { |
| "epoch": 9.961649089165867, |
| "grad_norm": 0.2554224729537964, |
| "learning_rate": 7.014881401237563e-05, |
| "loss": 0.0338, |
| "step": 10390 |
| }, |
| { |
| "epoch": 9.9712368168744, |
| "grad_norm": 0.41450753808021545, |
| "learning_rate": 7.009059493000285e-05, |
| "loss": 0.0373, |
| "step": 10400 |
| }, |
| { |
| "epoch": 9.980824544582934, |
| "grad_norm": 0.2980963885784149, |
| "learning_rate": 7.003234334649133e-05, |
| "loss": 0.0357, |
| "step": 10410 |
| }, |
| { |
| "epoch": 9.990412272291467, |
| "grad_norm": 0.34623420238494873, |
| "learning_rate": 6.997405935607635e-05, |
| "loss": 0.0393, |
| "step": 10420 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.31464067101478577, |
| "learning_rate": 6.991574305304558e-05, |
| "loss": 0.0373, |
| "step": 10430 |
| }, |
| { |
| "epoch": 10.009587727708533, |
| "grad_norm": 0.3440396785736084, |
| "learning_rate": 6.985739453173903e-05, |
| "loss": 0.0352, |
| "step": 10440 |
| }, |
| { |
| "epoch": 10.019175455417066, |
| "grad_norm": 0.3453032374382019, |
| "learning_rate": 6.979901388654879e-05, |
| "loss": 0.0384, |
| "step": 10450 |
| }, |
| { |
| "epoch": 10.0287631831256, |
| "grad_norm": 0.2174844592809677, |
| "learning_rate": 6.97406012119189e-05, |
| "loss": 0.033, |
| "step": 10460 |
| }, |
| { |
| "epoch": 10.038350910834133, |
| "grad_norm": 0.34027159214019775, |
| "learning_rate": 6.968215660234527e-05, |
| "loss": 0.0439, |
| "step": 10470 |
| }, |
| { |
| "epoch": 10.047938638542666, |
| "grad_norm": 0.29484447836875916, |
| "learning_rate": 6.962368015237543e-05, |
| "loss": 0.0406, |
| "step": 10480 |
| }, |
| { |
| "epoch": 10.0575263662512, |
| "grad_norm": 0.2926745116710663, |
| "learning_rate": 6.956517195660842e-05, |
| "loss": 0.0366, |
| "step": 10490 |
| }, |
| { |
| "epoch": 10.06711409395973, |
| "grad_norm": 0.25546324253082275, |
| "learning_rate": 6.950663210969466e-05, |
| "loss": 0.0387, |
| "step": 10500 |
| }, |
| { |
| "epoch": 10.076701821668264, |
| "grad_norm": 0.19871650636196136, |
| "learning_rate": 6.944806070633578e-05, |
| "loss": 0.0408, |
| "step": 10510 |
| }, |
| { |
| "epoch": 10.086289549376797, |
| "grad_norm": 0.432463139295578, |
| "learning_rate": 6.93894578412844e-05, |
| "loss": 0.0415, |
| "step": 10520 |
| }, |
| { |
| "epoch": 10.09587727708533, |
| "grad_norm": 0.3453048765659332, |
| "learning_rate": 6.933082360934408e-05, |
| "loss": 0.0359, |
| "step": 10530 |
| }, |
| { |
| "epoch": 10.105465004793864, |
| "grad_norm": 0.28228339552879333, |
| "learning_rate": 6.927215810536915e-05, |
| "loss": 0.0363, |
| "step": 10540 |
| }, |
| { |
| "epoch": 10.115052732502397, |
| "grad_norm": 0.2979227304458618, |
| "learning_rate": 6.921346142426448e-05, |
| "loss": 0.0349, |
| "step": 10550 |
| }, |
| { |
| "epoch": 10.12464046021093, |
| "grad_norm": 0.23034702241420746, |
| "learning_rate": 6.915473366098541e-05, |
| "loss": 0.0337, |
| "step": 10560 |
| }, |
| { |
| "epoch": 10.134228187919463, |
| "grad_norm": 0.30385303497314453, |
| "learning_rate": 6.909597491053751e-05, |
| "loss": 0.0358, |
| "step": 10570 |
| }, |
| { |
| "epoch": 10.143815915627997, |
| "grad_norm": 0.34254565834999084, |
| "learning_rate": 6.903718526797658e-05, |
| "loss": 0.0383, |
| "step": 10580 |
| }, |
| { |
| "epoch": 10.15340364333653, |
| "grad_norm": 0.3243492841720581, |
| "learning_rate": 6.897836482840828e-05, |
| "loss": 0.0388, |
| "step": 10590 |
| }, |
| { |
| "epoch": 10.162991371045063, |
| "grad_norm": 0.24607200920581818, |
| "learning_rate": 6.891951368698815e-05, |
| "loss": 0.0359, |
| "step": 10600 |
| }, |
| { |
| "epoch": 10.172579098753596, |
| "grad_norm": 0.2082456648349762, |
| "learning_rate": 6.88606319389214e-05, |
| "loss": 0.0347, |
| "step": 10610 |
| }, |
| { |
| "epoch": 10.182166826462128, |
| "grad_norm": 0.23741546273231506, |
| "learning_rate": 6.880171967946273e-05, |
| "loss": 0.0335, |
| "step": 10620 |
| }, |
| { |
| "epoch": 10.191754554170661, |
| "grad_norm": 0.7699126601219177, |
| "learning_rate": 6.874277700391623e-05, |
| "loss": 0.0402, |
| "step": 10630 |
| }, |
| { |
| "epoch": 10.201342281879194, |
| "grad_norm": 0.23752135038375854, |
| "learning_rate": 6.868380400763516e-05, |
| "loss": 0.0378, |
| "step": 10640 |
| }, |
| { |
| "epoch": 10.210930009587727, |
| "grad_norm": 0.2777273952960968, |
| "learning_rate": 6.86248007860219e-05, |
| "loss": 0.0341, |
| "step": 10650 |
| }, |
| { |
| "epoch": 10.22051773729626, |
| "grad_norm": 0.33273088932037354, |
| "learning_rate": 6.856576743452761e-05, |
| "loss": 0.0379, |
| "step": 10660 |
| }, |
| { |
| "epoch": 10.230105465004794, |
| "grad_norm": 0.22550059854984283, |
| "learning_rate": 6.850670404865227e-05, |
| "loss": 0.0323, |
| "step": 10670 |
| }, |
| { |
| "epoch": 10.239693192713327, |
| "grad_norm": 0.22732175886631012, |
| "learning_rate": 6.844761072394446e-05, |
| "loss": 0.0335, |
| "step": 10680 |
| }, |
| { |
| "epoch": 10.24928092042186, |
| "grad_norm": 0.1689731478691101, |
| "learning_rate": 6.838848755600114e-05, |
| "loss": 0.0368, |
| "step": 10690 |
| }, |
| { |
| "epoch": 10.258868648130393, |
| "grad_norm": 0.20502756536006927, |
| "learning_rate": 6.83293346404676e-05, |
| "loss": 0.041, |
| "step": 10700 |
| }, |
| { |
| "epoch": 10.268456375838927, |
| "grad_norm": 0.2094731330871582, |
| "learning_rate": 6.827015207303722e-05, |
| "loss": 0.0383, |
| "step": 10710 |
| }, |
| { |
| "epoch": 10.27804410354746, |
| "grad_norm": 0.3424762487411499, |
| "learning_rate": 6.821093994945135e-05, |
| "loss": 0.0435, |
| "step": 10720 |
| }, |
| { |
| "epoch": 10.287631831255993, |
| "grad_norm": 0.3471381366252899, |
| "learning_rate": 6.815169836549916e-05, |
| "loss": 0.04, |
| "step": 10730 |
| }, |
| { |
| "epoch": 10.297219558964525, |
| "grad_norm": 0.2713249623775482, |
| "learning_rate": 6.80924274170175e-05, |
| "loss": 0.0313, |
| "step": 10740 |
| }, |
| { |
| "epoch": 10.306807286673058, |
| "grad_norm": 0.24895431101322174, |
| "learning_rate": 6.803312719989068e-05, |
| "loss": 0.0371, |
| "step": 10750 |
| }, |
| { |
| "epoch": 10.316395014381591, |
| "grad_norm": 0.3460264205932617, |
| "learning_rate": 6.797379781005039e-05, |
| "loss": 0.0312, |
| "step": 10760 |
| }, |
| { |
| "epoch": 10.325982742090124, |
| "grad_norm": 0.36002618074417114, |
| "learning_rate": 6.791443934347553e-05, |
| "loss": 0.0443, |
| "step": 10770 |
| }, |
| { |
| "epoch": 10.335570469798657, |
| "grad_norm": 0.46812546253204346, |
| "learning_rate": 6.785505189619197e-05, |
| "loss": 0.0417, |
| "step": 10780 |
| }, |
| { |
| "epoch": 10.34515819750719, |
| "grad_norm": 0.3170137107372284, |
| "learning_rate": 6.779563556427255e-05, |
| "loss": 0.0413, |
| "step": 10790 |
| }, |
| { |
| "epoch": 10.354745925215724, |
| "grad_norm": 0.27735644578933716, |
| "learning_rate": 6.773619044383677e-05, |
| "loss": 0.0411, |
| "step": 10800 |
| }, |
| { |
| "epoch": 10.364333652924257, |
| "grad_norm": 0.2342735081911087, |
| "learning_rate": 6.767671663105075e-05, |
| "loss": 0.0327, |
| "step": 10810 |
| }, |
| { |
| "epoch": 10.37392138063279, |
| "grad_norm": 0.31249138712882996, |
| "learning_rate": 6.761721422212696e-05, |
| "loss": 0.042, |
| "step": 10820 |
| }, |
| { |
| "epoch": 10.383509108341324, |
| "grad_norm": 0.26663604378700256, |
| "learning_rate": 6.755768331332424e-05, |
| "loss": 0.0359, |
| "step": 10830 |
| }, |
| { |
| "epoch": 10.393096836049857, |
| "grad_norm": 0.30388474464416504, |
| "learning_rate": 6.749812400094742e-05, |
| "loss": 0.0443, |
| "step": 10840 |
| }, |
| { |
| "epoch": 10.40268456375839, |
| "grad_norm": 0.3067167401313782, |
| "learning_rate": 6.743853638134734e-05, |
| "loss": 0.0424, |
| "step": 10850 |
| }, |
| { |
| "epoch": 10.412272291466923, |
| "grad_norm": 0.3138778805732727, |
| "learning_rate": 6.737892055092064e-05, |
| "loss": 0.0313, |
| "step": 10860 |
| }, |
| { |
| "epoch": 10.421860019175455, |
| "grad_norm": 0.28191816806793213, |
| "learning_rate": 6.731927660610954e-05, |
| "loss": 0.0358, |
| "step": 10870 |
| }, |
| { |
| "epoch": 10.431447746883988, |
| "grad_norm": 0.37692686915397644, |
| "learning_rate": 6.725960464340182e-05, |
| "loss": 0.0317, |
| "step": 10880 |
| }, |
| { |
| "epoch": 10.441035474592521, |
| "grad_norm": 0.26821082830429077, |
| "learning_rate": 6.719990475933053e-05, |
| "loss": 0.0319, |
| "step": 10890 |
| }, |
| { |
| "epoch": 10.450623202301054, |
| "grad_norm": 0.46883681416511536, |
| "learning_rate": 6.71401770504739e-05, |
| "loss": 0.0376, |
| "step": 10900 |
| }, |
| { |
| "epoch": 10.460210930009588, |
| "grad_norm": 0.8076095581054688, |
| "learning_rate": 6.708042161345521e-05, |
| "loss": 0.0355, |
| "step": 10910 |
| }, |
| { |
| "epoch": 10.46979865771812, |
| "grad_norm": 0.29810166358947754, |
| "learning_rate": 6.702063854494254e-05, |
| "loss": 0.0269, |
| "step": 10920 |
| }, |
| { |
| "epoch": 10.479386385426654, |
| "grad_norm": 0.3273125886917114, |
| "learning_rate": 6.696082794164868e-05, |
| "loss": 0.0386, |
| "step": 10930 |
| }, |
| { |
| "epoch": 10.488974113135187, |
| "grad_norm": 0.4401116371154785, |
| "learning_rate": 6.690098990033102e-05, |
| "loss": 0.0298, |
| "step": 10940 |
| }, |
| { |
| "epoch": 10.49856184084372, |
| "grad_norm": 0.2832469642162323, |
| "learning_rate": 6.684112451779127e-05, |
| "loss": 0.0397, |
| "step": 10950 |
| }, |
| { |
| "epoch": 10.508149568552254, |
| "grad_norm": 0.3664191961288452, |
| "learning_rate": 6.67812318908754e-05, |
| "loss": 0.0382, |
| "step": 10960 |
| }, |
| { |
| "epoch": 10.517737296260787, |
| "grad_norm": 0.32039886713027954, |
| "learning_rate": 6.672131211647344e-05, |
| "loss": 0.0332, |
| "step": 10970 |
| }, |
| { |
| "epoch": 10.527325023969318, |
| "grad_norm": 0.31571629643440247, |
| "learning_rate": 6.666136529151938e-05, |
| "loss": 0.0358, |
| "step": 10980 |
| }, |
| { |
| "epoch": 10.536912751677852, |
| "grad_norm": 0.30983471870422363, |
| "learning_rate": 6.660139151299093e-05, |
| "loss": 0.0402, |
| "step": 10990 |
| }, |
| { |
| "epoch": 10.546500479386385, |
| "grad_norm": 0.35966020822525024, |
| "learning_rate": 6.65413908779094e-05, |
| "loss": 0.0418, |
| "step": 11000 |
| }, |
| { |
| "epoch": 10.556088207094918, |
| "grad_norm": 0.3868638277053833, |
| "learning_rate": 6.648136348333954e-05, |
| "loss": 0.0428, |
| "step": 11010 |
| }, |
| { |
| "epoch": 10.565675934803451, |
| "grad_norm": 0.20595276355743408, |
| "learning_rate": 6.642130942638945e-05, |
| "loss": 0.0359, |
| "step": 11020 |
| }, |
| { |
| "epoch": 10.575263662511984, |
| "grad_norm": 0.6492677927017212, |
| "learning_rate": 6.636122880421032e-05, |
| "loss": 0.0345, |
| "step": 11030 |
| }, |
| { |
| "epoch": 10.584851390220518, |
| "grad_norm": 0.22226084768772125, |
| "learning_rate": 6.630112171399628e-05, |
| "loss": 0.0322, |
| "step": 11040 |
| }, |
| { |
| "epoch": 10.594439117929051, |
| "grad_norm": 0.27300918102264404, |
| "learning_rate": 6.624098825298436e-05, |
| "loss": 0.0345, |
| "step": 11050 |
| }, |
| { |
| "epoch": 10.604026845637584, |
| "grad_norm": 0.2507658898830414, |
| "learning_rate": 6.618082851845417e-05, |
| "loss": 0.0397, |
| "step": 11060 |
| }, |
| { |
| "epoch": 10.613614573346117, |
| "grad_norm": 0.22898472845554352, |
| "learning_rate": 6.612064260772788e-05, |
| "loss": 0.0312, |
| "step": 11070 |
| }, |
| { |
| "epoch": 10.62320230105465, |
| "grad_norm": 0.2579527199268341, |
| "learning_rate": 6.606043061816998e-05, |
| "loss": 0.0319, |
| "step": 11080 |
| }, |
| { |
| "epoch": 10.632790028763184, |
| "grad_norm": 0.3027057945728302, |
| "learning_rate": 6.600019264718713e-05, |
| "loss": 0.0425, |
| "step": 11090 |
| }, |
| { |
| "epoch": 10.642377756471717, |
| "grad_norm": 0.4396612048149109, |
| "learning_rate": 6.593992879222808e-05, |
| "loss": 0.0347, |
| "step": 11100 |
| }, |
| { |
| "epoch": 10.651965484180248, |
| "grad_norm": 0.3383849561214447, |
| "learning_rate": 6.587963915078342e-05, |
| "loss": 0.0427, |
| "step": 11110 |
| }, |
| { |
| "epoch": 10.661553211888782, |
| "grad_norm": 0.39786002039909363, |
| "learning_rate": 6.581932382038542e-05, |
| "loss": 0.0325, |
| "step": 11120 |
| }, |
| { |
| "epoch": 10.671140939597315, |
| "grad_norm": 0.29470136761665344, |
| "learning_rate": 6.575898289860798e-05, |
| "loss": 0.0327, |
| "step": 11130 |
| }, |
| { |
| "epoch": 10.680728667305848, |
| "grad_norm": 0.33293044567108154, |
| "learning_rate": 6.569861648306632e-05, |
| "loss": 0.0372, |
| "step": 11140 |
| }, |
| { |
| "epoch": 10.690316395014381, |
| "grad_norm": 0.2922416627407074, |
| "learning_rate": 6.563822467141697e-05, |
| "loss": 0.0371, |
| "step": 11150 |
| }, |
| { |
| "epoch": 10.699904122722915, |
| "grad_norm": 0.37106814980506897, |
| "learning_rate": 6.557780756135749e-05, |
| "loss": 0.0358, |
| "step": 11160 |
| }, |
| { |
| "epoch": 10.709491850431448, |
| "grad_norm": 0.2364514172077179, |
| "learning_rate": 6.551736525062645e-05, |
| "loss": 0.038, |
| "step": 11170 |
| }, |
| { |
| "epoch": 10.719079578139981, |
| "grad_norm": 0.327987939119339, |
| "learning_rate": 6.545689783700307e-05, |
| "loss": 0.0399, |
| "step": 11180 |
| }, |
| { |
| "epoch": 10.728667305848514, |
| "grad_norm": 0.25306403636932373, |
| "learning_rate": 6.539640541830728e-05, |
| "loss": 0.0319, |
| "step": 11190 |
| }, |
| { |
| "epoch": 10.738255033557047, |
| "grad_norm": 0.301178902387619, |
| "learning_rate": 6.533588809239941e-05, |
| "loss": 0.0408, |
| "step": 11200 |
| }, |
| { |
| "epoch": 10.74784276126558, |
| "grad_norm": 0.2662244439125061, |
| "learning_rate": 6.527534595718007e-05, |
| "loss": 0.0381, |
| "step": 11210 |
| }, |
| { |
| "epoch": 10.757430488974114, |
| "grad_norm": 0.3115426301956177, |
| "learning_rate": 6.521477911059008e-05, |
| "loss": 0.0368, |
| "step": 11220 |
| }, |
| { |
| "epoch": 10.767018216682647, |
| "grad_norm": 0.4020492136478424, |
| "learning_rate": 6.515418765061015e-05, |
| "loss": 0.0346, |
| "step": 11230 |
| }, |
| { |
| "epoch": 10.776605944391179, |
| "grad_norm": 0.49596187472343445, |
| "learning_rate": 6.509357167526084e-05, |
| "loss": 0.0376, |
| "step": 11240 |
| }, |
| { |
| "epoch": 10.786193672099712, |
| "grad_norm": 0.33604878187179565, |
| "learning_rate": 6.50329312826024e-05, |
| "loss": 0.0395, |
| "step": 11250 |
| }, |
| { |
| "epoch": 10.795781399808245, |
| "grad_norm": 0.2914005219936371, |
| "learning_rate": 6.497226657073454e-05, |
| "loss": 0.0371, |
| "step": 11260 |
| }, |
| { |
| "epoch": 10.805369127516778, |
| "grad_norm": 0.34624671936035156, |
| "learning_rate": 6.491157763779632e-05, |
| "loss": 0.0281, |
| "step": 11270 |
| }, |
| { |
| "epoch": 10.814956855225311, |
| "grad_norm": 0.30700233578681946, |
| "learning_rate": 6.485086458196602e-05, |
| "loss": 0.0331, |
| "step": 11280 |
| }, |
| { |
| "epoch": 10.824544582933845, |
| "grad_norm": 0.3025294244289398, |
| "learning_rate": 6.479012750146087e-05, |
| "loss": 0.0341, |
| "step": 11290 |
| }, |
| { |
| "epoch": 10.834132310642378, |
| "grad_norm": 0.23997899889945984, |
| "learning_rate": 6.472936649453701e-05, |
| "loss": 0.0383, |
| "step": 11300 |
| }, |
| { |
| "epoch": 10.843720038350911, |
| "grad_norm": 0.24672740697860718, |
| "learning_rate": 6.466858165948933e-05, |
| "loss": 0.0313, |
| "step": 11310 |
| }, |
| { |
| "epoch": 10.853307766059444, |
| "grad_norm": 0.2887534201145172, |
| "learning_rate": 6.460777309465118e-05, |
| "loss": 0.039, |
| "step": 11320 |
| }, |
| { |
| "epoch": 10.862895493767978, |
| "grad_norm": 0.24179044365882874, |
| "learning_rate": 6.454694089839436e-05, |
| "loss": 0.032, |
| "step": 11330 |
| }, |
| { |
| "epoch": 10.87248322147651, |
| "grad_norm": 0.47962746024131775, |
| "learning_rate": 6.448608516912888e-05, |
| "loss": 0.0368, |
| "step": 11340 |
| }, |
| { |
| "epoch": 10.882070949185042, |
| "grad_norm": 0.26336967945098877, |
| "learning_rate": 6.44252060053028e-05, |
| "loss": 0.045, |
| "step": 11350 |
| }, |
| { |
| "epoch": 10.891658676893575, |
| "grad_norm": 0.2424604296684265, |
| "learning_rate": 6.436430350540215e-05, |
| "loss": 0.0321, |
| "step": 11360 |
| }, |
| { |
| "epoch": 10.901246404602109, |
| "grad_norm": 0.25244084000587463, |
| "learning_rate": 6.430337776795064e-05, |
| "loss": 0.0346, |
| "step": 11370 |
| }, |
| { |
| "epoch": 10.910834132310642, |
| "grad_norm": 0.30204179883003235, |
| "learning_rate": 6.42424288915096e-05, |
| "loss": 0.0362, |
| "step": 11380 |
| }, |
| { |
| "epoch": 10.920421860019175, |
| "grad_norm": 0.3095405697822571, |
| "learning_rate": 6.418145697467784e-05, |
| "loss": 0.036, |
| "step": 11390 |
| }, |
| { |
| "epoch": 10.930009587727708, |
| "grad_norm": 0.22773784399032593, |
| "learning_rate": 6.412046211609134e-05, |
| "loss": 0.0399, |
| "step": 11400 |
| }, |
| { |
| "epoch": 10.939597315436242, |
| "grad_norm": 0.3239744007587433, |
| "learning_rate": 6.40594444144233e-05, |
| "loss": 0.0374, |
| "step": 11410 |
| }, |
| { |
| "epoch": 10.949185043144775, |
| "grad_norm": 0.28157058358192444, |
| "learning_rate": 6.399840396838382e-05, |
| "loss": 0.0352, |
| "step": 11420 |
| }, |
| { |
| "epoch": 10.958772770853308, |
| "grad_norm": 0.31856581568717957, |
| "learning_rate": 6.393734087671979e-05, |
| "loss": 0.0379, |
| "step": 11430 |
| }, |
| { |
| "epoch": 10.968360498561841, |
| "grad_norm": 0.2937244772911072, |
| "learning_rate": 6.387625523821474e-05, |
| "loss": 0.0322, |
| "step": 11440 |
| }, |
| { |
| "epoch": 10.977948226270374, |
| "grad_norm": 0.2260034680366516, |
| "learning_rate": 6.38151471516887e-05, |
| "loss": 0.0319, |
| "step": 11450 |
| }, |
| { |
| "epoch": 10.987535953978908, |
| "grad_norm": 0.42635470628738403, |
| "learning_rate": 6.375401671599798e-05, |
| "loss": 0.0383, |
| "step": 11460 |
| }, |
| { |
| "epoch": 10.997123681687441, |
| "grad_norm": 0.288327693939209, |
| "learning_rate": 6.369286403003509e-05, |
| "loss": 0.0406, |
| "step": 11470 |
| }, |
| { |
| "epoch": 11.006711409395972, |
| "grad_norm": 0.2826128900051117, |
| "learning_rate": 6.363168919272846e-05, |
| "loss": 0.0356, |
| "step": 11480 |
| }, |
| { |
| "epoch": 11.016299137104506, |
| "grad_norm": 0.2275691032409668, |
| "learning_rate": 6.357049230304244e-05, |
| "loss": 0.0336, |
| "step": 11490 |
| }, |
| { |
| "epoch": 11.025886864813039, |
| "grad_norm": 0.24633708596229553, |
| "learning_rate": 6.3509273459977e-05, |
| "loss": 0.0353, |
| "step": 11500 |
| }, |
| { |
| "epoch": 11.035474592521572, |
| "grad_norm": 0.3283119201660156, |
| "learning_rate": 6.344803276256764e-05, |
| "loss": 0.0324, |
| "step": 11510 |
| }, |
| { |
| "epoch": 11.045062320230105, |
| "grad_norm": 0.5711014270782471, |
| "learning_rate": 6.338677030988521e-05, |
| "loss": 0.033, |
| "step": 11520 |
| }, |
| { |
| "epoch": 11.054650047938638, |
| "grad_norm": 0.3481939435005188, |
| "learning_rate": 6.332548620103575e-05, |
| "loss": 0.0398, |
| "step": 11530 |
| }, |
| { |
| "epoch": 11.064237775647172, |
| "grad_norm": 0.24051983654499054, |
| "learning_rate": 6.326418053516037e-05, |
| "loss": 0.04, |
| "step": 11540 |
| }, |
| { |
| "epoch": 11.073825503355705, |
| "grad_norm": 0.4249405264854431, |
| "learning_rate": 6.320285341143501e-05, |
| "loss": 0.0389, |
| "step": 11550 |
| }, |
| { |
| "epoch": 11.083413231064238, |
| "grad_norm": 0.24299634993076324, |
| "learning_rate": 6.314150492907034e-05, |
| "loss": 0.0323, |
| "step": 11560 |
| }, |
| { |
| "epoch": 11.093000958772771, |
| "grad_norm": 0.2705395817756653, |
| "learning_rate": 6.308013518731157e-05, |
| "loss": 0.0358, |
| "step": 11570 |
| }, |
| { |
| "epoch": 11.102588686481305, |
| "grad_norm": 0.3055950105190277, |
| "learning_rate": 6.301874428543833e-05, |
| "loss": 0.0299, |
| "step": 11580 |
| }, |
| { |
| "epoch": 11.112176414189838, |
| "grad_norm": 0.35363319516181946, |
| "learning_rate": 6.295733232276447e-05, |
| "loss": 0.0361, |
| "step": 11590 |
| }, |
| { |
| "epoch": 11.12176414189837, |
| "grad_norm": 0.4558916985988617, |
| "learning_rate": 6.28958993986379e-05, |
| "loss": 0.0391, |
| "step": 11600 |
| }, |
| { |
| "epoch": 11.131351869606902, |
| "grad_norm": 0.26662135124206543, |
| "learning_rate": 6.283444561244042e-05, |
| "loss": 0.0372, |
| "step": 11610 |
| }, |
| { |
| "epoch": 11.140939597315436, |
| "grad_norm": 0.24726532399654388, |
| "learning_rate": 6.27729710635876e-05, |
| "loss": 0.0346, |
| "step": 11620 |
| }, |
| { |
| "epoch": 11.150527325023969, |
| "grad_norm": 0.2278524488210678, |
| "learning_rate": 6.271147585152866e-05, |
| "loss": 0.0338, |
| "step": 11630 |
| }, |
| { |
| "epoch": 11.160115052732502, |
| "grad_norm": 0.3538067042827606, |
| "learning_rate": 6.264996007574615e-05, |
| "loss": 0.0388, |
| "step": 11640 |
| }, |
| { |
| "epoch": 11.169702780441035, |
| "grad_norm": 0.3667300045490265, |
| "learning_rate": 6.258842383575591e-05, |
| "loss": 0.0367, |
| "step": 11650 |
| }, |
| { |
| "epoch": 11.179290508149569, |
| "grad_norm": 0.29877883195877075, |
| "learning_rate": 6.252686723110696e-05, |
| "loss": 0.0348, |
| "step": 11660 |
| }, |
| { |
| "epoch": 11.188878235858102, |
| "grad_norm": 0.2846558392047882, |
| "learning_rate": 6.246529036138116e-05, |
| "loss": 0.0341, |
| "step": 11670 |
| }, |
| { |
| "epoch": 11.198465963566635, |
| "grad_norm": 0.2631428837776184, |
| "learning_rate": 6.24036933261932e-05, |
| "loss": 0.0356, |
| "step": 11680 |
| }, |
| { |
| "epoch": 11.208053691275168, |
| "grad_norm": 0.34309467673301697, |
| "learning_rate": 6.23420762251904e-05, |
| "loss": 0.0365, |
| "step": 11690 |
| }, |
| { |
| "epoch": 11.217641418983701, |
| "grad_norm": 0.2427697777748108, |
| "learning_rate": 6.228043915805254e-05, |
| "loss": 0.0378, |
| "step": 11700 |
| }, |
| { |
| "epoch": 11.227229146692235, |
| "grad_norm": 0.31478065252304077, |
| "learning_rate": 6.221878222449169e-05, |
| "loss": 0.0404, |
| "step": 11710 |
| }, |
| { |
| "epoch": 11.236816874400766, |
| "grad_norm": 0.27574971318244934, |
| "learning_rate": 6.215710552425206e-05, |
| "loss": 0.0311, |
| "step": 11720 |
| }, |
| { |
| "epoch": 11.2464046021093, |
| "grad_norm": 0.7589734792709351, |
| "learning_rate": 6.209540915710985e-05, |
| "loss": 0.0331, |
| "step": 11730 |
| }, |
| { |
| "epoch": 11.255992329817833, |
| "grad_norm": 0.2826196551322937, |
| "learning_rate": 6.203369322287306e-05, |
| "loss": 0.04, |
| "step": 11740 |
| }, |
| { |
| "epoch": 11.265580057526366, |
| "grad_norm": 0.6920874714851379, |
| "learning_rate": 6.197195782138132e-05, |
| "loss": 0.0367, |
| "step": 11750 |
| }, |
| { |
| "epoch": 11.275167785234899, |
| "grad_norm": 0.29903581738471985, |
| "learning_rate": 6.191020305250582e-05, |
| "loss": 0.0385, |
| "step": 11760 |
| }, |
| { |
| "epoch": 11.284755512943432, |
| "grad_norm": 0.2374860942363739, |
| "learning_rate": 6.184842901614902e-05, |
| "loss": 0.0349, |
| "step": 11770 |
| }, |
| { |
| "epoch": 11.294343240651965, |
| "grad_norm": 0.44580623507499695, |
| "learning_rate": 6.178663581224458e-05, |
| "loss": 0.0333, |
| "step": 11780 |
| }, |
| { |
| "epoch": 11.303930968360499, |
| "grad_norm": 0.2667308747768402, |
| "learning_rate": 6.172482354075716e-05, |
| "loss": 0.0359, |
| "step": 11790 |
| }, |
| { |
| "epoch": 11.313518696069032, |
| "grad_norm": 0.21850627660751343, |
| "learning_rate": 6.166299230168228e-05, |
| "loss": 0.0381, |
| "step": 11800 |
| }, |
| { |
| "epoch": 11.323106423777565, |
| "grad_norm": 0.27936065196990967, |
| "learning_rate": 6.16011421950461e-05, |
| "loss": 0.0371, |
| "step": 11810 |
| }, |
| { |
| "epoch": 11.332694151486098, |
| "grad_norm": 0.3284420371055603, |
| "learning_rate": 6.153927332090537e-05, |
| "loss": 0.0373, |
| "step": 11820 |
| }, |
| { |
| "epoch": 11.342281879194632, |
| "grad_norm": 0.2999724745750427, |
| "learning_rate": 6.147738577934711e-05, |
| "loss": 0.0376, |
| "step": 11830 |
| }, |
| { |
| "epoch": 11.351869606903165, |
| "grad_norm": 0.27732089161872864, |
| "learning_rate": 6.141547967048867e-05, |
| "loss": 0.0281, |
| "step": 11840 |
| }, |
| { |
| "epoch": 11.361457334611696, |
| "grad_norm": 0.22769756615161896, |
| "learning_rate": 6.135355509447727e-05, |
| "loss": 0.0407, |
| "step": 11850 |
| }, |
| { |
| "epoch": 11.37104506232023, |
| "grad_norm": 0.2970350682735443, |
| "learning_rate": 6.129161215149016e-05, |
| "loss": 0.0355, |
| "step": 11860 |
| }, |
| { |
| "epoch": 11.380632790028763, |
| "grad_norm": 0.319409042596817, |
| "learning_rate": 6.122965094173424e-05, |
| "loss": 0.0387, |
| "step": 11870 |
| }, |
| { |
| "epoch": 11.390220517737296, |
| "grad_norm": 0.31056809425354004, |
| "learning_rate": 6.116767156544592e-05, |
| "loss": 0.0353, |
| "step": 11880 |
| }, |
| { |
| "epoch": 11.39980824544583, |
| "grad_norm": 0.2925516366958618, |
| "learning_rate": 6.110567412289106e-05, |
| "loss": 0.0313, |
| "step": 11890 |
| }, |
| { |
| "epoch": 11.409395973154362, |
| "grad_norm": 0.2066742330789566, |
| "learning_rate": 6.10436587143647e-05, |
| "loss": 0.031, |
| "step": 11900 |
| }, |
| { |
| "epoch": 11.418983700862896, |
| "grad_norm": 0.2351049929857254, |
| "learning_rate": 6.0981625440191e-05, |
| "loss": 0.0384, |
| "step": 11910 |
| }, |
| { |
| "epoch": 11.428571428571429, |
| "grad_norm": 0.2299109846353531, |
| "learning_rate": 6.091957440072297e-05, |
| "loss": 0.029, |
| "step": 11920 |
| }, |
| { |
| "epoch": 11.438159156279962, |
| "grad_norm": 0.27398043870925903, |
| "learning_rate": 6.0857505696342376e-05, |
| "loss": 0.0334, |
| "step": 11930 |
| }, |
| { |
| "epoch": 11.447746883988495, |
| "grad_norm": 0.2886539697647095, |
| "learning_rate": 6.0795419427459564e-05, |
| "loss": 0.0403, |
| "step": 11940 |
| }, |
| { |
| "epoch": 11.457334611697028, |
| "grad_norm": 0.1952909678220749, |
| "learning_rate": 6.0733315694513306e-05, |
| "loss": 0.0342, |
| "step": 11950 |
| }, |
| { |
| "epoch": 11.466922339405562, |
| "grad_norm": 0.3800734281539917, |
| "learning_rate": 6.067119459797061e-05, |
| "loss": 0.0345, |
| "step": 11960 |
| }, |
| { |
| "epoch": 11.476510067114093, |
| "grad_norm": 0.2989748418331146, |
| "learning_rate": 6.060905623832656e-05, |
| "loss": 0.0397, |
| "step": 11970 |
| }, |
| { |
| "epoch": 11.486097794822626, |
| "grad_norm": 0.410169392824173, |
| "learning_rate": 6.0546900716104206e-05, |
| "loss": 0.0343, |
| "step": 11980 |
| }, |
| { |
| "epoch": 11.49568552253116, |
| "grad_norm": 0.3879852890968323, |
| "learning_rate": 6.048472813185433e-05, |
| "loss": 0.0328, |
| "step": 11990 |
| }, |
| { |
| "epoch": 11.505273250239693, |
| "grad_norm": 0.27426809072494507, |
| "learning_rate": 6.042253858615532e-05, |
| "loss": 0.0334, |
| "step": 12000 |
| }, |
| { |
| "epoch": 11.514860977948226, |
| "grad_norm": 0.3014174699783325, |
| "learning_rate": 6.036033217961303e-05, |
| "loss": 0.0305, |
| "step": 12010 |
| }, |
| { |
| "epoch": 11.52444870565676, |
| "grad_norm": 0.17615869641304016, |
| "learning_rate": 6.029810901286056e-05, |
| "loss": 0.04, |
| "step": 12020 |
| }, |
| { |
| "epoch": 11.534036433365292, |
| "grad_norm": 0.2742109000682831, |
| "learning_rate": 6.0235869186558125e-05, |
| "loss": 0.0349, |
| "step": 12030 |
| }, |
| { |
| "epoch": 11.543624161073826, |
| "grad_norm": 0.22772598266601562, |
| "learning_rate": 6.017361280139292e-05, |
| "loss": 0.0322, |
| "step": 12040 |
| }, |
| { |
| "epoch": 11.553211888782359, |
| "grad_norm": 0.2431521862745285, |
| "learning_rate": 6.011133995807888e-05, |
| "loss": 0.0334, |
| "step": 12050 |
| }, |
| { |
| "epoch": 11.562799616490892, |
| "grad_norm": 0.2893143594264984, |
| "learning_rate": 6.004905075735662e-05, |
| "loss": 0.0354, |
| "step": 12060 |
| }, |
| { |
| "epoch": 11.572387344199425, |
| "grad_norm": 0.26321181654930115, |
| "learning_rate": 5.998674529999316e-05, |
| "loss": 0.0364, |
| "step": 12070 |
| }, |
| { |
| "epoch": 11.581975071907959, |
| "grad_norm": 0.5845431685447693, |
| "learning_rate": 5.992442368678187e-05, |
| "loss": 0.0341, |
| "step": 12080 |
| }, |
| { |
| "epoch": 11.59156279961649, |
| "grad_norm": 0.23230616748332977, |
| "learning_rate": 5.986208601854222e-05, |
| "loss": 0.0316, |
| "step": 12090 |
| }, |
| { |
| "epoch": 11.601150527325023, |
| "grad_norm": 0.2684799134731293, |
| "learning_rate": 5.979973239611967e-05, |
| "loss": 0.0399, |
| "step": 12100 |
| }, |
| { |
| "epoch": 11.610738255033556, |
| "grad_norm": 0.19658780097961426, |
| "learning_rate": 5.973736292038549e-05, |
| "loss": 0.0396, |
| "step": 12110 |
| }, |
| { |
| "epoch": 11.62032598274209, |
| "grad_norm": 0.3254534602165222, |
| "learning_rate": 5.967497769223659e-05, |
| "loss": 0.0366, |
| "step": 12120 |
| }, |
| { |
| "epoch": 11.629913710450623, |
| "grad_norm": 0.573215663433075, |
| "learning_rate": 5.961257681259535e-05, |
| "loss": 0.0371, |
| "step": 12130 |
| }, |
| { |
| "epoch": 11.639501438159156, |
| "grad_norm": 0.24387991428375244, |
| "learning_rate": 5.955016038240951e-05, |
| "loss": 0.0314, |
| "step": 12140 |
| }, |
| { |
| "epoch": 11.64908916586769, |
| "grad_norm": 0.3126358091831207, |
| "learning_rate": 5.948772850265193e-05, |
| "loss": 0.0388, |
| "step": 12150 |
| }, |
| { |
| "epoch": 11.658676893576223, |
| "grad_norm": 0.2461678385734558, |
| "learning_rate": 5.9425281274320466e-05, |
| "loss": 0.0389, |
| "step": 12160 |
| }, |
| { |
| "epoch": 11.668264621284756, |
| "grad_norm": 0.2887043058872223, |
| "learning_rate": 5.936281879843782e-05, |
| "loss": 0.0316, |
| "step": 12170 |
| }, |
| { |
| "epoch": 11.677852348993289, |
| "grad_norm": 0.4977504014968872, |
| "learning_rate": 5.9300341176051364e-05, |
| "loss": 0.0369, |
| "step": 12180 |
| }, |
| { |
| "epoch": 11.687440076701822, |
| "grad_norm": 0.1966911256313324, |
| "learning_rate": 5.923784850823294e-05, |
| "loss": 0.0354, |
| "step": 12190 |
| }, |
| { |
| "epoch": 11.697027804410356, |
| "grad_norm": 0.28435948491096497, |
| "learning_rate": 5.917534089607877e-05, |
| "loss": 0.0347, |
| "step": 12200 |
| }, |
| { |
| "epoch": 11.706615532118889, |
| "grad_norm": 0.26728013157844543, |
| "learning_rate": 5.911281844070923e-05, |
| "loss": 0.0292, |
| "step": 12210 |
| }, |
| { |
| "epoch": 11.71620325982742, |
| "grad_norm": 0.24896536767482758, |
| "learning_rate": 5.905028124326869e-05, |
| "loss": 0.04, |
| "step": 12220 |
| }, |
| { |
| "epoch": 11.725790987535953, |
| "grad_norm": 0.394512414932251, |
| "learning_rate": 5.8987729404925405e-05, |
| "loss": 0.0394, |
| "step": 12230 |
| }, |
| { |
| "epoch": 11.735378715244487, |
| "grad_norm": 0.27139657735824585, |
| "learning_rate": 5.892516302687131e-05, |
| "loss": 0.0367, |
| "step": 12240 |
| }, |
| { |
| "epoch": 11.74496644295302, |
| "grad_norm": 0.30433669686317444, |
| "learning_rate": 5.886258221032184e-05, |
| "loss": 0.0315, |
| "step": 12250 |
| }, |
| { |
| "epoch": 11.754554170661553, |
| "grad_norm": 0.387657105922699, |
| "learning_rate": 5.8799987056515804e-05, |
| "loss": 0.0367, |
| "step": 12260 |
| }, |
| { |
| "epoch": 11.764141898370086, |
| "grad_norm": 0.7159243226051331, |
| "learning_rate": 5.87373776667152e-05, |
| "loss": 0.037, |
| "step": 12270 |
| }, |
| { |
| "epoch": 11.77372962607862, |
| "grad_norm": 0.4516725540161133, |
| "learning_rate": 5.867475414220506e-05, |
| "loss": 0.0389, |
| "step": 12280 |
| }, |
| { |
| "epoch": 11.783317353787153, |
| "grad_norm": 0.4054473638534546, |
| "learning_rate": 5.8612116584293266e-05, |
| "loss": 0.0349, |
| "step": 12290 |
| }, |
| { |
| "epoch": 11.792905081495686, |
| "grad_norm": 0.7706658244132996, |
| "learning_rate": 5.854946509431042e-05, |
| "loss": 0.0304, |
| "step": 12300 |
| }, |
| { |
| "epoch": 11.80249280920422, |
| "grad_norm": 0.2988179624080658, |
| "learning_rate": 5.848679977360963e-05, |
| "loss": 0.0308, |
| "step": 12310 |
| }, |
| { |
| "epoch": 11.812080536912752, |
| "grad_norm": 0.3133019804954529, |
| "learning_rate": 5.8424120723566453e-05, |
| "loss": 0.0341, |
| "step": 12320 |
| }, |
| { |
| "epoch": 11.821668264621284, |
| "grad_norm": 0.28148677945137024, |
| "learning_rate": 5.8361428045578595e-05, |
| "loss": 0.0365, |
| "step": 12330 |
| }, |
| { |
| "epoch": 11.831255992329817, |
| "grad_norm": 0.2674432098865509, |
| "learning_rate": 5.829872184106579e-05, |
| "loss": 0.0335, |
| "step": 12340 |
| }, |
| { |
| "epoch": 11.84084372003835, |
| "grad_norm": 0.2875913679599762, |
| "learning_rate": 5.823600221146974e-05, |
| "loss": 0.0324, |
| "step": 12350 |
| }, |
| { |
| "epoch": 11.850431447746884, |
| "grad_norm": 0.39202550053596497, |
| "learning_rate": 5.817326925825376e-05, |
| "loss": 0.0309, |
| "step": 12360 |
| }, |
| { |
| "epoch": 11.860019175455417, |
| "grad_norm": 0.2087734192609787, |
| "learning_rate": 5.811052308290279e-05, |
| "loss": 0.033, |
| "step": 12370 |
| }, |
| { |
| "epoch": 11.86960690316395, |
| "grad_norm": 0.2347189038991928, |
| "learning_rate": 5.804776378692313e-05, |
| "loss": 0.0357, |
| "step": 12380 |
| }, |
| { |
| "epoch": 11.879194630872483, |
| "grad_norm": 0.18789781630039215, |
| "learning_rate": 5.798499147184233e-05, |
| "loss": 0.0362, |
| "step": 12390 |
| }, |
| { |
| "epoch": 11.888782358581016, |
| "grad_norm": 0.44185203313827515, |
| "learning_rate": 5.792220623920898e-05, |
| "loss": 0.0353, |
| "step": 12400 |
| }, |
| { |
| "epoch": 11.89837008628955, |
| "grad_norm": 0.34168651700019836, |
| "learning_rate": 5.785940819059259e-05, |
| "loss": 0.0399, |
| "step": 12410 |
| }, |
| { |
| "epoch": 11.907957813998083, |
| "grad_norm": 0.3143576979637146, |
| "learning_rate": 5.779659742758336e-05, |
| "loss": 0.042, |
| "step": 12420 |
| }, |
| { |
| "epoch": 11.917545541706616, |
| "grad_norm": 0.2344186156988144, |
| "learning_rate": 5.773377405179209e-05, |
| "loss": 0.0377, |
| "step": 12430 |
| }, |
| { |
| "epoch": 11.92713326941515, |
| "grad_norm": 0.19894208014011383, |
| "learning_rate": 5.767093816484999e-05, |
| "loss": 0.0335, |
| "step": 12440 |
| }, |
| { |
| "epoch": 11.936720997123683, |
| "grad_norm": 0.332093745470047, |
| "learning_rate": 5.7608089868408486e-05, |
| "loss": 0.0324, |
| "step": 12450 |
| }, |
| { |
| "epoch": 11.946308724832214, |
| "grad_norm": 0.2045692652463913, |
| "learning_rate": 5.75452292641391e-05, |
| "loss": 0.0348, |
| "step": 12460 |
| }, |
| { |
| "epoch": 11.955896452540747, |
| "grad_norm": 0.2825275659561157, |
| "learning_rate": 5.748235645373325e-05, |
| "loss": 0.0385, |
| "step": 12470 |
| }, |
| { |
| "epoch": 11.96548418024928, |
| "grad_norm": 0.3274647891521454, |
| "learning_rate": 5.741947153890215e-05, |
| "loss": 0.0338, |
| "step": 12480 |
| }, |
| { |
| "epoch": 11.975071907957814, |
| "grad_norm": 0.29837775230407715, |
| "learning_rate": 5.7356574621376493e-05, |
| "loss": 0.0406, |
| "step": 12490 |
| }, |
| { |
| "epoch": 11.984659635666347, |
| "grad_norm": 0.3342297673225403, |
| "learning_rate": 5.729366580290646e-05, |
| "loss": 0.0325, |
| "step": 12500 |
| }, |
| { |
| "epoch": 11.99424736337488, |
| "grad_norm": 0.2670736014842987, |
| "learning_rate": 5.7230745185261505e-05, |
| "loss": 0.0361, |
| "step": 12510 |
| }, |
| { |
| "epoch": 12.003835091083413, |
| "grad_norm": 0.24446439743041992, |
| "learning_rate": 5.7167812870230094e-05, |
| "loss": 0.0298, |
| "step": 12520 |
| }, |
| { |
| "epoch": 12.013422818791947, |
| "grad_norm": 0.24025262892246246, |
| "learning_rate": 5.710486895961971e-05, |
| "loss": 0.0285, |
| "step": 12530 |
| }, |
| { |
| "epoch": 12.02301054650048, |
| "grad_norm": 0.20725701749324799, |
| "learning_rate": 5.7041913555256506e-05, |
| "loss": 0.0319, |
| "step": 12540 |
| }, |
| { |
| "epoch": 12.032598274209013, |
| "grad_norm": 0.24926726520061493, |
| "learning_rate": 5.6978946758985285e-05, |
| "loss": 0.0358, |
| "step": 12550 |
| }, |
| { |
| "epoch": 12.042186001917546, |
| "grad_norm": 0.22566866874694824, |
| "learning_rate": 5.691596867266925e-05, |
| "loss": 0.0353, |
| "step": 12560 |
| }, |
| { |
| "epoch": 12.05177372962608, |
| "grad_norm": 0.2323976457118988, |
| "learning_rate": 5.68529793981899e-05, |
| "loss": 0.0347, |
| "step": 12570 |
| }, |
| { |
| "epoch": 12.06136145733461, |
| "grad_norm": 0.2751142680644989, |
| "learning_rate": 5.6789979037446784e-05, |
| "loss": 0.0343, |
| "step": 12580 |
| }, |
| { |
| "epoch": 12.070949185043144, |
| "grad_norm": 0.20366577804088593, |
| "learning_rate": 5.672696769235744e-05, |
| "loss": 0.0346, |
| "step": 12590 |
| }, |
| { |
| "epoch": 12.080536912751677, |
| "grad_norm": 0.30414018034935, |
| "learning_rate": 5.666394546485714e-05, |
| "loss": 0.0335, |
| "step": 12600 |
| }, |
| { |
| "epoch": 12.09012464046021, |
| "grad_norm": 0.24006792902946472, |
| "learning_rate": 5.660091245689878e-05, |
| "loss": 0.0332, |
| "step": 12610 |
| }, |
| { |
| "epoch": 12.099712368168744, |
| "grad_norm": 0.25928163528442383, |
| "learning_rate": 5.653786877045266e-05, |
| "loss": 0.0345, |
| "step": 12620 |
| }, |
| { |
| "epoch": 12.109300095877277, |
| "grad_norm": 0.3005020320415497, |
| "learning_rate": 5.6474814507506426e-05, |
| "loss": 0.0279, |
| "step": 12630 |
| }, |
| { |
| "epoch": 12.11888782358581, |
| "grad_norm": 0.2962352931499481, |
| "learning_rate": 5.641174977006476e-05, |
| "loss": 0.0349, |
| "step": 12640 |
| }, |
| { |
| "epoch": 12.128475551294343, |
| "grad_norm": 0.3519500195980072, |
| "learning_rate": 5.634867466014932e-05, |
| "loss": 0.0322, |
| "step": 12650 |
| }, |
| { |
| "epoch": 12.138063279002877, |
| "grad_norm": 0.3588416576385498, |
| "learning_rate": 5.628558927979854e-05, |
| "loss": 0.0324, |
| "step": 12660 |
| }, |
| { |
| "epoch": 12.14765100671141, |
| "grad_norm": 0.29862353205680847, |
| "learning_rate": 5.622249373106748e-05, |
| "loss": 0.037, |
| "step": 12670 |
| }, |
| { |
| "epoch": 12.157238734419943, |
| "grad_norm": 0.3698887526988983, |
| "learning_rate": 5.6159388116027654e-05, |
| "loss": 0.0336, |
| "step": 12680 |
| }, |
| { |
| "epoch": 12.166826462128476, |
| "grad_norm": 0.268628865480423, |
| "learning_rate": 5.609627253676682e-05, |
| "loss": 0.0373, |
| "step": 12690 |
| }, |
| { |
| "epoch": 12.176414189837008, |
| "grad_norm": 0.23115096986293793, |
| "learning_rate": 5.603314709538891e-05, |
| "loss": 0.0393, |
| "step": 12700 |
| }, |
| { |
| "epoch": 12.186001917545541, |
| "grad_norm": 0.26541295647621155, |
| "learning_rate": 5.597001189401376e-05, |
| "loss": 0.0367, |
| "step": 12710 |
| }, |
| { |
| "epoch": 12.195589645254074, |
| "grad_norm": 0.28933706879615784, |
| "learning_rate": 5.5906867034777046e-05, |
| "loss": 0.0332, |
| "step": 12720 |
| }, |
| { |
| "epoch": 12.205177372962607, |
| "grad_norm": 0.320468008518219, |
| "learning_rate": 5.584371261983e-05, |
| "loss": 0.0351, |
| "step": 12730 |
| }, |
| { |
| "epoch": 12.21476510067114, |
| "grad_norm": 0.24627713859081268, |
| "learning_rate": 5.578054875133939e-05, |
| "loss": 0.032, |
| "step": 12740 |
| }, |
| { |
| "epoch": 12.224352828379674, |
| "grad_norm": 0.19859549403190613, |
| "learning_rate": 5.571737553148723e-05, |
| "loss": 0.0338, |
| "step": 12750 |
| }, |
| { |
| "epoch": 12.233940556088207, |
| "grad_norm": 0.2559930086135864, |
| "learning_rate": 5.565419306247065e-05, |
| "loss": 0.0372, |
| "step": 12760 |
| }, |
| { |
| "epoch": 12.24352828379674, |
| "grad_norm": 0.1816064417362213, |
| "learning_rate": 5.559100144650179e-05, |
| "loss": 0.0325, |
| "step": 12770 |
| }, |
| { |
| "epoch": 12.253116011505274, |
| "grad_norm": 0.5027087330818176, |
| "learning_rate": 5.552780078580756e-05, |
| "loss": 0.0357, |
| "step": 12780 |
| }, |
| { |
| "epoch": 12.262703739213807, |
| "grad_norm": 0.4723157584667206, |
| "learning_rate": 5.54645911826295e-05, |
| "loss": 0.0301, |
| "step": 12790 |
| }, |
| { |
| "epoch": 12.27229146692234, |
| "grad_norm": 0.18510127067565918, |
| "learning_rate": 5.5401372739223615e-05, |
| "loss": 0.0393, |
| "step": 12800 |
| }, |
| { |
| "epoch": 12.281879194630873, |
| "grad_norm": 0.19757391512393951, |
| "learning_rate": 5.533814555786021e-05, |
| "loss": 0.0319, |
| "step": 12810 |
| }, |
| { |
| "epoch": 12.291466922339406, |
| "grad_norm": 0.25884294509887695, |
| "learning_rate": 5.527490974082376e-05, |
| "loss": 0.0319, |
| "step": 12820 |
| }, |
| { |
| "epoch": 12.301054650047938, |
| "grad_norm": 0.29503270983695984, |
| "learning_rate": 5.521166539041266e-05, |
| "loss": 0.0405, |
| "step": 12830 |
| }, |
| { |
| "epoch": 12.310642377756471, |
| "grad_norm": 0.3443543016910553, |
| "learning_rate": 5.514841260893913e-05, |
| "loss": 0.0345, |
| "step": 12840 |
| }, |
| { |
| "epoch": 12.320230105465004, |
| "grad_norm": 0.3162010610103607, |
| "learning_rate": 5.508515149872903e-05, |
| "loss": 0.0374, |
| "step": 12850 |
| }, |
| { |
| "epoch": 12.329817833173538, |
| "grad_norm": 0.37343630194664, |
| "learning_rate": 5.502188216212172e-05, |
| "loss": 0.0339, |
| "step": 12860 |
| }, |
| { |
| "epoch": 12.33940556088207, |
| "grad_norm": 0.4099912643432617, |
| "learning_rate": 5.4958604701469804e-05, |
| "loss": 0.0348, |
| "step": 12870 |
| }, |
| { |
| "epoch": 12.348993288590604, |
| "grad_norm": 0.3237497806549072, |
| "learning_rate": 5.489531921913911e-05, |
| "loss": 0.0277, |
| "step": 12880 |
| }, |
| { |
| "epoch": 12.358581016299137, |
| "grad_norm": 0.2685404121875763, |
| "learning_rate": 5.483202581750838e-05, |
| "loss": 0.0326, |
| "step": 12890 |
| }, |
| { |
| "epoch": 12.36816874400767, |
| "grad_norm": 0.28428301215171814, |
| "learning_rate": 5.476872459896918e-05, |
| "loss": 0.0372, |
| "step": 12900 |
| }, |
| { |
| "epoch": 12.377756471716204, |
| "grad_norm": 0.34229859709739685, |
| "learning_rate": 5.470541566592573e-05, |
| "loss": 0.0324, |
| "step": 12910 |
| }, |
| { |
| "epoch": 12.387344199424737, |
| "grad_norm": 0.3393026292324066, |
| "learning_rate": 5.464209912079472e-05, |
| "loss": 0.034, |
| "step": 12920 |
| }, |
| { |
| "epoch": 12.39693192713327, |
| "grad_norm": 0.3479039967060089, |
| "learning_rate": 5.4578775066005196e-05, |
| "loss": 0.0384, |
| "step": 12930 |
| }, |
| { |
| "epoch": 12.406519654841803, |
| "grad_norm": 0.22416572272777557, |
| "learning_rate": 5.4515443603998304e-05, |
| "loss": 0.0339, |
| "step": 12940 |
| }, |
| { |
| "epoch": 12.416107382550335, |
| "grad_norm": 0.3005695343017578, |
| "learning_rate": 5.445210483722719e-05, |
| "loss": 0.0374, |
| "step": 12950 |
| }, |
| { |
| "epoch": 12.425695110258868, |
| "grad_norm": 0.2770855724811554, |
| "learning_rate": 5.438875886815682e-05, |
| "loss": 0.0407, |
| "step": 12960 |
| }, |
| { |
| "epoch": 12.435282837967401, |
| "grad_norm": 0.3203631043434143, |
| "learning_rate": 5.4325405799263786e-05, |
| "loss": 0.0381, |
| "step": 12970 |
| }, |
| { |
| "epoch": 12.444870565675934, |
| "grad_norm": 0.32981497049331665, |
| "learning_rate": 5.4262045733036204e-05, |
| "loss": 0.0389, |
| "step": 12980 |
| }, |
| { |
| "epoch": 12.454458293384468, |
| "grad_norm": 0.24350851774215698, |
| "learning_rate": 5.4198678771973475e-05, |
| "loss": 0.0377, |
| "step": 12990 |
| }, |
| { |
| "epoch": 12.464046021093, |
| "grad_norm": 0.25702494382858276, |
| "learning_rate": 5.413530501858621e-05, |
| "loss": 0.0308, |
| "step": 13000 |
| }, |
| { |
| "epoch": 12.473633748801534, |
| "grad_norm": 0.25904905796051025, |
| "learning_rate": 5.407192457539594e-05, |
| "loss": 0.0327, |
| "step": 13010 |
| }, |
| { |
| "epoch": 12.483221476510067, |
| "grad_norm": 0.29727786779403687, |
| "learning_rate": 5.4008537544935066e-05, |
| "loss": 0.0376, |
| "step": 13020 |
| }, |
| { |
| "epoch": 12.4928092042186, |
| "grad_norm": 0.21568791568279266, |
| "learning_rate": 5.394514402974661e-05, |
| "loss": 0.029, |
| "step": 13030 |
| }, |
| { |
| "epoch": 12.502396931927134, |
| "grad_norm": 0.31120288372039795, |
| "learning_rate": 5.3881744132384104e-05, |
| "loss": 0.0289, |
| "step": 13040 |
| }, |
| { |
| "epoch": 12.511984659635667, |
| "grad_norm": 0.3262520134449005, |
| "learning_rate": 5.381833795541141e-05, |
| "loss": 0.0458, |
| "step": 13050 |
| }, |
| { |
| "epoch": 12.5215723873442, |
| "grad_norm": 0.27970728278160095, |
| "learning_rate": 5.375492560140254e-05, |
| "loss": 0.0411, |
| "step": 13060 |
| }, |
| { |
| "epoch": 12.531160115052732, |
| "grad_norm": 0.25999292731285095, |
| "learning_rate": 5.3691507172941493e-05, |
| "loss": 0.0367, |
| "step": 13070 |
| }, |
| { |
| "epoch": 12.540747842761265, |
| "grad_norm": 0.32972532510757446, |
| "learning_rate": 5.362808277262211e-05, |
| "loss": 0.0336, |
| "step": 13080 |
| }, |
| { |
| "epoch": 12.550335570469798, |
| "grad_norm": 0.21841417253017426, |
| "learning_rate": 5.3564652503047895e-05, |
| "loss": 0.0383, |
| "step": 13090 |
| }, |
| { |
| "epoch": 12.559923298178331, |
| "grad_norm": 0.5416061878204346, |
| "learning_rate": 5.350121646683183e-05, |
| "loss": 0.0385, |
| "step": 13100 |
| }, |
| { |
| "epoch": 12.569511025886865, |
| "grad_norm": 0.28985804319381714, |
| "learning_rate": 5.343777476659621e-05, |
| "loss": 0.0334, |
| "step": 13110 |
| }, |
| { |
| "epoch": 12.579098753595398, |
| "grad_norm": 0.7717734575271606, |
| "learning_rate": 5.3374327504972544e-05, |
| "loss": 0.035, |
| "step": 13120 |
| }, |
| { |
| "epoch": 12.588686481303931, |
| "grad_norm": 0.38980838656425476, |
| "learning_rate": 5.331087478460129e-05, |
| "loss": 0.04, |
| "step": 13130 |
| }, |
| { |
| "epoch": 12.598274209012464, |
| "grad_norm": 0.43601536750793457, |
| "learning_rate": 5.324741670813178e-05, |
| "loss": 0.034, |
| "step": 13140 |
| }, |
| { |
| "epoch": 12.607861936720997, |
| "grad_norm": 0.27574917674064636, |
| "learning_rate": 5.318395337822195e-05, |
| "loss": 0.0328, |
| "step": 13150 |
| }, |
| { |
| "epoch": 12.61744966442953, |
| "grad_norm": 0.19968970119953156, |
| "learning_rate": 5.312048489753833e-05, |
| "loss": 0.0311, |
| "step": 13160 |
| }, |
| { |
| "epoch": 12.627037392138064, |
| "grad_norm": 0.4505964517593384, |
| "learning_rate": 5.305701136875566e-05, |
| "loss": 0.0326, |
| "step": 13170 |
| }, |
| { |
| "epoch": 12.636625119846597, |
| "grad_norm": 0.24829363822937012, |
| "learning_rate": 5.299353289455694e-05, |
| "loss": 0.0344, |
| "step": 13180 |
| }, |
| { |
| "epoch": 12.64621284755513, |
| "grad_norm": 0.2600340247154236, |
| "learning_rate": 5.2930049577633146e-05, |
| "loss": 0.0309, |
| "step": 13190 |
| }, |
| { |
| "epoch": 12.655800575263662, |
| "grad_norm": 0.2981709837913513, |
| "learning_rate": 5.2866561520683065e-05, |
| "loss": 0.0314, |
| "step": 13200 |
| }, |
| { |
| "epoch": 12.665388302972195, |
| "grad_norm": 0.22709107398986816, |
| "learning_rate": 5.280306882641319e-05, |
| "loss": 0.0323, |
| "step": 13210 |
| }, |
| { |
| "epoch": 12.674976030680728, |
| "grad_norm": 0.31488150358200073, |
| "learning_rate": 5.273957159753749e-05, |
| "loss": 0.0357, |
| "step": 13220 |
| }, |
| { |
| "epoch": 12.684563758389261, |
| "grad_norm": 0.5378819704055786, |
| "learning_rate": 5.2676069936777264e-05, |
| "loss": 0.0341, |
| "step": 13230 |
| }, |
| { |
| "epoch": 12.694151486097795, |
| "grad_norm": 0.3149401843547821, |
| "learning_rate": 5.2612563946861e-05, |
| "loss": 0.0369, |
| "step": 13240 |
| }, |
| { |
| "epoch": 12.703739213806328, |
| "grad_norm": 0.2183138132095337, |
| "learning_rate": 5.254905373052419e-05, |
| "loss": 0.0349, |
| "step": 13250 |
| }, |
| { |
| "epoch": 12.713326941514861, |
| "grad_norm": 1.1205395460128784, |
| "learning_rate": 5.2485539390509156e-05, |
| "loss": 0.0368, |
| "step": 13260 |
| }, |
| { |
| "epoch": 12.722914669223394, |
| "grad_norm": 0.21172510087490082, |
| "learning_rate": 5.242202102956486e-05, |
| "loss": 0.0402, |
| "step": 13270 |
| }, |
| { |
| "epoch": 12.732502396931928, |
| "grad_norm": 0.25088265538215637, |
| "learning_rate": 5.2358498750446835e-05, |
| "loss": 0.0356, |
| "step": 13280 |
| }, |
| { |
| "epoch": 12.74209012464046, |
| "grad_norm": 0.36349666118621826, |
| "learning_rate": 5.229497265591689e-05, |
| "loss": 0.0292, |
| "step": 13290 |
| }, |
| { |
| "epoch": 12.751677852348994, |
| "grad_norm": 0.3626287877559662, |
| "learning_rate": 5.2231442848743064e-05, |
| "loss": 0.0402, |
| "step": 13300 |
| }, |
| { |
| "epoch": 12.761265580057525, |
| "grad_norm": 0.18637891113758087, |
| "learning_rate": 5.2167909431699344e-05, |
| "loss": 0.0324, |
| "step": 13310 |
| }, |
| { |
| "epoch": 12.770853307766059, |
| "grad_norm": 0.28557726740837097, |
| "learning_rate": 5.2104372507565593e-05, |
| "loss": 0.0289, |
| "step": 13320 |
| }, |
| { |
| "epoch": 12.780441035474592, |
| "grad_norm": 0.3556912839412689, |
| "learning_rate": 5.204083217912732e-05, |
| "loss": 0.0343, |
| "step": 13330 |
| }, |
| { |
| "epoch": 12.790028763183125, |
| "grad_norm": 0.16443754732608795, |
| "learning_rate": 5.197728854917558e-05, |
| "loss": 0.0337, |
| "step": 13340 |
| }, |
| { |
| "epoch": 12.799616490891658, |
| "grad_norm": 0.19597065448760986, |
| "learning_rate": 5.191374172050676e-05, |
| "loss": 0.0293, |
| "step": 13350 |
| }, |
| { |
| "epoch": 12.809204218600192, |
| "grad_norm": 0.38750675320625305, |
| "learning_rate": 5.185019179592238e-05, |
| "loss": 0.0404, |
| "step": 13360 |
| }, |
| { |
| "epoch": 12.818791946308725, |
| "grad_norm": 0.25635913014411926, |
| "learning_rate": 5.178663887822901e-05, |
| "loss": 0.0259, |
| "step": 13370 |
| }, |
| { |
| "epoch": 12.828379674017258, |
| "grad_norm": 0.21815137565135956, |
| "learning_rate": 5.172308307023805e-05, |
| "loss": 0.0296, |
| "step": 13380 |
| }, |
| { |
| "epoch": 12.837967401725791, |
| "grad_norm": 0.3391851782798767, |
| "learning_rate": 5.165952447476559e-05, |
| "loss": 0.0312, |
| "step": 13390 |
| }, |
| { |
| "epoch": 12.847555129434324, |
| "grad_norm": 0.38378575444221497, |
| "learning_rate": 5.159596319463219e-05, |
| "loss": 0.0301, |
| "step": 13400 |
| }, |
| { |
| "epoch": 12.857142857142858, |
| "grad_norm": 0.29647505283355713, |
| "learning_rate": 5.15323993326628e-05, |
| "loss": 0.0355, |
| "step": 13410 |
| }, |
| { |
| "epoch": 12.86673058485139, |
| "grad_norm": 0.3213365972042084, |
| "learning_rate": 5.146883299168651e-05, |
| "loss": 0.0309, |
| "step": 13420 |
| }, |
| { |
| "epoch": 12.876318312559924, |
| "grad_norm": 0.21259522438049316, |
| "learning_rate": 5.1405264274536445e-05, |
| "loss": 0.0361, |
| "step": 13430 |
| }, |
| { |
| "epoch": 12.885906040268456, |
| "grad_norm": 0.41032230854034424, |
| "learning_rate": 5.134169328404956e-05, |
| "loss": 0.0347, |
| "step": 13440 |
| }, |
| { |
| "epoch": 12.895493767976989, |
| "grad_norm": 0.3352082371711731, |
| "learning_rate": 5.127812012306649e-05, |
| "loss": 0.0329, |
| "step": 13450 |
| }, |
| { |
| "epoch": 12.905081495685522, |
| "grad_norm": 2.1955349445343018, |
| "learning_rate": 5.1214544894431396e-05, |
| "loss": 0.036, |
| "step": 13460 |
| }, |
| { |
| "epoch": 12.914669223394055, |
| "grad_norm": 0.19683793187141418, |
| "learning_rate": 5.115096770099175e-05, |
| "loss": 0.0344, |
| "step": 13470 |
| }, |
| { |
| "epoch": 12.924256951102588, |
| "grad_norm": 0.2288978546857834, |
| "learning_rate": 5.1087388645598235e-05, |
| "loss": 0.0289, |
| "step": 13480 |
| }, |
| { |
| "epoch": 12.933844678811122, |
| "grad_norm": 0.3008512556552887, |
| "learning_rate": 5.1023807831104544e-05, |
| "loss": 0.0421, |
| "step": 13490 |
| }, |
| { |
| "epoch": 12.943432406519655, |
| "grad_norm": 0.29300564527511597, |
| "learning_rate": 5.096022536036721e-05, |
| "loss": 0.0374, |
| "step": 13500 |
| }, |
| { |
| "epoch": 12.953020134228188, |
| "grad_norm": 0.2803822457790375, |
| "learning_rate": 5.089664133624541e-05, |
| "loss": 0.0349, |
| "step": 13510 |
| }, |
| { |
| "epoch": 12.962607861936721, |
| "grad_norm": 0.35536760091781616, |
| "learning_rate": 5.083305586160089e-05, |
| "loss": 0.0311, |
| "step": 13520 |
| }, |
| { |
| "epoch": 12.972195589645255, |
| "grad_norm": 0.290683776140213, |
| "learning_rate": 5.07694690392977e-05, |
| "loss": 0.0315, |
| "step": 13530 |
| }, |
| { |
| "epoch": 12.981783317353788, |
| "grad_norm": 0.19355502724647522, |
| "learning_rate": 5.070588097220213e-05, |
| "loss": 0.0297, |
| "step": 13540 |
| }, |
| { |
| "epoch": 12.991371045062321, |
| "grad_norm": 0.2547348439693451, |
| "learning_rate": 5.06422917631824e-05, |
| "loss": 0.0325, |
| "step": 13550 |
| }, |
| { |
| "epoch": 13.000958772770852, |
| "grad_norm": 0.1929698884487152, |
| "learning_rate": 5.057870151510864e-05, |
| "loss": 0.0329, |
| "step": 13560 |
| }, |
| { |
| "epoch": 13.010546500479386, |
| "grad_norm": 0.29264265298843384, |
| "learning_rate": 5.051511033085264e-05, |
| "loss": 0.0319, |
| "step": 13570 |
| }, |
| { |
| "epoch": 13.020134228187919, |
| "grad_norm": 0.28177183866500854, |
| "learning_rate": 5.0451518313287704e-05, |
| "loss": 0.038, |
| "step": 13580 |
| }, |
| { |
| "epoch": 13.029721955896452, |
| "grad_norm": 0.3331814110279083, |
| "learning_rate": 5.0387925565288485e-05, |
| "loss": 0.0307, |
| "step": 13590 |
| }, |
| { |
| "epoch": 13.039309683604985, |
| "grad_norm": 0.297892689704895, |
| "learning_rate": 5.0324332189730796e-05, |
| "loss": 0.0339, |
| "step": 13600 |
| }, |
| { |
| "epoch": 13.048897411313519, |
| "grad_norm": 0.2248513251543045, |
| "learning_rate": 5.0260738289491516e-05, |
| "loss": 0.0227, |
| "step": 13610 |
| }, |
| { |
| "epoch": 13.058485139022052, |
| "grad_norm": 0.24514958262443542, |
| "learning_rate": 5.0197143967448335e-05, |
| "loss": 0.0335, |
| "step": 13620 |
| }, |
| { |
| "epoch": 13.068072866730585, |
| "grad_norm": 0.29958298802375793, |
| "learning_rate": 5.0133549326479645e-05, |
| "loss": 0.0305, |
| "step": 13630 |
| }, |
| { |
| "epoch": 13.077660594439118, |
| "grad_norm": 3.086843252182007, |
| "learning_rate": 5.006995446946433e-05, |
| "loss": 0.0377, |
| "step": 13640 |
| }, |
| { |
| "epoch": 13.087248322147651, |
| "grad_norm": 0.31443238258361816, |
| "learning_rate": 5.000635949928163e-05, |
| "loss": 0.0344, |
| "step": 13650 |
| }, |
| { |
| "epoch": 13.096836049856185, |
| "grad_norm": 0.27507051825523376, |
| "learning_rate": 4.994276451881098e-05, |
| "loss": 0.034, |
| "step": 13660 |
| }, |
| { |
| "epoch": 13.106423777564718, |
| "grad_norm": 0.2578774094581604, |
| "learning_rate": 4.987916963093184e-05, |
| "loss": 0.0328, |
| "step": 13670 |
| }, |
| { |
| "epoch": 13.116011505273251, |
| "grad_norm": 0.28767842054367065, |
| "learning_rate": 4.981557493852349e-05, |
| "loss": 0.0332, |
| "step": 13680 |
| }, |
| { |
| "epoch": 13.125599232981783, |
| "grad_norm": 0.17203165590763092, |
| "learning_rate": 4.975198054446492e-05, |
| "loss": 0.0327, |
| "step": 13690 |
| }, |
| { |
| "epoch": 13.135186960690316, |
| "grad_norm": 0.2606458067893982, |
| "learning_rate": 4.968838655163462e-05, |
| "loss": 0.0321, |
| "step": 13700 |
| }, |
| { |
| "epoch": 13.144774688398849, |
| "grad_norm": 0.3137904703617096, |
| "learning_rate": 4.9624793062910445e-05, |
| "loss": 0.0376, |
| "step": 13710 |
| }, |
| { |
| "epoch": 13.154362416107382, |
| "grad_norm": 0.255403608083725, |
| "learning_rate": 4.956120018116941e-05, |
| "loss": 0.0304, |
| "step": 13720 |
| }, |
| { |
| "epoch": 13.163950143815915, |
| "grad_norm": 0.32765787839889526, |
| "learning_rate": 4.94976080092876e-05, |
| "loss": 0.0352, |
| "step": 13730 |
| }, |
| { |
| "epoch": 13.173537871524449, |
| "grad_norm": 0.36302298307418823, |
| "learning_rate": 4.94340166501399e-05, |
| "loss": 0.0287, |
| "step": 13740 |
| }, |
| { |
| "epoch": 13.183125599232982, |
| "grad_norm": 0.1956561803817749, |
| "learning_rate": 4.93704262065999e-05, |
| "loss": 0.0299, |
| "step": 13750 |
| }, |
| { |
| "epoch": 13.192713326941515, |
| "grad_norm": 0.28090646862983704, |
| "learning_rate": 4.930683678153971e-05, |
| "loss": 0.0294, |
| "step": 13760 |
| }, |
| { |
| "epoch": 13.202301054650048, |
| "grad_norm": 0.3016568422317505, |
| "learning_rate": 4.9243248477829786e-05, |
| "loss": 0.0339, |
| "step": 13770 |
| }, |
| { |
| "epoch": 13.211888782358582, |
| "grad_norm": 0.34404152631759644, |
| "learning_rate": 4.9179661398338764e-05, |
| "loss": 0.0268, |
| "step": 13780 |
| }, |
| { |
| "epoch": 13.221476510067115, |
| "grad_norm": 0.35919350385665894, |
| "learning_rate": 4.911607564593331e-05, |
| "loss": 0.0308, |
| "step": 13790 |
| }, |
| { |
| "epoch": 13.231064237775648, |
| "grad_norm": 0.23123154044151306, |
| "learning_rate": 4.905249132347796e-05, |
| "loss": 0.0293, |
| "step": 13800 |
| }, |
| { |
| "epoch": 13.24065196548418, |
| "grad_norm": 0.2878974378108978, |
| "learning_rate": 4.89889085338349e-05, |
| "loss": 0.0366, |
| "step": 13810 |
| }, |
| { |
| "epoch": 13.250239693192713, |
| "grad_norm": 0.1915551722049713, |
| "learning_rate": 4.892532737986387e-05, |
| "loss": 0.0326, |
| "step": 13820 |
| }, |
| { |
| "epoch": 13.259827420901246, |
| "grad_norm": 0.29005202651023865, |
| "learning_rate": 4.886174796442193e-05, |
| "loss": 0.0332, |
| "step": 13830 |
| }, |
| { |
| "epoch": 13.269415148609779, |
| "grad_norm": 0.335665225982666, |
| "learning_rate": 4.879817039036336e-05, |
| "loss": 0.0254, |
| "step": 13840 |
| }, |
| { |
| "epoch": 13.279002876318312, |
| "grad_norm": 0.1871231645345688, |
| "learning_rate": 4.873459476053946e-05, |
| "loss": 0.0288, |
| "step": 13850 |
| }, |
| { |
| "epoch": 13.288590604026846, |
| "grad_norm": 0.26077544689178467, |
| "learning_rate": 4.867102117779834e-05, |
| "loss": 0.031, |
| "step": 13860 |
| }, |
| { |
| "epoch": 13.298178331735379, |
| "grad_norm": 0.46799513697624207, |
| "learning_rate": 4.8607449744984836e-05, |
| "loss": 0.0292, |
| "step": 13870 |
| }, |
| { |
| "epoch": 13.307766059443912, |
| "grad_norm": 0.24073362350463867, |
| "learning_rate": 4.8543880564940327e-05, |
| "loss": 0.0268, |
| "step": 13880 |
| }, |
| { |
| "epoch": 13.317353787152445, |
| "grad_norm": 0.22020606696605682, |
| "learning_rate": 4.848031374050251e-05, |
| "loss": 0.0339, |
| "step": 13890 |
| }, |
| { |
| "epoch": 13.326941514860978, |
| "grad_norm": 0.20859257876873016, |
| "learning_rate": 4.8416749374505285e-05, |
| "loss": 0.0319, |
| "step": 13900 |
| }, |
| { |
| "epoch": 13.336529242569512, |
| "grad_norm": 4.301571846008301, |
| "learning_rate": 4.835318756977856e-05, |
| "loss": 0.0382, |
| "step": 13910 |
| }, |
| { |
| "epoch": 13.346116970278045, |
| "grad_norm": 0.33860668540000916, |
| "learning_rate": 4.828962842914812e-05, |
| "loss": 0.0334, |
| "step": 13920 |
| }, |
| { |
| "epoch": 13.355704697986576, |
| "grad_norm": 0.24827070534229279, |
| "learning_rate": 4.8226072055435425e-05, |
| "loss": 0.0267, |
| "step": 13930 |
| }, |
| { |
| "epoch": 13.36529242569511, |
| "grad_norm": 0.22739817202091217, |
| "learning_rate": 4.816251855145748e-05, |
| "loss": 0.0308, |
| "step": 13940 |
| }, |
| { |
| "epoch": 13.374880153403643, |
| "grad_norm": 0.33846351504325867, |
| "learning_rate": 4.809896802002662e-05, |
| "loss": 0.0337, |
| "step": 13950 |
| }, |
| { |
| "epoch": 13.384467881112176, |
| "grad_norm": 0.2737593352794647, |
| "learning_rate": 4.8035420563950395e-05, |
| "loss": 0.0358, |
| "step": 13960 |
| }, |
| { |
| "epoch": 13.39405560882071, |
| "grad_norm": 0.3176287114620209, |
| "learning_rate": 4.797187628603136e-05, |
| "loss": 0.0273, |
| "step": 13970 |
| }, |
| { |
| "epoch": 13.403643336529242, |
| "grad_norm": 0.2898380756378174, |
| "learning_rate": 4.790833528906696e-05, |
| "loss": 0.0324, |
| "step": 13980 |
| }, |
| { |
| "epoch": 13.413231064237776, |
| "grad_norm": 0.48169559240341187, |
| "learning_rate": 4.784479767584929e-05, |
| "loss": 0.0269, |
| "step": 13990 |
| }, |
| { |
| "epoch": 13.422818791946309, |
| "grad_norm": 0.23410825431346893, |
| "learning_rate": 4.778126354916498e-05, |
| "loss": 0.0307, |
| "step": 14000 |
| }, |
| { |
| "epoch": 13.432406519654842, |
| "grad_norm": 0.39884692430496216, |
| "learning_rate": 4.771773301179506e-05, |
| "loss": 0.0324, |
| "step": 14010 |
| }, |
| { |
| "epoch": 13.441994247363375, |
| "grad_norm": 0.26422742009162903, |
| "learning_rate": 4.765420616651468e-05, |
| "loss": 0.0318, |
| "step": 14020 |
| }, |
| { |
| "epoch": 13.451581975071909, |
| "grad_norm": 0.261283278465271, |
| "learning_rate": 4.7590683116093135e-05, |
| "loss": 0.0312, |
| "step": 14030 |
| }, |
| { |
| "epoch": 13.461169702780442, |
| "grad_norm": 0.28744202852249146, |
| "learning_rate": 4.752716396329346e-05, |
| "loss": 0.0349, |
| "step": 14040 |
| }, |
| { |
| "epoch": 13.470757430488973, |
| "grad_norm": 0.2296159714460373, |
| "learning_rate": 4.746364881087244e-05, |
| "loss": 0.0329, |
| "step": 14050 |
| }, |
| { |
| "epoch": 13.480345158197506, |
| "grad_norm": 0.2238318920135498, |
| "learning_rate": 4.7400137761580376e-05, |
| "loss": 0.0287, |
| "step": 14060 |
| }, |
| { |
| "epoch": 13.48993288590604, |
| "grad_norm": 0.3209201395511627, |
| "learning_rate": 4.733663091816095e-05, |
| "loss": 0.0312, |
| "step": 14070 |
| }, |
| { |
| "epoch": 13.499520613614573, |
| "grad_norm": 4.273186206817627, |
| "learning_rate": 4.7273128383351015e-05, |
| "loss": 0.0292, |
| "step": 14080 |
| }, |
| { |
| "epoch": 13.509108341323106, |
| "grad_norm": 0.2698652446269989, |
| "learning_rate": 4.720963025988047e-05, |
| "loss": 0.0319, |
| "step": 14090 |
| }, |
| { |
| "epoch": 13.51869606903164, |
| "grad_norm": 0.28722748160362244, |
| "learning_rate": 4.714613665047207e-05, |
| "loss": 0.0285, |
| "step": 14100 |
| }, |
| { |
| "epoch": 13.528283796740173, |
| "grad_norm": 0.2316875010728836, |
| "learning_rate": 4.708264765784129e-05, |
| "loss": 0.0368, |
| "step": 14110 |
| }, |
| { |
| "epoch": 13.537871524448706, |
| "grad_norm": 0.5195225477218628, |
| "learning_rate": 4.701916338469608e-05, |
| "loss": 0.031, |
| "step": 14120 |
| }, |
| { |
| "epoch": 13.547459252157239, |
| "grad_norm": 0.40332475304603577, |
| "learning_rate": 4.6955683933736814e-05, |
| "loss": 0.032, |
| "step": 14130 |
| }, |
| { |
| "epoch": 13.557046979865772, |
| "grad_norm": 0.2699570059776306, |
| "learning_rate": 4.689220940765605e-05, |
| "loss": 0.0334, |
| "step": 14140 |
| }, |
| { |
| "epoch": 13.566634707574305, |
| "grad_norm": 0.26050880551338196, |
| "learning_rate": 4.682873990913835e-05, |
| "loss": 0.0333, |
| "step": 14150 |
| }, |
| { |
| "epoch": 13.576222435282839, |
| "grad_norm": 0.2826980650424957, |
| "learning_rate": 4.676527554086018e-05, |
| "loss": 0.0282, |
| "step": 14160 |
| }, |
| { |
| "epoch": 13.585810162991372, |
| "grad_norm": 0.17002440989017487, |
| "learning_rate": 4.6701816405489686e-05, |
| "loss": 0.0325, |
| "step": 14170 |
| }, |
| { |
| "epoch": 13.595397890699903, |
| "grad_norm": 0.33742156624794006, |
| "learning_rate": 4.6638362605686555e-05, |
| "loss": 0.0283, |
| "step": 14180 |
| }, |
| { |
| "epoch": 13.604985618408437, |
| "grad_norm": 0.29989632964134216, |
| "learning_rate": 4.657491424410185e-05, |
| "loss": 0.0327, |
| "step": 14190 |
| }, |
| { |
| "epoch": 13.61457334611697, |
| "grad_norm": 0.2583453357219696, |
| "learning_rate": 4.6511471423377815e-05, |
| "loss": 0.0285, |
| "step": 14200 |
| }, |
| { |
| "epoch": 13.624161073825503, |
| "grad_norm": 0.2405027151107788, |
| "learning_rate": 4.6448034246147754e-05, |
| "loss": 0.0262, |
| "step": 14210 |
| }, |
| { |
| "epoch": 13.633748801534036, |
| "grad_norm": 0.3429577052593231, |
| "learning_rate": 4.638460281503582e-05, |
| "loss": 0.0429, |
| "step": 14220 |
| }, |
| { |
| "epoch": 13.64333652924257, |
| "grad_norm": 0.30057376623153687, |
| "learning_rate": 4.6321177232656894e-05, |
| "loss": 0.0255, |
| "step": 14230 |
| }, |
| { |
| "epoch": 13.652924256951103, |
| "grad_norm": 0.25279220938682556, |
| "learning_rate": 4.6257757601616364e-05, |
| "loss": 0.0333, |
| "step": 14240 |
| }, |
| { |
| "epoch": 13.662511984659636, |
| "grad_norm": 1.2111369371414185, |
| "learning_rate": 4.6194344024510036e-05, |
| "loss": 0.0325, |
| "step": 14250 |
| }, |
| { |
| "epoch": 13.67209971236817, |
| "grad_norm": 1.4284824132919312, |
| "learning_rate": 4.613093660392386e-05, |
| "loss": 0.0368, |
| "step": 14260 |
| }, |
| { |
| "epoch": 13.681687440076702, |
| "grad_norm": 1.6276288032531738, |
| "learning_rate": 4.6067535442433885e-05, |
| "loss": 0.0343, |
| "step": 14270 |
| }, |
| { |
| "epoch": 13.691275167785236, |
| "grad_norm": 1.3329591751098633, |
| "learning_rate": 4.6004140642606e-05, |
| "loss": 0.0322, |
| "step": 14280 |
| }, |
| { |
| "epoch": 13.700862895493769, |
| "grad_norm": 0.2651832103729248, |
| "learning_rate": 4.5940752306995824e-05, |
| "loss": 0.0337, |
| "step": 14290 |
| }, |
| { |
| "epoch": 13.7104506232023, |
| "grad_norm": 0.1592620313167572, |
| "learning_rate": 4.58773705381485e-05, |
| "loss": 0.0309, |
| "step": 14300 |
| }, |
| { |
| "epoch": 13.720038350910833, |
| "grad_norm": 0.7592516541481018, |
| "learning_rate": 4.581399543859855e-05, |
| "loss": 0.0355, |
| "step": 14310 |
| }, |
| { |
| "epoch": 13.729626078619367, |
| "grad_norm": 0.27996954321861267, |
| "learning_rate": 4.5750627110869724e-05, |
| "loss": 0.0299, |
| "step": 14320 |
| }, |
| { |
| "epoch": 13.7392138063279, |
| "grad_norm": 0.19375735521316528, |
| "learning_rate": 4.5687265657474797e-05, |
| "loss": 0.0354, |
| "step": 14330 |
| }, |
| { |
| "epoch": 13.748801534036433, |
| "grad_norm": 0.263683944940567, |
| "learning_rate": 4.562391118091544e-05, |
| "loss": 0.0342, |
| "step": 14340 |
| }, |
| { |
| "epoch": 13.758389261744966, |
| "grad_norm": 0.4312153160572052, |
| "learning_rate": 4.556056378368203e-05, |
| "loss": 0.0349, |
| "step": 14350 |
| }, |
| { |
| "epoch": 13.7679769894535, |
| "grad_norm": 0.3268071413040161, |
| "learning_rate": 4.549722356825349e-05, |
| "loss": 0.0297, |
| "step": 14360 |
| }, |
| { |
| "epoch": 13.777564717162033, |
| "grad_norm": 0.43241703510284424, |
| "learning_rate": 4.543389063709712e-05, |
| "loss": 0.0333, |
| "step": 14370 |
| }, |
| { |
| "epoch": 13.787152444870566, |
| "grad_norm": 0.2650851905345917, |
| "learning_rate": 4.537056509266845e-05, |
| "loss": 0.0338, |
| "step": 14380 |
| }, |
| { |
| "epoch": 13.7967401725791, |
| "grad_norm": 0.34464454650878906, |
| "learning_rate": 4.530724703741104e-05, |
| "loss": 0.0334, |
| "step": 14390 |
| }, |
| { |
| "epoch": 13.806327900287632, |
| "grad_norm": 0.2718554735183716, |
| "learning_rate": 4.524393657375635e-05, |
| "loss": 0.0295, |
| "step": 14400 |
| }, |
| { |
| "epoch": 13.815915627996166, |
| "grad_norm": 0.27128416299819946, |
| "learning_rate": 4.5180633804123555e-05, |
| "loss": 0.0367, |
| "step": 14410 |
| }, |
| { |
| "epoch": 13.825503355704697, |
| "grad_norm": 0.190488800406456, |
| "learning_rate": 4.511733883091939e-05, |
| "loss": 0.0273, |
| "step": 14420 |
| }, |
| { |
| "epoch": 13.83509108341323, |
| "grad_norm": 0.45956146717071533, |
| "learning_rate": 4.5054051756537965e-05, |
| "loss": 0.0333, |
| "step": 14430 |
| }, |
| { |
| "epoch": 13.844678811121764, |
| "grad_norm": 0.2585156559944153, |
| "learning_rate": 4.499077268336063e-05, |
| "loss": 0.0277, |
| "step": 14440 |
| }, |
| { |
| "epoch": 13.854266538830297, |
| "grad_norm": 0.209930419921875, |
| "learning_rate": 4.492750171375576e-05, |
| "loss": 0.0317, |
| "step": 14450 |
| }, |
| { |
| "epoch": 13.86385426653883, |
| "grad_norm": 0.25458142161369324, |
| "learning_rate": 4.486423895007866e-05, |
| "loss": 0.0402, |
| "step": 14460 |
| }, |
| { |
| "epoch": 13.873441994247363, |
| "grad_norm": 0.2012961506843567, |
| "learning_rate": 4.480098449467132e-05, |
| "loss": 0.031, |
| "step": 14470 |
| }, |
| { |
| "epoch": 13.883029721955896, |
| "grad_norm": 0.2313721477985382, |
| "learning_rate": 4.473773844986229e-05, |
| "loss": 0.0278, |
| "step": 14480 |
| }, |
| { |
| "epoch": 13.89261744966443, |
| "grad_norm": 0.3655869960784912, |
| "learning_rate": 4.467450091796658e-05, |
| "loss": 0.0356, |
| "step": 14490 |
| }, |
| { |
| "epoch": 13.902205177372963, |
| "grad_norm": 0.2222936451435089, |
| "learning_rate": 4.461127200128536e-05, |
| "loss": 0.0335, |
| "step": 14500 |
| }, |
| { |
| "epoch": 13.911792905081496, |
| "grad_norm": 0.2714097797870636, |
| "learning_rate": 4.4548051802105914e-05, |
| "loss": 0.0289, |
| "step": 14510 |
| }, |
| { |
| "epoch": 13.92138063279003, |
| "grad_norm": 0.28923454880714417, |
| "learning_rate": 4.448484042270134e-05, |
| "loss": 0.0321, |
| "step": 14520 |
| }, |
| { |
| "epoch": 13.930968360498563, |
| "grad_norm": 0.3318518400192261, |
| "learning_rate": 4.4421637965330554e-05, |
| "loss": 0.0302, |
| "step": 14530 |
| }, |
| { |
| "epoch": 13.940556088207096, |
| "grad_norm": 0.21569694578647614, |
| "learning_rate": 4.4358444532237996e-05, |
| "loss": 0.0347, |
| "step": 14540 |
| }, |
| { |
| "epoch": 13.950143815915627, |
| "grad_norm": 0.24663789570331573, |
| "learning_rate": 4.429526022565352e-05, |
| "loss": 0.0293, |
| "step": 14550 |
| }, |
| { |
| "epoch": 13.95973154362416, |
| "grad_norm": 0.17170065641403198, |
| "learning_rate": 4.423208514779222e-05, |
| "loss": 0.0383, |
| "step": 14560 |
| }, |
| { |
| "epoch": 13.969319271332694, |
| "grad_norm": 0.2217435985803604, |
| "learning_rate": 4.4168919400854245e-05, |
| "loss": 0.0357, |
| "step": 14570 |
| }, |
| { |
| "epoch": 13.978906999041227, |
| "grad_norm": 0.18699301779270172, |
| "learning_rate": 4.4105763087024666e-05, |
| "loss": 0.0261, |
| "step": 14580 |
| }, |
| { |
| "epoch": 13.98849472674976, |
| "grad_norm": 0.35671454668045044, |
| "learning_rate": 4.404261630847329e-05, |
| "loss": 0.0356, |
| "step": 14590 |
| }, |
| { |
| "epoch": 13.998082454458293, |
| "grad_norm": 0.33537557721138, |
| "learning_rate": 4.3979479167354477e-05, |
| "loss": 0.0317, |
| "step": 14600 |
| }, |
| { |
| "epoch": 14.007670182166827, |
| "grad_norm": 0.25765296816825867, |
| "learning_rate": 4.391635176580702e-05, |
| "loss": 0.0314, |
| "step": 14610 |
| }, |
| { |
| "epoch": 14.01725790987536, |
| "grad_norm": 0.18932734429836273, |
| "learning_rate": 4.385323420595395e-05, |
| "loss": 0.036, |
| "step": 14620 |
| }, |
| { |
| "epoch": 14.026845637583893, |
| "grad_norm": 0.2255479097366333, |
| "learning_rate": 4.3790126589902344e-05, |
| "loss": 0.0329, |
| "step": 14630 |
| }, |
| { |
| "epoch": 14.036433365292426, |
| "grad_norm": 0.19790147244930267, |
| "learning_rate": 4.372702901974331e-05, |
| "loss": 0.032, |
| "step": 14640 |
| }, |
| { |
| "epoch": 14.04602109300096, |
| "grad_norm": 0.16959276795387268, |
| "learning_rate": 4.366394159755155e-05, |
| "loss": 0.0328, |
| "step": 14650 |
| }, |
| { |
| "epoch": 14.055608820709493, |
| "grad_norm": 0.36921027302742004, |
| "learning_rate": 4.3600864425385434e-05, |
| "loss": 0.0313, |
| "step": 14660 |
| }, |
| { |
| "epoch": 14.065196548418024, |
| "grad_norm": 0.1770399957895279, |
| "learning_rate": 4.3537797605286736e-05, |
| "loss": 0.0265, |
| "step": 14670 |
| }, |
| { |
| "epoch": 14.074784276126557, |
| "grad_norm": 0.28713101148605347, |
| "learning_rate": 4.347474123928048e-05, |
| "loss": 0.0282, |
| "step": 14680 |
| }, |
| { |
| "epoch": 14.08437200383509, |
| "grad_norm": 0.1728815734386444, |
| "learning_rate": 4.3411695429374793e-05, |
| "loss": 0.03, |
| "step": 14690 |
| }, |
| { |
| "epoch": 14.093959731543624, |
| "grad_norm": 0.2004602998495102, |
| "learning_rate": 4.3348660277560694e-05, |
| "loss": 0.0301, |
| "step": 14700 |
| }, |
| { |
| "epoch": 14.103547459252157, |
| "grad_norm": 0.24591505527496338, |
| "learning_rate": 4.328563588581199e-05, |
| "loss": 0.0384, |
| "step": 14710 |
| }, |
| { |
| "epoch": 14.11313518696069, |
| "grad_norm": 0.3375163674354553, |
| "learning_rate": 4.322262235608508e-05, |
| "loss": 0.0339, |
| "step": 14720 |
| }, |
| { |
| "epoch": 14.122722914669223, |
| "grad_norm": 0.22719378769397736, |
| "learning_rate": 4.315961979031875e-05, |
| "loss": 0.0323, |
| "step": 14730 |
| }, |
| { |
| "epoch": 14.132310642377757, |
| "grad_norm": 0.34426233172416687, |
| "learning_rate": 4.30966282904341e-05, |
| "loss": 0.0335, |
| "step": 14740 |
| }, |
| { |
| "epoch": 14.14189837008629, |
| "grad_norm": 0.30899283289909363, |
| "learning_rate": 4.3033647958334306e-05, |
| "loss": 0.0334, |
| "step": 14750 |
| }, |
| { |
| "epoch": 14.151486097794823, |
| "grad_norm": 0.3567700684070587, |
| "learning_rate": 4.2970678895904476e-05, |
| "loss": 0.0356, |
| "step": 14760 |
| }, |
| { |
| "epoch": 14.161073825503356, |
| "grad_norm": 0.22836564481258392, |
| "learning_rate": 4.29077212050115e-05, |
| "loss": 0.0321, |
| "step": 14770 |
| }, |
| { |
| "epoch": 14.17066155321189, |
| "grad_norm": 0.17751692235469818, |
| "learning_rate": 4.284477498750383e-05, |
| "loss": 0.0302, |
| "step": 14780 |
| }, |
| { |
| "epoch": 14.180249280920421, |
| "grad_norm": 0.3431791067123413, |
| "learning_rate": 4.278184034521144e-05, |
| "loss": 0.0332, |
| "step": 14790 |
| }, |
| { |
| "epoch": 14.189837008628954, |
| "grad_norm": 0.26100659370422363, |
| "learning_rate": 4.27189173799455e-05, |
| "loss": 0.0315, |
| "step": 14800 |
| }, |
| { |
| "epoch": 14.199424736337487, |
| "grad_norm": 0.2879122197628021, |
| "learning_rate": 4.265600619349832e-05, |
| "loss": 0.0277, |
| "step": 14810 |
| }, |
| { |
| "epoch": 14.20901246404602, |
| "grad_norm": 0.26366403698921204, |
| "learning_rate": 4.2593106887643156e-05, |
| "loss": 0.0332, |
| "step": 14820 |
| }, |
| { |
| "epoch": 14.218600191754554, |
| "grad_norm": 0.25366711616516113, |
| "learning_rate": 4.2530219564134046e-05, |
| "loss": 0.0309, |
| "step": 14830 |
| }, |
| { |
| "epoch": 14.228187919463087, |
| "grad_norm": 0.259772926568985, |
| "learning_rate": 4.246734432470563e-05, |
| "loss": 0.0302, |
| "step": 14840 |
| }, |
| { |
| "epoch": 14.23777564717162, |
| "grad_norm": 0.32079434394836426, |
| "learning_rate": 4.240448127107301e-05, |
| "loss": 0.0293, |
| "step": 14850 |
| }, |
| { |
| "epoch": 14.247363374880154, |
| "grad_norm": 0.25380274653434753, |
| "learning_rate": 4.234163050493158e-05, |
| "loss": 0.0287, |
| "step": 14860 |
| }, |
| { |
| "epoch": 14.256951102588687, |
| "grad_norm": 0.26985570788383484, |
| "learning_rate": 4.2278792127956846e-05, |
| "loss": 0.0307, |
| "step": 14870 |
| }, |
| { |
| "epoch": 14.26653883029722, |
| "grad_norm": 0.2960470914840698, |
| "learning_rate": 4.221596624180426e-05, |
| "loss": 0.0313, |
| "step": 14880 |
| }, |
| { |
| "epoch": 14.276126558005753, |
| "grad_norm": 0.41474372148513794, |
| "learning_rate": 4.21531529481091e-05, |
| "loss": 0.0287, |
| "step": 14890 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 0.2426476627588272, |
| "learning_rate": 4.2090352348486256e-05, |
| "loss": 0.0272, |
| "step": 14900 |
| }, |
| { |
| "epoch": 14.29530201342282, |
| "grad_norm": 0.2811989486217499, |
| "learning_rate": 4.202756454453007e-05, |
| "loss": 0.0328, |
| "step": 14910 |
| }, |
| { |
| "epoch": 14.304889741131351, |
| "grad_norm": 0.20871858298778534, |
| "learning_rate": 4.196478963781421e-05, |
| "loss": 0.028, |
| "step": 14920 |
| }, |
| { |
| "epoch": 14.314477468839884, |
| "grad_norm": 0.1654272824525833, |
| "learning_rate": 4.190202772989144e-05, |
| "loss": 0.0301, |
| "step": 14930 |
| }, |
| { |
| "epoch": 14.324065196548418, |
| "grad_norm": 0.6324641108512878, |
| "learning_rate": 4.183927892229354e-05, |
| "loss": 0.0284, |
| "step": 14940 |
| }, |
| { |
| "epoch": 14.33365292425695, |
| "grad_norm": 1.3468248844146729, |
| "learning_rate": 4.177654331653108e-05, |
| "loss": 0.0331, |
| "step": 14950 |
| }, |
| { |
| "epoch": 14.343240651965484, |
| "grad_norm": 0.16660985350608826, |
| "learning_rate": 4.171382101409327e-05, |
| "loss": 0.0262, |
| "step": 14960 |
| }, |
| { |
| "epoch": 14.352828379674017, |
| "grad_norm": 0.32994958758354187, |
| "learning_rate": 4.165111211644779e-05, |
| "loss": 0.0259, |
| "step": 14970 |
| }, |
| { |
| "epoch": 14.36241610738255, |
| "grad_norm": 0.20298174023628235, |
| "learning_rate": 4.158841672504066e-05, |
| "loss": 0.0298, |
| "step": 14980 |
| }, |
| { |
| "epoch": 14.372003835091084, |
| "grad_norm": 0.23911802470684052, |
| "learning_rate": 4.1525734941296026e-05, |
| "loss": 0.0315, |
| "step": 14990 |
| }, |
| { |
| "epoch": 14.381591562799617, |
| "grad_norm": 0.22921425104141235, |
| "learning_rate": 4.146306686661602e-05, |
| "loss": 0.0336, |
| "step": 15000 |
| }, |
| { |
| "epoch": 14.39117929050815, |
| "grad_norm": 0.24981558322906494, |
| "learning_rate": 4.140041260238062e-05, |
| "loss": 0.0326, |
| "step": 15010 |
| }, |
| { |
| "epoch": 14.400767018216683, |
| "grad_norm": 0.24186521768569946, |
| "learning_rate": 4.1337772249947435e-05, |
| "loss": 0.0264, |
| "step": 15020 |
| }, |
| { |
| "epoch": 14.410354745925215, |
| "grad_norm": 0.26285290718078613, |
| "learning_rate": 4.1275145910651603e-05, |
| "loss": 0.0304, |
| "step": 15030 |
| }, |
| { |
| "epoch": 14.419942473633748, |
| "grad_norm": 0.2739505469799042, |
| "learning_rate": 4.121253368580555e-05, |
| "loss": 0.0263, |
| "step": 15040 |
| }, |
| { |
| "epoch": 14.429530201342281, |
| "grad_norm": 0.6612746119499207, |
| "learning_rate": 4.1149935676698904e-05, |
| "loss": 0.0395, |
| "step": 15050 |
| }, |
| { |
| "epoch": 14.439117929050814, |
| "grad_norm": 0.2866060435771942, |
| "learning_rate": 4.108735198459827e-05, |
| "loss": 0.025, |
| "step": 15060 |
| }, |
| { |
| "epoch": 14.448705656759348, |
| "grad_norm": 0.27634814381599426, |
| "learning_rate": 4.102478271074712e-05, |
| "loss": 0.0278, |
| "step": 15070 |
| }, |
| { |
| "epoch": 14.458293384467881, |
| "grad_norm": 0.25131815671920776, |
| "learning_rate": 4.0962227956365574e-05, |
| "loss": 0.0271, |
| "step": 15080 |
| }, |
| { |
| "epoch": 14.467881112176414, |
| "grad_norm": 0.3638950288295746, |
| "learning_rate": 4.089968782265025e-05, |
| "loss": 0.0297, |
| "step": 15090 |
| }, |
| { |
| "epoch": 14.477468839884947, |
| "grad_norm": 0.2399180382490158, |
| "learning_rate": 4.083716241077419e-05, |
| "loss": 0.0284, |
| "step": 15100 |
| }, |
| { |
| "epoch": 14.48705656759348, |
| "grad_norm": 0.27603140473365784, |
| "learning_rate": 4.077465182188654e-05, |
| "loss": 0.0302, |
| "step": 15110 |
| }, |
| { |
| "epoch": 14.496644295302014, |
| "grad_norm": 0.17177820205688477, |
| "learning_rate": 4.07121561571125e-05, |
| "loss": 0.0382, |
| "step": 15120 |
| }, |
| { |
| "epoch": 14.506232023010547, |
| "grad_norm": 0.26461273431777954, |
| "learning_rate": 4.064967551755312e-05, |
| "loss": 0.0328, |
| "step": 15130 |
| }, |
| { |
| "epoch": 14.51581975071908, |
| "grad_norm": 0.31283822655677795, |
| "learning_rate": 4.058721000428514e-05, |
| "loss": 0.025, |
| "step": 15140 |
| }, |
| { |
| "epoch": 14.525407478427613, |
| "grad_norm": 0.18203134834766388, |
| "learning_rate": 4.052475971836083e-05, |
| "loss": 0.0286, |
| "step": 15150 |
| }, |
| { |
| "epoch": 14.534995206136145, |
| "grad_norm": 0.295449435710907, |
| "learning_rate": 4.0462324760807846e-05, |
| "loss": 0.033, |
| "step": 15160 |
| }, |
| { |
| "epoch": 14.544582933844678, |
| "grad_norm": 0.16782015562057495, |
| "learning_rate": 4.039990523262902e-05, |
| "loss": 0.0278, |
| "step": 15170 |
| }, |
| { |
| "epoch": 14.554170661553211, |
| "grad_norm": 0.23671366274356842, |
| "learning_rate": 4.033750123480224e-05, |
| "loss": 0.0319, |
| "step": 15180 |
| }, |
| { |
| "epoch": 14.563758389261745, |
| "grad_norm": 0.18487486243247986, |
| "learning_rate": 4.027511286828028e-05, |
| "loss": 0.0297, |
| "step": 15190 |
| }, |
| { |
| "epoch": 14.573346116970278, |
| "grad_norm": 0.19782863557338715, |
| "learning_rate": 4.0212740233990587e-05, |
| "loss": 0.0316, |
| "step": 15200 |
| }, |
| { |
| "epoch": 14.582933844678811, |
| "grad_norm": 0.30595293641090393, |
| "learning_rate": 4.0150383432835186e-05, |
| "loss": 0.0282, |
| "step": 15210 |
| }, |
| { |
| "epoch": 14.592521572387344, |
| "grad_norm": 0.2661206126213074, |
| "learning_rate": 4.00880425656905e-05, |
| "loss": 0.0286, |
| "step": 15220 |
| }, |
| { |
| "epoch": 14.602109300095877, |
| "grad_norm": 0.2635152339935303, |
| "learning_rate": 4.002571773340714e-05, |
| "loss": 0.0334, |
| "step": 15230 |
| }, |
| { |
| "epoch": 14.61169702780441, |
| "grad_norm": 0.27702832221984863, |
| "learning_rate": 3.996340903680979e-05, |
| "loss": 0.0304, |
| "step": 15240 |
| }, |
| { |
| "epoch": 14.621284755512944, |
| "grad_norm": 0.21685196459293365, |
| "learning_rate": 3.9901116576697083e-05, |
| "loss": 0.0394, |
| "step": 15250 |
| }, |
| { |
| "epoch": 14.630872483221477, |
| "grad_norm": 0.2177799493074417, |
| "learning_rate": 3.983884045384131e-05, |
| "loss": 0.0321, |
| "step": 15260 |
| }, |
| { |
| "epoch": 14.64046021093001, |
| "grad_norm": 0.21278002858161926, |
| "learning_rate": 3.977658076898836e-05, |
| "loss": 0.0329, |
| "step": 15270 |
| }, |
| { |
| "epoch": 14.650047938638544, |
| "grad_norm": 0.4188462495803833, |
| "learning_rate": 3.971433762285754e-05, |
| "loss": 0.0324, |
| "step": 15280 |
| }, |
| { |
| "epoch": 14.659635666347075, |
| "grad_norm": 0.4150042235851288, |
| "learning_rate": 3.965211111614139e-05, |
| "loss": 0.0311, |
| "step": 15290 |
| }, |
| { |
| "epoch": 14.669223394055608, |
| "grad_norm": 0.5566287040710449, |
| "learning_rate": 3.958990134950555e-05, |
| "loss": 0.028, |
| "step": 15300 |
| }, |
| { |
| "epoch": 14.678811121764141, |
| "grad_norm": 0.2592385411262512, |
| "learning_rate": 3.9527708423588546e-05, |
| "loss": 0.0354, |
| "step": 15310 |
| }, |
| { |
| "epoch": 14.688398849472675, |
| "grad_norm": 0.20564644038677216, |
| "learning_rate": 3.946553243900169e-05, |
| "loss": 0.0359, |
| "step": 15320 |
| }, |
| { |
| "epoch": 14.697986577181208, |
| "grad_norm": 0.27093440294265747, |
| "learning_rate": 3.9403373496328885e-05, |
| "loss": 0.0377, |
| "step": 15330 |
| }, |
| { |
| "epoch": 14.707574304889741, |
| "grad_norm": 0.35600170493125916, |
| "learning_rate": 3.934123169612645e-05, |
| "loss": 0.0323, |
| "step": 15340 |
| }, |
| { |
| "epoch": 14.717162032598274, |
| "grad_norm": 0.3020756244659424, |
| "learning_rate": 3.927910713892298e-05, |
| "loss": 0.0313, |
| "step": 15350 |
| }, |
| { |
| "epoch": 14.726749760306808, |
| "grad_norm": 0.26487666368484497, |
| "learning_rate": 3.921699992521917e-05, |
| "loss": 0.0322, |
| "step": 15360 |
| }, |
| { |
| "epoch": 14.73633748801534, |
| "grad_norm": 0.2509137988090515, |
| "learning_rate": 3.915491015548766e-05, |
| "loss": 0.0249, |
| "step": 15370 |
| }, |
| { |
| "epoch": 14.745925215723874, |
| "grad_norm": 0.2903117537498474, |
| "learning_rate": 3.9092837930172884e-05, |
| "loss": 0.0325, |
| "step": 15380 |
| }, |
| { |
| "epoch": 14.755512943432407, |
| "grad_norm": 2.1292974948883057, |
| "learning_rate": 3.903078334969087e-05, |
| "loss": 0.0352, |
| "step": 15390 |
| }, |
| { |
| "epoch": 14.765100671140939, |
| "grad_norm": 0.18879927694797516, |
| "learning_rate": 3.8968746514429134e-05, |
| "loss": 0.0348, |
| "step": 15400 |
| }, |
| { |
| "epoch": 14.774688398849472, |
| "grad_norm": 0.27570220828056335, |
| "learning_rate": 3.890672752474646e-05, |
| "loss": 0.0267, |
| "step": 15410 |
| }, |
| { |
| "epoch": 14.784276126558005, |
| "grad_norm": 0.28451746702194214, |
| "learning_rate": 3.884472648097276e-05, |
| "loss": 0.029, |
| "step": 15420 |
| }, |
| { |
| "epoch": 14.793863854266538, |
| "grad_norm": 0.2464732676744461, |
| "learning_rate": 3.878274348340892e-05, |
| "loss": 0.027, |
| "step": 15430 |
| }, |
| { |
| "epoch": 14.803451581975072, |
| "grad_norm": 0.1651841551065445, |
| "learning_rate": 3.872077863232665e-05, |
| "loss": 0.0275, |
| "step": 15440 |
| }, |
| { |
| "epoch": 14.813039309683605, |
| "grad_norm": 0.1864641159772873, |
| "learning_rate": 3.865883202796829e-05, |
| "loss": 0.028, |
| "step": 15450 |
| }, |
| { |
| "epoch": 14.822627037392138, |
| "grad_norm": 0.40212348103523254, |
| "learning_rate": 3.8596903770546636e-05, |
| "loss": 0.0296, |
| "step": 15460 |
| }, |
| { |
| "epoch": 14.832214765100671, |
| "grad_norm": 0.34442323446273804, |
| "learning_rate": 3.853499396024486e-05, |
| "loss": 0.0279, |
| "step": 15470 |
| }, |
| { |
| "epoch": 14.841802492809204, |
| "grad_norm": 0.21626895666122437, |
| "learning_rate": 3.8473102697216226e-05, |
| "loss": 0.0298, |
| "step": 15480 |
| }, |
| { |
| "epoch": 14.851390220517738, |
| "grad_norm": 0.22285476326942444, |
| "learning_rate": 3.841123008158405e-05, |
| "loss": 0.0265, |
| "step": 15490 |
| }, |
| { |
| "epoch": 14.860977948226271, |
| "grad_norm": 0.330901563167572, |
| "learning_rate": 3.8349376213441444e-05, |
| "loss": 0.032, |
| "step": 15500 |
| }, |
| { |
| "epoch": 14.870565675934804, |
| "grad_norm": 0.3265020251274109, |
| "learning_rate": 3.828754119285123e-05, |
| "loss": 0.0291, |
| "step": 15510 |
| }, |
| { |
| "epoch": 14.880153403643337, |
| "grad_norm": 0.2532041668891907, |
| "learning_rate": 3.822572511984569e-05, |
| "loss": 0.0267, |
| "step": 15520 |
| }, |
| { |
| "epoch": 14.889741131351869, |
| "grad_norm": 0.3086365759372711, |
| "learning_rate": 3.816392809442649e-05, |
| "loss": 0.036, |
| "step": 15530 |
| }, |
| { |
| "epoch": 14.899328859060402, |
| "grad_norm": 0.22954832017421722, |
| "learning_rate": 3.8102150216564484e-05, |
| "loss": 0.0302, |
| "step": 15540 |
| }, |
| { |
| "epoch": 14.908916586768935, |
| "grad_norm": 0.2649918496608734, |
| "learning_rate": 3.804039158619951e-05, |
| "loss": 0.037, |
| "step": 15550 |
| }, |
| { |
| "epoch": 14.918504314477468, |
| "grad_norm": 0.22433148324489594, |
| "learning_rate": 3.797865230324033e-05, |
| "loss": 0.0258, |
| "step": 15560 |
| }, |
| { |
| "epoch": 14.928092042186002, |
| "grad_norm": 0.2442513406276703, |
| "learning_rate": 3.791693246756436e-05, |
| "loss": 0.0289, |
| "step": 15570 |
| }, |
| { |
| "epoch": 14.937679769894535, |
| "grad_norm": 0.22684846818447113, |
| "learning_rate": 3.785523217901757e-05, |
| "loss": 0.032, |
| "step": 15580 |
| }, |
| { |
| "epoch": 14.947267497603068, |
| "grad_norm": 0.27900537848472595, |
| "learning_rate": 3.7793551537414313e-05, |
| "loss": 0.0284, |
| "step": 15590 |
| }, |
| { |
| "epoch": 14.956855225311601, |
| "grad_norm": 0.29420506954193115, |
| "learning_rate": 3.7731890642537154e-05, |
| "loss": 0.0278, |
| "step": 15600 |
| }, |
| { |
| "epoch": 14.966442953020135, |
| "grad_norm": 0.23950040340423584, |
| "learning_rate": 3.76702495941367e-05, |
| "loss": 0.03, |
| "step": 15610 |
| }, |
| { |
| "epoch": 14.976030680728668, |
| "grad_norm": 0.3971647322177887, |
| "learning_rate": 3.760862849193148e-05, |
| "loss": 0.0324, |
| "step": 15620 |
| }, |
| { |
| "epoch": 14.985618408437201, |
| "grad_norm": 0.18756671249866486, |
| "learning_rate": 3.754702743560773e-05, |
| "loss": 0.026, |
| "step": 15630 |
| }, |
| { |
| "epoch": 14.995206136145734, |
| "grad_norm": 0.24370504915714264, |
| "learning_rate": 3.748544652481927e-05, |
| "loss": 0.0353, |
| "step": 15640 |
| }, |
| { |
| "epoch": 15.004793863854266, |
| "grad_norm": 0.26173216104507446, |
| "learning_rate": 3.742388585918733e-05, |
| "loss": 0.0356, |
| "step": 15650 |
| }, |
| { |
| "epoch": 15.014381591562799, |
| "grad_norm": 0.22543974220752716, |
| "learning_rate": 3.736234553830038e-05, |
| "loss": 0.0314, |
| "step": 15660 |
| }, |
| { |
| "epoch": 15.023969319271332, |
| "grad_norm": 0.1632285714149475, |
| "learning_rate": 3.7300825661714e-05, |
| "loss": 0.0267, |
| "step": 15670 |
| }, |
| { |
| "epoch": 15.033557046979865, |
| "grad_norm": 0.2474079430103302, |
| "learning_rate": 3.723932632895067e-05, |
| "loss": 0.0289, |
| "step": 15680 |
| }, |
| { |
| "epoch": 15.043144774688399, |
| "grad_norm": 0.21004092693328857, |
| "learning_rate": 3.717784763949964e-05, |
| "loss": 0.0272, |
| "step": 15690 |
| }, |
| { |
| "epoch": 15.052732502396932, |
| "grad_norm": 0.20469725131988525, |
| "learning_rate": 3.7116389692816754e-05, |
| "loss": 0.0282, |
| "step": 15700 |
| }, |
| { |
| "epoch": 15.062320230105465, |
| "grad_norm": 0.4098300337791443, |
| "learning_rate": 3.7054952588324364e-05, |
| "loss": 0.0318, |
| "step": 15710 |
| }, |
| { |
| "epoch": 15.071907957813998, |
| "grad_norm": 0.1645730435848236, |
| "learning_rate": 3.699353642541103e-05, |
| "loss": 0.0307, |
| "step": 15720 |
| }, |
| { |
| "epoch": 15.081495685522532, |
| "grad_norm": 0.16053102910518646, |
| "learning_rate": 3.693214130343148e-05, |
| "loss": 0.0263, |
| "step": 15730 |
| }, |
| { |
| "epoch": 15.091083413231065, |
| "grad_norm": 0.2607749104499817, |
| "learning_rate": 3.687076732170635e-05, |
| "loss": 0.0279, |
| "step": 15740 |
| }, |
| { |
| "epoch": 15.100671140939598, |
| "grad_norm": 0.20249375700950623, |
| "learning_rate": 3.680941457952214e-05, |
| "loss": 0.031, |
| "step": 15750 |
| }, |
| { |
| "epoch": 15.110258868648131, |
| "grad_norm": 0.17298898100852966, |
| "learning_rate": 3.6748083176130955e-05, |
| "loss": 0.0304, |
| "step": 15760 |
| }, |
| { |
| "epoch": 15.119846596356663, |
| "grad_norm": 0.3816901743412018, |
| "learning_rate": 3.6686773210750385e-05, |
| "loss": 0.0267, |
| "step": 15770 |
| }, |
| { |
| "epoch": 15.129434324065196, |
| "grad_norm": 0.26607292890548706, |
| "learning_rate": 3.6625484782563345e-05, |
| "loss": 0.0285, |
| "step": 15780 |
| }, |
| { |
| "epoch": 15.139022051773729, |
| "grad_norm": 0.24211320281028748, |
| "learning_rate": 3.656421799071791e-05, |
| "loss": 0.0325, |
| "step": 15790 |
| }, |
| { |
| "epoch": 15.148609779482262, |
| "grad_norm": 0.3071950376033783, |
| "learning_rate": 3.650297293432713e-05, |
| "loss": 0.0344, |
| "step": 15800 |
| }, |
| { |
| "epoch": 15.158197507190796, |
| "grad_norm": 0.3314298689365387, |
| "learning_rate": 3.6441749712468944e-05, |
| "loss": 0.0297, |
| "step": 15810 |
| }, |
| { |
| "epoch": 15.167785234899329, |
| "grad_norm": 0.2220297008752823, |
| "learning_rate": 3.6380548424185894e-05, |
| "loss": 0.0328, |
| "step": 15820 |
| }, |
| { |
| "epoch": 15.177372962607862, |
| "grad_norm": 0.15199415385723114, |
| "learning_rate": 3.6319369168485104e-05, |
| "loss": 0.025, |
| "step": 15830 |
| }, |
| { |
| "epoch": 15.186960690316395, |
| "grad_norm": 0.2900523841381073, |
| "learning_rate": 3.625821204433803e-05, |
| "loss": 0.0261, |
| "step": 15840 |
| }, |
| { |
| "epoch": 15.196548418024928, |
| "grad_norm": 0.17855972051620483, |
| "learning_rate": 3.61970771506803e-05, |
| "loss": 0.034, |
| "step": 15850 |
| }, |
| { |
| "epoch": 15.206136145733462, |
| "grad_norm": 0.35416078567504883, |
| "learning_rate": 3.613596458641167e-05, |
| "loss": 0.0362, |
| "step": 15860 |
| }, |
| { |
| "epoch": 15.215723873441995, |
| "grad_norm": 0.21492497622966766, |
| "learning_rate": 3.6074874450395666e-05, |
| "loss": 0.0259, |
| "step": 15870 |
| }, |
| { |
| "epoch": 15.225311601150528, |
| "grad_norm": 0.2749202847480774, |
| "learning_rate": 3.6013806841459586e-05, |
| "loss": 0.0257, |
| "step": 15880 |
| }, |
| { |
| "epoch": 15.234899328859061, |
| "grad_norm": 0.16736426949501038, |
| "learning_rate": 3.595276185839426e-05, |
| "loss": 0.0328, |
| "step": 15890 |
| }, |
| { |
| "epoch": 15.244487056567593, |
| "grad_norm": 0.2754712998867035, |
| "learning_rate": 3.5891739599953945e-05, |
| "loss": 0.0276, |
| "step": 15900 |
| }, |
| { |
| "epoch": 15.254074784276126, |
| "grad_norm": 0.29541146755218506, |
| "learning_rate": 3.583074016485611e-05, |
| "loss": 0.031, |
| "step": 15910 |
| }, |
| { |
| "epoch": 15.26366251198466, |
| "grad_norm": 0.26210564374923706, |
| "learning_rate": 3.576976365178132e-05, |
| "loss": 0.0325, |
| "step": 15920 |
| }, |
| { |
| "epoch": 15.273250239693192, |
| "grad_norm": 0.2595176100730896, |
| "learning_rate": 3.5708810159373044e-05, |
| "loss": 0.0375, |
| "step": 15930 |
| }, |
| { |
| "epoch": 15.282837967401726, |
| "grad_norm": 0.21411257982254028, |
| "learning_rate": 3.564787978623753e-05, |
| "loss": 0.0277, |
| "step": 15940 |
| }, |
| { |
| "epoch": 15.292425695110259, |
| "grad_norm": 0.2823658585548401, |
| "learning_rate": 3.5586972630943594e-05, |
| "loss": 0.0259, |
| "step": 15950 |
| }, |
| { |
| "epoch": 15.302013422818792, |
| "grad_norm": 0.2719429135322571, |
| "learning_rate": 3.552608879202252e-05, |
| "loss": 0.0295, |
| "step": 15960 |
| }, |
| { |
| "epoch": 15.311601150527325, |
| "grad_norm": 0.2882955074310303, |
| "learning_rate": 3.5465228367967854e-05, |
| "loss": 0.0297, |
| "step": 15970 |
| }, |
| { |
| "epoch": 15.321188878235859, |
| "grad_norm": 0.22343681752681732, |
| "learning_rate": 3.540439145723529e-05, |
| "loss": 0.0241, |
| "step": 15980 |
| }, |
| { |
| "epoch": 15.330776605944392, |
| "grad_norm": 0.18314386904239655, |
| "learning_rate": 3.534357815824243e-05, |
| "loss": 0.0345, |
| "step": 15990 |
| }, |
| { |
| "epoch": 15.340364333652925, |
| "grad_norm": 0.22451230883598328, |
| "learning_rate": 3.528278856936874e-05, |
| "loss": 0.0259, |
| "step": 16000 |
| }, |
| { |
| "epoch": 15.349952061361458, |
| "grad_norm": 0.23394083976745605, |
| "learning_rate": 3.52220227889553e-05, |
| "loss": 0.0235, |
| "step": 16010 |
| }, |
| { |
| "epoch": 15.35953978906999, |
| "grad_norm": 0.21090802550315857, |
| "learning_rate": 3.516128091530469e-05, |
| "loss": 0.0259, |
| "step": 16020 |
| }, |
| { |
| "epoch": 15.369127516778523, |
| "grad_norm": 0.42782530188560486, |
| "learning_rate": 3.5100563046680764e-05, |
| "loss": 0.0297, |
| "step": 16030 |
| }, |
| { |
| "epoch": 15.378715244487056, |
| "grad_norm": 0.2408047765493393, |
| "learning_rate": 3.503986928130862e-05, |
| "loss": 0.0287, |
| "step": 16040 |
| }, |
| { |
| "epoch": 15.38830297219559, |
| "grad_norm": 0.24126370251178741, |
| "learning_rate": 3.49791997173743e-05, |
| "loss": 0.0295, |
| "step": 16050 |
| }, |
| { |
| "epoch": 15.397890699904123, |
| "grad_norm": 0.28855326771736145, |
| "learning_rate": 3.4918554453024746e-05, |
| "loss": 0.0272, |
| "step": 16060 |
| }, |
| { |
| "epoch": 15.407478427612656, |
| "grad_norm": 0.2622244358062744, |
| "learning_rate": 3.485793358636753e-05, |
| "loss": 0.0264, |
| "step": 16070 |
| }, |
| { |
| "epoch": 15.417066155321189, |
| "grad_norm": 0.433159202337265, |
| "learning_rate": 3.479733721547082e-05, |
| "loss": 0.0331, |
| "step": 16080 |
| }, |
| { |
| "epoch": 15.426653883029722, |
| "grad_norm": 0.35671567916870117, |
| "learning_rate": 3.47367654383631e-05, |
| "loss": 0.0309, |
| "step": 16090 |
| }, |
| { |
| "epoch": 15.436241610738255, |
| "grad_norm": 0.2572173476219177, |
| "learning_rate": 3.467621835303309e-05, |
| "loss": 0.0299, |
| "step": 16100 |
| }, |
| { |
| "epoch": 15.445829338446789, |
| "grad_norm": 0.3275107145309448, |
| "learning_rate": 3.461569605742958e-05, |
| "loss": 0.0258, |
| "step": 16110 |
| }, |
| { |
| "epoch": 15.455417066155322, |
| "grad_norm": 0.38686898350715637, |
| "learning_rate": 3.455519864946125e-05, |
| "loss": 0.0281, |
| "step": 16120 |
| }, |
| { |
| "epoch": 15.465004793863855, |
| "grad_norm": 0.5980708003044128, |
| "learning_rate": 3.449472622699651e-05, |
| "loss": 0.0266, |
| "step": 16130 |
| }, |
| { |
| "epoch": 15.474592521572387, |
| "grad_norm": 0.1607155054807663, |
| "learning_rate": 3.443427888786335e-05, |
| "loss": 0.0286, |
| "step": 16140 |
| }, |
| { |
| "epoch": 15.48418024928092, |
| "grad_norm": 0.19821766018867493, |
| "learning_rate": 3.437385672984918e-05, |
| "loss": 0.0299, |
| "step": 16150 |
| }, |
| { |
| "epoch": 15.493767976989453, |
| "grad_norm": 0.27373266220092773, |
| "learning_rate": 3.431345985070067e-05, |
| "loss": 0.0387, |
| "step": 16160 |
| }, |
| { |
| "epoch": 15.503355704697986, |
| "grad_norm": 0.24755899608135223, |
| "learning_rate": 3.425308834812364e-05, |
| "loss": 0.0268, |
| "step": 16170 |
| }, |
| { |
| "epoch": 15.51294343240652, |
| "grad_norm": 0.34930139780044556, |
| "learning_rate": 3.4192742319782805e-05, |
| "loss": 0.0358, |
| "step": 16180 |
| }, |
| { |
| "epoch": 15.522531160115053, |
| "grad_norm": 0.21849294006824493, |
| "learning_rate": 3.413242186330168e-05, |
| "loss": 0.0327, |
| "step": 16190 |
| }, |
| { |
| "epoch": 15.532118887823586, |
| "grad_norm": 0.2413625419139862, |
| "learning_rate": 3.407212707626243e-05, |
| "loss": 0.0283, |
| "step": 16200 |
| }, |
| { |
| "epoch": 15.541706615532119, |
| "grad_norm": 0.27283817529678345, |
| "learning_rate": 3.401185805620568e-05, |
| "loss": 0.0295, |
| "step": 16210 |
| }, |
| { |
| "epoch": 15.551294343240652, |
| "grad_norm": 0.3242924213409424, |
| "learning_rate": 3.395161490063037e-05, |
| "loss": 0.0328, |
| "step": 16220 |
| }, |
| { |
| "epoch": 15.560882070949186, |
| "grad_norm": 0.2872219383716583, |
| "learning_rate": 3.38913977069936e-05, |
| "loss": 0.0273, |
| "step": 16230 |
| }, |
| { |
| "epoch": 15.570469798657719, |
| "grad_norm": 0.14021213352680206, |
| "learning_rate": 3.3831206572710464e-05, |
| "loss": 0.0271, |
| "step": 16240 |
| }, |
| { |
| "epoch": 15.580057526366252, |
| "grad_norm": 0.19898459315299988, |
| "learning_rate": 3.377104159515393e-05, |
| "loss": 0.0299, |
| "step": 16250 |
| }, |
| { |
| "epoch": 15.589645254074785, |
| "grad_norm": 0.2079470306634903, |
| "learning_rate": 3.371090287165462e-05, |
| "loss": 0.031, |
| "step": 16260 |
| }, |
| { |
| "epoch": 15.599232981783317, |
| "grad_norm": 0.2817933261394501, |
| "learning_rate": 3.3650790499500675e-05, |
| "loss": 0.0273, |
| "step": 16270 |
| }, |
| { |
| "epoch": 15.60882070949185, |
| "grad_norm": 0.20972701907157898, |
| "learning_rate": 3.3590704575937655e-05, |
| "loss": 0.0279, |
| "step": 16280 |
| }, |
| { |
| "epoch": 15.618408437200383, |
| "grad_norm": 0.21050924062728882, |
| "learning_rate": 3.3530645198168295e-05, |
| "loss": 0.0327, |
| "step": 16290 |
| }, |
| { |
| "epoch": 15.627996164908916, |
| "grad_norm": 0.33600106835365295, |
| "learning_rate": 3.3470612463352376e-05, |
| "loss": 0.0314, |
| "step": 16300 |
| }, |
| { |
| "epoch": 15.63758389261745, |
| "grad_norm": 0.33707502484321594, |
| "learning_rate": 3.341060646860659e-05, |
| "loss": 0.029, |
| "step": 16310 |
| }, |
| { |
| "epoch": 15.647171620325983, |
| "grad_norm": 0.2761129140853882, |
| "learning_rate": 3.335062731100441e-05, |
| "loss": 0.0271, |
| "step": 16320 |
| }, |
| { |
| "epoch": 15.656759348034516, |
| "grad_norm": 0.2787131369113922, |
| "learning_rate": 3.3290675087575856e-05, |
| "loss": 0.0252, |
| "step": 16330 |
| }, |
| { |
| "epoch": 15.66634707574305, |
| "grad_norm": 0.23235364258289337, |
| "learning_rate": 3.3230749895307375e-05, |
| "loss": 0.0357, |
| "step": 16340 |
| }, |
| { |
| "epoch": 15.675934803451582, |
| "grad_norm": 0.22941578924655914, |
| "learning_rate": 3.317085183114168e-05, |
| "loss": 0.0223, |
| "step": 16350 |
| }, |
| { |
| "epoch": 15.685522531160116, |
| "grad_norm": 0.2411498874425888, |
| "learning_rate": 3.311098099197761e-05, |
| "loss": 0.0271, |
| "step": 16360 |
| }, |
| { |
| "epoch": 15.695110258868649, |
| "grad_norm": 0.35220983624458313, |
| "learning_rate": 3.3051137474669966e-05, |
| "loss": 0.0262, |
| "step": 16370 |
| }, |
| { |
| "epoch": 15.70469798657718, |
| "grad_norm": 0.28711986541748047, |
| "learning_rate": 3.299132137602934e-05, |
| "loss": 0.0342, |
| "step": 16380 |
| }, |
| { |
| "epoch": 15.714285714285714, |
| "grad_norm": 0.1615312546491623, |
| "learning_rate": 3.293153279282199e-05, |
| "loss": 0.0334, |
| "step": 16390 |
| }, |
| { |
| "epoch": 15.723873441994247, |
| "grad_norm": 0.17363496124744415, |
| "learning_rate": 3.287177182176961e-05, |
| "loss": 0.0279, |
| "step": 16400 |
| }, |
| { |
| "epoch": 15.73346116970278, |
| "grad_norm": 0.3049766421318054, |
| "learning_rate": 3.2812038559549275e-05, |
| "loss": 0.032, |
| "step": 16410 |
| }, |
| { |
| "epoch": 15.743048897411313, |
| "grad_norm": 0.3206036686897278, |
| "learning_rate": 3.275233310279321e-05, |
| "loss": 0.0281, |
| "step": 16420 |
| }, |
| { |
| "epoch": 15.752636625119846, |
| "grad_norm": 0.20691925287246704, |
| "learning_rate": 3.2692655548088704e-05, |
| "loss": 0.026, |
| "step": 16430 |
| }, |
| { |
| "epoch": 15.76222435282838, |
| "grad_norm": 0.2701127827167511, |
| "learning_rate": 3.263300599197781e-05, |
| "loss": 0.0247, |
| "step": 16440 |
| }, |
| { |
| "epoch": 15.771812080536913, |
| "grad_norm": 0.18183131515979767, |
| "learning_rate": 3.2573384530957384e-05, |
| "loss": 0.0249, |
| "step": 16450 |
| }, |
| { |
| "epoch": 15.781399808245446, |
| "grad_norm": 0.260061115026474, |
| "learning_rate": 3.251379126147877e-05, |
| "loss": 0.0249, |
| "step": 16460 |
| }, |
| { |
| "epoch": 15.79098753595398, |
| "grad_norm": 0.2887513041496277, |
| "learning_rate": 3.245422627994777e-05, |
| "loss": 0.0333, |
| "step": 16470 |
| }, |
| { |
| "epoch": 15.800575263662513, |
| "grad_norm": 0.3020176887512207, |
| "learning_rate": 3.239468968272436e-05, |
| "loss": 0.0289, |
| "step": 16480 |
| }, |
| { |
| "epoch": 15.810162991371046, |
| "grad_norm": 0.23766952753067017, |
| "learning_rate": 3.233518156612262e-05, |
| "loss": 0.0302, |
| "step": 16490 |
| }, |
| { |
| "epoch": 15.819750719079579, |
| "grad_norm": 0.31386175751686096, |
| "learning_rate": 3.227570202641056e-05, |
| "loss": 0.0287, |
| "step": 16500 |
| }, |
| { |
| "epoch": 15.82933844678811, |
| "grad_norm": 0.2746824026107788, |
| "learning_rate": 3.2216251159809955e-05, |
| "loss": 0.0293, |
| "step": 16510 |
| }, |
| { |
| "epoch": 15.838926174496644, |
| "grad_norm": 0.21857379376888275, |
| "learning_rate": 3.215682906249621e-05, |
| "loss": 0.0294, |
| "step": 16520 |
| }, |
| { |
| "epoch": 15.848513902205177, |
| "grad_norm": 0.16576367616653442, |
| "learning_rate": 3.209743583059817e-05, |
| "loss": 0.0271, |
| "step": 16530 |
| }, |
| { |
| "epoch": 15.85810162991371, |
| "grad_norm": 0.31498968601226807, |
| "learning_rate": 3.203807156019798e-05, |
| "loss": 0.0312, |
| "step": 16540 |
| }, |
| { |
| "epoch": 15.867689357622243, |
| "grad_norm": 0.2268988937139511, |
| "learning_rate": 3.197873634733096e-05, |
| "loss": 0.0309, |
| "step": 16550 |
| }, |
| { |
| "epoch": 15.877277085330777, |
| "grad_norm": 0.2843955159187317, |
| "learning_rate": 3.1919430287985415e-05, |
| "loss": 0.0271, |
| "step": 16560 |
| }, |
| { |
| "epoch": 15.88686481303931, |
| "grad_norm": 0.270082026720047, |
| "learning_rate": 3.186015347810245e-05, |
| "loss": 0.0267, |
| "step": 16570 |
| }, |
| { |
| "epoch": 15.896452540747843, |
| "grad_norm": 0.13555888831615448, |
| "learning_rate": 3.18009060135759e-05, |
| "loss": 0.0303, |
| "step": 16580 |
| }, |
| { |
| "epoch": 15.906040268456376, |
| "grad_norm": 0.5174959301948547, |
| "learning_rate": 3.17416879902521e-05, |
| "loss": 0.0298, |
| "step": 16590 |
| }, |
| { |
| "epoch": 15.91562799616491, |
| "grad_norm": 0.23616893589496613, |
| "learning_rate": 3.168249950392978e-05, |
| "loss": 0.026, |
| "step": 16600 |
| }, |
| { |
| "epoch": 15.925215723873443, |
| "grad_norm": 0.2044319212436676, |
| "learning_rate": 3.162334065035985e-05, |
| "loss": 0.0294, |
| "step": 16610 |
| }, |
| { |
| "epoch": 15.934803451581976, |
| "grad_norm": 0.2839745879173279, |
| "learning_rate": 3.156421152524532e-05, |
| "loss": 0.0311, |
| "step": 16620 |
| }, |
| { |
| "epoch": 15.944391179290509, |
| "grad_norm": 0.28521618247032166, |
| "learning_rate": 3.150511222424111e-05, |
| "loss": 0.029, |
| "step": 16630 |
| }, |
| { |
| "epoch": 15.95397890699904, |
| "grad_norm": 0.4045862555503845, |
| "learning_rate": 3.1446042842953845e-05, |
| "loss": 0.0347, |
| "step": 16640 |
| }, |
| { |
| "epoch": 15.963566634707574, |
| "grad_norm": 0.2557837963104248, |
| "learning_rate": 3.138700347694179e-05, |
| "loss": 0.0211, |
| "step": 16650 |
| }, |
| { |
| "epoch": 15.973154362416107, |
| "grad_norm": 0.23164719343185425, |
| "learning_rate": 3.132799422171464e-05, |
| "loss": 0.0273, |
| "step": 16660 |
| }, |
| { |
| "epoch": 15.98274209012464, |
| "grad_norm": 0.17888516187667847, |
| "learning_rate": 3.126901517273339e-05, |
| "loss": 0.0252, |
| "step": 16670 |
| }, |
| { |
| "epoch": 15.992329817833173, |
| "grad_norm": 0.2732132077217102, |
| "learning_rate": 3.121006642541014e-05, |
| "loss": 0.0259, |
| "step": 16680 |
| }, |
| { |
| "epoch": 16.001917545541705, |
| "grad_norm": 0.3599238693714142, |
| "learning_rate": 3.115114807510803e-05, |
| "loss": 0.0292, |
| "step": 16690 |
| }, |
| { |
| "epoch": 16.01150527325024, |
| "grad_norm": 0.18428216874599457, |
| "learning_rate": 3.109226021714093e-05, |
| "loss": 0.0238, |
| "step": 16700 |
| }, |
| { |
| "epoch": 16.02109300095877, |
| "grad_norm": 0.1668870896100998, |
| "learning_rate": 3.1033402946773474e-05, |
| "loss": 0.0276, |
| "step": 16710 |
| }, |
| { |
| "epoch": 16.030680728667306, |
| "grad_norm": 0.2498198300600052, |
| "learning_rate": 3.097457635922077e-05, |
| "loss": 0.0326, |
| "step": 16720 |
| }, |
| { |
| "epoch": 16.040268456375838, |
| "grad_norm": 0.27348780632019043, |
| "learning_rate": 3.09157805496483e-05, |
| "loss": 0.0337, |
| "step": 16730 |
| }, |
| { |
| "epoch": 16.049856184084373, |
| "grad_norm": 0.3426136076450348, |
| "learning_rate": 3.085701561317174e-05, |
| "loss": 0.027, |
| "step": 16740 |
| }, |
| { |
| "epoch": 16.059443911792904, |
| "grad_norm": 0.1942438781261444, |
| "learning_rate": 3.079828164485684e-05, |
| "loss": 0.0231, |
| "step": 16750 |
| }, |
| { |
| "epoch": 16.06903163950144, |
| "grad_norm": 0.3608817160129547, |
| "learning_rate": 3.073957873971925e-05, |
| "loss": 0.0246, |
| "step": 16760 |
| }, |
| { |
| "epoch": 16.07861936720997, |
| "grad_norm": 0.2943773567676544, |
| "learning_rate": 3.068090699272436e-05, |
| "loss": 0.033, |
| "step": 16770 |
| }, |
| { |
| "epoch": 16.088207094918506, |
| "grad_norm": 0.3121021091938019, |
| "learning_rate": 3.062226649878717e-05, |
| "loss": 0.0228, |
| "step": 16780 |
| }, |
| { |
| "epoch": 16.097794822627037, |
| "grad_norm": 0.2769118547439575, |
| "learning_rate": 3.056365735277209e-05, |
| "loss": 0.0228, |
| "step": 16790 |
| }, |
| { |
| "epoch": 16.107382550335572, |
| "grad_norm": 0.2802489995956421, |
| "learning_rate": 3.0505079649492853e-05, |
| "loss": 0.0281, |
| "step": 16800 |
| }, |
| { |
| "epoch": 16.116970278044104, |
| "grad_norm": 0.27936017513275146, |
| "learning_rate": 3.0446533483712304e-05, |
| "loss": 0.0285, |
| "step": 16810 |
| }, |
| { |
| "epoch": 16.126558005752635, |
| "grad_norm": 0.2072148621082306, |
| "learning_rate": 3.038801895014229e-05, |
| "loss": 0.0295, |
| "step": 16820 |
| }, |
| { |
| "epoch": 16.13614573346117, |
| "grad_norm": 0.2498210370540619, |
| "learning_rate": 3.0329536143443444e-05, |
| "loss": 0.0292, |
| "step": 16830 |
| }, |
| { |
| "epoch": 16.1457334611697, |
| "grad_norm": 0.274496853351593, |
| "learning_rate": 3.027108515822511e-05, |
| "loss": 0.0292, |
| "step": 16840 |
| }, |
| { |
| "epoch": 16.155321188878236, |
| "grad_norm": 0.40636447072029114, |
| "learning_rate": 3.0212666089045155e-05, |
| "loss": 0.0281, |
| "step": 16850 |
| }, |
| { |
| "epoch": 16.164908916586768, |
| "grad_norm": 0.22214102745056152, |
| "learning_rate": 3.0154279030409794e-05, |
| "loss": 0.0218, |
| "step": 16860 |
| }, |
| { |
| "epoch": 16.174496644295303, |
| "grad_norm": 0.26967325806617737, |
| "learning_rate": 3.0095924076773467e-05, |
| "loss": 0.0255, |
| "step": 16870 |
| }, |
| { |
| "epoch": 16.184084372003834, |
| "grad_norm": 0.23795704543590546, |
| "learning_rate": 3.003760132253868e-05, |
| "loss": 0.0327, |
| "step": 16880 |
| }, |
| { |
| "epoch": 16.19367209971237, |
| "grad_norm": 0.1818399578332901, |
| "learning_rate": 2.9979310862055842e-05, |
| "loss": 0.0312, |
| "step": 16890 |
| }, |
| { |
| "epoch": 16.2032598274209, |
| "grad_norm": 0.23240040242671967, |
| "learning_rate": 2.9921052789623137e-05, |
| "loss": 0.0294, |
| "step": 16900 |
| }, |
| { |
| "epoch": 16.212847555129436, |
| "grad_norm": 0.20948819816112518, |
| "learning_rate": 2.9862827199486327e-05, |
| "loss": 0.0271, |
| "step": 16910 |
| }, |
| { |
| "epoch": 16.222435282837967, |
| "grad_norm": 0.20456750690937042, |
| "learning_rate": 2.9804634185838614e-05, |
| "loss": 0.0258, |
| "step": 16920 |
| }, |
| { |
| "epoch": 16.232023010546502, |
| "grad_norm": 0.2674747705459595, |
| "learning_rate": 2.9746473842820578e-05, |
| "loss": 0.0287, |
| "step": 16930 |
| }, |
| { |
| "epoch": 16.241610738255034, |
| "grad_norm": 0.19764818251132965, |
| "learning_rate": 2.9688346264519866e-05, |
| "loss": 0.0284, |
| "step": 16940 |
| }, |
| { |
| "epoch": 16.251198465963565, |
| "grad_norm": 0.3688560426235199, |
| "learning_rate": 2.9630251544971165e-05, |
| "loss": 0.0289, |
| "step": 16950 |
| }, |
| { |
| "epoch": 16.2607861936721, |
| "grad_norm": 0.34308168292045593, |
| "learning_rate": 2.957218977815598e-05, |
| "loss": 0.0289, |
| "step": 16960 |
| }, |
| { |
| "epoch": 16.27037392138063, |
| "grad_norm": 0.3008866608142853, |
| "learning_rate": 2.9514161058002498e-05, |
| "loss": 0.0307, |
| "step": 16970 |
| }, |
| { |
| "epoch": 16.279961649089167, |
| "grad_norm": 0.12983451783657074, |
| "learning_rate": 2.9456165478385494e-05, |
| "loss": 0.0232, |
| "step": 16980 |
| }, |
| { |
| "epoch": 16.289549376797698, |
| "grad_norm": 0.14965233206748962, |
| "learning_rate": 2.9398203133126085e-05, |
| "loss": 0.0248, |
| "step": 16990 |
| }, |
| { |
| "epoch": 16.299137104506233, |
| "grad_norm": 0.256956547498703, |
| "learning_rate": 2.9340274115991638e-05, |
| "loss": 0.0348, |
| "step": 17000 |
| }, |
| { |
| "epoch": 16.308724832214764, |
| "grad_norm": 0.18191608786582947, |
| "learning_rate": 2.9282378520695618e-05, |
| "loss": 0.0292, |
| "step": 17010 |
| }, |
| { |
| "epoch": 16.3183125599233, |
| "grad_norm": 0.20375274121761322, |
| "learning_rate": 2.922451644089741e-05, |
| "loss": 0.0282, |
| "step": 17020 |
| }, |
| { |
| "epoch": 16.32790028763183, |
| "grad_norm": 0.24703994393348694, |
| "learning_rate": 2.9166687970202177e-05, |
| "loss": 0.0335, |
| "step": 17030 |
| }, |
| { |
| "epoch": 16.337488015340366, |
| "grad_norm": 0.266993910074234, |
| "learning_rate": 2.9108893202160702e-05, |
| "loss": 0.021, |
| "step": 17040 |
| }, |
| { |
| "epoch": 16.347075743048897, |
| "grad_norm": 0.42793118953704834, |
| "learning_rate": 2.9051132230269272e-05, |
| "loss": 0.0257, |
| "step": 17050 |
| }, |
| { |
| "epoch": 16.35666347075743, |
| "grad_norm": 0.36531713604927063, |
| "learning_rate": 2.8993405147969493e-05, |
| "loss": 0.0322, |
| "step": 17060 |
| }, |
| { |
| "epoch": 16.366251198465964, |
| "grad_norm": 0.21013452112674713, |
| "learning_rate": 2.8935712048648112e-05, |
| "loss": 0.0278, |
| "step": 17070 |
| }, |
| { |
| "epoch": 16.375838926174495, |
| "grad_norm": 0.1972169280052185, |
| "learning_rate": 2.8878053025636975e-05, |
| "loss": 0.025, |
| "step": 17080 |
| }, |
| { |
| "epoch": 16.38542665388303, |
| "grad_norm": 0.2844037115573883, |
| "learning_rate": 2.882042817221273e-05, |
| "loss": 0.0265, |
| "step": 17090 |
| }, |
| { |
| "epoch": 16.39501438159156, |
| "grad_norm": 0.18470896780490875, |
| "learning_rate": 2.8762837581596792e-05, |
| "loss": 0.0234, |
| "step": 17100 |
| }, |
| { |
| "epoch": 16.404602109300097, |
| "grad_norm": 0.27581846714019775, |
| "learning_rate": 2.8705281346955116e-05, |
| "loss": 0.0303, |
| "step": 17110 |
| }, |
| { |
| "epoch": 16.414189837008628, |
| "grad_norm": 0.27025681734085083, |
| "learning_rate": 2.86477595613981e-05, |
| "loss": 0.0309, |
| "step": 17120 |
| }, |
| { |
| "epoch": 16.423777564717163, |
| "grad_norm": 0.35465800762176514, |
| "learning_rate": 2.8590272317980437e-05, |
| "loss": 0.0318, |
| "step": 17130 |
| }, |
| { |
| "epoch": 16.433365292425695, |
| "grad_norm": 0.2873314917087555, |
| "learning_rate": 2.8532819709700854e-05, |
| "loss": 0.0335, |
| "step": 17140 |
| }, |
| { |
| "epoch": 16.44295302013423, |
| "grad_norm": 0.3287470042705536, |
| "learning_rate": 2.8475401829502124e-05, |
| "loss": 0.0308, |
| "step": 17150 |
| }, |
| { |
| "epoch": 16.45254074784276, |
| "grad_norm": 0.18719346821308136, |
| "learning_rate": 2.841801877027083e-05, |
| "loss": 0.0297, |
| "step": 17160 |
| }, |
| { |
| "epoch": 16.462128475551296, |
| "grad_norm": 0.16801686584949493, |
| "learning_rate": 2.836067062483721e-05, |
| "loss": 0.026, |
| "step": 17170 |
| }, |
| { |
| "epoch": 16.471716203259827, |
| "grad_norm": 0.3017866909503937, |
| "learning_rate": 2.830335748597502e-05, |
| "loss": 0.0298, |
| "step": 17180 |
| }, |
| { |
| "epoch": 16.48130393096836, |
| "grad_norm": 0.16507741808891296, |
| "learning_rate": 2.8246079446401386e-05, |
| "loss": 0.028, |
| "step": 17190 |
| }, |
| { |
| "epoch": 16.490891658676894, |
| "grad_norm": 0.25729814171791077, |
| "learning_rate": 2.8188836598776662e-05, |
| "loss": 0.0291, |
| "step": 17200 |
| }, |
| { |
| "epoch": 16.500479386385425, |
| "grad_norm": 0.36721915006637573, |
| "learning_rate": 2.8131629035704264e-05, |
| "loss": 0.0324, |
| "step": 17210 |
| }, |
| { |
| "epoch": 16.51006711409396, |
| "grad_norm": 5.430606365203857, |
| "learning_rate": 2.8074456849730507e-05, |
| "loss": 0.026, |
| "step": 17220 |
| }, |
| { |
| "epoch": 16.51965484180249, |
| "grad_norm": 0.18490955233573914, |
| "learning_rate": 2.8017320133344533e-05, |
| "loss": 0.0265, |
| "step": 17230 |
| }, |
| { |
| "epoch": 16.529242569511027, |
| "grad_norm": 0.17146821320056915, |
| "learning_rate": 2.7960218978978047e-05, |
| "loss": 0.0293, |
| "step": 17240 |
| }, |
| { |
| "epoch": 16.538830297219558, |
| "grad_norm": 0.21457697451114655, |
| "learning_rate": 2.7903153479005255e-05, |
| "loss": 0.0294, |
| "step": 17250 |
| }, |
| { |
| "epoch": 16.548418024928093, |
| "grad_norm": 0.2303658127784729, |
| "learning_rate": 2.7846123725742678e-05, |
| "loss": 0.0278, |
| "step": 17260 |
| }, |
| { |
| "epoch": 16.558005752636625, |
| "grad_norm": 0.20711682736873627, |
| "learning_rate": 2.778912981144898e-05, |
| "loss": 0.0245, |
| "step": 17270 |
| }, |
| { |
| "epoch": 16.56759348034516, |
| "grad_norm": 0.2282470464706421, |
| "learning_rate": 2.7732171828324872e-05, |
| "loss": 0.029, |
| "step": 17280 |
| }, |
| { |
| "epoch": 16.57718120805369, |
| "grad_norm": 0.27450570464134216, |
| "learning_rate": 2.7675249868512954e-05, |
| "loss": 0.036, |
| "step": 17290 |
| }, |
| { |
| "epoch": 16.586768935762223, |
| "grad_norm": 0.18990963697433472, |
| "learning_rate": 2.761836402409752e-05, |
| "loss": 0.0362, |
| "step": 17300 |
| }, |
| { |
| "epoch": 16.596356663470758, |
| "grad_norm": 0.19880448281764984, |
| "learning_rate": 2.7561514387104464e-05, |
| "loss": 0.0283, |
| "step": 17310 |
| }, |
| { |
| "epoch": 16.60594439117929, |
| "grad_norm": 0.2031632959842682, |
| "learning_rate": 2.750470104950109e-05, |
| "loss": 0.0253, |
| "step": 17320 |
| }, |
| { |
| "epoch": 16.615532118887824, |
| "grad_norm": 0.5270239114761353, |
| "learning_rate": 2.7447924103195976e-05, |
| "loss": 0.0278, |
| "step": 17330 |
| }, |
| { |
| "epoch": 16.625119846596355, |
| "grad_norm": 0.29472750425338745, |
| "learning_rate": 2.7391183640038847e-05, |
| "loss": 0.0284, |
| "step": 17340 |
| }, |
| { |
| "epoch": 16.63470757430489, |
| "grad_norm": 0.21734996140003204, |
| "learning_rate": 2.7334479751820396e-05, |
| "loss": 0.0294, |
| "step": 17350 |
| }, |
| { |
| "epoch": 16.644295302013422, |
| "grad_norm": 0.29278430342674255, |
| "learning_rate": 2.7277812530272147e-05, |
| "loss": 0.0297, |
| "step": 17360 |
| }, |
| { |
| "epoch": 16.653883029721957, |
| "grad_norm": 0.2573314309120178, |
| "learning_rate": 2.7221182067066307e-05, |
| "loss": 0.0241, |
| "step": 17370 |
| }, |
| { |
| "epoch": 16.66347075743049, |
| "grad_norm": 0.23133955895900726, |
| "learning_rate": 2.7164588453815602e-05, |
| "loss": 0.0258, |
| "step": 17380 |
| }, |
| { |
| "epoch": 16.673058485139023, |
| "grad_norm": 0.20745334029197693, |
| "learning_rate": 2.710803178207323e-05, |
| "loss": 0.0242, |
| "step": 17390 |
| }, |
| { |
| "epoch": 16.682646212847555, |
| "grad_norm": 0.22852954268455505, |
| "learning_rate": 2.7051512143332492e-05, |
| "loss": 0.027, |
| "step": 17400 |
| }, |
| { |
| "epoch": 16.69223394055609, |
| "grad_norm": 0.25844722986221313, |
| "learning_rate": 2.6995029629026874e-05, |
| "loss": 0.0244, |
| "step": 17410 |
| }, |
| { |
| "epoch": 16.70182166826462, |
| "grad_norm": 0.23631109297275543, |
| "learning_rate": 2.6938584330529782e-05, |
| "loss": 0.0215, |
| "step": 17420 |
| }, |
| { |
| "epoch": 16.711409395973153, |
| "grad_norm": 0.27872714400291443, |
| "learning_rate": 2.6882176339154404e-05, |
| "loss": 0.0308, |
| "step": 17430 |
| }, |
| { |
| "epoch": 16.720997123681688, |
| "grad_norm": 0.23717211186885834, |
| "learning_rate": 2.6825805746153594e-05, |
| "loss": 0.0266, |
| "step": 17440 |
| }, |
| { |
| "epoch": 16.73058485139022, |
| "grad_norm": 0.281259685754776, |
| "learning_rate": 2.6769472642719695e-05, |
| "loss": 0.0329, |
| "step": 17450 |
| }, |
| { |
| "epoch": 16.740172579098754, |
| "grad_norm": 0.257068932056427, |
| "learning_rate": 2.67131771199844e-05, |
| "loss": 0.0245, |
| "step": 17460 |
| }, |
| { |
| "epoch": 16.749760306807286, |
| "grad_norm": 0.18098169565200806, |
| "learning_rate": 2.665691926901862e-05, |
| "loss": 0.0284, |
| "step": 17470 |
| }, |
| { |
| "epoch": 16.75934803451582, |
| "grad_norm": 0.23477615416049957, |
| "learning_rate": 2.6600699180832307e-05, |
| "loss": 0.026, |
| "step": 17480 |
| }, |
| { |
| "epoch": 16.768935762224352, |
| "grad_norm": 0.24687384068965912, |
| "learning_rate": 2.654451694637433e-05, |
| "loss": 0.0255, |
| "step": 17490 |
| }, |
| { |
| "epoch": 16.778523489932887, |
| "grad_norm": 0.2607274651527405, |
| "learning_rate": 2.6488372656532322e-05, |
| "loss": 0.0294, |
| "step": 17500 |
| }, |
| { |
| "epoch": 16.78811121764142, |
| "grad_norm": 0.4215647578239441, |
| "learning_rate": 2.6432266402132532e-05, |
| "loss": 0.0283, |
| "step": 17510 |
| }, |
| { |
| "epoch": 16.797698945349953, |
| "grad_norm": 0.20454095304012299, |
| "learning_rate": 2.637619827393968e-05, |
| "loss": 0.0306, |
| "step": 17520 |
| }, |
| { |
| "epoch": 16.807286673058485, |
| "grad_norm": 0.19789418578147888, |
| "learning_rate": 2.6320168362656796e-05, |
| "loss": 0.025, |
| "step": 17530 |
| }, |
| { |
| "epoch": 16.81687440076702, |
| "grad_norm": 0.34662866592407227, |
| "learning_rate": 2.6264176758925098e-05, |
| "loss": 0.0317, |
| "step": 17540 |
| }, |
| { |
| "epoch": 16.82646212847555, |
| "grad_norm": 0.20395246148109436, |
| "learning_rate": 2.620822355332383e-05, |
| "loss": 0.0306, |
| "step": 17550 |
| }, |
| { |
| "epoch": 16.836049856184083, |
| "grad_norm": 0.39246705174446106, |
| "learning_rate": 2.615230883637012e-05, |
| "loss": 0.0259, |
| "step": 17560 |
| }, |
| { |
| "epoch": 16.845637583892618, |
| "grad_norm": 0.22869329154491425, |
| "learning_rate": 2.609643269851883e-05, |
| "loss": 0.0285, |
| "step": 17570 |
| }, |
| { |
| "epoch": 16.85522531160115, |
| "grad_norm": 0.3232511281967163, |
| "learning_rate": 2.60405952301624e-05, |
| "loss": 0.0288, |
| "step": 17580 |
| }, |
| { |
| "epoch": 16.864813039309684, |
| "grad_norm": 0.2171912044286728, |
| "learning_rate": 2.5984796521630737e-05, |
| "loss": 0.0249, |
| "step": 17590 |
| }, |
| { |
| "epoch": 16.874400767018216, |
| "grad_norm": 0.28310737013816833, |
| "learning_rate": 2.592903666319103e-05, |
| "loss": 0.0295, |
| "step": 17600 |
| }, |
| { |
| "epoch": 16.88398849472675, |
| "grad_norm": 0.19829969108104706, |
| "learning_rate": 2.587331574504761e-05, |
| "loss": 0.025, |
| "step": 17610 |
| }, |
| { |
| "epoch": 16.893576222435282, |
| "grad_norm": 0.1657049059867859, |
| "learning_rate": 2.581763385734183e-05, |
| "loss": 0.0244, |
| "step": 17620 |
| }, |
| { |
| "epoch": 16.903163950143817, |
| "grad_norm": 0.256913959980011, |
| "learning_rate": 2.5761991090151906e-05, |
| "loss": 0.0306, |
| "step": 17630 |
| }, |
| { |
| "epoch": 16.91275167785235, |
| "grad_norm": 0.2738933861255646, |
| "learning_rate": 2.5706387533492737e-05, |
| "loss": 0.0326, |
| "step": 17640 |
| }, |
| { |
| "epoch": 16.922339405560884, |
| "grad_norm": 0.2700929343700409, |
| "learning_rate": 2.5650823277315837e-05, |
| "loss": 0.0313, |
| "step": 17650 |
| }, |
| { |
| "epoch": 16.931927133269415, |
| "grad_norm": 0.2965131103992462, |
| "learning_rate": 2.5595298411509094e-05, |
| "loss": 0.0275, |
| "step": 17660 |
| }, |
| { |
| "epoch": 16.941514860977946, |
| "grad_norm": 0.3247256278991699, |
| "learning_rate": 2.553981302589671e-05, |
| "loss": 0.0326, |
| "step": 17670 |
| }, |
| { |
| "epoch": 16.95110258868648, |
| "grad_norm": 0.30926892161369324, |
| "learning_rate": 2.5484367210239e-05, |
| "loss": 0.0297, |
| "step": 17680 |
| }, |
| { |
| "epoch": 16.960690316395013, |
| "grad_norm": 0.15213845670223236, |
| "learning_rate": 2.5428961054232264e-05, |
| "loss": 0.0271, |
| "step": 17690 |
| }, |
| { |
| "epoch": 16.970278044103548, |
| "grad_norm": 0.20840811729431152, |
| "learning_rate": 2.537359464750866e-05, |
| "loss": 0.0273, |
| "step": 17700 |
| }, |
| { |
| "epoch": 16.97986577181208, |
| "grad_norm": 0.21467389166355133, |
| "learning_rate": 2.5318268079636022e-05, |
| "loss": 0.0314, |
| "step": 17710 |
| }, |
| { |
| "epoch": 16.989453499520614, |
| "grad_norm": 0.2677682638168335, |
| "learning_rate": 2.526298144011775e-05, |
| "loss": 0.0238, |
| "step": 17720 |
| }, |
| { |
| "epoch": 16.999041227229146, |
| "grad_norm": 0.16417664289474487, |
| "learning_rate": 2.5207734818392648e-05, |
| "loss": 0.0258, |
| "step": 17730 |
| }, |
| { |
| "epoch": 17.00862895493768, |
| "grad_norm": 0.20820669829845428, |
| "learning_rate": 2.5152528303834777e-05, |
| "loss": 0.0329, |
| "step": 17740 |
| }, |
| { |
| "epoch": 17.018216682646212, |
| "grad_norm": 0.19568829238414764, |
| "learning_rate": 2.5097361985753316e-05, |
| "loss": 0.0269, |
| "step": 17750 |
| }, |
| { |
| "epoch": 17.027804410354747, |
| "grad_norm": 0.1650926023721695, |
| "learning_rate": 2.5042235953392423e-05, |
| "loss": 0.026, |
| "step": 17760 |
| }, |
| { |
| "epoch": 17.03739213806328, |
| "grad_norm": 0.16357482969760895, |
| "learning_rate": 2.4987150295931082e-05, |
| "loss": 0.0305, |
| "step": 17770 |
| }, |
| { |
| "epoch": 17.046979865771814, |
| "grad_norm": 0.22878289222717285, |
| "learning_rate": 2.4932105102482955e-05, |
| "loss": 0.0276, |
| "step": 17780 |
| }, |
| { |
| "epoch": 17.056567593480345, |
| "grad_norm": 0.2666637599468231, |
| "learning_rate": 2.487710046209626e-05, |
| "loss": 0.0278, |
| "step": 17790 |
| }, |
| { |
| "epoch": 17.066155321188877, |
| "grad_norm": 0.2581173777580261, |
| "learning_rate": 2.4822136463753594e-05, |
| "loss": 0.0285, |
| "step": 17800 |
| }, |
| { |
| "epoch": 17.07574304889741, |
| "grad_norm": 0.19729219377040863, |
| "learning_rate": 2.4767213196371813e-05, |
| "loss": 0.0251, |
| "step": 17810 |
| }, |
| { |
| "epoch": 17.085330776605943, |
| "grad_norm": 0.21068008244037628, |
| "learning_rate": 2.47123307488019e-05, |
| "loss": 0.0218, |
| "step": 17820 |
| }, |
| { |
| "epoch": 17.094918504314478, |
| "grad_norm": 0.21502196788787842, |
| "learning_rate": 2.465748920982873e-05, |
| "loss": 0.0244, |
| "step": 17830 |
| }, |
| { |
| "epoch": 17.10450623202301, |
| "grad_norm": 0.20099669694900513, |
| "learning_rate": 2.4602688668171103e-05, |
| "loss": 0.0299, |
| "step": 17840 |
| }, |
| { |
| "epoch": 17.114093959731544, |
| "grad_norm": 0.6751896739006042, |
| "learning_rate": 2.4547929212481435e-05, |
| "loss": 0.0386, |
| "step": 17850 |
| }, |
| { |
| "epoch": 17.123681687440076, |
| "grad_norm": 0.32390302419662476, |
| "learning_rate": 2.4493210931345684e-05, |
| "loss": 0.029, |
| "step": 17860 |
| }, |
| { |
| "epoch": 17.13326941514861, |
| "grad_norm": 0.31073060631752014, |
| "learning_rate": 2.4438533913283206e-05, |
| "loss": 0.0232, |
| "step": 17870 |
| }, |
| { |
| "epoch": 17.142857142857142, |
| "grad_norm": 0.17248332500457764, |
| "learning_rate": 2.4383898246746596e-05, |
| "loss": 0.0214, |
| "step": 17880 |
| }, |
| { |
| "epoch": 17.152444870565677, |
| "grad_norm": 0.33149340748786926, |
| "learning_rate": 2.4329304020121558e-05, |
| "loss": 0.0298, |
| "step": 17890 |
| }, |
| { |
| "epoch": 17.16203259827421, |
| "grad_norm": 0.2364264875650406, |
| "learning_rate": 2.4274751321726762e-05, |
| "loss": 0.0333, |
| "step": 17900 |
| }, |
| { |
| "epoch": 17.171620325982744, |
| "grad_norm": 0.15520252287387848, |
| "learning_rate": 2.4220240239813684e-05, |
| "loss": 0.0196, |
| "step": 17910 |
| }, |
| { |
| "epoch": 17.181208053691275, |
| "grad_norm": 0.23256506025791168, |
| "learning_rate": 2.4165770862566494e-05, |
| "loss": 0.029, |
| "step": 17920 |
| }, |
| { |
| "epoch": 17.190795781399807, |
| "grad_norm": 0.17074307799339294, |
| "learning_rate": 2.4111343278101884e-05, |
| "loss": 0.0302, |
| "step": 17930 |
| }, |
| { |
| "epoch": 17.20038350910834, |
| "grad_norm": 0.24341343343257904, |
| "learning_rate": 2.4056957574468932e-05, |
| "loss": 0.0296, |
| "step": 17940 |
| }, |
| { |
| "epoch": 17.209971236816873, |
| "grad_norm": 0.1940905898809433, |
| "learning_rate": 2.4002613839648987e-05, |
| "loss": 0.029, |
| "step": 17950 |
| }, |
| { |
| "epoch": 17.219558964525408, |
| "grad_norm": 0.194035604596138, |
| "learning_rate": 2.3948312161555453e-05, |
| "loss": 0.0297, |
| "step": 17960 |
| }, |
| { |
| "epoch": 17.22914669223394, |
| "grad_norm": 0.14753536880016327, |
| "learning_rate": 2.389405262803375e-05, |
| "loss": 0.0259, |
| "step": 17970 |
| }, |
| { |
| "epoch": 17.238734419942475, |
| "grad_norm": 0.18068645894527435, |
| "learning_rate": 2.3839835326861104e-05, |
| "loss": 0.0284, |
| "step": 17980 |
| }, |
| { |
| "epoch": 17.248322147651006, |
| "grad_norm": 0.33698755502700806, |
| "learning_rate": 2.378566034574639e-05, |
| "loss": 0.0289, |
| "step": 17990 |
| }, |
| { |
| "epoch": 17.25790987535954, |
| "grad_norm": 0.2708437144756317, |
| "learning_rate": 2.3731527772330098e-05, |
| "loss": 0.0252, |
| "step": 18000 |
| }, |
| { |
| "epoch": 17.267497603068072, |
| "grad_norm": 0.37091711163520813, |
| "learning_rate": 2.367743769418403e-05, |
| "loss": 0.031, |
| "step": 18010 |
| }, |
| { |
| "epoch": 17.277085330776607, |
| "grad_norm": 0.22311721742153168, |
| "learning_rate": 2.362339019881129e-05, |
| "loss": 0.0356, |
| "step": 18020 |
| }, |
| { |
| "epoch": 17.28667305848514, |
| "grad_norm": 0.3006376624107361, |
| "learning_rate": 2.3569385373646068e-05, |
| "loss": 0.0283, |
| "step": 18030 |
| }, |
| { |
| "epoch": 17.29626078619367, |
| "grad_norm": 0.2278210073709488, |
| "learning_rate": 2.351542330605355e-05, |
| "loss": 0.0292, |
| "step": 18040 |
| }, |
| { |
| "epoch": 17.305848513902205, |
| "grad_norm": 0.1900917887687683, |
| "learning_rate": 2.3461504083329732e-05, |
| "loss": 0.0293, |
| "step": 18050 |
| }, |
| { |
| "epoch": 17.315436241610737, |
| "grad_norm": 0.36089229583740234, |
| "learning_rate": 2.340762779270131e-05, |
| "loss": 0.0335, |
| "step": 18060 |
| }, |
| { |
| "epoch": 17.325023969319272, |
| "grad_norm": 0.20157793164253235, |
| "learning_rate": 2.3353794521325516e-05, |
| "loss": 0.0224, |
| "step": 18070 |
| }, |
| { |
| "epoch": 17.334611697027803, |
| "grad_norm": 0.25802189111709595, |
| "learning_rate": 2.330000435629002e-05, |
| "loss": 0.0241, |
| "step": 18080 |
| }, |
| { |
| "epoch": 17.34419942473634, |
| "grad_norm": 0.19763995707035065, |
| "learning_rate": 2.32462573846127e-05, |
| "loss": 0.0324, |
| "step": 18090 |
| }, |
| { |
| "epoch": 17.35378715244487, |
| "grad_norm": 0.24877896904945374, |
| "learning_rate": 2.319255369324161e-05, |
| "loss": 0.0297, |
| "step": 18100 |
| }, |
| { |
| "epoch": 17.363374880153405, |
| "grad_norm": 0.23094792664051056, |
| "learning_rate": 2.3138893369054766e-05, |
| "loss": 0.0279, |
| "step": 18110 |
| }, |
| { |
| "epoch": 17.372962607861936, |
| "grad_norm": 0.1878676414489746, |
| "learning_rate": 2.3085276498860032e-05, |
| "loss": 0.0278, |
| "step": 18120 |
| }, |
| { |
| "epoch": 17.38255033557047, |
| "grad_norm": 0.20479904115200043, |
| "learning_rate": 2.3031703169394985e-05, |
| "loss": 0.0263, |
| "step": 18130 |
| }, |
| { |
| "epoch": 17.392138063279003, |
| "grad_norm": 0.3048153519630432, |
| "learning_rate": 2.2978173467326724e-05, |
| "loss": 0.0282, |
| "step": 18140 |
| }, |
| { |
| "epoch": 17.401725790987538, |
| "grad_norm": 0.2260926365852356, |
| "learning_rate": 2.292468747925185e-05, |
| "loss": 0.0282, |
| "step": 18150 |
| }, |
| { |
| "epoch": 17.41131351869607, |
| "grad_norm": 0.23683381080627441, |
| "learning_rate": 2.287124529169618e-05, |
| "loss": 0.0255, |
| "step": 18160 |
| }, |
| { |
| "epoch": 17.4209012464046, |
| "grad_norm": 0.21933788061141968, |
| "learning_rate": 2.2817846991114684e-05, |
| "loss": 0.0259, |
| "step": 18170 |
| }, |
| { |
| "epoch": 17.430488974113135, |
| "grad_norm": 0.2983873784542084, |
| "learning_rate": 2.2764492663891353e-05, |
| "loss": 0.0294, |
| "step": 18180 |
| }, |
| { |
| "epoch": 17.440076701821667, |
| "grad_norm": 0.2740059792995453, |
| "learning_rate": 2.271118239633902e-05, |
| "loss": 0.0292, |
| "step": 18190 |
| }, |
| { |
| "epoch": 17.449664429530202, |
| "grad_norm": 0.18633967638015747, |
| "learning_rate": 2.2657916274699265e-05, |
| "loss": 0.024, |
| "step": 18200 |
| }, |
| { |
| "epoch": 17.459252157238733, |
| "grad_norm": 0.21379147469997406, |
| "learning_rate": 2.2604694385142233e-05, |
| "loss": 0.0245, |
| "step": 18210 |
| }, |
| { |
| "epoch": 17.46883988494727, |
| "grad_norm": 0.2814527153968811, |
| "learning_rate": 2.2551516813766538e-05, |
| "loss": 0.0264, |
| "step": 18220 |
| }, |
| { |
| "epoch": 17.4784276126558, |
| "grad_norm": 0.18947578966617584, |
| "learning_rate": 2.2498383646599048e-05, |
| "loss": 0.0222, |
| "step": 18230 |
| }, |
| { |
| "epoch": 17.488015340364335, |
| "grad_norm": 0.41355225443840027, |
| "learning_rate": 2.2445294969594844e-05, |
| "loss": 0.0285, |
| "step": 18240 |
| }, |
| { |
| "epoch": 17.497603068072866, |
| "grad_norm": 0.4395101070404053, |
| "learning_rate": 2.2392250868637026e-05, |
| "loss": 0.0301, |
| "step": 18250 |
| }, |
| { |
| "epoch": 17.5071907957814, |
| "grad_norm": 0.1704569309949875, |
| "learning_rate": 2.233925142953657e-05, |
| "loss": 0.0236, |
| "step": 18260 |
| }, |
| { |
| "epoch": 17.516778523489933, |
| "grad_norm": 6.209451198577881, |
| "learning_rate": 2.2286296738032214e-05, |
| "loss": 0.03, |
| "step": 18270 |
| }, |
| { |
| "epoch": 17.526366251198468, |
| "grad_norm": 0.5336940884590149, |
| "learning_rate": 2.223338687979029e-05, |
| "loss": 0.024, |
| "step": 18280 |
| }, |
| { |
| "epoch": 17.535953978907, |
| "grad_norm": 0.2711230516433716, |
| "learning_rate": 2.2180521940404607e-05, |
| "loss": 0.025, |
| "step": 18290 |
| }, |
| { |
| "epoch": 17.54554170661553, |
| "grad_norm": 0.35838785767555237, |
| "learning_rate": 2.212770200539634e-05, |
| "loss": 0.0328, |
| "step": 18300 |
| }, |
| { |
| "epoch": 17.555129434324066, |
| "grad_norm": 0.2138790637254715, |
| "learning_rate": 2.207492716021381e-05, |
| "loss": 0.0272, |
| "step": 18310 |
| }, |
| { |
| "epoch": 17.564717162032597, |
| "grad_norm": 0.18834197521209717, |
| "learning_rate": 2.2022197490232427e-05, |
| "loss": 0.0266, |
| "step": 18320 |
| }, |
| { |
| "epoch": 17.574304889741132, |
| "grad_norm": 0.28788337111473083, |
| "learning_rate": 2.1969513080754504e-05, |
| "loss": 0.0247, |
| "step": 18330 |
| }, |
| { |
| "epoch": 17.583892617449663, |
| "grad_norm": 0.1590379774570465, |
| "learning_rate": 2.1916874017009136e-05, |
| "loss": 0.0233, |
| "step": 18340 |
| }, |
| { |
| "epoch": 17.5934803451582, |
| "grad_norm": 0.2774651050567627, |
| "learning_rate": 2.186428038415209e-05, |
| "loss": 0.022, |
| "step": 18350 |
| }, |
| { |
| "epoch": 17.60306807286673, |
| "grad_norm": 0.18108907341957092, |
| "learning_rate": 2.1811732267265577e-05, |
| "loss": 0.0228, |
| "step": 18360 |
| }, |
| { |
| "epoch": 17.612655800575265, |
| "grad_norm": 0.2790849208831787, |
| "learning_rate": 2.1759229751358217e-05, |
| "loss": 0.0295, |
| "step": 18370 |
| }, |
| { |
| "epoch": 17.622243528283796, |
| "grad_norm": 0.1974640190601349, |
| "learning_rate": 2.170677292136487e-05, |
| "loss": 0.0265, |
| "step": 18380 |
| }, |
| { |
| "epoch": 17.63183125599233, |
| "grad_norm": 0.2952618896961212, |
| "learning_rate": 2.1654361862146465e-05, |
| "loss": 0.0257, |
| "step": 18390 |
| }, |
| { |
| "epoch": 17.641418983700863, |
| "grad_norm": 0.21564097702503204, |
| "learning_rate": 2.160199665848989e-05, |
| "loss": 0.0286, |
| "step": 18400 |
| }, |
| { |
| "epoch": 17.651006711409394, |
| "grad_norm": 0.2616369426250458, |
| "learning_rate": 2.154967739510787e-05, |
| "loss": 0.0265, |
| "step": 18410 |
| }, |
| { |
| "epoch": 17.66059443911793, |
| "grad_norm": 0.22359015047550201, |
| "learning_rate": 2.1497404156638784e-05, |
| "loss": 0.0217, |
| "step": 18420 |
| }, |
| { |
| "epoch": 17.67018216682646, |
| "grad_norm": 0.26012542843818665, |
| "learning_rate": 2.144517702764657e-05, |
| "loss": 0.0265, |
| "step": 18430 |
| }, |
| { |
| "epoch": 17.679769894534996, |
| "grad_norm": 0.13236083090305328, |
| "learning_rate": 2.1392996092620555e-05, |
| "loss": 0.0203, |
| "step": 18440 |
| }, |
| { |
| "epoch": 17.689357622243527, |
| "grad_norm": 0.23233279585838318, |
| "learning_rate": 2.1340861435975384e-05, |
| "loss": 0.0239, |
| "step": 18450 |
| }, |
| { |
| "epoch": 17.698945349952062, |
| "grad_norm": 0.22985659539699554, |
| "learning_rate": 2.1288773142050794e-05, |
| "loss": 0.026, |
| "step": 18460 |
| }, |
| { |
| "epoch": 17.708533077660594, |
| "grad_norm": 0.2680293321609497, |
| "learning_rate": 2.123673129511152e-05, |
| "loss": 0.0307, |
| "step": 18470 |
| }, |
| { |
| "epoch": 17.71812080536913, |
| "grad_norm": 0.23979081213474274, |
| "learning_rate": 2.1184735979347205e-05, |
| "loss": 0.0251, |
| "step": 18480 |
| }, |
| { |
| "epoch": 17.72770853307766, |
| "grad_norm": 0.2722991704940796, |
| "learning_rate": 2.113278727887213e-05, |
| "loss": 0.0301, |
| "step": 18490 |
| }, |
| { |
| "epoch": 17.737296260786195, |
| "grad_norm": 0.22843940556049347, |
| "learning_rate": 2.1080885277725236e-05, |
| "loss": 0.0228, |
| "step": 18500 |
| }, |
| { |
| "epoch": 17.746883988494726, |
| "grad_norm": 0.34953558444976807, |
| "learning_rate": 2.1029030059869898e-05, |
| "loss": 0.0296, |
| "step": 18510 |
| }, |
| { |
| "epoch": 17.75647171620326, |
| "grad_norm": 0.12219765037298203, |
| "learning_rate": 2.0977221709193813e-05, |
| "loss": 0.0271, |
| "step": 18520 |
| }, |
| { |
| "epoch": 17.766059443911793, |
| "grad_norm": 0.33025461435317993, |
| "learning_rate": 2.0925460309508843e-05, |
| "loss": 0.0305, |
| "step": 18530 |
| }, |
| { |
| "epoch": 17.775647171620324, |
| "grad_norm": 0.3049762547016144, |
| "learning_rate": 2.087374594455092e-05, |
| "loss": 0.0316, |
| "step": 18540 |
| }, |
| { |
| "epoch": 17.78523489932886, |
| "grad_norm": 0.3146844506263733, |
| "learning_rate": 2.082207869797987e-05, |
| "loss": 0.0272, |
| "step": 18550 |
| }, |
| { |
| "epoch": 17.79482262703739, |
| "grad_norm": 0.18491698801517487, |
| "learning_rate": 2.0770458653379286e-05, |
| "loss": 0.0281, |
| "step": 18560 |
| }, |
| { |
| "epoch": 17.804410354745926, |
| "grad_norm": 0.21474412083625793, |
| "learning_rate": 2.0718885894256428e-05, |
| "loss": 0.0238, |
| "step": 18570 |
| }, |
| { |
| "epoch": 17.813998082454457, |
| "grad_norm": 0.1813114583492279, |
| "learning_rate": 2.0667360504042045e-05, |
| "loss": 0.027, |
| "step": 18580 |
| }, |
| { |
| "epoch": 17.823585810162992, |
| "grad_norm": 0.36077314615249634, |
| "learning_rate": 2.0615882566090243e-05, |
| "loss": 0.0311, |
| "step": 18590 |
| }, |
| { |
| "epoch": 17.833173537871524, |
| "grad_norm": 0.1905115246772766, |
| "learning_rate": 2.0564452163678378e-05, |
| "loss": 0.0254, |
| "step": 18600 |
| }, |
| { |
| "epoch": 17.84276126558006, |
| "grad_norm": 0.22948439419269562, |
| "learning_rate": 2.0513069380006943e-05, |
| "loss": 0.0296, |
| "step": 18610 |
| }, |
| { |
| "epoch": 17.85234899328859, |
| "grad_norm": 0.27490001916885376, |
| "learning_rate": 2.046173429819931e-05, |
| "loss": 0.0239, |
| "step": 18620 |
| }, |
| { |
| "epoch": 17.861936720997125, |
| "grad_norm": 0.21853777766227722, |
| "learning_rate": 2.0410447001301753e-05, |
| "loss": 0.028, |
| "step": 18630 |
| }, |
| { |
| "epoch": 17.871524448705657, |
| "grad_norm": 0.20548582077026367, |
| "learning_rate": 2.0359207572283224e-05, |
| "loss": 0.0225, |
| "step": 18640 |
| }, |
| { |
| "epoch": 17.88111217641419, |
| "grad_norm": 0.14802424609661102, |
| "learning_rate": 2.0308016094035226e-05, |
| "loss": 0.0295, |
| "step": 18650 |
| }, |
| { |
| "epoch": 17.890699904122723, |
| "grad_norm": 0.32737597823143005, |
| "learning_rate": 2.02568726493717e-05, |
| "loss": 0.0242, |
| "step": 18660 |
| }, |
| { |
| "epoch": 17.900287631831254, |
| "grad_norm": 0.21833331882953644, |
| "learning_rate": 2.020577732102889e-05, |
| "loss": 0.0273, |
| "step": 18670 |
| }, |
| { |
| "epoch": 17.90987535953979, |
| "grad_norm": 0.24916410446166992, |
| "learning_rate": 2.015473019166519e-05, |
| "loss": 0.0305, |
| "step": 18680 |
| }, |
| { |
| "epoch": 17.91946308724832, |
| "grad_norm": 0.18901677429676056, |
| "learning_rate": 2.0103731343861014e-05, |
| "loss": 0.0256, |
| "step": 18690 |
| }, |
| { |
| "epoch": 17.929050814956856, |
| "grad_norm": 0.20720627903938293, |
| "learning_rate": 2.0052780860118692e-05, |
| "loss": 0.0262, |
| "step": 18700 |
| }, |
| { |
| "epoch": 17.938638542665387, |
| "grad_norm": 0.20290115475654602, |
| "learning_rate": 2.0001878822862292e-05, |
| "loss": 0.0302, |
| "step": 18710 |
| }, |
| { |
| "epoch": 17.948226270373922, |
| "grad_norm": 0.28782570362091064, |
| "learning_rate": 1.995102531443752e-05, |
| "loss": 0.0272, |
| "step": 18720 |
| }, |
| { |
| "epoch": 17.957813998082454, |
| "grad_norm": 0.19285361468791962, |
| "learning_rate": 1.9900220417111577e-05, |
| "loss": 0.0226, |
| "step": 18730 |
| }, |
| { |
| "epoch": 17.96740172579099, |
| "grad_norm": 0.2487422674894333, |
| "learning_rate": 1.984946421307301e-05, |
| "loss": 0.0259, |
| "step": 18740 |
| }, |
| { |
| "epoch": 17.97698945349952, |
| "grad_norm": 0.20847800374031067, |
| "learning_rate": 1.9798756784431616e-05, |
| "loss": 0.0248, |
| "step": 18750 |
| }, |
| { |
| "epoch": 17.986577181208055, |
| "grad_norm": 0.29753822088241577, |
| "learning_rate": 1.974809821321827e-05, |
| "loss": 0.0307, |
| "step": 18760 |
| }, |
| { |
| "epoch": 17.996164908916587, |
| "grad_norm": 0.2475176304578781, |
| "learning_rate": 1.969748858138481e-05, |
| "loss": 0.0192, |
| "step": 18770 |
| }, |
| { |
| "epoch": 18.005752636625118, |
| "grad_norm": 0.24821995198726654, |
| "learning_rate": 1.9646927970803913e-05, |
| "loss": 0.0217, |
| "step": 18780 |
| }, |
| { |
| "epoch": 18.015340364333653, |
| "grad_norm": 0.24269837141036987, |
| "learning_rate": 1.959641646326894e-05, |
| "loss": 0.0267, |
| "step": 18790 |
| }, |
| { |
| "epoch": 18.024928092042185, |
| "grad_norm": 0.4261660575866699, |
| "learning_rate": 1.9545954140493828e-05, |
| "loss": 0.028, |
| "step": 18800 |
| }, |
| { |
| "epoch": 18.03451581975072, |
| "grad_norm": 0.27009981870651245, |
| "learning_rate": 1.9495541084112945e-05, |
| "loss": 0.0261, |
| "step": 18810 |
| }, |
| { |
| "epoch": 18.04410354745925, |
| "grad_norm": 0.4468768537044525, |
| "learning_rate": 1.9445177375680944e-05, |
| "loss": 0.0237, |
| "step": 18820 |
| }, |
| { |
| "epoch": 18.053691275167786, |
| "grad_norm": 0.34373733401298523, |
| "learning_rate": 1.939486309667267e-05, |
| "loss": 0.0283, |
| "step": 18830 |
| }, |
| { |
| "epoch": 18.063279002876317, |
| "grad_norm": 0.3583851456642151, |
| "learning_rate": 1.9344598328482994e-05, |
| "loss": 0.0239, |
| "step": 18840 |
| }, |
| { |
| "epoch": 18.072866730584852, |
| "grad_norm": 0.2819909453392029, |
| "learning_rate": 1.9294383152426682e-05, |
| "loss": 0.0228, |
| "step": 18850 |
| }, |
| { |
| "epoch": 18.082454458293384, |
| "grad_norm": 0.21321451663970947, |
| "learning_rate": 1.924421764973829e-05, |
| "loss": 0.0257, |
| "step": 18860 |
| }, |
| { |
| "epoch": 18.09204218600192, |
| "grad_norm": 0.20414310693740845, |
| "learning_rate": 1.9194101901572e-05, |
| "loss": 0.027, |
| "step": 18870 |
| }, |
| { |
| "epoch": 18.10162991371045, |
| "grad_norm": 0.1880536824464798, |
| "learning_rate": 1.9144035989001518e-05, |
| "loss": 0.0236, |
| "step": 18880 |
| }, |
| { |
| "epoch": 18.111217641418985, |
| "grad_norm": 0.15333381295204163, |
| "learning_rate": 1.909401999301993e-05, |
| "loss": 0.0285, |
| "step": 18890 |
| }, |
| { |
| "epoch": 18.120805369127517, |
| "grad_norm": 0.25423663854599, |
| "learning_rate": 1.904405399453955e-05, |
| "loss": 0.0253, |
| "step": 18900 |
| }, |
| { |
| "epoch": 18.13039309683605, |
| "grad_norm": 0.16123837232589722, |
| "learning_rate": 1.8994138074391843e-05, |
| "loss": 0.0269, |
| "step": 18910 |
| }, |
| { |
| "epoch": 18.139980824544583, |
| "grad_norm": 0.28160786628723145, |
| "learning_rate": 1.8944272313327226e-05, |
| "loss": 0.0289, |
| "step": 18920 |
| }, |
| { |
| "epoch": 18.149568552253115, |
| "grad_norm": 0.17112663388252258, |
| "learning_rate": 1.8894456792014996e-05, |
| "loss": 0.0273, |
| "step": 18930 |
| }, |
| { |
| "epoch": 18.15915627996165, |
| "grad_norm": 0.19048067927360535, |
| "learning_rate": 1.8844691591043173e-05, |
| "loss": 0.0225, |
| "step": 18940 |
| }, |
| { |
| "epoch": 18.16874400767018, |
| "grad_norm": 0.22992561757564545, |
| "learning_rate": 1.8794976790918363e-05, |
| "loss": 0.0229, |
| "step": 18950 |
| }, |
| { |
| "epoch": 18.178331735378716, |
| "grad_norm": 0.30747804045677185, |
| "learning_rate": 1.8745312472065635e-05, |
| "loss": 0.0259, |
| "step": 18960 |
| }, |
| { |
| "epoch": 18.187919463087248, |
| "grad_norm": 0.2523973882198334, |
| "learning_rate": 1.8695698714828406e-05, |
| "loss": 0.0249, |
| "step": 18970 |
| }, |
| { |
| "epoch": 18.197507190795783, |
| "grad_norm": 0.2866404056549072, |
| "learning_rate": 1.8646135599468297e-05, |
| "loss": 0.0252, |
| "step": 18980 |
| }, |
| { |
| "epoch": 18.207094918504314, |
| "grad_norm": 0.1944408118724823, |
| "learning_rate": 1.8596623206164987e-05, |
| "loss": 0.0265, |
| "step": 18990 |
| }, |
| { |
| "epoch": 18.21668264621285, |
| "grad_norm": 0.22918511927127838, |
| "learning_rate": 1.8547161615016116e-05, |
| "loss": 0.0272, |
| "step": 19000 |
| }, |
| { |
| "epoch": 18.22627037392138, |
| "grad_norm": 0.2857123911380768, |
| "learning_rate": 1.8497750906037148e-05, |
| "loss": 0.0307, |
| "step": 19010 |
| }, |
| { |
| "epoch": 18.235858101629915, |
| "grad_norm": 0.17393842339515686, |
| "learning_rate": 1.8448391159161204e-05, |
| "loss": 0.029, |
| "step": 19020 |
| }, |
| { |
| "epoch": 18.245445829338447, |
| "grad_norm": 0.2042463719844818, |
| "learning_rate": 1.839908245423899e-05, |
| "loss": 0.0251, |
| "step": 19030 |
| }, |
| { |
| "epoch": 18.25503355704698, |
| "grad_norm": 0.277891606092453, |
| "learning_rate": 1.8349824871038644e-05, |
| "loss": 0.0237, |
| "step": 19040 |
| }, |
| { |
| "epoch": 18.264621284755513, |
| "grad_norm": 0.16384513676166534, |
| "learning_rate": 1.8300618489245537e-05, |
| "loss": 0.0239, |
| "step": 19050 |
| }, |
| { |
| "epoch": 18.274209012464045, |
| "grad_norm": 0.27934807538986206, |
| "learning_rate": 1.8251463388462315e-05, |
| "loss": 0.0258, |
| "step": 19060 |
| }, |
| { |
| "epoch": 18.28379674017258, |
| "grad_norm": 0.28241196274757385, |
| "learning_rate": 1.8202359648208593e-05, |
| "loss": 0.0244, |
| "step": 19070 |
| }, |
| { |
| "epoch": 18.29338446788111, |
| "grad_norm": 0.22691746056079865, |
| "learning_rate": 1.8153307347920918e-05, |
| "loss": 0.0279, |
| "step": 19080 |
| }, |
| { |
| "epoch": 18.302972195589646, |
| "grad_norm": 0.31362423300743103, |
| "learning_rate": 1.8104306566952618e-05, |
| "loss": 0.0235, |
| "step": 19090 |
| }, |
| { |
| "epoch": 18.312559923298178, |
| "grad_norm": 0.5029933452606201, |
| "learning_rate": 1.805535738457368e-05, |
| "loss": 0.027, |
| "step": 19100 |
| }, |
| { |
| "epoch": 18.322147651006713, |
| "grad_norm": 0.23722821474075317, |
| "learning_rate": 1.8006459879970622e-05, |
| "loss": 0.0309, |
| "step": 19110 |
| }, |
| { |
| "epoch": 18.331735378715244, |
| "grad_norm": 0.2513883113861084, |
| "learning_rate": 1.7957614132246347e-05, |
| "loss": 0.0263, |
| "step": 19120 |
| }, |
| { |
| "epoch": 18.34132310642378, |
| "grad_norm": 0.24489589035511017, |
| "learning_rate": 1.7908820220420052e-05, |
| "loss": 0.0251, |
| "step": 19130 |
| }, |
| { |
| "epoch": 18.35091083413231, |
| "grad_norm": 0.2208951860666275, |
| "learning_rate": 1.7860078223427056e-05, |
| "loss": 0.0267, |
| "step": 19140 |
| }, |
| { |
| "epoch": 18.360498561840842, |
| "grad_norm": 0.2466048002243042, |
| "learning_rate": 1.7811388220118707e-05, |
| "loss": 0.0246, |
| "step": 19150 |
| }, |
| { |
| "epoch": 18.370086289549377, |
| "grad_norm": 0.1647568643093109, |
| "learning_rate": 1.7762750289262238e-05, |
| "loss": 0.0221, |
| "step": 19160 |
| }, |
| { |
| "epoch": 18.37967401725791, |
| "grad_norm": 0.20359550416469574, |
| "learning_rate": 1.7714164509540654e-05, |
| "loss": 0.024, |
| "step": 19170 |
| }, |
| { |
| "epoch": 18.389261744966444, |
| "grad_norm": 0.15871766209602356, |
| "learning_rate": 1.7665630959552548e-05, |
| "loss": 0.0252, |
| "step": 19180 |
| }, |
| { |
| "epoch": 18.398849472674975, |
| "grad_norm": 0.2411220222711563, |
| "learning_rate": 1.7617149717812076e-05, |
| "loss": 0.0225, |
| "step": 19190 |
| }, |
| { |
| "epoch": 18.40843720038351, |
| "grad_norm": 0.3407461643218994, |
| "learning_rate": 1.7568720862748744e-05, |
| "loss": 0.0312, |
| "step": 19200 |
| }, |
| { |
| "epoch": 18.41802492809204, |
| "grad_norm": 0.21590691804885864, |
| "learning_rate": 1.75203444727073e-05, |
| "loss": 0.0248, |
| "step": 19210 |
| }, |
| { |
| "epoch": 18.427612655800576, |
| "grad_norm": 0.17383931577205658, |
| "learning_rate": 1.7472020625947678e-05, |
| "loss": 0.0231, |
| "step": 19220 |
| }, |
| { |
| "epoch": 18.437200383509108, |
| "grad_norm": 0.31559276580810547, |
| "learning_rate": 1.742374940064474e-05, |
| "loss": 0.0263, |
| "step": 19230 |
| }, |
| { |
| "epoch": 18.446788111217643, |
| "grad_norm": 0.23316271603107452, |
| "learning_rate": 1.737553087488825e-05, |
| "loss": 0.0316, |
| "step": 19240 |
| }, |
| { |
| "epoch": 18.456375838926174, |
| "grad_norm": 0.21858806908130646, |
| "learning_rate": 1.7327365126682726e-05, |
| "loss": 0.0272, |
| "step": 19250 |
| }, |
| { |
| "epoch": 18.46596356663471, |
| "grad_norm": 0.2449788898229599, |
| "learning_rate": 1.7279252233947286e-05, |
| "loss": 0.0286, |
| "step": 19260 |
| }, |
| { |
| "epoch": 18.47555129434324, |
| "grad_norm": 0.21250544488430023, |
| "learning_rate": 1.7231192274515562e-05, |
| "loss": 0.0247, |
| "step": 19270 |
| }, |
| { |
| "epoch": 18.485139022051772, |
| "grad_norm": 0.2528996169567108, |
| "learning_rate": 1.7183185326135543e-05, |
| "loss": 0.0253, |
| "step": 19280 |
| }, |
| { |
| "epoch": 18.494726749760307, |
| "grad_norm": 0.2549261748790741, |
| "learning_rate": 1.7135231466469463e-05, |
| "loss": 0.0294, |
| "step": 19290 |
| }, |
| { |
| "epoch": 18.50431447746884, |
| "grad_norm": 0.352224200963974, |
| "learning_rate": 1.7087330773093673e-05, |
| "loss": 0.0228, |
| "step": 19300 |
| }, |
| { |
| "epoch": 18.513902205177374, |
| "grad_norm": 0.18530428409576416, |
| "learning_rate": 1.7039483323498534e-05, |
| "loss": 0.0258, |
| "step": 19310 |
| }, |
| { |
| "epoch": 18.523489932885905, |
| "grad_norm": 0.14298230409622192, |
| "learning_rate": 1.6991689195088217e-05, |
| "loss": 0.0236, |
| "step": 19320 |
| }, |
| { |
| "epoch": 18.53307766059444, |
| "grad_norm": 0.2754952311515808, |
| "learning_rate": 1.6943948465180693e-05, |
| "loss": 0.0235, |
| "step": 19330 |
| }, |
| { |
| "epoch": 18.54266538830297, |
| "grad_norm": 0.2274174690246582, |
| "learning_rate": 1.6896261211007518e-05, |
| "loss": 0.0305, |
| "step": 19340 |
| }, |
| { |
| "epoch": 18.552253116011507, |
| "grad_norm": 0.3091070055961609, |
| "learning_rate": 1.684862750971376e-05, |
| "loss": 0.0227, |
| "step": 19350 |
| }, |
| { |
| "epoch": 18.561840843720038, |
| "grad_norm": 0.15530341863632202, |
| "learning_rate": 1.6801047438357818e-05, |
| "loss": 0.0224, |
| "step": 19360 |
| }, |
| { |
| "epoch": 18.571428571428573, |
| "grad_norm": 0.25515303015708923, |
| "learning_rate": 1.675352107391139e-05, |
| "loss": 0.0291, |
| "step": 19370 |
| }, |
| { |
| "epoch": 18.581016299137104, |
| "grad_norm": 0.27960437536239624, |
| "learning_rate": 1.670604849325923e-05, |
| "loss": 0.0258, |
| "step": 19380 |
| }, |
| { |
| "epoch": 18.59060402684564, |
| "grad_norm": 0.2250082641839981, |
| "learning_rate": 1.6658629773199124e-05, |
| "loss": 0.0232, |
| "step": 19390 |
| }, |
| { |
| "epoch": 18.60019175455417, |
| "grad_norm": 0.27883338928222656, |
| "learning_rate": 1.6611264990441706e-05, |
| "loss": 0.0343, |
| "step": 19400 |
| }, |
| { |
| "epoch": 18.609779482262702, |
| "grad_norm": 0.17993752658367157, |
| "learning_rate": 1.6563954221610355e-05, |
| "loss": 0.0278, |
| "step": 19410 |
| }, |
| { |
| "epoch": 18.619367209971237, |
| "grad_norm": 0.26345837116241455, |
| "learning_rate": 1.6516697543241083e-05, |
| "loss": 0.026, |
| "step": 19420 |
| }, |
| { |
| "epoch": 18.62895493767977, |
| "grad_norm": 0.22277230024337769, |
| "learning_rate": 1.646949503178239e-05, |
| "loss": 0.0213, |
| "step": 19430 |
| }, |
| { |
| "epoch": 18.638542665388304, |
| "grad_norm": 0.2661077082157135, |
| "learning_rate": 1.642234676359516e-05, |
| "loss": 0.0243, |
| "step": 19440 |
| }, |
| { |
| "epoch": 18.648130393096835, |
| "grad_norm": 0.28437718749046326, |
| "learning_rate": 1.6375252814952487e-05, |
| "loss": 0.0263, |
| "step": 19450 |
| }, |
| { |
| "epoch": 18.65771812080537, |
| "grad_norm": 0.20834000408649445, |
| "learning_rate": 1.6328213262039637e-05, |
| "loss": 0.0225, |
| "step": 19460 |
| }, |
| { |
| "epoch": 18.6673058485139, |
| "grad_norm": 0.24616943299770355, |
| "learning_rate": 1.6281228180953857e-05, |
| "loss": 0.0233, |
| "step": 19470 |
| }, |
| { |
| "epoch": 18.676893576222437, |
| "grad_norm": 0.21522550284862518, |
| "learning_rate": 1.623429764770428e-05, |
| "loss": 0.0233, |
| "step": 19480 |
| }, |
| { |
| "epoch": 18.686481303930968, |
| "grad_norm": 0.2068173587322235, |
| "learning_rate": 1.618742173821179e-05, |
| "loss": 0.0308, |
| "step": 19490 |
| }, |
| { |
| "epoch": 18.696069031639503, |
| "grad_norm": 0.5226014256477356, |
| "learning_rate": 1.614060052830891e-05, |
| "loss": 0.031, |
| "step": 19500 |
| }, |
| { |
| "epoch": 18.705656759348035, |
| "grad_norm": 0.18240250647068024, |
| "learning_rate": 1.6093834093739647e-05, |
| "loss": 0.0245, |
| "step": 19510 |
| }, |
| { |
| "epoch": 18.715244487056566, |
| "grad_norm": 0.2039356231689453, |
| "learning_rate": 1.6047122510159458e-05, |
| "loss": 0.02, |
| "step": 19520 |
| }, |
| { |
| "epoch": 18.7248322147651, |
| "grad_norm": 0.2688858211040497, |
| "learning_rate": 1.600046585313501e-05, |
| "loss": 0.0232, |
| "step": 19530 |
| }, |
| { |
| "epoch": 18.734419942473632, |
| "grad_norm": 0.3605387806892395, |
| "learning_rate": 1.5953864198144135e-05, |
| "loss": 0.0285, |
| "step": 19540 |
| }, |
| { |
| "epoch": 18.744007670182167, |
| "grad_norm": 0.19552724063396454, |
| "learning_rate": 1.5907317620575686e-05, |
| "loss": 0.0249, |
| "step": 19550 |
| }, |
| { |
| "epoch": 18.7535953978907, |
| "grad_norm": 0.2785275876522064, |
| "learning_rate": 1.58608261957294e-05, |
| "loss": 0.0301, |
| "step": 19560 |
| }, |
| { |
| "epoch": 18.763183125599234, |
| "grad_norm": 0.265622079372406, |
| "learning_rate": 1.5814389998815836e-05, |
| "loss": 0.0244, |
| "step": 19570 |
| }, |
| { |
| "epoch": 18.772770853307765, |
| "grad_norm": 0.22419176995754242, |
| "learning_rate": 1.5768009104956137e-05, |
| "loss": 0.0197, |
| "step": 19580 |
| }, |
| { |
| "epoch": 18.7823585810163, |
| "grad_norm": 0.22098082304000854, |
| "learning_rate": 1.572168358918204e-05, |
| "loss": 0.0219, |
| "step": 19590 |
| }, |
| { |
| "epoch": 18.79194630872483, |
| "grad_norm": 0.26601535081863403, |
| "learning_rate": 1.5675413526435677e-05, |
| "loss": 0.0234, |
| "step": 19600 |
| }, |
| { |
| "epoch": 18.801534036433367, |
| "grad_norm": 0.2946853041648865, |
| "learning_rate": 1.562919899156947e-05, |
| "loss": 0.0246, |
| "step": 19610 |
| }, |
| { |
| "epoch": 18.811121764141898, |
| "grad_norm": 0.3101515471935272, |
| "learning_rate": 1.558304005934602e-05, |
| "loss": 0.0244, |
| "step": 19620 |
| }, |
| { |
| "epoch": 18.82070949185043, |
| "grad_norm": 0.24001409113407135, |
| "learning_rate": 1.5536936804437963e-05, |
| "loss": 0.0254, |
| "step": 19630 |
| }, |
| { |
| "epoch": 18.830297219558965, |
| "grad_norm": 0.1419634222984314, |
| "learning_rate": 1.549088930142788e-05, |
| "loss": 0.0231, |
| "step": 19640 |
| }, |
| { |
| "epoch": 18.839884947267496, |
| "grad_norm": 0.24882347881793976, |
| "learning_rate": 1.544489762480815e-05, |
| "loss": 0.0201, |
| "step": 19650 |
| }, |
| { |
| "epoch": 18.84947267497603, |
| "grad_norm": 0.22982530295848846, |
| "learning_rate": 1.5398961848980838e-05, |
| "loss": 0.0218, |
| "step": 19660 |
| }, |
| { |
| "epoch": 18.859060402684563, |
| "grad_norm": 0.1851414293050766, |
| "learning_rate": 1.5353082048257596e-05, |
| "loss": 0.0267, |
| "step": 19670 |
| }, |
| { |
| "epoch": 18.868648130393098, |
| "grad_norm": 0.23806796967983246, |
| "learning_rate": 1.53072582968595e-05, |
| "loss": 0.0273, |
| "step": 19680 |
| }, |
| { |
| "epoch": 18.87823585810163, |
| "grad_norm": 0.2619253396987915, |
| "learning_rate": 1.526149066891697e-05, |
| "loss": 0.0263, |
| "step": 19690 |
| }, |
| { |
| "epoch": 18.887823585810164, |
| "grad_norm": 0.24336743354797363, |
| "learning_rate": 1.5215779238469641e-05, |
| "loss": 0.0344, |
| "step": 19700 |
| }, |
| { |
| "epoch": 18.897411313518695, |
| "grad_norm": 0.21095559000968933, |
| "learning_rate": 1.5170124079466186e-05, |
| "loss": 0.0239, |
| "step": 19710 |
| }, |
| { |
| "epoch": 18.90699904122723, |
| "grad_norm": 0.3702682554721832, |
| "learning_rate": 1.51245252657643e-05, |
| "loss": 0.0265, |
| "step": 19720 |
| }, |
| { |
| "epoch": 18.916586768935762, |
| "grad_norm": 0.45442819595336914, |
| "learning_rate": 1.5078982871130504e-05, |
| "loss": 0.0284, |
| "step": 19730 |
| }, |
| { |
| "epoch": 18.926174496644297, |
| "grad_norm": 0.1986912339925766, |
| "learning_rate": 1.5033496969240057e-05, |
| "loss": 0.025, |
| "step": 19740 |
| }, |
| { |
| "epoch": 18.93576222435283, |
| "grad_norm": 0.18418286740779877, |
| "learning_rate": 1.4988067633676816e-05, |
| "loss": 0.0255, |
| "step": 19750 |
| }, |
| { |
| "epoch": 18.94534995206136, |
| "grad_norm": 0.16016803681850433, |
| "learning_rate": 1.4942694937933144e-05, |
| "loss": 0.0224, |
| "step": 19760 |
| }, |
| { |
| "epoch": 18.954937679769895, |
| "grad_norm": 0.2799144387245178, |
| "learning_rate": 1.4897378955409763e-05, |
| "loss": 0.0279, |
| "step": 19770 |
| }, |
| { |
| "epoch": 18.964525407478426, |
| "grad_norm": 0.17058733105659485, |
| "learning_rate": 1.4852119759415661e-05, |
| "loss": 0.0257, |
| "step": 19780 |
| }, |
| { |
| "epoch": 18.97411313518696, |
| "grad_norm": 0.24392423033714294, |
| "learning_rate": 1.4806917423167944e-05, |
| "loss": 0.0237, |
| "step": 19790 |
| }, |
| { |
| "epoch": 18.983700862895493, |
| "grad_norm": 0.19233231246471405, |
| "learning_rate": 1.4761772019791748e-05, |
| "loss": 0.0296, |
| "step": 19800 |
| }, |
| { |
| "epoch": 18.993288590604028, |
| "grad_norm": 0.2076229453086853, |
| "learning_rate": 1.4716683622320105e-05, |
| "loss": 0.026, |
| "step": 19810 |
| }, |
| { |
| "epoch": 19.00287631831256, |
| "grad_norm": 0.22467122972011566, |
| "learning_rate": 1.4671652303693806e-05, |
| "loss": 0.028, |
| "step": 19820 |
| }, |
| { |
| "epoch": 19.012464046021094, |
| "grad_norm": 0.16231553256511688, |
| "learning_rate": 1.4626678136761369e-05, |
| "loss": 0.0248, |
| "step": 19830 |
| }, |
| { |
| "epoch": 19.022051773729626, |
| "grad_norm": 0.24173732101917267, |
| "learning_rate": 1.4581761194278765e-05, |
| "loss": 0.0318, |
| "step": 19840 |
| }, |
| { |
| "epoch": 19.03163950143816, |
| "grad_norm": 0.21880550682544708, |
| "learning_rate": 1.4536901548909448e-05, |
| "loss": 0.0299, |
| "step": 19850 |
| }, |
| { |
| "epoch": 19.041227229146692, |
| "grad_norm": 0.3532547950744629, |
| "learning_rate": 1.4492099273224174e-05, |
| "loss": 0.0282, |
| "step": 19860 |
| }, |
| { |
| "epoch": 19.050814956855227, |
| "grad_norm": 0.20322856307029724, |
| "learning_rate": 1.4447354439700889e-05, |
| "loss": 0.0235, |
| "step": 19870 |
| }, |
| { |
| "epoch": 19.06040268456376, |
| "grad_norm": 0.2487279176712036, |
| "learning_rate": 1.4402667120724594e-05, |
| "loss": 0.0285, |
| "step": 19880 |
| }, |
| { |
| "epoch": 19.06999041227229, |
| "grad_norm": 0.19251792132854462, |
| "learning_rate": 1.4358037388587281e-05, |
| "loss": 0.0269, |
| "step": 19890 |
| }, |
| { |
| "epoch": 19.079578139980825, |
| "grad_norm": 0.2209775298833847, |
| "learning_rate": 1.4313465315487745e-05, |
| "loss": 0.0209, |
| "step": 19900 |
| }, |
| { |
| "epoch": 19.089165867689356, |
| "grad_norm": 0.15831854939460754, |
| "learning_rate": 1.4268950973531536e-05, |
| "loss": 0.023, |
| "step": 19910 |
| }, |
| { |
| "epoch": 19.09875359539789, |
| "grad_norm": 0.21416033804416656, |
| "learning_rate": 1.4224494434730794e-05, |
| "loss": 0.0217, |
| "step": 19920 |
| }, |
| { |
| "epoch": 19.108341323106423, |
| "grad_norm": 0.15104466676712036, |
| "learning_rate": 1.4180095771004154e-05, |
| "loss": 0.0212, |
| "step": 19930 |
| }, |
| { |
| "epoch": 19.117929050814958, |
| "grad_norm": 0.19750936329364777, |
| "learning_rate": 1.413575505417662e-05, |
| "loss": 0.0172, |
| "step": 19940 |
| }, |
| { |
| "epoch": 19.12751677852349, |
| "grad_norm": 0.26296430826187134, |
| "learning_rate": 1.4091472355979463e-05, |
| "loss": 0.0248, |
| "step": 19950 |
| }, |
| { |
| "epoch": 19.137104506232024, |
| "grad_norm": 0.20639511942863464, |
| "learning_rate": 1.404724774805008e-05, |
| "loss": 0.0255, |
| "step": 19960 |
| }, |
| { |
| "epoch": 19.146692233940556, |
| "grad_norm": 0.14402848482131958, |
| "learning_rate": 1.4003081301931909e-05, |
| "loss": 0.0273, |
| "step": 19970 |
| }, |
| { |
| "epoch": 19.15627996164909, |
| "grad_norm": 0.14169853925704956, |
| "learning_rate": 1.395897308907429e-05, |
| "loss": 0.0256, |
| "step": 19980 |
| }, |
| { |
| "epoch": 19.165867689357622, |
| "grad_norm": 0.17262916266918182, |
| "learning_rate": 1.3914923180832368e-05, |
| "loss": 0.0264, |
| "step": 19990 |
| }, |
| { |
| "epoch": 19.175455417066157, |
| "grad_norm": 0.13429339230060577, |
| "learning_rate": 1.3870931648466945e-05, |
| "loss": 0.0246, |
| "step": 20000 |
| }, |
| { |
| "epoch": 19.18504314477469, |
| "grad_norm": 0.2229502946138382, |
| "learning_rate": 1.3826998563144411e-05, |
| "loss": 0.0238, |
| "step": 20010 |
| }, |
| { |
| "epoch": 19.19463087248322, |
| "grad_norm": 0.1920672059059143, |
| "learning_rate": 1.3783123995936587e-05, |
| "loss": 0.0239, |
| "step": 20020 |
| }, |
| { |
| "epoch": 19.204218600191755, |
| "grad_norm": 0.22073961794376373, |
| "learning_rate": 1.373930801782064e-05, |
| "loss": 0.021, |
| "step": 20030 |
| }, |
| { |
| "epoch": 19.213806327900286, |
| "grad_norm": 0.3254948556423187, |
| "learning_rate": 1.369555069967895e-05, |
| "loss": 0.0253, |
| "step": 20040 |
| }, |
| { |
| "epoch": 19.22339405560882, |
| "grad_norm": 0.14167852699756622, |
| "learning_rate": 1.3651852112298995e-05, |
| "loss": 0.0207, |
| "step": 20050 |
| }, |
| { |
| "epoch": 19.232981783317353, |
| "grad_norm": 0.2205292135477066, |
| "learning_rate": 1.3608212326373249e-05, |
| "loss": 0.0266, |
| "step": 20060 |
| }, |
| { |
| "epoch": 19.242569511025888, |
| "grad_norm": 0.268951416015625, |
| "learning_rate": 1.3564631412499067e-05, |
| "loss": 0.0187, |
| "step": 20070 |
| }, |
| { |
| "epoch": 19.25215723873442, |
| "grad_norm": 0.18108440935611725, |
| "learning_rate": 1.3521109441178559e-05, |
| "loss": 0.0258, |
| "step": 20080 |
| }, |
| { |
| "epoch": 19.261744966442954, |
| "grad_norm": 0.2803739905357361, |
| "learning_rate": 1.3477646482818474e-05, |
| "loss": 0.0263, |
| "step": 20090 |
| }, |
| { |
| "epoch": 19.271332694151486, |
| "grad_norm": 0.2689793109893799, |
| "learning_rate": 1.3434242607730108e-05, |
| "loss": 0.0254, |
| "step": 20100 |
| }, |
| { |
| "epoch": 19.28092042186002, |
| "grad_norm": 0.3495311439037323, |
| "learning_rate": 1.3390897886129162e-05, |
| "loss": 0.0277, |
| "step": 20110 |
| }, |
| { |
| "epoch": 19.290508149568552, |
| "grad_norm": 0.17431464791297913, |
| "learning_rate": 1.334761238813566e-05, |
| "loss": 0.0211, |
| "step": 20120 |
| }, |
| { |
| "epoch": 19.300095877277084, |
| "grad_norm": 0.3109664022922516, |
| "learning_rate": 1.3304386183773809e-05, |
| "loss": 0.0239, |
| "step": 20130 |
| }, |
| { |
| "epoch": 19.30968360498562, |
| "grad_norm": 0.15485496819019318, |
| "learning_rate": 1.3261219342971887e-05, |
| "loss": 0.0283, |
| "step": 20140 |
| }, |
| { |
| "epoch": 19.31927133269415, |
| "grad_norm": 0.18140093982219696, |
| "learning_rate": 1.3218111935562149e-05, |
| "loss": 0.0275, |
| "step": 20150 |
| }, |
| { |
| "epoch": 19.328859060402685, |
| "grad_norm": 0.22020739316940308, |
| "learning_rate": 1.3175064031280703e-05, |
| "loss": 0.0318, |
| "step": 20160 |
| }, |
| { |
| "epoch": 19.338446788111217, |
| "grad_norm": 0.27302905917167664, |
| "learning_rate": 1.3132075699767393e-05, |
| "loss": 0.0204, |
| "step": 20170 |
| }, |
| { |
| "epoch": 19.34803451581975, |
| "grad_norm": 0.20312833786010742, |
| "learning_rate": 1.3089147010565689e-05, |
| "loss": 0.0243, |
| "step": 20180 |
| }, |
| { |
| "epoch": 19.357622243528283, |
| "grad_norm": 0.2138754427433014, |
| "learning_rate": 1.3046278033122577e-05, |
| "loss": 0.0246, |
| "step": 20190 |
| }, |
| { |
| "epoch": 19.367209971236818, |
| "grad_norm": 0.15521451830863953, |
| "learning_rate": 1.3003468836788446e-05, |
| "loss": 0.0202, |
| "step": 20200 |
| }, |
| { |
| "epoch": 19.37679769894535, |
| "grad_norm": 0.14165331423282623, |
| "learning_rate": 1.296071949081698e-05, |
| "loss": 0.029, |
| "step": 20210 |
| }, |
| { |
| "epoch": 19.386385426653884, |
| "grad_norm": 0.17273680865764618, |
| "learning_rate": 1.2918030064365034e-05, |
| "loss": 0.0248, |
| "step": 20220 |
| }, |
| { |
| "epoch": 19.395973154362416, |
| "grad_norm": 0.1982639878988266, |
| "learning_rate": 1.2875400626492534e-05, |
| "loss": 0.0218, |
| "step": 20230 |
| }, |
| { |
| "epoch": 19.40556088207095, |
| "grad_norm": 0.20939846336841583, |
| "learning_rate": 1.2832831246162359e-05, |
| "loss": 0.0221, |
| "step": 20240 |
| }, |
| { |
| "epoch": 19.415148609779482, |
| "grad_norm": 0.2230292111635208, |
| "learning_rate": 1.2790321992240228e-05, |
| "loss": 0.0262, |
| "step": 20250 |
| }, |
| { |
| "epoch": 19.424736337488014, |
| "grad_norm": 0.17387695610523224, |
| "learning_rate": 1.2747872933494615e-05, |
| "loss": 0.0231, |
| "step": 20260 |
| }, |
| { |
| "epoch": 19.43432406519655, |
| "grad_norm": 0.2639104723930359, |
| "learning_rate": 1.2705484138596552e-05, |
| "loss": 0.0227, |
| "step": 20270 |
| }, |
| { |
| "epoch": 19.44391179290508, |
| "grad_norm": 0.2716933488845825, |
| "learning_rate": 1.2663155676119665e-05, |
| "loss": 0.025, |
| "step": 20280 |
| }, |
| { |
| "epoch": 19.453499520613615, |
| "grad_norm": 0.2254800945520401, |
| "learning_rate": 1.2620887614539917e-05, |
| "loss": 0.0236, |
| "step": 20290 |
| }, |
| { |
| "epoch": 19.463087248322147, |
| "grad_norm": 0.1728450208902359, |
| "learning_rate": 1.2578680022235585e-05, |
| "loss": 0.0237, |
| "step": 20300 |
| }, |
| { |
| "epoch": 19.47267497603068, |
| "grad_norm": 0.2077593207359314, |
| "learning_rate": 1.253653296748712e-05, |
| "loss": 0.026, |
| "step": 20310 |
| }, |
| { |
| "epoch": 19.482262703739213, |
| "grad_norm": 0.20576708018779755, |
| "learning_rate": 1.2494446518477022e-05, |
| "loss": 0.021, |
| "step": 20320 |
| }, |
| { |
| "epoch": 19.491850431447748, |
| "grad_norm": 0.2826680839061737, |
| "learning_rate": 1.2452420743289778e-05, |
| "loss": 0.0241, |
| "step": 20330 |
| }, |
| { |
| "epoch": 19.50143815915628, |
| "grad_norm": 0.3109418451786041, |
| "learning_rate": 1.2410455709911694e-05, |
| "loss": 0.0262, |
| "step": 20340 |
| }, |
| { |
| "epoch": 19.511025886864815, |
| "grad_norm": 0.6010233759880066, |
| "learning_rate": 1.2368551486230828e-05, |
| "loss": 0.0261, |
| "step": 20350 |
| }, |
| { |
| "epoch": 19.520613614573346, |
| "grad_norm": 0.33683139085769653, |
| "learning_rate": 1.2326708140036852e-05, |
| "loss": 0.0268, |
| "step": 20360 |
| }, |
| { |
| "epoch": 19.530201342281877, |
| "grad_norm": 0.1394880712032318, |
| "learning_rate": 1.2284925739020974e-05, |
| "loss": 0.0287, |
| "step": 20370 |
| }, |
| { |
| "epoch": 19.539789069990412, |
| "grad_norm": 0.2836284935474396, |
| "learning_rate": 1.2243204350775789e-05, |
| "loss": 0.0215, |
| "step": 20380 |
| }, |
| { |
| "epoch": 19.549376797698944, |
| "grad_norm": 0.9439190626144409, |
| "learning_rate": 1.2201544042795198e-05, |
| "loss": 0.0254, |
| "step": 20390 |
| }, |
| { |
| "epoch": 19.55896452540748, |
| "grad_norm": 0.18774332106113434, |
| "learning_rate": 1.215994488247431e-05, |
| "loss": 0.0273, |
| "step": 20400 |
| }, |
| { |
| "epoch": 19.56855225311601, |
| "grad_norm": 0.4038194715976715, |
| "learning_rate": 1.211840693710926e-05, |
| "loss": 0.0186, |
| "step": 20410 |
| }, |
| { |
| "epoch": 19.578139980824545, |
| "grad_norm": 0.2532286047935486, |
| "learning_rate": 1.2076930273897214e-05, |
| "loss": 0.0303, |
| "step": 20420 |
| }, |
| { |
| "epoch": 19.587727708533077, |
| "grad_norm": 0.23393119871616364, |
| "learning_rate": 1.2035514959936144e-05, |
| "loss": 0.0223, |
| "step": 20430 |
| }, |
| { |
| "epoch": 19.59731543624161, |
| "grad_norm": 0.17693249881267548, |
| "learning_rate": 1.199416106222484e-05, |
| "loss": 0.0222, |
| "step": 20440 |
| }, |
| { |
| "epoch": 19.606903163950143, |
| "grad_norm": 0.4991660714149475, |
| "learning_rate": 1.1952868647662696e-05, |
| "loss": 0.0255, |
| "step": 20450 |
| }, |
| { |
| "epoch": 19.616490891658678, |
| "grad_norm": 0.24061451852321625, |
| "learning_rate": 1.1911637783049645e-05, |
| "loss": 0.0271, |
| "step": 20460 |
| }, |
| { |
| "epoch": 19.62607861936721, |
| "grad_norm": 0.20236246287822723, |
| "learning_rate": 1.1870468535086054e-05, |
| "loss": 0.0274, |
| "step": 20470 |
| }, |
| { |
| "epoch": 19.635666347075745, |
| "grad_norm": 0.16982276737689972, |
| "learning_rate": 1.1829360970372604e-05, |
| "loss": 0.0275, |
| "step": 20480 |
| }, |
| { |
| "epoch": 19.645254074784276, |
| "grad_norm": 0.17934417724609375, |
| "learning_rate": 1.1788315155410212e-05, |
| "loss": 0.02, |
| "step": 20490 |
| }, |
| { |
| "epoch": 19.654841802492808, |
| "grad_norm": 0.2388330101966858, |
| "learning_rate": 1.1747331156599873e-05, |
| "loss": 0.0192, |
| "step": 20500 |
| }, |
| { |
| "epoch": 19.664429530201343, |
| "grad_norm": 0.19787994027137756, |
| "learning_rate": 1.1706409040242588e-05, |
| "loss": 0.0261, |
| "step": 20510 |
| }, |
| { |
| "epoch": 19.674017257909874, |
| "grad_norm": 0.2273687720298767, |
| "learning_rate": 1.166554887253926e-05, |
| "loss": 0.0246, |
| "step": 20520 |
| }, |
| { |
| "epoch": 19.68360498561841, |
| "grad_norm": 0.33494409918785095, |
| "learning_rate": 1.1624750719590588e-05, |
| "loss": 0.0249, |
| "step": 20530 |
| }, |
| { |
| "epoch": 19.69319271332694, |
| "grad_norm": 0.2192111760377884, |
| "learning_rate": 1.158401464739689e-05, |
| "loss": 0.0239, |
| "step": 20540 |
| }, |
| { |
| "epoch": 19.702780441035475, |
| "grad_norm": 0.2234772890806198, |
| "learning_rate": 1.154334072185811e-05, |
| "loss": 0.0227, |
| "step": 20550 |
| }, |
| { |
| "epoch": 19.712368168744007, |
| "grad_norm": 0.3074262738227844, |
| "learning_rate": 1.1502729008773639e-05, |
| "loss": 0.027, |
| "step": 20560 |
| }, |
| { |
| "epoch": 19.721955896452542, |
| "grad_norm": 0.22344590723514557, |
| "learning_rate": 1.146217957384223e-05, |
| "loss": 0.0251, |
| "step": 20570 |
| }, |
| { |
| "epoch": 19.731543624161073, |
| "grad_norm": 0.12177485972642899, |
| "learning_rate": 1.1421692482661856e-05, |
| "loss": 0.0246, |
| "step": 20580 |
| }, |
| { |
| "epoch": 19.74113135186961, |
| "grad_norm": 0.12450756132602692, |
| "learning_rate": 1.1381267800729695e-05, |
| "loss": 0.0247, |
| "step": 20590 |
| }, |
| { |
| "epoch": 19.75071907957814, |
| "grad_norm": 0.26811161637306213, |
| "learning_rate": 1.1340905593441914e-05, |
| "loss": 0.0263, |
| "step": 20600 |
| }, |
| { |
| "epoch": 19.760306807286675, |
| "grad_norm": 0.18584440648555756, |
| "learning_rate": 1.1300605926093627e-05, |
| "loss": 0.0259, |
| "step": 20610 |
| }, |
| { |
| "epoch": 19.769894534995206, |
| "grad_norm": 0.15904641151428223, |
| "learning_rate": 1.1260368863878778e-05, |
| "loss": 0.0239, |
| "step": 20620 |
| }, |
| { |
| "epoch": 19.779482262703738, |
| "grad_norm": 0.22534583508968353, |
| "learning_rate": 1.1220194471890027e-05, |
| "loss": 0.0234, |
| "step": 20630 |
| }, |
| { |
| "epoch": 19.789069990412273, |
| "grad_norm": 0.22182218730449677, |
| "learning_rate": 1.1180082815118659e-05, |
| "loss": 0.0255, |
| "step": 20640 |
| }, |
| { |
| "epoch": 19.798657718120804, |
| "grad_norm": 0.13675539195537567, |
| "learning_rate": 1.114003395845446e-05, |
| "loss": 0.0203, |
| "step": 20650 |
| }, |
| { |
| "epoch": 19.80824544582934, |
| "grad_norm": 0.153213232755661, |
| "learning_rate": 1.1100047966685645e-05, |
| "loss": 0.0235, |
| "step": 20660 |
| }, |
| { |
| "epoch": 19.81783317353787, |
| "grad_norm": 0.23550502955913544, |
| "learning_rate": 1.1060124904498686e-05, |
| "loss": 0.0262, |
| "step": 20670 |
| }, |
| { |
| "epoch": 19.827420901246406, |
| "grad_norm": 0.16561271250247955, |
| "learning_rate": 1.10202648364783e-05, |
| "loss": 0.0252, |
| "step": 20680 |
| }, |
| { |
| "epoch": 19.837008628954937, |
| "grad_norm": 0.21752074360847473, |
| "learning_rate": 1.0980467827107265e-05, |
| "loss": 0.025, |
| "step": 20690 |
| }, |
| { |
| "epoch": 19.846596356663472, |
| "grad_norm": 0.3683970868587494, |
| "learning_rate": 1.0940733940766367e-05, |
| "loss": 0.0275, |
| "step": 20700 |
| }, |
| { |
| "epoch": 19.856184084372003, |
| "grad_norm": 0.19650644063949585, |
| "learning_rate": 1.090106324173426e-05, |
| "loss": 0.0227, |
| "step": 20710 |
| }, |
| { |
| "epoch": 19.86577181208054, |
| "grad_norm": 0.3195613622665405, |
| "learning_rate": 1.0861455794187398e-05, |
| "loss": 0.0246, |
| "step": 20720 |
| }, |
| { |
| "epoch": 19.87535953978907, |
| "grad_norm": 0.30373549461364746, |
| "learning_rate": 1.0821911662199874e-05, |
| "loss": 0.0289, |
| "step": 20730 |
| }, |
| { |
| "epoch": 19.8849472674976, |
| "grad_norm": 0.23305653035640717, |
| "learning_rate": 1.0782430909743407e-05, |
| "loss": 0.0317, |
| "step": 20740 |
| }, |
| { |
| "epoch": 19.894534995206136, |
| "grad_norm": 0.19694805145263672, |
| "learning_rate": 1.0743013600687146e-05, |
| "loss": 0.021, |
| "step": 20750 |
| }, |
| { |
| "epoch": 19.904122722914668, |
| "grad_norm": 0.18307265639305115, |
| "learning_rate": 1.0703659798797616e-05, |
| "loss": 0.0215, |
| "step": 20760 |
| }, |
| { |
| "epoch": 19.913710450623203, |
| "grad_norm": 0.15986226499080658, |
| "learning_rate": 1.0664369567738608e-05, |
| "loss": 0.0254, |
| "step": 20770 |
| }, |
| { |
| "epoch": 19.923298178331734, |
| "grad_norm": 0.22868862748146057, |
| "learning_rate": 1.0625142971071067e-05, |
| "loss": 0.0209, |
| "step": 20780 |
| }, |
| { |
| "epoch": 19.93288590604027, |
| "grad_norm": 0.23605976998806, |
| "learning_rate": 1.0585980072253005e-05, |
| "loss": 0.0251, |
| "step": 20790 |
| }, |
| { |
| "epoch": 19.9424736337488, |
| "grad_norm": 0.40362289547920227, |
| "learning_rate": 1.0546880934639364e-05, |
| "loss": 0.0291, |
| "step": 20800 |
| }, |
| { |
| "epoch": 19.952061361457336, |
| "grad_norm": 0.23181037604808807, |
| "learning_rate": 1.0507845621481954e-05, |
| "loss": 0.0239, |
| "step": 20810 |
| }, |
| { |
| "epoch": 19.961649089165867, |
| "grad_norm": 0.17109474539756775, |
| "learning_rate": 1.046887419592935e-05, |
| "loss": 0.0234, |
| "step": 20820 |
| }, |
| { |
| "epoch": 19.971236816874402, |
| "grad_norm": 0.19465407729148865, |
| "learning_rate": 1.0429966721026751e-05, |
| "loss": 0.0215, |
| "step": 20830 |
| }, |
| { |
| "epoch": 19.980824544582934, |
| "grad_norm": 0.22324107587337494, |
| "learning_rate": 1.0391123259715906e-05, |
| "loss": 0.0196, |
| "step": 20840 |
| }, |
| { |
| "epoch": 19.99041227229147, |
| "grad_norm": 0.3217203915119171, |
| "learning_rate": 1.0352343874835018e-05, |
| "loss": 0.0234, |
| "step": 20850 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.33047834038734436, |
| "learning_rate": 1.0313628629118616e-05, |
| "loss": 0.0276, |
| "step": 20860 |
| }, |
| { |
| "epoch": 20.00958772770853, |
| "grad_norm": 0.23915094137191772, |
| "learning_rate": 1.0274977585197482e-05, |
| "loss": 0.0225, |
| "step": 20870 |
| }, |
| { |
| "epoch": 20.019175455417066, |
| "grad_norm": 0.19355012476444244, |
| "learning_rate": 1.0236390805598516e-05, |
| "loss": 0.0232, |
| "step": 20880 |
| }, |
| { |
| "epoch": 20.028763183125598, |
| "grad_norm": 0.4942316710948944, |
| "learning_rate": 1.01978683527447e-05, |
| "loss": 0.0262, |
| "step": 20890 |
| }, |
| { |
| "epoch": 20.038350910834133, |
| "grad_norm": 0.17617733776569366, |
| "learning_rate": 1.0159410288954912e-05, |
| "loss": 0.023, |
| "step": 20900 |
| }, |
| { |
| "epoch": 20.047938638542664, |
| "grad_norm": 0.21215124428272247, |
| "learning_rate": 1.0121016676443878e-05, |
| "loss": 0.0187, |
| "step": 20910 |
| }, |
| { |
| "epoch": 20.0575263662512, |
| "grad_norm": 0.23251253366470337, |
| "learning_rate": 1.008268757732207e-05, |
| "loss": 0.027, |
| "step": 20920 |
| }, |
| { |
| "epoch": 20.06711409395973, |
| "grad_norm": 0.16768194735050201, |
| "learning_rate": 1.0044423053595559e-05, |
| "loss": 0.0182, |
| "step": 20930 |
| }, |
| { |
| "epoch": 20.076701821668266, |
| "grad_norm": 0.19847920536994934, |
| "learning_rate": 1.000622316716599e-05, |
| "loss": 0.0284, |
| "step": 20940 |
| }, |
| { |
| "epoch": 20.086289549376797, |
| "grad_norm": 0.13995741307735443, |
| "learning_rate": 9.968087979830432e-06, |
| "loss": 0.0192, |
| "step": 20950 |
| }, |
| { |
| "epoch": 20.095877277085332, |
| "grad_norm": 0.1870267242193222, |
| "learning_rate": 9.930017553281279e-06, |
| "loss": 0.0259, |
| "step": 20960 |
| }, |
| { |
| "epoch": 20.105465004793864, |
| "grad_norm": 0.22275184094905853, |
| "learning_rate": 9.892011949106172e-06, |
| "loss": 0.0248, |
| "step": 20970 |
| }, |
| { |
| "epoch": 20.1150527325024, |
| "grad_norm": 0.14587286114692688, |
| "learning_rate": 9.854071228787875e-06, |
| "loss": 0.0234, |
| "step": 20980 |
| }, |
| { |
| "epoch": 20.12464046021093, |
| "grad_norm": 0.32902705669403076, |
| "learning_rate": 9.816195453704191e-06, |
| "loss": 0.0233, |
| "step": 20990 |
| }, |
| { |
| "epoch": 20.13422818791946, |
| "grad_norm": 0.17466923594474792, |
| "learning_rate": 9.778384685127867e-06, |
| "loss": 0.023, |
| "step": 21000 |
| }, |
| { |
| "epoch": 20.143815915627997, |
| "grad_norm": 0.17678095400333405, |
| "learning_rate": 9.740638984226481e-06, |
| "loss": 0.0265, |
| "step": 21010 |
| }, |
| { |
| "epoch": 20.153403643336528, |
| "grad_norm": 0.16016939282417297, |
| "learning_rate": 9.70295841206234e-06, |
| "loss": 0.0248, |
| "step": 21020 |
| }, |
| { |
| "epoch": 20.162991371045063, |
| "grad_norm": 0.2382485419511795, |
| "learning_rate": 9.665343029592417e-06, |
| "loss": 0.0233, |
| "step": 21030 |
| }, |
| { |
| "epoch": 20.172579098753594, |
| "grad_norm": 0.24307946860790253, |
| "learning_rate": 9.627792897668175e-06, |
| "loss": 0.025, |
| "step": 21040 |
| }, |
| { |
| "epoch": 20.18216682646213, |
| "grad_norm": 0.4551367461681366, |
| "learning_rate": 9.590308077035592e-06, |
| "loss": 0.0211, |
| "step": 21050 |
| }, |
| { |
| "epoch": 20.19175455417066, |
| "grad_norm": 0.2893312871456146, |
| "learning_rate": 9.55288862833495e-06, |
| "loss": 0.0206, |
| "step": 21060 |
| }, |
| { |
| "epoch": 20.201342281879196, |
| "grad_norm": 0.16855131089687347, |
| "learning_rate": 9.515534612100746e-06, |
| "loss": 0.027, |
| "step": 21070 |
| }, |
| { |
| "epoch": 20.210930009587727, |
| "grad_norm": 0.3295097053050995, |
| "learning_rate": 9.478246088761671e-06, |
| "loss": 0.0282, |
| "step": 21080 |
| }, |
| { |
| "epoch": 20.220517737296262, |
| "grad_norm": 0.1354684680700302, |
| "learning_rate": 9.441023118640457e-06, |
| "loss": 0.0278, |
| "step": 21090 |
| }, |
| { |
| "epoch": 20.230105465004794, |
| "grad_norm": 0.16148221492767334, |
| "learning_rate": 9.403865761953779e-06, |
| "loss": 0.0287, |
| "step": 21100 |
| }, |
| { |
| "epoch": 20.239693192713325, |
| "grad_norm": 0.3596087098121643, |
| "learning_rate": 9.366774078812174e-06, |
| "loss": 0.0273, |
| "step": 21110 |
| }, |
| { |
| "epoch": 20.24928092042186, |
| "grad_norm": 0.24658294022083282, |
| "learning_rate": 9.329748129219934e-06, |
| "loss": 0.0224, |
| "step": 21120 |
| }, |
| { |
| "epoch": 20.25886864813039, |
| "grad_norm": 0.2896967828273773, |
| "learning_rate": 9.292787973075007e-06, |
| "loss": 0.0203, |
| "step": 21130 |
| }, |
| { |
| "epoch": 20.268456375838927, |
| "grad_norm": 0.2359636425971985, |
| "learning_rate": 9.255893670168919e-06, |
| "loss": 0.0241, |
| "step": 21140 |
| }, |
| { |
| "epoch": 20.278044103547458, |
| "grad_norm": 0.24353083968162537, |
| "learning_rate": 9.219065280186656e-06, |
| "loss": 0.0247, |
| "step": 21150 |
| }, |
| { |
| "epoch": 20.287631831255993, |
| "grad_norm": 0.14789700508117676, |
| "learning_rate": 9.182302862706566e-06, |
| "loss": 0.0191, |
| "step": 21160 |
| }, |
| { |
| "epoch": 20.297219558964525, |
| "grad_norm": 0.27849042415618896, |
| "learning_rate": 9.145606477200286e-06, |
| "loss": 0.0217, |
| "step": 21170 |
| }, |
| { |
| "epoch": 20.30680728667306, |
| "grad_norm": 0.4151756763458252, |
| "learning_rate": 9.108976183032613e-06, |
| "loss": 0.0233, |
| "step": 21180 |
| }, |
| { |
| "epoch": 20.31639501438159, |
| "grad_norm": 0.2625637948513031, |
| "learning_rate": 9.072412039461453e-06, |
| "loss": 0.0223, |
| "step": 21190 |
| }, |
| { |
| "epoch": 20.325982742090126, |
| "grad_norm": 0.22075968980789185, |
| "learning_rate": 9.035914105637678e-06, |
| "loss": 0.0239, |
| "step": 21200 |
| }, |
| { |
| "epoch": 20.335570469798657, |
| "grad_norm": 0.22036759555339813, |
| "learning_rate": 8.99948244060505e-06, |
| "loss": 0.0243, |
| "step": 21210 |
| }, |
| { |
| "epoch": 20.345158197507192, |
| "grad_norm": 0.4981054663658142, |
| "learning_rate": 8.963117103300134e-06, |
| "loss": 0.0207, |
| "step": 21220 |
| }, |
| { |
| "epoch": 20.354745925215724, |
| "grad_norm": 0.20227645337581635, |
| "learning_rate": 8.92681815255219e-06, |
| "loss": 0.0198, |
| "step": 21230 |
| }, |
| { |
| "epoch": 20.364333652924255, |
| "grad_norm": 0.24407237768173218, |
| "learning_rate": 8.890585647083088e-06, |
| "loss": 0.0292, |
| "step": 21240 |
| }, |
| { |
| "epoch": 20.37392138063279, |
| "grad_norm": 0.4346962869167328, |
| "learning_rate": 8.8544196455072e-06, |
| "loss": 0.0237, |
| "step": 21250 |
| }, |
| { |
| "epoch": 20.383509108341322, |
| "grad_norm": 0.32540345191955566, |
| "learning_rate": 8.818320206331327e-06, |
| "loss": 0.0237, |
| "step": 21260 |
| }, |
| { |
| "epoch": 20.393096836049857, |
| "grad_norm": 0.2086063176393509, |
| "learning_rate": 8.782287387954563e-06, |
| "loss": 0.0215, |
| "step": 21270 |
| }, |
| { |
| "epoch": 20.40268456375839, |
| "grad_norm": 0.2799685001373291, |
| "learning_rate": 8.74632124866826e-06, |
| "loss": 0.0283, |
| "step": 21280 |
| }, |
| { |
| "epoch": 20.412272291466923, |
| "grad_norm": 0.40446561574935913, |
| "learning_rate": 8.71042184665588e-06, |
| "loss": 0.0244, |
| "step": 21290 |
| }, |
| { |
| "epoch": 20.421860019175455, |
| "grad_norm": 0.20995816588401794, |
| "learning_rate": 8.674589239992931e-06, |
| "loss": 0.0301, |
| "step": 21300 |
| }, |
| { |
| "epoch": 20.43144774688399, |
| "grad_norm": 0.25973740220069885, |
| "learning_rate": 8.638823486646853e-06, |
| "loss": 0.0241, |
| "step": 21310 |
| }, |
| { |
| "epoch": 20.44103547459252, |
| "grad_norm": 0.31719037890434265, |
| "learning_rate": 8.603124644476945e-06, |
| "loss": 0.0207, |
| "step": 21320 |
| }, |
| { |
| "epoch": 20.450623202301056, |
| "grad_norm": 0.1637444943189621, |
| "learning_rate": 8.56749277123427e-06, |
| "loss": 0.0264, |
| "step": 21330 |
| }, |
| { |
| "epoch": 20.460210930009588, |
| "grad_norm": 0.3017114996910095, |
| "learning_rate": 8.531927924561538e-06, |
| "loss": 0.0271, |
| "step": 21340 |
| }, |
| { |
| "epoch": 20.469798657718123, |
| "grad_norm": 0.20100443065166473, |
| "learning_rate": 8.496430161993036e-06, |
| "loss": 0.0247, |
| "step": 21350 |
| }, |
| { |
| "epoch": 20.479386385426654, |
| "grad_norm": 0.2818273603916168, |
| "learning_rate": 8.460999540954517e-06, |
| "loss": 0.0278, |
| "step": 21360 |
| }, |
| { |
| "epoch": 20.488974113135185, |
| "grad_norm": 0.22835665941238403, |
| "learning_rate": 8.425636118763136e-06, |
| "loss": 0.0228, |
| "step": 21370 |
| }, |
| { |
| "epoch": 20.49856184084372, |
| "grad_norm": 0.24139605462551117, |
| "learning_rate": 8.390339952627324e-06, |
| "loss": 0.0279, |
| "step": 21380 |
| }, |
| { |
| "epoch": 20.508149568552252, |
| "grad_norm": 0.17489181458950043, |
| "learning_rate": 8.355111099646712e-06, |
| "loss": 0.0255, |
| "step": 21390 |
| }, |
| { |
| "epoch": 20.517737296260787, |
| "grad_norm": 0.14566893875598907, |
| "learning_rate": 8.319949616812039e-06, |
| "loss": 0.0222, |
| "step": 21400 |
| }, |
| { |
| "epoch": 20.52732502396932, |
| "grad_norm": 0.2523178160190582, |
| "learning_rate": 8.284855561005062e-06, |
| "loss": 0.0194, |
| "step": 21410 |
| }, |
| { |
| "epoch": 20.536912751677853, |
| "grad_norm": 0.20255376398563385, |
| "learning_rate": 8.249828988998448e-06, |
| "loss": 0.0233, |
| "step": 21420 |
| }, |
| { |
| "epoch": 20.546500479386385, |
| "grad_norm": 0.2267649918794632, |
| "learning_rate": 8.214869957455694e-06, |
| "loss": 0.0247, |
| "step": 21430 |
| }, |
| { |
| "epoch": 20.55608820709492, |
| "grad_norm": 0.20469827950000763, |
| "learning_rate": 8.179978522931058e-06, |
| "loss": 0.0196, |
| "step": 21440 |
| }, |
| { |
| "epoch": 20.56567593480345, |
| "grad_norm": 0.2033228874206543, |
| "learning_rate": 8.14515474186941e-06, |
| "loss": 0.0284, |
| "step": 21450 |
| }, |
| { |
| "epoch": 20.575263662511986, |
| "grad_norm": 0.20115645229816437, |
| "learning_rate": 8.1103986706062e-06, |
| "loss": 0.0263, |
| "step": 21460 |
| }, |
| { |
| "epoch": 20.584851390220518, |
| "grad_norm": 0.12906615436077118, |
| "learning_rate": 8.075710365367328e-06, |
| "loss": 0.0207, |
| "step": 21470 |
| }, |
| { |
| "epoch": 20.59443911792905, |
| "grad_norm": 0.2021467238664627, |
| "learning_rate": 8.041089882269082e-06, |
| "loss": 0.0286, |
| "step": 21480 |
| }, |
| { |
| "epoch": 20.604026845637584, |
| "grad_norm": 0.21031218767166138, |
| "learning_rate": 8.00653727731801e-06, |
| "loss": 0.0211, |
| "step": 21490 |
| }, |
| { |
| "epoch": 20.613614573346116, |
| "grad_norm": 0.19011792540550232, |
| "learning_rate": 7.972052606410873e-06, |
| "loss": 0.024, |
| "step": 21500 |
| }, |
| { |
| "epoch": 20.62320230105465, |
| "grad_norm": 0.18954189121723175, |
| "learning_rate": 7.937635925334525e-06, |
| "loss": 0.0273, |
| "step": 21510 |
| }, |
| { |
| "epoch": 20.632790028763182, |
| "grad_norm": 0.14838428795337677, |
| "learning_rate": 7.903287289765826e-06, |
| "loss": 0.0218, |
| "step": 21520 |
| }, |
| { |
| "epoch": 20.642377756471717, |
| "grad_norm": 0.5171023607254028, |
| "learning_rate": 7.869006755271568e-06, |
| "loss": 0.0221, |
| "step": 21530 |
| }, |
| { |
| "epoch": 20.65196548418025, |
| "grad_norm": 0.21473269164562225, |
| "learning_rate": 7.834794377308358e-06, |
| "loss": 0.0247, |
| "step": 21540 |
| }, |
| { |
| "epoch": 20.661553211888783, |
| "grad_norm": 0.37152165174484253, |
| "learning_rate": 7.800650211222554e-06, |
| "loss": 0.0258, |
| "step": 21550 |
| }, |
| { |
| "epoch": 20.671140939597315, |
| "grad_norm": 0.23063114285469055, |
| "learning_rate": 7.766574312250168e-06, |
| "loss": 0.0208, |
| "step": 21560 |
| }, |
| { |
| "epoch": 20.68072866730585, |
| "grad_norm": 0.12213137745857239, |
| "learning_rate": 7.732566735516777e-06, |
| "loss": 0.0229, |
| "step": 21570 |
| }, |
| { |
| "epoch": 20.69031639501438, |
| "grad_norm": 0.22326141595840454, |
| "learning_rate": 7.698627536037411e-06, |
| "loss": 0.0248, |
| "step": 21580 |
| }, |
| { |
| "epoch": 20.699904122722916, |
| "grad_norm": 0.21889561414718628, |
| "learning_rate": 7.664756768716513e-06, |
| "loss": 0.0218, |
| "step": 21590 |
| }, |
| { |
| "epoch": 20.709491850431448, |
| "grad_norm": 0.16505682468414307, |
| "learning_rate": 7.630954488347797e-06, |
| "loss": 0.0247, |
| "step": 21600 |
| }, |
| { |
| "epoch": 20.71907957813998, |
| "grad_norm": 0.18476873636245728, |
| "learning_rate": 7.5972207496142036e-06, |
| "loss": 0.0279, |
| "step": 21610 |
| }, |
| { |
| "epoch": 20.728667305848514, |
| "grad_norm": 0.18805384635925293, |
| "learning_rate": 7.56355560708778e-06, |
| "loss": 0.0269, |
| "step": 21620 |
| }, |
| { |
| "epoch": 20.738255033557046, |
| "grad_norm": 0.19865307211875916, |
| "learning_rate": 7.52995911522959e-06, |
| "loss": 0.0231, |
| "step": 21630 |
| }, |
| { |
| "epoch": 20.74784276126558, |
| "grad_norm": 0.25113141536712646, |
| "learning_rate": 7.496431328389658e-06, |
| "loss": 0.02, |
| "step": 21640 |
| }, |
| { |
| "epoch": 20.757430488974112, |
| "grad_norm": 0.2101268321275711, |
| "learning_rate": 7.4629723008068584e-06, |
| "loss": 0.0275, |
| "step": 21650 |
| }, |
| { |
| "epoch": 20.767018216682647, |
| "grad_norm": 0.2648563086986542, |
| "learning_rate": 7.429582086608849e-06, |
| "loss": 0.0239, |
| "step": 21660 |
| }, |
| { |
| "epoch": 20.77660594439118, |
| "grad_norm": 0.19610466063022614, |
| "learning_rate": 7.396260739811933e-06, |
| "loss": 0.0248, |
| "step": 21670 |
| }, |
| { |
| "epoch": 20.786193672099714, |
| "grad_norm": 0.22526168823242188, |
| "learning_rate": 7.363008314321024e-06, |
| "loss": 0.0185, |
| "step": 21680 |
| }, |
| { |
| "epoch": 20.795781399808245, |
| "grad_norm": 0.20826375484466553, |
| "learning_rate": 7.3298248639295405e-06, |
| "loss": 0.0215, |
| "step": 21690 |
| }, |
| { |
| "epoch": 20.80536912751678, |
| "grad_norm": 0.2082778960466385, |
| "learning_rate": 7.296710442319305e-06, |
| "loss": 0.0246, |
| "step": 21700 |
| }, |
| { |
| "epoch": 20.81495685522531, |
| "grad_norm": 0.19884945452213287, |
| "learning_rate": 7.2636651030604855e-06, |
| "loss": 0.0197, |
| "step": 21710 |
| }, |
| { |
| "epoch": 20.824544582933846, |
| "grad_norm": 0.30188602209091187, |
| "learning_rate": 7.230688899611487e-06, |
| "loss": 0.0188, |
| "step": 21720 |
| }, |
| { |
| "epoch": 20.834132310642378, |
| "grad_norm": 0.2548470199108124, |
| "learning_rate": 7.197781885318866e-06, |
| "loss": 0.0223, |
| "step": 21730 |
| }, |
| { |
| "epoch": 20.84372003835091, |
| "grad_norm": 0.42960646748542786, |
| "learning_rate": 7.16494411341726e-06, |
| "loss": 0.0265, |
| "step": 21740 |
| }, |
| { |
| "epoch": 20.853307766059444, |
| "grad_norm": 0.1879805475473404, |
| "learning_rate": 7.132175637029293e-06, |
| "loss": 0.0225, |
| "step": 21750 |
| }, |
| { |
| "epoch": 20.862895493767976, |
| "grad_norm": 0.29146450757980347, |
| "learning_rate": 7.099476509165459e-06, |
| "loss": 0.0254, |
| "step": 21760 |
| }, |
| { |
| "epoch": 20.87248322147651, |
| "grad_norm": 0.27178776264190674, |
| "learning_rate": 7.066846782724107e-06, |
| "loss": 0.0253, |
| "step": 21770 |
| }, |
| { |
| "epoch": 20.882070949185042, |
| "grad_norm": 0.1681770384311676, |
| "learning_rate": 7.034286510491278e-06, |
| "loss": 0.02, |
| "step": 21780 |
| }, |
| { |
| "epoch": 20.891658676893577, |
| "grad_norm": 0.17788025736808777, |
| "learning_rate": 7.001795745140683e-06, |
| "loss": 0.0265, |
| "step": 21790 |
| }, |
| { |
| "epoch": 20.90124640460211, |
| "grad_norm": 0.29857704043388367, |
| "learning_rate": 6.969374539233553e-06, |
| "loss": 0.0193, |
| "step": 21800 |
| }, |
| { |
| "epoch": 20.910834132310644, |
| "grad_norm": 0.3122943937778473, |
| "learning_rate": 6.937022945218647e-06, |
| "loss": 0.0252, |
| "step": 21810 |
| }, |
| { |
| "epoch": 20.920421860019175, |
| "grad_norm": 0.22873246669769287, |
| "learning_rate": 6.904741015432059e-06, |
| "loss": 0.0292, |
| "step": 21820 |
| }, |
| { |
| "epoch": 20.93000958772771, |
| "grad_norm": 0.23916493356227875, |
| "learning_rate": 6.872528802097211e-06, |
| "loss": 0.0224, |
| "step": 21830 |
| }, |
| { |
| "epoch": 20.93959731543624, |
| "grad_norm": 0.16214150190353394, |
| "learning_rate": 6.84038635732473e-06, |
| "loss": 0.0225, |
| "step": 21840 |
| }, |
| { |
| "epoch": 20.949185043144773, |
| "grad_norm": 0.2523308992385864, |
| "learning_rate": 6.808313733112387e-06, |
| "loss": 0.0237, |
| "step": 21850 |
| }, |
| { |
| "epoch": 20.958772770853308, |
| "grad_norm": 0.1933407187461853, |
| "learning_rate": 6.776310981344996e-06, |
| "loss": 0.021, |
| "step": 21860 |
| }, |
| { |
| "epoch": 20.96836049856184, |
| "grad_norm": 0.1911810338497162, |
| "learning_rate": 6.744378153794334e-06, |
| "loss": 0.0242, |
| "step": 21870 |
| }, |
| { |
| "epoch": 20.977948226270374, |
| "grad_norm": 0.2139926254749298, |
| "learning_rate": 6.712515302119077e-06, |
| "loss": 0.021, |
| "step": 21880 |
| }, |
| { |
| "epoch": 20.987535953978906, |
| "grad_norm": 0.3361279368400574, |
| "learning_rate": 6.680722477864665e-06, |
| "loss": 0.0263, |
| "step": 21890 |
| }, |
| { |
| "epoch": 20.99712368168744, |
| "grad_norm": 0.14909407496452332, |
| "learning_rate": 6.648999732463284e-06, |
| "loss": 0.0214, |
| "step": 21900 |
| }, |
| { |
| "epoch": 21.006711409395972, |
| "grad_norm": 0.318256139755249, |
| "learning_rate": 6.617347117233735e-06, |
| "loss": 0.0296, |
| "step": 21910 |
| }, |
| { |
| "epoch": 21.016299137104507, |
| "grad_norm": 0.15366911888122559, |
| "learning_rate": 6.585764683381379e-06, |
| "loss": 0.0262, |
| "step": 21920 |
| }, |
| { |
| "epoch": 21.02588686481304, |
| "grad_norm": 0.3859999179840088, |
| "learning_rate": 6.554252481998035e-06, |
| "loss": 0.0229, |
| "step": 21930 |
| }, |
| { |
| "epoch": 21.035474592521574, |
| "grad_norm": 0.22637765109539032, |
| "learning_rate": 6.522810564061899e-06, |
| "loss": 0.0284, |
| "step": 21940 |
| }, |
| { |
| "epoch": 21.045062320230105, |
| "grad_norm": 0.2992878556251526, |
| "learning_rate": 6.491438980437475e-06, |
| "loss": 0.0254, |
| "step": 21950 |
| }, |
| { |
| "epoch": 21.05465004793864, |
| "grad_norm": 0.2881068289279938, |
| "learning_rate": 6.460137781875497e-06, |
| "loss": 0.029, |
| "step": 21960 |
| }, |
| { |
| "epoch": 21.06423777564717, |
| "grad_norm": 0.19176606833934784, |
| "learning_rate": 6.4289070190128196e-06, |
| "loss": 0.0232, |
| "step": 21970 |
| }, |
| { |
| "epoch": 21.073825503355703, |
| "grad_norm": 0.1914961189031601, |
| "learning_rate": 6.3977467423723516e-06, |
| "loss": 0.0245, |
| "step": 21980 |
| }, |
| { |
| "epoch": 21.083413231064238, |
| "grad_norm": 0.31313207745552063, |
| "learning_rate": 6.366657002362975e-06, |
| "loss": 0.0296, |
| "step": 21990 |
| }, |
| { |
| "epoch": 21.09300095877277, |
| "grad_norm": 0.2076486051082611, |
| "learning_rate": 6.335637849279464e-06, |
| "loss": 0.0236, |
| "step": 22000 |
| }, |
| { |
| "epoch": 21.102588686481305, |
| "grad_norm": 0.27381840348243713, |
| "learning_rate": 6.304689333302416e-06, |
| "loss": 0.027, |
| "step": 22010 |
| }, |
| { |
| "epoch": 21.112176414189836, |
| "grad_norm": 0.24494417011737823, |
| "learning_rate": 6.2738115044981225e-06, |
| "loss": 0.0248, |
| "step": 22020 |
| }, |
| { |
| "epoch": 21.12176414189837, |
| "grad_norm": 0.18255186080932617, |
| "learning_rate": 6.24300441281856e-06, |
| "loss": 0.0209, |
| "step": 22030 |
| }, |
| { |
| "epoch": 21.131351869606902, |
| "grad_norm": 0.2896704077720642, |
| "learning_rate": 6.212268108101249e-06, |
| "loss": 0.0254, |
| "step": 22040 |
| }, |
| { |
| "epoch": 21.140939597315437, |
| "grad_norm": 0.19372302293777466, |
| "learning_rate": 6.1816026400692006e-06, |
| "loss": 0.0275, |
| "step": 22050 |
| }, |
| { |
| "epoch": 21.15052732502397, |
| "grad_norm": 0.19685117900371552, |
| "learning_rate": 6.151008058330832e-06, |
| "loss": 0.0244, |
| "step": 22060 |
| }, |
| { |
| "epoch": 21.160115052732504, |
| "grad_norm": 0.18076103925704956, |
| "learning_rate": 6.120484412379896e-06, |
| "loss": 0.0191, |
| "step": 22070 |
| }, |
| { |
| "epoch": 21.169702780441035, |
| "grad_norm": 0.37527182698249817, |
| "learning_rate": 6.090031751595371e-06, |
| "loss": 0.029, |
| "step": 22080 |
| }, |
| { |
| "epoch": 21.179290508149567, |
| "grad_norm": 0.1315630078315735, |
| "learning_rate": 6.059650125241412e-06, |
| "loss": 0.0222, |
| "step": 22090 |
| }, |
| { |
| "epoch": 21.188878235858102, |
| "grad_norm": 0.15893638134002686, |
| "learning_rate": 6.029339582467253e-06, |
| "loss": 0.0204, |
| "step": 22100 |
| }, |
| { |
| "epoch": 21.198465963566633, |
| "grad_norm": 0.27391794323921204, |
| "learning_rate": 5.999100172307154e-06, |
| "loss": 0.0251, |
| "step": 22110 |
| }, |
| { |
| "epoch": 21.20805369127517, |
| "grad_norm": 0.1774057298898697, |
| "learning_rate": 5.968931943680284e-06, |
| "loss": 0.0214, |
| "step": 22120 |
| }, |
| { |
| "epoch": 21.2176414189837, |
| "grad_norm": 0.18632689118385315, |
| "learning_rate": 5.938834945390653e-06, |
| "loss": 0.0222, |
| "step": 22130 |
| }, |
| { |
| "epoch": 21.227229146692235, |
| "grad_norm": 0.19212083518505096, |
| "learning_rate": 5.908809226127054e-06, |
| "loss": 0.0233, |
| "step": 22140 |
| }, |
| { |
| "epoch": 21.236816874400766, |
| "grad_norm": 0.1936277598142624, |
| "learning_rate": 5.878854834462977e-06, |
| "loss": 0.0188, |
| "step": 22150 |
| }, |
| { |
| "epoch": 21.2464046021093, |
| "grad_norm": 0.23681025207042694, |
| "learning_rate": 5.848971818856486e-06, |
| "loss": 0.0231, |
| "step": 22160 |
| }, |
| { |
| "epoch": 21.255992329817833, |
| "grad_norm": 0.13978900015354156, |
| "learning_rate": 5.819160227650216e-06, |
| "loss": 0.0176, |
| "step": 22170 |
| }, |
| { |
| "epoch": 21.265580057526368, |
| "grad_norm": 0.20834662020206451, |
| "learning_rate": 5.789420109071242e-06, |
| "loss": 0.0256, |
| "step": 22180 |
| }, |
| { |
| "epoch": 21.2751677852349, |
| "grad_norm": 0.15531818568706512, |
| "learning_rate": 5.759751511231021e-06, |
| "loss": 0.0237, |
| "step": 22190 |
| }, |
| { |
| "epoch": 21.284755512943434, |
| "grad_norm": 0.38519012928009033, |
| "learning_rate": 5.7301544821253054e-06, |
| "loss": 0.0213, |
| "step": 22200 |
| }, |
| { |
| "epoch": 21.294343240651965, |
| "grad_norm": 0.17564308643341064, |
| "learning_rate": 5.700629069634061e-06, |
| "loss": 0.0224, |
| "step": 22210 |
| }, |
| { |
| "epoch": 21.303930968360497, |
| "grad_norm": 0.21635904908180237, |
| "learning_rate": 5.67117532152141e-06, |
| "loss": 0.0231, |
| "step": 22220 |
| }, |
| { |
| "epoch": 21.313518696069032, |
| "grad_norm": 0.2563495934009552, |
| "learning_rate": 5.641793285435537e-06, |
| "loss": 0.03, |
| "step": 22230 |
| }, |
| { |
| "epoch": 21.323106423777563, |
| "grad_norm": 0.1372513771057129, |
| "learning_rate": 5.612483008908609e-06, |
| "loss": 0.0205, |
| "step": 22240 |
| }, |
| { |
| "epoch": 21.3326941514861, |
| "grad_norm": 0.4297633767127991, |
| "learning_rate": 5.583244539356719e-06, |
| "loss": 0.0283, |
| "step": 22250 |
| }, |
| { |
| "epoch": 21.34228187919463, |
| "grad_norm": 0.18425339460372925, |
| "learning_rate": 5.554077924079776e-06, |
| "loss": 0.0254, |
| "step": 22260 |
| }, |
| { |
| "epoch": 21.351869606903165, |
| "grad_norm": 0.24806487560272217, |
| "learning_rate": 5.524983210261481e-06, |
| "loss": 0.0186, |
| "step": 22270 |
| }, |
| { |
| "epoch": 21.361457334611696, |
| "grad_norm": 0.12550103664398193, |
| "learning_rate": 5.495960444969189e-06, |
| "loss": 0.0221, |
| "step": 22280 |
| }, |
| { |
| "epoch": 21.37104506232023, |
| "grad_norm": 0.40927961468696594, |
| "learning_rate": 5.467009675153861e-06, |
| "loss": 0.0215, |
| "step": 22290 |
| }, |
| { |
| "epoch": 21.380632790028763, |
| "grad_norm": 0.3342265188694, |
| "learning_rate": 5.438130947650006e-06, |
| "loss": 0.0274, |
| "step": 22300 |
| }, |
| { |
| "epoch": 21.390220517737298, |
| "grad_norm": 0.23332703113555908, |
| "learning_rate": 5.409324309175573e-06, |
| "loss": 0.0213, |
| "step": 22310 |
| }, |
| { |
| "epoch": 21.39980824544583, |
| "grad_norm": 0.6654828786849976, |
| "learning_rate": 5.380589806331904e-06, |
| "loss": 0.0305, |
| "step": 22320 |
| }, |
| { |
| "epoch": 21.409395973154364, |
| "grad_norm": 0.18974971771240234, |
| "learning_rate": 5.3519274856036414e-06, |
| "loss": 0.0255, |
| "step": 22330 |
| }, |
| { |
| "epoch": 21.418983700862896, |
| "grad_norm": 0.2501462697982788, |
| "learning_rate": 5.3233373933586405e-06, |
| "loss": 0.0273, |
| "step": 22340 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 0.205572247505188, |
| "learning_rate": 5.294819575847937e-06, |
| "loss": 0.0234, |
| "step": 22350 |
| }, |
| { |
| "epoch": 21.438159156279962, |
| "grad_norm": 0.2879716157913208, |
| "learning_rate": 5.266374079205627e-06, |
| "loss": 0.0218, |
| "step": 22360 |
| }, |
| { |
| "epoch": 21.447746883988493, |
| "grad_norm": 0.2402382642030716, |
| "learning_rate": 5.238000949448818e-06, |
| "loss": 0.0261, |
| "step": 22370 |
| }, |
| { |
| "epoch": 21.45733461169703, |
| "grad_norm": 0.23849613964557648, |
| "learning_rate": 5.209700232477543e-06, |
| "loss": 0.0244, |
| "step": 22380 |
| }, |
| { |
| "epoch": 21.46692233940556, |
| "grad_norm": 0.21024039387702942, |
| "learning_rate": 5.181471974074692e-06, |
| "loss": 0.0229, |
| "step": 22390 |
| }, |
| { |
| "epoch": 21.476510067114095, |
| "grad_norm": 0.3411503732204437, |
| "learning_rate": 5.153316219905946e-06, |
| "loss": 0.0259, |
| "step": 22400 |
| }, |
| { |
| "epoch": 21.486097794822626, |
| "grad_norm": 0.22467151284217834, |
| "learning_rate": 5.1252330155196756e-06, |
| "loss": 0.0234, |
| "step": 22410 |
| }, |
| { |
| "epoch": 21.49568552253116, |
| "grad_norm": 0.29987016320228577, |
| "learning_rate": 5.097222406346908e-06, |
| "loss": 0.0273, |
| "step": 22420 |
| }, |
| { |
| "epoch": 21.505273250239693, |
| "grad_norm": 0.26795509457588196, |
| "learning_rate": 5.06928443770121e-06, |
| "loss": 0.0217, |
| "step": 22430 |
| }, |
| { |
| "epoch": 21.514860977948228, |
| "grad_norm": 0.1902526170015335, |
| "learning_rate": 5.041419154778648e-06, |
| "loss": 0.0237, |
| "step": 22440 |
| }, |
| { |
| "epoch": 21.52444870565676, |
| "grad_norm": 0.27109450101852417, |
| "learning_rate": 5.0136266026577e-06, |
| "loss": 0.0239, |
| "step": 22450 |
| }, |
| { |
| "epoch": 21.53403643336529, |
| "grad_norm": 0.3726276457309723, |
| "learning_rate": 4.9859068262991805e-06, |
| "loss": 0.0297, |
| "step": 22460 |
| }, |
| { |
| "epoch": 21.543624161073826, |
| "grad_norm": 0.19683849811553955, |
| "learning_rate": 4.958259870546178e-06, |
| "loss": 0.0246, |
| "step": 22470 |
| }, |
| { |
| "epoch": 21.553211888782357, |
| "grad_norm": 0.2269980013370514, |
| "learning_rate": 4.930685780123978e-06, |
| "loss": 0.0209, |
| "step": 22480 |
| }, |
| { |
| "epoch": 21.562799616490892, |
| "grad_norm": 0.19175530970096588, |
| "learning_rate": 4.903184599639987e-06, |
| "loss": 0.0231, |
| "step": 22490 |
| }, |
| { |
| "epoch": 21.572387344199424, |
| "grad_norm": 0.14310085773468018, |
| "learning_rate": 4.875756373583662e-06, |
| "loss": 0.0313, |
| "step": 22500 |
| }, |
| { |
| "epoch": 21.58197507190796, |
| "grad_norm": 0.18566519021987915, |
| "learning_rate": 4.848401146326442e-06, |
| "loss": 0.0204, |
| "step": 22510 |
| }, |
| { |
| "epoch": 21.59156279961649, |
| "grad_norm": 0.1244194358587265, |
| "learning_rate": 4.821118962121668e-06, |
| "loss": 0.022, |
| "step": 22520 |
| }, |
| { |
| "epoch": 21.601150527325025, |
| "grad_norm": 0.3905356228351593, |
| "learning_rate": 4.7939098651045235e-06, |
| "loss": 0.0243, |
| "step": 22530 |
| }, |
| { |
| "epoch": 21.610738255033556, |
| "grad_norm": 0.19283372163772583, |
| "learning_rate": 4.76677389929196e-06, |
| "loss": 0.0224, |
| "step": 22540 |
| }, |
| { |
| "epoch": 21.62032598274209, |
| "grad_norm": 0.27373161911964417, |
| "learning_rate": 4.739711108582612e-06, |
| "loss": 0.0234, |
| "step": 22550 |
| }, |
| { |
| "epoch": 21.629913710450623, |
| "grad_norm": 0.23337620496749878, |
| "learning_rate": 4.712721536756743e-06, |
| "loss": 0.0185, |
| "step": 22560 |
| }, |
| { |
| "epoch": 21.639501438159158, |
| "grad_norm": 0.22057722508907318, |
| "learning_rate": 4.685805227476164e-06, |
| "loss": 0.019, |
| "step": 22570 |
| }, |
| { |
| "epoch": 21.64908916586769, |
| "grad_norm": 0.18951620161533356, |
| "learning_rate": 4.65896222428418e-06, |
| "loss": 0.0239, |
| "step": 22580 |
| }, |
| { |
| "epoch": 21.65867689357622, |
| "grad_norm": 0.19423332810401917, |
| "learning_rate": 4.632192570605481e-06, |
| "loss": 0.024, |
| "step": 22590 |
| }, |
| { |
| "epoch": 21.668264621284756, |
| "grad_norm": 0.21294209361076355, |
| "learning_rate": 4.605496309746127e-06, |
| "loss": 0.0248, |
| "step": 22600 |
| }, |
| { |
| "epoch": 21.677852348993287, |
| "grad_norm": 0.18906791508197784, |
| "learning_rate": 4.578873484893431e-06, |
| "loss": 0.0173, |
| "step": 22610 |
| }, |
| { |
| "epoch": 21.687440076701822, |
| "grad_norm": 0.24385139346122742, |
| "learning_rate": 4.552324139115905e-06, |
| "loss": 0.026, |
| "step": 22620 |
| }, |
| { |
| "epoch": 21.697027804410354, |
| "grad_norm": 0.14667080342769623, |
| "learning_rate": 4.525848315363196e-06, |
| "loss": 0.0178, |
| "step": 22630 |
| }, |
| { |
| "epoch": 21.70661553211889, |
| "grad_norm": 0.15309500694274902, |
| "learning_rate": 4.499446056466022e-06, |
| "loss": 0.0225, |
| "step": 22640 |
| }, |
| { |
| "epoch": 21.71620325982742, |
| "grad_norm": 0.7658588886260986, |
| "learning_rate": 4.473117405136073e-06, |
| "loss": 0.025, |
| "step": 22650 |
| }, |
| { |
| "epoch": 21.725790987535955, |
| "grad_norm": 0.27880871295928955, |
| "learning_rate": 4.446862403965984e-06, |
| "loss": 0.0179, |
| "step": 22660 |
| }, |
| { |
| "epoch": 21.735378715244487, |
| "grad_norm": 0.24886669218540192, |
| "learning_rate": 4.420681095429219e-06, |
| "loss": 0.0261, |
| "step": 22670 |
| }, |
| { |
| "epoch": 21.74496644295302, |
| "grad_norm": 0.21482285857200623, |
| "learning_rate": 4.394573521880052e-06, |
| "loss": 0.019, |
| "step": 22680 |
| }, |
| { |
| "epoch": 21.754554170661553, |
| "grad_norm": 0.2901485860347748, |
| "learning_rate": 4.368539725553461e-06, |
| "loss": 0.0284, |
| "step": 22690 |
| }, |
| { |
| "epoch": 21.764141898370085, |
| "grad_norm": 0.2559397518634796, |
| "learning_rate": 4.342579748565068e-06, |
| "loss": 0.0241, |
| "step": 22700 |
| }, |
| { |
| "epoch": 21.77372962607862, |
| "grad_norm": 0.24078018963336945, |
| "learning_rate": 4.316693632911089e-06, |
| "loss": 0.0234, |
| "step": 22710 |
| }, |
| { |
| "epoch": 21.78331735378715, |
| "grad_norm": 0.23349763453006744, |
| "learning_rate": 4.2908814204682405e-06, |
| "loss": 0.0204, |
| "step": 22720 |
| }, |
| { |
| "epoch": 21.792905081495686, |
| "grad_norm": 0.2100505232810974, |
| "learning_rate": 4.265143152993695e-06, |
| "loss": 0.0271, |
| "step": 22730 |
| }, |
| { |
| "epoch": 21.802492809204217, |
| "grad_norm": 0.41341936588287354, |
| "learning_rate": 4.23947887212498e-06, |
| "loss": 0.0319, |
| "step": 22740 |
| }, |
| { |
| "epoch": 21.812080536912752, |
| "grad_norm": 0.17779189348220825, |
| "learning_rate": 4.213888619379963e-06, |
| "loss": 0.0163, |
| "step": 22750 |
| }, |
| { |
| "epoch": 21.821668264621284, |
| "grad_norm": 0.15619170665740967, |
| "learning_rate": 4.188372436156734e-06, |
| "loss": 0.0233, |
| "step": 22760 |
| }, |
| { |
| "epoch": 21.83125599232982, |
| "grad_norm": 0.22723935544490814, |
| "learning_rate": 4.162930363733558e-06, |
| "loss": 0.0236, |
| "step": 22770 |
| }, |
| { |
| "epoch": 21.84084372003835, |
| "grad_norm": 0.16326063871383667, |
| "learning_rate": 4.137562443268822e-06, |
| "loss": 0.0195, |
| "step": 22780 |
| }, |
| { |
| "epoch": 21.850431447746885, |
| "grad_norm": 0.2659025490283966, |
| "learning_rate": 4.112268715800943e-06, |
| "loss": 0.0242, |
| "step": 22790 |
| }, |
| { |
| "epoch": 21.860019175455417, |
| "grad_norm": 0.22254447638988495, |
| "learning_rate": 4.087049222248324e-06, |
| "loss": 0.0217, |
| "step": 22800 |
| }, |
| { |
| "epoch": 21.86960690316395, |
| "grad_norm": 0.19777144491672516, |
| "learning_rate": 4.061904003409261e-06, |
| "loss": 0.0195, |
| "step": 22810 |
| }, |
| { |
| "epoch": 21.879194630872483, |
| "grad_norm": 0.251908540725708, |
| "learning_rate": 4.036833099961912e-06, |
| "loss": 0.0232, |
| "step": 22820 |
| }, |
| { |
| "epoch": 21.888782358581015, |
| "grad_norm": 0.1795634925365448, |
| "learning_rate": 4.0118365524642095e-06, |
| "loss": 0.0274, |
| "step": 22830 |
| }, |
| { |
| "epoch": 21.89837008628955, |
| "grad_norm": 0.2238956242799759, |
| "learning_rate": 3.986914401353797e-06, |
| "loss": 0.0198, |
| "step": 22840 |
| }, |
| { |
| "epoch": 21.90795781399808, |
| "grad_norm": 0.13608454167842865, |
| "learning_rate": 3.96206668694794e-06, |
| "loss": 0.0206, |
| "step": 22850 |
| }, |
| { |
| "epoch": 21.917545541706616, |
| "grad_norm": 0.32671546936035156, |
| "learning_rate": 3.93729344944353e-06, |
| "loss": 0.0222, |
| "step": 22860 |
| }, |
| { |
| "epoch": 21.927133269415148, |
| "grad_norm": 0.15036197006702423, |
| "learning_rate": 3.912594728916929e-06, |
| "loss": 0.0259, |
| "step": 22870 |
| }, |
| { |
| "epoch": 21.936720997123683, |
| "grad_norm": 0.2388329952955246, |
| "learning_rate": 3.887970565324006e-06, |
| "loss": 0.0273, |
| "step": 22880 |
| }, |
| { |
| "epoch": 21.946308724832214, |
| "grad_norm": 0.2434564232826233, |
| "learning_rate": 3.8634209984999615e-06, |
| "loss": 0.018, |
| "step": 22890 |
| }, |
| { |
| "epoch": 21.95589645254075, |
| "grad_norm": 0.16361406445503235, |
| "learning_rate": 3.8389460681593545e-06, |
| "loss": 0.0264, |
| "step": 22900 |
| }, |
| { |
| "epoch": 21.96548418024928, |
| "grad_norm": 0.4144115149974823, |
| "learning_rate": 3.8145458138959865e-06, |
| "loss": 0.0238, |
| "step": 22910 |
| }, |
| { |
| "epoch": 21.975071907957815, |
| "grad_norm": 0.1729598045349121, |
| "learning_rate": 3.790220275182854e-06, |
| "loss": 0.0276, |
| "step": 22920 |
| }, |
| { |
| "epoch": 21.984659635666347, |
| "grad_norm": 0.2305474728345871, |
| "learning_rate": 3.7659694913720956e-06, |
| "loss": 0.0209, |
| "step": 22930 |
| }, |
| { |
| "epoch": 21.994247363374882, |
| "grad_norm": 0.11657913029193878, |
| "learning_rate": 3.741793501694901e-06, |
| "loss": 0.0214, |
| "step": 22940 |
| }, |
| { |
| "epoch": 22.003835091083413, |
| "grad_norm": 0.15219245851039886, |
| "learning_rate": 3.71769234526147e-06, |
| "loss": 0.0187, |
| "step": 22950 |
| }, |
| { |
| "epoch": 22.013422818791945, |
| "grad_norm": 0.2325374186038971, |
| "learning_rate": 3.6936660610609465e-06, |
| "loss": 0.0256, |
| "step": 22960 |
| }, |
| { |
| "epoch": 22.02301054650048, |
| "grad_norm": 0.15524373948574066, |
| "learning_rate": 3.6697146879613564e-06, |
| "loss": 0.0209, |
| "step": 22970 |
| }, |
| { |
| "epoch": 22.03259827420901, |
| "grad_norm": 0.2640591263771057, |
| "learning_rate": 3.645838264709517e-06, |
| "loss": 0.0192, |
| "step": 22980 |
| }, |
| { |
| "epoch": 22.042186001917546, |
| "grad_norm": 0.15865999460220337, |
| "learning_rate": 3.6220368299310136e-06, |
| "loss": 0.0207, |
| "step": 22990 |
| }, |
| { |
| "epoch": 22.051773729626078, |
| "grad_norm": 0.20118165016174316, |
| "learning_rate": 3.5983104221301244e-06, |
| "loss": 0.0217, |
| "step": 23000 |
| }, |
| { |
| "epoch": 22.061361457334613, |
| "grad_norm": 0.40763455629348755, |
| "learning_rate": 3.5746590796897404e-06, |
| "loss": 0.0239, |
| "step": 23010 |
| }, |
| { |
| "epoch": 22.070949185043144, |
| "grad_norm": 0.18958386778831482, |
| "learning_rate": 3.551082840871328e-06, |
| "loss": 0.025, |
| "step": 23020 |
| }, |
| { |
| "epoch": 22.08053691275168, |
| "grad_norm": 0.2477806657552719, |
| "learning_rate": 3.5275817438148616e-06, |
| "loss": 0.0189, |
| "step": 23030 |
| }, |
| { |
| "epoch": 22.09012464046021, |
| "grad_norm": 0.1568249762058258, |
| "learning_rate": 3.504155826538741e-06, |
| "loss": 0.0222, |
| "step": 23040 |
| }, |
| { |
| "epoch": 22.099712368168746, |
| "grad_norm": 0.2600797116756439, |
| "learning_rate": 3.4808051269397512e-06, |
| "loss": 0.0238, |
| "step": 23050 |
| }, |
| { |
| "epoch": 22.109300095877277, |
| "grad_norm": 0.18155524134635925, |
| "learning_rate": 3.457529682793004e-06, |
| "loss": 0.022, |
| "step": 23060 |
| }, |
| { |
| "epoch": 22.11888782358581, |
| "grad_norm": 0.1558566689491272, |
| "learning_rate": 3.4343295317518565e-06, |
| "loss": 0.0225, |
| "step": 23070 |
| }, |
| { |
| "epoch": 22.128475551294343, |
| "grad_norm": 0.23820623755455017, |
| "learning_rate": 3.4112047113478653e-06, |
| "loss": 0.0242, |
| "step": 23080 |
| }, |
| { |
| "epoch": 22.138063279002875, |
| "grad_norm": 0.1538003832101822, |
| "learning_rate": 3.3881552589907216e-06, |
| "loss": 0.0214, |
| "step": 23090 |
| }, |
| { |
| "epoch": 22.14765100671141, |
| "grad_norm": 0.21073570847511292, |
| "learning_rate": 3.36518121196821e-06, |
| "loss": 0.0243, |
| "step": 23100 |
| }, |
| { |
| "epoch": 22.15723873441994, |
| "grad_norm": 0.1772642582654953, |
| "learning_rate": 3.34228260744609e-06, |
| "loss": 0.0228, |
| "step": 23110 |
| }, |
| { |
| "epoch": 22.166826462128476, |
| "grad_norm": 0.18608751893043518, |
| "learning_rate": 3.3194594824681123e-06, |
| "loss": 0.0229, |
| "step": 23120 |
| }, |
| { |
| "epoch": 22.176414189837008, |
| "grad_norm": 0.14585159718990326, |
| "learning_rate": 3.2967118739559045e-06, |
| "loss": 0.0216, |
| "step": 23130 |
| }, |
| { |
| "epoch": 22.186001917545543, |
| "grad_norm": 0.18371617794036865, |
| "learning_rate": 3.2740398187089405e-06, |
| "loss": 0.021, |
| "step": 23140 |
| }, |
| { |
| "epoch": 22.195589645254074, |
| "grad_norm": 0.11511317640542984, |
| "learning_rate": 3.2514433534044544e-06, |
| "loss": 0.023, |
| "step": 23150 |
| }, |
| { |
| "epoch": 22.20517737296261, |
| "grad_norm": 0.22962027788162231, |
| "learning_rate": 3.2289225145974046e-06, |
| "loss": 0.0187, |
| "step": 23160 |
| }, |
| { |
| "epoch": 22.21476510067114, |
| "grad_norm": 0.1875505894422531, |
| "learning_rate": 3.2064773387203984e-06, |
| "loss": 0.0298, |
| "step": 23170 |
| }, |
| { |
| "epoch": 22.224352828379676, |
| "grad_norm": 0.1442171037197113, |
| "learning_rate": 3.1841078620836683e-06, |
| "loss": 0.0231, |
| "step": 23180 |
| }, |
| { |
| "epoch": 22.233940556088207, |
| "grad_norm": 0.22087924182415009, |
| "learning_rate": 3.1618141208749617e-06, |
| "loss": 0.0229, |
| "step": 23190 |
| }, |
| { |
| "epoch": 22.24352828379674, |
| "grad_norm": 0.18746836483478546, |
| "learning_rate": 3.139596151159502e-06, |
| "loss": 0.0197, |
| "step": 23200 |
| }, |
| { |
| "epoch": 22.253116011505274, |
| "grad_norm": 0.23875057697296143, |
| "learning_rate": 3.1174539888799425e-06, |
| "loss": 0.0202, |
| "step": 23210 |
| }, |
| { |
| "epoch": 22.262703739213805, |
| "grad_norm": 0.3558003902435303, |
| "learning_rate": 3.0953876698563144e-06, |
| "loss": 0.0209, |
| "step": 23220 |
| }, |
| { |
| "epoch": 22.27229146692234, |
| "grad_norm": 0.32513800263404846, |
| "learning_rate": 3.0733972297859294e-06, |
| "loss": 0.0306, |
| "step": 23230 |
| }, |
| { |
| "epoch": 22.28187919463087, |
| "grad_norm": 0.14356492459774017, |
| "learning_rate": 3.0514827042433804e-06, |
| "loss": 0.0263, |
| "step": 23240 |
| }, |
| { |
| "epoch": 22.291466922339406, |
| "grad_norm": 0.28215450048446655, |
| "learning_rate": 3.029644128680409e-06, |
| "loss": 0.0266, |
| "step": 23250 |
| }, |
| { |
| "epoch": 22.301054650047938, |
| "grad_norm": 0.27818936109542847, |
| "learning_rate": 3.0078815384259163e-06, |
| "loss": 0.0242, |
| "step": 23260 |
| }, |
| { |
| "epoch": 22.310642377756473, |
| "grad_norm": 0.21585923433303833, |
| "learning_rate": 2.9861949686858903e-06, |
| "loss": 0.0225, |
| "step": 23270 |
| }, |
| { |
| "epoch": 22.320230105465004, |
| "grad_norm": 0.2902468144893646, |
| "learning_rate": 2.964584454543312e-06, |
| "loss": 0.0302, |
| "step": 23280 |
| }, |
| { |
| "epoch": 22.32981783317354, |
| "grad_norm": 0.15972809493541718, |
| "learning_rate": 2.9430500309581387e-06, |
| "loss": 0.0265, |
| "step": 23290 |
| }, |
| { |
| "epoch": 22.33940556088207, |
| "grad_norm": 0.29737722873687744, |
| "learning_rate": 2.9215917327672426e-06, |
| "loss": 0.0187, |
| "step": 23300 |
| }, |
| { |
| "epoch": 22.348993288590606, |
| "grad_norm": 0.24347999691963196, |
| "learning_rate": 2.9002095946843277e-06, |
| "loss": 0.0233, |
| "step": 23310 |
| }, |
| { |
| "epoch": 22.358581016299137, |
| "grad_norm": 0.38935643434524536, |
| "learning_rate": 2.878903651299891e-06, |
| "loss": 0.0225, |
| "step": 23320 |
| }, |
| { |
| "epoch": 22.36816874400767, |
| "grad_norm": 0.32759687304496765, |
| "learning_rate": 2.8576739370811957e-06, |
| "loss": 0.0218, |
| "step": 23330 |
| }, |
| { |
| "epoch": 22.377756471716204, |
| "grad_norm": 0.11117486655712128, |
| "learning_rate": 2.8365204863721573e-06, |
| "loss": 0.023, |
| "step": 23340 |
| }, |
| { |
| "epoch": 22.387344199424735, |
| "grad_norm": 0.2842784523963928, |
| "learning_rate": 2.815443333393325e-06, |
| "loss": 0.0238, |
| "step": 23350 |
| }, |
| { |
| "epoch": 22.39693192713327, |
| "grad_norm": 0.23901750147342682, |
| "learning_rate": 2.794442512241824e-06, |
| "loss": 0.0237, |
| "step": 23360 |
| }, |
| { |
| "epoch": 22.4065196548418, |
| "grad_norm": 0.23337188363075256, |
| "learning_rate": 2.7735180568912943e-06, |
| "loss": 0.0188, |
| "step": 23370 |
| }, |
| { |
| "epoch": 22.416107382550337, |
| "grad_norm": 0.2736450433731079, |
| "learning_rate": 2.7526700011918316e-06, |
| "loss": 0.0247, |
| "step": 23380 |
| }, |
| { |
| "epoch": 22.425695110258868, |
| "grad_norm": 0.13441747426986694, |
| "learning_rate": 2.731898378869935e-06, |
| "loss": 0.0229, |
| "step": 23390 |
| }, |
| { |
| "epoch": 22.435282837967403, |
| "grad_norm": 0.1925612986087799, |
| "learning_rate": 2.7112032235284744e-06, |
| "loss": 0.0268, |
| "step": 23400 |
| }, |
| { |
| "epoch": 22.444870565675934, |
| "grad_norm": 0.2339990884065628, |
| "learning_rate": 2.6905845686465924e-06, |
| "loss": 0.0239, |
| "step": 23410 |
| }, |
| { |
| "epoch": 22.45445829338447, |
| "grad_norm": 0.17596887052059174, |
| "learning_rate": 2.6700424475796905e-06, |
| "loss": 0.0222, |
| "step": 23420 |
| }, |
| { |
| "epoch": 22.464046021093, |
| "grad_norm": 0.3486236035823822, |
| "learning_rate": 2.6495768935593525e-06, |
| "loss": 0.0222, |
| "step": 23430 |
| }, |
| { |
| "epoch": 22.473633748801532, |
| "grad_norm": 0.290425181388855, |
| "learning_rate": 2.6291879396933004e-06, |
| "loss": 0.0259, |
| "step": 23440 |
| }, |
| { |
| "epoch": 22.483221476510067, |
| "grad_norm": 0.18840055167675018, |
| "learning_rate": 2.6088756189653397e-06, |
| "loss": 0.0291, |
| "step": 23450 |
| }, |
| { |
| "epoch": 22.4928092042186, |
| "grad_norm": 0.2448890209197998, |
| "learning_rate": 2.588639964235301e-06, |
| "loss": 0.0244, |
| "step": 23460 |
| }, |
| { |
| "epoch": 22.502396931927134, |
| "grad_norm": 0.49676454067230225, |
| "learning_rate": 2.568481008238982e-06, |
| "loss": 0.0248, |
| "step": 23470 |
| }, |
| { |
| "epoch": 22.511984659635665, |
| "grad_norm": 0.15404298901557922, |
| "learning_rate": 2.5483987835881127e-06, |
| "loss": 0.0215, |
| "step": 23480 |
| }, |
| { |
| "epoch": 22.5215723873442, |
| "grad_norm": 0.13425439596176147, |
| "learning_rate": 2.528393322770306e-06, |
| "loss": 0.0236, |
| "step": 23490 |
| }, |
| { |
| "epoch": 22.53116011505273, |
| "grad_norm": 0.23426268994808197, |
| "learning_rate": 2.508464658148968e-06, |
| "loss": 0.0218, |
| "step": 23500 |
| }, |
| { |
| "epoch": 22.540747842761267, |
| "grad_norm": 0.21721801161766052, |
| "learning_rate": 2.488612821963271e-06, |
| "loss": 0.0229, |
| "step": 23510 |
| }, |
| { |
| "epoch": 22.550335570469798, |
| "grad_norm": 0.2656913697719574, |
| "learning_rate": 2.4688378463281146e-06, |
| "loss": 0.0248, |
| "step": 23520 |
| }, |
| { |
| "epoch": 22.559923298178333, |
| "grad_norm": 0.15874969959259033, |
| "learning_rate": 2.4491397632340487e-06, |
| "loss": 0.0244, |
| "step": 23530 |
| }, |
| { |
| "epoch": 22.569511025886865, |
| "grad_norm": 0.26198479533195496, |
| "learning_rate": 2.429518604547232e-06, |
| "loss": 0.0248, |
| "step": 23540 |
| }, |
| { |
| "epoch": 22.5790987535954, |
| "grad_norm": 0.2117781937122345, |
| "learning_rate": 2.409974402009385e-06, |
| "loss": 0.0177, |
| "step": 23550 |
| }, |
| { |
| "epoch": 22.58868648130393, |
| "grad_norm": 0.3583064675331116, |
| "learning_rate": 2.390507187237734e-06, |
| "loss": 0.0242, |
| "step": 23560 |
| }, |
| { |
| "epoch": 22.598274209012462, |
| "grad_norm": 0.26825496554374695, |
| "learning_rate": 2.371116991724953e-06, |
| "loss": 0.0242, |
| "step": 23570 |
| }, |
| { |
| "epoch": 22.607861936720997, |
| "grad_norm": 0.2770189344882965, |
| "learning_rate": 2.3518038468391236e-06, |
| "loss": 0.0198, |
| "step": 23580 |
| }, |
| { |
| "epoch": 22.61744966442953, |
| "grad_norm": 0.13024599850177765, |
| "learning_rate": 2.332567783823686e-06, |
| "loss": 0.0221, |
| "step": 23590 |
| }, |
| { |
| "epoch": 22.627037392138064, |
| "grad_norm": 0.16579675674438477, |
| "learning_rate": 2.313408833797376e-06, |
| "loss": 0.0198, |
| "step": 23600 |
| }, |
| { |
| "epoch": 22.636625119846595, |
| "grad_norm": 0.23456168174743652, |
| "learning_rate": 2.294327027754184e-06, |
| "loss": 0.0242, |
| "step": 23610 |
| }, |
| { |
| "epoch": 22.64621284755513, |
| "grad_norm": 0.2899184226989746, |
| "learning_rate": 2.275322396563301e-06, |
| "loss": 0.0268, |
| "step": 23620 |
| }, |
| { |
| "epoch": 22.65580057526366, |
| "grad_norm": 0.19349917769432068, |
| "learning_rate": 2.2563949709690725e-06, |
| "loss": 0.0224, |
| "step": 23630 |
| }, |
| { |
| "epoch": 22.665388302972197, |
| "grad_norm": 0.32430675625801086, |
| "learning_rate": 2.2375447815909388e-06, |
| "loss": 0.0241, |
| "step": 23640 |
| }, |
| { |
| "epoch": 22.674976030680728, |
| "grad_norm": 0.18584056198596954, |
| "learning_rate": 2.218771858923402e-06, |
| "loss": 0.0231, |
| "step": 23650 |
| }, |
| { |
| "epoch": 22.684563758389263, |
| "grad_norm": 0.21064673364162445, |
| "learning_rate": 2.2000762333359625e-06, |
| "loss": 0.0294, |
| "step": 23660 |
| }, |
| { |
| "epoch": 22.694151486097795, |
| "grad_norm": 0.2811007797718048, |
| "learning_rate": 2.1814579350730835e-06, |
| "loss": 0.023, |
| "step": 23670 |
| }, |
| { |
| "epoch": 22.70373921380633, |
| "grad_norm": 0.17581719160079956, |
| "learning_rate": 2.162916994254116e-06, |
| "loss": 0.0212, |
| "step": 23680 |
| }, |
| { |
| "epoch": 22.71332694151486, |
| "grad_norm": 0.22076162695884705, |
| "learning_rate": 2.1444534408732898e-06, |
| "loss": 0.026, |
| "step": 23690 |
| }, |
| { |
| "epoch": 22.722914669223393, |
| "grad_norm": 0.17099499702453613, |
| "learning_rate": 2.1260673047996227e-06, |
| "loss": 0.0231, |
| "step": 23700 |
| }, |
| { |
| "epoch": 22.732502396931928, |
| "grad_norm": 0.2862556576728821, |
| "learning_rate": 2.1077586157769e-06, |
| "loss": 0.0212, |
| "step": 23710 |
| }, |
| { |
| "epoch": 22.74209012464046, |
| "grad_norm": 0.257538378238678, |
| "learning_rate": 2.0895274034236245e-06, |
| "loss": 0.022, |
| "step": 23720 |
| }, |
| { |
| "epoch": 22.751677852348994, |
| "grad_norm": 0.12845216691493988, |
| "learning_rate": 2.071373697232959e-06, |
| "loss": 0.0332, |
| "step": 23730 |
| }, |
| { |
| "epoch": 22.761265580057525, |
| "grad_norm": 0.2115718573331833, |
| "learning_rate": 2.0532975265726786e-06, |
| "loss": 0.0295, |
| "step": 23740 |
| }, |
| { |
| "epoch": 22.77085330776606, |
| "grad_norm": 0.24508948624134064, |
| "learning_rate": 2.0352989206851303e-06, |
| "loss": 0.0219, |
| "step": 23750 |
| }, |
| { |
| "epoch": 22.780441035474592, |
| "grad_norm": 0.16549085080623627, |
| "learning_rate": 2.0173779086871735e-06, |
| "loss": 0.0228, |
| "step": 23760 |
| }, |
| { |
| "epoch": 22.790028763183127, |
| "grad_norm": 0.2713741362094879, |
| "learning_rate": 1.999534519570162e-06, |
| "loss": 0.0253, |
| "step": 23770 |
| }, |
| { |
| "epoch": 22.79961649089166, |
| "grad_norm": 0.30385440587997437, |
| "learning_rate": 1.981768782199861e-06, |
| "loss": 0.0219, |
| "step": 23780 |
| }, |
| { |
| "epoch": 22.809204218600193, |
| "grad_norm": 0.13380350172519684, |
| "learning_rate": 1.964080725316414e-06, |
| "loss": 0.0223, |
| "step": 23790 |
| }, |
| { |
| "epoch": 22.818791946308725, |
| "grad_norm": 0.2257150113582611, |
| "learning_rate": 1.9464703775343096e-06, |
| "loss": 0.0246, |
| "step": 23800 |
| }, |
| { |
| "epoch": 22.828379674017256, |
| "grad_norm": 0.18876703083515167, |
| "learning_rate": 1.928937767342315e-06, |
| "loss": 0.0216, |
| "step": 23810 |
| }, |
| { |
| "epoch": 22.83796740172579, |
| "grad_norm": 0.21190357208251953, |
| "learning_rate": 1.911482923103447e-06, |
| "loss": 0.0236, |
| "step": 23820 |
| }, |
| { |
| "epoch": 22.847555129434323, |
| "grad_norm": 0.20915569365024567, |
| "learning_rate": 1.8941058730549132e-06, |
| "loss": 0.0202, |
| "step": 23830 |
| }, |
| { |
| "epoch": 22.857142857142858, |
| "grad_norm": 0.11660904437303543, |
| "learning_rate": 1.8768066453080657e-06, |
| "loss": 0.0227, |
| "step": 23840 |
| }, |
| { |
| "epoch": 22.86673058485139, |
| "grad_norm": 0.3356838524341583, |
| "learning_rate": 1.8595852678483738e-06, |
| "loss": 0.0264, |
| "step": 23850 |
| }, |
| { |
| "epoch": 22.876318312559924, |
| "grad_norm": 0.26690155267715454, |
| "learning_rate": 1.8424417685353634e-06, |
| "loss": 0.0249, |
| "step": 23860 |
| }, |
| { |
| "epoch": 22.885906040268456, |
| "grad_norm": 0.23408538103103638, |
| "learning_rate": 1.825376175102561e-06, |
| "loss": 0.0249, |
| "step": 23870 |
| }, |
| { |
| "epoch": 22.89549376797699, |
| "grad_norm": 0.40512949228286743, |
| "learning_rate": 1.8083885151574775e-06, |
| "loss": 0.0259, |
| "step": 23880 |
| }, |
| { |
| "epoch": 22.905081495685522, |
| "grad_norm": 0.20047682523727417, |
| "learning_rate": 1.7914788161815466e-06, |
| "loss": 0.025, |
| "step": 23890 |
| }, |
| { |
| "epoch": 22.914669223394057, |
| "grad_norm": 0.2298455536365509, |
| "learning_rate": 1.7746471055300751e-06, |
| "loss": 0.0208, |
| "step": 23900 |
| }, |
| { |
| "epoch": 22.92425695110259, |
| "grad_norm": 0.13947898149490356, |
| "learning_rate": 1.7578934104322097e-06, |
| "loss": 0.0201, |
| "step": 23910 |
| }, |
| { |
| "epoch": 22.933844678811123, |
| "grad_norm": 0.22104570269584656, |
| "learning_rate": 1.741217757990893e-06, |
| "loss": 0.0233, |
| "step": 23920 |
| }, |
| { |
| "epoch": 22.943432406519655, |
| "grad_norm": 0.2084132432937622, |
| "learning_rate": 1.7246201751828117e-06, |
| "loss": 0.0269, |
| "step": 23930 |
| }, |
| { |
| "epoch": 22.953020134228186, |
| "grad_norm": 0.2208138108253479, |
| "learning_rate": 1.7081006888583495e-06, |
| "loss": 0.023, |
| "step": 23940 |
| }, |
| { |
| "epoch": 22.96260786193672, |
| "grad_norm": 0.20159326493740082, |
| "learning_rate": 1.6916593257415735e-06, |
| "loss": 0.0181, |
| "step": 23950 |
| }, |
| { |
| "epoch": 22.972195589645253, |
| "grad_norm": 0.16700109839439392, |
| "learning_rate": 1.6752961124301415e-06, |
| "loss": 0.0225, |
| "step": 23960 |
| }, |
| { |
| "epoch": 22.981783317353788, |
| "grad_norm": 0.16461221873760223, |
| "learning_rate": 1.6590110753953058e-06, |
| "loss": 0.0267, |
| "step": 23970 |
| }, |
| { |
| "epoch": 22.99137104506232, |
| "grad_norm": 0.30045902729034424, |
| "learning_rate": 1.6428042409818434e-06, |
| "loss": 0.0252, |
| "step": 23980 |
| }, |
| { |
| "epoch": 23.000958772770854, |
| "grad_norm": 0.2971097528934479, |
| "learning_rate": 1.6266756354080148e-06, |
| "loss": 0.021, |
| "step": 23990 |
| }, |
| { |
| "epoch": 23.010546500479386, |
| "grad_norm": 0.18751384317874908, |
| "learning_rate": 1.610625284765538e-06, |
| "loss": 0.0225, |
| "step": 24000 |
| }, |
| { |
| "epoch": 23.02013422818792, |
| "grad_norm": 0.2587001621723175, |
| "learning_rate": 1.5946532150195315e-06, |
| "loss": 0.024, |
| "step": 24010 |
| }, |
| { |
| "epoch": 23.029721955896452, |
| "grad_norm": 0.2185692936182022, |
| "learning_rate": 1.578759452008477e-06, |
| "loss": 0.0269, |
| "step": 24020 |
| }, |
| { |
| "epoch": 23.039309683604987, |
| "grad_norm": 0.17715659737586975, |
| "learning_rate": 1.5629440214441737e-06, |
| "loss": 0.0227, |
| "step": 24030 |
| }, |
| { |
| "epoch": 23.04889741131352, |
| "grad_norm": 0.27682605385780334, |
| "learning_rate": 1.5472069489117058e-06, |
| "loss": 0.0261, |
| "step": 24040 |
| }, |
| { |
| "epoch": 23.058485139022054, |
| "grad_norm": 0.20817138254642487, |
| "learning_rate": 1.531548259869392e-06, |
| "loss": 0.0165, |
| "step": 24050 |
| }, |
| { |
| "epoch": 23.068072866730585, |
| "grad_norm": 0.2723507583141327, |
| "learning_rate": 1.515967979648747e-06, |
| "loss": 0.0228, |
| "step": 24060 |
| }, |
| { |
| "epoch": 23.077660594439116, |
| "grad_norm": 0.1344461739063263, |
| "learning_rate": 1.5004661334544422e-06, |
| "loss": 0.0207, |
| "step": 24070 |
| }, |
| { |
| "epoch": 23.08724832214765, |
| "grad_norm": 0.16644145548343658, |
| "learning_rate": 1.4850427463642568e-06, |
| "loss": 0.0282, |
| "step": 24080 |
| }, |
| { |
| "epoch": 23.096836049856183, |
| "grad_norm": 0.22762452065944672, |
| "learning_rate": 1.4696978433290653e-06, |
| "loss": 0.0273, |
| "step": 24090 |
| }, |
| { |
| "epoch": 23.106423777564718, |
| "grad_norm": 0.1904400885105133, |
| "learning_rate": 1.4544314491727607e-06, |
| "loss": 0.0216, |
| "step": 24100 |
| }, |
| { |
| "epoch": 23.11601150527325, |
| "grad_norm": 0.20357421040534973, |
| "learning_rate": 1.4392435885922262e-06, |
| "loss": 0.0176, |
| "step": 24110 |
| }, |
| { |
| "epoch": 23.125599232981784, |
| "grad_norm": 0.14489389955997467, |
| "learning_rate": 1.4241342861573081e-06, |
| "loss": 0.0241, |
| "step": 24120 |
| }, |
| { |
| "epoch": 23.135186960690316, |
| "grad_norm": 0.42258408665657043, |
| "learning_rate": 1.4091035663107599e-06, |
| "loss": 0.0261, |
| "step": 24130 |
| }, |
| { |
| "epoch": 23.14477468839885, |
| "grad_norm": 0.33509764075279236, |
| "learning_rate": 1.39415145336822e-06, |
| "loss": 0.0225, |
| "step": 24140 |
| }, |
| { |
| "epoch": 23.154362416107382, |
| "grad_norm": 0.1689392775297165, |
| "learning_rate": 1.3792779715181503e-06, |
| "loss": 0.0239, |
| "step": 24150 |
| }, |
| { |
| "epoch": 23.163950143815917, |
| "grad_norm": 0.16699060797691345, |
| "learning_rate": 1.3644831448218154e-06, |
| "loss": 0.018, |
| "step": 24160 |
| }, |
| { |
| "epoch": 23.17353787152445, |
| "grad_norm": 0.18517597019672394, |
| "learning_rate": 1.349766997213242e-06, |
| "loss": 0.0241, |
| "step": 24170 |
| }, |
| { |
| "epoch": 23.18312559923298, |
| "grad_norm": 0.25795888900756836, |
| "learning_rate": 1.3351295524991592e-06, |
| "loss": 0.019, |
| "step": 24180 |
| }, |
| { |
| "epoch": 23.192713326941515, |
| "grad_norm": 0.14099453389644623, |
| "learning_rate": 1.3205708343589973e-06, |
| "loss": 0.0202, |
| "step": 24190 |
| }, |
| { |
| "epoch": 23.202301054650047, |
| "grad_norm": 0.1665448546409607, |
| "learning_rate": 1.3060908663448057e-06, |
| "loss": 0.0227, |
| "step": 24200 |
| }, |
| { |
| "epoch": 23.21188878235858, |
| "grad_norm": 0.23710502684116364, |
| "learning_rate": 1.2916896718812577e-06, |
| "loss": 0.0206, |
| "step": 24210 |
| }, |
| { |
| "epoch": 23.221476510067113, |
| "grad_norm": 0.20079617202281952, |
| "learning_rate": 1.2773672742655784e-06, |
| "loss": 0.0254, |
| "step": 24220 |
| }, |
| { |
| "epoch": 23.231064237775648, |
| "grad_norm": 0.19830353558063507, |
| "learning_rate": 1.2631236966675287e-06, |
| "loss": 0.0192, |
| "step": 24230 |
| }, |
| { |
| "epoch": 23.24065196548418, |
| "grad_norm": 0.14918625354766846, |
| "learning_rate": 1.2489589621293485e-06, |
| "loss": 0.019, |
| "step": 24240 |
| }, |
| { |
| "epoch": 23.250239693192714, |
| "grad_norm": 0.2350005954504013, |
| "learning_rate": 1.2348730935657582e-06, |
| "loss": 0.0234, |
| "step": 24250 |
| }, |
| { |
| "epoch": 23.259827420901246, |
| "grad_norm": 0.25462934374809265, |
| "learning_rate": 1.2208661137638687e-06, |
| "loss": 0.0205, |
| "step": 24260 |
| }, |
| { |
| "epoch": 23.26941514860978, |
| "grad_norm": 0.18844899535179138, |
| "learning_rate": 1.2069380453831768e-06, |
| "loss": 0.0264, |
| "step": 24270 |
| }, |
| { |
| "epoch": 23.279002876318312, |
| "grad_norm": 0.247798353433609, |
| "learning_rate": 1.19308891095552e-06, |
| "loss": 0.0228, |
| "step": 24280 |
| }, |
| { |
| "epoch": 23.288590604026847, |
| "grad_norm": 0.19767391681671143, |
| "learning_rate": 1.1793187328850485e-06, |
| "loss": 0.0214, |
| "step": 24290 |
| }, |
| { |
| "epoch": 23.29817833173538, |
| "grad_norm": 0.30730581283569336, |
| "learning_rate": 1.165627533448177e-06, |
| "loss": 0.0214, |
| "step": 24300 |
| }, |
| { |
| "epoch": 23.30776605944391, |
| "grad_norm": 0.3338695466518402, |
| "learning_rate": 1.1520153347935658e-06, |
| "loss": 0.0286, |
| "step": 24310 |
| }, |
| { |
| "epoch": 23.317353787152445, |
| "grad_norm": 0.17891177535057068, |
| "learning_rate": 1.1384821589420502e-06, |
| "loss": 0.0213, |
| "step": 24320 |
| }, |
| { |
| "epoch": 23.326941514860977, |
| "grad_norm": 0.2726079225540161, |
| "learning_rate": 1.1250280277866509e-06, |
| "loss": 0.0247, |
| "step": 24330 |
| }, |
| { |
| "epoch": 23.33652924256951, |
| "grad_norm": 0.15224401652812958, |
| "learning_rate": 1.1116529630925022e-06, |
| "loss": 0.0221, |
| "step": 24340 |
| }, |
| { |
| "epoch": 23.346116970278043, |
| "grad_norm": 0.3975865840911865, |
| "learning_rate": 1.0983569864968346e-06, |
| "loss": 0.024, |
| "step": 24350 |
| }, |
| { |
| "epoch": 23.355704697986578, |
| "grad_norm": 0.1999419927597046, |
| "learning_rate": 1.0851401195089316e-06, |
| "loss": 0.0247, |
| "step": 24360 |
| }, |
| { |
| "epoch": 23.36529242569511, |
| "grad_norm": 0.22919629514217377, |
| "learning_rate": 1.072002383510118e-06, |
| "loss": 0.0218, |
| "step": 24370 |
| }, |
| { |
| "epoch": 23.374880153403645, |
| "grad_norm": 0.11979561299085617, |
| "learning_rate": 1.05894379975367e-06, |
| "loss": 0.0154, |
| "step": 24380 |
| }, |
| { |
| "epoch": 23.384467881112176, |
| "grad_norm": 0.16747049987316132, |
| "learning_rate": 1.0459643893648507e-06, |
| "loss": 0.0228, |
| "step": 24390 |
| }, |
| { |
| "epoch": 23.39405560882071, |
| "grad_norm": 0.2855249345302582, |
| "learning_rate": 1.0330641733408309e-06, |
| "loss": 0.0268, |
| "step": 24400 |
| }, |
| { |
| "epoch": 23.403643336529242, |
| "grad_norm": 0.39327678084373474, |
| "learning_rate": 1.0202431725506556e-06, |
| "loss": 0.0241, |
| "step": 24410 |
| }, |
| { |
| "epoch": 23.413231064237777, |
| "grad_norm": 0.21982307732105255, |
| "learning_rate": 1.0075014077352396e-06, |
| "loss": 0.0157, |
| "step": 24420 |
| }, |
| { |
| "epoch": 23.42281879194631, |
| "grad_norm": 0.18971124291419983, |
| "learning_rate": 9.948388995072943e-07, |
| "loss": 0.0256, |
| "step": 24430 |
| }, |
| { |
| "epoch": 23.43240651965484, |
| "grad_norm": 0.2546897530555725, |
| "learning_rate": 9.822556683513395e-07, |
| "loss": 0.02, |
| "step": 24440 |
| }, |
| { |
| "epoch": 23.441994247363375, |
| "grad_norm": 0.23896943032741547, |
| "learning_rate": 9.69751734623625e-07, |
| "loss": 0.02, |
| "step": 24450 |
| }, |
| { |
| "epoch": 23.451581975071907, |
| "grad_norm": 0.2220362275838852, |
| "learning_rate": 9.57327118552137e-07, |
| "loss": 0.0228, |
| "step": 24460 |
| }, |
| { |
| "epoch": 23.461169702780442, |
| "grad_norm": 0.20060044527053833, |
| "learning_rate": 9.449818402365251e-07, |
| "loss": 0.0198, |
| "step": 24470 |
| }, |
| { |
| "epoch": 23.470757430488973, |
| "grad_norm": 0.22660043835639954, |
| "learning_rate": 9.327159196481138e-07, |
| "loss": 0.0225, |
| "step": 24480 |
| }, |
| { |
| "epoch": 23.48034515819751, |
| "grad_norm": 0.20978592336177826, |
| "learning_rate": 9.205293766298307e-07, |
| "loss": 0.0201, |
| "step": 24490 |
| }, |
| { |
| "epoch": 23.48993288590604, |
| "grad_norm": 0.205510213971138, |
| "learning_rate": 9.084222308962053e-07, |
| "loss": 0.0257, |
| "step": 24500 |
| }, |
| { |
| "epoch": 23.499520613614575, |
| "grad_norm": 0.15999889373779297, |
| "learning_rate": 8.963945020333209e-07, |
| "loss": 0.0242, |
| "step": 24510 |
| }, |
| { |
| "epoch": 23.509108341323106, |
| "grad_norm": 0.16011640429496765, |
| "learning_rate": 8.844462094987793e-07, |
| "loss": 0.0243, |
| "step": 24520 |
| }, |
| { |
| "epoch": 23.51869606903164, |
| "grad_norm": 0.1832507699728012, |
| "learning_rate": 8.725773726216801e-07, |
| "loss": 0.0199, |
| "step": 24530 |
| }, |
| { |
| "epoch": 23.528283796740173, |
| "grad_norm": 0.1802021861076355, |
| "learning_rate": 8.607880106025868e-07, |
| "loss": 0.0228, |
| "step": 24540 |
| }, |
| { |
| "epoch": 23.537871524448704, |
| "grad_norm": 0.3221112787723541, |
| "learning_rate": 8.49078142513493e-07, |
| "loss": 0.0253, |
| "step": 24550 |
| }, |
| { |
| "epoch": 23.54745925215724, |
| "grad_norm": 0.16884173452854156, |
| "learning_rate": 8.37447787297796e-07, |
| "loss": 0.0232, |
| "step": 24560 |
| }, |
| { |
| "epoch": 23.55704697986577, |
| "grad_norm": 0.26041117310523987, |
| "learning_rate": 8.258969637702563e-07, |
| "loss": 0.023, |
| "step": 24570 |
| }, |
| { |
| "epoch": 23.566634707574305, |
| "grad_norm": 0.3258151710033417, |
| "learning_rate": 8.144256906169767e-07, |
| "loss": 0.0211, |
| "step": 24580 |
| }, |
| { |
| "epoch": 23.576222435282837, |
| "grad_norm": 0.2306542694568634, |
| "learning_rate": 8.030339863953684e-07, |
| "loss": 0.0213, |
| "step": 24590 |
| }, |
| { |
| "epoch": 23.585810162991372, |
| "grad_norm": 0.2112276405096054, |
| "learning_rate": 7.917218695341178e-07, |
| "loss": 0.0251, |
| "step": 24600 |
| }, |
| { |
| "epoch": 23.595397890699903, |
| "grad_norm": 0.20719598233699799, |
| "learning_rate": 7.804893583331696e-07, |
| "loss": 0.0226, |
| "step": 24610 |
| }, |
| { |
| "epoch": 23.60498561840844, |
| "grad_norm": 0.3165718913078308, |
| "learning_rate": 7.693364709636886e-07, |
| "loss": 0.0287, |
| "step": 24620 |
| }, |
| { |
| "epoch": 23.61457334611697, |
| "grad_norm": 0.24788926541805267, |
| "learning_rate": 7.582632254680089e-07, |
| "loss": 0.0228, |
| "step": 24630 |
| }, |
| { |
| "epoch": 23.624161073825505, |
| "grad_norm": 0.2913201153278351, |
| "learning_rate": 7.472696397596568e-07, |
| "loss": 0.0216, |
| "step": 24640 |
| }, |
| { |
| "epoch": 23.633748801534036, |
| "grad_norm": 0.16556118428707123, |
| "learning_rate": 7.363557316232673e-07, |
| "loss": 0.0192, |
| "step": 24650 |
| }, |
| { |
| "epoch": 23.64333652924257, |
| "grad_norm": 0.1945585161447525, |
| "learning_rate": 7.255215187145892e-07, |
| "loss": 0.023, |
| "step": 24660 |
| }, |
| { |
| "epoch": 23.652924256951103, |
| "grad_norm": 0.26050955057144165, |
| "learning_rate": 7.147670185604361e-07, |
| "loss": 0.019, |
| "step": 24670 |
| }, |
| { |
| "epoch": 23.662511984659634, |
| "grad_norm": 0.20233625173568726, |
| "learning_rate": 7.04092248558691e-07, |
| "loss": 0.0239, |
| "step": 24680 |
| }, |
| { |
| "epoch": 23.67209971236817, |
| "grad_norm": 0.19280561804771423, |
| "learning_rate": 6.93497225978218e-07, |
| "loss": 0.0211, |
| "step": 24690 |
| }, |
| { |
| "epoch": 23.6816874400767, |
| "grad_norm": 0.15425735712051392, |
| "learning_rate": 6.829819679589122e-07, |
| "loss": 0.0265, |
| "step": 24700 |
| }, |
| { |
| "epoch": 23.691275167785236, |
| "grad_norm": 0.20832641422748566, |
| "learning_rate": 6.725464915115997e-07, |
| "loss": 0.0204, |
| "step": 24710 |
| }, |
| { |
| "epoch": 23.700862895493767, |
| "grad_norm": 0.1475028246641159, |
| "learning_rate": 6.621908135180655e-07, |
| "loss": 0.022, |
| "step": 24720 |
| }, |
| { |
| "epoch": 23.710450623202302, |
| "grad_norm": 0.18368731439113617, |
| "learning_rate": 6.519149507309807e-07, |
| "loss": 0.0222, |
| "step": 24730 |
| }, |
| { |
| "epoch": 23.720038350910833, |
| "grad_norm": 0.2757015824317932, |
| "learning_rate": 6.417189197739093e-07, |
| "loss": 0.0198, |
| "step": 24740 |
| }, |
| { |
| "epoch": 23.72962607861937, |
| "grad_norm": 0.25313273072242737, |
| "learning_rate": 6.316027371412625e-07, |
| "loss": 0.0287, |
| "step": 24750 |
| }, |
| { |
| "epoch": 23.7392138063279, |
| "grad_norm": 0.30066144466400146, |
| "learning_rate": 6.215664191982884e-07, |
| "loss": 0.0214, |
| "step": 24760 |
| }, |
| { |
| "epoch": 23.748801534036435, |
| "grad_norm": 0.4100160002708435, |
| "learning_rate": 6.116099821810272e-07, |
| "loss": 0.0223, |
| "step": 24770 |
| }, |
| { |
| "epoch": 23.758389261744966, |
| "grad_norm": 0.231341153383255, |
| "learning_rate": 6.017334421963006e-07, |
| "loss": 0.0241, |
| "step": 24780 |
| }, |
| { |
| "epoch": 23.7679769894535, |
| "grad_norm": 0.19291090965270996, |
| "learning_rate": 5.919368152216664e-07, |
| "loss": 0.0233, |
| "step": 24790 |
| }, |
| { |
| "epoch": 23.777564717162033, |
| "grad_norm": 0.20064283907413483, |
| "learning_rate": 5.822201171054197e-07, |
| "loss": 0.0186, |
| "step": 24800 |
| }, |
| { |
| "epoch": 23.787152444870564, |
| "grad_norm": 0.22854574024677277, |
| "learning_rate": 5.725833635665423e-07, |
| "loss": 0.0148, |
| "step": 24810 |
| }, |
| { |
| "epoch": 23.7967401725791, |
| "grad_norm": 0.2627497613430023, |
| "learning_rate": 5.630265701946912e-07, |
| "loss": 0.0229, |
| "step": 24820 |
| }, |
| { |
| "epoch": 23.80632790028763, |
| "grad_norm": 0.19262003898620605, |
| "learning_rate": 5.535497524501665e-07, |
| "loss": 0.0195, |
| "step": 24830 |
| }, |
| { |
| "epoch": 23.815915627996166, |
| "grad_norm": 0.2796723246574402, |
| "learning_rate": 5.441529256638933e-07, |
| "loss": 0.0249, |
| "step": 24840 |
| }, |
| { |
| "epoch": 23.825503355704697, |
| "grad_norm": 0.2186604142189026, |
| "learning_rate": 5.348361050373896e-07, |
| "loss": 0.0213, |
| "step": 24850 |
| }, |
| { |
| "epoch": 23.835091083413232, |
| "grad_norm": 0.14878273010253906, |
| "learning_rate": 5.255993056427433e-07, |
| "loss": 0.0204, |
| "step": 24860 |
| }, |
| { |
| "epoch": 23.844678811121764, |
| "grad_norm": 0.2015840858221054, |
| "learning_rate": 5.164425424226016e-07, |
| "loss": 0.0183, |
| "step": 24870 |
| }, |
| { |
| "epoch": 23.8542665388303, |
| "grad_norm": 0.3008297383785248, |
| "learning_rate": 5.073658301901207e-07, |
| "loss": 0.0228, |
| "step": 24880 |
| }, |
| { |
| "epoch": 23.86385426653883, |
| "grad_norm": 0.1776721477508545, |
| "learning_rate": 4.983691836289606e-07, |
| "loss": 0.025, |
| "step": 24890 |
| }, |
| { |
| "epoch": 23.873441994247365, |
| "grad_norm": 0.27587175369262695, |
| "learning_rate": 4.894526172932623e-07, |
| "loss": 0.021, |
| "step": 24900 |
| }, |
| { |
| "epoch": 23.883029721955896, |
| "grad_norm": 0.13460497558116913, |
| "learning_rate": 4.806161456076097e-07, |
| "loss": 0.0215, |
| "step": 24910 |
| }, |
| { |
| "epoch": 23.892617449664428, |
| "grad_norm": 0.23786711692810059, |
| "learning_rate": 4.718597828670235e-07, |
| "loss": 0.0228, |
| "step": 24920 |
| }, |
| { |
| "epoch": 23.902205177372963, |
| "grad_norm": 0.18216513097286224, |
| "learning_rate": 4.6318354323692246e-07, |
| "loss": 0.0194, |
| "step": 24930 |
| }, |
| { |
| "epoch": 23.911792905081494, |
| "grad_norm": 0.10642199218273163, |
| "learning_rate": 4.5458744075311253e-07, |
| "loss": 0.0225, |
| "step": 24940 |
| }, |
| { |
| "epoch": 23.92138063279003, |
| "grad_norm": 0.2601510286331177, |
| "learning_rate": 4.460714893217588e-07, |
| "loss": 0.0237, |
| "step": 24950 |
| }, |
| { |
| "epoch": 23.93096836049856, |
| "grad_norm": 0.2652058005332947, |
| "learning_rate": 4.376357027193634e-07, |
| "loss": 0.0209, |
| "step": 24960 |
| }, |
| { |
| "epoch": 23.940556088207096, |
| "grad_norm": 0.1884078085422516, |
| "learning_rate": 4.292800945927378e-07, |
| "loss": 0.0241, |
| "step": 24970 |
| }, |
| { |
| "epoch": 23.950143815915627, |
| "grad_norm": 0.3231159448623657, |
| "learning_rate": 4.210046784590027e-07, |
| "loss": 0.0274, |
| "step": 24980 |
| }, |
| { |
| "epoch": 23.959731543624162, |
| "grad_norm": 0.2526688575744629, |
| "learning_rate": 4.128094677055272e-07, |
| "loss": 0.0229, |
| "step": 24990 |
| }, |
| { |
| "epoch": 23.969319271332694, |
| "grad_norm": 0.13271057605743408, |
| "learning_rate": 4.0469447558995065e-07, |
| "loss": 0.021, |
| "step": 25000 |
| }, |
| { |
| "epoch": 23.97890699904123, |
| "grad_norm": 0.24924740195274353, |
| "learning_rate": 3.9665971524012747e-07, |
| "loss": 0.0283, |
| "step": 25010 |
| }, |
| { |
| "epoch": 23.98849472674976, |
| "grad_norm": 0.29960912466049194, |
| "learning_rate": 3.8870519965412135e-07, |
| "loss": 0.0239, |
| "step": 25020 |
| }, |
| { |
| "epoch": 23.99808245445829, |
| "grad_norm": 0.17576931416988373, |
| "learning_rate": 3.8083094170018875e-07, |
| "loss": 0.0207, |
| "step": 25030 |
| }, |
| { |
| "epoch": 24.007670182166827, |
| "grad_norm": 0.21935302019119263, |
| "learning_rate": 3.7303695411674e-07, |
| "loss": 0.0222, |
| "step": 25040 |
| }, |
| { |
| "epoch": 24.017257909875358, |
| "grad_norm": 0.2815200686454773, |
| "learning_rate": 3.6532324951233934e-07, |
| "loss": 0.0238, |
| "step": 25050 |
| }, |
| { |
| "epoch": 24.026845637583893, |
| "grad_norm": 0.245747908949852, |
| "learning_rate": 3.576898403656659e-07, |
| "loss": 0.018, |
| "step": 25060 |
| }, |
| { |
| "epoch": 24.036433365292424, |
| "grad_norm": 0.20894894003868103, |
| "learning_rate": 3.501367390255139e-07, |
| "loss": 0.0214, |
| "step": 25070 |
| }, |
| { |
| "epoch": 24.04602109300096, |
| "grad_norm": 0.16371525824069977, |
| "learning_rate": 3.426639577107427e-07, |
| "loss": 0.019, |
| "step": 25080 |
| }, |
| { |
| "epoch": 24.05560882070949, |
| "grad_norm": 0.15699537098407745, |
| "learning_rate": 3.352715085103042e-07, |
| "loss": 0.0221, |
| "step": 25090 |
| }, |
| { |
| "epoch": 24.065196548418026, |
| "grad_norm": 0.29383793473243713, |
| "learning_rate": 3.279594033831601e-07, |
| "loss": 0.0197, |
| "step": 25100 |
| }, |
| { |
| "epoch": 24.074784276126557, |
| "grad_norm": 0.25920745730400085, |
| "learning_rate": 3.2072765415833153e-07, |
| "loss": 0.0217, |
| "step": 25110 |
| }, |
| { |
| "epoch": 24.084372003835092, |
| "grad_norm": 0.1622145175933838, |
| "learning_rate": 3.1357627253482127e-07, |
| "loss": 0.0242, |
| "step": 25120 |
| }, |
| { |
| "epoch": 24.093959731543624, |
| "grad_norm": 0.26235565543174744, |
| "learning_rate": 3.0650527008162513e-07, |
| "loss": 0.0224, |
| "step": 25130 |
| }, |
| { |
| "epoch": 24.10354745925216, |
| "grad_norm": 0.24994677305221558, |
| "learning_rate": 2.9951465823771505e-07, |
| "loss": 0.0243, |
| "step": 25140 |
| }, |
| { |
| "epoch": 24.11313518696069, |
| "grad_norm": 0.35253608226776123, |
| "learning_rate": 2.926044483120005e-07, |
| "loss": 0.0271, |
| "step": 25150 |
| }, |
| { |
| "epoch": 24.12272291466922, |
| "grad_norm": 0.12745951116085052, |
| "learning_rate": 2.857746514833337e-07, |
| "loss": 0.0182, |
| "step": 25160 |
| }, |
| { |
| "epoch": 24.132310642377757, |
| "grad_norm": 0.16648143529891968, |
| "learning_rate": 2.79025278800471e-07, |
| "loss": 0.0211, |
| "step": 25170 |
| }, |
| { |
| "epoch": 24.141898370086288, |
| "grad_norm": 0.15013748407363892, |
| "learning_rate": 2.7235634118207286e-07, |
| "loss": 0.0203, |
| "step": 25180 |
| }, |
| { |
| "epoch": 24.151486097794823, |
| "grad_norm": 0.2093784213066101, |
| "learning_rate": 2.6576784941667045e-07, |
| "loss": 0.0232, |
| "step": 25190 |
| }, |
| { |
| "epoch": 24.161073825503355, |
| "grad_norm": 0.23546428978443146, |
| "learning_rate": 2.592598141626601e-07, |
| "loss": 0.0243, |
| "step": 25200 |
| }, |
| { |
| "epoch": 24.17066155321189, |
| "grad_norm": 0.20667380094528198, |
| "learning_rate": 2.528322459482757e-07, |
| "loss": 0.0225, |
| "step": 25210 |
| }, |
| { |
| "epoch": 24.18024928092042, |
| "grad_norm": 0.26651787757873535, |
| "learning_rate": 2.4648515517158297e-07, |
| "loss": 0.0196, |
| "step": 25220 |
| }, |
| { |
| "epoch": 24.189837008628956, |
| "grad_norm": 0.2723236083984375, |
| "learning_rate": 2.402185521004574e-07, |
| "loss": 0.0199, |
| "step": 25230 |
| }, |
| { |
| "epoch": 24.199424736337487, |
| "grad_norm": 0.2087775468826294, |
| "learning_rate": 2.3403244687256743e-07, |
| "loss": 0.0251, |
| "step": 25240 |
| }, |
| { |
| "epoch": 24.209012464046022, |
| "grad_norm": 0.1503581702709198, |
| "learning_rate": 2.279268494953468e-07, |
| "loss": 0.0172, |
| "step": 25250 |
| }, |
| { |
| "epoch": 24.218600191754554, |
| "grad_norm": 0.1757836937904358, |
| "learning_rate": 2.219017698460002e-07, |
| "loss": 0.0227, |
| "step": 25260 |
| }, |
| { |
| "epoch": 24.22818791946309, |
| "grad_norm": 0.22135482728481293, |
| "learning_rate": 2.1595721767147526e-07, |
| "loss": 0.0182, |
| "step": 25270 |
| }, |
| { |
| "epoch": 24.23777564717162, |
| "grad_norm": 0.19286498427391052, |
| "learning_rate": 2.1009320258845167e-07, |
| "loss": 0.0265, |
| "step": 25280 |
| }, |
| { |
| "epoch": 24.247363374880152, |
| "grad_norm": 0.12808747589588165, |
| "learning_rate": 2.0430973408330778e-07, |
| "loss": 0.0201, |
| "step": 25290 |
| }, |
| { |
| "epoch": 24.256951102588687, |
| "grad_norm": 0.15946893393993378, |
| "learning_rate": 1.9860682151212616e-07, |
| "loss": 0.026, |
| "step": 25300 |
| }, |
| { |
| "epoch": 24.26653883029722, |
| "grad_norm": 0.2374187558889389, |
| "learning_rate": 1.929844741006881e-07, |
| "loss": 0.0193, |
| "step": 25310 |
| }, |
| { |
| "epoch": 24.276126558005753, |
| "grad_norm": 0.2157372087240219, |
| "learning_rate": 1.8744270094441796e-07, |
| "loss": 0.0266, |
| "step": 25320 |
| }, |
| { |
| "epoch": 24.285714285714285, |
| "grad_norm": 0.27296164631843567, |
| "learning_rate": 1.819815110084111e-07, |
| "loss": 0.0211, |
| "step": 25330 |
| }, |
| { |
| "epoch": 24.29530201342282, |
| "grad_norm": 0.16994787752628326, |
| "learning_rate": 1.766009131273838e-07, |
| "loss": 0.0188, |
| "step": 25340 |
| }, |
| { |
| "epoch": 24.30488974113135, |
| "grad_norm": 0.2888137102127075, |
| "learning_rate": 1.7130091600568443e-07, |
| "loss": 0.0247, |
| "step": 25350 |
| }, |
| { |
| "epoch": 24.314477468839886, |
| "grad_norm": 0.26905524730682373, |
| "learning_rate": 1.660815282172823e-07, |
| "loss": 0.026, |
| "step": 25360 |
| }, |
| { |
| "epoch": 24.324065196548418, |
| "grad_norm": 0.28536051511764526, |
| "learning_rate": 1.609427582057288e-07, |
| "loss": 0.0221, |
| "step": 25370 |
| }, |
| { |
| "epoch": 24.333652924256953, |
| "grad_norm": 0.26181870698928833, |
| "learning_rate": 1.5588461428415745e-07, |
| "loss": 0.0248, |
| "step": 25380 |
| }, |
| { |
| "epoch": 24.343240651965484, |
| "grad_norm": 0.20964038372039795, |
| "learning_rate": 1.5090710463527836e-07, |
| "loss": 0.0222, |
| "step": 25390 |
| }, |
| { |
| "epoch": 24.352828379674015, |
| "grad_norm": 0.22509586811065674, |
| "learning_rate": 1.4601023731135034e-07, |
| "loss": 0.0196, |
| "step": 25400 |
| }, |
| { |
| "epoch": 24.36241610738255, |
| "grad_norm": 0.13734106719493866, |
| "learning_rate": 1.4119402023418106e-07, |
| "loss": 0.0249, |
| "step": 25410 |
| }, |
| { |
| "epoch": 24.372003835091082, |
| "grad_norm": 0.2952769100666046, |
| "learning_rate": 1.3645846119510474e-07, |
| "loss": 0.0204, |
| "step": 25420 |
| }, |
| { |
| "epoch": 24.381591562799617, |
| "grad_norm": 0.33259129524230957, |
| "learning_rate": 1.3180356785496562e-07, |
| "loss": 0.0267, |
| "step": 25430 |
| }, |
| { |
| "epoch": 24.39117929050815, |
| "grad_norm": 0.1688985675573349, |
| "learning_rate": 1.2722934774412887e-07, |
| "loss": 0.0208, |
| "step": 25440 |
| }, |
| { |
| "epoch": 24.400767018216683, |
| "grad_norm": 0.13669002056121826, |
| "learning_rate": 1.2273580826244192e-07, |
| "loss": 0.0238, |
| "step": 25450 |
| }, |
| { |
| "epoch": 24.410354745925215, |
| "grad_norm": 0.14696350693702698, |
| "learning_rate": 1.1832295667922876e-07, |
| "loss": 0.0219, |
| "step": 25460 |
| }, |
| { |
| "epoch": 24.41994247363375, |
| "grad_norm": 0.20755600929260254, |
| "learning_rate": 1.139908001332901e-07, |
| "loss": 0.0186, |
| "step": 25470 |
| }, |
| { |
| "epoch": 24.42953020134228, |
| "grad_norm": 0.19683778285980225, |
| "learning_rate": 1.0973934563288658e-07, |
| "loss": 0.0211, |
| "step": 25480 |
| }, |
| { |
| "epoch": 24.439117929050816, |
| "grad_norm": 0.2026386559009552, |
| "learning_rate": 1.0556860005571101e-07, |
| "loss": 0.0238, |
| "step": 25490 |
| }, |
| { |
| "epoch": 24.448705656759348, |
| "grad_norm": 0.4480651617050171, |
| "learning_rate": 1.0147857014890516e-07, |
| "loss": 0.021, |
| "step": 25500 |
| }, |
| { |
| "epoch": 24.458293384467883, |
| "grad_norm": 0.31666049361228943, |
| "learning_rate": 9.746926252902633e-08, |
| "loss": 0.032, |
| "step": 25510 |
| }, |
| { |
| "epoch": 24.467881112176414, |
| "grad_norm": 0.5467284321784973, |
| "learning_rate": 9.354068368204739e-08, |
| "loss": 0.0209, |
| "step": 25520 |
| }, |
| { |
| "epoch": 24.477468839884946, |
| "grad_norm": 0.15496346354484558, |
| "learning_rate": 8.969283996335121e-08, |
| "loss": 0.0224, |
| "step": 25530 |
| }, |
| { |
| "epoch": 24.48705656759348, |
| "grad_norm": 0.210786372423172, |
| "learning_rate": 8.59257375976974e-08, |
| "loss": 0.025, |
| "step": 25540 |
| }, |
| { |
| "epoch": 24.496644295302012, |
| "grad_norm": 0.12938974797725677, |
| "learning_rate": 8.223938267924446e-08, |
| "loss": 0.0176, |
| "step": 25550 |
| }, |
| { |
| "epoch": 24.506232023010547, |
| "grad_norm": 0.22987248003482819, |
| "learning_rate": 7.863378117151099e-08, |
| "loss": 0.0231, |
| "step": 25560 |
| }, |
| { |
| "epoch": 24.51581975071908, |
| "grad_norm": 0.3242381811141968, |
| "learning_rate": 7.510893890738113e-08, |
| "loss": 0.023, |
| "step": 25570 |
| }, |
| { |
| "epoch": 24.525407478427613, |
| "grad_norm": 0.2817991375923157, |
| "learning_rate": 7.166486158909913e-08, |
| "loss": 0.0231, |
| "step": 25580 |
| }, |
| { |
| "epoch": 24.534995206136145, |
| "grad_norm": 0.20501790940761566, |
| "learning_rate": 6.830155478824707e-08, |
| "loss": 0.0191, |
| "step": 25590 |
| }, |
| { |
| "epoch": 24.54458293384468, |
| "grad_norm": 0.1096939668059349, |
| "learning_rate": 6.501902394574488e-08, |
| "loss": 0.0273, |
| "step": 25600 |
| }, |
| { |
| "epoch": 24.55417066155321, |
| "grad_norm": 0.1630508154630661, |
| "learning_rate": 6.181727437183372e-08, |
| "loss": 0.0209, |
| "step": 25610 |
| }, |
| { |
| "epoch": 24.563758389261746, |
| "grad_norm": 0.28238698840141296, |
| "learning_rate": 5.8696311246081436e-08, |
| "loss": 0.0251, |
| "step": 25620 |
| }, |
| { |
| "epoch": 24.573346116970278, |
| "grad_norm": 0.11937420070171356, |
| "learning_rate": 5.5656139617366045e-08, |
| "loss": 0.0185, |
| "step": 25630 |
| }, |
| { |
| "epoch": 24.582933844678813, |
| "grad_norm": 0.17204758524894714, |
| "learning_rate": 5.2696764403847855e-08, |
| "loss": 0.0229, |
| "step": 25640 |
| }, |
| { |
| "epoch": 24.592521572387344, |
| "grad_norm": 0.17664316296577454, |
| "learning_rate": 4.981819039300284e-08, |
| "loss": 0.019, |
| "step": 25650 |
| }, |
| { |
| "epoch": 24.602109300095876, |
| "grad_norm": 0.14691434800624847, |
| "learning_rate": 4.702042224158931e-08, |
| "loss": 0.0272, |
| "step": 25660 |
| }, |
| { |
| "epoch": 24.61169702780441, |
| "grad_norm": 0.21293459832668304, |
| "learning_rate": 4.430346447562572e-08, |
| "loss": 0.0174, |
| "step": 25670 |
| }, |
| { |
| "epoch": 24.621284755512942, |
| "grad_norm": 0.17576336860656738, |
| "learning_rate": 4.166732149041841e-08, |
| "loss": 0.0257, |
| "step": 25680 |
| }, |
| { |
| "epoch": 24.630872483221477, |
| "grad_norm": 0.19463558495044708, |
| "learning_rate": 3.911199755053385e-08, |
| "loss": 0.0212, |
| "step": 25690 |
| }, |
| { |
| "epoch": 24.64046021093001, |
| "grad_norm": 0.17403477430343628, |
| "learning_rate": 3.663749678979311e-08, |
| "loss": 0.0202, |
| "step": 25700 |
| }, |
| { |
| "epoch": 24.650047938638544, |
| "grad_norm": 0.3777727782726288, |
| "learning_rate": 3.424382321126629e-08, |
| "loss": 0.024, |
| "step": 25710 |
| }, |
| { |
| "epoch": 24.659635666347075, |
| "grad_norm": 0.14289294183254242, |
| "learning_rate": 3.193098068727252e-08, |
| "loss": 0.0244, |
| "step": 25720 |
| }, |
| { |
| "epoch": 24.66922339405561, |
| "grad_norm": 0.17767243087291718, |
| "learning_rate": 2.9698972959357753e-08, |
| "loss": 0.0241, |
| "step": 25730 |
| }, |
| { |
| "epoch": 24.67881112176414, |
| "grad_norm": 0.2469603717327118, |
| "learning_rate": 2.7547803638311442e-08, |
| "loss": 0.0244, |
| "step": 25740 |
| }, |
| { |
| "epoch": 24.688398849472676, |
| "grad_norm": 0.1393066793680191, |
| "learning_rate": 2.5477476204144314e-08, |
| "loss": 0.0237, |
| "step": 25750 |
| }, |
| { |
| "epoch": 24.697986577181208, |
| "grad_norm": 0.2745441794395447, |
| "learning_rate": 2.3487994006077263e-08, |
| "loss": 0.0192, |
| "step": 25760 |
| }, |
| { |
| "epoch": 24.70757430488974, |
| "grad_norm": 0.19631850719451904, |
| "learning_rate": 2.1579360262558025e-08, |
| "loss": 0.0228, |
| "step": 25770 |
| }, |
| { |
| "epoch": 24.717162032598274, |
| "grad_norm": 0.4640311300754547, |
| "learning_rate": 1.9751578061244504e-08, |
| "loss": 0.0216, |
| "step": 25780 |
| }, |
| { |
| "epoch": 24.726749760306806, |
| "grad_norm": 0.262236088514328, |
| "learning_rate": 1.8004650358982578e-08, |
| "loss": 0.0243, |
| "step": 25790 |
| }, |
| { |
| "epoch": 24.73633748801534, |
| "grad_norm": 0.1786222904920578, |
| "learning_rate": 1.6338579981833856e-08, |
| "loss": 0.0165, |
| "step": 25800 |
| }, |
| { |
| "epoch": 24.745925215723872, |
| "grad_norm": 0.2555926442146301, |
| "learning_rate": 1.475336962504792e-08, |
| "loss": 0.0201, |
| "step": 25810 |
| }, |
| { |
| "epoch": 24.755512943432407, |
| "grad_norm": 0.16927938163280487, |
| "learning_rate": 1.3249021853062315e-08, |
| "loss": 0.0225, |
| "step": 25820 |
| }, |
| { |
| "epoch": 24.76510067114094, |
| "grad_norm": 0.17081952095031738, |
| "learning_rate": 1.182553909950812e-08, |
| "loss": 0.0206, |
| "step": 25830 |
| }, |
| { |
| "epoch": 24.774688398849474, |
| "grad_norm": 0.2548944056034088, |
| "learning_rate": 1.048292366719883e-08, |
| "loss": 0.0238, |
| "step": 25840 |
| }, |
| { |
| "epoch": 24.784276126558005, |
| "grad_norm": 0.14747904241085052, |
| "learning_rate": 9.221177728108154e-09, |
| "loss": 0.0218, |
| "step": 25850 |
| }, |
| { |
| "epoch": 24.79386385426654, |
| "grad_norm": 0.2064131796360016, |
| "learning_rate": 8.040303323414433e-09, |
| "loss": 0.0275, |
| "step": 25860 |
| }, |
| { |
| "epoch": 24.80345158197507, |
| "grad_norm": 0.1762009561061859, |
| "learning_rate": 6.940302363445117e-09, |
| "loss": 0.0183, |
| "step": 25870 |
| }, |
| { |
| "epoch": 24.813039309683607, |
| "grad_norm": 0.36469346284866333, |
| "learning_rate": 5.9211766277045276e-09, |
| "loss": 0.0226, |
| "step": 25880 |
| }, |
| { |
| "epoch": 24.822627037392138, |
| "grad_norm": 0.23766785860061646, |
| "learning_rate": 4.982927764862755e-09, |
| "loss": 0.0225, |
| "step": 25890 |
| }, |
| { |
| "epoch": 24.83221476510067, |
| "grad_norm": 0.277342826128006, |
| "learning_rate": 4.125557292750104e-09, |
| "loss": 0.0245, |
| "step": 25900 |
| }, |
| { |
| "epoch": 24.841802492809204, |
| "grad_norm": 0.2073160707950592, |
| "learning_rate": 3.349066598362649e-09, |
| "loss": 0.0213, |
| "step": 25910 |
| }, |
| { |
| "epoch": 24.851390220517736, |
| "grad_norm": 0.24508048593997955, |
| "learning_rate": 2.6534569378455776e-09, |
| "loss": 0.0217, |
| "step": 25920 |
| }, |
| { |
| "epoch": 24.86097794822627, |
| "grad_norm": 0.11171819269657135, |
| "learning_rate": 2.0387294365209475e-09, |
| "loss": 0.0213, |
| "step": 25930 |
| }, |
| { |
| "epoch": 24.870565675934802, |
| "grad_norm": 0.24452242255210876, |
| "learning_rate": 1.5048850888377265e-09, |
| "loss": 0.0172, |
| "step": 25940 |
| }, |
| { |
| "epoch": 24.880153403643337, |
| "grad_norm": 0.2535116374492645, |
| "learning_rate": 1.0519247584106495e-09, |
| "loss": 0.0252, |
| "step": 25950 |
| }, |
| { |
| "epoch": 24.88974113135187, |
| "grad_norm": 0.2804677486419678, |
| "learning_rate": 6.798491780202199e-10, |
| "loss": 0.0258, |
| "step": 25960 |
| }, |
| { |
| "epoch": 24.899328859060404, |
| "grad_norm": 0.21948733925819397, |
| "learning_rate": 3.8865894956829905e-10, |
| "loss": 0.0209, |
| "step": 25970 |
| }, |
| { |
| "epoch": 24.908916586768935, |
| "grad_norm": 0.21812966465950012, |
| "learning_rate": 1.7835454413361875e-10, |
| "loss": 0.025, |
| "step": 25980 |
| }, |
| { |
| "epoch": 24.91850431447747, |
| "grad_norm": 0.14540976285934448, |
| "learning_rate": 4.893630192737142e-11, |
| "loss": 0.0231, |
| "step": 25990 |
| }, |
| { |
| "epoch": 24.928092042186, |
| "grad_norm": 0.1897832155227661, |
| "learning_rate": 4.0443231541509307e-13, |
| "loss": 0.025, |
| "step": 26000 |
| }, |
| { |
| "epoch": 24.928092042186, |
| "step": 26000, |
| "total_flos": 0.0, |
| "train_loss": 0.037590215687568374, |
| "train_runtime": 11164.0416, |
| "train_samples_per_second": 74.525, |
| "train_steps_per_second": 2.329 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 26000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 25, |
| "save_steps": 20000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|