{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 42850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023337222870478414, "grad_norm": 3.321801435363931, "learning_rate": 2.1003500583430577e-08, "loss": 0.8511, "step": 10 }, { "epoch": 0.0004667444574095683, "grad_norm": 2.9125161289595303, "learning_rate": 4.434072345390899e-08, "loss": 0.8588, "step": 20 }, { "epoch": 0.0007001166861143524, "grad_norm": 2.805127594003184, "learning_rate": 6.767794632438741e-08, "loss": 0.8103, "step": 30 }, { "epoch": 0.0009334889148191366, "grad_norm": 2.8756970899691767, "learning_rate": 9.101516919486582e-08, "loss": 0.8416, "step": 40 }, { "epoch": 0.0011668611435239206, "grad_norm": 3.4541721605243354, "learning_rate": 1.1435239206534423e-07, "loss": 0.856, "step": 50 }, { "epoch": 0.0014002333722287048, "grad_norm": 3.2939503802130363, "learning_rate": 1.3768961493582263e-07, "loss": 0.8061, "step": 60 }, { "epoch": 0.001633605600933489, "grad_norm": 2.9236394304448377, "learning_rate": 1.6102683780630106e-07, "loss": 0.7943, "step": 70 }, { "epoch": 0.0018669778296382731, "grad_norm": 2.9906833334761087, "learning_rate": 1.8436406067677946e-07, "loss": 0.8162, "step": 80 }, { "epoch": 0.0021003500583430573, "grad_norm": 2.5760454485221707, "learning_rate": 2.077012835472579e-07, "loss": 0.7965, "step": 90 }, { "epoch": 0.002333722287047841, "grad_norm": 2.5395038096001166, "learning_rate": 2.310385064177363e-07, "loss": 0.8109, "step": 100 }, { "epoch": 0.0025670945157526253, "grad_norm": 1.9750310942787972, "learning_rate": 2.543757292882147e-07, "loss": 0.7237, "step": 110 }, { "epoch": 0.0028004667444574095, "grad_norm": 2.0228661266470502, "learning_rate": 2.7771295215869314e-07, "loss": 0.7664, "step": 120 }, { "epoch": 0.0030338389731621937, "grad_norm": 1.9528483162435049, "learning_rate": 3.0105017502917156e-07, "loss": 0.7581, "step": 130 }, { "epoch": 0.003267211201866978, "grad_norm": 1.6951183927875757, "learning_rate": 3.2438739789965e-07, "loss": 0.7604, "step": 140 }, { "epoch": 0.003500583430571762, "grad_norm": 2.259458440949255, "learning_rate": 3.477246207701284e-07, "loss": 0.7693, "step": 150 }, { "epoch": 0.0037339556592765463, "grad_norm": 2.0410024669057565, "learning_rate": 3.710618436406068e-07, "loss": 0.7485, "step": 160 }, { "epoch": 0.0039673278879813305, "grad_norm": 1.9002610560430193, "learning_rate": 3.943990665110852e-07, "loss": 0.6849, "step": 170 }, { "epoch": 0.004200700116686115, "grad_norm": 1.7975323272625428, "learning_rate": 4.1773628938156364e-07, "loss": 0.7217, "step": 180 }, { "epoch": 0.004434072345390899, "grad_norm": 1.6955199305852655, "learning_rate": 4.41073512252042e-07, "loss": 0.6825, "step": 190 }, { "epoch": 0.004667444574095682, "grad_norm": 1.9873536967393053, "learning_rate": 4.6441073512252044e-07, "loss": 0.6744, "step": 200 }, { "epoch": 0.004900816802800466, "grad_norm": 1.9087364899347692, "learning_rate": 4.877479579929989e-07, "loss": 0.68, "step": 210 }, { "epoch": 0.005134189031505251, "grad_norm": 1.6952743545807585, "learning_rate": 5.110851808634773e-07, "loss": 0.6746, "step": 220 }, { "epoch": 0.005367561260210035, "grad_norm": 1.7766412302500227, "learning_rate": 5.344224037339557e-07, "loss": 0.6576, "step": 230 }, { "epoch": 0.005600933488914819, "grad_norm": 1.8605118160596283, "learning_rate": 5.577596266044341e-07, "loss": 0.6685, "step": 240 }, { "epoch": 0.005834305717619603, "grad_norm": 1.8820649126463678, "learning_rate": 5.810968494749126e-07, "loss": 0.6302, "step": 250 }, { "epoch": 0.006067677946324387, "grad_norm": 2.097610308668876, "learning_rate": 6.044340723453909e-07, "loss": 0.6436, "step": 260 }, { "epoch": 0.006301050175029172, "grad_norm": 1.6066186823106796, "learning_rate": 6.277712952158693e-07, "loss": 0.6617, "step": 270 }, { "epoch": 0.006534422403733956, "grad_norm": 1.9167383037723027, "learning_rate": 6.511085180863477e-07, "loss": 0.6459, "step": 280 }, { "epoch": 0.00676779463243874, "grad_norm": 1.8324061061650232, "learning_rate": 6.744457409568262e-07, "loss": 0.6706, "step": 290 }, { "epoch": 0.007001166861143524, "grad_norm": 2.125088908585811, "learning_rate": 6.977829638273046e-07, "loss": 0.6521, "step": 300 }, { "epoch": 0.007234539089848308, "grad_norm": 2.0612782403544094, "learning_rate": 7.211201866977831e-07, "loss": 0.6105, "step": 310 }, { "epoch": 0.007467911318553093, "grad_norm": 1.8142854691366832, "learning_rate": 7.444574095682615e-07, "loss": 0.6286, "step": 320 }, { "epoch": 0.007701283547257876, "grad_norm": 1.7674263202253475, "learning_rate": 7.677946324387399e-07, "loss": 0.6183, "step": 330 }, { "epoch": 0.007934655775962661, "grad_norm": 1.680404019679175, "learning_rate": 7.911318553092182e-07, "loss": 0.608, "step": 340 }, { "epoch": 0.008168028004667444, "grad_norm": 1.8798054808646223, "learning_rate": 8.144690781796967e-07, "loss": 0.6216, "step": 350 }, { "epoch": 0.00840140023337223, "grad_norm": 2.026508928755662, "learning_rate": 8.378063010501751e-07, "loss": 0.6429, "step": 360 }, { "epoch": 0.008634772462077013, "grad_norm": 1.9697996955189212, "learning_rate": 8.611435239206535e-07, "loss": 0.6415, "step": 370 }, { "epoch": 0.008868144690781798, "grad_norm": 1.6851109550054488, "learning_rate": 8.844807467911319e-07, "loss": 0.615, "step": 380 }, { "epoch": 0.009101516919486581, "grad_norm": 1.6671548488402486, "learning_rate": 9.078179696616103e-07, "loss": 0.6092, "step": 390 }, { "epoch": 0.009334889148191364, "grad_norm": 1.8582561616920208, "learning_rate": 9.311551925320887e-07, "loss": 0.616, "step": 400 }, { "epoch": 0.00956826137689615, "grad_norm": 1.6538284030217598, "learning_rate": 9.54492415402567e-07, "loss": 0.5764, "step": 410 }, { "epoch": 0.009801633605600933, "grad_norm": 2.6548995557180524, "learning_rate": 9.778296382730454e-07, "loss": 0.6573, "step": 420 }, { "epoch": 0.010035005834305718, "grad_norm": 2.6820659666637123, "learning_rate": 1.001166861143524e-06, "loss": 0.6102, "step": 430 }, { "epoch": 0.010268378063010501, "grad_norm": 1.7705327895825858, "learning_rate": 1.0245040840140024e-06, "loss": 0.6165, "step": 440 }, { "epoch": 0.010501750291715286, "grad_norm": 2.0568940113098577, "learning_rate": 1.047841306884481e-06, "loss": 0.6136, "step": 450 }, { "epoch": 0.01073512252042007, "grad_norm": 1.6134373514920497, "learning_rate": 1.0711785297549594e-06, "loss": 0.5832, "step": 460 }, { "epoch": 0.010968494749124855, "grad_norm": 1.8012665569635773, "learning_rate": 1.0945157526254377e-06, "loss": 0.6317, "step": 470 }, { "epoch": 0.011201866977829638, "grad_norm": 1.8433403528766061, "learning_rate": 1.117852975495916e-06, "loss": 0.5523, "step": 480 }, { "epoch": 0.011435239206534423, "grad_norm": 1.73372055447057, "learning_rate": 1.1411901983663945e-06, "loss": 0.6375, "step": 490 }, { "epoch": 0.011668611435239206, "grad_norm": 2.095570185554152, "learning_rate": 1.1645274212368728e-06, "loss": 0.6428, "step": 500 }, { "epoch": 0.011901983663943991, "grad_norm": 1.7033518144680941, "learning_rate": 1.1878646441073512e-06, "loss": 0.5897, "step": 510 }, { "epoch": 0.012135355892648775, "grad_norm": 1.8982883747473271, "learning_rate": 1.2112018669778298e-06, "loss": 0.5897, "step": 520 }, { "epoch": 0.012368728121353558, "grad_norm": 2.0062143453514985, "learning_rate": 1.2345390898483082e-06, "loss": 0.5925, "step": 530 }, { "epoch": 0.012602100350058343, "grad_norm": 1.8913874278560163, "learning_rate": 1.2578763127187865e-06, "loss": 0.5996, "step": 540 }, { "epoch": 0.012835472578763127, "grad_norm": 1.7143523359243804, "learning_rate": 1.281213535589265e-06, "loss": 0.5986, "step": 550 }, { "epoch": 0.013068844807467912, "grad_norm": 1.980793465659204, "learning_rate": 1.3045507584597433e-06, "loss": 0.6382, "step": 560 }, { "epoch": 0.013302217036172695, "grad_norm": 1.8857678520880428, "learning_rate": 1.3278879813302217e-06, "loss": 0.6113, "step": 570 }, { "epoch": 0.01353558926487748, "grad_norm": 2.065814086574085, "learning_rate": 1.3512252042007e-06, "loss": 0.608, "step": 580 }, { "epoch": 0.013768961493582263, "grad_norm": 1.8667274550942141, "learning_rate": 1.3745624270711786e-06, "loss": 0.6259, "step": 590 }, { "epoch": 0.014002333722287048, "grad_norm": 1.9790736142246723, "learning_rate": 1.397899649941657e-06, "loss": 0.5615, "step": 600 }, { "epoch": 0.014235705950991832, "grad_norm": 4.720796627587461, "learning_rate": 1.4212368728121354e-06, "loss": 0.5612, "step": 610 }, { "epoch": 0.014469078179696617, "grad_norm": 1.8627002348188935, "learning_rate": 1.4445740956826137e-06, "loss": 0.594, "step": 620 }, { "epoch": 0.0147024504084014, "grad_norm": 1.8973811527458118, "learning_rate": 1.4679113185530925e-06, "loss": 0.5845, "step": 630 }, { "epoch": 0.014935822637106185, "grad_norm": 1.754255132507506, "learning_rate": 1.491248541423571e-06, "loss": 0.6289, "step": 640 }, { "epoch": 0.015169194865810968, "grad_norm": 1.880599359627649, "learning_rate": 1.5145857642940493e-06, "loss": 0.5907, "step": 650 }, { "epoch": 0.015402567094515752, "grad_norm": 1.9206939705418469, "learning_rate": 1.5379229871645277e-06, "loss": 0.577, "step": 660 }, { "epoch": 0.015635939323220535, "grad_norm": 2.2961400415623996, "learning_rate": 1.561260210035006e-06, "loss": 0.5643, "step": 670 }, { "epoch": 0.015869311551925322, "grad_norm": 1.8017489501922743, "learning_rate": 1.5845974329054844e-06, "loss": 0.5861, "step": 680 }, { "epoch": 0.016102683780630105, "grad_norm": 1.8933442842873085, "learning_rate": 1.6079346557759628e-06, "loss": 0.6119, "step": 690 }, { "epoch": 0.01633605600933489, "grad_norm": 2.0810058358769274, "learning_rate": 1.6312718786464414e-06, "loss": 0.6047, "step": 700 }, { "epoch": 0.016569428238039672, "grad_norm": 2.0177784723222487, "learning_rate": 1.6546091015169197e-06, "loss": 0.5791, "step": 710 }, { "epoch": 0.01680280046674446, "grad_norm": 2.040241367031874, "learning_rate": 1.6779463243873981e-06, "loss": 0.5662, "step": 720 }, { "epoch": 0.017036172695449242, "grad_norm": 1.920718075277611, "learning_rate": 1.7012835472578765e-06, "loss": 0.5972, "step": 730 }, { "epoch": 0.017269544924154025, "grad_norm": 1.8933068656599272, "learning_rate": 1.7246207701283549e-06, "loss": 0.578, "step": 740 }, { "epoch": 0.01750291715285881, "grad_norm": 2.106199723473422, "learning_rate": 1.7479579929988332e-06, "loss": 0.5642, "step": 750 }, { "epoch": 0.017736289381563596, "grad_norm": 1.6539192001544802, "learning_rate": 1.7712952158693116e-06, "loss": 0.5678, "step": 760 }, { "epoch": 0.01796966161026838, "grad_norm": 1.8093312773788475, "learning_rate": 1.7946324387397902e-06, "loss": 0.5692, "step": 770 }, { "epoch": 0.018203033838973162, "grad_norm": 1.7900302468202403, "learning_rate": 1.8179696616102686e-06, "loss": 0.5881, "step": 780 }, { "epoch": 0.018436406067677946, "grad_norm": 2.1104284549546968, "learning_rate": 1.841306884480747e-06, "loss": 0.5698, "step": 790 }, { "epoch": 0.01866977829638273, "grad_norm": 1.891109155883477, "learning_rate": 1.8646441073512253e-06, "loss": 0.5771, "step": 800 }, { "epoch": 0.018903150525087516, "grad_norm": 1.7871102686139289, "learning_rate": 1.8879813302217037e-06, "loss": 0.5697, "step": 810 }, { "epoch": 0.0191365227537923, "grad_norm": 1.8385854601807103, "learning_rate": 1.9113185530921823e-06, "loss": 0.5621, "step": 820 }, { "epoch": 0.019369894982497082, "grad_norm": 2.227099799128628, "learning_rate": 1.9346557759626606e-06, "loss": 0.5623, "step": 830 }, { "epoch": 0.019603267211201866, "grad_norm": 1.9937038765974213, "learning_rate": 1.957992998833139e-06, "loss": 0.5873, "step": 840 }, { "epoch": 0.019836639439906652, "grad_norm": 2.051863545709385, "learning_rate": 1.9813302217036174e-06, "loss": 0.582, "step": 850 }, { "epoch": 0.020070011668611436, "grad_norm": 1.7638248097372669, "learning_rate": 2.0046674445740958e-06, "loss": 0.5577, "step": 860 }, { "epoch": 0.02030338389731622, "grad_norm": 1.8976926467704711, "learning_rate": 2.028004667444574e-06, "loss": 0.5856, "step": 870 }, { "epoch": 0.020536756126021002, "grad_norm": 1.9850945415912673, "learning_rate": 2.0513418903150525e-06, "loss": 0.5703, "step": 880 }, { "epoch": 0.02077012835472579, "grad_norm": 1.720462570097855, "learning_rate": 2.0746791131855313e-06, "loss": 0.5624, "step": 890 }, { "epoch": 0.021003500583430573, "grad_norm": 1.7936924607054296, "learning_rate": 2.0980163360560097e-06, "loss": 0.5838, "step": 900 }, { "epoch": 0.021236872812135356, "grad_norm": 1.9815768806850997, "learning_rate": 2.121353558926488e-06, "loss": 0.5277, "step": 910 }, { "epoch": 0.02147024504084014, "grad_norm": 2.074211249707595, "learning_rate": 2.1446907817969664e-06, "loss": 0.5735, "step": 920 }, { "epoch": 0.021703617269544923, "grad_norm": 2.0228080928206635, "learning_rate": 2.1680280046674448e-06, "loss": 0.549, "step": 930 }, { "epoch": 0.02193698949824971, "grad_norm": 1.8468520451025492, "learning_rate": 2.191365227537923e-06, "loss": 0.5568, "step": 940 }, { "epoch": 0.022170361726954493, "grad_norm": 1.9980166561602624, "learning_rate": 2.2147024504084015e-06, "loss": 0.5969, "step": 950 }, { "epoch": 0.022403733955659276, "grad_norm": 1.822217739852198, "learning_rate": 2.23803967327888e-06, "loss": 0.5696, "step": 960 }, { "epoch": 0.02263710618436406, "grad_norm": 1.5601646307721817, "learning_rate": 2.2613768961493583e-06, "loss": 0.5419, "step": 970 }, { "epoch": 0.022870478413068846, "grad_norm": 2.3351225254876735, "learning_rate": 2.2847141190198367e-06, "loss": 0.5801, "step": 980 }, { "epoch": 0.02310385064177363, "grad_norm": 2.2364963142406253, "learning_rate": 2.3080513418903154e-06, "loss": 0.5837, "step": 990 }, { "epoch": 0.023337222870478413, "grad_norm": 1.904646116188979, "learning_rate": 2.331388564760794e-06, "loss": 0.5751, "step": 1000 }, { "epoch": 0.023570595099183196, "grad_norm": 1.7885038512162663, "learning_rate": 2.354725787631272e-06, "loss": 0.5514, "step": 1010 }, { "epoch": 0.023803967327887983, "grad_norm": 1.9167113035088226, "learning_rate": 2.3780630105017506e-06, "loss": 0.5792, "step": 1020 }, { "epoch": 0.024037339556592766, "grad_norm": 1.7407356599997943, "learning_rate": 2.401400233372229e-06, "loss": 0.5536, "step": 1030 }, { "epoch": 0.02427071178529755, "grad_norm": 1.792088780762227, "learning_rate": 2.4247374562427073e-06, "loss": 0.5384, "step": 1040 }, { "epoch": 0.024504084014002333, "grad_norm": 1.9310176018949525, "learning_rate": 2.4480746791131857e-06, "loss": 0.5787, "step": 1050 }, { "epoch": 0.024737456242707116, "grad_norm": 1.9993843261974953, "learning_rate": 2.471411901983664e-06, "loss": 0.5696, "step": 1060 }, { "epoch": 0.024970828471411903, "grad_norm": 2.006899070634986, "learning_rate": 2.4947491248541424e-06, "loss": 0.5923, "step": 1070 }, { "epoch": 0.025204200700116686, "grad_norm": 1.8911032175296678, "learning_rate": 2.518086347724621e-06, "loss": 0.5598, "step": 1080 }, { "epoch": 0.02543757292882147, "grad_norm": 1.9256925076819595, "learning_rate": 2.5414235705950996e-06, "loss": 0.5611, "step": 1090 }, { "epoch": 0.025670945157526253, "grad_norm": 2.10689573449088, "learning_rate": 2.5647607934655776e-06, "loss": 0.5664, "step": 1100 }, { "epoch": 0.02590431738623104, "grad_norm": 2.1149500938294508, "learning_rate": 2.5880980163360563e-06, "loss": 0.5807, "step": 1110 }, { "epoch": 0.026137689614935823, "grad_norm": 2.5583473773284595, "learning_rate": 2.6114352392065347e-06, "loss": 0.5812, "step": 1120 }, { "epoch": 0.026371061843640606, "grad_norm": 1.7908618579468814, "learning_rate": 2.634772462077013e-06, "loss": 0.5916, "step": 1130 }, { "epoch": 0.02660443407234539, "grad_norm": 1.8441088395412968, "learning_rate": 2.6581096849474915e-06, "loss": 0.5351, "step": 1140 }, { "epoch": 0.026837806301050177, "grad_norm": 1.836472963456457, "learning_rate": 2.68144690781797e-06, "loss": 0.5673, "step": 1150 }, { "epoch": 0.02707117852975496, "grad_norm": 1.9685534634627075, "learning_rate": 2.7047841306884482e-06, "loss": 0.5528, "step": 1160 }, { "epoch": 0.027304550758459743, "grad_norm": 2.7783828194467004, "learning_rate": 2.728121353558927e-06, "loss": 0.5697, "step": 1170 }, { "epoch": 0.027537922987164527, "grad_norm": 1.9791047295476087, "learning_rate": 2.751458576429405e-06, "loss": 0.5715, "step": 1180 }, { "epoch": 0.02777129521586931, "grad_norm": 2.1130831289654775, "learning_rate": 2.7747957992998838e-06, "loss": 0.5618, "step": 1190 }, { "epoch": 0.028004667444574097, "grad_norm": 2.144371182273601, "learning_rate": 2.7981330221703617e-06, "loss": 0.5605, "step": 1200 }, { "epoch": 0.02823803967327888, "grad_norm": 1.738369889858361, "learning_rate": 2.8214702450408405e-06, "loss": 0.5321, "step": 1210 }, { "epoch": 0.028471411901983663, "grad_norm": 1.8581865305517877, "learning_rate": 2.8448074679113185e-06, "loss": 0.5605, "step": 1220 }, { "epoch": 0.028704784130688447, "grad_norm": 1.8114755760221781, "learning_rate": 2.8681446907817972e-06, "loss": 0.5454, "step": 1230 }, { "epoch": 0.028938156359393234, "grad_norm": 2.114694641811852, "learning_rate": 2.891481913652275e-06, "loss": 0.5625, "step": 1240 }, { "epoch": 0.029171528588098017, "grad_norm": 1.766959678550857, "learning_rate": 2.914819136522754e-06, "loss": 0.5398, "step": 1250 }, { "epoch": 0.0294049008168028, "grad_norm": 1.6412244352230398, "learning_rate": 2.9381563593932324e-06, "loss": 0.5709, "step": 1260 }, { "epoch": 0.029638273045507584, "grad_norm": 1.9473118975083874, "learning_rate": 2.9614935822637107e-06, "loss": 0.5799, "step": 1270 }, { "epoch": 0.02987164527421237, "grad_norm": 2.065004143378876, "learning_rate": 2.9848308051341895e-06, "loss": 0.5472, "step": 1280 }, { "epoch": 0.030105017502917154, "grad_norm": 2.193878177588881, "learning_rate": 3.0081680280046675e-06, "loss": 0.5402, "step": 1290 }, { "epoch": 0.030338389731621937, "grad_norm": 1.9317374233021736, "learning_rate": 3.0315052508751463e-06, "loss": 0.5912, "step": 1300 }, { "epoch": 0.03057176196032672, "grad_norm": 1.9538093686352536, "learning_rate": 3.0548424737456247e-06, "loss": 0.5449, "step": 1310 }, { "epoch": 0.030805134189031504, "grad_norm": 1.467262186602219, "learning_rate": 3.078179696616103e-06, "loss": 0.5491, "step": 1320 }, { "epoch": 0.03103850641773629, "grad_norm": 1.9711680223573005, "learning_rate": 3.1015169194865814e-06, "loss": 0.5598, "step": 1330 }, { "epoch": 0.03127187864644107, "grad_norm": 1.9395398315827532, "learning_rate": 3.1248541423570598e-06, "loss": 0.5533, "step": 1340 }, { "epoch": 0.03150525087514586, "grad_norm": 1.6598419172018952, "learning_rate": 3.148191365227538e-06, "loss": 0.5652, "step": 1350 }, { "epoch": 0.031738623103850644, "grad_norm": 1.9401772747016588, "learning_rate": 3.171528588098017e-06, "loss": 0.5662, "step": 1360 }, { "epoch": 0.031971995332555424, "grad_norm": 2.1595104361836155, "learning_rate": 3.194865810968495e-06, "loss": 0.5831, "step": 1370 }, { "epoch": 0.03220536756126021, "grad_norm": 1.7968340939424348, "learning_rate": 3.2182030338389737e-06, "loss": 0.5606, "step": 1380 }, { "epoch": 0.032438739789965, "grad_norm": 2.1451710307337697, "learning_rate": 3.2415402567094516e-06, "loss": 0.5421, "step": 1390 }, { "epoch": 0.03267211201866978, "grad_norm": 2.163770386891864, "learning_rate": 3.2648774795799304e-06, "loss": 0.5624, "step": 1400 }, { "epoch": 0.032905484247374564, "grad_norm": 2.496996876722346, "learning_rate": 3.2882147024504084e-06, "loss": 0.5422, "step": 1410 }, { "epoch": 0.033138856476079344, "grad_norm": 4.792922769679782, "learning_rate": 3.311551925320887e-06, "loss": 0.5881, "step": 1420 }, { "epoch": 0.03337222870478413, "grad_norm": 1.7727729229917346, "learning_rate": 3.334889148191365e-06, "loss": 0.5397, "step": 1430 }, { "epoch": 0.03360560093348892, "grad_norm": 2.042607036477401, "learning_rate": 3.358226371061844e-06, "loss": 0.5724, "step": 1440 }, { "epoch": 0.0338389731621937, "grad_norm": 1.981738709472462, "learning_rate": 3.3815635939323223e-06, "loss": 0.5569, "step": 1450 }, { "epoch": 0.034072345390898484, "grad_norm": 1.923431067097263, "learning_rate": 3.4049008168028007e-06, "loss": 0.5785, "step": 1460 }, { "epoch": 0.034305717619603264, "grad_norm": 2.804600296721526, "learning_rate": 3.428238039673279e-06, "loss": 0.5306, "step": 1470 }, { "epoch": 0.03453908984830805, "grad_norm": 1.7156660595998952, "learning_rate": 3.451575262543758e-06, "loss": 0.5552, "step": 1480 }, { "epoch": 0.03477246207701284, "grad_norm": 1.9413610579549745, "learning_rate": 3.474912485414236e-06, "loss": 0.5671, "step": 1490 }, { "epoch": 0.03500583430571762, "grad_norm": 2.1647708783161166, "learning_rate": 3.4982497082847146e-06, "loss": 0.5565, "step": 1500 }, { "epoch": 0.035239206534422404, "grad_norm": 3.459043504880465, "learning_rate": 3.5215869311551925e-06, "loss": 0.5216, "step": 1510 }, { "epoch": 0.03547257876312719, "grad_norm": 1.9258450411836772, "learning_rate": 3.5449241540256713e-06, "loss": 0.5548, "step": 1520 }, { "epoch": 0.03570595099183197, "grad_norm": 2.0703689494493105, "learning_rate": 3.5682613768961493e-06, "loss": 0.5688, "step": 1530 }, { "epoch": 0.03593932322053676, "grad_norm": 2.386227127128689, "learning_rate": 3.591598599766628e-06, "loss": 0.5598, "step": 1540 }, { "epoch": 0.03617269544924154, "grad_norm": 1.8528030914220466, "learning_rate": 3.614935822637107e-06, "loss": 0.5388, "step": 1550 }, { "epoch": 0.036406067677946324, "grad_norm": 2.1064885722783697, "learning_rate": 3.638273045507585e-06, "loss": 0.5655, "step": 1560 }, { "epoch": 0.03663943990665111, "grad_norm": 1.9129111740917035, "learning_rate": 3.6616102683780636e-06, "loss": 0.5447, "step": 1570 }, { "epoch": 0.03687281213535589, "grad_norm": 1.9126552187838797, "learning_rate": 3.6849474912485416e-06, "loss": 0.5378, "step": 1580 }, { "epoch": 0.03710618436406068, "grad_norm": 1.6639647741905812, "learning_rate": 3.7082847141190204e-06, "loss": 0.5616, "step": 1590 }, { "epoch": 0.03733955659276546, "grad_norm": 1.872931204261857, "learning_rate": 3.7316219369894983e-06, "loss": 0.568, "step": 1600 }, { "epoch": 0.037572928821470244, "grad_norm": 1.7415412819266731, "learning_rate": 3.754959159859977e-06, "loss": 0.5493, "step": 1610 }, { "epoch": 0.03780630105017503, "grad_norm": 1.7438362186336558, "learning_rate": 3.7782963827304555e-06, "loss": 0.5662, "step": 1620 }, { "epoch": 0.03803967327887981, "grad_norm": 1.939197612250081, "learning_rate": 3.801633605600934e-06, "loss": 0.5611, "step": 1630 }, { "epoch": 0.0382730455075846, "grad_norm": 2.352051416025329, "learning_rate": 3.824970828471412e-06, "loss": 0.563, "step": 1640 }, { "epoch": 0.038506417736289385, "grad_norm": 2.0548535876619436, "learning_rate": 3.848308051341891e-06, "loss": 0.5449, "step": 1650 }, { "epoch": 0.038739789964994165, "grad_norm": 2.112847671012294, "learning_rate": 3.871645274212369e-06, "loss": 0.5341, "step": 1660 }, { "epoch": 0.03897316219369895, "grad_norm": 2.1901649756504495, "learning_rate": 3.894982497082848e-06, "loss": 0.5512, "step": 1670 }, { "epoch": 0.03920653442240373, "grad_norm": 1.921006610767703, "learning_rate": 3.918319719953326e-06, "loss": 0.5632, "step": 1680 }, { "epoch": 0.03943990665110852, "grad_norm": 1.663086256333828, "learning_rate": 3.9416569428238045e-06, "loss": 0.5322, "step": 1690 }, { "epoch": 0.039673278879813305, "grad_norm": 1.9388484833848996, "learning_rate": 3.9649941656942825e-06, "loss": 0.555, "step": 1700 }, { "epoch": 0.039906651108518085, "grad_norm": 1.934118046625819, "learning_rate": 3.988331388564761e-06, "loss": 0.5497, "step": 1710 }, { "epoch": 0.04014002333722287, "grad_norm": 2.9961156679687213, "learning_rate": 4.011668611435239e-06, "loss": 0.5264, "step": 1720 }, { "epoch": 0.04037339556592765, "grad_norm": 2.039775764033007, "learning_rate": 4.035005834305718e-06, "loss": 0.5366, "step": 1730 }, { "epoch": 0.04060676779463244, "grad_norm": 1.8560997522221199, "learning_rate": 4.058343057176196e-06, "loss": 0.5405, "step": 1740 }, { "epoch": 0.040840140023337225, "grad_norm": 2.0608039522081656, "learning_rate": 4.081680280046675e-06, "loss": 0.5619, "step": 1750 }, { "epoch": 0.041073512252042005, "grad_norm": 1.7825645295097297, "learning_rate": 4.105017502917153e-06, "loss": 0.568, "step": 1760 }, { "epoch": 0.04130688448074679, "grad_norm": 1.9709996221010921, "learning_rate": 4.1283547257876315e-06, "loss": 0.5582, "step": 1770 }, { "epoch": 0.04154025670945158, "grad_norm": 1.944253526593534, "learning_rate": 4.1516919486581095e-06, "loss": 0.5409, "step": 1780 }, { "epoch": 0.04177362893815636, "grad_norm": 2.6477782689422966, "learning_rate": 4.175029171528588e-06, "loss": 0.5672, "step": 1790 }, { "epoch": 0.042007001166861145, "grad_norm": 1.896567242438071, "learning_rate": 4.198366394399067e-06, "loss": 0.534, "step": 1800 }, { "epoch": 0.042240373395565925, "grad_norm": 1.8395082761974944, "learning_rate": 4.221703617269545e-06, "loss": 0.5368, "step": 1810 }, { "epoch": 0.04247374562427071, "grad_norm": 1.8833888088411608, "learning_rate": 4.245040840140024e-06, "loss": 0.5482, "step": 1820 }, { "epoch": 0.0427071178529755, "grad_norm": 1.7780872770672023, "learning_rate": 4.268378063010502e-06, "loss": 0.5593, "step": 1830 }, { "epoch": 0.04294049008168028, "grad_norm": 1.7232243063644352, "learning_rate": 4.2917152858809805e-06, "loss": 0.5453, "step": 1840 }, { "epoch": 0.043173862310385065, "grad_norm": 2.0415533724131687, "learning_rate": 4.3150525087514585e-06, "loss": 0.5699, "step": 1850 }, { "epoch": 0.043407234539089845, "grad_norm": 1.976674507557579, "learning_rate": 4.338389731621937e-06, "loss": 0.5511, "step": 1860 }, { "epoch": 0.04364060676779463, "grad_norm": 1.7238357555544035, "learning_rate": 4.361726954492416e-06, "loss": 0.5502, "step": 1870 }, { "epoch": 0.04387397899649942, "grad_norm": 1.8791334548116452, "learning_rate": 4.385064177362894e-06, "loss": 0.5402, "step": 1880 }, { "epoch": 0.0441073512252042, "grad_norm": 2.158367392434674, "learning_rate": 4.408401400233373e-06, "loss": 0.5524, "step": 1890 }, { "epoch": 0.044340723453908985, "grad_norm": 1.940969217480049, "learning_rate": 4.431738623103851e-06, "loss": 0.5499, "step": 1900 }, { "epoch": 0.04457409568261377, "grad_norm": 1.6403998555358723, "learning_rate": 4.4550758459743296e-06, "loss": 0.5178, "step": 1910 }, { "epoch": 0.04480746791131855, "grad_norm": 1.7440791098227604, "learning_rate": 4.478413068844808e-06, "loss": 0.5448, "step": 1920 }, { "epoch": 0.04504084014002334, "grad_norm": 1.9162054652980756, "learning_rate": 4.501750291715286e-06, "loss": 0.5532, "step": 1930 }, { "epoch": 0.04527421236872812, "grad_norm": 1.9243007956264808, "learning_rate": 4.525087514585765e-06, "loss": 0.5579, "step": 1940 }, { "epoch": 0.045507584597432905, "grad_norm": 2.043572471755938, "learning_rate": 4.548424737456243e-06, "loss": 0.5353, "step": 1950 }, { "epoch": 0.04574095682613769, "grad_norm": 2.121485980310702, "learning_rate": 4.571761960326722e-06, "loss": 0.5424, "step": 1960 }, { "epoch": 0.04597432905484247, "grad_norm": 2.50801515724677, "learning_rate": 4.5950991831972e-06, "loss": 0.5682, "step": 1970 }, { "epoch": 0.04620770128354726, "grad_norm": 2.038946602643619, "learning_rate": 4.618436406067679e-06, "loss": 0.5154, "step": 1980 }, { "epoch": 0.04644107351225204, "grad_norm": 1.8836281296860846, "learning_rate": 4.6417736289381566e-06, "loss": 0.5264, "step": 1990 }, { "epoch": 0.046674445740956826, "grad_norm": 1.9072803469285051, "learning_rate": 4.665110851808635e-06, "loss": 0.5388, "step": 2000 }, { "epoch": 0.04690781796966161, "grad_norm": 1.9576252258413556, "learning_rate": 4.688448074679113e-06, "loss": 0.5362, "step": 2010 }, { "epoch": 0.04714119019836639, "grad_norm": 2.1047179580986155, "learning_rate": 4.711785297549592e-06, "loss": 0.5396, "step": 2020 }, { "epoch": 0.04737456242707118, "grad_norm": 1.8140927614765971, "learning_rate": 4.73512252042007e-06, "loss": 0.5231, "step": 2030 }, { "epoch": 0.047607934655775966, "grad_norm": 4.300119245846117, "learning_rate": 4.758459743290549e-06, "loss": 0.5704, "step": 2040 }, { "epoch": 0.047841306884480746, "grad_norm": 1.8624375472176113, "learning_rate": 4.781796966161027e-06, "loss": 0.5254, "step": 2050 }, { "epoch": 0.04807467911318553, "grad_norm": 1.9994899680849239, "learning_rate": 4.805134189031506e-06, "loss": 0.5488, "step": 2060 }, { "epoch": 0.04830805134189031, "grad_norm": 2.331023195276847, "learning_rate": 4.828471411901984e-06, "loss": 0.5565, "step": 2070 }, { "epoch": 0.0485414235705951, "grad_norm": 1.9138638470781237, "learning_rate": 4.851808634772462e-06, "loss": 0.5851, "step": 2080 }, { "epoch": 0.048774795799299886, "grad_norm": 2.3733605680073486, "learning_rate": 4.875145857642941e-06, "loss": 0.5523, "step": 2090 }, { "epoch": 0.049008168028004666, "grad_norm": 2.0007325901846995, "learning_rate": 4.898483080513419e-06, "loss": 0.5491, "step": 2100 }, { "epoch": 0.04924154025670945, "grad_norm": 2.09174346216162, "learning_rate": 4.921820303383898e-06, "loss": 0.5449, "step": 2110 }, { "epoch": 0.04947491248541423, "grad_norm": 1.8043347903620552, "learning_rate": 4.945157526254376e-06, "loss": 0.57, "step": 2120 }, { "epoch": 0.04970828471411902, "grad_norm": 1.7677378988083652, "learning_rate": 4.968494749124855e-06, "loss": 0.5186, "step": 2130 }, { "epoch": 0.049941656942823806, "grad_norm": 2.528147764578218, "learning_rate": 4.9918319719953326e-06, "loss": 0.5722, "step": 2140 }, { "epoch": 0.050175029171528586, "grad_norm": 1.9363692924212506, "learning_rate": 5.015169194865811e-06, "loss": 0.5464, "step": 2150 }, { "epoch": 0.05040840140023337, "grad_norm": 1.650593999288811, "learning_rate": 5.038506417736289e-06, "loss": 0.5224, "step": 2160 }, { "epoch": 0.05064177362893816, "grad_norm": 1.8365110832523135, "learning_rate": 5.061843640606768e-06, "loss": 0.533, "step": 2170 }, { "epoch": 0.05087514585764294, "grad_norm": 1.622253753641868, "learning_rate": 5.085180863477247e-06, "loss": 0.5098, "step": 2180 }, { "epoch": 0.051108518086347726, "grad_norm": 1.8889768771633546, "learning_rate": 5.108518086347725e-06, "loss": 0.5087, "step": 2190 }, { "epoch": 0.051341890315052506, "grad_norm": 2.0289510187856044, "learning_rate": 5.131855309218204e-06, "loss": 0.5519, "step": 2200 }, { "epoch": 0.05157526254375729, "grad_norm": 1.7508717632959734, "learning_rate": 5.155192532088682e-06, "loss": 0.5065, "step": 2210 }, { "epoch": 0.05180863477246208, "grad_norm": 1.7461422138066987, "learning_rate": 5.17852975495916e-06, "loss": 0.5596, "step": 2220 }, { "epoch": 0.05204200700116686, "grad_norm": 1.7840302902166705, "learning_rate": 5.201866977829638e-06, "loss": 0.5347, "step": 2230 }, { "epoch": 0.052275379229871646, "grad_norm": 1.6911978619424737, "learning_rate": 5.225204200700117e-06, "loss": 0.5052, "step": 2240 }, { "epoch": 0.052508751458576426, "grad_norm": 2.0102493292640418, "learning_rate": 5.248541423570596e-06, "loss": 0.5284, "step": 2250 }, { "epoch": 0.05274212368728121, "grad_norm": 1.7344160488539706, "learning_rate": 5.271878646441074e-06, "loss": 0.5277, "step": 2260 }, { "epoch": 0.052975495915986, "grad_norm": 2.091087075452519, "learning_rate": 5.295215869311552e-06, "loss": 0.5573, "step": 2270 }, { "epoch": 0.05320886814469078, "grad_norm": 2.097761582459626, "learning_rate": 5.318553092182031e-06, "loss": 0.5528, "step": 2280 }, { "epoch": 0.053442240373395566, "grad_norm": 1.9801110710845184, "learning_rate": 5.3418903150525094e-06, "loss": 0.5152, "step": 2290 }, { "epoch": 0.05367561260210035, "grad_norm": 2.0505797293865, "learning_rate": 5.365227537922988e-06, "loss": 0.5732, "step": 2300 }, { "epoch": 0.05390898483080513, "grad_norm": 1.7350774716091946, "learning_rate": 5.388564760793465e-06, "loss": 0.5324, "step": 2310 }, { "epoch": 0.05414235705950992, "grad_norm": 2.043642857540956, "learning_rate": 5.411901983663944e-06, "loss": 0.4891, "step": 2320 }, { "epoch": 0.0543757292882147, "grad_norm": 2.1558637959309794, "learning_rate": 5.435239206534423e-06, "loss": 0.5046, "step": 2330 }, { "epoch": 0.05460910151691949, "grad_norm": 1.981857830494123, "learning_rate": 5.458576429404902e-06, "loss": 0.5141, "step": 2340 }, { "epoch": 0.05484247374562427, "grad_norm": 1.8321896774049558, "learning_rate": 5.4819136522753805e-06, "loss": 0.5218, "step": 2350 }, { "epoch": 0.05507584597432905, "grad_norm": 2.5854696773222905, "learning_rate": 5.505250875145858e-06, "loss": 0.5425, "step": 2360 }, { "epoch": 0.05530921820303384, "grad_norm": 2.150253904774116, "learning_rate": 5.5285880980163364e-06, "loss": 0.5422, "step": 2370 }, { "epoch": 0.05554259043173862, "grad_norm": 1.6961858827467333, "learning_rate": 5.551925320886815e-06, "loss": 0.5352, "step": 2380 }, { "epoch": 0.05577596266044341, "grad_norm": 1.940376447548389, "learning_rate": 5.575262543757294e-06, "loss": 0.5465, "step": 2390 }, { "epoch": 0.056009334889148193, "grad_norm": 1.9905013149023503, "learning_rate": 5.598599766627771e-06, "loss": 0.5486, "step": 2400 }, { "epoch": 0.05624270711785297, "grad_norm": 1.6855306404870227, "learning_rate": 5.62193698949825e-06, "loss": 0.5113, "step": 2410 }, { "epoch": 0.05647607934655776, "grad_norm": 1.8232427719613467, "learning_rate": 5.645274212368729e-06, "loss": 0.5551, "step": 2420 }, { "epoch": 0.05670945157526255, "grad_norm": 1.9953675480467008, "learning_rate": 5.6686114352392075e-06, "loss": 0.5129, "step": 2430 }, { "epoch": 0.05694282380396733, "grad_norm": 1.710689355157264, "learning_rate": 5.691948658109685e-06, "loss": 0.5256, "step": 2440 }, { "epoch": 0.057176196032672114, "grad_norm": 1.9533219704112916, "learning_rate": 5.715285880980163e-06, "loss": 0.5467, "step": 2450 }, { "epoch": 0.05740956826137689, "grad_norm": 1.7350320866928417, "learning_rate": 5.738623103850642e-06, "loss": 0.5198, "step": 2460 }, { "epoch": 0.05764294049008168, "grad_norm": 1.5114979521719736, "learning_rate": 5.761960326721121e-06, "loss": 0.5364, "step": 2470 }, { "epoch": 0.05787631271878647, "grad_norm": 2.0001606698579226, "learning_rate": 5.785297549591599e-06, "loss": 0.5579, "step": 2480 }, { "epoch": 0.05810968494749125, "grad_norm": 1.7938195608383982, "learning_rate": 5.808634772462077e-06, "loss": 0.5364, "step": 2490 }, { "epoch": 0.058343057176196034, "grad_norm": 2.585866660065655, "learning_rate": 5.831971995332556e-06, "loss": 0.5628, "step": 2500 }, { "epoch": 0.058576429404900814, "grad_norm": 1.9815568052355292, "learning_rate": 5.8553092182030345e-06, "loss": 0.5736, "step": 2510 }, { "epoch": 0.0588098016336056, "grad_norm": 1.8708069607346516, "learning_rate": 5.8786464410735124e-06, "loss": 0.5493, "step": 2520 }, { "epoch": 0.05904317386231039, "grad_norm": 2.0357232250373993, "learning_rate": 5.901983663943991e-06, "loss": 0.5491, "step": 2530 }, { "epoch": 0.05927654609101517, "grad_norm": 1.5491179318390405, "learning_rate": 5.925320886814469e-06, "loss": 0.5073, "step": 2540 }, { "epoch": 0.059509918319719954, "grad_norm": 1.9104400208570085, "learning_rate": 5.948658109684948e-06, "loss": 0.5492, "step": 2550 }, { "epoch": 0.05974329054842474, "grad_norm": 1.6374990992823877, "learning_rate": 5.971995332555426e-06, "loss": 0.5327, "step": 2560 }, { "epoch": 0.05997666277712952, "grad_norm": 1.921443221543358, "learning_rate": 5.995332555425905e-06, "loss": 0.5565, "step": 2570 }, { "epoch": 0.06021003500583431, "grad_norm": 1.902503733573911, "learning_rate": 6.0186697782963835e-06, "loss": 0.5607, "step": 2580 }, { "epoch": 0.06044340723453909, "grad_norm": 1.9323899316963362, "learning_rate": 6.0420070011668615e-06, "loss": 0.5401, "step": 2590 }, { "epoch": 0.060676779463243874, "grad_norm": 1.7081300753291058, "learning_rate": 6.06534422403734e-06, "loss": 0.5306, "step": 2600 }, { "epoch": 0.06091015169194866, "grad_norm": 1.8073519828748568, "learning_rate": 6.088681446907818e-06, "loss": 0.5381, "step": 2610 }, { "epoch": 0.06114352392065344, "grad_norm": 2.116740498776085, "learning_rate": 6.112018669778297e-06, "loss": 0.5421, "step": 2620 }, { "epoch": 0.06137689614935823, "grad_norm": 1.7845465500190816, "learning_rate": 6.135355892648776e-06, "loss": 0.5273, "step": 2630 }, { "epoch": 0.06161026837806301, "grad_norm": 5.4220940516685765, "learning_rate": 6.158693115519255e-06, "loss": 0.5394, "step": 2640 }, { "epoch": 0.061843640606767794, "grad_norm": 1.7373075014800954, "learning_rate": 6.182030338389732e-06, "loss": 0.5533, "step": 2650 }, { "epoch": 0.06207701283547258, "grad_norm": 1.9730978828549344, "learning_rate": 6.2053675612602105e-06, "loss": 0.5113, "step": 2660 }, { "epoch": 0.06231038506417736, "grad_norm": 1.8255465499021946, "learning_rate": 6.228704784130689e-06, "loss": 0.5414, "step": 2670 }, { "epoch": 0.06254375729288214, "grad_norm": 1.81637824357515, "learning_rate": 6.252042007001168e-06, "loss": 0.5435, "step": 2680 }, { "epoch": 0.06277712952158693, "grad_norm": 1.9809847163255394, "learning_rate": 6.275379229871645e-06, "loss": 0.5357, "step": 2690 }, { "epoch": 0.06301050175029171, "grad_norm": 1.966778348469347, "learning_rate": 6.298716452742124e-06, "loss": 0.5491, "step": 2700 }, { "epoch": 0.0632438739789965, "grad_norm": 1.8801482549569464, "learning_rate": 6.322053675612603e-06, "loss": 0.5049, "step": 2710 }, { "epoch": 0.06347724620770129, "grad_norm": 2.545466662665752, "learning_rate": 6.345390898483082e-06, "loss": 0.5, "step": 2720 }, { "epoch": 0.06371061843640607, "grad_norm": 1.8156083984395255, "learning_rate": 6.368728121353559e-06, "loss": 0.5324, "step": 2730 }, { "epoch": 0.06394399066511085, "grad_norm": 1.3819622543950723, "learning_rate": 6.3920653442240375e-06, "loss": 0.5047, "step": 2740 }, { "epoch": 0.06417736289381563, "grad_norm": 1.9535090562618997, "learning_rate": 6.415402567094516e-06, "loss": 0.5473, "step": 2750 }, { "epoch": 0.06441073512252042, "grad_norm": 1.9879069058990155, "learning_rate": 6.438739789964995e-06, "loss": 0.5664, "step": 2760 }, { "epoch": 0.06464410735122521, "grad_norm": 1.9155986022571898, "learning_rate": 6.462077012835473e-06, "loss": 0.5413, "step": 2770 }, { "epoch": 0.06487747957993, "grad_norm": 1.739444780795415, "learning_rate": 6.485414235705951e-06, "loss": 0.522, "step": 2780 }, { "epoch": 0.06511085180863477, "grad_norm": 1.7080331504125466, "learning_rate": 6.50875145857643e-06, "loss": 0.5094, "step": 2790 }, { "epoch": 0.06534422403733955, "grad_norm": 1.7894791192052777, "learning_rate": 6.532088681446909e-06, "loss": 0.5325, "step": 2800 }, { "epoch": 0.06557759626604434, "grad_norm": 1.7118536712547594, "learning_rate": 6.5554259043173865e-06, "loss": 0.5414, "step": 2810 }, { "epoch": 0.06581096849474913, "grad_norm": 2.5571014240585357, "learning_rate": 6.578763127187865e-06, "loss": 0.5715, "step": 2820 }, { "epoch": 0.06604434072345391, "grad_norm": 1.851287560783203, "learning_rate": 6.602100350058343e-06, "loss": 0.5419, "step": 2830 }, { "epoch": 0.06627771295215869, "grad_norm": 1.8153215136676704, "learning_rate": 6.625437572928822e-06, "loss": 0.549, "step": 2840 }, { "epoch": 0.06651108518086347, "grad_norm": 1.7890438619247548, "learning_rate": 6.648774795799301e-06, "loss": 0.5521, "step": 2850 }, { "epoch": 0.06674445740956826, "grad_norm": 2.2161684854819943, "learning_rate": 6.672112018669779e-06, "loss": 0.5428, "step": 2860 }, { "epoch": 0.06697782963827305, "grad_norm": 2.147386546843159, "learning_rate": 6.695449241540258e-06, "loss": 0.5673, "step": 2870 }, { "epoch": 0.06721120186697783, "grad_norm": 2.098259861619241, "learning_rate": 6.7187864644107356e-06, "loss": 0.5295, "step": 2880 }, { "epoch": 0.06744457409568261, "grad_norm": 2.3068015059231013, "learning_rate": 6.742123687281214e-06, "loss": 0.5162, "step": 2890 }, { "epoch": 0.0676779463243874, "grad_norm": 1.9097713691634581, "learning_rate": 6.765460910151692e-06, "loss": 0.5128, "step": 2900 }, { "epoch": 0.06791131855309218, "grad_norm": 1.6283638216004463, "learning_rate": 6.788798133022171e-06, "loss": 0.535, "step": 2910 }, { "epoch": 0.06814469078179697, "grad_norm": 2.2383948844591512, "learning_rate": 6.81213535589265e-06, "loss": 0.5642, "step": 2920 }, { "epoch": 0.06837806301050176, "grad_norm": 1.908959477554262, "learning_rate": 6.835472578763128e-06, "loss": 0.5551, "step": 2930 }, { "epoch": 0.06861143523920653, "grad_norm": 1.926809114198177, "learning_rate": 6.858809801633606e-06, "loss": 0.5373, "step": 2940 }, { "epoch": 0.06884480746791131, "grad_norm": 2.110318224791492, "learning_rate": 6.882147024504085e-06, "loss": 0.5691, "step": 2950 }, { "epoch": 0.0690781796966161, "grad_norm": 2.0784969994883626, "learning_rate": 6.905484247374563e-06, "loss": 0.5408, "step": 2960 }, { "epoch": 0.06931155192532089, "grad_norm": 1.98331191707211, "learning_rate": 6.928821470245042e-06, "loss": 0.5415, "step": 2970 }, { "epoch": 0.06954492415402568, "grad_norm": 3.282796463861512, "learning_rate": 6.952158693115519e-06, "loss": 0.5434, "step": 2980 }, { "epoch": 0.06977829638273046, "grad_norm": 1.6752140587587656, "learning_rate": 6.975495915985998e-06, "loss": 0.5529, "step": 2990 }, { "epoch": 0.07001166861143523, "grad_norm": 1.8153910269262854, "learning_rate": 6.998833138856477e-06, "loss": 0.556, "step": 3000 }, { "epoch": 0.07024504084014002, "grad_norm": 1.8380158174458583, "learning_rate": 7.022170361726956e-06, "loss": 0.5093, "step": 3010 }, { "epoch": 0.07047841306884481, "grad_norm": 1.7796700860498087, "learning_rate": 7.045507584597433e-06, "loss": 0.5217, "step": 3020 }, { "epoch": 0.0707117852975496, "grad_norm": 1.724839942186046, "learning_rate": 7.068844807467912e-06, "loss": 0.5482, "step": 3030 }, { "epoch": 0.07094515752625438, "grad_norm": 2.3849256363025515, "learning_rate": 7.09218203033839e-06, "loss": 0.5552, "step": 3040 }, { "epoch": 0.07117852975495916, "grad_norm": 1.8230572575969333, "learning_rate": 7.115519253208869e-06, "loss": 0.5507, "step": 3050 }, { "epoch": 0.07141190198366394, "grad_norm": 1.9155725850018386, "learning_rate": 7.138856476079346e-06, "loss": 0.5413, "step": 3060 }, { "epoch": 0.07164527421236873, "grad_norm": 1.8956012646628393, "learning_rate": 7.162193698949825e-06, "loss": 0.5287, "step": 3070 }, { "epoch": 0.07187864644107352, "grad_norm": 1.7852621399393975, "learning_rate": 7.185530921820304e-06, "loss": 0.5107, "step": 3080 }, { "epoch": 0.0721120186697783, "grad_norm": 1.8503230584985968, "learning_rate": 7.208868144690783e-06, "loss": 0.535, "step": 3090 }, { "epoch": 0.07234539089848308, "grad_norm": 2.0581912247719956, "learning_rate": 7.2322053675612615e-06, "loss": 0.5555, "step": 3100 }, { "epoch": 0.07257876312718786, "grad_norm": 1.813540243873344, "learning_rate": 7.2555425904317386e-06, "loss": 0.5548, "step": 3110 }, { "epoch": 0.07281213535589265, "grad_norm": 1.8260355588147341, "learning_rate": 7.278879813302217e-06, "loss": 0.5057, "step": 3120 }, { "epoch": 0.07304550758459744, "grad_norm": 1.8292882756811848, "learning_rate": 7.302217036172696e-06, "loss": 0.5361, "step": 3130 }, { "epoch": 0.07327887981330222, "grad_norm": 2.0084269669559514, "learning_rate": 7.325554259043175e-06, "loss": 0.5081, "step": 3140 }, { "epoch": 0.073512252042007, "grad_norm": 2.0451290967986195, "learning_rate": 7.348891481913653e-06, "loss": 0.5406, "step": 3150 }, { "epoch": 0.07374562427071178, "grad_norm": 1.9895570583221078, "learning_rate": 7.372228704784131e-06, "loss": 0.533, "step": 3160 }, { "epoch": 0.07397899649941657, "grad_norm": 2.0339924944294716, "learning_rate": 7.39556592765461e-06, "loss": 0.5553, "step": 3170 }, { "epoch": 0.07421236872812136, "grad_norm": 1.8465051299933453, "learning_rate": 7.4189031505250884e-06, "loss": 0.5357, "step": 3180 }, { "epoch": 0.07444574095682614, "grad_norm": 1.9615829072242252, "learning_rate": 7.442240373395566e-06, "loss": 0.5393, "step": 3190 }, { "epoch": 0.07467911318553092, "grad_norm": 1.8351314370529512, "learning_rate": 7.465577596266045e-06, "loss": 0.5407, "step": 3200 }, { "epoch": 0.0749124854142357, "grad_norm": 1.8269395940121294, "learning_rate": 7.488914819136523e-06, "loss": 0.5428, "step": 3210 }, { "epoch": 0.07514585764294049, "grad_norm": 1.7643804037057946, "learning_rate": 7.512252042007002e-06, "loss": 0.5314, "step": 3220 }, { "epoch": 0.07537922987164528, "grad_norm": 1.6925298589316025, "learning_rate": 7.53558926487748e-06, "loss": 0.5374, "step": 3230 }, { "epoch": 0.07561260210035006, "grad_norm": 1.5559961288298247, "learning_rate": 7.558926487747959e-06, "loss": 0.5302, "step": 3240 }, { "epoch": 0.07584597432905485, "grad_norm": 2.0132718984336533, "learning_rate": 7.5822637106184375e-06, "loss": 0.5194, "step": 3250 }, { "epoch": 0.07607934655775962, "grad_norm": 1.5037564012643108, "learning_rate": 7.6056009334889154e-06, "loss": 0.5537, "step": 3260 }, { "epoch": 0.07631271878646441, "grad_norm": 1.85441314007388, "learning_rate": 7.628938156359393e-06, "loss": 0.5446, "step": 3270 }, { "epoch": 0.0765460910151692, "grad_norm": 1.8169042645801974, "learning_rate": 7.652275379229872e-06, "loss": 0.5243, "step": 3280 }, { "epoch": 0.07677946324387398, "grad_norm": 1.5577200690163655, "learning_rate": 7.675612602100351e-06, "loss": 0.541, "step": 3290 }, { "epoch": 0.07701283547257877, "grad_norm": 1.8628519789973579, "learning_rate": 7.69894982497083e-06, "loss": 0.5309, "step": 3300 }, { "epoch": 0.07724620770128354, "grad_norm": 1.5043113087810354, "learning_rate": 7.722287047841307e-06, "loss": 0.4986, "step": 3310 }, { "epoch": 0.07747957992998833, "grad_norm": 8.23673587182035, "learning_rate": 7.745624270711786e-06, "loss": 0.5137, "step": 3320 }, { "epoch": 0.07771295215869312, "grad_norm": 1.7896247690341125, "learning_rate": 7.768961493582264e-06, "loss": 0.5509, "step": 3330 }, { "epoch": 0.0779463243873979, "grad_norm": 1.7353456941305143, "learning_rate": 7.792298716452743e-06, "loss": 0.5178, "step": 3340 }, { "epoch": 0.07817969661610269, "grad_norm": 1.8893777606360556, "learning_rate": 7.81563593932322e-06, "loss": 0.5262, "step": 3350 }, { "epoch": 0.07841306884480746, "grad_norm": 6.0142756211274735, "learning_rate": 7.8389731621937e-06, "loss": 0.5085, "step": 3360 }, { "epoch": 0.07864644107351225, "grad_norm": 3.9797346633917345, "learning_rate": 7.862310385064178e-06, "loss": 0.5036, "step": 3370 }, { "epoch": 0.07887981330221704, "grad_norm": 1.668663371892045, "learning_rate": 7.885647607934657e-06, "loss": 0.5115, "step": 3380 }, { "epoch": 0.07911318553092182, "grad_norm": 1.8590240421365798, "learning_rate": 7.908984830805136e-06, "loss": 0.5434, "step": 3390 }, { "epoch": 0.07934655775962661, "grad_norm": 1.828666973994112, "learning_rate": 7.932322053675613e-06, "loss": 0.5665, "step": 3400 }, { "epoch": 0.07957992998833138, "grad_norm": 1.7760963060582688, "learning_rate": 7.955659276546091e-06, "loss": 0.5493, "step": 3410 }, { "epoch": 0.07981330221703617, "grad_norm": 1.7014116236735626, "learning_rate": 7.97899649941657e-06, "loss": 0.5454, "step": 3420 }, { "epoch": 0.08004667444574096, "grad_norm": 1.8216105668154141, "learning_rate": 8.002333722287049e-06, "loss": 0.5325, "step": 3430 }, { "epoch": 0.08028004667444574, "grad_norm": 1.7967675790657882, "learning_rate": 8.025670945157526e-06, "loss": 0.5213, "step": 3440 }, { "epoch": 0.08051341890315053, "grad_norm": 2.116397055918936, "learning_rate": 8.049008168028005e-06, "loss": 0.5097, "step": 3450 }, { "epoch": 0.0807467911318553, "grad_norm": 1.783146665436848, "learning_rate": 8.072345390898484e-06, "loss": 0.5186, "step": 3460 }, { "epoch": 0.08098016336056009, "grad_norm": 1.889365209405198, "learning_rate": 8.095682613768963e-06, "loss": 0.5622, "step": 3470 }, { "epoch": 0.08121353558926488, "grad_norm": 2.364311336338291, "learning_rate": 8.11901983663944e-06, "loss": 0.5348, "step": 3480 }, { "epoch": 0.08144690781796966, "grad_norm": 1.9916792338151466, "learning_rate": 8.142357059509918e-06, "loss": 0.5284, "step": 3490 }, { "epoch": 0.08168028004667445, "grad_norm": 1.883704737762259, "learning_rate": 8.165694282380397e-06, "loss": 0.538, "step": 3500 }, { "epoch": 0.08191365227537924, "grad_norm": 2.101257049248236, "learning_rate": 8.189031505250876e-06, "loss": 0.53, "step": 3510 }, { "epoch": 0.08214702450408401, "grad_norm": 1.9362819782091587, "learning_rate": 8.212368728121353e-06, "loss": 0.5436, "step": 3520 }, { "epoch": 0.0823803967327888, "grad_norm": 1.9195169139323467, "learning_rate": 8.235705950991832e-06, "loss": 0.5446, "step": 3530 }, { "epoch": 0.08261376896149358, "grad_norm": 1.6727442809492035, "learning_rate": 8.25904317386231e-06, "loss": 0.5311, "step": 3540 }, { "epoch": 0.08284714119019837, "grad_norm": 1.6124649509102815, "learning_rate": 8.28238039673279e-06, "loss": 0.5494, "step": 3550 }, { "epoch": 0.08308051341890316, "grad_norm": 2.0426355404900027, "learning_rate": 8.305717619603267e-06, "loss": 0.5232, "step": 3560 }, { "epoch": 0.08331388564760793, "grad_norm": 1.6383166496381976, "learning_rate": 8.329054842473745e-06, "loss": 0.5168, "step": 3570 }, { "epoch": 0.08354725787631272, "grad_norm": 1.608881540196644, "learning_rate": 8.352392065344224e-06, "loss": 0.5116, "step": 3580 }, { "epoch": 0.0837806301050175, "grad_norm": 1.584321785436834, "learning_rate": 8.375729288214703e-06, "loss": 0.5236, "step": 3590 }, { "epoch": 0.08401400233372229, "grad_norm": 1.8393231191324635, "learning_rate": 8.399066511085182e-06, "loss": 0.5303, "step": 3600 }, { "epoch": 0.08424737456242708, "grad_norm": 1.7666546699101238, "learning_rate": 8.422403733955659e-06, "loss": 0.5455, "step": 3610 }, { "epoch": 0.08448074679113185, "grad_norm": 1.8254466097441653, "learning_rate": 8.445740956826138e-06, "loss": 0.5504, "step": 3620 }, { "epoch": 0.08471411901983664, "grad_norm": 1.6933849556825344, "learning_rate": 8.469078179696617e-06, "loss": 0.5581, "step": 3630 }, { "epoch": 0.08494749124854142, "grad_norm": 2.0632480215018023, "learning_rate": 8.492415402567095e-06, "loss": 0.5182, "step": 3640 }, { "epoch": 0.08518086347724621, "grad_norm": 1.8550202359772616, "learning_rate": 8.515752625437574e-06, "loss": 0.5722, "step": 3650 }, { "epoch": 0.085414235705951, "grad_norm": 1.8756200917606458, "learning_rate": 8.539089848308051e-06, "loss": 0.55, "step": 3660 }, { "epoch": 0.08564760793465577, "grad_norm": 2.077517373153662, "learning_rate": 8.56242707117853e-06, "loss": 0.5304, "step": 3670 }, { "epoch": 0.08588098016336056, "grad_norm": 1.853011068501322, "learning_rate": 8.585764294049009e-06, "loss": 0.5351, "step": 3680 }, { "epoch": 0.08611435239206534, "grad_norm": 1.8545381846592488, "learning_rate": 8.609101516919488e-06, "loss": 0.5419, "step": 3690 }, { "epoch": 0.08634772462077013, "grad_norm": 1.7866328228057002, "learning_rate": 8.632438739789966e-06, "loss": 0.5486, "step": 3700 }, { "epoch": 0.08658109684947492, "grad_norm": 1.6767964395532755, "learning_rate": 8.655775962660443e-06, "loss": 0.5211, "step": 3710 }, { "epoch": 0.08681446907817969, "grad_norm": 5.499364416040961, "learning_rate": 8.679113185530922e-06, "loss": 0.5824, "step": 3720 }, { "epoch": 0.08704784130688448, "grad_norm": 1.7990536489682754, "learning_rate": 8.702450408401401e-06, "loss": 0.5345, "step": 3730 }, { "epoch": 0.08728121353558926, "grad_norm": 2.0820822181032783, "learning_rate": 8.72578763127188e-06, "loss": 0.5207, "step": 3740 }, { "epoch": 0.08751458576429405, "grad_norm": 1.684075492266652, "learning_rate": 8.749124854142359e-06, "loss": 0.5441, "step": 3750 }, { "epoch": 0.08774795799299884, "grad_norm": 1.5561901443907868, "learning_rate": 8.772462077012836e-06, "loss": 0.5176, "step": 3760 }, { "epoch": 0.08798133022170362, "grad_norm": 2.0715086646133654, "learning_rate": 8.795799299883315e-06, "loss": 0.5108, "step": 3770 }, { "epoch": 0.0882147024504084, "grad_norm": 1.6908856211201022, "learning_rate": 8.819136522753793e-06, "loss": 0.5392, "step": 3780 }, { "epoch": 0.08844807467911318, "grad_norm": 1.7101633497133641, "learning_rate": 8.842473745624272e-06, "loss": 0.5235, "step": 3790 }, { "epoch": 0.08868144690781797, "grad_norm": 2.0029397295276894, "learning_rate": 8.865810968494751e-06, "loss": 0.546, "step": 3800 }, { "epoch": 0.08891481913652276, "grad_norm": 1.8570079200136143, "learning_rate": 8.889148191365228e-06, "loss": 0.504, "step": 3810 }, { "epoch": 0.08914819136522754, "grad_norm": 2.1516160267186715, "learning_rate": 8.912485414235707e-06, "loss": 0.5197, "step": 3820 }, { "epoch": 0.08938156359393232, "grad_norm": 2.277495507691139, "learning_rate": 8.935822637106186e-06, "loss": 0.5643, "step": 3830 }, { "epoch": 0.0896149358226371, "grad_norm": 1.8988621956518046, "learning_rate": 8.959159859976664e-06, "loss": 0.559, "step": 3840 }, { "epoch": 0.08984830805134189, "grad_norm": 1.9383823200824746, "learning_rate": 8.982497082847142e-06, "loss": 0.5477, "step": 3850 }, { "epoch": 0.09008168028004668, "grad_norm": 1.6516693952364707, "learning_rate": 9.00583430571762e-06, "loss": 0.5118, "step": 3860 }, { "epoch": 0.09031505250875146, "grad_norm": 2.055662237860751, "learning_rate": 9.029171528588099e-06, "loss": 0.5467, "step": 3870 }, { "epoch": 0.09054842473745624, "grad_norm": 1.9249888763945067, "learning_rate": 9.052508751458578e-06, "loss": 0.5568, "step": 3880 }, { "epoch": 0.09078179696616102, "grad_norm": 2.427875318274457, "learning_rate": 9.075845974329057e-06, "loss": 0.5324, "step": 3890 }, { "epoch": 0.09101516919486581, "grad_norm": 1.9449528614110825, "learning_rate": 9.099183197199534e-06, "loss": 0.5682, "step": 3900 }, { "epoch": 0.0912485414235706, "grad_norm": 2.619317233538575, "learning_rate": 9.122520420070013e-06, "loss": 0.5687, "step": 3910 }, { "epoch": 0.09148191365227538, "grad_norm": 1.9280360459348354, "learning_rate": 9.145857642940491e-06, "loss": 0.5314, "step": 3920 }, { "epoch": 0.09171528588098016, "grad_norm": 1.5195718354812542, "learning_rate": 9.16919486581097e-06, "loss": 0.5161, "step": 3930 }, { "epoch": 0.09194865810968494, "grad_norm": 2.7932362496938876, "learning_rate": 9.192532088681447e-06, "loss": 0.5553, "step": 3940 }, { "epoch": 0.09218203033838973, "grad_norm": 2.0144735984507025, "learning_rate": 9.215869311551926e-06, "loss": 0.5501, "step": 3950 }, { "epoch": 0.09241540256709452, "grad_norm": 1.8182978816188222, "learning_rate": 9.239206534422405e-06, "loss": 0.5506, "step": 3960 }, { "epoch": 0.0926487747957993, "grad_norm": 1.7227851372843972, "learning_rate": 9.262543757292884e-06, "loss": 0.5441, "step": 3970 }, { "epoch": 0.09288214702450408, "grad_norm": 1.6518264750022296, "learning_rate": 9.28588098016336e-06, "loss": 0.5549, "step": 3980 }, { "epoch": 0.09311551925320886, "grad_norm": 2.0660507736906277, "learning_rate": 9.30921820303384e-06, "loss": 0.537, "step": 3990 }, { "epoch": 0.09334889148191365, "grad_norm": 1.645511751990585, "learning_rate": 9.332555425904318e-06, "loss": 0.5385, "step": 4000 }, { "epoch": 0.09358226371061844, "grad_norm": 1.7945857586581697, "learning_rate": 9.355892648774797e-06, "loss": 0.53, "step": 4010 }, { "epoch": 0.09381563593932322, "grad_norm": 1.846346092879836, "learning_rate": 9.379229871645274e-06, "loss": 0.5227, "step": 4020 }, { "epoch": 0.09404900816802801, "grad_norm": 2.481663827542068, "learning_rate": 9.402567094515753e-06, "loss": 0.5245, "step": 4030 }, { "epoch": 0.09428238039673278, "grad_norm": 1.7453476505084051, "learning_rate": 9.425904317386232e-06, "loss": 0.5631, "step": 4040 }, { "epoch": 0.09451575262543757, "grad_norm": 1.6174798424978865, "learning_rate": 9.44924154025671e-06, "loss": 0.5443, "step": 4050 }, { "epoch": 0.09474912485414236, "grad_norm": 2.001050491271908, "learning_rate": 9.472578763127188e-06, "loss": 0.5219, "step": 4060 }, { "epoch": 0.09498249708284714, "grad_norm": 1.5228425034044246, "learning_rate": 9.495915985997667e-06, "loss": 0.519, "step": 4070 }, { "epoch": 0.09521586931155193, "grad_norm": 1.5474010929403412, "learning_rate": 9.519253208868145e-06, "loss": 0.5455, "step": 4080 }, { "epoch": 0.0954492415402567, "grad_norm": 1.5198136129815814, "learning_rate": 9.542590431738624e-06, "loss": 0.5202, "step": 4090 }, { "epoch": 0.09568261376896149, "grad_norm": 1.8679496309132935, "learning_rate": 9.565927654609101e-06, "loss": 0.5618, "step": 4100 }, { "epoch": 0.09591598599766628, "grad_norm": 1.7197728161739105, "learning_rate": 9.58926487747958e-06, "loss": 0.5454, "step": 4110 }, { "epoch": 0.09614935822637106, "grad_norm": 3.9114874595266356, "learning_rate": 9.612602100350059e-06, "loss": 0.5239, "step": 4120 }, { "epoch": 0.09638273045507585, "grad_norm": 1.9950576322546605, "learning_rate": 9.635939323220538e-06, "loss": 0.5529, "step": 4130 }, { "epoch": 0.09661610268378062, "grad_norm": 1.8357440065631287, "learning_rate": 9.659276546091016e-06, "loss": 0.5436, "step": 4140 }, { "epoch": 0.09684947491248541, "grad_norm": 1.6096402179565459, "learning_rate": 9.682613768961494e-06, "loss": 0.5102, "step": 4150 }, { "epoch": 0.0970828471411902, "grad_norm": 1.7692642495140085, "learning_rate": 9.705950991831972e-06, "loss": 0.5257, "step": 4160 }, { "epoch": 0.09731621936989499, "grad_norm": 1.3149651220420906, "learning_rate": 9.729288214702451e-06, "loss": 0.5215, "step": 4170 }, { "epoch": 0.09754959159859977, "grad_norm": 1.6353624532492879, "learning_rate": 9.75262543757293e-06, "loss": 0.5659, "step": 4180 }, { "epoch": 0.09778296382730454, "grad_norm": 1.489136001395627, "learning_rate": 9.775962660443407e-06, "loss": 0.5608, "step": 4190 }, { "epoch": 0.09801633605600933, "grad_norm": 1.7145402916918095, "learning_rate": 9.799299883313886e-06, "loss": 0.5547, "step": 4200 }, { "epoch": 0.09824970828471412, "grad_norm": 1.3655556698998408, "learning_rate": 9.822637106184365e-06, "loss": 0.5307, "step": 4210 }, { "epoch": 0.0984830805134189, "grad_norm": 1.3787325688471468, "learning_rate": 9.845974329054843e-06, "loss": 0.5321, "step": 4220 }, { "epoch": 0.09871645274212369, "grad_norm": 1.7462734666134363, "learning_rate": 9.86931155192532e-06, "loss": 0.5417, "step": 4230 }, { "epoch": 0.09894982497082846, "grad_norm": 1.6155067577402165, "learning_rate": 9.8926487747958e-06, "loss": 0.579, "step": 4240 }, { "epoch": 0.09918319719953325, "grad_norm": 1.7739888138538225, "learning_rate": 9.915985997666278e-06, "loss": 0.549, "step": 4250 }, { "epoch": 0.09941656942823804, "grad_norm": 1.717764799699531, "learning_rate": 9.939323220536757e-06, "loss": 0.5608, "step": 4260 }, { "epoch": 0.09964994165694283, "grad_norm": 1.9477512948292175, "learning_rate": 9.962660443407236e-06, "loss": 0.5603, "step": 4270 }, { "epoch": 0.09988331388564761, "grad_norm": 1.480718393078661, "learning_rate": 9.985997666277713e-06, "loss": 0.5175, "step": 4280 }, { "epoch": 0.1001166861143524, "grad_norm": 2.630648609193409, "learning_rate": 9.99999973455591e-06, "loss": 0.5642, "step": 4290 }, { "epoch": 0.10035005834305717, "grad_norm": 1.9380240181632937, "learning_rate": 9.999996748310215e-06, "loss": 0.5369, "step": 4300 }, { "epoch": 0.10058343057176196, "grad_norm": 1.802876804842863, "learning_rate": 9.999990444015698e-06, "loss": 0.5356, "step": 4310 }, { "epoch": 0.10081680280046675, "grad_norm": 1.9017790390453235, "learning_rate": 9.999980821676541e-06, "loss": 0.5096, "step": 4320 }, { "epoch": 0.10105017502917153, "grad_norm": 1.951416123775375, "learning_rate": 9.999967881299134e-06, "loss": 0.5237, "step": 4330 }, { "epoch": 0.10128354725787632, "grad_norm": 1.7968070155067675, "learning_rate": 9.999951622892063e-06, "loss": 0.565, "step": 4340 }, { "epoch": 0.10151691948658109, "grad_norm": 1.6024045768311594, "learning_rate": 9.999932046466115e-06, "loss": 0.5595, "step": 4350 }, { "epoch": 0.10175029171528588, "grad_norm": 1.5683446966058527, "learning_rate": 9.999909152034283e-06, "loss": 0.5368, "step": 4360 }, { "epoch": 0.10198366394399067, "grad_norm": 1.9295000481753883, "learning_rate": 9.999882939611761e-06, "loss": 0.5545, "step": 4370 }, { "epoch": 0.10221703617269545, "grad_norm": 1.903220949798579, "learning_rate": 9.999853409215939e-06, "loss": 0.5158, "step": 4380 }, { "epoch": 0.10245040840140024, "grad_norm": 2.984445446127136, "learning_rate": 9.99982056086642e-06, "loss": 0.5529, "step": 4390 }, { "epoch": 0.10268378063010501, "grad_norm": 1.7117759261756127, "learning_rate": 9.999784394584998e-06, "loss": 0.5561, "step": 4400 }, { "epoch": 0.1029171528588098, "grad_norm": 1.793072702221186, "learning_rate": 9.999744910395676e-06, "loss": 0.5432, "step": 4410 }, { "epoch": 0.10315052508751459, "grad_norm": 3.081841690239812, "learning_rate": 9.999702108324654e-06, "loss": 0.5408, "step": 4420 }, { "epoch": 0.10338389731621937, "grad_norm": 1.8876901750680657, "learning_rate": 9.99965598840034e-06, "loss": 0.528, "step": 4430 }, { "epoch": 0.10361726954492416, "grad_norm": 2.009390656677473, "learning_rate": 9.999606550653332e-06, "loss": 0.552, "step": 4440 }, { "epoch": 0.10385064177362893, "grad_norm": 1.6953025963024573, "learning_rate": 9.999553795116443e-06, "loss": 0.5289, "step": 4450 }, { "epoch": 0.10408401400233372, "grad_norm": 1.8854220089177103, "learning_rate": 9.999497721824683e-06, "loss": 0.5545, "step": 4460 }, { "epoch": 0.1043173862310385, "grad_norm": 1.6226225534785625, "learning_rate": 9.999438330815258e-06, "loss": 0.5452, "step": 4470 }, { "epoch": 0.10455075845974329, "grad_norm": 1.625177852911195, "learning_rate": 9.999375622127588e-06, "loss": 0.527, "step": 4480 }, { "epoch": 0.10478413068844808, "grad_norm": 2.0522978810638377, "learning_rate": 9.999309595803279e-06, "loss": 0.5306, "step": 4490 }, { "epoch": 0.10501750291715285, "grad_norm": 1.5828630351322097, "learning_rate": 9.99924025188615e-06, "loss": 0.5346, "step": 4500 }, { "epoch": 0.10525087514585764, "grad_norm": 1.6947995842540242, "learning_rate": 9.999167590422219e-06, "loss": 0.553, "step": 4510 }, { "epoch": 0.10548424737456243, "grad_norm": 1.706985534606039, "learning_rate": 9.999091611459704e-06, "loss": 0.5423, "step": 4520 }, { "epoch": 0.10571761960326721, "grad_norm": 1.9388513108495409, "learning_rate": 9.999012315049027e-06, "loss": 0.5438, "step": 4530 }, { "epoch": 0.105950991831972, "grad_norm": 1.8359177437285443, "learning_rate": 9.998929701242808e-06, "loss": 0.5257, "step": 4540 }, { "epoch": 0.10618436406067679, "grad_norm": 1.8919312821163066, "learning_rate": 9.998843770095871e-06, "loss": 0.539, "step": 4550 }, { "epoch": 0.10641773628938156, "grad_norm": 1.4401467360003457, "learning_rate": 9.998754521665241e-06, "loss": 0.4895, "step": 4560 }, { "epoch": 0.10665110851808635, "grad_norm": 1.8439115533379435, "learning_rate": 9.998661956010144e-06, "loss": 0.5559, "step": 4570 }, { "epoch": 0.10688448074679113, "grad_norm": 1.9559604712273595, "learning_rate": 9.998566073192008e-06, "loss": 0.5202, "step": 4580 }, { "epoch": 0.10711785297549592, "grad_norm": 1.517280281433443, "learning_rate": 9.998466873274461e-06, "loss": 0.5357, "step": 4590 }, { "epoch": 0.1073512252042007, "grad_norm": 1.8448032517473862, "learning_rate": 9.998364356323336e-06, "loss": 0.5774, "step": 4600 }, { "epoch": 0.10758459743290548, "grad_norm": 1.8312246245756525, "learning_rate": 9.99825852240666e-06, "loss": 0.5473, "step": 4610 }, { "epoch": 0.10781796966161027, "grad_norm": 1.553489710933144, "learning_rate": 9.998149371594666e-06, "loss": 0.5298, "step": 4620 }, { "epoch": 0.10805134189031505, "grad_norm": 1.5698209380262322, "learning_rate": 9.998036903959789e-06, "loss": 0.5118, "step": 4630 }, { "epoch": 0.10828471411901984, "grad_norm": 1.9172875966967153, "learning_rate": 9.997921119576663e-06, "loss": 0.5486, "step": 4640 }, { "epoch": 0.10851808634772463, "grad_norm": 1.6859826949896113, "learning_rate": 9.997802018522125e-06, "loss": 0.5546, "step": 4650 }, { "epoch": 0.1087514585764294, "grad_norm": 1.763867465351448, "learning_rate": 9.997679600875212e-06, "loss": 0.5401, "step": 4660 }, { "epoch": 0.10898483080513419, "grad_norm": 1.897069659242261, "learning_rate": 9.997553866717158e-06, "loss": 0.5325, "step": 4670 }, { "epoch": 0.10921820303383897, "grad_norm": 2.5337262825895386, "learning_rate": 9.997424816131406e-06, "loss": 0.5174, "step": 4680 }, { "epoch": 0.10945157526254376, "grad_norm": 1.725190574337659, "learning_rate": 9.997292449203593e-06, "loss": 0.5021, "step": 4690 }, { "epoch": 0.10968494749124855, "grad_norm": 1.692140959425351, "learning_rate": 9.997156766021559e-06, "loss": 0.5235, "step": 4700 }, { "epoch": 0.10991831971995332, "grad_norm": 1.4775260851961938, "learning_rate": 9.997017766675342e-06, "loss": 0.4963, "step": 4710 }, { "epoch": 0.1101516919486581, "grad_norm": 1.4447291269291798, "learning_rate": 9.996875451257191e-06, "loss": 0.547, "step": 4720 }, { "epoch": 0.1103850641773629, "grad_norm": 1.441235118472698, "learning_rate": 9.996729819861541e-06, "loss": 0.5362, "step": 4730 }, { "epoch": 0.11061843640606768, "grad_norm": 1.8432026400465549, "learning_rate": 9.996580872585038e-06, "loss": 0.5299, "step": 4740 }, { "epoch": 0.11085180863477247, "grad_norm": 1.8541239925197526, "learning_rate": 9.996428609526523e-06, "loss": 0.5156, "step": 4750 }, { "epoch": 0.11108518086347724, "grad_norm": 1.9617769969551635, "learning_rate": 9.99627303078704e-06, "loss": 0.5483, "step": 4760 }, { "epoch": 0.11131855309218203, "grad_norm": 1.9373025741556718, "learning_rate": 9.996114136469833e-06, "loss": 0.55, "step": 4770 }, { "epoch": 0.11155192532088681, "grad_norm": 1.5021460157542923, "learning_rate": 9.995951926680344e-06, "loss": 0.5603, "step": 4780 }, { "epoch": 0.1117852975495916, "grad_norm": 2.64638449173176, "learning_rate": 9.99578640152622e-06, "loss": 0.5587, "step": 4790 }, { "epoch": 0.11201866977829639, "grad_norm": 1.754234478064228, "learning_rate": 9.995617561117304e-06, "loss": 0.5292, "step": 4800 }, { "epoch": 0.11225204200700117, "grad_norm": 1.882617288188398, "learning_rate": 9.99544540556564e-06, "loss": 0.5237, "step": 4810 }, { "epoch": 0.11248541423570595, "grad_norm": 1.9782856161834563, "learning_rate": 9.995269934985471e-06, "loss": 0.5554, "step": 4820 }, { "epoch": 0.11271878646441073, "grad_norm": 1.6117690812101855, "learning_rate": 9.995091149493244e-06, "loss": 0.5566, "step": 4830 }, { "epoch": 0.11295215869311552, "grad_norm": 1.602506831143533, "learning_rate": 9.994909049207601e-06, "loss": 0.523, "step": 4840 }, { "epoch": 0.11318553092182031, "grad_norm": 1.8037363682722174, "learning_rate": 9.994723634249386e-06, "loss": 0.5209, "step": 4850 }, { "epoch": 0.1134189031505251, "grad_norm": 1.6730073131883625, "learning_rate": 9.99453490474164e-06, "loss": 0.4988, "step": 4860 }, { "epoch": 0.11365227537922987, "grad_norm": 2.0621638209326503, "learning_rate": 9.99434286080961e-06, "loss": 0.5582, "step": 4870 }, { "epoch": 0.11388564760793465, "grad_norm": 1.630433345426616, "learning_rate": 9.994147502580736e-06, "loss": 0.4968, "step": 4880 }, { "epoch": 0.11411901983663944, "grad_norm": 1.5657595857476616, "learning_rate": 9.99394883018466e-06, "loss": 0.5589, "step": 4890 }, { "epoch": 0.11435239206534423, "grad_norm": 1.6682093134857405, "learning_rate": 9.99374684375322e-06, "loss": 0.5299, "step": 4900 }, { "epoch": 0.11458576429404901, "grad_norm": 1.8341134836929143, "learning_rate": 9.993541543420463e-06, "loss": 0.5456, "step": 4910 }, { "epoch": 0.11481913652275379, "grad_norm": 2.174696781532709, "learning_rate": 9.99333292932262e-06, "loss": 0.5345, "step": 4920 }, { "epoch": 0.11505250875145857, "grad_norm": 1.858738395258736, "learning_rate": 9.993121001598138e-06, "loss": 0.5346, "step": 4930 }, { "epoch": 0.11528588098016336, "grad_norm": 1.8044813199400116, "learning_rate": 9.99290576038765e-06, "loss": 0.5305, "step": 4940 }, { "epoch": 0.11551925320886815, "grad_norm": 1.7383578677838647, "learning_rate": 9.992687205833991e-06, "loss": 0.544, "step": 4950 }, { "epoch": 0.11575262543757293, "grad_norm": 1.5377174432609162, "learning_rate": 9.9924653380822e-06, "loss": 0.5294, "step": 4960 }, { "epoch": 0.11598599766627771, "grad_norm": 1.9498560101841182, "learning_rate": 9.992240157279507e-06, "loss": 0.5457, "step": 4970 }, { "epoch": 0.1162193698949825, "grad_norm": 1.55835452694597, "learning_rate": 9.992011663575345e-06, "loss": 0.5215, "step": 4980 }, { "epoch": 0.11645274212368728, "grad_norm": 1.867437043920295, "learning_rate": 9.991779857121346e-06, "loss": 0.5369, "step": 4990 }, { "epoch": 0.11668611435239207, "grad_norm": 1.73777070780749, "learning_rate": 9.99154473807134e-06, "loss": 0.5533, "step": 5000 }, { "epoch": 0.11691948658109685, "grad_norm": 2.1742250818081397, "learning_rate": 9.991306306581351e-06, "loss": 0.5292, "step": 5010 }, { "epoch": 0.11715285880980163, "grad_norm": 1.530930787398578, "learning_rate": 9.991064562809607e-06, "loss": 0.5162, "step": 5020 }, { "epoch": 0.11738623103850641, "grad_norm": 1.492639813966678, "learning_rate": 9.990819506916532e-06, "loss": 0.5041, "step": 5030 }, { "epoch": 0.1176196032672112, "grad_norm": 1.3979532771647274, "learning_rate": 9.990571139064746e-06, "loss": 0.5279, "step": 5040 }, { "epoch": 0.11785297549591599, "grad_norm": 1.7362791650894893, "learning_rate": 9.990319459419068e-06, "loss": 0.523, "step": 5050 }, { "epoch": 0.11808634772462077, "grad_norm": 1.7513657359938304, "learning_rate": 9.990064468146519e-06, "loss": 0.502, "step": 5060 }, { "epoch": 0.11831971995332556, "grad_norm": 1.68493490153061, "learning_rate": 9.989806165416309e-06, "loss": 0.5273, "step": 5070 }, { "epoch": 0.11855309218203033, "grad_norm": 1.6752256325246873, "learning_rate": 9.989544551399853e-06, "loss": 0.5429, "step": 5080 }, { "epoch": 0.11878646441073512, "grad_norm": 1.7350542847701365, "learning_rate": 9.98927962627076e-06, "loss": 0.5347, "step": 5090 }, { "epoch": 0.11901983663943991, "grad_norm": 1.7151819629002185, "learning_rate": 9.989011390204838e-06, "loss": 0.5265, "step": 5100 }, { "epoch": 0.1192532088681447, "grad_norm": 1.6416759454656162, "learning_rate": 9.98873984338009e-06, "loss": 0.553, "step": 5110 }, { "epoch": 0.11948658109684948, "grad_norm": 1.6472660647590949, "learning_rate": 9.98846498597672e-06, "loss": 0.5674, "step": 5120 }, { "epoch": 0.11971995332555425, "grad_norm": 1.3309850469656117, "learning_rate": 9.988186818177122e-06, "loss": 0.4806, "step": 5130 }, { "epoch": 0.11995332555425904, "grad_norm": 1.661573380902705, "learning_rate": 9.98790534016589e-06, "loss": 0.5255, "step": 5140 }, { "epoch": 0.12018669778296383, "grad_norm": 1.7102348044442268, "learning_rate": 9.987620552129821e-06, "loss": 0.4981, "step": 5150 }, { "epoch": 0.12042007001166861, "grad_norm": 1.5192621254584413, "learning_rate": 9.987332454257902e-06, "loss": 0.4974, "step": 5160 }, { "epoch": 0.1206534422403734, "grad_norm": 1.7590523692238227, "learning_rate": 9.987041046741314e-06, "loss": 0.5399, "step": 5170 }, { "epoch": 0.12088681446907817, "grad_norm": 1.6568994499475307, "learning_rate": 9.986746329773443e-06, "loss": 0.5114, "step": 5180 }, { "epoch": 0.12112018669778296, "grad_norm": 1.6074994857010143, "learning_rate": 9.986448303549864e-06, "loss": 0.5432, "step": 5190 }, { "epoch": 0.12135355892648775, "grad_norm": 1.6992420995686977, "learning_rate": 9.986146968268346e-06, "loss": 0.5414, "step": 5200 }, { "epoch": 0.12158693115519253, "grad_norm": 1.6434633183074128, "learning_rate": 9.985842324128865e-06, "loss": 0.5443, "step": 5210 }, { "epoch": 0.12182030338389732, "grad_norm": 4.402357431530811, "learning_rate": 9.985534371333582e-06, "loss": 0.5519, "step": 5220 }, { "epoch": 0.1220536756126021, "grad_norm": 1.9168178090650274, "learning_rate": 9.985223110086858e-06, "loss": 0.5599, "step": 5230 }, { "epoch": 0.12228704784130688, "grad_norm": 1.7366581596105368, "learning_rate": 9.98490854059525e-06, "loss": 0.5445, "step": 5240 }, { "epoch": 0.12252042007001167, "grad_norm": 1.7508891982147188, "learning_rate": 9.98459066306751e-06, "loss": 0.557, "step": 5250 }, { "epoch": 0.12275379229871645, "grad_norm": 1.43894413233037, "learning_rate": 9.984269477714584e-06, "loss": 0.5299, "step": 5260 }, { "epoch": 0.12298716452742124, "grad_norm": 1.8314043056149347, "learning_rate": 9.983944984749613e-06, "loss": 0.5531, "step": 5270 }, { "epoch": 0.12322053675612601, "grad_norm": 1.5106453539973432, "learning_rate": 9.983617184387938e-06, "loss": 0.5191, "step": 5280 }, { "epoch": 0.1234539089848308, "grad_norm": 1.73348790835701, "learning_rate": 9.983286076847083e-06, "loss": 0.5223, "step": 5290 }, { "epoch": 0.12368728121353559, "grad_norm": 1.9282067831177252, "learning_rate": 9.98295166234678e-06, "loss": 0.5265, "step": 5300 }, { "epoch": 0.12392065344224037, "grad_norm": 1.5740083986733606, "learning_rate": 9.98261394110895e-06, "loss": 0.504, "step": 5310 }, { "epoch": 0.12415402567094516, "grad_norm": 1.3731920599371494, "learning_rate": 9.982272913357705e-06, "loss": 0.5497, "step": 5320 }, { "epoch": 0.12438739789964995, "grad_norm": 1.7411614500899775, "learning_rate": 9.981928579319358e-06, "loss": 0.5393, "step": 5330 }, { "epoch": 0.12462077012835472, "grad_norm": 3.2187723559893966, "learning_rate": 9.98158093922241e-06, "loss": 0.5625, "step": 5340 }, { "epoch": 0.12485414235705951, "grad_norm": 1.6837982368861453, "learning_rate": 9.981229993297561e-06, "loss": 0.5365, "step": 5350 }, { "epoch": 0.12508751458576428, "grad_norm": 1.5858574654523354, "learning_rate": 9.980875741777699e-06, "loss": 0.5163, "step": 5360 }, { "epoch": 0.12532088681446907, "grad_norm": 1.5794847860449277, "learning_rate": 9.980518184897912e-06, "loss": 0.5219, "step": 5370 }, { "epoch": 0.12555425904317385, "grad_norm": 1.6038846150180355, "learning_rate": 9.980157322895477e-06, "loss": 0.5223, "step": 5380 }, { "epoch": 0.12578763127187864, "grad_norm": 1.9470160160545884, "learning_rate": 9.979793156009864e-06, "loss": 0.5277, "step": 5390 }, { "epoch": 0.12602100350058343, "grad_norm": 1.5674513905404903, "learning_rate": 9.97942568448274e-06, "loss": 0.538, "step": 5400 }, { "epoch": 0.12625437572928822, "grad_norm": 1.9572017529131627, "learning_rate": 9.979054908557963e-06, "loss": 0.526, "step": 5410 }, { "epoch": 0.126487747957993, "grad_norm": 1.7331380211990473, "learning_rate": 9.978680828481584e-06, "loss": 0.54, "step": 5420 }, { "epoch": 0.1267211201866978, "grad_norm": 1.8838264151613362, "learning_rate": 9.978303444501844e-06, "loss": 0.551, "step": 5430 }, { "epoch": 0.12695449241540258, "grad_norm": 1.782018904430898, "learning_rate": 9.977922756869182e-06, "loss": 0.544, "step": 5440 }, { "epoch": 0.12718786464410736, "grad_norm": 1.7313899069508236, "learning_rate": 9.977538765836223e-06, "loss": 0.5092, "step": 5450 }, { "epoch": 0.12742123687281215, "grad_norm": 1.9556004134821088, "learning_rate": 9.97715147165779e-06, "loss": 0.5516, "step": 5460 }, { "epoch": 0.1276546091015169, "grad_norm": 1.8130509042135527, "learning_rate": 9.976760874590895e-06, "loss": 0.5242, "step": 5470 }, { "epoch": 0.1278879813302217, "grad_norm": 1.7327712628639118, "learning_rate": 9.97636697489474e-06, "loss": 0.5228, "step": 5480 }, { "epoch": 0.12812135355892648, "grad_norm": 1.5321073644887564, "learning_rate": 9.975969772830722e-06, "loss": 0.5118, "step": 5490 }, { "epoch": 0.12835472578763127, "grad_norm": 1.7823306768073364, "learning_rate": 9.97556926866243e-06, "loss": 0.5161, "step": 5500 }, { "epoch": 0.12858809801633606, "grad_norm": 1.5705936932875941, "learning_rate": 9.97516546265564e-06, "loss": 0.5282, "step": 5510 }, { "epoch": 0.12882147024504084, "grad_norm": 1.469514877221771, "learning_rate": 9.974758355078324e-06, "loss": 0.5147, "step": 5520 }, { "epoch": 0.12905484247374563, "grad_norm": 1.4225720620800264, "learning_rate": 9.974347946200641e-06, "loss": 0.501, "step": 5530 }, { "epoch": 0.12928821470245042, "grad_norm": 1.4152224009809595, "learning_rate": 9.973934236294946e-06, "loss": 0.5286, "step": 5540 }, { "epoch": 0.1295215869311552, "grad_norm": 2.0484914859638517, "learning_rate": 9.973517225635776e-06, "loss": 0.5251, "step": 5550 }, { "epoch": 0.12975495915986, "grad_norm": 1.860985331258325, "learning_rate": 9.973096914499867e-06, "loss": 0.5317, "step": 5560 }, { "epoch": 0.12998833138856475, "grad_norm": 1.632900087797287, "learning_rate": 9.972673303166141e-06, "loss": 0.5144, "step": 5570 }, { "epoch": 0.13022170361726954, "grad_norm": 1.5829829077070332, "learning_rate": 9.972246391915709e-06, "loss": 0.5365, "step": 5580 }, { "epoch": 0.13045507584597432, "grad_norm": 2.104424789761714, "learning_rate": 9.971816181031877e-06, "loss": 0.5176, "step": 5590 }, { "epoch": 0.1306884480746791, "grad_norm": 1.5981073068888962, "learning_rate": 9.971382670800134e-06, "loss": 0.5016, "step": 5600 }, { "epoch": 0.1309218203033839, "grad_norm": 2.575993607098442, "learning_rate": 9.970945861508165e-06, "loss": 0.5394, "step": 5610 }, { "epoch": 0.13115519253208868, "grad_norm": 2.0218795114369104, "learning_rate": 9.970505753445838e-06, "loss": 0.5051, "step": 5620 }, { "epoch": 0.13138856476079347, "grad_norm": 1.4073009046985687, "learning_rate": 9.970062346905216e-06, "loss": 0.5541, "step": 5630 }, { "epoch": 0.13162193698949826, "grad_norm": 1.7325115756340435, "learning_rate": 9.969615642180546e-06, "loss": 0.5331, "step": 5640 }, { "epoch": 0.13185530921820304, "grad_norm": 1.7388684065773392, "learning_rate": 9.969165639568265e-06, "loss": 0.5516, "step": 5650 }, { "epoch": 0.13208868144690783, "grad_norm": 1.791515857971823, "learning_rate": 9.968712339367004e-06, "loss": 0.5303, "step": 5660 }, { "epoch": 0.1323220536756126, "grad_norm": 1.4991138546459972, "learning_rate": 9.968255741877572e-06, "loss": 0.5163, "step": 5670 }, { "epoch": 0.13255542590431738, "grad_norm": 1.7787139228223652, "learning_rate": 9.967795847402976e-06, "loss": 0.5327, "step": 5680 }, { "epoch": 0.13278879813302216, "grad_norm": 1.7508263522181275, "learning_rate": 9.967332656248404e-06, "loss": 0.5483, "step": 5690 }, { "epoch": 0.13302217036172695, "grad_norm": 1.631990951065582, "learning_rate": 9.966866168721236e-06, "loss": 0.5345, "step": 5700 }, { "epoch": 0.13325554259043174, "grad_norm": 2.3406908958452544, "learning_rate": 9.966396385131037e-06, "loss": 0.5012, "step": 5710 }, { "epoch": 0.13348891481913652, "grad_norm": 1.519762278423464, "learning_rate": 9.96592330578956e-06, "loss": 0.5216, "step": 5720 }, { "epoch": 0.1337222870478413, "grad_norm": 1.8510478531038554, "learning_rate": 9.965446931010745e-06, "loss": 0.5548, "step": 5730 }, { "epoch": 0.1339556592765461, "grad_norm": 1.471768661470898, "learning_rate": 9.96496726111072e-06, "loss": 0.5158, "step": 5740 }, { "epoch": 0.13418903150525088, "grad_norm": 1.613419675178553, "learning_rate": 9.9644842964078e-06, "loss": 0.5286, "step": 5750 }, { "epoch": 0.13442240373395567, "grad_norm": 2.9246106261815235, "learning_rate": 9.963998037222484e-06, "loss": 0.5562, "step": 5760 }, { "epoch": 0.13465577596266046, "grad_norm": 1.5273937420720443, "learning_rate": 9.963508483877458e-06, "loss": 0.5031, "step": 5770 }, { "epoch": 0.13488914819136522, "grad_norm": 1.3591744247221245, "learning_rate": 9.963015636697594e-06, "loss": 0.5163, "step": 5780 }, { "epoch": 0.13512252042007, "grad_norm": 1.7156141486577463, "learning_rate": 9.962519496009953e-06, "loss": 0.5331, "step": 5790 }, { "epoch": 0.1353558926487748, "grad_norm": 1.6701589978128337, "learning_rate": 9.962020062143775e-06, "loss": 0.5454, "step": 5800 }, { "epoch": 0.13558926487747958, "grad_norm": 1.6049990856087235, "learning_rate": 9.961517335430494e-06, "loss": 0.5558, "step": 5810 }, { "epoch": 0.13582263710618436, "grad_norm": 2.025825379785842, "learning_rate": 9.961011316203723e-06, "loss": 0.525, "step": 5820 }, { "epoch": 0.13605600933488915, "grad_norm": 1.7712756495719746, "learning_rate": 9.96050200479926e-06, "loss": 0.5249, "step": 5830 }, { "epoch": 0.13628938156359394, "grad_norm": 1.928941658353895, "learning_rate": 9.95998940155509e-06, "loss": 0.532, "step": 5840 }, { "epoch": 0.13652275379229872, "grad_norm": 1.6441900676822152, "learning_rate": 9.959473506811384e-06, "loss": 0.5293, "step": 5850 }, { "epoch": 0.1367561260210035, "grad_norm": 1.7046405357156236, "learning_rate": 9.95895432091049e-06, "loss": 0.5091, "step": 5860 }, { "epoch": 0.1369894982497083, "grad_norm": 1.6648366389787668, "learning_rate": 9.95843184419695e-06, "loss": 0.5302, "step": 5870 }, { "epoch": 0.13722287047841306, "grad_norm": 1.6212941551791213, "learning_rate": 9.957906077017484e-06, "loss": 0.5212, "step": 5880 }, { "epoch": 0.13745624270711784, "grad_norm": 1.7936995218023912, "learning_rate": 9.957377019720991e-06, "loss": 0.5217, "step": 5890 }, { "epoch": 0.13768961493582263, "grad_norm": 1.6989109927669939, "learning_rate": 9.956844672658568e-06, "loss": 0.549, "step": 5900 }, { "epoch": 0.13792298716452742, "grad_norm": 1.5764412614260905, "learning_rate": 9.956309036183478e-06, "loss": 0.5442, "step": 5910 }, { "epoch": 0.1381563593932322, "grad_norm": 1.4544278187489383, "learning_rate": 9.95577011065118e-06, "loss": 0.5128, "step": 5920 }, { "epoch": 0.138389731621937, "grad_norm": 1.6138322350048653, "learning_rate": 9.955227896419308e-06, "loss": 0.532, "step": 5930 }, { "epoch": 0.13862310385064178, "grad_norm": 1.4559854499917393, "learning_rate": 9.95468239384768e-06, "loss": 0.5182, "step": 5940 }, { "epoch": 0.13885647607934656, "grad_norm": 1.6052339512718512, "learning_rate": 9.954133603298299e-06, "loss": 0.5239, "step": 5950 }, { "epoch": 0.13908984830805135, "grad_norm": 1.7500335819748858, "learning_rate": 9.953581525135349e-06, "loss": 0.5246, "step": 5960 }, { "epoch": 0.13932322053675614, "grad_norm": 1.5536136394165951, "learning_rate": 9.953026159725191e-06, "loss": 0.5121, "step": 5970 }, { "epoch": 0.13955659276546092, "grad_norm": 1.506783646993436, "learning_rate": 9.952467507436374e-06, "loss": 0.532, "step": 5980 }, { "epoch": 0.13978996499416568, "grad_norm": 1.5962824331438081, "learning_rate": 9.951905568639625e-06, "loss": 0.5488, "step": 5990 }, { "epoch": 0.14002333722287047, "grad_norm": 1.5973090560801166, "learning_rate": 9.951340343707852e-06, "loss": 0.5222, "step": 6000 }, { "epoch": 0.14025670945157526, "grad_norm": 1.5341584622925808, "learning_rate": 9.950771833016145e-06, "loss": 0.4855, "step": 6010 }, { "epoch": 0.14049008168028004, "grad_norm": 1.689686098993697, "learning_rate": 9.95020003694177e-06, "loss": 0.5181, "step": 6020 }, { "epoch": 0.14072345390898483, "grad_norm": 1.7475740330363656, "learning_rate": 9.949624955864182e-06, "loss": 0.5402, "step": 6030 }, { "epoch": 0.14095682613768962, "grad_norm": 1.9195587066272068, "learning_rate": 9.949046590165005e-06, "loss": 0.5058, "step": 6040 }, { "epoch": 0.1411901983663944, "grad_norm": 1.676083970079515, "learning_rate": 9.948464940228053e-06, "loss": 0.5512, "step": 6050 }, { "epoch": 0.1414235705950992, "grad_norm": 1.5123183021676314, "learning_rate": 9.947880006439313e-06, "loss": 0.5147, "step": 6060 }, { "epoch": 0.14165694282380398, "grad_norm": 1.8846051620349256, "learning_rate": 9.94729178918695e-06, "loss": 0.5141, "step": 6070 }, { "epoch": 0.14189031505250876, "grad_norm": 1.9540027048375572, "learning_rate": 9.946700288861317e-06, "loss": 0.536, "step": 6080 }, { "epoch": 0.14212368728121352, "grad_norm": 7.348629385059846, "learning_rate": 9.946105505854937e-06, "loss": 0.5117, "step": 6090 }, { "epoch": 0.1423570595099183, "grad_norm": 1.6302106590706849, "learning_rate": 9.945507440562514e-06, "loss": 0.5356, "step": 6100 }, { "epoch": 0.1425904317386231, "grad_norm": 1.6895808683948619, "learning_rate": 9.944906093380929e-06, "loss": 0.512, "step": 6110 }, { "epoch": 0.14282380396732788, "grad_norm": 1.5704770382387103, "learning_rate": 9.944301464709243e-06, "loss": 0.4871, "step": 6120 }, { "epoch": 0.14305717619603267, "grad_norm": 1.6687026067120805, "learning_rate": 9.943693554948692e-06, "loss": 0.5572, "step": 6130 }, { "epoch": 0.14329054842473746, "grad_norm": 1.6508513734463568, "learning_rate": 9.943082364502696e-06, "loss": 0.5328, "step": 6140 }, { "epoch": 0.14352392065344224, "grad_norm": 1.6048256469526596, "learning_rate": 9.942467893776843e-06, "loss": 0.5148, "step": 6150 }, { "epoch": 0.14375729288214703, "grad_norm": 1.682509846585489, "learning_rate": 9.941850143178904e-06, "loss": 0.5281, "step": 6160 }, { "epoch": 0.14399066511085182, "grad_norm": 1.8408173430802703, "learning_rate": 9.941229113118822e-06, "loss": 0.5183, "step": 6170 }, { "epoch": 0.1442240373395566, "grad_norm": 1.407915175738699, "learning_rate": 9.940604804008722e-06, "loss": 0.5098, "step": 6180 }, { "epoch": 0.14445740956826136, "grad_norm": 1.5757545777178468, "learning_rate": 9.939977216262899e-06, "loss": 0.5183, "step": 6190 }, { "epoch": 0.14469078179696615, "grad_norm": 1.580291861073056, "learning_rate": 9.93934635029783e-06, "loss": 0.5251, "step": 6200 }, { "epoch": 0.14492415402567094, "grad_norm": 1.6877706386268683, "learning_rate": 9.938712206532162e-06, "loss": 0.5216, "step": 6210 }, { "epoch": 0.14515752625437572, "grad_norm": 1.6636059381572625, "learning_rate": 9.93807478538672e-06, "loss": 0.5254, "step": 6220 }, { "epoch": 0.1453908984830805, "grad_norm": 1.3134397710045282, "learning_rate": 9.937434087284501e-06, "loss": 0.5221, "step": 6230 }, { "epoch": 0.1456242707117853, "grad_norm": 1.5097680090821624, "learning_rate": 9.936790112650683e-06, "loss": 0.5684, "step": 6240 }, { "epoch": 0.14585764294049008, "grad_norm": 1.4985372581690666, "learning_rate": 9.93614286191261e-06, "loss": 0.5419, "step": 6250 }, { "epoch": 0.14609101516919487, "grad_norm": 1.6644135187235989, "learning_rate": 9.935492335499806e-06, "loss": 0.5165, "step": 6260 }, { "epoch": 0.14632438739789966, "grad_norm": 1.6449727159731216, "learning_rate": 9.934838533843966e-06, "loss": 0.5291, "step": 6270 }, { "epoch": 0.14655775962660444, "grad_norm": 1.464600098645926, "learning_rate": 9.93418145737896e-06, "loss": 0.5194, "step": 6280 }, { "epoch": 0.14679113185530923, "grad_norm": 1.6959349960994852, "learning_rate": 9.933521106540834e-06, "loss": 0.5587, "step": 6290 }, { "epoch": 0.147024504084014, "grad_norm": 1.6137097087409198, "learning_rate": 9.932857481767797e-06, "loss": 0.5433, "step": 6300 }, { "epoch": 0.14725787631271878, "grad_norm": 1.6249171793355315, "learning_rate": 9.932190583500242e-06, "loss": 0.5383, "step": 6310 }, { "epoch": 0.14749124854142356, "grad_norm": 1.582034029420302, "learning_rate": 9.931520412180728e-06, "loss": 0.5558, "step": 6320 }, { "epoch": 0.14772462077012835, "grad_norm": 1.599971830848711, "learning_rate": 9.930846968253988e-06, "loss": 0.5245, "step": 6330 }, { "epoch": 0.14795799299883314, "grad_norm": 1.6941138568647638, "learning_rate": 9.930170252166926e-06, "loss": 0.5475, "step": 6340 }, { "epoch": 0.14819136522753792, "grad_norm": 1.5555888436557606, "learning_rate": 9.929490264368617e-06, "loss": 0.5433, "step": 6350 }, { "epoch": 0.1484247374562427, "grad_norm": 1.5813510188649835, "learning_rate": 9.928807005310308e-06, "loss": 0.5343, "step": 6360 }, { "epoch": 0.1486581096849475, "grad_norm": 1.4823103230905903, "learning_rate": 9.928120475445418e-06, "loss": 0.5387, "step": 6370 }, { "epoch": 0.14889148191365228, "grad_norm": 1.481090809242857, "learning_rate": 9.927430675229534e-06, "loss": 0.5185, "step": 6380 }, { "epoch": 0.14912485414235707, "grad_norm": 1.7943677864612064, "learning_rate": 9.926737605120413e-06, "loss": 0.5438, "step": 6390 }, { "epoch": 0.14935822637106183, "grad_norm": 1.43399654313382, "learning_rate": 9.926041265577989e-06, "loss": 0.4976, "step": 6400 }, { "epoch": 0.14959159859976662, "grad_norm": 2.2433167918270787, "learning_rate": 9.925341657064352e-06, "loss": 0.5388, "step": 6410 }, { "epoch": 0.1498249708284714, "grad_norm": 1.2585569682601765, "learning_rate": 9.924638780043777e-06, "loss": 0.5262, "step": 6420 }, { "epoch": 0.1500583430571762, "grad_norm": 1.5320326055765454, "learning_rate": 9.923932634982693e-06, "loss": 0.53, "step": 6430 }, { "epoch": 0.15029171528588098, "grad_norm": 1.8324721929737398, "learning_rate": 9.923223222349711e-06, "loss": 0.5144, "step": 6440 }, { "epoch": 0.15052508751458576, "grad_norm": 1.893186840131505, "learning_rate": 9.922510542615604e-06, "loss": 0.5018, "step": 6450 }, { "epoch": 0.15075845974329055, "grad_norm": 1.7762432824634449, "learning_rate": 9.921794596253311e-06, "loss": 0.5408, "step": 6460 }, { "epoch": 0.15099183197199534, "grad_norm": 1.6635548720118698, "learning_rate": 9.921075383737942e-06, "loss": 0.5435, "step": 6470 }, { "epoch": 0.15122520420070013, "grad_norm": 1.6735724677253048, "learning_rate": 9.920352905546772e-06, "loss": 0.5622, "step": 6480 }, { "epoch": 0.1514585764294049, "grad_norm": 1.5839793769968955, "learning_rate": 9.919627162159248e-06, "loss": 0.5016, "step": 6490 }, { "epoch": 0.1516919486581097, "grad_norm": 1.6119595755383676, "learning_rate": 9.918898154056982e-06, "loss": 0.501, "step": 6500 }, { "epoch": 0.15192532088681446, "grad_norm": 1.7298963221357428, "learning_rate": 9.918165881723748e-06, "loss": 0.5224, "step": 6510 }, { "epoch": 0.15215869311551924, "grad_norm": 1.5527460843144485, "learning_rate": 9.917430345645487e-06, "loss": 0.5391, "step": 6520 }, { "epoch": 0.15239206534422403, "grad_norm": 1.664283803281982, "learning_rate": 9.916691546310315e-06, "loss": 0.5159, "step": 6530 }, { "epoch": 0.15262543757292882, "grad_norm": 1.585023660888564, "learning_rate": 9.9159494842085e-06, "loss": 0.5381, "step": 6540 }, { "epoch": 0.1528588098016336, "grad_norm": 1.6867231886870808, "learning_rate": 9.915204159832488e-06, "loss": 0.5439, "step": 6550 }, { "epoch": 0.1530921820303384, "grad_norm": 1.6874060958340502, "learning_rate": 9.91445557367688e-06, "loss": 0.556, "step": 6560 }, { "epoch": 0.15332555425904318, "grad_norm": 1.4295150958274834, "learning_rate": 9.913703726238446e-06, "loss": 0.5564, "step": 6570 }, { "epoch": 0.15355892648774797, "grad_norm": 1.779598060994952, "learning_rate": 9.91294861801612e-06, "loss": 0.5479, "step": 6580 }, { "epoch": 0.15379229871645275, "grad_norm": 2.7311040391795904, "learning_rate": 9.912190249511e-06, "loss": 0.5532, "step": 6590 }, { "epoch": 0.15402567094515754, "grad_norm": 1.677565342775812, "learning_rate": 9.911428621226347e-06, "loss": 0.5208, "step": 6600 }, { "epoch": 0.1542590431738623, "grad_norm": 1.3908629438475115, "learning_rate": 9.910663733667584e-06, "loss": 0.4962, "step": 6610 }, { "epoch": 0.15449241540256708, "grad_norm": 2.1142669577601074, "learning_rate": 9.9098955873423e-06, "loss": 0.5279, "step": 6620 }, { "epoch": 0.15472578763127187, "grad_norm": 1.523803848851002, "learning_rate": 9.909124182760244e-06, "loss": 0.5248, "step": 6630 }, { "epoch": 0.15495915985997666, "grad_norm": 1.6845193324723653, "learning_rate": 9.908349520433327e-06, "loss": 0.544, "step": 6640 }, { "epoch": 0.15519253208868145, "grad_norm": 1.4888730590090171, "learning_rate": 9.907571600875624e-06, "loss": 0.5527, "step": 6650 }, { "epoch": 0.15542590431738623, "grad_norm": 1.626868142623208, "learning_rate": 9.90679042460337e-06, "loss": 0.5345, "step": 6660 }, { "epoch": 0.15565927654609102, "grad_norm": 1.5405811728725687, "learning_rate": 9.906005992134961e-06, "loss": 0.5502, "step": 6670 }, { "epoch": 0.1558926487747958, "grad_norm": 1.6474762384897927, "learning_rate": 9.905218303990955e-06, "loss": 0.5235, "step": 6680 }, { "epoch": 0.1561260210035006, "grad_norm": 1.4227164503026681, "learning_rate": 9.904427360694071e-06, "loss": 0.5313, "step": 6690 }, { "epoch": 0.15635939323220538, "grad_norm": 1.6806136559754898, "learning_rate": 9.903633162769183e-06, "loss": 0.5396, "step": 6700 }, { "epoch": 0.15659276546091014, "grad_norm": 1.539404608084795, "learning_rate": 9.902835710743335e-06, "loss": 0.4976, "step": 6710 }, { "epoch": 0.15682613768961493, "grad_norm": 1.513450939648282, "learning_rate": 9.90203500514572e-06, "loss": 0.5542, "step": 6720 }, { "epoch": 0.1570595099183197, "grad_norm": 1.6408443037049956, "learning_rate": 9.901231046507695e-06, "loss": 0.5066, "step": 6730 }, { "epoch": 0.1572928821470245, "grad_norm": 1.6225419703190067, "learning_rate": 9.900423835362775e-06, "loss": 0.5206, "step": 6740 }, { "epoch": 0.15752625437572929, "grad_norm": 1.8714766313508515, "learning_rate": 9.899613372246635e-06, "loss": 0.5193, "step": 6750 }, { "epoch": 0.15775962660443407, "grad_norm": 1.6564175022152057, "learning_rate": 9.898799657697104e-06, "loss": 0.5465, "step": 6760 }, { "epoch": 0.15799299883313886, "grad_norm": 1.7808271538179348, "learning_rate": 9.897982692254173e-06, "loss": 0.5132, "step": 6770 }, { "epoch": 0.15822637106184365, "grad_norm": 1.4508644802981578, "learning_rate": 9.89716247645999e-06, "loss": 0.5188, "step": 6780 }, { "epoch": 0.15845974329054843, "grad_norm": 1.6055236044278973, "learning_rate": 9.896339010858858e-06, "loss": 0.5248, "step": 6790 }, { "epoch": 0.15869311551925322, "grad_norm": 1.5912764775322463, "learning_rate": 9.895512295997234e-06, "loss": 0.5303, "step": 6800 }, { "epoch": 0.158926487747958, "grad_norm": 2.0119075984932744, "learning_rate": 9.894682332423738e-06, "loss": 0.4989, "step": 6810 }, { "epoch": 0.15915985997666277, "grad_norm": 1.6884751622053686, "learning_rate": 9.893849120689138e-06, "loss": 0.5294, "step": 6820 }, { "epoch": 0.15939323220536755, "grad_norm": 1.6001005657299527, "learning_rate": 9.893012661346369e-06, "loss": 0.5179, "step": 6830 }, { "epoch": 0.15962660443407234, "grad_norm": 1.6663964185247009, "learning_rate": 9.892172954950508e-06, "loss": 0.5315, "step": 6840 }, { "epoch": 0.15985997666277713, "grad_norm": 2.7325169939912737, "learning_rate": 9.891330002058793e-06, "loss": 0.52, "step": 6850 }, { "epoch": 0.1600933488914819, "grad_norm": 1.83366335152439, "learning_rate": 9.89048380323062e-06, "loss": 0.5011, "step": 6860 }, { "epoch": 0.1603267211201867, "grad_norm": 1.867826370719257, "learning_rate": 9.88963435902753e-06, "loss": 0.5415, "step": 6870 }, { "epoch": 0.16056009334889149, "grad_norm": 1.540063299538415, "learning_rate": 9.888781670013228e-06, "loss": 0.512, "step": 6880 }, { "epoch": 0.16079346557759627, "grad_norm": 1.325438130244439, "learning_rate": 9.887925736753563e-06, "loss": 0.5094, "step": 6890 }, { "epoch": 0.16102683780630106, "grad_norm": 1.6884216069639142, "learning_rate": 9.887066559816542e-06, "loss": 0.513, "step": 6900 }, { "epoch": 0.16126021003500585, "grad_norm": 1.9340398532832108, "learning_rate": 9.886204139772327e-06, "loss": 0.5211, "step": 6910 }, { "epoch": 0.1614935822637106, "grad_norm": 1.813720852047686, "learning_rate": 9.885338477193223e-06, "loss": 0.524, "step": 6920 }, { "epoch": 0.1617269544924154, "grad_norm": 1.6597720819613617, "learning_rate": 9.884469572653697e-06, "loss": 0.5426, "step": 6930 }, { "epoch": 0.16196032672112018, "grad_norm": 1.6572591621271755, "learning_rate": 9.883597426730362e-06, "loss": 0.5232, "step": 6940 }, { "epoch": 0.16219369894982497, "grad_norm": 1.6507047857022477, "learning_rate": 9.882722040001983e-06, "loss": 0.5493, "step": 6950 }, { "epoch": 0.16242707117852975, "grad_norm": 1.6012501681819251, "learning_rate": 9.881843413049474e-06, "loss": 0.5088, "step": 6960 }, { "epoch": 0.16266044340723454, "grad_norm": 1.6659960171875443, "learning_rate": 9.8809615464559e-06, "loss": 0.5305, "step": 6970 }, { "epoch": 0.16289381563593933, "grad_norm": 1.4106170421571829, "learning_rate": 9.880076440806481e-06, "loss": 0.5279, "step": 6980 }, { "epoch": 0.1631271878646441, "grad_norm": 1.6130510580508683, "learning_rate": 9.87918809668858e-06, "loss": 0.5284, "step": 6990 }, { "epoch": 0.1633605600933489, "grad_norm": 1.5505522453938727, "learning_rate": 9.878296514691707e-06, "loss": 0.5206, "step": 7000 }, { "epoch": 0.1635939323220537, "grad_norm": 1.4279575556405326, "learning_rate": 9.877401695407532e-06, "loss": 0.4921, "step": 7010 }, { "epoch": 0.16382730455075847, "grad_norm": 1.4740797364448657, "learning_rate": 9.87650363942986e-06, "loss": 0.5371, "step": 7020 }, { "epoch": 0.16406067677946323, "grad_norm": 1.582551327562317, "learning_rate": 9.875602347354655e-06, "loss": 0.5145, "step": 7030 }, { "epoch": 0.16429404900816802, "grad_norm": 1.569027542430852, "learning_rate": 9.874697819780022e-06, "loss": 0.5112, "step": 7040 }, { "epoch": 0.1645274212368728, "grad_norm": 1.754429863465296, "learning_rate": 9.873790057306213e-06, "loss": 0.5265, "step": 7050 }, { "epoch": 0.1647607934655776, "grad_norm": 1.7243066861446568, "learning_rate": 9.872879060535631e-06, "loss": 0.5479, "step": 7060 }, { "epoch": 0.16499416569428238, "grad_norm": 1.6185663107232942, "learning_rate": 9.87196483007282e-06, "loss": 0.5348, "step": 7070 }, { "epoch": 0.16522753792298717, "grad_norm": 1.5089767034709396, "learning_rate": 9.871047366524476e-06, "loss": 0.5162, "step": 7080 }, { "epoch": 0.16546091015169195, "grad_norm": 1.397038403346024, "learning_rate": 9.870126670499433e-06, "loss": 0.5223, "step": 7090 }, { "epoch": 0.16569428238039674, "grad_norm": 1.7368676062910844, "learning_rate": 9.869202742608678e-06, "loss": 0.5406, "step": 7100 }, { "epoch": 0.16592765460910153, "grad_norm": 1.4912785640789652, "learning_rate": 9.868275583465337e-06, "loss": 0.5457, "step": 7110 }, { "epoch": 0.1661610268378063, "grad_norm": 1.7313152181353593, "learning_rate": 9.867345193684682e-06, "loss": 0.5339, "step": 7120 }, { "epoch": 0.16639439906651107, "grad_norm": 1.7009961253820374, "learning_rate": 9.866411573884132e-06, "loss": 0.5334, "step": 7130 }, { "epoch": 0.16662777129521586, "grad_norm": 1.5218539203544685, "learning_rate": 9.865474724683242e-06, "loss": 0.5288, "step": 7140 }, { "epoch": 0.16686114352392065, "grad_norm": 1.5722840529810593, "learning_rate": 9.864534646703719e-06, "loss": 0.5265, "step": 7150 }, { "epoch": 0.16709451575262543, "grad_norm": 1.5473914805555158, "learning_rate": 9.863591340569407e-06, "loss": 0.5322, "step": 7160 }, { "epoch": 0.16732788798133022, "grad_norm": 1.5432822047487362, "learning_rate": 9.862644806906293e-06, "loss": 0.5244, "step": 7170 }, { "epoch": 0.167561260210035, "grad_norm": 1.9474009361101055, "learning_rate": 9.861695046342506e-06, "loss": 0.534, "step": 7180 }, { "epoch": 0.1677946324387398, "grad_norm": 1.6940328589929883, "learning_rate": 9.860742059508315e-06, "loss": 0.5492, "step": 7190 }, { "epoch": 0.16802800466744458, "grad_norm": 1.7962890961025135, "learning_rate": 9.859785847036136e-06, "loss": 0.5125, "step": 7200 }, { "epoch": 0.16826137689614937, "grad_norm": 1.6694482825498467, "learning_rate": 9.85882640956052e-06, "loss": 0.5378, "step": 7210 }, { "epoch": 0.16849474912485415, "grad_norm": 1.6403694219367904, "learning_rate": 9.85786374771816e-06, "loss": 0.5122, "step": 7220 }, { "epoch": 0.1687281213535589, "grad_norm": 1.4252582905049498, "learning_rate": 9.856897862147885e-06, "loss": 0.5004, "step": 7230 }, { "epoch": 0.1689614935822637, "grad_norm": 1.4238717551465752, "learning_rate": 9.855928753490669e-06, "loss": 0.5181, "step": 7240 }, { "epoch": 0.1691948658109685, "grad_norm": 1.514859472193706, "learning_rate": 9.854956422389623e-06, "loss": 0.514, "step": 7250 }, { "epoch": 0.16942823803967327, "grad_norm": 1.3819753079256634, "learning_rate": 9.853980869489994e-06, "loss": 0.5158, "step": 7260 }, { "epoch": 0.16966161026837806, "grad_norm": 1.7952231612029, "learning_rate": 9.853002095439169e-06, "loss": 0.548, "step": 7270 }, { "epoch": 0.16989498249708285, "grad_norm": 1.4832540276399908, "learning_rate": 9.852020100886675e-06, "loss": 0.515, "step": 7280 }, { "epoch": 0.17012835472578763, "grad_norm": 1.5448518496154238, "learning_rate": 9.85103488648417e-06, "loss": 0.5197, "step": 7290 }, { "epoch": 0.17036172695449242, "grad_norm": 1.663522588270324, "learning_rate": 9.850046452885455e-06, "loss": 0.5364, "step": 7300 }, { "epoch": 0.1705950991831972, "grad_norm": 1.6671117119339962, "learning_rate": 9.849054800746464e-06, "loss": 0.4785, "step": 7310 }, { "epoch": 0.170828471411902, "grad_norm": 1.5909340287004283, "learning_rate": 9.848059930725267e-06, "loss": 0.5564, "step": 7320 }, { "epoch": 0.17106184364060678, "grad_norm": 1.3199219107239633, "learning_rate": 9.84706184348207e-06, "loss": 0.5042, "step": 7330 }, { "epoch": 0.17129521586931154, "grad_norm": 1.5874563041739247, "learning_rate": 9.846060539679213e-06, "loss": 0.5201, "step": 7340 }, { "epoch": 0.17152858809801633, "grad_norm": 1.6086110308963213, "learning_rate": 9.845056019981175e-06, "loss": 0.5269, "step": 7350 }, { "epoch": 0.1717619603267211, "grad_norm": 1.640447138175047, "learning_rate": 9.84404828505456e-06, "loss": 0.5342, "step": 7360 }, { "epoch": 0.1719953325554259, "grad_norm": 1.4715328128460576, "learning_rate": 9.843037335568114e-06, "loss": 0.52, "step": 7370 }, { "epoch": 0.1722287047841307, "grad_norm": 1.8342324986197853, "learning_rate": 9.842023172192714e-06, "loss": 0.5306, "step": 7380 }, { "epoch": 0.17246207701283547, "grad_norm": 1.4178956936694807, "learning_rate": 9.841005795601369e-06, "loss": 0.5238, "step": 7390 }, { "epoch": 0.17269544924154026, "grad_norm": 1.772270887607743, "learning_rate": 9.83998520646922e-06, "loss": 0.5306, "step": 7400 }, { "epoch": 0.17292882147024505, "grad_norm": 1.6391334215054145, "learning_rate": 9.83896140547354e-06, "loss": 0.4996, "step": 7410 }, { "epoch": 0.17316219369894983, "grad_norm": 1.2814026027281593, "learning_rate": 9.837934393293736e-06, "loss": 0.5082, "step": 7420 }, { "epoch": 0.17339556592765462, "grad_norm": 1.703393449813955, "learning_rate": 9.83690417061134e-06, "loss": 0.5179, "step": 7430 }, { "epoch": 0.17362893815635938, "grad_norm": 1.511204496329853, "learning_rate": 9.83587073811002e-06, "loss": 0.5144, "step": 7440 }, { "epoch": 0.17386231038506417, "grad_norm": 1.4658527604205913, "learning_rate": 9.834834096475575e-06, "loss": 0.5218, "step": 7450 }, { "epoch": 0.17409568261376895, "grad_norm": 1.7411136002874474, "learning_rate": 9.833794246395924e-06, "loss": 0.5392, "step": 7460 }, { "epoch": 0.17432905484247374, "grad_norm": 1.5159317194940904, "learning_rate": 9.832751188561131e-06, "loss": 0.5159, "step": 7470 }, { "epoch": 0.17456242707117853, "grad_norm": 1.5417453332569961, "learning_rate": 9.831704923663373e-06, "loss": 0.5116, "step": 7480 }, { "epoch": 0.17479579929988331, "grad_norm": 4.326694130029765, "learning_rate": 9.830655452396966e-06, "loss": 0.5305, "step": 7490 }, { "epoch": 0.1750291715285881, "grad_norm": 1.6439041031792865, "learning_rate": 9.829602775458347e-06, "loss": 0.5454, "step": 7500 }, { "epoch": 0.1752625437572929, "grad_norm": 1.681209083863651, "learning_rate": 9.828546893546084e-06, "loss": 0.5257, "step": 7510 }, { "epoch": 0.17549591598599767, "grad_norm": 1.593048326002686, "learning_rate": 9.827487807360873e-06, "loss": 0.5247, "step": 7520 }, { "epoch": 0.17572928821470246, "grad_norm": 1.6719349302903421, "learning_rate": 9.826425517605533e-06, "loss": 0.5241, "step": 7530 }, { "epoch": 0.17596266044340725, "grad_norm": 1.5961904687645843, "learning_rate": 9.82536002498501e-06, "loss": 0.5237, "step": 7540 }, { "epoch": 0.176196032672112, "grad_norm": 1.5115406097475528, "learning_rate": 9.824291330206374e-06, "loss": 0.4966, "step": 7550 }, { "epoch": 0.1764294049008168, "grad_norm": 1.6695382797500649, "learning_rate": 9.823219433978826e-06, "loss": 0.5188, "step": 7560 }, { "epoch": 0.17666277712952158, "grad_norm": 1.9753612155426663, "learning_rate": 9.822144337013685e-06, "loss": 0.5329, "step": 7570 }, { "epoch": 0.17689614935822637, "grad_norm": 1.4087064419833124, "learning_rate": 9.821066040024396e-06, "loss": 0.5286, "step": 7580 }, { "epoch": 0.17712952158693115, "grad_norm": 1.7061832624965274, "learning_rate": 9.819984543726526e-06, "loss": 0.5182, "step": 7590 }, { "epoch": 0.17736289381563594, "grad_norm": 1.7051247960201088, "learning_rate": 9.81889984883777e-06, "loss": 0.5141, "step": 7600 }, { "epoch": 0.17759626604434073, "grad_norm": 1.9041216737627444, "learning_rate": 9.817811956077944e-06, "loss": 0.5193, "step": 7610 }, { "epoch": 0.17782963827304551, "grad_norm": 1.6223405245123674, "learning_rate": 9.81672086616898e-06, "loss": 0.5091, "step": 7620 }, { "epoch": 0.1780630105017503, "grad_norm": 1.6407454872091403, "learning_rate": 9.815626579834941e-06, "loss": 0.5458, "step": 7630 }, { "epoch": 0.1782963827304551, "grad_norm": 1.4866290941503784, "learning_rate": 9.814529097802002e-06, "loss": 0.5417, "step": 7640 }, { "epoch": 0.17852975495915985, "grad_norm": 1.7489694198824597, "learning_rate": 9.813428420798468e-06, "loss": 0.4832, "step": 7650 }, { "epoch": 0.17876312718786463, "grad_norm": 1.5995557187659861, "learning_rate": 9.812324549554754e-06, "loss": 0.5063, "step": 7660 }, { "epoch": 0.17899649941656942, "grad_norm": 1.7775483600271695, "learning_rate": 9.811217484803405e-06, "loss": 0.5258, "step": 7670 }, { "epoch": 0.1792298716452742, "grad_norm": 1.6142654606855882, "learning_rate": 9.810107227279079e-06, "loss": 0.548, "step": 7680 }, { "epoch": 0.179463243873979, "grad_norm": 1.7282895301254175, "learning_rate": 9.808993777718555e-06, "loss": 0.5192, "step": 7690 }, { "epoch": 0.17969661610268378, "grad_norm": 1.4791453279826594, "learning_rate": 9.807877136860728e-06, "loss": 0.517, "step": 7700 }, { "epoch": 0.17992998833138857, "grad_norm": 1.525079641416514, "learning_rate": 9.806757305446613e-06, "loss": 0.5398, "step": 7710 }, { "epoch": 0.18016336056009336, "grad_norm": 1.674993919392689, "learning_rate": 9.805634284219338e-06, "loss": 0.5374, "step": 7720 }, { "epoch": 0.18039673278879814, "grad_norm": 1.7189263177610903, "learning_rate": 9.80450807392416e-06, "loss": 0.5249, "step": 7730 }, { "epoch": 0.18063010501750293, "grad_norm": 1.5985417725524804, "learning_rate": 9.803378675308435e-06, "loss": 0.5173, "step": 7740 }, { "epoch": 0.1808634772462077, "grad_norm": 1.4967183568879847, "learning_rate": 9.802246089121646e-06, "loss": 0.5026, "step": 7750 }, { "epoch": 0.18109684947491247, "grad_norm": 1.9264676317974652, "learning_rate": 9.80111031611539e-06, "loss": 0.534, "step": 7760 }, { "epoch": 0.18133022170361726, "grad_norm": 1.638230726907476, "learning_rate": 9.799971357043378e-06, "loss": 0.5017, "step": 7770 }, { "epoch": 0.18156359393232205, "grad_norm": 1.6337205715605612, "learning_rate": 9.798829212661434e-06, "loss": 0.5125, "step": 7780 }, { "epoch": 0.18179696616102684, "grad_norm": 1.4833447580491874, "learning_rate": 9.797683883727495e-06, "loss": 0.5195, "step": 7790 }, { "epoch": 0.18203033838973162, "grad_norm": 1.6462576126797632, "learning_rate": 9.796535371001616e-06, "loss": 0.5418, "step": 7800 }, { "epoch": 0.1822637106184364, "grad_norm": 1.5754414669063979, "learning_rate": 9.79538367524596e-06, "loss": 0.5191, "step": 7810 }, { "epoch": 0.1824970828471412, "grad_norm": 1.4603148765701806, "learning_rate": 9.794228797224805e-06, "loss": 0.5535, "step": 7820 }, { "epoch": 0.18273045507584598, "grad_norm": 1.5373375644527714, "learning_rate": 9.793070737704537e-06, "loss": 0.5089, "step": 7830 }, { "epoch": 0.18296382730455077, "grad_norm": 1.724482916110248, "learning_rate": 9.79190949745366e-06, "loss": 0.5114, "step": 7840 }, { "epoch": 0.18319719953325556, "grad_norm": 1.5881290318156096, "learning_rate": 9.790745077242782e-06, "loss": 0.5269, "step": 7850 }, { "epoch": 0.18343057176196031, "grad_norm": 1.8525648426833516, "learning_rate": 9.789577477844625e-06, "loss": 0.5302, "step": 7860 }, { "epoch": 0.1836639439906651, "grad_norm": 1.4203548816837372, "learning_rate": 9.78840670003402e-06, "loss": 0.5185, "step": 7870 }, { "epoch": 0.1838973162193699, "grad_norm": 1.5974721252659696, "learning_rate": 9.787232744587908e-06, "loss": 0.5052, "step": 7880 }, { "epoch": 0.18413068844807468, "grad_norm": 1.4978176183456138, "learning_rate": 9.786055612285336e-06, "loss": 0.5144, "step": 7890 }, { "epoch": 0.18436406067677946, "grad_norm": 1.6952861819756768, "learning_rate": 9.784875303907463e-06, "loss": 0.5253, "step": 7900 }, { "epoch": 0.18459743290548425, "grad_norm": 1.777446022805081, "learning_rate": 9.783691820237553e-06, "loss": 0.4967, "step": 7910 }, { "epoch": 0.18483080513418904, "grad_norm": 1.5379774230235084, "learning_rate": 9.782505162060976e-06, "loss": 0.4871, "step": 7920 }, { "epoch": 0.18506417736289382, "grad_norm": 1.8989760423198174, "learning_rate": 9.781315330165213e-06, "loss": 0.5124, "step": 7930 }, { "epoch": 0.1852975495915986, "grad_norm": 2.1831297781015313, "learning_rate": 9.780122325339848e-06, "loss": 0.5304, "step": 7940 }, { "epoch": 0.1855309218203034, "grad_norm": 1.5846717777315644, "learning_rate": 9.778926148376569e-06, "loss": 0.511, "step": 7950 }, { "epoch": 0.18576429404900816, "grad_norm": 1.4779815150202735, "learning_rate": 9.777726800069172e-06, "loss": 0.5149, "step": 7960 }, { "epoch": 0.18599766627771294, "grad_norm": 2.186686331895068, "learning_rate": 9.77652428121356e-06, "loss": 0.5055, "step": 7970 }, { "epoch": 0.18623103850641773, "grad_norm": 1.5042958260613155, "learning_rate": 9.775318592607735e-06, "loss": 0.492, "step": 7980 }, { "epoch": 0.18646441073512252, "grad_norm": 1.5946892467041784, "learning_rate": 9.774109735051802e-06, "loss": 0.5046, "step": 7990 }, { "epoch": 0.1866977829638273, "grad_norm": 1.8162240344186917, "learning_rate": 9.772897709347973e-06, "loss": 0.5181, "step": 8000 }, { "epoch": 0.1869311551925321, "grad_norm": 1.7407573922536552, "learning_rate": 9.77168251630056e-06, "loss": 0.5354, "step": 8010 }, { "epoch": 0.18716452742123688, "grad_norm": 1.783969990714214, "learning_rate": 9.77046415671598e-06, "loss": 0.5271, "step": 8020 }, { "epoch": 0.18739789964994166, "grad_norm": 1.397528171308447, "learning_rate": 9.769242631402744e-06, "loss": 0.5003, "step": 8030 }, { "epoch": 0.18763127187864645, "grad_norm": 1.698214945533909, "learning_rate": 9.768017941171474e-06, "loss": 0.5166, "step": 8040 }, { "epoch": 0.18786464410735124, "grad_norm": 1.6805834032473121, "learning_rate": 9.766790086834882e-06, "loss": 0.4947, "step": 8050 }, { "epoch": 0.18809801633605602, "grad_norm": 1.7190830143295883, "learning_rate": 9.76555906920779e-06, "loss": 0.5313, "step": 8060 }, { "epoch": 0.18833138856476078, "grad_norm": 3.2767479456950093, "learning_rate": 9.76432488910711e-06, "loss": 0.5082, "step": 8070 }, { "epoch": 0.18856476079346557, "grad_norm": 1.7342720053853165, "learning_rate": 9.763087547351858e-06, "loss": 0.5245, "step": 8080 }, { "epoch": 0.18879813302217036, "grad_norm": 1.5994821530306498, "learning_rate": 9.761847044763144e-06, "loss": 0.5317, "step": 8090 }, { "epoch": 0.18903150525087514, "grad_norm": 1.5345271335182333, "learning_rate": 9.76060338216418e-06, "loss": 0.5202, "step": 8100 }, { "epoch": 0.18926487747957993, "grad_norm": 1.3139584480832585, "learning_rate": 9.759356560380276e-06, "loss": 0.5206, "step": 8110 }, { "epoch": 0.18949824970828472, "grad_norm": 1.742025275157371, "learning_rate": 9.758106580238831e-06, "loss": 0.5536, "step": 8120 }, { "epoch": 0.1897316219369895, "grad_norm": 1.4445324682290517, "learning_rate": 9.756853442569348e-06, "loss": 0.506, "step": 8130 }, { "epoch": 0.1899649941656943, "grad_norm": 1.3582204091429977, "learning_rate": 9.755597148203419e-06, "loss": 0.5094, "step": 8140 }, { "epoch": 0.19019836639439908, "grad_norm": 1.5747277700157367, "learning_rate": 9.754337697974736e-06, "loss": 0.5024, "step": 8150 }, { "epoch": 0.19043173862310386, "grad_norm": 1.5521554699570441, "learning_rate": 9.753075092719082e-06, "loss": 0.4998, "step": 8160 }, { "epoch": 0.19066511085180862, "grad_norm": 1.7410903755945304, "learning_rate": 9.751809333274334e-06, "loss": 0.5322, "step": 8170 }, { "epoch": 0.1908984830805134, "grad_norm": 1.7178490414189114, "learning_rate": 9.750540420480466e-06, "loss": 0.5252, "step": 8180 }, { "epoch": 0.1911318553092182, "grad_norm": 1.8741435572959384, "learning_rate": 9.749268355179537e-06, "loss": 0.5672, "step": 8190 }, { "epoch": 0.19136522753792298, "grad_norm": 1.7819325696312713, "learning_rate": 9.747993138215707e-06, "loss": 0.5245, "step": 8200 }, { "epoch": 0.19159859976662777, "grad_norm": 1.875231119790019, "learning_rate": 9.74671477043522e-06, "loss": 0.5366, "step": 8210 }, { "epoch": 0.19183197199533256, "grad_norm": 1.7800770265816839, "learning_rate": 9.745433252686415e-06, "loss": 0.5184, "step": 8220 }, { "epoch": 0.19206534422403734, "grad_norm": 1.4644274959223442, "learning_rate": 9.74414858581972e-06, "loss": 0.5061, "step": 8230 }, { "epoch": 0.19229871645274213, "grad_norm": 1.7223777419633943, "learning_rate": 9.742860770687652e-06, "loss": 0.5362, "step": 8240 }, { "epoch": 0.19253208868144692, "grad_norm": 1.5274342882076701, "learning_rate": 9.74156980814482e-06, "loss": 0.4705, "step": 8250 }, { "epoch": 0.1927654609101517, "grad_norm": 1.737434962173542, "learning_rate": 9.740275699047918e-06, "loss": 0.5279, "step": 8260 }, { "epoch": 0.19299883313885646, "grad_norm": 1.6217425479591563, "learning_rate": 9.738978444255734e-06, "loss": 0.5133, "step": 8270 }, { "epoch": 0.19323220536756125, "grad_norm": 1.6102918086284195, "learning_rate": 9.737678044629134e-06, "loss": 0.4899, "step": 8280 }, { "epoch": 0.19346557759626604, "grad_norm": 1.5796097738143693, "learning_rate": 9.73637450103108e-06, "loss": 0.5065, "step": 8290 }, { "epoch": 0.19369894982497082, "grad_norm": 1.5753054523228094, "learning_rate": 9.735067814326616e-06, "loss": 0.5481, "step": 8300 }, { "epoch": 0.1939323220536756, "grad_norm": 1.592510557719278, "learning_rate": 9.733757985382873e-06, "loss": 0.5073, "step": 8310 }, { "epoch": 0.1941656942823804, "grad_norm": 1.9289180640237618, "learning_rate": 9.732445015069065e-06, "loss": 0.524, "step": 8320 }, { "epoch": 0.19439906651108518, "grad_norm": 1.6996305486697427, "learning_rate": 9.731128904256495e-06, "loss": 0.5296, "step": 8330 }, { "epoch": 0.19463243873978997, "grad_norm": 1.4708800298689304, "learning_rate": 9.729809653818546e-06, "loss": 0.5072, "step": 8340 }, { "epoch": 0.19486581096849476, "grad_norm": 1.5903889898935912, "learning_rate": 9.728487264630687e-06, "loss": 0.5383, "step": 8350 }, { "epoch": 0.19509918319719954, "grad_norm": 1.6131492433021495, "learning_rate": 9.727161737570467e-06, "loss": 0.4926, "step": 8360 }, { "epoch": 0.19533255542590433, "grad_norm": 1.6999890138391014, "learning_rate": 9.725833073517523e-06, "loss": 0.5084, "step": 8370 }, { "epoch": 0.1955659276546091, "grad_norm": 1.718563656079282, "learning_rate": 9.724501273353566e-06, "loss": 0.4967, "step": 8380 }, { "epoch": 0.19579929988331388, "grad_norm": 1.551476785556496, "learning_rate": 9.723166337962393e-06, "loss": 0.508, "step": 8390 }, { "epoch": 0.19603267211201866, "grad_norm": 1.5811640420090185, "learning_rate": 9.721828268229883e-06, "loss": 0.5174, "step": 8400 }, { "epoch": 0.19626604434072345, "grad_norm": 2.9806809509367773, "learning_rate": 9.720487065043993e-06, "loss": 0.4905, "step": 8410 }, { "epoch": 0.19649941656942824, "grad_norm": 1.5393503280034888, "learning_rate": 9.719142729294754e-06, "loss": 0.5277, "step": 8420 }, { "epoch": 0.19673278879813302, "grad_norm": 1.4763936866398992, "learning_rate": 9.717795261874286e-06, "loss": 0.5148, "step": 8430 }, { "epoch": 0.1969661610268378, "grad_norm": 1.9136526272768508, "learning_rate": 9.71644466367678e-06, "loss": 0.4814, "step": 8440 }, { "epoch": 0.1971995332555426, "grad_norm": 1.7812931268983514, "learning_rate": 9.715090935598508e-06, "loss": 0.4998, "step": 8450 }, { "epoch": 0.19743290548424738, "grad_norm": 1.4573374095389158, "learning_rate": 9.713734078537816e-06, "loss": 0.4898, "step": 8460 }, { "epoch": 0.19766627771295217, "grad_norm": 1.7586247144858023, "learning_rate": 9.712374093395131e-06, "loss": 0.5023, "step": 8470 }, { "epoch": 0.19789964994165693, "grad_norm": 1.473446857301035, "learning_rate": 9.71101098107295e-06, "loss": 0.5045, "step": 8480 }, { "epoch": 0.19813302217036172, "grad_norm": 1.539609156633676, "learning_rate": 9.709644742475847e-06, "loss": 0.513, "step": 8490 }, { "epoch": 0.1983663943990665, "grad_norm": 1.6216794259271452, "learning_rate": 9.708275378510477e-06, "loss": 0.5286, "step": 8500 }, { "epoch": 0.1985997666277713, "grad_norm": 1.4382525192845097, "learning_rate": 9.70690289008556e-06, "loss": 0.5255, "step": 8510 }, { "epoch": 0.19883313885647608, "grad_norm": 1.768987963818089, "learning_rate": 9.705527278111895e-06, "loss": 0.5219, "step": 8520 }, { "epoch": 0.19906651108518086, "grad_norm": 1.7039354385519114, "learning_rate": 9.704148543502352e-06, "loss": 0.5357, "step": 8530 }, { "epoch": 0.19929988331388565, "grad_norm": 2.0493385335609373, "learning_rate": 9.702766687171874e-06, "loss": 0.5381, "step": 8540 }, { "epoch": 0.19953325554259044, "grad_norm": 1.6997604816387077, "learning_rate": 9.70138171003747e-06, "loss": 0.5189, "step": 8550 }, { "epoch": 0.19976662777129522, "grad_norm": 1.6862594723552162, "learning_rate": 9.699993613018233e-06, "loss": 0.5031, "step": 8560 }, { "epoch": 0.2, "grad_norm": 1.7420662632349386, "learning_rate": 9.698602397035311e-06, "loss": 0.5184, "step": 8570 }, { "epoch": 0.2002333722287048, "grad_norm": 1.5027697398124287, "learning_rate": 9.697208063011934e-06, "loss": 0.4926, "step": 8580 }, { "epoch": 0.20046674445740956, "grad_norm": 1.6437478994907502, "learning_rate": 9.695810611873393e-06, "loss": 0.5003, "step": 8590 }, { "epoch": 0.20070011668611434, "grad_norm": 1.7796326626171146, "learning_rate": 9.694410044547054e-06, "loss": 0.5177, "step": 8600 }, { "epoch": 0.20093348891481913, "grad_norm": 1.4001341264428184, "learning_rate": 9.693006361962345e-06, "loss": 0.5286, "step": 8610 }, { "epoch": 0.20116686114352392, "grad_norm": 1.4842793527150908, "learning_rate": 9.691599565050766e-06, "loss": 0.5392, "step": 8620 }, { "epoch": 0.2014002333722287, "grad_norm": 1.558726092422921, "learning_rate": 9.690189654745881e-06, "loss": 0.5181, "step": 8630 }, { "epoch": 0.2016336056009335, "grad_norm": 1.649072800129856, "learning_rate": 9.688776631983319e-06, "loss": 0.4737, "step": 8640 }, { "epoch": 0.20186697782963828, "grad_norm": 1.6575012863136909, "learning_rate": 9.68736049770078e-06, "loss": 0.5056, "step": 8650 }, { "epoch": 0.20210035005834306, "grad_norm": 1.5499173340889674, "learning_rate": 9.685941252838022e-06, "loss": 0.5357, "step": 8660 }, { "epoch": 0.20233372228704785, "grad_norm": 1.8057884396601527, "learning_rate": 9.684518898336873e-06, "loss": 0.4946, "step": 8670 }, { "epoch": 0.20256709451575264, "grad_norm": 1.5287321446863253, "learning_rate": 9.68309343514122e-06, "loss": 0.5165, "step": 8680 }, { "epoch": 0.2028004667444574, "grad_norm": 1.7684750098065805, "learning_rate": 9.681664864197015e-06, "loss": 0.5401, "step": 8690 }, { "epoch": 0.20303383897316218, "grad_norm": 1.4090051461821826, "learning_rate": 9.680233186452273e-06, "loss": 0.5212, "step": 8700 }, { "epoch": 0.20326721120186697, "grad_norm": 1.537415903375732, "learning_rate": 9.67879840285707e-06, "loss": 0.4662, "step": 8710 }, { "epoch": 0.20350058343057176, "grad_norm": 1.58399509508331, "learning_rate": 9.677360514363543e-06, "loss": 0.5175, "step": 8720 }, { "epoch": 0.20373395565927654, "grad_norm": 1.4644607338693232, "learning_rate": 9.675919521925888e-06, "loss": 0.5127, "step": 8730 }, { "epoch": 0.20396732788798133, "grad_norm": 1.4410625625230984, "learning_rate": 9.674475426500364e-06, "loss": 0.5207, "step": 8740 }, { "epoch": 0.20420070011668612, "grad_norm": 1.664584015088237, "learning_rate": 9.673028229045287e-06, "loss": 0.5159, "step": 8750 }, { "epoch": 0.2044340723453909, "grad_norm": 2.657950680463978, "learning_rate": 9.671577930521032e-06, "loss": 0.4935, "step": 8760 }, { "epoch": 0.2046674445740957, "grad_norm": 1.5474072962449155, "learning_rate": 9.670124531890033e-06, "loss": 0.5015, "step": 8770 }, { "epoch": 0.20490081680280048, "grad_norm": 1.6191006978338347, "learning_rate": 9.668668034116776e-06, "loss": 0.5264, "step": 8780 }, { "epoch": 0.20513418903150524, "grad_norm": 1.7399179356755476, "learning_rate": 9.667208438167812e-06, "loss": 0.5263, "step": 8790 }, { "epoch": 0.20536756126021002, "grad_norm": 1.563455876371457, "learning_rate": 9.665745745011743e-06, "loss": 0.4994, "step": 8800 }, { "epoch": 0.2056009334889148, "grad_norm": 1.5082759132780914, "learning_rate": 9.664279955619226e-06, "loss": 0.518, "step": 8810 }, { "epoch": 0.2058343057176196, "grad_norm": 1.6682679852016282, "learning_rate": 9.662811070962975e-06, "loss": 0.5213, "step": 8820 }, { "epoch": 0.20606767794632438, "grad_norm": 1.6910332944595632, "learning_rate": 9.661339092017756e-06, "loss": 0.5284, "step": 8830 }, { "epoch": 0.20630105017502917, "grad_norm": 1.5792910384662753, "learning_rate": 9.65986401976039e-06, "loss": 0.4879, "step": 8840 }, { "epoch": 0.20653442240373396, "grad_norm": 1.7541084209540143, "learning_rate": 9.658385855169747e-06, "loss": 0.528, "step": 8850 }, { "epoch": 0.20676779463243875, "grad_norm": 1.8493142900264794, "learning_rate": 9.656904599226757e-06, "loss": 0.485, "step": 8860 }, { "epoch": 0.20700116686114353, "grad_norm": 2.0958633539706986, "learning_rate": 9.655420252914393e-06, "loss": 0.5529, "step": 8870 }, { "epoch": 0.20723453908984832, "grad_norm": 1.5868042077143403, "learning_rate": 9.653932817217682e-06, "loss": 0.5264, "step": 8880 }, { "epoch": 0.2074679113185531, "grad_norm": 1.4821736596043873, "learning_rate": 9.652442293123704e-06, "loss": 0.5335, "step": 8890 }, { "epoch": 0.20770128354725786, "grad_norm": 1.676067127440347, "learning_rate": 9.650948681621586e-06, "loss": 0.5349, "step": 8900 }, { "epoch": 0.20793465577596265, "grad_norm": 1.529237792960911, "learning_rate": 9.649451983702502e-06, "loss": 0.5046, "step": 8910 }, { "epoch": 0.20816802800466744, "grad_norm": 1.7912755789221104, "learning_rate": 9.647952200359676e-06, "loss": 0.5257, "step": 8920 }, { "epoch": 0.20840140023337222, "grad_norm": 1.5490087194095832, "learning_rate": 9.646449332588382e-06, "loss": 0.5246, "step": 8930 }, { "epoch": 0.208634772462077, "grad_norm": 2.0885105848492067, "learning_rate": 9.644943381385933e-06, "loss": 0.5095, "step": 8940 }, { "epoch": 0.2088681446907818, "grad_norm": 1.6680008122284842, "learning_rate": 9.6434343477517e-06, "loss": 0.5316, "step": 8950 }, { "epoch": 0.20910151691948659, "grad_norm": 1.7392561288935335, "learning_rate": 9.64192223268709e-06, "loss": 0.4893, "step": 8960 }, { "epoch": 0.20933488914819137, "grad_norm": 1.7387529599482852, "learning_rate": 9.640407037195557e-06, "loss": 0.5164, "step": 8970 }, { "epoch": 0.20956826137689616, "grad_norm": 1.5983872316270336, "learning_rate": 9.638888762282602e-06, "loss": 0.5272, "step": 8980 }, { "epoch": 0.20980163360560095, "grad_norm": 1.4517664877321945, "learning_rate": 9.637367408955767e-06, "loss": 0.4993, "step": 8990 }, { "epoch": 0.2100350058343057, "grad_norm": 1.4260508380101908, "learning_rate": 9.635842978224639e-06, "loss": 0.514, "step": 9000 }, { "epoch": 0.2102683780630105, "grad_norm": 1.3318680965968794, "learning_rate": 9.634315471100843e-06, "loss": 0.5256, "step": 9010 }, { "epoch": 0.21050175029171528, "grad_norm": 1.4586360285975375, "learning_rate": 9.63278488859805e-06, "loss": 0.5324, "step": 9020 }, { "epoch": 0.21073512252042007, "grad_norm": 1.4568812053512992, "learning_rate": 9.63125123173197e-06, "loss": 0.5245, "step": 9030 }, { "epoch": 0.21096849474912485, "grad_norm": 2.6239027016988317, "learning_rate": 9.629714501520353e-06, "loss": 0.5246, "step": 9040 }, { "epoch": 0.21120186697782964, "grad_norm": 1.8197011455051975, "learning_rate": 9.628174698982988e-06, "loss": 0.5291, "step": 9050 }, { "epoch": 0.21143523920653443, "grad_norm": 1.6637845701253253, "learning_rate": 9.626631825141706e-06, "loss": 0.5076, "step": 9060 }, { "epoch": 0.2116686114352392, "grad_norm": 1.67302628369698, "learning_rate": 9.625085881020372e-06, "loss": 0.5058, "step": 9070 }, { "epoch": 0.211901983663944, "grad_norm": 1.655852563025376, "learning_rate": 9.623536867644893e-06, "loss": 0.5141, "step": 9080 }, { "epoch": 0.21213535589264879, "grad_norm": 1.5614222819580514, "learning_rate": 9.621984786043205e-06, "loss": 0.5241, "step": 9090 }, { "epoch": 0.21236872812135357, "grad_norm": 1.5278406121752495, "learning_rate": 9.620429637245292e-06, "loss": 0.5155, "step": 9100 }, { "epoch": 0.21260210035005833, "grad_norm": 1.714224363368386, "learning_rate": 9.618871422283159e-06, "loss": 0.5443, "step": 9110 }, { "epoch": 0.21283547257876312, "grad_norm": 1.6329828131266035, "learning_rate": 9.61731014219086e-06, "loss": 0.493, "step": 9120 }, { "epoch": 0.2130688448074679, "grad_norm": 1.6587331576705675, "learning_rate": 9.61574579800447e-06, "loss": 0.5396, "step": 9130 }, { "epoch": 0.2133022170361727, "grad_norm": 1.4712714982687098, "learning_rate": 9.614178390762108e-06, "loss": 0.4902, "step": 9140 }, { "epoch": 0.21353558926487748, "grad_norm": 1.6130926806004289, "learning_rate": 9.61260792150392e-06, "loss": 0.5294, "step": 9150 }, { "epoch": 0.21376896149358227, "grad_norm": 1.8288236763516101, "learning_rate": 9.611034391272089e-06, "loss": 0.4929, "step": 9160 }, { "epoch": 0.21400233372228705, "grad_norm": 1.4834732972914608, "learning_rate": 9.609457801110821e-06, "loss": 0.509, "step": 9170 }, { "epoch": 0.21423570595099184, "grad_norm": 1.6608955648489871, "learning_rate": 9.60787815206636e-06, "loss": 0.5351, "step": 9180 }, { "epoch": 0.21446907817969663, "grad_norm": 1.421227912650998, "learning_rate": 9.606295445186974e-06, "loss": 0.4859, "step": 9190 }, { "epoch": 0.2147024504084014, "grad_norm": 1.724020800412076, "learning_rate": 9.604709681522966e-06, "loss": 0.5495, "step": 9200 }, { "epoch": 0.21493582263710617, "grad_norm": 2.0591352446179054, "learning_rate": 9.603120862126665e-06, "loss": 0.5096, "step": 9210 }, { "epoch": 0.21516919486581096, "grad_norm": 1.3742048581978705, "learning_rate": 9.601528988052428e-06, "loss": 0.503, "step": 9220 }, { "epoch": 0.21540256709451575, "grad_norm": 1.8123027860665382, "learning_rate": 9.59993406035664e-06, "loss": 0.5171, "step": 9230 }, { "epoch": 0.21563593932322053, "grad_norm": 1.9815081597586308, "learning_rate": 9.598336080097705e-06, "loss": 0.5351, "step": 9240 }, { "epoch": 0.21586931155192532, "grad_norm": 1.5585989011489436, "learning_rate": 9.596735048336067e-06, "loss": 0.5047, "step": 9250 }, { "epoch": 0.2161026837806301, "grad_norm": 2.223291751282174, "learning_rate": 9.595130966134182e-06, "loss": 0.5025, "step": 9260 }, { "epoch": 0.2163360560093349, "grad_norm": 1.9316225748880036, "learning_rate": 9.593523834556537e-06, "loss": 0.5049, "step": 9270 }, { "epoch": 0.21656942823803968, "grad_norm": 1.6693086516639204, "learning_rate": 9.59191365466964e-06, "loss": 0.5171, "step": 9280 }, { "epoch": 0.21680280046674447, "grad_norm": 1.7135481844540486, "learning_rate": 9.590300427542025e-06, "loss": 0.5238, "step": 9290 }, { "epoch": 0.21703617269544925, "grad_norm": 1.832758632097904, "learning_rate": 9.588684154244244e-06, "loss": 0.5111, "step": 9300 }, { "epoch": 0.217269544924154, "grad_norm": 1.6769273616509988, "learning_rate": 9.587064835848872e-06, "loss": 0.5098, "step": 9310 }, { "epoch": 0.2175029171528588, "grad_norm": 1.815862193630411, "learning_rate": 9.585442473430507e-06, "loss": 0.4884, "step": 9320 }, { "epoch": 0.21773628938156359, "grad_norm": 1.6123157709001512, "learning_rate": 9.583817068065765e-06, "loss": 0.5006, "step": 9330 }, { "epoch": 0.21796966161026837, "grad_norm": 1.5825197330225758, "learning_rate": 9.58218862083328e-06, "loss": 0.5306, "step": 9340 }, { "epoch": 0.21820303383897316, "grad_norm": 1.663256014201136, "learning_rate": 9.580557132813706e-06, "loss": 0.5593, "step": 9350 }, { "epoch": 0.21843640606767795, "grad_norm": 1.9705227628087818, "learning_rate": 9.578922605089717e-06, "loss": 0.5262, "step": 9360 }, { "epoch": 0.21866977829638273, "grad_norm": 1.6969309377715318, "learning_rate": 9.577285038746e-06, "loss": 0.5079, "step": 9370 }, { "epoch": 0.21890315052508752, "grad_norm": 1.682606255755542, "learning_rate": 9.575644434869265e-06, "loss": 0.472, "step": 9380 }, { "epoch": 0.2191365227537923, "grad_norm": 1.8001985389457604, "learning_rate": 9.574000794548228e-06, "loss": 0.5171, "step": 9390 }, { "epoch": 0.2193698949824971, "grad_norm": 1.6096708089545428, "learning_rate": 9.57235411887363e-06, "loss": 0.5453, "step": 9400 }, { "epoch": 0.21960326721120188, "grad_norm": 1.6870415534516567, "learning_rate": 9.570704408938219e-06, "loss": 0.4923, "step": 9410 }, { "epoch": 0.21983663943990664, "grad_norm": 1.673217886963022, "learning_rate": 9.56905166583676e-06, "loss": 0.5204, "step": 9420 }, { "epoch": 0.22007001166861143, "grad_norm": 1.431184847309791, "learning_rate": 9.56739589066603e-06, "loss": 0.5002, "step": 9430 }, { "epoch": 0.2203033838973162, "grad_norm": 1.6843933075265054, "learning_rate": 9.56573708452482e-06, "loss": 0.5274, "step": 9440 }, { "epoch": 0.220536756126021, "grad_norm": 1.6249276540261084, "learning_rate": 9.56407524851393e-06, "loss": 0.5144, "step": 9450 }, { "epoch": 0.2207701283547258, "grad_norm": 2.0516874068638993, "learning_rate": 9.562410383736167e-06, "loss": 0.5453, "step": 9460 }, { "epoch": 0.22100350058343057, "grad_norm": 1.7188057161799473, "learning_rate": 9.560742491296358e-06, "loss": 0.4965, "step": 9470 }, { "epoch": 0.22123687281213536, "grad_norm": 1.6256416497131934, "learning_rate": 9.559071572301331e-06, "loss": 0.5068, "step": 9480 }, { "epoch": 0.22147024504084015, "grad_norm": 1.6628876682383276, "learning_rate": 9.557397627859926e-06, "loss": 0.4989, "step": 9490 }, { "epoch": 0.22170361726954493, "grad_norm": 1.6255171704926956, "learning_rate": 9.55572065908299e-06, "loss": 0.522, "step": 9500 }, { "epoch": 0.22193698949824972, "grad_norm": 1.4333693955169466, "learning_rate": 9.554040667083372e-06, "loss": 0.4776, "step": 9510 }, { "epoch": 0.22217036172695448, "grad_norm": 1.618165162675081, "learning_rate": 9.552357652975936e-06, "loss": 0.4969, "step": 9520 }, { "epoch": 0.22240373395565927, "grad_norm": 1.4216928753242395, "learning_rate": 9.550671617877549e-06, "loss": 0.5078, "step": 9530 }, { "epoch": 0.22263710618436405, "grad_norm": 1.5883010175493968, "learning_rate": 9.548982562907077e-06, "loss": 0.5384, "step": 9540 }, { "epoch": 0.22287047841306884, "grad_norm": 1.7274167573155532, "learning_rate": 9.547290489185395e-06, "loss": 0.5515, "step": 9550 }, { "epoch": 0.22310385064177363, "grad_norm": 1.631917995153994, "learning_rate": 9.545595397835382e-06, "loss": 0.5298, "step": 9560 }, { "epoch": 0.2233372228704784, "grad_norm": 1.548401793388509, "learning_rate": 9.543897289981916e-06, "loss": 0.5122, "step": 9570 }, { "epoch": 0.2235705950991832, "grad_norm": 1.5847744497745075, "learning_rate": 9.54219616675188e-06, "loss": 0.5105, "step": 9580 }, { "epoch": 0.223803967327888, "grad_norm": 1.425980770260342, "learning_rate": 9.540492029274156e-06, "loss": 0.5002, "step": 9590 }, { "epoch": 0.22403733955659277, "grad_norm": 1.5655419404885196, "learning_rate": 9.538784878679628e-06, "loss": 0.4785, "step": 9600 }, { "epoch": 0.22427071178529756, "grad_norm": 1.5763563436250365, "learning_rate": 9.537074716101177e-06, "loss": 0.4788, "step": 9610 }, { "epoch": 0.22450408401400235, "grad_norm": 1.6245320063235906, "learning_rate": 9.535361542673687e-06, "loss": 0.5202, "step": 9620 }, { "epoch": 0.2247374562427071, "grad_norm": 1.5143255339428559, "learning_rate": 9.533645359534034e-06, "loss": 0.5494, "step": 9630 }, { "epoch": 0.2249708284714119, "grad_norm": 1.5409100351297194, "learning_rate": 9.531926167821097e-06, "loss": 0.5178, "step": 9640 }, { "epoch": 0.22520420070011668, "grad_norm": 1.4980107329166663, "learning_rate": 9.53020396867575e-06, "loss": 0.5178, "step": 9650 }, { "epoch": 0.22543757292882147, "grad_norm": 1.5638213896935051, "learning_rate": 9.528478763240855e-06, "loss": 0.4888, "step": 9660 }, { "epoch": 0.22567094515752625, "grad_norm": 1.5094441549168158, "learning_rate": 9.526750552661286e-06, "loss": 0.5025, "step": 9670 }, { "epoch": 0.22590431738623104, "grad_norm": 1.6884182944235735, "learning_rate": 9.525019338083895e-06, "loss": 0.5006, "step": 9680 }, { "epoch": 0.22613768961493583, "grad_norm": 1.638411657453942, "learning_rate": 9.523285120657536e-06, "loss": 0.4833, "step": 9690 }, { "epoch": 0.22637106184364061, "grad_norm": 1.4856561422674057, "learning_rate": 9.52154790153305e-06, "loss": 0.4904, "step": 9700 }, { "epoch": 0.2266044340723454, "grad_norm": 1.3386531562480708, "learning_rate": 9.519807681863278e-06, "loss": 0.5076, "step": 9710 }, { "epoch": 0.2268378063010502, "grad_norm": 1.8945383191187013, "learning_rate": 9.518064462803044e-06, "loss": 0.5053, "step": 9720 }, { "epoch": 0.22707117852975495, "grad_norm": 1.6923297053731723, "learning_rate": 9.516318245509166e-06, "loss": 0.507, "step": 9730 }, { "epoch": 0.22730455075845973, "grad_norm": 1.5983261302516067, "learning_rate": 9.514569031140455e-06, "loss": 0.5557, "step": 9740 }, { "epoch": 0.22753792298716452, "grad_norm": 1.5551099062114355, "learning_rate": 9.512816820857705e-06, "loss": 0.4745, "step": 9750 }, { "epoch": 0.2277712952158693, "grad_norm": 1.6939321545989479, "learning_rate": 9.511061615823698e-06, "loss": 0.53, "step": 9760 }, { "epoch": 0.2280046674445741, "grad_norm": 1.7753564107611504, "learning_rate": 9.509303417203212e-06, "loss": 0.5171, "step": 9770 }, { "epoch": 0.22823803967327888, "grad_norm": 1.893714218388217, "learning_rate": 9.507542226163e-06, "loss": 0.5249, "step": 9780 }, { "epoch": 0.22847141190198367, "grad_norm": 1.2399583409615582, "learning_rate": 9.50577804387181e-06, "loss": 0.4948, "step": 9790 }, { "epoch": 0.22870478413068845, "grad_norm": 1.7318660427838117, "learning_rate": 9.504010871500371e-06, "loss": 0.5423, "step": 9800 }, { "epoch": 0.22893815635939324, "grad_norm": 2.6465191265754964, "learning_rate": 9.502240710221395e-06, "loss": 0.5031, "step": 9810 }, { "epoch": 0.22917152858809803, "grad_norm": 1.7519177931606489, "learning_rate": 9.500467561209578e-06, "loss": 0.5154, "step": 9820 }, { "epoch": 0.2294049008168028, "grad_norm": 1.6945696252747227, "learning_rate": 9.498691425641601e-06, "loss": 0.5151, "step": 9830 }, { "epoch": 0.22963827304550757, "grad_norm": 1.9508337487080003, "learning_rate": 9.496912304696128e-06, "loss": 0.5251, "step": 9840 }, { "epoch": 0.22987164527421236, "grad_norm": 2.320100317143632, "learning_rate": 9.495130199553802e-06, "loss": 0.5324, "step": 9850 }, { "epoch": 0.23010501750291715, "grad_norm": 1.574456217843016, "learning_rate": 9.49334511139724e-06, "loss": 0.5051, "step": 9860 }, { "epoch": 0.23033838973162193, "grad_norm": 1.3553093329048536, "learning_rate": 9.491557041411051e-06, "loss": 0.5073, "step": 9870 }, { "epoch": 0.23057176196032672, "grad_norm": 1.4379393904768338, "learning_rate": 9.489765990781814e-06, "loss": 0.5184, "step": 9880 }, { "epoch": 0.2308051341890315, "grad_norm": 1.4621011671891313, "learning_rate": 9.487971960698088e-06, "loss": 0.5178, "step": 9890 }, { "epoch": 0.2310385064177363, "grad_norm": 1.6180871516700246, "learning_rate": 9.486174952350411e-06, "loss": 0.5206, "step": 9900 }, { "epoch": 0.23127187864644108, "grad_norm": 1.5125841303688747, "learning_rate": 9.484374966931295e-06, "loss": 0.4968, "step": 9910 }, { "epoch": 0.23150525087514587, "grad_norm": 1.5516306243895608, "learning_rate": 9.482572005635229e-06, "loss": 0.5264, "step": 9920 }, { "epoch": 0.23173862310385065, "grad_norm": 1.708618033574747, "learning_rate": 9.480766069658678e-06, "loss": 0.5319, "step": 9930 }, { "epoch": 0.23197199533255541, "grad_norm": 1.798471705124206, "learning_rate": 9.478957160200076e-06, "loss": 0.513, "step": 9940 }, { "epoch": 0.2322053675612602, "grad_norm": 1.6495079814967168, "learning_rate": 9.477145278459838e-06, "loss": 0.5342, "step": 9950 }, { "epoch": 0.232438739789965, "grad_norm": 1.4195564689119762, "learning_rate": 9.475330425640344e-06, "loss": 0.5141, "step": 9960 }, { "epoch": 0.23267211201866977, "grad_norm": 1.6836651961641802, "learning_rate": 9.47351260294595e-06, "loss": 0.5348, "step": 9970 }, { "epoch": 0.23290548424737456, "grad_norm": 2.122782798134255, "learning_rate": 9.471691811582978e-06, "loss": 0.4865, "step": 9980 }, { "epoch": 0.23313885647607935, "grad_norm": 1.557023727534706, "learning_rate": 9.469868052759732e-06, "loss": 0.5102, "step": 9990 }, { "epoch": 0.23337222870478413, "grad_norm": 1.6345647444189066, "learning_rate": 9.468041327686471e-06, "loss": 0.5152, "step": 10000 }, { "epoch": 0.23360560093348892, "grad_norm": 1.5493418077739822, "learning_rate": 9.466211637575429e-06, "loss": 0.5122, "step": 10010 }, { "epoch": 0.2338389731621937, "grad_norm": 2.2662915965212753, "learning_rate": 9.46437898364081e-06, "loss": 0.5357, "step": 10020 }, { "epoch": 0.2340723453908985, "grad_norm": 1.5857866228444863, "learning_rate": 9.462543367098778e-06, "loss": 0.5176, "step": 10030 }, { "epoch": 0.23430571761960325, "grad_norm": 1.684338328613386, "learning_rate": 9.460704789167468e-06, "loss": 0.5002, "step": 10040 }, { "epoch": 0.23453908984830804, "grad_norm": 1.7700967166753654, "learning_rate": 9.458863251066978e-06, "loss": 0.5126, "step": 10050 }, { "epoch": 0.23477246207701283, "grad_norm": 1.5226747729338284, "learning_rate": 9.457018754019375e-06, "loss": 0.5028, "step": 10060 }, { "epoch": 0.23500583430571761, "grad_norm": 1.44812308932358, "learning_rate": 9.455171299248683e-06, "loss": 0.5, "step": 10070 }, { "epoch": 0.2352392065344224, "grad_norm": 1.5890578373517292, "learning_rate": 9.453320887980893e-06, "loss": 0.5192, "step": 10080 }, { "epoch": 0.2354725787631272, "grad_norm": 1.8163836391138795, "learning_rate": 9.451467521443956e-06, "loss": 0.5051, "step": 10090 }, { "epoch": 0.23570595099183198, "grad_norm": 1.6391596445174004, "learning_rate": 9.449611200867785e-06, "loss": 0.5186, "step": 10100 }, { "epoch": 0.23593932322053676, "grad_norm": 1.964521094549118, "learning_rate": 9.447751927484254e-06, "loss": 0.4926, "step": 10110 }, { "epoch": 0.23617269544924155, "grad_norm": 2.0439288301544343, "learning_rate": 9.445889702527197e-06, "loss": 0.5069, "step": 10120 }, { "epoch": 0.23640606767794634, "grad_norm": 1.582654798766209, "learning_rate": 9.444024527232402e-06, "loss": 0.5194, "step": 10130 }, { "epoch": 0.23663943990665112, "grad_norm": 1.5472971620080502, "learning_rate": 9.442156402837622e-06, "loss": 0.5201, "step": 10140 }, { "epoch": 0.23687281213535588, "grad_norm": 1.9137080341865322, "learning_rate": 9.440285330582562e-06, "loss": 0.5307, "step": 10150 }, { "epoch": 0.23710618436406067, "grad_norm": 1.416099618921997, "learning_rate": 9.438411311708881e-06, "loss": 0.5183, "step": 10160 }, { "epoch": 0.23733955659276545, "grad_norm": 2.263709451155927, "learning_rate": 9.436534347460203e-06, "loss": 0.505, "step": 10170 }, { "epoch": 0.23757292882147024, "grad_norm": 1.4553848351765435, "learning_rate": 9.434654439082099e-06, "loss": 0.5055, "step": 10180 }, { "epoch": 0.23780630105017503, "grad_norm": 1.7043198111088862, "learning_rate": 9.432771587822091e-06, "loss": 0.4989, "step": 10190 }, { "epoch": 0.23803967327887982, "grad_norm": 1.5529591978003185, "learning_rate": 9.430885794929664e-06, "loss": 0.5089, "step": 10200 }, { "epoch": 0.2382730455075846, "grad_norm": 1.624197275300492, "learning_rate": 9.428997061656247e-06, "loss": 0.5239, "step": 10210 }, { "epoch": 0.2385064177362894, "grad_norm": 1.5730866948618436, "learning_rate": 9.427105389255221e-06, "loss": 0.4949, "step": 10220 }, { "epoch": 0.23873978996499418, "grad_norm": 1.7080979368477824, "learning_rate": 9.425210778981922e-06, "loss": 0.4812, "step": 10230 }, { "epoch": 0.23897316219369896, "grad_norm": 1.3341727470547722, "learning_rate": 9.423313232093632e-06, "loss": 0.511, "step": 10240 }, { "epoch": 0.23920653442240372, "grad_norm": 1.48410810183396, "learning_rate": 9.42141274984958e-06, "loss": 0.493, "step": 10250 }, { "epoch": 0.2394399066511085, "grad_norm": 1.7300306869651683, "learning_rate": 9.419509333510947e-06, "loss": 0.5279, "step": 10260 }, { "epoch": 0.2396732788798133, "grad_norm": 1.4031460199417318, "learning_rate": 9.417602984340862e-06, "loss": 0.5557, "step": 10270 }, { "epoch": 0.23990665110851808, "grad_norm": 1.5064354491723013, "learning_rate": 9.415693703604395e-06, "loss": 0.5074, "step": 10280 }, { "epoch": 0.24014002333722287, "grad_norm": 1.7783093945777462, "learning_rate": 9.413781492568564e-06, "loss": 0.5142, "step": 10290 }, { "epoch": 0.24037339556592766, "grad_norm": 1.5047131060417924, "learning_rate": 9.411866352502332e-06, "loss": 0.498, "step": 10300 }, { "epoch": 0.24060676779463244, "grad_norm": 1.595114862925519, "learning_rate": 9.409948284676607e-06, "loss": 0.5323, "step": 10310 }, { "epoch": 0.24084014002333723, "grad_norm": 2.5729666631934394, "learning_rate": 9.408027290364237e-06, "loss": 0.5265, "step": 10320 }, { "epoch": 0.24107351225204202, "grad_norm": 1.6200578790128366, "learning_rate": 9.406103370840014e-06, "loss": 0.5336, "step": 10330 }, { "epoch": 0.2413068844807468, "grad_norm": 1.6833287096767688, "learning_rate": 9.40417652738067e-06, "loss": 0.5162, "step": 10340 }, { "epoch": 0.24154025670945156, "grad_norm": 1.7147630191416507, "learning_rate": 9.40224676126488e-06, "loss": 0.5269, "step": 10350 }, { "epoch": 0.24177362893815635, "grad_norm": 1.6880799500444374, "learning_rate": 9.400314073773251e-06, "loss": 0.5158, "step": 10360 }, { "epoch": 0.24200700116686114, "grad_norm": 1.779484373967782, "learning_rate": 9.39837846618834e-06, "loss": 0.5142, "step": 10370 }, { "epoch": 0.24224037339556592, "grad_norm": 1.6351258576699326, "learning_rate": 9.396439939794634e-06, "loss": 0.5098, "step": 10380 }, { "epoch": 0.2424737456242707, "grad_norm": 1.519027084930144, "learning_rate": 9.394498495878558e-06, "loss": 0.5133, "step": 10390 }, { "epoch": 0.2427071178529755, "grad_norm": 1.6319667530824145, "learning_rate": 9.392554135728476e-06, "loss": 0.5198, "step": 10400 }, { "epoch": 0.24294049008168028, "grad_norm": 1.2979684573532209, "learning_rate": 9.390606860634681e-06, "loss": 0.4864, "step": 10410 }, { "epoch": 0.24317386231038507, "grad_norm": 1.379740004311542, "learning_rate": 9.38865667188941e-06, "loss": 0.4696, "step": 10420 }, { "epoch": 0.24340723453908986, "grad_norm": 1.7578135061672522, "learning_rate": 9.386703570786825e-06, "loss": 0.547, "step": 10430 }, { "epoch": 0.24364060676779464, "grad_norm": 2.9812659626715177, "learning_rate": 9.384747558623023e-06, "loss": 0.5197, "step": 10440 }, { "epoch": 0.24387397899649943, "grad_norm": 1.5387522331395926, "learning_rate": 9.382788636696037e-06, "loss": 0.5268, "step": 10450 }, { "epoch": 0.2441073512252042, "grad_norm": 1.5610243317984878, "learning_rate": 9.380826806305826e-06, "loss": 0.5, "step": 10460 }, { "epoch": 0.24434072345390898, "grad_norm": 1.3885150733277682, "learning_rate": 9.378862068754278e-06, "loss": 0.5093, "step": 10470 }, { "epoch": 0.24457409568261376, "grad_norm": 1.5511214247232221, "learning_rate": 9.376894425345216e-06, "loss": 0.5305, "step": 10480 }, { "epoch": 0.24480746791131855, "grad_norm": 1.6669980191937206, "learning_rate": 9.374923877384388e-06, "loss": 0.535, "step": 10490 }, { "epoch": 0.24504084014002334, "grad_norm": 1.7101395837024336, "learning_rate": 9.372950426179469e-06, "loss": 0.4958, "step": 10500 }, { "epoch": 0.24527421236872812, "grad_norm": 1.6898334151571603, "learning_rate": 9.37097407304006e-06, "loss": 0.4924, "step": 10510 }, { "epoch": 0.2455075845974329, "grad_norm": 1.5809270904660395, "learning_rate": 9.368994819277693e-06, "loss": 0.5026, "step": 10520 }, { "epoch": 0.2457409568261377, "grad_norm": 1.6128495956648847, "learning_rate": 9.367012666205817e-06, "loss": 0.5085, "step": 10530 }, { "epoch": 0.24597432905484248, "grad_norm": 1.382969824904601, "learning_rate": 9.36502761513981e-06, "loss": 0.4926, "step": 10540 }, { "epoch": 0.24620770128354727, "grad_norm": 1.730608656147605, "learning_rate": 9.363039667396973e-06, "loss": 0.4915, "step": 10550 }, { "epoch": 0.24644107351225203, "grad_norm": 1.4019412252600851, "learning_rate": 9.361048824296528e-06, "loss": 0.5114, "step": 10560 }, { "epoch": 0.24667444574095682, "grad_norm": 1.5641707744095792, "learning_rate": 9.35905508715962e-06, "loss": 0.4942, "step": 10570 }, { "epoch": 0.2469078179696616, "grad_norm": 1.544277168718449, "learning_rate": 9.35705845730931e-06, "loss": 0.4703, "step": 10580 }, { "epoch": 0.2471411901983664, "grad_norm": 1.6441763202513178, "learning_rate": 9.355058936070584e-06, "loss": 0.4968, "step": 10590 }, { "epoch": 0.24737456242707118, "grad_norm": 1.6064081290827577, "learning_rate": 9.353056524770345e-06, "loss": 0.5118, "step": 10600 }, { "epoch": 0.24760793465577596, "grad_norm": 1.701426079486011, "learning_rate": 9.351051224737413e-06, "loss": 0.5179, "step": 10610 }, { "epoch": 0.24784130688448075, "grad_norm": 1.855070567413436, "learning_rate": 9.349043037302527e-06, "loss": 0.5198, "step": 10620 }, { "epoch": 0.24807467911318554, "grad_norm": 1.6134377141522476, "learning_rate": 9.347031963798338e-06, "loss": 0.4962, "step": 10630 }, { "epoch": 0.24830805134189032, "grad_norm": 1.5223795797216029, "learning_rate": 9.345018005559418e-06, "loss": 0.536, "step": 10640 }, { "epoch": 0.2485414235705951, "grad_norm": 1.3499966790489084, "learning_rate": 9.343001163922247e-06, "loss": 0.5154, "step": 10650 }, { "epoch": 0.2487747957992999, "grad_norm": 1.7065875656353933, "learning_rate": 9.340981440225224e-06, "loss": 0.5166, "step": 10660 }, { "epoch": 0.24900816802800466, "grad_norm": 1.7198807102684843, "learning_rate": 9.338958835808658e-06, "loss": 0.5059, "step": 10670 }, { "epoch": 0.24924154025670944, "grad_norm": 1.5820088296739694, "learning_rate": 9.336933352014767e-06, "loss": 0.5031, "step": 10680 }, { "epoch": 0.24947491248541423, "grad_norm": 1.6447134644553074, "learning_rate": 9.334904990187687e-06, "loss": 0.5237, "step": 10690 }, { "epoch": 0.24970828471411902, "grad_norm": 1.6831515909318797, "learning_rate": 9.332873751673457e-06, "loss": 0.5258, "step": 10700 }, { "epoch": 0.2499416569428238, "grad_norm": 1.472868509978348, "learning_rate": 9.330839637820028e-06, "loss": 0.5008, "step": 10710 }, { "epoch": 0.25017502917152856, "grad_norm": 1.646949483739457, "learning_rate": 9.32880264997726e-06, "loss": 0.4959, "step": 10720 }, { "epoch": 0.25040840140023335, "grad_norm": 2.2875383046864823, "learning_rate": 9.326762789496918e-06, "loss": 0.4907, "step": 10730 }, { "epoch": 0.25064177362893814, "grad_norm": 1.6676969473823016, "learning_rate": 9.324720057732672e-06, "loss": 0.5188, "step": 10740 }, { "epoch": 0.2508751458576429, "grad_norm": 1.6651446957051963, "learning_rate": 9.322674456040103e-06, "loss": 0.4909, "step": 10750 }, { "epoch": 0.2511085180863477, "grad_norm": 1.8191311621037958, "learning_rate": 9.320625985776692e-06, "loss": 0.493, "step": 10760 }, { "epoch": 0.2513418903150525, "grad_norm": 1.4038057638530368, "learning_rate": 9.318574648301823e-06, "loss": 0.497, "step": 10770 }, { "epoch": 0.2515752625437573, "grad_norm": 1.733453371975136, "learning_rate": 9.316520444976788e-06, "loss": 0.5361, "step": 10780 }, { "epoch": 0.25180863477246207, "grad_norm": 1.5590952935330744, "learning_rate": 9.314463377164773e-06, "loss": 0.5046, "step": 10790 }, { "epoch": 0.25204200700116686, "grad_norm": 1.5035366691278174, "learning_rate": 9.312403446230874e-06, "loss": 0.5165, "step": 10800 }, { "epoch": 0.25227537922987164, "grad_norm": 1.5236592713467603, "learning_rate": 9.310340653542078e-06, "loss": 0.4982, "step": 10810 }, { "epoch": 0.25250875145857643, "grad_norm": 1.5691760464553455, "learning_rate": 9.308275000467278e-06, "loss": 0.5238, "step": 10820 }, { "epoch": 0.2527421236872812, "grad_norm": 1.593518566867878, "learning_rate": 9.306206488377261e-06, "loss": 0.4896, "step": 10830 }, { "epoch": 0.252975495915986, "grad_norm": 1.4779341886097048, "learning_rate": 9.304135118644712e-06, "loss": 0.5223, "step": 10840 }, { "epoch": 0.2532088681446908, "grad_norm": 1.595101460174457, "learning_rate": 9.302060892644215e-06, "loss": 0.5183, "step": 10850 }, { "epoch": 0.2534422403733956, "grad_norm": 1.5421761314329765, "learning_rate": 9.299983811752247e-06, "loss": 0.4734, "step": 10860 }, { "epoch": 0.25367561260210036, "grad_norm": 1.6494460804810718, "learning_rate": 9.297903877347178e-06, "loss": 0.5191, "step": 10870 }, { "epoch": 0.25390898483080515, "grad_norm": 1.509549219566645, "learning_rate": 9.295821090809277e-06, "loss": 0.4887, "step": 10880 }, { "epoch": 0.25414235705950994, "grad_norm": 1.6042527674778424, "learning_rate": 9.293735453520701e-06, "loss": 0.5064, "step": 10890 }, { "epoch": 0.2543757292882147, "grad_norm": 1.7338891622893478, "learning_rate": 9.291646966865497e-06, "loss": 0.5178, "step": 10900 }, { "epoch": 0.2546091015169195, "grad_norm": 1.6065943998105883, "learning_rate": 9.289555632229612e-06, "loss": 0.5427, "step": 10910 }, { "epoch": 0.2548424737456243, "grad_norm": 1.489109155955742, "learning_rate": 9.287461451000872e-06, "loss": 0.5186, "step": 10920 }, { "epoch": 0.25507584597432903, "grad_norm": 1.7552028666129849, "learning_rate": 9.285364424569001e-06, "loss": 0.5106, "step": 10930 }, { "epoch": 0.2553092182030338, "grad_norm": 1.6499966466881946, "learning_rate": 9.283264554325604e-06, "loss": 0.5053, "step": 10940 }, { "epoch": 0.2555425904317386, "grad_norm": 1.3120608969725085, "learning_rate": 9.281161841664176e-06, "loss": 0.5137, "step": 10950 }, { "epoch": 0.2557759626604434, "grad_norm": 1.5162665172060734, "learning_rate": 9.279056287980101e-06, "loss": 0.5051, "step": 10960 }, { "epoch": 0.2560093348891482, "grad_norm": 1.6141120962657476, "learning_rate": 9.276947894670645e-06, "loss": 0.5272, "step": 10970 }, { "epoch": 0.25624270711785296, "grad_norm": 1.6789038153912708, "learning_rate": 9.27483666313496e-06, "loss": 0.5199, "step": 10980 }, { "epoch": 0.25647607934655775, "grad_norm": 1.4384357749792887, "learning_rate": 9.27272259477408e-06, "loss": 0.5063, "step": 10990 }, { "epoch": 0.25670945157526254, "grad_norm": 1.6990316241700556, "learning_rate": 9.270605690990921e-06, "loss": 0.5071, "step": 11000 }, { "epoch": 0.2569428238039673, "grad_norm": 1.747683087233455, "learning_rate": 9.268485953190284e-06, "loss": 0.5259, "step": 11010 }, { "epoch": 0.2571761960326721, "grad_norm": 1.7278535100268628, "learning_rate": 9.266363382778846e-06, "loss": 0.5158, "step": 11020 }, { "epoch": 0.2574095682613769, "grad_norm": 1.6318871534912696, "learning_rate": 9.26423798116517e-06, "loss": 0.5206, "step": 11030 }, { "epoch": 0.2576429404900817, "grad_norm": 1.6145857371313972, "learning_rate": 9.262109749759692e-06, "loss": 0.5409, "step": 11040 }, { "epoch": 0.25787631271878647, "grad_norm": 1.6761211838305243, "learning_rate": 9.259978689974729e-06, "loss": 0.5156, "step": 11050 }, { "epoch": 0.25810968494749126, "grad_norm": 1.4851850722484377, "learning_rate": 9.257844803224471e-06, "loss": 0.4737, "step": 11060 }, { "epoch": 0.25834305717619604, "grad_norm": 1.716381504934474, "learning_rate": 9.255708090924993e-06, "loss": 0.5164, "step": 11070 }, { "epoch": 0.25857642940490083, "grad_norm": 1.6525507423820753, "learning_rate": 9.253568554494232e-06, "loss": 0.4872, "step": 11080 }, { "epoch": 0.2588098016336056, "grad_norm": 1.3332574236834638, "learning_rate": 9.251426195352012e-06, "loss": 0.5191, "step": 11090 }, { "epoch": 0.2590431738623104, "grad_norm": 1.488646235199159, "learning_rate": 9.249281014920019e-06, "loss": 0.5168, "step": 11100 }, { "epoch": 0.2592765460910152, "grad_norm": 1.6140800065143972, "learning_rate": 9.247133014621823e-06, "loss": 0.5099, "step": 11110 }, { "epoch": 0.25950991831972, "grad_norm": 1.6233481273947705, "learning_rate": 9.244982195882855e-06, "loss": 0.5198, "step": 11120 }, { "epoch": 0.2597432905484247, "grad_norm": 1.6985692822672376, "learning_rate": 9.242828560130419e-06, "loss": 0.5307, "step": 11130 }, { "epoch": 0.2599766627771295, "grad_norm": 1.7543452944418032, "learning_rate": 9.24067210879369e-06, "loss": 0.4905, "step": 11140 }, { "epoch": 0.2602100350058343, "grad_norm": 1.6260474267369256, "learning_rate": 9.238512843303718e-06, "loss": 0.4984, "step": 11150 }, { "epoch": 0.26044340723453907, "grad_norm": 1.660445906882396, "learning_rate": 9.236350765093404e-06, "loss": 0.4882, "step": 11160 }, { "epoch": 0.26067677946324386, "grad_norm": 1.7175725486018256, "learning_rate": 9.23418587559753e-06, "loss": 0.4979, "step": 11170 }, { "epoch": 0.26091015169194864, "grad_norm": 1.8540323422086864, "learning_rate": 9.232018176252738e-06, "loss": 0.5017, "step": 11180 }, { "epoch": 0.26114352392065343, "grad_norm": 1.7097079587332817, "learning_rate": 9.229847668497536e-06, "loss": 0.4969, "step": 11190 }, { "epoch": 0.2613768961493582, "grad_norm": 1.518506799517891, "learning_rate": 9.227674353772293e-06, "loss": 0.4951, "step": 11200 }, { "epoch": 0.261610268378063, "grad_norm": 1.4508796090543412, "learning_rate": 9.225498233519246e-06, "loss": 0.4853, "step": 11210 }, { "epoch": 0.2618436406067678, "grad_norm": 1.4358163825457582, "learning_rate": 9.223319309182488e-06, "loss": 0.4946, "step": 11220 }, { "epoch": 0.2620770128354726, "grad_norm": 1.6769348825369625, "learning_rate": 9.221137582207974e-06, "loss": 0.4861, "step": 11230 }, { "epoch": 0.26231038506417736, "grad_norm": 1.5971859072532333, "learning_rate": 9.218953054043527e-06, "loss": 0.5138, "step": 11240 }, { "epoch": 0.26254375729288215, "grad_norm": 1.6001350845470512, "learning_rate": 9.216765726138815e-06, "loss": 0.5064, "step": 11250 }, { "epoch": 0.26277712952158694, "grad_norm": 1.538168629338886, "learning_rate": 9.214575599945374e-06, "loss": 0.5261, "step": 11260 }, { "epoch": 0.2630105017502917, "grad_norm": 1.6461979975868513, "learning_rate": 9.212382676916594e-06, "loss": 0.5164, "step": 11270 }, { "epoch": 0.2632438739789965, "grad_norm": 1.7923716489906878, "learning_rate": 9.210186958507722e-06, "loss": 0.5018, "step": 11280 }, { "epoch": 0.2634772462077013, "grad_norm": 1.4039297839126594, "learning_rate": 9.207988446175858e-06, "loss": 0.5452, "step": 11290 }, { "epoch": 0.2637106184364061, "grad_norm": 1.6035434609609691, "learning_rate": 9.205787141379955e-06, "loss": 0.5305, "step": 11300 }, { "epoch": 0.2639439906651109, "grad_norm": 1.5979832816160913, "learning_rate": 9.203583045580825e-06, "loss": 0.5118, "step": 11310 }, { "epoch": 0.26417736289381566, "grad_norm": 1.7220324074383586, "learning_rate": 9.201376160241126e-06, "loss": 0.4836, "step": 11320 }, { "epoch": 0.26441073512252045, "grad_norm": 1.7954462200514123, "learning_rate": 9.199166486825373e-06, "loss": 0.5157, "step": 11330 }, { "epoch": 0.2646441073512252, "grad_norm": 1.7845966172865153, "learning_rate": 9.196954026799922e-06, "loss": 0.5102, "step": 11340 }, { "epoch": 0.26487747957992996, "grad_norm": 1.6076882401343855, "learning_rate": 9.19473878163299e-06, "loss": 0.5094, "step": 11350 }, { "epoch": 0.26511085180863475, "grad_norm": 1.512918662060273, "learning_rate": 9.192520752794633e-06, "loss": 0.4971, "step": 11360 }, { "epoch": 0.26534422403733954, "grad_norm": 1.6595100660125301, "learning_rate": 9.190299941756758e-06, "loss": 0.5219, "step": 11370 }, { "epoch": 0.2655775962660443, "grad_norm": 1.5503356018664705, "learning_rate": 9.18807634999312e-06, "loss": 0.5059, "step": 11380 }, { "epoch": 0.2658109684947491, "grad_norm": 1.4993206481785244, "learning_rate": 9.185849978979313e-06, "loss": 0.5065, "step": 11390 }, { "epoch": 0.2660443407234539, "grad_norm": 1.7850909575017138, "learning_rate": 9.183620830192783e-06, "loss": 0.5002, "step": 11400 }, { "epoch": 0.2662777129521587, "grad_norm": 1.5292346066070124, "learning_rate": 9.181388905112814e-06, "loss": 0.4928, "step": 11410 }, { "epoch": 0.26651108518086347, "grad_norm": 1.509118747982862, "learning_rate": 9.179154205220536e-06, "loss": 0.4857, "step": 11420 }, { "epoch": 0.26674445740956826, "grad_norm": 1.540531453469912, "learning_rate": 9.176916731998914e-06, "loss": 0.4867, "step": 11430 }, { "epoch": 0.26697782963827305, "grad_norm": 1.682658125631963, "learning_rate": 9.174676486932765e-06, "loss": 0.5097, "step": 11440 }, { "epoch": 0.26721120186697783, "grad_norm": 1.5695765652528242, "learning_rate": 9.172433471508734e-06, "loss": 0.5005, "step": 11450 }, { "epoch": 0.2674445740956826, "grad_norm": 1.8229060054177333, "learning_rate": 9.170187687215311e-06, "loss": 0.5222, "step": 11460 }, { "epoch": 0.2676779463243874, "grad_norm": 1.5718192865960308, "learning_rate": 9.167939135542821e-06, "loss": 0.4998, "step": 11470 }, { "epoch": 0.2679113185530922, "grad_norm": 1.514634472916728, "learning_rate": 9.165687817983423e-06, "loss": 0.5104, "step": 11480 }, { "epoch": 0.268144690781797, "grad_norm": 1.626169947010278, "learning_rate": 9.163433736031117e-06, "loss": 0.5005, "step": 11490 }, { "epoch": 0.26837806301050177, "grad_norm": 1.301511356962538, "learning_rate": 9.161176891181733e-06, "loss": 0.5118, "step": 11500 }, { "epoch": 0.26861143523920655, "grad_norm": 1.5378711294280045, "learning_rate": 9.15891728493294e-06, "loss": 0.4916, "step": 11510 }, { "epoch": 0.26884480746791134, "grad_norm": 1.288653142349809, "learning_rate": 9.156654918784231e-06, "loss": 0.5046, "step": 11520 }, { "epoch": 0.2690781796966161, "grad_norm": 1.602406302243873, "learning_rate": 9.154389794236938e-06, "loss": 0.4928, "step": 11530 }, { "epoch": 0.2693115519253209, "grad_norm": 1.5910963191430625, "learning_rate": 9.152121912794221e-06, "loss": 0.5085, "step": 11540 }, { "epoch": 0.26954492415402564, "grad_norm": 1.6550470998354847, "learning_rate": 9.149851275961069e-06, "loss": 0.4947, "step": 11550 }, { "epoch": 0.26977829638273043, "grad_norm": 1.6220292271114654, "learning_rate": 9.147577885244297e-06, "loss": 0.4942, "step": 11560 }, { "epoch": 0.2700116686114352, "grad_norm": 1.4279323586159778, "learning_rate": 9.145301742152553e-06, "loss": 0.5172, "step": 11570 }, { "epoch": 0.27024504084014, "grad_norm": 1.5037870511618232, "learning_rate": 9.143022848196309e-06, "loss": 0.5181, "step": 11580 }, { "epoch": 0.2704784130688448, "grad_norm": 1.351280200677102, "learning_rate": 9.14074120488786e-06, "loss": 0.5183, "step": 11590 }, { "epoch": 0.2707117852975496, "grad_norm": 1.557753672398412, "learning_rate": 9.13845681374133e-06, "loss": 0.5141, "step": 11600 }, { "epoch": 0.27094515752625437, "grad_norm": 1.478855356973082, "learning_rate": 9.136169676272665e-06, "loss": 0.5372, "step": 11610 }, { "epoch": 0.27117852975495915, "grad_norm": 1.7427995760016821, "learning_rate": 9.13387979399963e-06, "loss": 0.4899, "step": 11620 }, { "epoch": 0.27141190198366394, "grad_norm": 1.543502482210675, "learning_rate": 9.131587168441814e-06, "loss": 0.5007, "step": 11630 }, { "epoch": 0.2716452742123687, "grad_norm": 1.5644060364829862, "learning_rate": 9.12929180112063e-06, "loss": 0.5116, "step": 11640 }, { "epoch": 0.2718786464410735, "grad_norm": 1.6168188597242392, "learning_rate": 9.126993693559305e-06, "loss": 0.5279, "step": 11650 }, { "epoch": 0.2721120186697783, "grad_norm": 1.4685405676410133, "learning_rate": 9.124692847282887e-06, "loss": 0.5, "step": 11660 }, { "epoch": 0.2723453908984831, "grad_norm": 1.257540505557901, "learning_rate": 9.122389263818241e-06, "loss": 0.4621, "step": 11670 }, { "epoch": 0.2725787631271879, "grad_norm": 1.577716751817285, "learning_rate": 9.120082944694047e-06, "loss": 0.521, "step": 11680 }, { "epoch": 0.27281213535589266, "grad_norm": 1.6349964190346202, "learning_rate": 9.117773891440805e-06, "loss": 0.526, "step": 11690 }, { "epoch": 0.27304550758459745, "grad_norm": 1.7205727578321293, "learning_rate": 9.115462105590823e-06, "loss": 0.5267, "step": 11700 }, { "epoch": 0.27327887981330223, "grad_norm": 1.5009274133541821, "learning_rate": 9.113147588678228e-06, "loss": 0.4889, "step": 11710 }, { "epoch": 0.273512252042007, "grad_norm": 1.835095002923193, "learning_rate": 9.110830342238956e-06, "loss": 0.5222, "step": 11720 }, { "epoch": 0.2737456242707118, "grad_norm": 1.678493675603124, "learning_rate": 9.108510367810757e-06, "loss": 0.5155, "step": 11730 }, { "epoch": 0.2739789964994166, "grad_norm": 1.8389736431774064, "learning_rate": 9.106187666933187e-06, "loss": 0.4945, "step": 11740 }, { "epoch": 0.2742123687281214, "grad_norm": 1.428710464609132, "learning_rate": 9.103862241147616e-06, "loss": 0.5088, "step": 11750 }, { "epoch": 0.2744457409568261, "grad_norm": 1.6334437778166067, "learning_rate": 9.10153409199722e-06, "loss": 0.5184, "step": 11760 }, { "epoch": 0.2746791131855309, "grad_norm": 1.8097620676672073, "learning_rate": 9.09920322102698e-06, "loss": 0.5128, "step": 11770 }, { "epoch": 0.2749124854142357, "grad_norm": 1.6535587750395808, "learning_rate": 9.096869629783688e-06, "loss": 0.5122, "step": 11780 }, { "epoch": 0.2751458576429405, "grad_norm": 1.5079154350908566, "learning_rate": 9.094533319815942e-06, "loss": 0.5218, "step": 11790 }, { "epoch": 0.27537922987164526, "grad_norm": 1.5824851652848837, "learning_rate": 9.092194292674135e-06, "loss": 0.5149, "step": 11800 }, { "epoch": 0.27561260210035005, "grad_norm": 2.1506309945053275, "learning_rate": 9.089852549910472e-06, "loss": 0.5002, "step": 11810 }, { "epoch": 0.27584597432905483, "grad_norm": 1.533778436276957, "learning_rate": 9.087508093078959e-06, "loss": 0.5189, "step": 11820 }, { "epoch": 0.2760793465577596, "grad_norm": 1.5880087913099306, "learning_rate": 9.085160923735399e-06, "loss": 0.4901, "step": 11830 }, { "epoch": 0.2763127187864644, "grad_norm": 1.8725507527895566, "learning_rate": 9.082811043437399e-06, "loss": 0.5305, "step": 11840 }, { "epoch": 0.2765460910151692, "grad_norm": 1.4948383311434166, "learning_rate": 9.080458453744361e-06, "loss": 0.5027, "step": 11850 }, { "epoch": 0.276779463243874, "grad_norm": 1.5336538571488685, "learning_rate": 9.078103156217492e-06, "loss": 0.523, "step": 11860 }, { "epoch": 0.27701283547257877, "grad_norm": 1.666892922763854, "learning_rate": 9.075745152419787e-06, "loss": 0.489, "step": 11870 }, { "epoch": 0.27724620770128355, "grad_norm": 1.5610623566674622, "learning_rate": 9.073384443916046e-06, "loss": 0.5016, "step": 11880 }, { "epoch": 0.27747957992998834, "grad_norm": 1.6493126391940067, "learning_rate": 9.071021032272856e-06, "loss": 0.4808, "step": 11890 }, { "epoch": 0.2777129521586931, "grad_norm": 1.6751076086129664, "learning_rate": 9.0686549190586e-06, "loss": 0.4672, "step": 11900 }, { "epoch": 0.2779463243873979, "grad_norm": 1.407171558240374, "learning_rate": 9.066286105843457e-06, "loss": 0.4964, "step": 11910 }, { "epoch": 0.2781796966161027, "grad_norm": 2.3166842769028095, "learning_rate": 9.063914594199394e-06, "loss": 0.4798, "step": 11920 }, { "epoch": 0.2784130688448075, "grad_norm": 1.5977734869192064, "learning_rate": 9.061540385700173e-06, "loss": 0.5033, "step": 11930 }, { "epoch": 0.2786464410735123, "grad_norm": 1.479281609232687, "learning_rate": 9.05916348192134e-06, "loss": 0.5207, "step": 11940 }, { "epoch": 0.27887981330221706, "grad_norm": 1.5538504925750813, "learning_rate": 9.056783884440236e-06, "loss": 0.5066, "step": 11950 }, { "epoch": 0.27911318553092185, "grad_norm": 1.3508680916201916, "learning_rate": 9.054401594835982e-06, "loss": 0.4707, "step": 11960 }, { "epoch": 0.2793465577596266, "grad_norm": 1.8236007450155518, "learning_rate": 9.052016614689494e-06, "loss": 0.4909, "step": 11970 }, { "epoch": 0.27957992998833137, "grad_norm": 1.6217613115372984, "learning_rate": 9.049628945583464e-06, "loss": 0.4978, "step": 11980 }, { "epoch": 0.27981330221703615, "grad_norm": 1.5519691889143354, "learning_rate": 9.047238589102378e-06, "loss": 0.5133, "step": 11990 }, { "epoch": 0.28004667444574094, "grad_norm": 1.624634731382664, "learning_rate": 9.044845546832499e-06, "loss": 0.4925, "step": 12000 }, { "epoch": 0.2802800466744457, "grad_norm": 1.6180251678489161, "learning_rate": 9.042449820361876e-06, "loss": 0.4784, "step": 12010 }, { "epoch": 0.2805134189031505, "grad_norm": 1.4127916153521363, "learning_rate": 9.040051411280335e-06, "loss": 0.5263, "step": 12020 }, { "epoch": 0.2807467911318553, "grad_norm": 1.4681424087682442, "learning_rate": 9.037650321179486e-06, "loss": 0.4954, "step": 12030 }, { "epoch": 0.2809801633605601, "grad_norm": 1.6395651206561954, "learning_rate": 9.035246551652716e-06, "loss": 0.5, "step": 12040 }, { "epoch": 0.2812135355892649, "grad_norm": 1.579237146164663, "learning_rate": 9.032840104295193e-06, "loss": 0.518, "step": 12050 }, { "epoch": 0.28144690781796966, "grad_norm": 1.6401076397950853, "learning_rate": 9.030430980703857e-06, "loss": 0.4801, "step": 12060 }, { "epoch": 0.28168028004667445, "grad_norm": 1.6335599399647616, "learning_rate": 9.02801918247743e-06, "loss": 0.4862, "step": 12070 }, { "epoch": 0.28191365227537923, "grad_norm": 1.6031558540315474, "learning_rate": 9.025604711216405e-06, "loss": 0.4988, "step": 12080 }, { "epoch": 0.282147024504084, "grad_norm": 1.8333370275328327, "learning_rate": 9.023187568523049e-06, "loss": 0.502, "step": 12090 }, { "epoch": 0.2823803967327888, "grad_norm": 1.5353300931252745, "learning_rate": 9.020767756001401e-06, "loss": 0.5218, "step": 12100 }, { "epoch": 0.2826137689614936, "grad_norm": 1.5961991597611227, "learning_rate": 9.018345275257278e-06, "loss": 0.4961, "step": 12110 }, { "epoch": 0.2828471411901984, "grad_norm": 1.561264187525853, "learning_rate": 9.015920127898257e-06, "loss": 0.5196, "step": 12120 }, { "epoch": 0.28308051341890317, "grad_norm": 1.595593763024833, "learning_rate": 9.013492315533695e-06, "loss": 0.5016, "step": 12130 }, { "epoch": 0.28331388564760795, "grad_norm": 1.7608677885731254, "learning_rate": 9.011061839774713e-06, "loss": 0.5166, "step": 12140 }, { "epoch": 0.28354725787631274, "grad_norm": 1.7134642911361706, "learning_rate": 9.008628702234197e-06, "loss": 0.5267, "step": 12150 }, { "epoch": 0.28378063010501753, "grad_norm": 1.453474180545501, "learning_rate": 9.006192904526803e-06, "loss": 0.4977, "step": 12160 }, { "epoch": 0.28401400233372226, "grad_norm": 1.5053080885872683, "learning_rate": 9.003754448268949e-06, "loss": 0.4948, "step": 12170 }, { "epoch": 0.28424737456242705, "grad_norm": 1.368924633962007, "learning_rate": 9.001313335078824e-06, "loss": 0.4899, "step": 12180 }, { "epoch": 0.28448074679113183, "grad_norm": 1.4910800399442632, "learning_rate": 8.998869566576374e-06, "loss": 0.5112, "step": 12190 }, { "epoch": 0.2847141190198366, "grad_norm": 1.513080919309942, "learning_rate": 8.996423144383305e-06, "loss": 0.5083, "step": 12200 }, { "epoch": 0.2849474912485414, "grad_norm": 1.6662225513419846, "learning_rate": 8.993974070123094e-06, "loss": 0.5274, "step": 12210 }, { "epoch": 0.2851808634772462, "grad_norm": 1.6378653553271536, "learning_rate": 8.991522345420966e-06, "loss": 0.5084, "step": 12220 }, { "epoch": 0.285414235705951, "grad_norm": 1.4371476675111923, "learning_rate": 8.989067971903913e-06, "loss": 0.4954, "step": 12230 }, { "epoch": 0.28564760793465577, "grad_norm": 1.7183091843255243, "learning_rate": 8.986610951200683e-06, "loss": 0.5025, "step": 12240 }, { "epoch": 0.28588098016336055, "grad_norm": 1.7605530437918917, "learning_rate": 8.984151284941777e-06, "loss": 0.4986, "step": 12250 }, { "epoch": 0.28611435239206534, "grad_norm": 1.4894416333081, "learning_rate": 8.981688974759459e-06, "loss": 0.4875, "step": 12260 }, { "epoch": 0.2863477246207701, "grad_norm": 2.0370708744649626, "learning_rate": 8.979224022287738e-06, "loss": 0.5038, "step": 12270 }, { "epoch": 0.2865810968494749, "grad_norm": 1.4545942559759582, "learning_rate": 8.976756429162388e-06, "loss": 0.4654, "step": 12280 }, { "epoch": 0.2868144690781797, "grad_norm": 1.6835730400654016, "learning_rate": 8.974286197020922e-06, "loss": 0.5256, "step": 12290 }, { "epoch": 0.2870478413068845, "grad_norm": 1.503098567058669, "learning_rate": 8.971813327502616e-06, "loss": 0.4876, "step": 12300 }, { "epoch": 0.2872812135355893, "grad_norm": 1.3931256559582423, "learning_rate": 8.96933782224849e-06, "loss": 0.4814, "step": 12310 }, { "epoch": 0.28751458576429406, "grad_norm": 1.7722985140343877, "learning_rate": 8.966859682901315e-06, "loss": 0.4922, "step": 12320 }, { "epoch": 0.28774795799299885, "grad_norm": 1.3489825183611002, "learning_rate": 8.96437891110561e-06, "loss": 0.5059, "step": 12330 }, { "epoch": 0.28798133022170364, "grad_norm": 1.75384153418522, "learning_rate": 8.961895508507637e-06, "loss": 0.4908, "step": 12340 }, { "epoch": 0.2882147024504084, "grad_norm": 1.358054730600358, "learning_rate": 8.959409476755412e-06, "loss": 0.5098, "step": 12350 }, { "epoch": 0.2884480746791132, "grad_norm": 1.4665916248928355, "learning_rate": 8.956920817498686e-06, "loss": 0.5061, "step": 12360 }, { "epoch": 0.288681446907818, "grad_norm": 1.5717118261883831, "learning_rate": 8.954429532388964e-06, "loss": 0.5065, "step": 12370 }, { "epoch": 0.2889148191365227, "grad_norm": 1.711666456142017, "learning_rate": 8.951935623079485e-06, "loss": 0.5396, "step": 12380 }, { "epoch": 0.2891481913652275, "grad_norm": 1.7115284845859189, "learning_rate": 8.949439091225234e-06, "loss": 0.4856, "step": 12390 }, { "epoch": 0.2893815635939323, "grad_norm": 1.6607754075493049, "learning_rate": 8.946939938482933e-06, "loss": 0.504, "step": 12400 }, { "epoch": 0.2896149358226371, "grad_norm": 3.2994478598952504, "learning_rate": 8.944438166511047e-06, "loss": 0.4859, "step": 12410 }, { "epoch": 0.2898483080513419, "grad_norm": 1.8219980040920856, "learning_rate": 8.941933776969777e-06, "loss": 0.5137, "step": 12420 }, { "epoch": 0.29008168028004666, "grad_norm": 1.6174798289044692, "learning_rate": 8.939426771521062e-06, "loss": 0.4939, "step": 12430 }, { "epoch": 0.29031505250875145, "grad_norm": 1.477343517476252, "learning_rate": 8.936917151828574e-06, "loss": 0.5158, "step": 12440 }, { "epoch": 0.29054842473745623, "grad_norm": 1.7066094608283482, "learning_rate": 8.934404919557726e-06, "loss": 0.5084, "step": 12450 }, { "epoch": 0.290781796966161, "grad_norm": 1.4262208687830717, "learning_rate": 8.931890076375657e-06, "loss": 0.5215, "step": 12460 }, { "epoch": 0.2910151691948658, "grad_norm": 2.118009485903479, "learning_rate": 8.929372623951245e-06, "loss": 0.5411, "step": 12470 }, { "epoch": 0.2912485414235706, "grad_norm": 1.3548912774943989, "learning_rate": 8.926852563955098e-06, "loss": 0.5023, "step": 12480 }, { "epoch": 0.2914819136522754, "grad_norm": 1.5901627867951649, "learning_rate": 8.924329898059552e-06, "loss": 0.4982, "step": 12490 }, { "epoch": 0.29171528588098017, "grad_norm": 1.6298278627819063, "learning_rate": 8.921804627938672e-06, "loss": 0.4861, "step": 12500 }, { "epoch": 0.29194865810968496, "grad_norm": 1.4706275201554386, "learning_rate": 8.919276755268256e-06, "loss": 0.4898, "step": 12510 }, { "epoch": 0.29218203033838974, "grad_norm": 1.559683980479244, "learning_rate": 8.916746281725826e-06, "loss": 0.4836, "step": 12520 }, { "epoch": 0.29241540256709453, "grad_norm": 1.4079638664659795, "learning_rate": 8.914213208990627e-06, "loss": 0.514, "step": 12530 }, { "epoch": 0.2926487747957993, "grad_norm": 1.529321299143766, "learning_rate": 8.911677538743634e-06, "loss": 0.5027, "step": 12540 }, { "epoch": 0.2928821470245041, "grad_norm": 1.6745571830456492, "learning_rate": 8.909139272667547e-06, "loss": 0.5052, "step": 12550 }, { "epoch": 0.2931155192532089, "grad_norm": 1.7231773470288128, "learning_rate": 8.906598412446778e-06, "loss": 0.495, "step": 12560 }, { "epoch": 0.2933488914819137, "grad_norm": 1.5722520022957691, "learning_rate": 8.904054959767473e-06, "loss": 0.5203, "step": 12570 }, { "epoch": 0.29358226371061846, "grad_norm": 1.719101974535538, "learning_rate": 8.901508916317492e-06, "loss": 0.5137, "step": 12580 }, { "epoch": 0.2938156359393232, "grad_norm": 1.993719559247674, "learning_rate": 8.898960283786415e-06, "loss": 0.525, "step": 12590 }, { "epoch": 0.294049008168028, "grad_norm": 1.5018534919698432, "learning_rate": 8.89640906386554e-06, "loss": 0.483, "step": 12600 }, { "epoch": 0.29428238039673277, "grad_norm": 1.7319138640645007, "learning_rate": 8.893855258247884e-06, "loss": 0.5007, "step": 12610 }, { "epoch": 0.29451575262543755, "grad_norm": 1.9186136277127759, "learning_rate": 8.891298868628176e-06, "loss": 0.5043, "step": 12620 }, { "epoch": 0.29474912485414234, "grad_norm": 1.3943410559538558, "learning_rate": 8.888739896702865e-06, "loss": 0.5114, "step": 12630 }, { "epoch": 0.29498249708284713, "grad_norm": 1.5651076563627686, "learning_rate": 8.88617834417011e-06, "loss": 0.5128, "step": 12640 }, { "epoch": 0.2952158693115519, "grad_norm": 1.576878972566554, "learning_rate": 8.883614212729783e-06, "loss": 0.5039, "step": 12650 }, { "epoch": 0.2954492415402567, "grad_norm": 1.5974578497965408, "learning_rate": 8.881047504083466e-06, "loss": 0.5067, "step": 12660 }, { "epoch": 0.2956826137689615, "grad_norm": 1.5446398252047344, "learning_rate": 8.878478219934457e-06, "loss": 0.5079, "step": 12670 }, { "epoch": 0.2959159859976663, "grad_norm": 1.6821524175742208, "learning_rate": 8.875906361987755e-06, "loss": 0.4975, "step": 12680 }, { "epoch": 0.29614935822637106, "grad_norm": 1.4972407413619462, "learning_rate": 8.873331931950075e-06, "loss": 0.4992, "step": 12690 }, { "epoch": 0.29638273045507585, "grad_norm": 1.6663292327250006, "learning_rate": 8.870754931529833e-06, "loss": 0.4984, "step": 12700 }, { "epoch": 0.29661610268378064, "grad_norm": 1.4855588240767854, "learning_rate": 8.868175362437152e-06, "loss": 0.5086, "step": 12710 }, { "epoch": 0.2968494749124854, "grad_norm": 1.6563565339855506, "learning_rate": 8.865593226383861e-06, "loss": 0.5158, "step": 12720 }, { "epoch": 0.2970828471411902, "grad_norm": 1.9636202945630181, "learning_rate": 8.863008525083495e-06, "loss": 0.4837, "step": 12730 }, { "epoch": 0.297316219369895, "grad_norm": 4.227756803081928, "learning_rate": 8.860421260251281e-06, "loss": 0.5125, "step": 12740 }, { "epoch": 0.2975495915985998, "grad_norm": 1.5937556788900182, "learning_rate": 8.857831433604163e-06, "loss": 0.5188, "step": 12750 }, { "epoch": 0.29778296382730457, "grad_norm": 1.580944529713419, "learning_rate": 8.85523904686077e-06, "loss": 0.508, "step": 12760 }, { "epoch": 0.29801633605600936, "grad_norm": 1.684902411360959, "learning_rate": 8.852644101741436e-06, "loss": 0.4974, "step": 12770 }, { "epoch": 0.29824970828471414, "grad_norm": 1.5034242986011794, "learning_rate": 8.850046599968198e-06, "loss": 0.5011, "step": 12780 }, { "epoch": 0.29848308051341893, "grad_norm": 1.896760089818133, "learning_rate": 8.847446543264781e-06, "loss": 0.4975, "step": 12790 }, { "epoch": 0.29871645274212366, "grad_norm": 1.7228449872409013, "learning_rate": 8.844843933356609e-06, "loss": 0.5015, "step": 12800 }, { "epoch": 0.29894982497082845, "grad_norm": 1.6823464123247804, "learning_rate": 8.842238771970804e-06, "loss": 0.4954, "step": 12810 }, { "epoch": 0.29918319719953324, "grad_norm": 1.6422544364640914, "learning_rate": 8.839631060836172e-06, "loss": 0.4922, "step": 12820 }, { "epoch": 0.299416569428238, "grad_norm": 1.4686746249399503, "learning_rate": 8.837020801683223e-06, "loss": 0.5124, "step": 12830 }, { "epoch": 0.2996499416569428, "grad_norm": 1.7808232035564182, "learning_rate": 8.834407996244146e-06, "loss": 0.5509, "step": 12840 }, { "epoch": 0.2998833138856476, "grad_norm": 1.4327691111376537, "learning_rate": 8.831792646252829e-06, "loss": 0.4939, "step": 12850 }, { "epoch": 0.3001166861143524, "grad_norm": 1.5717160515495672, "learning_rate": 8.829174753444843e-06, "loss": 0.5004, "step": 12860 }, { "epoch": 0.30035005834305717, "grad_norm": 2.720583484693275, "learning_rate": 8.82655431955745e-06, "loss": 0.5094, "step": 12870 }, { "epoch": 0.30058343057176196, "grad_norm": 1.57584614758833, "learning_rate": 8.823931346329595e-06, "loss": 0.5167, "step": 12880 }, { "epoch": 0.30081680280046674, "grad_norm": 1.6225278909005891, "learning_rate": 8.821305835501912e-06, "loss": 0.5204, "step": 12890 }, { "epoch": 0.30105017502917153, "grad_norm": 1.5561320730644808, "learning_rate": 8.818677788816713e-06, "loss": 0.5171, "step": 12900 }, { "epoch": 0.3012835472578763, "grad_norm": 1.3669010602739435, "learning_rate": 8.816047208017999e-06, "loss": 0.5023, "step": 12910 }, { "epoch": 0.3015169194865811, "grad_norm": 1.4317573979425784, "learning_rate": 8.813414094851452e-06, "loss": 0.4914, "step": 12920 }, { "epoch": 0.3017502917152859, "grad_norm": 1.3794877775654295, "learning_rate": 8.810778451064428e-06, "loss": 0.486, "step": 12930 }, { "epoch": 0.3019836639439907, "grad_norm": 1.4880097053384298, "learning_rate": 8.808140278405973e-06, "loss": 0.5135, "step": 12940 }, { "epoch": 0.30221703617269546, "grad_norm": 3.86947840392625, "learning_rate": 8.8054995786268e-06, "loss": 0.5362, "step": 12950 }, { "epoch": 0.30245040840140025, "grad_norm": 1.4701539421719203, "learning_rate": 8.802856353479308e-06, "loss": 0.5117, "step": 12960 }, { "epoch": 0.30268378063010504, "grad_norm": 1.6405915322624773, "learning_rate": 8.800210604717565e-06, "loss": 0.5055, "step": 12970 }, { "epoch": 0.3029171528588098, "grad_norm": 1.5338417823519328, "learning_rate": 8.79756233409732e-06, "loss": 0.4675, "step": 12980 }, { "epoch": 0.3031505250875146, "grad_norm": 1.631348825109797, "learning_rate": 8.794911543375989e-06, "loss": 0.5098, "step": 12990 }, { "epoch": 0.3033838973162194, "grad_norm": 1.533413727708728, "learning_rate": 8.792258234312667e-06, "loss": 0.47, "step": 13000 }, { "epoch": 0.30361726954492413, "grad_norm": 1.639896727084452, "learning_rate": 8.789602408668115e-06, "loss": 0.5031, "step": 13010 }, { "epoch": 0.3038506417736289, "grad_norm": 1.637406892760708, "learning_rate": 8.786944068204766e-06, "loss": 0.5116, "step": 13020 }, { "epoch": 0.3040840140023337, "grad_norm": 1.4665559764076002, "learning_rate": 8.784283214686725e-06, "loss": 0.5263, "step": 13030 }, { "epoch": 0.3043173862310385, "grad_norm": 1.8398386169558987, "learning_rate": 8.781619849879755e-06, "loss": 0.4761, "step": 13040 }, { "epoch": 0.3045507584597433, "grad_norm": 1.5431098485547265, "learning_rate": 8.778953975551298e-06, "loss": 0.5178, "step": 13050 }, { "epoch": 0.30478413068844806, "grad_norm": 1.57867000093747, "learning_rate": 8.776285593470455e-06, "loss": 0.4713, "step": 13060 }, { "epoch": 0.30501750291715285, "grad_norm": 1.4766938610651092, "learning_rate": 8.773614705407987e-06, "loss": 0.5096, "step": 13070 }, { "epoch": 0.30525087514585764, "grad_norm": 1.4858071434485411, "learning_rate": 8.770941313136327e-06, "loss": 0.4939, "step": 13080 }, { "epoch": 0.3054842473745624, "grad_norm": 1.563221984201051, "learning_rate": 8.768265418429563e-06, "loss": 0.5008, "step": 13090 }, { "epoch": 0.3057176196032672, "grad_norm": 1.5142851926263683, "learning_rate": 8.765587023063446e-06, "loss": 0.503, "step": 13100 }, { "epoch": 0.305950991831972, "grad_norm": 1.6709124497316112, "learning_rate": 8.76290612881539e-06, "loss": 0.5079, "step": 13110 }, { "epoch": 0.3061843640606768, "grad_norm": 1.7441359160352499, "learning_rate": 8.760222737464458e-06, "loss": 0.518, "step": 13120 }, { "epoch": 0.30641773628938157, "grad_norm": 1.66301012662659, "learning_rate": 8.75753685079138e-06, "loss": 0.4916, "step": 13130 }, { "epoch": 0.30665110851808636, "grad_norm": 1.8296848350169244, "learning_rate": 8.754848470578538e-06, "loss": 0.5044, "step": 13140 }, { "epoch": 0.30688448074679114, "grad_norm": 1.471971932176099, "learning_rate": 8.752157598609966e-06, "loss": 0.5003, "step": 13150 }, { "epoch": 0.30711785297549593, "grad_norm": 1.5641075132162734, "learning_rate": 8.749464236671354e-06, "loss": 0.5178, "step": 13160 }, { "epoch": 0.3073512252042007, "grad_norm": 1.246088287572766, "learning_rate": 8.746768386550048e-06, "loss": 0.5138, "step": 13170 }, { "epoch": 0.3075845974329055, "grad_norm": 1.6293622503849794, "learning_rate": 8.744070050035039e-06, "loss": 0.4846, "step": 13180 }, { "epoch": 0.3078179696616103, "grad_norm": 1.3402436162833962, "learning_rate": 8.741369228916969e-06, "loss": 0.4837, "step": 13190 }, { "epoch": 0.3080513418903151, "grad_norm": 1.5936206605464074, "learning_rate": 8.738665924988135e-06, "loss": 0.4951, "step": 13200 }, { "epoch": 0.3082847141190198, "grad_norm": 1.6053039256014938, "learning_rate": 8.735960140042472e-06, "loss": 0.5168, "step": 13210 }, { "epoch": 0.3085180863477246, "grad_norm": 1.5461433174030021, "learning_rate": 8.73325187587557e-06, "loss": 0.4996, "step": 13220 }, { "epoch": 0.3087514585764294, "grad_norm": 1.4807889387898294, "learning_rate": 8.73054113428466e-06, "loss": 0.5386, "step": 13230 }, { "epoch": 0.30898483080513417, "grad_norm": 1.5371814762802776, "learning_rate": 8.727827917068616e-06, "loss": 0.5406, "step": 13240 }, { "epoch": 0.30921820303383896, "grad_norm": 1.7038852172771066, "learning_rate": 8.72511222602796e-06, "loss": 0.4716, "step": 13250 }, { "epoch": 0.30945157526254374, "grad_norm": 1.4149016181262963, "learning_rate": 8.72239406296485e-06, "loss": 0.4973, "step": 13260 }, { "epoch": 0.30968494749124853, "grad_norm": 1.504798973654345, "learning_rate": 8.719673429683085e-06, "loss": 0.4905, "step": 13270 }, { "epoch": 0.3099183197199533, "grad_norm": 1.449563564310814, "learning_rate": 8.71695032798811e-06, "loss": 0.4772, "step": 13280 }, { "epoch": 0.3101516919486581, "grad_norm": 1.5488423865352918, "learning_rate": 8.714224759686997e-06, "loss": 0.4963, "step": 13290 }, { "epoch": 0.3103850641773629, "grad_norm": 1.5144250648912208, "learning_rate": 8.711496726588466e-06, "loss": 0.5128, "step": 13300 }, { "epoch": 0.3106184364060677, "grad_norm": 1.7000158554916844, "learning_rate": 8.708766230502865e-06, "loss": 0.4958, "step": 13310 }, { "epoch": 0.31085180863477246, "grad_norm": 1.6990922838595315, "learning_rate": 8.706033273242178e-06, "loss": 0.5216, "step": 13320 }, { "epoch": 0.31108518086347725, "grad_norm": 1.4818544216119045, "learning_rate": 8.703297856620027e-06, "loss": 0.4771, "step": 13330 }, { "epoch": 0.31131855309218204, "grad_norm": 1.853562993402067, "learning_rate": 8.70055998245166e-06, "loss": 0.4894, "step": 13340 }, { "epoch": 0.3115519253208868, "grad_norm": 1.6604003504671805, "learning_rate": 8.697819652553956e-06, "loss": 0.4961, "step": 13350 }, { "epoch": 0.3117852975495916, "grad_norm": 1.7013065197513695, "learning_rate": 8.69507686874543e-06, "loss": 0.4874, "step": 13360 }, { "epoch": 0.3120186697782964, "grad_norm": 1.8815843308184665, "learning_rate": 8.69233163284622e-06, "loss": 0.5133, "step": 13370 }, { "epoch": 0.3122520420070012, "grad_norm": 1.657064862147506, "learning_rate": 8.689583946678092e-06, "loss": 0.5075, "step": 13380 }, { "epoch": 0.31248541423570597, "grad_norm": 1.6317758243385105, "learning_rate": 8.686833812064439e-06, "loss": 0.5042, "step": 13390 }, { "epoch": 0.31271878646441076, "grad_norm": 1.8246531447251526, "learning_rate": 8.684081230830276e-06, "loss": 0.5002, "step": 13400 }, { "epoch": 0.31295215869311555, "grad_norm": 1.7271675691704276, "learning_rate": 8.681326204802247e-06, "loss": 0.5019, "step": 13410 }, { "epoch": 0.3131855309218203, "grad_norm": 1.5442496234566543, "learning_rate": 8.678568735808615e-06, "loss": 0.4938, "step": 13420 }, { "epoch": 0.31341890315052506, "grad_norm": 1.5588406118085172, "learning_rate": 8.675808825679265e-06, "loss": 0.5192, "step": 13430 }, { "epoch": 0.31365227537922985, "grad_norm": 1.5132087547829056, "learning_rate": 8.673046476245698e-06, "loss": 0.5047, "step": 13440 }, { "epoch": 0.31388564760793464, "grad_norm": 1.5408414606513243, "learning_rate": 8.67028168934104e-06, "loss": 0.4797, "step": 13450 }, { "epoch": 0.3141190198366394, "grad_norm": 1.443011423283554, "learning_rate": 8.667514466800033e-06, "loss": 0.4943, "step": 13460 }, { "epoch": 0.3143523920653442, "grad_norm": 1.8115691815558022, "learning_rate": 8.66474481045903e-06, "loss": 0.5272, "step": 13470 }, { "epoch": 0.314585764294049, "grad_norm": 1.4472500882631283, "learning_rate": 8.66197272215601e-06, "loss": 0.5503, "step": 13480 }, { "epoch": 0.3148191365227538, "grad_norm": 1.7231787062589254, "learning_rate": 8.659198203730553e-06, "loss": 0.494, "step": 13490 }, { "epoch": 0.31505250875145857, "grad_norm": 1.75137607497001, "learning_rate": 8.656421257023857e-06, "loss": 0.5201, "step": 13500 }, { "epoch": 0.31528588098016336, "grad_norm": 1.6625876058612645, "learning_rate": 8.653641883878737e-06, "loss": 0.5159, "step": 13510 }, { "epoch": 0.31551925320886814, "grad_norm": 1.5719993138529995, "learning_rate": 8.650860086139611e-06, "loss": 0.5069, "step": 13520 }, { "epoch": 0.31575262543757293, "grad_norm": 1.5035744374921876, "learning_rate": 8.648075865652508e-06, "loss": 0.5085, "step": 13530 }, { "epoch": 0.3159859976662777, "grad_norm": 1.105001460126243, "learning_rate": 8.645289224265064e-06, "loss": 0.4856, "step": 13540 }, { "epoch": 0.3162193698949825, "grad_norm": 1.5416349647366492, "learning_rate": 8.642500163826527e-06, "loss": 0.4941, "step": 13550 }, { "epoch": 0.3164527421236873, "grad_norm": 1.7722831740600191, "learning_rate": 8.63970868618774e-06, "loss": 0.5029, "step": 13560 }, { "epoch": 0.3166861143523921, "grad_norm": 1.7432561169297878, "learning_rate": 8.63691479320116e-06, "loss": 0.4959, "step": 13570 }, { "epoch": 0.31691948658109687, "grad_norm": 1.5849089272685994, "learning_rate": 8.634118486720842e-06, "loss": 0.5308, "step": 13580 }, { "epoch": 0.31715285880980165, "grad_norm": 1.3516354155196297, "learning_rate": 8.631319768602445e-06, "loss": 0.5059, "step": 13590 }, { "epoch": 0.31738623103850644, "grad_norm": 1.5138825669035632, "learning_rate": 8.628518640703222e-06, "loss": 0.5102, "step": 13600 }, { "epoch": 0.3176196032672112, "grad_norm": 1.6773428168116025, "learning_rate": 8.625715104882037e-06, "loss": 0.5007, "step": 13610 }, { "epoch": 0.317852975495916, "grad_norm": 1.807832993596907, "learning_rate": 8.622909162999339e-06, "loss": 0.4966, "step": 13620 }, { "epoch": 0.31808634772462074, "grad_norm": 1.7926189042205032, "learning_rate": 8.620100816917183e-06, "loss": 0.5026, "step": 13630 }, { "epoch": 0.31831971995332553, "grad_norm": 1.6579591468957342, "learning_rate": 8.617290068499213e-06, "loss": 0.4899, "step": 13640 }, { "epoch": 0.3185530921820303, "grad_norm": 1.5859204026846982, "learning_rate": 8.614476919610675e-06, "loss": 0.4602, "step": 13650 }, { "epoch": 0.3187864644107351, "grad_norm": 1.8558240102655474, "learning_rate": 8.6116613721184e-06, "loss": 0.5103, "step": 13660 }, { "epoch": 0.3190198366394399, "grad_norm": 1.6998401881122913, "learning_rate": 8.608843427890813e-06, "loss": 0.5246, "step": 13670 }, { "epoch": 0.3192532088681447, "grad_norm": 1.5171012615484307, "learning_rate": 8.606023088797935e-06, "loss": 0.4729, "step": 13680 }, { "epoch": 0.31948658109684946, "grad_norm": 1.5827334887221172, "learning_rate": 8.603200356711365e-06, "loss": 0.4735, "step": 13690 }, { "epoch": 0.31971995332555425, "grad_norm": 1.3949353832461762, "learning_rate": 8.600375233504305e-06, "loss": 0.5039, "step": 13700 }, { "epoch": 0.31995332555425904, "grad_norm": 1.7049420030563485, "learning_rate": 8.597547721051528e-06, "loss": 0.4917, "step": 13710 }, { "epoch": 0.3201866977829638, "grad_norm": 1.4604213189678692, "learning_rate": 8.594717821229404e-06, "loss": 0.4985, "step": 13720 }, { "epoch": 0.3204200700116686, "grad_norm": 1.807318901669287, "learning_rate": 8.591885535915884e-06, "loss": 0.4771, "step": 13730 }, { "epoch": 0.3206534422403734, "grad_norm": 1.5750693180773787, "learning_rate": 8.5890508669905e-06, "loss": 0.4892, "step": 13740 }, { "epoch": 0.3208868144690782, "grad_norm": 1.6876229410509134, "learning_rate": 8.586213816334367e-06, "loss": 0.5112, "step": 13750 }, { "epoch": 0.32112018669778297, "grad_norm": 1.8105294047875147, "learning_rate": 8.58337438583018e-06, "loss": 0.4929, "step": 13760 }, { "epoch": 0.32135355892648776, "grad_norm": 1.5732781366661692, "learning_rate": 8.580532577362217e-06, "loss": 0.4904, "step": 13770 }, { "epoch": 0.32158693115519255, "grad_norm": 1.6647561490534133, "learning_rate": 8.577688392816328e-06, "loss": 0.5017, "step": 13780 }, { "epoch": 0.32182030338389733, "grad_norm": 1.5523700259522157, "learning_rate": 8.574841834079945e-06, "loss": 0.5006, "step": 13790 }, { "epoch": 0.3220536756126021, "grad_norm": 1.4911319359022723, "learning_rate": 8.571992903042071e-06, "loss": 0.5074, "step": 13800 }, { "epoch": 0.3222870478413069, "grad_norm": 1.3141191431133619, "learning_rate": 8.569141601593288e-06, "loss": 0.5123, "step": 13810 }, { "epoch": 0.3225204200700117, "grad_norm": 1.7489912648017223, "learning_rate": 8.566287931625747e-06, "loss": 0.5071, "step": 13820 }, { "epoch": 0.3227537922987165, "grad_norm": 1.607419853054715, "learning_rate": 8.563431895033176e-06, "loss": 0.5259, "step": 13830 }, { "epoch": 0.3229871645274212, "grad_norm": 1.6279382440423578, "learning_rate": 8.560573493710865e-06, "loss": 0.5157, "step": 13840 }, { "epoch": 0.323220536756126, "grad_norm": 1.5121895650018569, "learning_rate": 8.557712729555682e-06, "loss": 0.5238, "step": 13850 }, { "epoch": 0.3234539089848308, "grad_norm": 1.5680104345855583, "learning_rate": 8.554849604466056e-06, "loss": 0.5015, "step": 13860 }, { "epoch": 0.32368728121353557, "grad_norm": 1.4585421567567074, "learning_rate": 8.551984120341989e-06, "loss": 0.5001, "step": 13870 }, { "epoch": 0.32392065344224036, "grad_norm": 1.3740226054233924, "learning_rate": 8.549116279085045e-06, "loss": 0.4998, "step": 13880 }, { "epoch": 0.32415402567094515, "grad_norm": 1.5152222201517662, "learning_rate": 8.546246082598352e-06, "loss": 0.4966, "step": 13890 }, { "epoch": 0.32438739789964993, "grad_norm": 1.6689011596827759, "learning_rate": 8.5433735327866e-06, "loss": 0.4986, "step": 13900 }, { "epoch": 0.3246207701283547, "grad_norm": 1.5592154112874033, "learning_rate": 8.540498631556046e-06, "loss": 0.4949, "step": 13910 }, { "epoch": 0.3248541423570595, "grad_norm": 1.3681194327424115, "learning_rate": 8.537621380814502e-06, "loss": 0.5004, "step": 13920 }, { "epoch": 0.3250875145857643, "grad_norm": 1.546461632445181, "learning_rate": 8.534741782471339e-06, "loss": 0.4958, "step": 13930 }, { "epoch": 0.3253208868144691, "grad_norm": 1.3840056863550272, "learning_rate": 8.53185983843749e-06, "loss": 0.5241, "step": 13940 }, { "epoch": 0.32555425904317387, "grad_norm": 1.6933455080614157, "learning_rate": 8.528975550625444e-06, "loss": 0.4915, "step": 13950 }, { "epoch": 0.32578763127187865, "grad_norm": 3.536736168920722, "learning_rate": 8.52608892094924e-06, "loss": 0.5128, "step": 13960 }, { "epoch": 0.32602100350058344, "grad_norm": 1.584329044168368, "learning_rate": 8.523199951324477e-06, "loss": 0.5058, "step": 13970 }, { "epoch": 0.3262543757292882, "grad_norm": 1.608287093670301, "learning_rate": 8.520308643668304e-06, "loss": 0.4957, "step": 13980 }, { "epoch": 0.326487747957993, "grad_norm": 1.4493651923426951, "learning_rate": 8.517414999899423e-06, "loss": 0.4937, "step": 13990 }, { "epoch": 0.3267211201866978, "grad_norm": 1.720235628968434, "learning_rate": 8.514519021938085e-06, "loss": 0.5162, "step": 14000 }, { "epoch": 0.3269544924154026, "grad_norm": 1.8401767708897152, "learning_rate": 8.511620711706091e-06, "loss": 0.4997, "step": 14010 }, { "epoch": 0.3271878646441074, "grad_norm": 1.4279857705663215, "learning_rate": 8.50872007112679e-06, "loss": 0.4912, "step": 14020 }, { "epoch": 0.32742123687281216, "grad_norm": 1.7415728355967752, "learning_rate": 8.505817102125074e-06, "loss": 0.5106, "step": 14030 }, { "epoch": 0.32765460910151695, "grad_norm": 1.808383696560252, "learning_rate": 8.502911806627385e-06, "loss": 0.5009, "step": 14040 }, { "epoch": 0.3278879813302217, "grad_norm": 1.6927767519691845, "learning_rate": 8.500004186561706e-06, "loss": 0.4863, "step": 14050 }, { "epoch": 0.32812135355892647, "grad_norm": 1.5514197388823303, "learning_rate": 8.497094243857562e-06, "loss": 0.4643, "step": 14060 }, { "epoch": 0.32835472578763125, "grad_norm": 1.8507158076098633, "learning_rate": 8.494181980446024e-06, "loss": 0.4933, "step": 14070 }, { "epoch": 0.32858809801633604, "grad_norm": 1.5766243282173327, "learning_rate": 8.491267398259695e-06, "loss": 0.4669, "step": 14080 }, { "epoch": 0.3288214702450408, "grad_norm": 1.700260931018511, "learning_rate": 8.488350499232727e-06, "loss": 0.5089, "step": 14090 }, { "epoch": 0.3290548424737456, "grad_norm": 1.6335128708805964, "learning_rate": 8.4854312853008e-06, "loss": 0.5146, "step": 14100 }, { "epoch": 0.3292882147024504, "grad_norm": 1.437922216839244, "learning_rate": 8.482509758401136e-06, "loss": 0.4908, "step": 14110 }, { "epoch": 0.3295215869311552, "grad_norm": 1.7298672321355812, "learning_rate": 8.47958592047249e-06, "loss": 0.4911, "step": 14120 }, { "epoch": 0.32975495915986, "grad_norm": 1.3875744384327122, "learning_rate": 8.476659773455148e-06, "loss": 0.4879, "step": 14130 }, { "epoch": 0.32998833138856476, "grad_norm": 1.5368596839560291, "learning_rate": 8.473731319290934e-06, "loss": 0.4794, "step": 14140 }, { "epoch": 0.33022170361726955, "grad_norm": 1.5873389961753446, "learning_rate": 8.470800559923198e-06, "loss": 0.4905, "step": 14150 }, { "epoch": 0.33045507584597433, "grad_norm": 1.5811018931646175, "learning_rate": 8.467867497296825e-06, "loss": 0.4971, "step": 14160 }, { "epoch": 0.3306884480746791, "grad_norm": 1.5720070210568908, "learning_rate": 8.464932133358221e-06, "loss": 0.4964, "step": 14170 }, { "epoch": 0.3309218203033839, "grad_norm": 1.5949597814031715, "learning_rate": 8.461994470055327e-06, "loss": 0.4947, "step": 14180 }, { "epoch": 0.3311551925320887, "grad_norm": 1.4948392025242627, "learning_rate": 8.459054509337604e-06, "loss": 0.515, "step": 14190 }, { "epoch": 0.3313885647607935, "grad_norm": 1.6775336855526177, "learning_rate": 8.456112253156038e-06, "loss": 0.5043, "step": 14200 }, { "epoch": 0.33162193698949827, "grad_norm": 1.1343658714290898, "learning_rate": 8.453167703463148e-06, "loss": 0.5199, "step": 14210 }, { "epoch": 0.33185530921820305, "grad_norm": 1.7010400187079526, "learning_rate": 8.450220862212959e-06, "loss": 0.4792, "step": 14220 }, { "epoch": 0.33208868144690784, "grad_norm": 1.5356839880159003, "learning_rate": 8.447271731361029e-06, "loss": 0.4945, "step": 14230 }, { "epoch": 0.3323220536756126, "grad_norm": 1.8861799787412339, "learning_rate": 8.444320312864429e-06, "loss": 0.5162, "step": 14240 }, { "epoch": 0.33255542590431736, "grad_norm": 1.5409775874611573, "learning_rate": 8.441366608681754e-06, "loss": 0.4937, "step": 14250 }, { "epoch": 0.33278879813302215, "grad_norm": 1.7322557012657258, "learning_rate": 8.438410620773108e-06, "loss": 0.4925, "step": 14260 }, { "epoch": 0.33302217036172693, "grad_norm": 1.6449649304461076, "learning_rate": 8.435452351100118e-06, "loss": 0.4924, "step": 14270 }, { "epoch": 0.3332555425904317, "grad_norm": 1.852252456760072, "learning_rate": 8.43249180162592e-06, "loss": 0.4895, "step": 14280 }, { "epoch": 0.3334889148191365, "grad_norm": 2.4803307743817147, "learning_rate": 8.429528974315165e-06, "loss": 0.4923, "step": 14290 }, { "epoch": 0.3337222870478413, "grad_norm": 1.4167857143090854, "learning_rate": 8.426563871134017e-06, "loss": 0.4985, "step": 14300 }, { "epoch": 0.3339556592765461, "grad_norm": 1.513612894738059, "learning_rate": 8.423596494050146e-06, "loss": 0.466, "step": 14310 }, { "epoch": 0.33418903150525087, "grad_norm": 1.9164765620100888, "learning_rate": 8.420626845032736e-06, "loss": 0.5379, "step": 14320 }, { "epoch": 0.33442240373395565, "grad_norm": 2.009878352820868, "learning_rate": 8.417654926052475e-06, "loss": 0.5217, "step": 14330 }, { "epoch": 0.33465577596266044, "grad_norm": 1.616908635177217, "learning_rate": 8.41468073908156e-06, "loss": 0.4821, "step": 14340 }, { "epoch": 0.3348891481913652, "grad_norm": 1.6819292044881538, "learning_rate": 8.411704286093691e-06, "loss": 0.4862, "step": 14350 }, { "epoch": 0.33512252042007, "grad_norm": 1.70101767296804, "learning_rate": 8.408725569064074e-06, "loss": 0.4857, "step": 14360 }, { "epoch": 0.3353558926487748, "grad_norm": 1.3787675580794168, "learning_rate": 8.405744589969412e-06, "loss": 0.4673, "step": 14370 }, { "epoch": 0.3355892648774796, "grad_norm": 2.041780417150165, "learning_rate": 8.402761350787916e-06, "loss": 0.5181, "step": 14380 }, { "epoch": 0.3358226371061844, "grad_norm": 1.5185605620212625, "learning_rate": 8.399775853499295e-06, "loss": 0.5123, "step": 14390 }, { "epoch": 0.33605600933488916, "grad_norm": 1.6899501020816043, "learning_rate": 8.396788100084754e-06, "loss": 0.5003, "step": 14400 }, { "epoch": 0.33628938156359395, "grad_norm": 3.6665988233376603, "learning_rate": 8.393798092526996e-06, "loss": 0.5178, "step": 14410 }, { "epoch": 0.33652275379229873, "grad_norm": 1.911534309998152, "learning_rate": 8.390805832810222e-06, "loss": 0.4996, "step": 14420 }, { "epoch": 0.3367561260210035, "grad_norm": 1.542130288651411, "learning_rate": 8.387811322920125e-06, "loss": 0.4783, "step": 14430 }, { "epoch": 0.3369894982497083, "grad_norm": 1.3861554027089111, "learning_rate": 8.384814564843893e-06, "loss": 0.4772, "step": 14440 }, { "epoch": 0.3372228704784131, "grad_norm": 1.5108138570810634, "learning_rate": 8.381815560570204e-06, "loss": 0.5083, "step": 14450 }, { "epoch": 0.3374562427071178, "grad_norm": 1.6815619144174592, "learning_rate": 8.37881431208923e-06, "loss": 0.4729, "step": 14460 }, { "epoch": 0.3376896149358226, "grad_norm": 1.7737840893817154, "learning_rate": 8.375810821392627e-06, "loss": 0.4973, "step": 14470 }, { "epoch": 0.3379229871645274, "grad_norm": 1.6167784334595927, "learning_rate": 8.372805090473545e-06, "loss": 0.5104, "step": 14480 }, { "epoch": 0.3381563593932322, "grad_norm": 1.6382676797188491, "learning_rate": 8.369797121326617e-06, "loss": 0.5025, "step": 14490 }, { "epoch": 0.338389731621937, "grad_norm": 1.6881864321922064, "learning_rate": 8.36678691594796e-06, "loss": 0.5238, "step": 14500 }, { "epoch": 0.33862310385064176, "grad_norm": 1.8291423937234712, "learning_rate": 8.363774476335178e-06, "loss": 0.489, "step": 14510 }, { "epoch": 0.33885647607934655, "grad_norm": 1.9283110241829609, "learning_rate": 8.360759804487359e-06, "loss": 0.4992, "step": 14520 }, { "epoch": 0.33908984830805133, "grad_norm": 1.7009841573146531, "learning_rate": 8.357742902405066e-06, "loss": 0.4946, "step": 14530 }, { "epoch": 0.3393232205367561, "grad_norm": 1.5390386720571216, "learning_rate": 8.35472377209035e-06, "loss": 0.4748, "step": 14540 }, { "epoch": 0.3395565927654609, "grad_norm": 1.9635711373591174, "learning_rate": 8.351702415546734e-06, "loss": 0.5109, "step": 14550 }, { "epoch": 0.3397899649941657, "grad_norm": 1.818281400677569, "learning_rate": 8.348678834779221e-06, "loss": 0.464, "step": 14560 }, { "epoch": 0.3400233372228705, "grad_norm": 1.881005399190479, "learning_rate": 8.345653031794292e-06, "loss": 0.5007, "step": 14570 }, { "epoch": 0.34025670945157527, "grad_norm": 1.6829026556572066, "learning_rate": 8.342625008599899e-06, "loss": 0.5, "step": 14580 }, { "epoch": 0.34049008168028005, "grad_norm": 1.5170783089105122, "learning_rate": 8.339594767205468e-06, "loss": 0.4864, "step": 14590 }, { "epoch": 0.34072345390898484, "grad_norm": 1.4822636775817253, "learning_rate": 8.336562309621901e-06, "loss": 0.472, "step": 14600 }, { "epoch": 0.34095682613768963, "grad_norm": 1.4991052940479732, "learning_rate": 8.333527637861567e-06, "loss": 0.5016, "step": 14610 }, { "epoch": 0.3411901983663944, "grad_norm": 1.4926750549914731, "learning_rate": 8.330490753938304e-06, "loss": 0.4709, "step": 14620 }, { "epoch": 0.3414235705950992, "grad_norm": 1.5372723431710993, "learning_rate": 8.327451659867418e-06, "loss": 0.5049, "step": 14630 }, { "epoch": 0.341656942823804, "grad_norm": 1.4209449009802646, "learning_rate": 8.324410357665686e-06, "loss": 0.5074, "step": 14640 }, { "epoch": 0.3418903150525088, "grad_norm": 1.695582346589349, "learning_rate": 8.321366849351346e-06, "loss": 0.5077, "step": 14650 }, { "epoch": 0.34212368728121356, "grad_norm": 1.6270221027442462, "learning_rate": 8.318321136944101e-06, "loss": 0.466, "step": 14660 }, { "epoch": 0.3423570595099183, "grad_norm": 1.8041661820324244, "learning_rate": 8.315273222465114e-06, "loss": 0.5006, "step": 14670 }, { "epoch": 0.3425904317386231, "grad_norm": 1.535518237974565, "learning_rate": 8.312223107937018e-06, "loss": 0.4992, "step": 14680 }, { "epoch": 0.34282380396732787, "grad_norm": 1.4887035317843398, "learning_rate": 8.309170795383895e-06, "loss": 0.5278, "step": 14690 }, { "epoch": 0.34305717619603265, "grad_norm": 4.290738752114953, "learning_rate": 8.306116286831293e-06, "loss": 0.5097, "step": 14700 }, { "epoch": 0.34329054842473744, "grad_norm": 1.9534127001550396, "learning_rate": 8.303059584306214e-06, "loss": 0.4819, "step": 14710 }, { "epoch": 0.3435239206534422, "grad_norm": 1.5666344029995596, "learning_rate": 8.300000689837117e-06, "loss": 0.5031, "step": 14720 }, { "epoch": 0.343757292882147, "grad_norm": 1.4995145363971463, "learning_rate": 8.296939605453916e-06, "loss": 0.4727, "step": 14730 }, { "epoch": 0.3439906651108518, "grad_norm": 1.5374631152273182, "learning_rate": 8.293876333187979e-06, "loss": 0.5013, "step": 14740 }, { "epoch": 0.3442240373395566, "grad_norm": 1.6112868624356715, "learning_rate": 8.290810875072122e-06, "loss": 0.4658, "step": 14750 }, { "epoch": 0.3444574095682614, "grad_norm": 1.685539901460985, "learning_rate": 8.287743233140618e-06, "loss": 0.5093, "step": 14760 }, { "epoch": 0.34469078179696616, "grad_norm": 1.4698963273116623, "learning_rate": 8.28467340942918e-06, "loss": 0.4821, "step": 14770 }, { "epoch": 0.34492415402567095, "grad_norm": 1.6925186292661323, "learning_rate": 8.281601405974982e-06, "loss": 0.5145, "step": 14780 }, { "epoch": 0.34515752625437573, "grad_norm": 1.5950176208213793, "learning_rate": 8.278527224816628e-06, "loss": 0.4842, "step": 14790 }, { "epoch": 0.3453908984830805, "grad_norm": 1.5243421272421716, "learning_rate": 8.275450867994184e-06, "loss": 0.5247, "step": 14800 }, { "epoch": 0.3456242707117853, "grad_norm": 1.5839937108510969, "learning_rate": 8.272372337549146e-06, "loss": 0.5036, "step": 14810 }, { "epoch": 0.3458576429404901, "grad_norm": 1.7028742990523786, "learning_rate": 8.26929163552446e-06, "loss": 0.4835, "step": 14820 }, { "epoch": 0.3460910151691949, "grad_norm": 1.6501861708179768, "learning_rate": 8.266208763964513e-06, "loss": 0.5207, "step": 14830 }, { "epoch": 0.34632438739789967, "grad_norm": 1.6608307303112857, "learning_rate": 8.263123724915128e-06, "loss": 0.5104, "step": 14840 }, { "epoch": 0.34655775962660446, "grad_norm": 1.7454659983120693, "learning_rate": 8.260036520423569e-06, "loss": 0.4899, "step": 14850 }, { "epoch": 0.34679113185530924, "grad_norm": 1.5597387383677068, "learning_rate": 8.256947152538536e-06, "loss": 0.5078, "step": 14860 }, { "epoch": 0.34702450408401403, "grad_norm": 1.7533045012302173, "learning_rate": 8.253855623310166e-06, "loss": 0.5268, "step": 14870 }, { "epoch": 0.34725787631271876, "grad_norm": 1.5867496993426153, "learning_rate": 8.250761934790029e-06, "loss": 0.5077, "step": 14880 }, { "epoch": 0.34749124854142355, "grad_norm": 1.4458099197201784, "learning_rate": 8.247666089031126e-06, "loss": 0.4706, "step": 14890 }, { "epoch": 0.34772462077012833, "grad_norm": 1.6316934169127066, "learning_rate": 8.244568088087894e-06, "loss": 0.5021, "step": 14900 }, { "epoch": 0.3479579929988331, "grad_norm": 1.6158124407846968, "learning_rate": 8.2414679340162e-06, "loss": 0.498, "step": 14910 }, { "epoch": 0.3481913652275379, "grad_norm": 1.6045942953746746, "learning_rate": 8.238365628873333e-06, "loss": 0.5129, "step": 14920 }, { "epoch": 0.3484247374562427, "grad_norm": 1.5006736482791632, "learning_rate": 8.235261174718019e-06, "loss": 0.5038, "step": 14930 }, { "epoch": 0.3486581096849475, "grad_norm": 1.6869189099080608, "learning_rate": 8.232154573610402e-06, "loss": 0.473, "step": 14940 }, { "epoch": 0.34889148191365227, "grad_norm": 1.6439993532271382, "learning_rate": 8.229045827612054e-06, "loss": 0.4921, "step": 14950 }, { "epoch": 0.34912485414235706, "grad_norm": 1.696232155151627, "learning_rate": 8.225934938785975e-06, "loss": 0.4776, "step": 14960 }, { "epoch": 0.34935822637106184, "grad_norm": 4.402172955626638, "learning_rate": 8.22282190919658e-06, "loss": 0.4995, "step": 14970 }, { "epoch": 0.34959159859976663, "grad_norm": 1.527002354326693, "learning_rate": 8.219706740909704e-06, "loss": 0.5217, "step": 14980 }, { "epoch": 0.3498249708284714, "grad_norm": 1.610637905926121, "learning_rate": 8.216589435992608e-06, "loss": 0.4977, "step": 14990 }, { "epoch": 0.3500583430571762, "grad_norm": 1.621663476430052, "learning_rate": 8.213469996513968e-06, "loss": 0.4926, "step": 15000 }, { "epoch": 0.350291715285881, "grad_norm": 2.9835039255210742, "learning_rate": 8.210348424543872e-06, "loss": 0.4818, "step": 15010 }, { "epoch": 0.3505250875145858, "grad_norm": 1.7464133774036557, "learning_rate": 8.20722472215383e-06, "loss": 0.5185, "step": 15020 }, { "epoch": 0.35075845974329056, "grad_norm": 1.71566254219512, "learning_rate": 8.204098891416763e-06, "loss": 0.4988, "step": 15030 }, { "epoch": 0.35099183197199535, "grad_norm": 1.216260275511818, "learning_rate": 8.200970934407004e-06, "loss": 0.4597, "step": 15040 }, { "epoch": 0.35122520420070014, "grad_norm": 1.464883235541899, "learning_rate": 8.197840853200295e-06, "loss": 0.4896, "step": 15050 }, { "epoch": 0.3514585764294049, "grad_norm": 1.5187251209367707, "learning_rate": 8.194708649873792e-06, "loss": 0.5037, "step": 15060 }, { "epoch": 0.3516919486581097, "grad_norm": 1.7478800174656783, "learning_rate": 8.191574326506056e-06, "loss": 0.5005, "step": 15070 }, { "epoch": 0.3519253208868145, "grad_norm": 1.6018213558249912, "learning_rate": 8.188437885177056e-06, "loss": 0.4647, "step": 15080 }, { "epoch": 0.35215869311551923, "grad_norm": 1.6146901593391645, "learning_rate": 8.185299327968169e-06, "loss": 0.5061, "step": 15090 }, { "epoch": 0.352392065344224, "grad_norm": 1.5013772606865647, "learning_rate": 8.182158656962169e-06, "loss": 0.4947, "step": 15100 }, { "epoch": 0.3526254375729288, "grad_norm": 1.557618904467256, "learning_rate": 8.179015874243239e-06, "loss": 0.4725, "step": 15110 }, { "epoch": 0.3528588098016336, "grad_norm": 1.7763733261232386, "learning_rate": 8.175870981896963e-06, "loss": 0.5396, "step": 15120 }, { "epoch": 0.3530921820303384, "grad_norm": 1.4911863486530152, "learning_rate": 8.172723982010322e-06, "loss": 0.4991, "step": 15130 }, { "epoch": 0.35332555425904316, "grad_norm": 1.4946528153868848, "learning_rate": 8.169574876671698e-06, "loss": 0.5021, "step": 15140 }, { "epoch": 0.35355892648774795, "grad_norm": 1.445334762052615, "learning_rate": 8.166423667970872e-06, "loss": 0.5035, "step": 15150 }, { "epoch": 0.35379229871645274, "grad_norm": 1.4491311210250466, "learning_rate": 8.163270357999012e-06, "loss": 0.4643, "step": 15160 }, { "epoch": 0.3540256709451575, "grad_norm": 1.6834388075456541, "learning_rate": 8.160114948848693e-06, "loss": 0.5186, "step": 15170 }, { "epoch": 0.3542590431738623, "grad_norm": 1.4978798370486155, "learning_rate": 8.156957442613872e-06, "loss": 0.5033, "step": 15180 }, { "epoch": 0.3544924154025671, "grad_norm": 1.5976305640153718, "learning_rate": 8.153797841389907e-06, "loss": 0.4804, "step": 15190 }, { "epoch": 0.3547257876312719, "grad_norm": 1.6138932615689319, "learning_rate": 8.150636147273537e-06, "loss": 0.4845, "step": 15200 }, { "epoch": 0.35495915985997667, "grad_norm": 1.6903778249825392, "learning_rate": 8.147472362362897e-06, "loss": 0.4757, "step": 15210 }, { "epoch": 0.35519253208868146, "grad_norm": 1.7075975097312726, "learning_rate": 8.144306488757505e-06, "loss": 0.4811, "step": 15220 }, { "epoch": 0.35542590431738624, "grad_norm": 1.4550655007329238, "learning_rate": 8.14113852855827e-06, "loss": 0.4732, "step": 15230 }, { "epoch": 0.35565927654609103, "grad_norm": 1.7489856014017506, "learning_rate": 8.13796848386748e-06, "loss": 0.5036, "step": 15240 }, { "epoch": 0.3558926487747958, "grad_norm": 1.54253322919159, "learning_rate": 8.134796356788812e-06, "loss": 0.4923, "step": 15250 }, { "epoch": 0.3561260210035006, "grad_norm": 1.4548273918924894, "learning_rate": 8.131622149427317e-06, "loss": 0.4769, "step": 15260 }, { "epoch": 0.3563593932322054, "grad_norm": 1.4633812986013435, "learning_rate": 8.128445863889436e-06, "loss": 0.5111, "step": 15270 }, { "epoch": 0.3565927654609102, "grad_norm": 1.450845216933014, "learning_rate": 8.125267502282982e-06, "loss": 0.497, "step": 15280 }, { "epoch": 0.3568261376896149, "grad_norm": 1.588066855711362, "learning_rate": 8.122087066717149e-06, "loss": 0.5092, "step": 15290 }, { "epoch": 0.3570595099183197, "grad_norm": 1.6352077609957465, "learning_rate": 8.118904559302505e-06, "loss": 0.4743, "step": 15300 }, { "epoch": 0.3572928821470245, "grad_norm": 1.454383771505251, "learning_rate": 8.115719982150997e-06, "loss": 0.4714, "step": 15310 }, { "epoch": 0.35752625437572927, "grad_norm": 1.5003890378783227, "learning_rate": 8.112533337375941e-06, "loss": 0.4794, "step": 15320 }, { "epoch": 0.35775962660443406, "grad_norm": 1.720761923439714, "learning_rate": 8.109344627092029e-06, "loss": 0.484, "step": 15330 }, { "epoch": 0.35799299883313884, "grad_norm": 1.75972662590526, "learning_rate": 8.106153853415319e-06, "loss": 0.5023, "step": 15340 }, { "epoch": 0.35822637106184363, "grad_norm": 1.6775780687812454, "learning_rate": 8.102961018463243e-06, "loss": 0.4805, "step": 15350 }, { "epoch": 0.3584597432905484, "grad_norm": 1.2380123951319515, "learning_rate": 8.099766124354596e-06, "loss": 0.4774, "step": 15360 }, { "epoch": 0.3586931155192532, "grad_norm": 1.5038538150096055, "learning_rate": 8.096569173209545e-06, "loss": 0.4781, "step": 15370 }, { "epoch": 0.358926487747958, "grad_norm": 1.6852332025101613, "learning_rate": 8.093370167149618e-06, "loss": 0.5069, "step": 15380 }, { "epoch": 0.3591598599766628, "grad_norm": 1.5825153928972995, "learning_rate": 8.09016910829771e-06, "loss": 0.4814, "step": 15390 }, { "epoch": 0.35939323220536756, "grad_norm": 1.4538927427183743, "learning_rate": 8.086965998778075e-06, "loss": 0.4961, "step": 15400 }, { "epoch": 0.35962660443407235, "grad_norm": 1.4649611547148864, "learning_rate": 8.083760840716329e-06, "loss": 0.4844, "step": 15410 }, { "epoch": 0.35985997666277714, "grad_norm": 1.546828712112674, "learning_rate": 8.080553636239447e-06, "loss": 0.5081, "step": 15420 }, { "epoch": 0.3600933488914819, "grad_norm": 1.5765136353888185, "learning_rate": 8.077344387475762e-06, "loss": 0.4868, "step": 15430 }, { "epoch": 0.3603267211201867, "grad_norm": 1.6074062186077325, "learning_rate": 8.074133096554968e-06, "loss": 0.506, "step": 15440 }, { "epoch": 0.3605600933488915, "grad_norm": 1.5566950005059825, "learning_rate": 8.070919765608107e-06, "loss": 0.5017, "step": 15450 }, { "epoch": 0.3607934655775963, "grad_norm": 1.6665519669620872, "learning_rate": 8.06770439676758e-06, "loss": 0.4894, "step": 15460 }, { "epoch": 0.36102683780630107, "grad_norm": 1.5434125974156097, "learning_rate": 8.064486992167136e-06, "loss": 0.4554, "step": 15470 }, { "epoch": 0.36126021003500586, "grad_norm": 1.4841924510524032, "learning_rate": 8.061267553941878e-06, "loss": 0.5042, "step": 15480 }, { "epoch": 0.36149358226371064, "grad_norm": 1.6762603739763462, "learning_rate": 8.058046084228262e-06, "loss": 0.4971, "step": 15490 }, { "epoch": 0.3617269544924154, "grad_norm": 1.6572021362568636, "learning_rate": 8.054822585164083e-06, "loss": 0.5241, "step": 15500 }, { "epoch": 0.36196032672112016, "grad_norm": 1.6372215767099563, "learning_rate": 8.051597058888491e-06, "loss": 0.4604, "step": 15510 }, { "epoch": 0.36219369894982495, "grad_norm": 1.5625928925207486, "learning_rate": 8.048369507541975e-06, "loss": 0.5019, "step": 15520 }, { "epoch": 0.36242707117852974, "grad_norm": 1.4657180057355454, "learning_rate": 8.045139933266377e-06, "loss": 0.4872, "step": 15530 }, { "epoch": 0.3626604434072345, "grad_norm": 1.4113068255361094, "learning_rate": 8.041908338204869e-06, "loss": 0.5045, "step": 15540 }, { "epoch": 0.3628938156359393, "grad_norm": 1.2892082914034024, "learning_rate": 8.038674724501973e-06, "loss": 0.4666, "step": 15550 }, { "epoch": 0.3631271878646441, "grad_norm": 1.499106144485461, "learning_rate": 8.03543909430355e-06, "loss": 0.4776, "step": 15560 }, { "epoch": 0.3633605600933489, "grad_norm": 1.40230040601755, "learning_rate": 8.032201449756794e-06, "loss": 0.4846, "step": 15570 }, { "epoch": 0.36359393232205367, "grad_norm": 1.5392196532115667, "learning_rate": 8.02896179301024e-06, "loss": 0.5035, "step": 15580 }, { "epoch": 0.36382730455075846, "grad_norm": 1.6574822836025118, "learning_rate": 8.025720126213759e-06, "loss": 0.4763, "step": 15590 }, { "epoch": 0.36406067677946324, "grad_norm": 1.6787968664327428, "learning_rate": 8.02247645151855e-06, "loss": 0.4615, "step": 15600 }, { "epoch": 0.36429404900816803, "grad_norm": 1.9133916655184342, "learning_rate": 8.019230771077153e-06, "loss": 0.4899, "step": 15610 }, { "epoch": 0.3645274212368728, "grad_norm": 1.567561320284626, "learning_rate": 8.015983087043433e-06, "loss": 0.4973, "step": 15620 }, { "epoch": 0.3647607934655776, "grad_norm": 1.6844869244110035, "learning_rate": 8.012733401572585e-06, "loss": 0.5072, "step": 15630 }, { "epoch": 0.3649941656942824, "grad_norm": 1.583000026603775, "learning_rate": 8.009481716821135e-06, "loss": 0.4814, "step": 15640 }, { "epoch": 0.3652275379229872, "grad_norm": 1.5148685005183267, "learning_rate": 8.006228034946932e-06, "loss": 0.51, "step": 15650 }, { "epoch": 0.36546091015169196, "grad_norm": 1.595590197619691, "learning_rate": 8.002972358109156e-06, "loss": 0.4762, "step": 15660 }, { "epoch": 0.36569428238039675, "grad_norm": 1.5349074549012245, "learning_rate": 7.999714688468306e-06, "loss": 0.4741, "step": 15670 }, { "epoch": 0.36592765460910154, "grad_norm": 1.5000726367653292, "learning_rate": 7.996455028186201e-06, "loss": 0.479, "step": 15680 }, { "epoch": 0.3661610268378063, "grad_norm": 1.45662774480612, "learning_rate": 7.99319337942599e-06, "loss": 0.5274, "step": 15690 }, { "epoch": 0.3663943990665111, "grad_norm": 1.631301947353602, "learning_rate": 7.989929744352134e-06, "loss": 0.5001, "step": 15700 }, { "epoch": 0.36662777129521584, "grad_norm": 1.4703141990990707, "learning_rate": 7.986664125130416e-06, "loss": 0.4645, "step": 15710 }, { "epoch": 0.36686114352392063, "grad_norm": 1.6001936238929808, "learning_rate": 7.983396523927933e-06, "loss": 0.4805, "step": 15720 }, { "epoch": 0.3670945157526254, "grad_norm": 1.3871611477547647, "learning_rate": 7.980126942913097e-06, "loss": 0.5178, "step": 15730 }, { "epoch": 0.3673278879813302, "grad_norm": 1.5659997482982893, "learning_rate": 7.976855384255638e-06, "loss": 0.5214, "step": 15740 }, { "epoch": 0.367561260210035, "grad_norm": 1.6077197920889879, "learning_rate": 7.973581850126595e-06, "loss": 0.494, "step": 15750 }, { "epoch": 0.3677946324387398, "grad_norm": 1.5609377204418529, "learning_rate": 7.970306342698318e-06, "loss": 0.497, "step": 15760 }, { "epoch": 0.36802800466744456, "grad_norm": 1.4539067519546445, "learning_rate": 7.967028864144466e-06, "loss": 0.5035, "step": 15770 }, { "epoch": 0.36826137689614935, "grad_norm": 1.6256821575728329, "learning_rate": 7.963749416640008e-06, "loss": 0.4987, "step": 15780 }, { "epoch": 0.36849474912485414, "grad_norm": 1.6068914081595083, "learning_rate": 7.960468002361221e-06, "loss": 0.4861, "step": 15790 }, { "epoch": 0.3687281213535589, "grad_norm": 1.6990347569396362, "learning_rate": 7.957184623485682e-06, "loss": 0.4863, "step": 15800 }, { "epoch": 0.3689614935822637, "grad_norm": 1.907809086523545, "learning_rate": 7.953899282192278e-06, "loss": 0.4937, "step": 15810 }, { "epoch": 0.3691948658109685, "grad_norm": 1.497209113283416, "learning_rate": 7.950611980661192e-06, "loss": 0.5031, "step": 15820 }, { "epoch": 0.3694282380396733, "grad_norm": 1.6496386763161546, "learning_rate": 7.947322721073911e-06, "loss": 0.4768, "step": 15830 }, { "epoch": 0.36966161026837807, "grad_norm": 1.6630133706598973, "learning_rate": 7.944031505613223e-06, "loss": 0.5137, "step": 15840 }, { "epoch": 0.36989498249708286, "grad_norm": 1.4716793514182145, "learning_rate": 7.94073833646321e-06, "loss": 0.5053, "step": 15850 }, { "epoch": 0.37012835472578764, "grad_norm": 1.5995799576046874, "learning_rate": 7.937443215809254e-06, "loss": 0.4729, "step": 15860 }, { "epoch": 0.37036172695449243, "grad_norm": 1.3304076700957939, "learning_rate": 7.934146145838032e-06, "loss": 0.5031, "step": 15870 }, { "epoch": 0.3705950991831972, "grad_norm": 1.4282265818015585, "learning_rate": 7.93084712873751e-06, "loss": 0.4926, "step": 15880 }, { "epoch": 0.370828471411902, "grad_norm": 1.9651642609376045, "learning_rate": 7.927546166696949e-06, "loss": 0.503, "step": 15890 }, { "epoch": 0.3710618436406068, "grad_norm": 1.6124894970992516, "learning_rate": 7.924243261906906e-06, "loss": 0.5049, "step": 15900 }, { "epoch": 0.3712952158693116, "grad_norm": 1.4562494857470472, "learning_rate": 7.920938416559219e-06, "loss": 0.4922, "step": 15910 }, { "epoch": 0.3715285880980163, "grad_norm": 1.6872451237220902, "learning_rate": 7.917631632847016e-06, "loss": 0.5004, "step": 15920 }, { "epoch": 0.3717619603267211, "grad_norm": 1.580773270941528, "learning_rate": 7.914322912964714e-06, "loss": 0.4846, "step": 15930 }, { "epoch": 0.3719953325554259, "grad_norm": 1.597768752544965, "learning_rate": 7.911012259108014e-06, "loss": 0.5004, "step": 15940 }, { "epoch": 0.37222870478413067, "grad_norm": 1.679176480801604, "learning_rate": 7.907699673473899e-06, "loss": 0.5158, "step": 15950 }, { "epoch": 0.37246207701283546, "grad_norm": 1.6244046085193171, "learning_rate": 7.904385158260633e-06, "loss": 0.4681, "step": 15960 }, { "epoch": 0.37269544924154024, "grad_norm": 1.6987923993515603, "learning_rate": 7.901068715667765e-06, "loss": 0.4965, "step": 15970 }, { "epoch": 0.37292882147024503, "grad_norm": 1.5319531663638752, "learning_rate": 7.897750347896119e-06, "loss": 0.5299, "step": 15980 }, { "epoch": 0.3731621936989498, "grad_norm": 1.5503989471275608, "learning_rate": 7.894430057147796e-06, "loss": 0.4837, "step": 15990 }, { "epoch": 0.3733955659276546, "grad_norm": 1.550739589292665, "learning_rate": 7.891107845626175e-06, "loss": 0.488, "step": 16000 }, { "epoch": 0.3736289381563594, "grad_norm": 1.5945917811435362, "learning_rate": 7.887783715535911e-06, "loss": 0.488, "step": 16010 }, { "epoch": 0.3738623103850642, "grad_norm": 1.741160194453624, "learning_rate": 7.88445766908293e-06, "loss": 0.4831, "step": 16020 }, { "epoch": 0.37409568261376897, "grad_norm": 1.4730439934793418, "learning_rate": 7.881129708474431e-06, "loss": 0.4796, "step": 16030 }, { "epoch": 0.37432905484247375, "grad_norm": 1.5137375605686907, "learning_rate": 7.877799835918882e-06, "loss": 0.4728, "step": 16040 }, { "epoch": 0.37456242707117854, "grad_norm": 1.6161511325720512, "learning_rate": 7.874468053626022e-06, "loss": 0.4958, "step": 16050 }, { "epoch": 0.3747957992998833, "grad_norm": 1.5048866480797758, "learning_rate": 7.871134363806852e-06, "loss": 0.4886, "step": 16060 }, { "epoch": 0.3750291715285881, "grad_norm": 1.4785502077554613, "learning_rate": 7.867798768673644e-06, "loss": 0.4752, "step": 16070 }, { "epoch": 0.3752625437572929, "grad_norm": 1.4527786197526735, "learning_rate": 7.864461270439933e-06, "loss": 0.4973, "step": 16080 }, { "epoch": 0.3754959159859977, "grad_norm": 1.7659841958086158, "learning_rate": 7.86112187132052e-06, "loss": 0.496, "step": 16090 }, { "epoch": 0.3757292882147025, "grad_norm": 1.6035562944757993, "learning_rate": 7.85778057353146e-06, "loss": 0.4936, "step": 16100 }, { "epoch": 0.37596266044340726, "grad_norm": 1.330392126752677, "learning_rate": 7.854437379290076e-06, "loss": 0.4821, "step": 16110 }, { "epoch": 0.37619603267211205, "grad_norm": 1.493215034346125, "learning_rate": 7.85109229081494e-06, "loss": 0.4907, "step": 16120 }, { "epoch": 0.3764294049008168, "grad_norm": 1.6696820921071118, "learning_rate": 7.847745310325895e-06, "loss": 0.4886, "step": 16130 }, { "epoch": 0.37666277712952156, "grad_norm": 1.7301681247794243, "learning_rate": 7.844396440044024e-06, "loss": 0.5042, "step": 16140 }, { "epoch": 0.37689614935822635, "grad_norm": 1.699218419516103, "learning_rate": 7.841045682191677e-06, "loss": 0.4991, "step": 16150 }, { "epoch": 0.37712952158693114, "grad_norm": 1.7586354819680583, "learning_rate": 7.837693038992447e-06, "loss": 0.5078, "step": 16160 }, { "epoch": 0.3773628938156359, "grad_norm": 1.674903340903281, "learning_rate": 7.834338512671182e-06, "loss": 0.4592, "step": 16170 }, { "epoch": 0.3775962660443407, "grad_norm": 1.5620159119809887, "learning_rate": 7.830982105453985e-06, "loss": 0.4885, "step": 16180 }, { "epoch": 0.3778296382730455, "grad_norm": 1.5801246291793447, "learning_rate": 7.827623819568198e-06, "loss": 0.4745, "step": 16190 }, { "epoch": 0.3780630105017503, "grad_norm": 2.1827392398437397, "learning_rate": 7.824263657242413e-06, "loss": 0.4786, "step": 16200 }, { "epoch": 0.37829638273045507, "grad_norm": 1.5681072668032034, "learning_rate": 7.82090162070647e-06, "loss": 0.4949, "step": 16210 }, { "epoch": 0.37852975495915986, "grad_norm": 1.507323682192281, "learning_rate": 7.817537712191449e-06, "loss": 0.5031, "step": 16220 }, { "epoch": 0.37876312718786465, "grad_norm": 1.6183630836395733, "learning_rate": 7.814171933929676e-06, "loss": 0.4802, "step": 16230 }, { "epoch": 0.37899649941656943, "grad_norm": 1.299242847320639, "learning_rate": 7.810804288154714e-06, "loss": 0.493, "step": 16240 }, { "epoch": 0.3792298716452742, "grad_norm": 1.451094408525099, "learning_rate": 7.807434777101369e-06, "loss": 0.5078, "step": 16250 }, { "epoch": 0.379463243873979, "grad_norm": 1.5680998711075131, "learning_rate": 7.804063403005682e-06, "loss": 0.4985, "step": 16260 }, { "epoch": 0.3796966161026838, "grad_norm": 1.338860975235953, "learning_rate": 7.800690168104931e-06, "loss": 0.4933, "step": 16270 }, { "epoch": 0.3799299883313886, "grad_norm": 1.3843122599334718, "learning_rate": 7.797315074637628e-06, "loss": 0.4734, "step": 16280 }, { "epoch": 0.38016336056009337, "grad_norm": 1.5611985675491673, "learning_rate": 7.793938124843521e-06, "loss": 0.4834, "step": 16290 }, { "epoch": 0.38039673278879815, "grad_norm": 1.574813413615217, "learning_rate": 7.790559320963588e-06, "loss": 0.5019, "step": 16300 }, { "epoch": 0.38063010501750294, "grad_norm": 1.5002971075196299, "learning_rate": 7.787178665240038e-06, "loss": 0.4532, "step": 16310 }, { "epoch": 0.3808634772462077, "grad_norm": 1.597082602956037, "learning_rate": 7.783796159916308e-06, "loss": 0.4931, "step": 16320 }, { "epoch": 0.38109684947491246, "grad_norm": 1.3618854818366877, "learning_rate": 7.780411807237064e-06, "loss": 0.494, "step": 16330 }, { "epoch": 0.38133022170361724, "grad_norm": 1.9651386577487868, "learning_rate": 7.777025609448195e-06, "loss": 0.5084, "step": 16340 }, { "epoch": 0.38156359393232203, "grad_norm": 1.7760557821477045, "learning_rate": 7.773637568796818e-06, "loss": 0.5236, "step": 16350 }, { "epoch": 0.3817969661610268, "grad_norm": 1.496494418849191, "learning_rate": 7.77024768753127e-06, "loss": 0.4749, "step": 16360 }, { "epoch": 0.3820303383897316, "grad_norm": 1.7449217648301656, "learning_rate": 7.766855967901114e-06, "loss": 0.4921, "step": 16370 }, { "epoch": 0.3822637106184364, "grad_norm": 1.4428331615661616, "learning_rate": 7.763462412157125e-06, "loss": 0.4659, "step": 16380 }, { "epoch": 0.3824970828471412, "grad_norm": 1.3881480500101586, "learning_rate": 7.760067022551305e-06, "loss": 0.4808, "step": 16390 }, { "epoch": 0.38273045507584597, "grad_norm": 1.7915076094141213, "learning_rate": 7.756669801336867e-06, "loss": 0.4947, "step": 16400 }, { "epoch": 0.38296382730455075, "grad_norm": 1.5355740378760738, "learning_rate": 7.753270750768243e-06, "loss": 0.4981, "step": 16410 }, { "epoch": 0.38319719953325554, "grad_norm": 1.6398848752387605, "learning_rate": 7.749869873101074e-06, "loss": 0.4989, "step": 16420 }, { "epoch": 0.3834305717619603, "grad_norm": 1.7600580440550373, "learning_rate": 7.746467170592223e-06, "loss": 0.4896, "step": 16430 }, { "epoch": 0.3836639439906651, "grad_norm": 1.3766537529914207, "learning_rate": 7.743062645499755e-06, "loss": 0.483, "step": 16440 }, { "epoch": 0.3838973162193699, "grad_norm": 1.3599827162824532, "learning_rate": 7.739656300082947e-06, "loss": 0.4878, "step": 16450 }, { "epoch": 0.3841306884480747, "grad_norm": 1.640329526515912, "learning_rate": 7.736248136602284e-06, "loss": 0.4671, "step": 16460 }, { "epoch": 0.3843640606767795, "grad_norm": 1.5929723715067237, "learning_rate": 7.732838157319462e-06, "loss": 0.4841, "step": 16470 }, { "epoch": 0.38459743290548426, "grad_norm": 1.6545446303210147, "learning_rate": 7.729426364497375e-06, "loss": 0.4837, "step": 16480 }, { "epoch": 0.38483080513418905, "grad_norm": 1.0331247856815557, "learning_rate": 7.726012760400121e-06, "loss": 0.4342, "step": 16490 }, { "epoch": 0.38506417736289383, "grad_norm": 1.7020548408315241, "learning_rate": 7.722597347293008e-06, "loss": 0.4977, "step": 16500 }, { "epoch": 0.3852975495915986, "grad_norm": 1.5968552163324106, "learning_rate": 7.719180127442535e-06, "loss": 0.4839, "step": 16510 }, { "epoch": 0.3855309218203034, "grad_norm": 1.3057752661951139, "learning_rate": 7.715761103116405e-06, "loss": 0.4723, "step": 16520 }, { "epoch": 0.3857642940490082, "grad_norm": 1.7313167870178092, "learning_rate": 7.712340276583518e-06, "loss": 0.511, "step": 16530 }, { "epoch": 0.3859976662777129, "grad_norm": 2.0265195770989815, "learning_rate": 7.70891765011397e-06, "loss": 0.4674, "step": 16540 }, { "epoch": 0.3862310385064177, "grad_norm": 1.4150958642396827, "learning_rate": 7.705493225979048e-06, "loss": 0.4777, "step": 16550 }, { "epoch": 0.3864644107351225, "grad_norm": 1.3092610323640388, "learning_rate": 7.702067006451237e-06, "loss": 0.4532, "step": 16560 }, { "epoch": 0.3866977829638273, "grad_norm": 1.6225741683741977, "learning_rate": 7.698638993804211e-06, "loss": 0.4746, "step": 16570 }, { "epoch": 0.3869311551925321, "grad_norm": 1.4228976814784104, "learning_rate": 7.695209190312835e-06, "loss": 0.4753, "step": 16580 }, { "epoch": 0.38716452742123686, "grad_norm": 1.9148406393640347, "learning_rate": 7.691777598253159e-06, "loss": 0.5247, "step": 16590 }, { "epoch": 0.38739789964994165, "grad_norm": 1.6565745331795878, "learning_rate": 7.688344219902424e-06, "loss": 0.5045, "step": 16600 }, { "epoch": 0.38763127187864643, "grad_norm": 1.5081478971696103, "learning_rate": 7.684909057539054e-06, "loss": 0.4787, "step": 16610 }, { "epoch": 0.3878646441073512, "grad_norm": 1.5478255638328628, "learning_rate": 7.681472113442661e-06, "loss": 0.4888, "step": 16620 }, { "epoch": 0.388098016336056, "grad_norm": 1.9813801264564332, "learning_rate": 7.678033389894031e-06, "loss": 0.5115, "step": 16630 }, { "epoch": 0.3883313885647608, "grad_norm": 1.609441632274415, "learning_rate": 7.674592889175141e-06, "loss": 0.5114, "step": 16640 }, { "epoch": 0.3885647607934656, "grad_norm": 1.7100381039762864, "learning_rate": 7.671150613569139e-06, "loss": 0.5171, "step": 16650 }, { "epoch": 0.38879813302217037, "grad_norm": 1.7163956616352978, "learning_rate": 7.667706565360356e-06, "loss": 0.4527, "step": 16660 }, { "epoch": 0.38903150525087515, "grad_norm": 1.6298556855934836, "learning_rate": 7.664260746834295e-06, "loss": 0.4867, "step": 16670 }, { "epoch": 0.38926487747957994, "grad_norm": 1.6483931142533113, "learning_rate": 7.66081316027764e-06, "loss": 0.4888, "step": 16680 }, { "epoch": 0.3894982497082847, "grad_norm": 1.4854139325373878, "learning_rate": 7.657363807978241e-06, "loss": 0.4767, "step": 16690 }, { "epoch": 0.3897316219369895, "grad_norm": 1.3783921351269612, "learning_rate": 7.653912692225127e-06, "loss": 0.4857, "step": 16700 }, { "epoch": 0.3899649941656943, "grad_norm": 1.3825827518474585, "learning_rate": 7.65045981530849e-06, "loss": 0.4821, "step": 16710 }, { "epoch": 0.3901983663943991, "grad_norm": 1.7642448649122289, "learning_rate": 7.647005179519697e-06, "loss": 0.4755, "step": 16720 }, { "epoch": 0.3904317386231039, "grad_norm": 1.5205849604173889, "learning_rate": 7.643548787151277e-06, "loss": 0.4792, "step": 16730 }, { "epoch": 0.39066511085180866, "grad_norm": 1.5524034953827077, "learning_rate": 7.640090640496932e-06, "loss": 0.4819, "step": 16740 }, { "epoch": 0.3908984830805134, "grad_norm": 1.5270282373632795, "learning_rate": 7.636630741851519e-06, "loss": 0.5131, "step": 16750 }, { "epoch": 0.3911318553092182, "grad_norm": 1.530429231269362, "learning_rate": 7.633169093511062e-06, "loss": 0.451, "step": 16760 }, { "epoch": 0.39136522753792297, "grad_norm": 1.4621751626521746, "learning_rate": 7.629705697772746e-06, "loss": 0.466, "step": 16770 }, { "epoch": 0.39159859976662775, "grad_norm": 1.3473408459684966, "learning_rate": 7.626240556934919e-06, "loss": 0.4842, "step": 16780 }, { "epoch": 0.39183197199533254, "grad_norm": 1.4413063708748761, "learning_rate": 7.622773673297081e-06, "loss": 0.4699, "step": 16790 }, { "epoch": 0.3920653442240373, "grad_norm": 1.5932838720551221, "learning_rate": 7.619305049159892e-06, "loss": 0.4867, "step": 16800 }, { "epoch": 0.3922987164527421, "grad_norm": 1.4863614673373564, "learning_rate": 7.6158346868251675e-06, "loss": 0.4642, "step": 16810 }, { "epoch": 0.3925320886814469, "grad_norm": 1.1972680616563411, "learning_rate": 7.612362588595872e-06, "loss": 0.5017, "step": 16820 }, { "epoch": 0.3927654609101517, "grad_norm": 1.5237342637464308, "learning_rate": 7.608888756776129e-06, "loss": 0.4657, "step": 16830 }, { "epoch": 0.3929988331388565, "grad_norm": 1.580654380138482, "learning_rate": 7.6054131936712075e-06, "loss": 0.4969, "step": 16840 }, { "epoch": 0.39323220536756126, "grad_norm": 1.477771905187571, "learning_rate": 7.601935901587526e-06, "loss": 0.4915, "step": 16850 }, { "epoch": 0.39346557759626605, "grad_norm": 2.2571873200908805, "learning_rate": 7.5984568828326514e-06, "loss": 0.518, "step": 16860 }, { "epoch": 0.39369894982497083, "grad_norm": 1.6497533069854557, "learning_rate": 7.594976139715296e-06, "loss": 0.5008, "step": 16870 }, { "epoch": 0.3939323220536756, "grad_norm": 1.867682081562202, "learning_rate": 7.591493674545316e-06, "loss": 0.4864, "step": 16880 }, { "epoch": 0.3941656942823804, "grad_norm": 1.3671379948024978, "learning_rate": 7.588009489633712e-06, "loss": 0.4896, "step": 16890 }, { "epoch": 0.3943990665110852, "grad_norm": 1.6195187980845052, "learning_rate": 7.584523587292623e-06, "loss": 0.5099, "step": 16900 }, { "epoch": 0.39463243873979, "grad_norm": 1.744863114188113, "learning_rate": 7.58103596983533e-06, "loss": 0.4835, "step": 16910 }, { "epoch": 0.39486581096849477, "grad_norm": 1.5112599699851006, "learning_rate": 7.577546639576252e-06, "loss": 0.5101, "step": 16920 }, { "epoch": 0.39509918319719955, "grad_norm": 1.5590303177683353, "learning_rate": 7.574055598830945e-06, "loss": 0.4793, "step": 16930 }, { "epoch": 0.39533255542590434, "grad_norm": 1.4391963627000444, "learning_rate": 7.570562849916097e-06, "loss": 0.4943, "step": 16940 }, { "epoch": 0.39556592765460913, "grad_norm": 1.46087919890776, "learning_rate": 7.567068395149532e-06, "loss": 0.4994, "step": 16950 }, { "epoch": 0.39579929988331386, "grad_norm": 1.2561808001382315, "learning_rate": 7.563572236850207e-06, "loss": 0.492, "step": 16960 }, { "epoch": 0.39603267211201865, "grad_norm": 1.4663122679909657, "learning_rate": 7.560074377338209e-06, "loss": 0.4688, "step": 16970 }, { "epoch": 0.39626604434072343, "grad_norm": 1.7494199176742289, "learning_rate": 7.556574818934751e-06, "loss": 0.4927, "step": 16980 }, { "epoch": 0.3964994165694282, "grad_norm": 1.6849617788047027, "learning_rate": 7.553073563962179e-06, "loss": 0.4562, "step": 16990 }, { "epoch": 0.396732788798133, "grad_norm": 1.5087229591387818, "learning_rate": 7.549570614743956e-06, "loss": 0.4969, "step": 17000 }, { "epoch": 0.3969661610268378, "grad_norm": 1.3389218832091658, "learning_rate": 7.54606597360468e-06, "loss": 0.4583, "step": 17010 }, { "epoch": 0.3971995332555426, "grad_norm": 1.7671560656329068, "learning_rate": 7.542559642870064e-06, "loss": 0.4669, "step": 17020 }, { "epoch": 0.39743290548424737, "grad_norm": 1.5013928836379187, "learning_rate": 7.539051624866947e-06, "loss": 0.5037, "step": 17030 }, { "epoch": 0.39766627771295215, "grad_norm": 1.6374998720218312, "learning_rate": 7.535541921923282e-06, "loss": 0.4962, "step": 17040 }, { "epoch": 0.39789964994165694, "grad_norm": 1.5763077368130862, "learning_rate": 7.532030536368146e-06, "loss": 0.4998, "step": 17050 }, { "epoch": 0.39813302217036173, "grad_norm": 1.8977827570269026, "learning_rate": 7.528517470531731e-06, "loss": 0.4745, "step": 17060 }, { "epoch": 0.3983663943990665, "grad_norm": 1.422418612965534, "learning_rate": 7.5250027267453414e-06, "loss": 0.5185, "step": 17070 }, { "epoch": 0.3985997666277713, "grad_norm": 1.6414483229375743, "learning_rate": 7.521486307341398e-06, "loss": 0.4829, "step": 17080 }, { "epoch": 0.3988331388564761, "grad_norm": 1.8803431069477603, "learning_rate": 7.5179682146534325e-06, "loss": 0.5094, "step": 17090 }, { "epoch": 0.3990665110851809, "grad_norm": 1.5646728893070891, "learning_rate": 7.514448451016088e-06, "loss": 0.4622, "step": 17100 }, { "epoch": 0.39929988331388566, "grad_norm": 1.2137614312069296, "learning_rate": 7.5109270187651155e-06, "loss": 0.4845, "step": 17110 }, { "epoch": 0.39953325554259045, "grad_norm": 1.5267375759613269, "learning_rate": 7.507403920237369e-06, "loss": 0.4802, "step": 17120 }, { "epoch": 0.39976662777129524, "grad_norm": 1.5544435303759194, "learning_rate": 7.503879157770819e-06, "loss": 0.5021, "step": 17130 }, { "epoch": 0.4, "grad_norm": 1.3971481677449336, "learning_rate": 7.500352733704529e-06, "loss": 0.4956, "step": 17140 }, { "epoch": 0.4002333722287048, "grad_norm": 1.517483719487613, "learning_rate": 7.496824650378671e-06, "loss": 0.5164, "step": 17150 }, { "epoch": 0.4004667444574096, "grad_norm": 1.6621838331044225, "learning_rate": 7.49329491013452e-06, "loss": 0.48, "step": 17160 }, { "epoch": 0.4007001166861143, "grad_norm": 2.2838725541849927, "learning_rate": 7.4897635153144455e-06, "loss": 0.5072, "step": 17170 }, { "epoch": 0.4009334889148191, "grad_norm": 1.4576404770992608, "learning_rate": 7.486230468261915e-06, "loss": 0.4679, "step": 17180 }, { "epoch": 0.4011668611435239, "grad_norm": 1.5902707433118384, "learning_rate": 7.482695771321498e-06, "loss": 0.4807, "step": 17190 }, { "epoch": 0.4014002333722287, "grad_norm": 1.473968668686822, "learning_rate": 7.4791594268388535e-06, "loss": 0.4965, "step": 17200 }, { "epoch": 0.4016336056009335, "grad_norm": 1.3681621605207337, "learning_rate": 7.475621437160737e-06, "loss": 0.4709, "step": 17210 }, { "epoch": 0.40186697782963826, "grad_norm": 1.3222980360312429, "learning_rate": 7.472081804634993e-06, "loss": 0.4836, "step": 17220 }, { "epoch": 0.40210035005834305, "grad_norm": 1.3830232609833966, "learning_rate": 7.468540531610559e-06, "loss": 0.4691, "step": 17230 }, { "epoch": 0.40233372228704783, "grad_norm": 1.5524738348578508, "learning_rate": 7.464997620437459e-06, "loss": 0.5015, "step": 17240 }, { "epoch": 0.4025670945157526, "grad_norm": 1.3288865041739466, "learning_rate": 7.461453073466807e-06, "loss": 0.5066, "step": 17250 }, { "epoch": 0.4028004667444574, "grad_norm": 1.644484512226082, "learning_rate": 7.457906893050797e-06, "loss": 0.4738, "step": 17260 }, { "epoch": 0.4030338389731622, "grad_norm": 1.3883489638096185, "learning_rate": 7.454359081542714e-06, "loss": 0.4998, "step": 17270 }, { "epoch": 0.403267211201867, "grad_norm": 2.803643839013734, "learning_rate": 7.4508096412969185e-06, "loss": 0.4665, "step": 17280 }, { "epoch": 0.40350058343057177, "grad_norm": 1.6430603651373334, "learning_rate": 7.447258574668858e-06, "loss": 0.4624, "step": 17290 }, { "epoch": 0.40373395565927656, "grad_norm": 1.6035007960682268, "learning_rate": 7.443705884015057e-06, "loss": 0.4781, "step": 17300 }, { "epoch": 0.40396732788798134, "grad_norm": 1.6790614435190603, "learning_rate": 7.440151571693115e-06, "loss": 0.4687, "step": 17310 }, { "epoch": 0.40420070011668613, "grad_norm": 1.6867574678511366, "learning_rate": 7.436595640061709e-06, "loss": 0.5087, "step": 17320 }, { "epoch": 0.4044340723453909, "grad_norm": 1.4586845005619706, "learning_rate": 7.433038091480595e-06, "loss": 0.489, "step": 17330 }, { "epoch": 0.4046674445740957, "grad_norm": 1.3515967472739414, "learning_rate": 7.429478928310595e-06, "loss": 0.5187, "step": 17340 }, { "epoch": 0.4049008168028005, "grad_norm": 1.4933476853100116, "learning_rate": 7.425918152913609e-06, "loss": 0.4634, "step": 17350 }, { "epoch": 0.4051341890315053, "grad_norm": 1.471597682280155, "learning_rate": 7.422355767652603e-06, "loss": 0.47, "step": 17360 }, { "epoch": 0.40536756126021, "grad_norm": 1.7787103878925952, "learning_rate": 7.41879177489161e-06, "loss": 0.5313, "step": 17370 }, { "epoch": 0.4056009334889148, "grad_norm": 1.9232129314699242, "learning_rate": 7.4152261769957345e-06, "loss": 0.4841, "step": 17380 }, { "epoch": 0.4058343057176196, "grad_norm": 1.4710331569959392, "learning_rate": 7.4116589763311416e-06, "loss": 0.4859, "step": 17390 }, { "epoch": 0.40606767794632437, "grad_norm": 1.3055713810470049, "learning_rate": 7.4080901752650645e-06, "loss": 0.4807, "step": 17400 }, { "epoch": 0.40630105017502915, "grad_norm": 1.683500254587287, "learning_rate": 7.404519776165795e-06, "loss": 0.4773, "step": 17410 }, { "epoch": 0.40653442240373394, "grad_norm": 1.540579414336699, "learning_rate": 7.4009477814026855e-06, "loss": 0.5136, "step": 17420 }, { "epoch": 0.40676779463243873, "grad_norm": 1.5078537449800928, "learning_rate": 7.397374193346149e-06, "loss": 0.4981, "step": 17430 }, { "epoch": 0.4070011668611435, "grad_norm": 1.421598373776181, "learning_rate": 7.393799014367653e-06, "loss": 0.5112, "step": 17440 }, { "epoch": 0.4072345390898483, "grad_norm": 1.599448103884301, "learning_rate": 7.390222246839727e-06, "loss": 0.5112, "step": 17450 }, { "epoch": 0.4074679113185531, "grad_norm": 1.5891140247171818, "learning_rate": 7.386643893135945e-06, "loss": 0.4623, "step": 17460 }, { "epoch": 0.4077012835472579, "grad_norm": 1.6576785459237113, "learning_rate": 7.3830639556309424e-06, "loss": 0.4713, "step": 17470 }, { "epoch": 0.40793465577596266, "grad_norm": 1.484404250988383, "learning_rate": 7.3794824367004036e-06, "loss": 0.4949, "step": 17480 }, { "epoch": 0.40816802800466745, "grad_norm": 1.980232980939638, "learning_rate": 7.375899338721058e-06, "loss": 0.4703, "step": 17490 }, { "epoch": 0.40840140023337224, "grad_norm": 1.5359641993428708, "learning_rate": 7.372314664070687e-06, "loss": 0.5196, "step": 17500 }, { "epoch": 0.408634772462077, "grad_norm": 1.661658732839983, "learning_rate": 7.368728415128116e-06, "loss": 0.4632, "step": 17510 }, { "epoch": 0.4088681446907818, "grad_norm": 1.439937347342062, "learning_rate": 7.365140594273219e-06, "loss": 0.4745, "step": 17520 }, { "epoch": 0.4091015169194866, "grad_norm": 1.4744297651168037, "learning_rate": 7.361551203886907e-06, "loss": 0.4802, "step": 17530 }, { "epoch": 0.4093348891481914, "grad_norm": 1.5977964012053372, "learning_rate": 7.35796024635114e-06, "loss": 0.5156, "step": 17540 }, { "epoch": 0.40956826137689617, "grad_norm": 1.6757882080315998, "learning_rate": 7.3543677240489116e-06, "loss": 0.4835, "step": 17550 }, { "epoch": 0.40980163360560096, "grad_norm": 1.5887664042543719, "learning_rate": 7.350773639364258e-06, "loss": 0.5003, "step": 17560 }, { "epoch": 0.41003500583430574, "grad_norm": 1.3945167289919698, "learning_rate": 7.347177994682249e-06, "loss": 0.4562, "step": 17570 }, { "epoch": 0.4102683780630105, "grad_norm": 2.08926817949336, "learning_rate": 7.34358079238899e-06, "loss": 0.4975, "step": 17580 }, { "epoch": 0.41050175029171526, "grad_norm": 1.2858274956166968, "learning_rate": 7.339982034871623e-06, "loss": 0.4756, "step": 17590 }, { "epoch": 0.41073512252042005, "grad_norm": 1.6545938292554885, "learning_rate": 7.33638172451832e-06, "loss": 0.5034, "step": 17600 }, { "epoch": 0.41096849474912484, "grad_norm": 1.5962314868501626, "learning_rate": 7.332779863718282e-06, "loss": 0.4646, "step": 17610 }, { "epoch": 0.4112018669778296, "grad_norm": 1.478932704517337, "learning_rate": 7.329176454861744e-06, "loss": 0.4907, "step": 17620 }, { "epoch": 0.4114352392065344, "grad_norm": 1.7128500182282798, "learning_rate": 7.325571500339962e-06, "loss": 0.5018, "step": 17630 }, { "epoch": 0.4116686114352392, "grad_norm": 1.6813774736400022, "learning_rate": 7.32196500254522e-06, "loss": 0.4814, "step": 17640 }, { "epoch": 0.411901983663944, "grad_norm": 1.6906413067799089, "learning_rate": 7.3183569638708294e-06, "loss": 0.4804, "step": 17650 }, { "epoch": 0.41213535589264877, "grad_norm": 1.722002705934009, "learning_rate": 7.31474738671112e-06, "loss": 0.4778, "step": 17660 }, { "epoch": 0.41236872812135356, "grad_norm": 2.0966296941817593, "learning_rate": 7.311136273461443e-06, "loss": 0.4784, "step": 17670 }, { "epoch": 0.41260210035005834, "grad_norm": 1.8174827880883184, "learning_rate": 7.307523626518172e-06, "loss": 0.5003, "step": 17680 }, { "epoch": 0.41283547257876313, "grad_norm": 1.3828118718576607, "learning_rate": 7.303909448278695e-06, "loss": 0.4586, "step": 17690 }, { "epoch": 0.4130688448074679, "grad_norm": 1.7541707931605524, "learning_rate": 7.300293741141418e-06, "loss": 0.4845, "step": 17700 }, { "epoch": 0.4133022170361727, "grad_norm": 1.747551127558791, "learning_rate": 7.296676507505761e-06, "loss": 0.4917, "step": 17710 }, { "epoch": 0.4135355892648775, "grad_norm": 1.7564536329843483, "learning_rate": 7.2930577497721566e-06, "loss": 0.4965, "step": 17720 }, { "epoch": 0.4137689614935823, "grad_norm": 1.2868605865467933, "learning_rate": 7.28943747034205e-06, "loss": 0.4575, "step": 17730 }, { "epoch": 0.41400233372228706, "grad_norm": 1.5740089050030845, "learning_rate": 7.285815671617897e-06, "loss": 0.4923, "step": 17740 }, { "epoch": 0.41423570595099185, "grad_norm": 1.4856756959746156, "learning_rate": 7.282192356003157e-06, "loss": 0.4939, "step": 17750 }, { "epoch": 0.41446907817969664, "grad_norm": 2.2473643698756773, "learning_rate": 7.2785675259023e-06, "loss": 0.4789, "step": 17760 }, { "epoch": 0.4147024504084014, "grad_norm": 1.5932584599516109, "learning_rate": 7.274941183720803e-06, "loss": 0.5003, "step": 17770 }, { "epoch": 0.4149358226371062, "grad_norm": 1.4997947198736223, "learning_rate": 7.2713133318651395e-06, "loss": 0.4752, "step": 17780 }, { "epoch": 0.41516919486581094, "grad_norm": 2.2376598306251765, "learning_rate": 7.267683972742793e-06, "loss": 0.4708, "step": 17790 }, { "epoch": 0.41540256709451573, "grad_norm": 1.6104890890524717, "learning_rate": 7.264053108762241e-06, "loss": 0.4848, "step": 17800 }, { "epoch": 0.4156359393232205, "grad_norm": 1.559955419841314, "learning_rate": 7.260420742332961e-06, "loss": 0.4827, "step": 17810 }, { "epoch": 0.4158693115519253, "grad_norm": 1.8978136615559777, "learning_rate": 7.256786875865429e-06, "loss": 0.4625, "step": 17820 }, { "epoch": 0.4161026837806301, "grad_norm": 1.589205535517727, "learning_rate": 7.253151511771118e-06, "loss": 0.492, "step": 17830 }, { "epoch": 0.4163360560093349, "grad_norm": 1.7101223161532222, "learning_rate": 7.249514652462492e-06, "loss": 0.4696, "step": 17840 }, { "epoch": 0.41656942823803966, "grad_norm": 1.724154907755921, "learning_rate": 7.245876300353006e-06, "loss": 0.5025, "step": 17850 }, { "epoch": 0.41680280046674445, "grad_norm": 1.3382641392020362, "learning_rate": 7.242236457857108e-06, "loss": 0.4859, "step": 17860 }, { "epoch": 0.41703617269544924, "grad_norm": 1.5229802259611176, "learning_rate": 7.238595127390234e-06, "loss": 0.5009, "step": 17870 }, { "epoch": 0.417269544924154, "grad_norm": 1.6125324656620956, "learning_rate": 7.234952311368811e-06, "loss": 0.4779, "step": 17880 }, { "epoch": 0.4175029171528588, "grad_norm": 1.5685048924901435, "learning_rate": 7.231308012210247e-06, "loss": 0.4827, "step": 17890 }, { "epoch": 0.4177362893815636, "grad_norm": 1.6066573587895923, "learning_rate": 7.227662232332935e-06, "loss": 0.4571, "step": 17900 }, { "epoch": 0.4179696616102684, "grad_norm": 1.761765278530422, "learning_rate": 7.224014974156254e-06, "loss": 0.4965, "step": 17910 }, { "epoch": 0.41820303383897317, "grad_norm": 1.4123876941296072, "learning_rate": 7.2203662401005605e-06, "loss": 0.5108, "step": 17920 }, { "epoch": 0.41843640606767796, "grad_norm": 2.678990524816103, "learning_rate": 7.21671603258719e-06, "loss": 0.5034, "step": 17930 }, { "epoch": 0.41866977829638274, "grad_norm": 1.5512473966879028, "learning_rate": 7.213064354038459e-06, "loss": 0.5056, "step": 17940 }, { "epoch": 0.41890315052508753, "grad_norm": 1.434280962633422, "learning_rate": 7.209411206877661e-06, "loss": 0.4793, "step": 17950 }, { "epoch": 0.4191365227537923, "grad_norm": 1.7318044668664423, "learning_rate": 7.205756593529058e-06, "loss": 0.4878, "step": 17960 }, { "epoch": 0.4193698949824971, "grad_norm": 2.812520719500957, "learning_rate": 7.2021005164178894e-06, "loss": 0.4755, "step": 17970 }, { "epoch": 0.4196032672112019, "grad_norm": 1.6910357320007858, "learning_rate": 7.198442977970368e-06, "loss": 0.4923, "step": 17980 }, { "epoch": 0.4198366394399067, "grad_norm": 1.6684124373266718, "learning_rate": 7.194783980613669e-06, "loss": 0.4843, "step": 17990 }, { "epoch": 0.4200700116686114, "grad_norm": 1.5629681534952804, "learning_rate": 7.191123526775944e-06, "loss": 0.4656, "step": 18000 }, { "epoch": 0.4203033838973162, "grad_norm": 1.5468136806608597, "learning_rate": 7.187461618886305e-06, "loss": 0.4615, "step": 18010 }, { "epoch": 0.420536756126021, "grad_norm": 1.315363179105194, "learning_rate": 7.183798259374835e-06, "loss": 0.4792, "step": 18020 }, { "epoch": 0.42077012835472577, "grad_norm": 1.478832421546285, "learning_rate": 7.180133450672574e-06, "loss": 0.4728, "step": 18030 }, { "epoch": 0.42100350058343056, "grad_norm": 1.4717624137415302, "learning_rate": 7.1764671952115265e-06, "loss": 0.5017, "step": 18040 }, { "epoch": 0.42123687281213534, "grad_norm": 1.574093114142092, "learning_rate": 7.172799495424658e-06, "loss": 0.4884, "step": 18050 }, { "epoch": 0.42147024504084013, "grad_norm": 1.5423518760916068, "learning_rate": 7.1691303537458904e-06, "loss": 0.4505, "step": 18060 }, { "epoch": 0.4217036172695449, "grad_norm": 1.4110982119784878, "learning_rate": 7.165459772610104e-06, "loss": 0.445, "step": 18070 }, { "epoch": 0.4219369894982497, "grad_norm": 1.7005649677363848, "learning_rate": 7.161787754453133e-06, "loss": 0.485, "step": 18080 }, { "epoch": 0.4221703617269545, "grad_norm": 1.5937829662353282, "learning_rate": 7.158114301711768e-06, "loss": 0.489, "step": 18090 }, { "epoch": 0.4224037339556593, "grad_norm": 1.3698511952393069, "learning_rate": 7.154439416823748e-06, "loss": 0.4435, "step": 18100 }, { "epoch": 0.42263710618436406, "grad_norm": 1.6118724910845648, "learning_rate": 7.150763102227764e-06, "loss": 0.4988, "step": 18110 }, { "epoch": 0.42287047841306885, "grad_norm": 1.6620523689855127, "learning_rate": 7.1470853603634584e-06, "loss": 0.4826, "step": 18120 }, { "epoch": 0.42310385064177364, "grad_norm": 1.4494473799117995, "learning_rate": 7.143406193671416e-06, "loss": 0.4725, "step": 18130 }, { "epoch": 0.4233372228704784, "grad_norm": 1.580635690501036, "learning_rate": 7.139725604593169e-06, "loss": 0.4755, "step": 18140 }, { "epoch": 0.4235705950991832, "grad_norm": 1.4601811258027428, "learning_rate": 7.136043595571195e-06, "loss": 0.4862, "step": 18150 }, { "epoch": 0.423803967327888, "grad_norm": 1.5784579040579603, "learning_rate": 7.132360169048913e-06, "loss": 0.4512, "step": 18160 }, { "epoch": 0.4240373395565928, "grad_norm": 1.445466379893328, "learning_rate": 7.12867532747068e-06, "loss": 0.4812, "step": 18170 }, { "epoch": 0.42427071178529757, "grad_norm": 1.662311874616365, "learning_rate": 7.124989073281796e-06, "loss": 0.5138, "step": 18180 }, { "epoch": 0.42450408401400236, "grad_norm": 1.5879645719747657, "learning_rate": 7.1213014089285e-06, "loss": 0.503, "step": 18190 }, { "epoch": 0.42473745624270715, "grad_norm": 1.5350657999110433, "learning_rate": 7.117612336857957e-06, "loss": 0.5028, "step": 18200 }, { "epoch": 0.4249708284714119, "grad_norm": 1.563704284027993, "learning_rate": 7.113921859518278e-06, "loss": 0.4775, "step": 18210 }, { "epoch": 0.42520420070011666, "grad_norm": 1.7061482162583441, "learning_rate": 7.1102299793585e-06, "loss": 0.494, "step": 18220 }, { "epoch": 0.42543757292882145, "grad_norm": 1.9751441921949036, "learning_rate": 7.106536698828593e-06, "loss": 0.4871, "step": 18230 }, { "epoch": 0.42567094515752624, "grad_norm": 1.554122235684902, "learning_rate": 7.102842020379454e-06, "loss": 0.4914, "step": 18240 }, { "epoch": 0.425904317386231, "grad_norm": 1.5318849820338212, "learning_rate": 7.099145946462909e-06, "loss": 0.4941, "step": 18250 }, { "epoch": 0.4261376896149358, "grad_norm": 1.5317209276582031, "learning_rate": 7.095448479531712e-06, "loss": 0.5018, "step": 18260 }, { "epoch": 0.4263710618436406, "grad_norm": 1.4690087792892053, "learning_rate": 7.091749622039541e-06, "loss": 0.4613, "step": 18270 }, { "epoch": 0.4266044340723454, "grad_norm": 1.736036602918441, "learning_rate": 7.088049376440992e-06, "loss": 0.4934, "step": 18280 }, { "epoch": 0.42683780630105017, "grad_norm": 1.497167809670741, "learning_rate": 7.084347745191588e-06, "loss": 0.4849, "step": 18290 }, { "epoch": 0.42707117852975496, "grad_norm": 1.6536938752926136, "learning_rate": 7.080644730747769e-06, "loss": 0.4886, "step": 18300 }, { "epoch": 0.42730455075845974, "grad_norm": 1.6692026161154057, "learning_rate": 7.076940335566891e-06, "loss": 0.4743, "step": 18310 }, { "epoch": 0.42753792298716453, "grad_norm": 1.5953176581476929, "learning_rate": 7.073234562107232e-06, "loss": 0.4917, "step": 18320 }, { "epoch": 0.4277712952158693, "grad_norm": 1.5801716699897257, "learning_rate": 7.069527412827978e-06, "loss": 0.492, "step": 18330 }, { "epoch": 0.4280046674445741, "grad_norm": 1.6214200498607125, "learning_rate": 7.065818890189233e-06, "loss": 0.5034, "step": 18340 }, { "epoch": 0.4282380396732789, "grad_norm": 1.9232821808212364, "learning_rate": 7.062108996652009e-06, "loss": 0.5007, "step": 18350 }, { "epoch": 0.4284714119019837, "grad_norm": 1.6232800265720204, "learning_rate": 7.058397734678231e-06, "loss": 0.4921, "step": 18360 }, { "epoch": 0.42870478413068847, "grad_norm": 1.5749796265510994, "learning_rate": 7.05468510673073e-06, "loss": 0.4976, "step": 18370 }, { "epoch": 0.42893815635939325, "grad_norm": 1.3109548405809788, "learning_rate": 7.050971115273242e-06, "loss": 0.4793, "step": 18380 }, { "epoch": 0.42917152858809804, "grad_norm": 1.4735672271700755, "learning_rate": 7.04725576277041e-06, "loss": 0.4963, "step": 18390 }, { "epoch": 0.4294049008168028, "grad_norm": 1.6101249494021193, "learning_rate": 7.04353905168778e-06, "loss": 0.4737, "step": 18400 }, { "epoch": 0.42963827304550756, "grad_norm": 1.8006719112780074, "learning_rate": 7.0398209844918015e-06, "loss": 0.476, "step": 18410 }, { "epoch": 0.42987164527421234, "grad_norm": 1.5430400376925073, "learning_rate": 7.036101563649818e-06, "loss": 0.4612, "step": 18420 }, { "epoch": 0.43010501750291713, "grad_norm": 1.5159482722134026, "learning_rate": 7.03238079163008e-06, "loss": 0.4859, "step": 18430 }, { "epoch": 0.4303383897316219, "grad_norm": 1.6521444423331981, "learning_rate": 7.0286586709017245e-06, "loss": 0.5128, "step": 18440 }, { "epoch": 0.4305717619603267, "grad_norm": 1.4874595208558778, "learning_rate": 7.0249352039347925e-06, "loss": 0.5, "step": 18450 }, { "epoch": 0.4308051341890315, "grad_norm": 1.777558165953417, "learning_rate": 7.021210393200213e-06, "loss": 0.4731, "step": 18460 }, { "epoch": 0.4310385064177363, "grad_norm": 1.294668796842625, "learning_rate": 7.017484241169807e-06, "loss": 0.4594, "step": 18470 }, { "epoch": 0.43127187864644106, "grad_norm": 1.6322668455076266, "learning_rate": 7.013756750316292e-06, "loss": 0.4584, "step": 18480 }, { "epoch": 0.43150525087514585, "grad_norm": 1.6168196298074438, "learning_rate": 7.010027923113263e-06, "loss": 0.4779, "step": 18490 }, { "epoch": 0.43173862310385064, "grad_norm": 1.9691877580021717, "learning_rate": 7.00629776203521e-06, "loss": 0.4801, "step": 18500 }, { "epoch": 0.4319719953325554, "grad_norm": 1.660458147601826, "learning_rate": 7.0025662695575095e-06, "loss": 0.5096, "step": 18510 }, { "epoch": 0.4322053675612602, "grad_norm": 1.8331118320455397, "learning_rate": 6.998833448156411e-06, "loss": 0.5082, "step": 18520 }, { "epoch": 0.432438739789965, "grad_norm": 1.5340644042147977, "learning_rate": 6.995099300309058e-06, "loss": 0.4782, "step": 18530 }, { "epoch": 0.4326721120186698, "grad_norm": 1.7728171034721483, "learning_rate": 6.991363828493467e-06, "loss": 0.5065, "step": 18540 }, { "epoch": 0.4329054842473746, "grad_norm": 1.6072963851849937, "learning_rate": 6.987627035188537e-06, "loss": 0.4805, "step": 18550 }, { "epoch": 0.43313885647607936, "grad_norm": 1.524258111488441, "learning_rate": 6.983888922874041e-06, "loss": 0.4993, "step": 18560 }, { "epoch": 0.43337222870478415, "grad_norm": 2.4975293065300885, "learning_rate": 6.980149494030628e-06, "loss": 0.4743, "step": 18570 }, { "epoch": 0.43360560093348893, "grad_norm": 1.7753565823284319, "learning_rate": 6.976408751139823e-06, "loss": 0.5162, "step": 18580 }, { "epoch": 0.4338389731621937, "grad_norm": 1.8187357839491907, "learning_rate": 6.972666696684016e-06, "loss": 0.4842, "step": 18590 }, { "epoch": 0.4340723453908985, "grad_norm": 1.3979443102739297, "learning_rate": 6.968923333146479e-06, "loss": 0.4546, "step": 18600 }, { "epoch": 0.4343057176196033, "grad_norm": 1.59224172457196, "learning_rate": 6.965178663011343e-06, "loss": 0.5114, "step": 18610 }, { "epoch": 0.434539089848308, "grad_norm": 1.6277791854877945, "learning_rate": 6.9614326887636095e-06, "loss": 0.4616, "step": 18620 }, { "epoch": 0.4347724620770128, "grad_norm": 1.38569142829571, "learning_rate": 6.957685412889146e-06, "loss": 0.4923, "step": 18630 }, { "epoch": 0.4350058343057176, "grad_norm": 2.381088943117639, "learning_rate": 6.953936837874679e-06, "loss": 0.4918, "step": 18640 }, { "epoch": 0.4352392065344224, "grad_norm": 1.716239990817753, "learning_rate": 6.950186966207808e-06, "loss": 0.4756, "step": 18650 }, { "epoch": 0.43547257876312717, "grad_norm": 1.6575381812938086, "learning_rate": 6.946435800376982e-06, "loss": 0.4651, "step": 18660 }, { "epoch": 0.43570595099183196, "grad_norm": 1.7230385427205615, "learning_rate": 6.942683342871513e-06, "loss": 0.4933, "step": 18670 }, { "epoch": 0.43593932322053675, "grad_norm": 1.6855001093012207, "learning_rate": 6.938929596181569e-06, "loss": 0.4989, "step": 18680 }, { "epoch": 0.43617269544924153, "grad_norm": 1.507932275069792, "learning_rate": 6.935174562798177e-06, "loss": 0.4495, "step": 18690 }, { "epoch": 0.4364060676779463, "grad_norm": 1.5325691176593583, "learning_rate": 6.931418245213215e-06, "loss": 0.4731, "step": 18700 }, { "epoch": 0.4366394399066511, "grad_norm": 1.4712393603177036, "learning_rate": 6.927660645919412e-06, "loss": 0.4896, "step": 18710 }, { "epoch": 0.4368728121353559, "grad_norm": 1.5444874933332715, "learning_rate": 6.92390176741035e-06, "loss": 0.4671, "step": 18720 }, { "epoch": 0.4371061843640607, "grad_norm": 1.7369543149313575, "learning_rate": 6.920141612180462e-06, "loss": 0.4785, "step": 18730 }, { "epoch": 0.43733955659276547, "grad_norm": 1.775531860282773, "learning_rate": 6.91638018272502e-06, "loss": 0.4899, "step": 18740 }, { "epoch": 0.43757292882147025, "grad_norm": 1.6502352627930723, "learning_rate": 6.912617481540151e-06, "loss": 0.4819, "step": 18750 }, { "epoch": 0.43780630105017504, "grad_norm": 1.5341842497421845, "learning_rate": 6.90885351112282e-06, "loss": 0.502, "step": 18760 }, { "epoch": 0.4380396732788798, "grad_norm": 1.5706637475425944, "learning_rate": 6.905088273970837e-06, "loss": 0.4622, "step": 18770 }, { "epoch": 0.4382730455075846, "grad_norm": 1.6412391504189177, "learning_rate": 6.901321772582852e-06, "loss": 0.4884, "step": 18780 }, { "epoch": 0.4385064177362894, "grad_norm": 1.937802601414265, "learning_rate": 6.897554009458353e-06, "loss": 0.5022, "step": 18790 }, { "epoch": 0.4387397899649942, "grad_norm": 1.507374548491162, "learning_rate": 6.893784987097667e-06, "loss": 0.4869, "step": 18800 }, { "epoch": 0.438973162193699, "grad_norm": 1.6872424361115335, "learning_rate": 6.890014708001953e-06, "loss": 0.4771, "step": 18810 }, { "epoch": 0.43920653442240376, "grad_norm": 1.9048276971946339, "learning_rate": 6.8862431746732094e-06, "loss": 0.5096, "step": 18820 }, { "epoch": 0.4394399066511085, "grad_norm": 1.4003037473131608, "learning_rate": 6.882470389614262e-06, "loss": 0.4629, "step": 18830 }, { "epoch": 0.4396732788798133, "grad_norm": 2.2111751389703844, "learning_rate": 6.878696355328774e-06, "loss": 0.4783, "step": 18840 }, { "epoch": 0.43990665110851807, "grad_norm": 1.4967755019270976, "learning_rate": 6.874921074321227e-06, "loss": 0.4951, "step": 18850 }, { "epoch": 0.44014002333722285, "grad_norm": 1.6469072862217886, "learning_rate": 6.8711445490969386e-06, "loss": 0.4963, "step": 18860 }, { "epoch": 0.44037339556592764, "grad_norm": 1.9743974862913916, "learning_rate": 6.8673667821620515e-06, "loss": 0.5048, "step": 18870 }, { "epoch": 0.4406067677946324, "grad_norm": 1.3847924788746049, "learning_rate": 6.863587776023527e-06, "loss": 0.471, "step": 18880 }, { "epoch": 0.4408401400233372, "grad_norm": 1.5156637404821591, "learning_rate": 6.8598075331891535e-06, "loss": 0.4848, "step": 18890 }, { "epoch": 0.441073512252042, "grad_norm": 1.5850028085512065, "learning_rate": 6.8560260561675395e-06, "loss": 0.4713, "step": 18900 }, { "epoch": 0.4413068844807468, "grad_norm": 1.643223561138095, "learning_rate": 6.85224334746811e-06, "loss": 0.4951, "step": 18910 }, { "epoch": 0.4415402567094516, "grad_norm": 1.4747189135143046, "learning_rate": 6.848459409601111e-06, "loss": 0.4981, "step": 18920 }, { "epoch": 0.44177362893815636, "grad_norm": 1.677104415785764, "learning_rate": 6.8446742450776004e-06, "loss": 0.4948, "step": 18930 }, { "epoch": 0.44200700116686115, "grad_norm": 2.034467784604965, "learning_rate": 6.840887856409452e-06, "loss": 0.4893, "step": 18940 }, { "epoch": 0.44224037339556593, "grad_norm": 1.6184682180575725, "learning_rate": 6.837100246109355e-06, "loss": 0.4603, "step": 18950 }, { "epoch": 0.4424737456242707, "grad_norm": 1.8271864881670126, "learning_rate": 6.833311416690802e-06, "loss": 0.4719, "step": 18960 }, { "epoch": 0.4427071178529755, "grad_norm": 1.6383268243098432, "learning_rate": 6.829521370668103e-06, "loss": 0.4753, "step": 18970 }, { "epoch": 0.4429404900816803, "grad_norm": 1.8090469179635449, "learning_rate": 6.8257301105563675e-06, "loss": 0.4807, "step": 18980 }, { "epoch": 0.4431738623103851, "grad_norm": 1.9496784212273384, "learning_rate": 6.821937638871517e-06, "loss": 0.4809, "step": 18990 }, { "epoch": 0.44340723453908987, "grad_norm": 1.2626284289393375, "learning_rate": 6.818143958130271e-06, "loss": 0.4564, "step": 19000 }, { "epoch": 0.44364060676779465, "grad_norm": 1.545677358266627, "learning_rate": 6.814349070850159e-06, "loss": 0.4815, "step": 19010 }, { "epoch": 0.44387397899649944, "grad_norm": 1.7886437843796863, "learning_rate": 6.810552979549503e-06, "loss": 0.4875, "step": 19020 }, { "epoch": 0.4441073512252042, "grad_norm": 3.5578464271783226, "learning_rate": 6.8067556867474305e-06, "loss": 0.4511, "step": 19030 }, { "epoch": 0.44434072345390896, "grad_norm": 1.6342947546063116, "learning_rate": 6.802957194963863e-06, "loss": 0.4977, "step": 19040 }, { "epoch": 0.44457409568261375, "grad_norm": 1.5792747327969596, "learning_rate": 6.79915750671952e-06, "loss": 0.5023, "step": 19050 }, { "epoch": 0.44480746791131853, "grad_norm": 1.622711516210288, "learning_rate": 6.795356624535908e-06, "loss": 0.4833, "step": 19060 }, { "epoch": 0.4450408401400233, "grad_norm": 2.0472344721310805, "learning_rate": 6.791554550935338e-06, "loss": 0.4858, "step": 19070 }, { "epoch": 0.4452742123687281, "grad_norm": 1.3580997357222722, "learning_rate": 6.7877512884409e-06, "loss": 0.4703, "step": 19080 }, { "epoch": 0.4455075845974329, "grad_norm": 1.667189448152024, "learning_rate": 6.783946839576479e-06, "loss": 0.4844, "step": 19090 }, { "epoch": 0.4457409568261377, "grad_norm": 1.6072929821748043, "learning_rate": 6.780141206866748e-06, "loss": 0.5028, "step": 19100 }, { "epoch": 0.44597432905484247, "grad_norm": 1.7609300740737346, "learning_rate": 6.776334392837161e-06, "loss": 0.5109, "step": 19110 }, { "epoch": 0.44620770128354725, "grad_norm": 1.774839359295716, "learning_rate": 6.772526400013961e-06, "loss": 0.4869, "step": 19120 }, { "epoch": 0.44644107351225204, "grad_norm": 1.445469645727685, "learning_rate": 6.768717230924169e-06, "loss": 0.4935, "step": 19130 }, { "epoch": 0.4466744457409568, "grad_norm": 1.6315890121513053, "learning_rate": 6.764906888095588e-06, "loss": 0.465, "step": 19140 }, { "epoch": 0.4469078179696616, "grad_norm": 1.6945808429856513, "learning_rate": 6.761095374056803e-06, "loss": 0.4935, "step": 19150 }, { "epoch": 0.4471411901983664, "grad_norm": 1.4203390124160935, "learning_rate": 6.7572826913371704e-06, "loss": 0.4856, "step": 19160 }, { "epoch": 0.4473745624270712, "grad_norm": 1.4690395705044301, "learning_rate": 6.753468842466828e-06, "loss": 0.4842, "step": 19170 }, { "epoch": 0.447607934655776, "grad_norm": 1.5798272919632452, "learning_rate": 6.749653829976683e-06, "loss": 0.4597, "step": 19180 }, { "epoch": 0.44784130688448076, "grad_norm": 1.657929179067109, "learning_rate": 6.74583765639842e-06, "loss": 0.4597, "step": 19190 }, { "epoch": 0.44807467911318555, "grad_norm": 1.683616225710363, "learning_rate": 6.742020324264485e-06, "loss": 0.501, "step": 19200 }, { "epoch": 0.44830805134189033, "grad_norm": 1.797490727224251, "learning_rate": 6.738201836108101e-06, "loss": 0.4615, "step": 19210 }, { "epoch": 0.4485414235705951, "grad_norm": 1.67165168325625, "learning_rate": 6.734382194463258e-06, "loss": 0.4802, "step": 19220 }, { "epoch": 0.4487747957992999, "grad_norm": 1.694581835114702, "learning_rate": 6.730561401864707e-06, "loss": 0.4861, "step": 19230 }, { "epoch": 0.4490081680280047, "grad_norm": 1.9637688130940305, "learning_rate": 6.726739460847964e-06, "loss": 0.4858, "step": 19240 }, { "epoch": 0.4492415402567094, "grad_norm": 1.670003209063851, "learning_rate": 6.722916373949311e-06, "loss": 0.4889, "step": 19250 }, { "epoch": 0.4494749124854142, "grad_norm": 1.6699730726286504, "learning_rate": 6.719092143705786e-06, "loss": 0.5078, "step": 19260 }, { "epoch": 0.449708284714119, "grad_norm": 2.0354958551780644, "learning_rate": 6.715266772655184e-06, "loss": 0.4728, "step": 19270 }, { "epoch": 0.4499416569428238, "grad_norm": 1.608169550385928, "learning_rate": 6.711440263336064e-06, "loss": 0.4888, "step": 19280 }, { "epoch": 0.4501750291715286, "grad_norm": 1.4037436951966618, "learning_rate": 6.707612618287738e-06, "loss": 0.4679, "step": 19290 }, { "epoch": 0.45040840140023336, "grad_norm": 1.4670926474425188, "learning_rate": 6.703783840050266e-06, "loss": 0.5068, "step": 19300 }, { "epoch": 0.45064177362893815, "grad_norm": 1.1651605416513378, "learning_rate": 6.699953931164467e-06, "loss": 0.482, "step": 19310 }, { "epoch": 0.45087514585764293, "grad_norm": 1.7418006168030882, "learning_rate": 6.696122894171906e-06, "loss": 0.491, "step": 19320 }, { "epoch": 0.4511085180863477, "grad_norm": 1.636810707294452, "learning_rate": 6.692290731614901e-06, "loss": 0.4919, "step": 19330 }, { "epoch": 0.4513418903150525, "grad_norm": 1.5514236660591672, "learning_rate": 6.688457446036512e-06, "loss": 0.4806, "step": 19340 }, { "epoch": 0.4515752625437573, "grad_norm": 1.5573501676253616, "learning_rate": 6.6846230399805446e-06, "loss": 0.5098, "step": 19350 }, { "epoch": 0.4518086347724621, "grad_norm": 1.77617659938319, "learning_rate": 6.680787515991554e-06, "loss": 0.4671, "step": 19360 }, { "epoch": 0.45204200700116687, "grad_norm": 1.8165874034543457, "learning_rate": 6.67695087661483e-06, "loss": 0.4399, "step": 19370 }, { "epoch": 0.45227537922987165, "grad_norm": 1.6091870032386457, "learning_rate": 6.673113124396405e-06, "loss": 0.4763, "step": 19380 }, { "epoch": 0.45250875145857644, "grad_norm": 1.4689218020735482, "learning_rate": 6.669274261883053e-06, "loss": 0.4521, "step": 19390 }, { "epoch": 0.45274212368728123, "grad_norm": 1.3659612901793199, "learning_rate": 6.665434291622282e-06, "loss": 0.475, "step": 19400 }, { "epoch": 0.452975495915986, "grad_norm": 1.5714242276051904, "learning_rate": 6.661593216162333e-06, "loss": 0.4732, "step": 19410 }, { "epoch": 0.4532088681446908, "grad_norm": 1.65659918814691, "learning_rate": 6.6577510380521845e-06, "loss": 0.4806, "step": 19420 }, { "epoch": 0.4534422403733956, "grad_norm": 1.4149351632279699, "learning_rate": 6.653907759841546e-06, "loss": 0.4729, "step": 19430 }, { "epoch": 0.4536756126021004, "grad_norm": 1.5821150293981978, "learning_rate": 6.650063384080856e-06, "loss": 0.4932, "step": 19440 }, { "epoch": 0.4539089848308051, "grad_norm": 1.605473934524161, "learning_rate": 6.646217913321279e-06, "loss": 0.4866, "step": 19450 }, { "epoch": 0.4541423570595099, "grad_norm": 1.4954169190403577, "learning_rate": 6.64237135011471e-06, "loss": 0.4671, "step": 19460 }, { "epoch": 0.4543757292882147, "grad_norm": 2.0824683913854463, "learning_rate": 6.638523697013769e-06, "loss": 0.4793, "step": 19470 }, { "epoch": 0.45460910151691947, "grad_norm": 1.6910021907390946, "learning_rate": 6.634674956571796e-06, "loss": 0.485, "step": 19480 }, { "epoch": 0.45484247374562425, "grad_norm": 1.8229934414657871, "learning_rate": 6.630825131342854e-06, "loss": 0.5012, "step": 19490 }, { "epoch": 0.45507584597432904, "grad_norm": 1.4520190788870635, "learning_rate": 6.626974223881728e-06, "loss": 0.4954, "step": 19500 }, { "epoch": 0.4553092182030338, "grad_norm": 1.508859588728731, "learning_rate": 6.623122236743919e-06, "loss": 0.4819, "step": 19510 }, { "epoch": 0.4555425904317386, "grad_norm": 1.2708167350618456, "learning_rate": 6.619269172485645e-06, "loss": 0.4737, "step": 19520 }, { "epoch": 0.4557759626604434, "grad_norm": 2.6573526182437748, "learning_rate": 6.6154150336638375e-06, "loss": 0.4912, "step": 19530 }, { "epoch": 0.4560093348891482, "grad_norm": 1.403709198629826, "learning_rate": 6.611559822836144e-06, "loss": 0.4723, "step": 19540 }, { "epoch": 0.456242707117853, "grad_norm": 1.7598298207388765, "learning_rate": 6.607703542560921e-06, "loss": 0.492, "step": 19550 }, { "epoch": 0.45647607934655776, "grad_norm": 1.5763593556173734, "learning_rate": 6.603846195397234e-06, "loss": 0.4639, "step": 19560 }, { "epoch": 0.45670945157526255, "grad_norm": 1.6534595744152816, "learning_rate": 6.599987783904861e-06, "loss": 0.4822, "step": 19570 }, { "epoch": 0.45694282380396734, "grad_norm": 1.4030293951616464, "learning_rate": 6.596128310644281e-06, "loss": 0.464, "step": 19580 }, { "epoch": 0.4571761960326721, "grad_norm": 1.3966393615025507, "learning_rate": 6.592267778176679e-06, "loss": 0.4675, "step": 19590 }, { "epoch": 0.4574095682613769, "grad_norm": 1.6130932164469467, "learning_rate": 6.588406189063945e-06, "loss": 0.4572, "step": 19600 }, { "epoch": 0.4576429404900817, "grad_norm": 1.7645738869871999, "learning_rate": 6.584543545868669e-06, "loss": 0.4913, "step": 19610 }, { "epoch": 0.4578763127187865, "grad_norm": 1.5196414225801993, "learning_rate": 6.580679851154142e-06, "loss": 0.4803, "step": 19620 }, { "epoch": 0.45810968494749127, "grad_norm": 1.7476772939756295, "learning_rate": 6.576815107484348e-06, "loss": 0.4808, "step": 19630 }, { "epoch": 0.45834305717619606, "grad_norm": 1.7504403366274757, "learning_rate": 6.572949317423971e-06, "loss": 0.494, "step": 19640 }, { "epoch": 0.45857642940490084, "grad_norm": 1.6127635542715555, "learning_rate": 6.5690824835383915e-06, "loss": 0.499, "step": 19650 }, { "epoch": 0.4588098016336056, "grad_norm": 1.6582234156124522, "learning_rate": 6.565214608393676e-06, "loss": 0.4536, "step": 19660 }, { "epoch": 0.45904317386231036, "grad_norm": 1.5491930466202244, "learning_rate": 6.561345694556589e-06, "loss": 0.4692, "step": 19670 }, { "epoch": 0.45927654609101515, "grad_norm": 1.5138661098066075, "learning_rate": 6.5574757445945794e-06, "loss": 0.4654, "step": 19680 }, { "epoch": 0.45950991831971993, "grad_norm": 1.5358334681628154, "learning_rate": 6.553604761075786e-06, "loss": 0.4821, "step": 19690 }, { "epoch": 0.4597432905484247, "grad_norm": 1.5041376295743523, "learning_rate": 6.549732746569033e-06, "loss": 0.4445, "step": 19700 }, { "epoch": 0.4599766627771295, "grad_norm": 1.5383358053336842, "learning_rate": 6.5458597036438275e-06, "loss": 0.482, "step": 19710 }, { "epoch": 0.4602100350058343, "grad_norm": 1.4572627750559823, "learning_rate": 6.541985634870362e-06, "loss": 0.467, "step": 19720 }, { "epoch": 0.4604434072345391, "grad_norm": 1.5010879114196876, "learning_rate": 6.538110542819508e-06, "loss": 0.4529, "step": 19730 }, { "epoch": 0.46067677946324387, "grad_norm": 1.7286777017951183, "learning_rate": 6.534234430062816e-06, "loss": 0.4031, "step": 19740 }, { "epoch": 0.46091015169194866, "grad_norm": 1.6665155724517455, "learning_rate": 6.530357299172511e-06, "loss": 0.491, "step": 19750 }, { "epoch": 0.46114352392065344, "grad_norm": 1.6990054986061196, "learning_rate": 6.526479152721503e-06, "loss": 0.4893, "step": 19760 }, { "epoch": 0.46137689614935823, "grad_norm": 1.4722375665145175, "learning_rate": 6.5225999932833625e-06, "loss": 0.4653, "step": 19770 }, { "epoch": 0.461610268378063, "grad_norm": 1.8708377940504946, "learning_rate": 6.518719823432342e-06, "loss": 0.475, "step": 19780 }, { "epoch": 0.4618436406067678, "grad_norm": 1.7423836649338098, "learning_rate": 6.514838645743364e-06, "loss": 0.477, "step": 19790 }, { "epoch": 0.4620770128354726, "grad_norm": 1.536103427259089, "learning_rate": 6.5109564627920154e-06, "loss": 0.4634, "step": 19800 }, { "epoch": 0.4623103850641774, "grad_norm": 1.4415772965142843, "learning_rate": 6.507073277154551e-06, "loss": 0.4671, "step": 19810 }, { "epoch": 0.46254375729288216, "grad_norm": 1.3787607431798912, "learning_rate": 6.503189091407896e-06, "loss": 0.4948, "step": 19820 }, { "epoch": 0.46277712952158695, "grad_norm": 1.5662577418012178, "learning_rate": 6.499303908129632e-06, "loss": 0.5027, "step": 19830 }, { "epoch": 0.46301050175029174, "grad_norm": 2.455876526067256, "learning_rate": 6.495417729898008e-06, "loss": 0.481, "step": 19840 }, { "epoch": 0.4632438739789965, "grad_norm": 1.802066113352213, "learning_rate": 6.491530559291932e-06, "loss": 0.5081, "step": 19850 }, { "epoch": 0.4634772462077013, "grad_norm": 1.470579369248154, "learning_rate": 6.487642398890971e-06, "loss": 0.4711, "step": 19860 }, { "epoch": 0.46371061843640604, "grad_norm": 1.204493015871823, "learning_rate": 6.483753251275345e-06, "loss": 0.478, "step": 19870 }, { "epoch": 0.46394399066511083, "grad_norm": 1.5361981678019252, "learning_rate": 6.479863119025933e-06, "loss": 0.4585, "step": 19880 }, { "epoch": 0.4641773628938156, "grad_norm": 1.4704242199177893, "learning_rate": 6.4759720047242665e-06, "loss": 0.4968, "step": 19890 }, { "epoch": 0.4644107351225204, "grad_norm": 1.5290524314864562, "learning_rate": 6.4720799109525305e-06, "loss": 0.4832, "step": 19900 }, { "epoch": 0.4646441073512252, "grad_norm": 1.9922552644063547, "learning_rate": 6.468186840293555e-06, "loss": 0.4942, "step": 19910 }, { "epoch": 0.46487747957993, "grad_norm": 1.5302753336955965, "learning_rate": 6.464292795330824e-06, "loss": 0.471, "step": 19920 }, { "epoch": 0.46511085180863476, "grad_norm": 2.3859388748411026, "learning_rate": 6.460397778648464e-06, "loss": 0.4833, "step": 19930 }, { "epoch": 0.46534422403733955, "grad_norm": 1.8837613809636704, "learning_rate": 6.456501792831249e-06, "loss": 0.4323, "step": 19940 }, { "epoch": 0.46557759626604434, "grad_norm": 1.5048127791019081, "learning_rate": 6.452604840464592e-06, "loss": 0.4793, "step": 19950 }, { "epoch": 0.4658109684947491, "grad_norm": 1.4469170483531912, "learning_rate": 6.448706924134554e-06, "loss": 0.4824, "step": 19960 }, { "epoch": 0.4660443407234539, "grad_norm": 1.3663929106410655, "learning_rate": 6.444808046427831e-06, "loss": 0.4665, "step": 19970 }, { "epoch": 0.4662777129521587, "grad_norm": 1.5020329363789808, "learning_rate": 6.440908209931756e-06, "loss": 0.4575, "step": 19980 }, { "epoch": 0.4665110851808635, "grad_norm": 1.57209041962373, "learning_rate": 6.4370074172343e-06, "loss": 0.4793, "step": 19990 }, { "epoch": 0.46674445740956827, "grad_norm": 1.5367212372634533, "learning_rate": 6.433105670924074e-06, "loss": 0.4823, "step": 20000 }, { "epoch": 0.46697782963827306, "grad_norm": 1.5424174259988033, "learning_rate": 6.429202973590311e-06, "loss": 0.4714, "step": 20010 }, { "epoch": 0.46721120186697784, "grad_norm": 1.9385306118225432, "learning_rate": 6.425299327822882e-06, "loss": 0.4946, "step": 20020 }, { "epoch": 0.46744457409568263, "grad_norm": 1.5447888137888002, "learning_rate": 6.421394736212289e-06, "loss": 0.4735, "step": 20030 }, { "epoch": 0.4676779463243874, "grad_norm": 1.39469233659483, "learning_rate": 6.417489201349657e-06, "loss": 0.4596, "step": 20040 }, { "epoch": 0.4679113185530922, "grad_norm": 1.7272252627075508, "learning_rate": 6.413582725826737e-06, "loss": 0.4731, "step": 20050 }, { "epoch": 0.468144690781797, "grad_norm": 1.7797863439066968, "learning_rate": 6.409675312235908e-06, "loss": 0.47, "step": 20060 }, { "epoch": 0.4683780630105018, "grad_norm": 1.5646221289310809, "learning_rate": 6.40576696317017e-06, "loss": 0.4625, "step": 20070 }, { "epoch": 0.4686114352392065, "grad_norm": 1.7377363960248866, "learning_rate": 6.4018576812231404e-06, "loss": 0.4794, "step": 20080 }, { "epoch": 0.4688448074679113, "grad_norm": 1.6400016621072075, "learning_rate": 6.397947468989061e-06, "loss": 0.4963, "step": 20090 }, { "epoch": 0.4690781796966161, "grad_norm": 1.7025110395428202, "learning_rate": 6.394036329062788e-06, "loss": 0.47, "step": 20100 }, { "epoch": 0.46931155192532087, "grad_norm": 1.4455781570508779, "learning_rate": 6.390124264039796e-06, "loss": 0.4622, "step": 20110 }, { "epoch": 0.46954492415402566, "grad_norm": 1.7884975090285833, "learning_rate": 6.386211276516168e-06, "loss": 0.5064, "step": 20120 }, { "epoch": 0.46977829638273044, "grad_norm": 1.3738785941153908, "learning_rate": 6.382297369088601e-06, "loss": 0.4606, "step": 20130 }, { "epoch": 0.47001166861143523, "grad_norm": 1.5919034226992266, "learning_rate": 6.378382544354408e-06, "loss": 0.4785, "step": 20140 }, { "epoch": 0.47024504084014, "grad_norm": 1.4686684635666705, "learning_rate": 6.374466804911506e-06, "loss": 0.4712, "step": 20150 }, { "epoch": 0.4704784130688448, "grad_norm": 1.685804554642149, "learning_rate": 6.370550153358416e-06, "loss": 0.4722, "step": 20160 }, { "epoch": 0.4707117852975496, "grad_norm": 3.4467974421583425, "learning_rate": 6.366632592294271e-06, "loss": 0.4898, "step": 20170 }, { "epoch": 0.4709451575262544, "grad_norm": 1.5766148882765203, "learning_rate": 6.362714124318804e-06, "loss": 0.5004, "step": 20180 }, { "epoch": 0.47117852975495916, "grad_norm": 1.6354399440886802, "learning_rate": 6.35879475203235e-06, "loss": 0.4794, "step": 20190 }, { "epoch": 0.47141190198366395, "grad_norm": 1.5955244390207128, "learning_rate": 6.354874478035844e-06, "loss": 0.4661, "step": 20200 }, { "epoch": 0.47164527421236874, "grad_norm": 1.4453660517800526, "learning_rate": 6.350953304930821e-06, "loss": 0.4717, "step": 20210 }, { "epoch": 0.4718786464410735, "grad_norm": 1.7659278167264092, "learning_rate": 6.347031235319412e-06, "loss": 0.4728, "step": 20220 }, { "epoch": 0.4721120186697783, "grad_norm": 1.5753257601140551, "learning_rate": 6.3431082718043394e-06, "loss": 0.47, "step": 20230 }, { "epoch": 0.4723453908984831, "grad_norm": 1.4806981655515947, "learning_rate": 6.339184416988924e-06, "loss": 0.4829, "step": 20240 }, { "epoch": 0.4725787631271879, "grad_norm": 1.6510505534481181, "learning_rate": 6.335259673477077e-06, "loss": 0.4724, "step": 20250 }, { "epoch": 0.47281213535589267, "grad_norm": 1.5271728580551907, "learning_rate": 6.331334043873294e-06, "loss": 0.4985, "step": 20260 }, { "epoch": 0.47304550758459746, "grad_norm": 1.59442884254792, "learning_rate": 6.327407530782667e-06, "loss": 0.468, "step": 20270 }, { "epoch": 0.47327887981330224, "grad_norm": 1.507250555520505, "learning_rate": 6.3234801368108685e-06, "loss": 0.4815, "step": 20280 }, { "epoch": 0.473512252042007, "grad_norm": 1.7833339981756529, "learning_rate": 6.319551864564157e-06, "loss": 0.4918, "step": 20290 }, { "epoch": 0.47374562427071176, "grad_norm": 1.5950340575316024, "learning_rate": 6.315622716649374e-06, "loss": 0.4728, "step": 20300 }, { "epoch": 0.47397899649941655, "grad_norm": 1.4271865137813218, "learning_rate": 6.311692695673943e-06, "loss": 0.4677, "step": 20310 }, { "epoch": 0.47421236872812134, "grad_norm": 1.5518633140156475, "learning_rate": 6.307761804245865e-06, "loss": 0.4867, "step": 20320 }, { "epoch": 0.4744457409568261, "grad_norm": 1.3413721798472005, "learning_rate": 6.30383004497372e-06, "loss": 0.4794, "step": 20330 }, { "epoch": 0.4746791131855309, "grad_norm": 1.6089368597041556, "learning_rate": 6.299897420466664e-06, "loss": 0.4819, "step": 20340 }, { "epoch": 0.4749124854142357, "grad_norm": 1.511114380179296, "learning_rate": 6.295963933334425e-06, "loss": 0.5, "step": 20350 }, { "epoch": 0.4751458576429405, "grad_norm": 1.577927623059982, "learning_rate": 6.292029586187308e-06, "loss": 0.4776, "step": 20360 }, { "epoch": 0.47537922987164527, "grad_norm": 2.3224883910031338, "learning_rate": 6.288094381636184e-06, "loss": 0.4754, "step": 20370 }, { "epoch": 0.47561260210035006, "grad_norm": 1.7086555747857433, "learning_rate": 6.284158322292494e-06, "loss": 0.4826, "step": 20380 }, { "epoch": 0.47584597432905484, "grad_norm": 1.566740060584028, "learning_rate": 6.28022141076825e-06, "loss": 0.4662, "step": 20390 }, { "epoch": 0.47607934655775963, "grad_norm": 1.5379597483484444, "learning_rate": 6.276283649676024e-06, "loss": 0.4547, "step": 20400 }, { "epoch": 0.4763127187864644, "grad_norm": 1.7122842177307847, "learning_rate": 6.2723450416289544e-06, "loss": 0.4982, "step": 20410 }, { "epoch": 0.4765460910151692, "grad_norm": 1.5162822042389124, "learning_rate": 6.2684055892407435e-06, "loss": 0.5047, "step": 20420 }, { "epoch": 0.476779463243874, "grad_norm": 1.5931858130022782, "learning_rate": 6.264465295125651e-06, "loss": 0.5062, "step": 20430 }, { "epoch": 0.4770128354725788, "grad_norm": 1.4554788699338685, "learning_rate": 6.260524161898497e-06, "loss": 0.4461, "step": 20440 }, { "epoch": 0.47724620770128356, "grad_norm": 1.5154574561660956, "learning_rate": 6.256582192174656e-06, "loss": 0.4678, "step": 20450 }, { "epoch": 0.47747957992998835, "grad_norm": 1.4052486186962654, "learning_rate": 6.252639388570062e-06, "loss": 0.4643, "step": 20460 }, { "epoch": 0.47771295215869314, "grad_norm": 1.76449391896823, "learning_rate": 6.248695753701196e-06, "loss": 0.4368, "step": 20470 }, { "epoch": 0.4779463243873979, "grad_norm": 1.8305147774191173, "learning_rate": 6.244751290185098e-06, "loss": 0.4682, "step": 20480 }, { "epoch": 0.47817969661610266, "grad_norm": 1.3984682749496107, "learning_rate": 6.2408060006393525e-06, "loss": 0.4782, "step": 20490 }, { "epoch": 0.47841306884480744, "grad_norm": 1.7302662643647722, "learning_rate": 6.236859887682094e-06, "loss": 0.4861, "step": 20500 }, { "epoch": 0.47864644107351223, "grad_norm": 1.5920263102305976, "learning_rate": 6.232912953932002e-06, "loss": 0.4618, "step": 20510 }, { "epoch": 0.478879813302217, "grad_norm": 1.8103726241331213, "learning_rate": 6.228965202008303e-06, "loss": 0.4909, "step": 20520 }, { "epoch": 0.4791131855309218, "grad_norm": 1.2823761218311276, "learning_rate": 6.2250166345307675e-06, "loss": 0.4645, "step": 20530 }, { "epoch": 0.4793465577596266, "grad_norm": 1.5829010660305816, "learning_rate": 6.221067254119702e-06, "loss": 0.488, "step": 20540 }, { "epoch": 0.4795799299883314, "grad_norm": 1.6267587285010627, "learning_rate": 6.217117063395957e-06, "loss": 0.4905, "step": 20550 }, { "epoch": 0.47981330221703616, "grad_norm": 1.382427896474281, "learning_rate": 6.21316606498092e-06, "loss": 0.4774, "step": 20560 }, { "epoch": 0.48004667444574095, "grad_norm": 1.7886719217882054, "learning_rate": 6.2092142614965135e-06, "loss": 0.4667, "step": 20570 }, { "epoch": 0.48028004667444574, "grad_norm": 1.6561139993099463, "learning_rate": 6.205261655565192e-06, "loss": 0.4792, "step": 20580 }, { "epoch": 0.4805134189031505, "grad_norm": 1.6482676976240074, "learning_rate": 6.201308249809949e-06, "loss": 0.4963, "step": 20590 }, { "epoch": 0.4807467911318553, "grad_norm": 1.6826049627705537, "learning_rate": 6.197354046854303e-06, "loss": 0.4764, "step": 20600 }, { "epoch": 0.4809801633605601, "grad_norm": 1.5298968085419216, "learning_rate": 6.193399049322303e-06, "loss": 0.4704, "step": 20610 }, { "epoch": 0.4812135355892649, "grad_norm": 1.4062011185600594, "learning_rate": 6.189443259838526e-06, "loss": 0.4608, "step": 20620 }, { "epoch": 0.48144690781796967, "grad_norm": 1.5007140536456458, "learning_rate": 6.185486681028075e-06, "loss": 0.4817, "step": 20630 }, { "epoch": 0.48168028004667446, "grad_norm": 1.527485345801459, "learning_rate": 6.181529315516575e-06, "loss": 0.4537, "step": 20640 }, { "epoch": 0.48191365227537925, "grad_norm": 1.6366386317164707, "learning_rate": 6.177571165930174e-06, "loss": 0.4969, "step": 20650 }, { "epoch": 0.48214702450408403, "grad_norm": 1.6850343960197067, "learning_rate": 6.1736122348955414e-06, "loss": 0.4861, "step": 20660 }, { "epoch": 0.4823803967327888, "grad_norm": 1.5546354442786228, "learning_rate": 6.169652525039866e-06, "loss": 0.4601, "step": 20670 }, { "epoch": 0.4826137689614936, "grad_norm": 1.4932151123044044, "learning_rate": 6.165692038990847e-06, "loss": 0.4675, "step": 20680 }, { "epoch": 0.4828471411901984, "grad_norm": 1.7262223964818832, "learning_rate": 6.161730779376707e-06, "loss": 0.4891, "step": 20690 }, { "epoch": 0.4830805134189031, "grad_norm": 1.5417441376341994, "learning_rate": 6.157768748826175e-06, "loss": 0.4726, "step": 20700 }, { "epoch": 0.4833138856476079, "grad_norm": 1.4418006711508633, "learning_rate": 6.153805949968498e-06, "loss": 0.4523, "step": 20710 }, { "epoch": 0.4835472578763127, "grad_norm": 1.7135244496418391, "learning_rate": 6.14984238543343e-06, "loss": 0.4668, "step": 20720 }, { "epoch": 0.4837806301050175, "grad_norm": 1.4863585367474468, "learning_rate": 6.1458780578512285e-06, "loss": 0.4788, "step": 20730 }, { "epoch": 0.48401400233372227, "grad_norm": 1.558519899004532, "learning_rate": 6.141912969852667e-06, "loss": 0.4756, "step": 20740 }, { "epoch": 0.48424737456242706, "grad_norm": 1.9141841122720331, "learning_rate": 6.137947124069016e-06, "loss": 0.5124, "step": 20750 }, { "epoch": 0.48448074679113184, "grad_norm": 1.3643341312202901, "learning_rate": 6.1339805231320506e-06, "loss": 0.4137, "step": 20760 }, { "epoch": 0.48471411901983663, "grad_norm": 1.6524898597281295, "learning_rate": 6.130013169674047e-06, "loss": 0.4891, "step": 20770 }, { "epoch": 0.4849474912485414, "grad_norm": 1.7804646571382956, "learning_rate": 6.126045066327785e-06, "loss": 0.4958, "step": 20780 }, { "epoch": 0.4851808634772462, "grad_norm": 1.476894914552993, "learning_rate": 6.122076215726535e-06, "loss": 0.4671, "step": 20790 }, { "epoch": 0.485414235705951, "grad_norm": 1.7058803123181234, "learning_rate": 6.1181066205040666e-06, "loss": 0.4761, "step": 20800 }, { "epoch": 0.4856476079346558, "grad_norm": 1.524261523969637, "learning_rate": 6.114136283294645e-06, "loss": 0.4733, "step": 20810 }, { "epoch": 0.48588098016336057, "grad_norm": 1.4572490985977227, "learning_rate": 6.110165206733027e-06, "loss": 0.4956, "step": 20820 }, { "epoch": 0.48611435239206535, "grad_norm": 1.4656623770322765, "learning_rate": 6.10619339345446e-06, "loss": 0.4578, "step": 20830 }, { "epoch": 0.48634772462077014, "grad_norm": 2.160200350366014, "learning_rate": 6.1022208460946765e-06, "loss": 0.4646, "step": 20840 }, { "epoch": 0.4865810968494749, "grad_norm": 1.4163186637651246, "learning_rate": 6.0982475672899036e-06, "loss": 0.4915, "step": 20850 }, { "epoch": 0.4868144690781797, "grad_norm": 1.5445968676277675, "learning_rate": 6.094273559676847e-06, "loss": 0.4778, "step": 20860 }, { "epoch": 0.4870478413068845, "grad_norm": 1.5606052791582548, "learning_rate": 6.090298825892698e-06, "loss": 0.508, "step": 20870 }, { "epoch": 0.4872812135355893, "grad_norm": 1.5592465160430589, "learning_rate": 6.086323368575131e-06, "loss": 0.5104, "step": 20880 }, { "epoch": 0.4875145857642941, "grad_norm": 1.8860210960081583, "learning_rate": 6.082347190362304e-06, "loss": 0.5013, "step": 20890 }, { "epoch": 0.48774795799299886, "grad_norm": 1.5163325150276343, "learning_rate": 6.078370293892842e-06, "loss": 0.4816, "step": 20900 }, { "epoch": 0.4879813302217036, "grad_norm": 1.619168619123594, "learning_rate": 6.074392681805859e-06, "loss": 0.4751, "step": 20910 }, { "epoch": 0.4882147024504084, "grad_norm": 1.5379617336304539, "learning_rate": 6.070414356740939e-06, "loss": 0.4801, "step": 20920 }, { "epoch": 0.48844807467911316, "grad_norm": 1.5696872390894814, "learning_rate": 6.0664353213381395e-06, "loss": 0.4492, "step": 20930 }, { "epoch": 0.48868144690781795, "grad_norm": 1.4924980130137517, "learning_rate": 6.062455578237985e-06, "loss": 0.4855, "step": 20940 }, { "epoch": 0.48891481913652274, "grad_norm": 1.580758478372914, "learning_rate": 6.0584751300814785e-06, "loss": 0.4705, "step": 20950 }, { "epoch": 0.4891481913652275, "grad_norm": 1.5678412113854112, "learning_rate": 6.054493979510082e-06, "loss": 0.4782, "step": 20960 }, { "epoch": 0.4893815635939323, "grad_norm": 1.6973132747915394, "learning_rate": 6.05051212916573e-06, "loss": 0.5071, "step": 20970 }, { "epoch": 0.4896149358226371, "grad_norm": 1.7707640651100194, "learning_rate": 6.046529581690817e-06, "loss": 0.4671, "step": 20980 }, { "epoch": 0.4898483080513419, "grad_norm": 1.873755810218618, "learning_rate": 6.042546339728205e-06, "loss": 0.4659, "step": 20990 }, { "epoch": 0.49008168028004667, "grad_norm": 1.5881654474205482, "learning_rate": 6.038562405921212e-06, "loss": 0.5048, "step": 21000 }, { "epoch": 0.49031505250875146, "grad_norm": 1.2080299172773827, "learning_rate": 6.034577782913617e-06, "loss": 0.4466, "step": 21010 }, { "epoch": 0.49054842473745625, "grad_norm": 1.7713370499308327, "learning_rate": 6.0305924733496565e-06, "loss": 0.482, "step": 21020 }, { "epoch": 0.49078179696616103, "grad_norm": 1.6099469460026798, "learning_rate": 6.026606479874024e-06, "loss": 0.4812, "step": 21030 }, { "epoch": 0.4910151691948658, "grad_norm": 1.419714876175498, "learning_rate": 6.022619805131864e-06, "loss": 0.4634, "step": 21040 }, { "epoch": 0.4912485414235706, "grad_norm": 1.6529516278549274, "learning_rate": 6.018632451768776e-06, "loss": 0.487, "step": 21050 }, { "epoch": 0.4914819136522754, "grad_norm": 1.7465117502772844, "learning_rate": 6.014644422430806e-06, "loss": 0.4869, "step": 21060 }, { "epoch": 0.4917152858809802, "grad_norm": 1.626118426408658, "learning_rate": 6.010655719764452e-06, "loss": 0.5071, "step": 21070 }, { "epoch": 0.49194865810968497, "grad_norm": 1.4696786191790736, "learning_rate": 6.006666346416658e-06, "loss": 0.4487, "step": 21080 }, { "epoch": 0.49218203033838975, "grad_norm": 1.7462287529892804, "learning_rate": 6.002676305034812e-06, "loss": 0.4816, "step": 21090 }, { "epoch": 0.49241540256709454, "grad_norm": 1.711942483968081, "learning_rate": 5.998685598266749e-06, "loss": 0.4898, "step": 21100 }, { "epoch": 0.4926487747957993, "grad_norm": 2.4193711206168746, "learning_rate": 5.994694228760739e-06, "loss": 0.4719, "step": 21110 }, { "epoch": 0.49288214702450406, "grad_norm": 1.6666990162463864, "learning_rate": 5.990702199165497e-06, "loss": 0.5214, "step": 21120 }, { "epoch": 0.49311551925320884, "grad_norm": 1.5286225304111007, "learning_rate": 5.986709512130174e-06, "loss": 0.4826, "step": 21130 }, { "epoch": 0.49334889148191363, "grad_norm": 1.5795599256122095, "learning_rate": 5.982716170304361e-06, "loss": 0.4871, "step": 21140 }, { "epoch": 0.4935822637106184, "grad_norm": 1.5436116363789631, "learning_rate": 5.978722176338075e-06, "loss": 0.4835, "step": 21150 }, { "epoch": 0.4938156359393232, "grad_norm": 1.564264951083172, "learning_rate": 5.974727532881776e-06, "loss": 0.4467, "step": 21160 }, { "epoch": 0.494049008168028, "grad_norm": 1.5580942984431372, "learning_rate": 5.970732242586348e-06, "loss": 0.4701, "step": 21170 }, { "epoch": 0.4942823803967328, "grad_norm": 1.6247186196023287, "learning_rate": 5.966736308103105e-06, "loss": 0.4668, "step": 21180 }, { "epoch": 0.49451575262543757, "grad_norm": 1.607160438452157, "learning_rate": 5.962739732083791e-06, "loss": 0.4676, "step": 21190 }, { "epoch": 0.49474912485414235, "grad_norm": 2.147779021297562, "learning_rate": 5.958742517180576e-06, "loss": 0.4789, "step": 21200 }, { "epoch": 0.49498249708284714, "grad_norm": 3.4819506229417136, "learning_rate": 5.9547446660460514e-06, "loss": 0.4443, "step": 21210 }, { "epoch": 0.4952158693115519, "grad_norm": 1.4343395182197956, "learning_rate": 5.950746181333231e-06, "loss": 0.4708, "step": 21220 }, { "epoch": 0.4954492415402567, "grad_norm": 1.4422866883217313, "learning_rate": 5.946747065695553e-06, "loss": 0.4815, "step": 21230 }, { "epoch": 0.4956826137689615, "grad_norm": 2.2599227209371535, "learning_rate": 5.942747321786868e-06, "loss": 0.4921, "step": 21240 }, { "epoch": 0.4959159859976663, "grad_norm": 1.3208953258222138, "learning_rate": 5.938746952261449e-06, "loss": 0.4625, "step": 21250 }, { "epoch": 0.4961493582263711, "grad_norm": 1.6101844056067325, "learning_rate": 5.93474595977398e-06, "loss": 0.4653, "step": 21260 }, { "epoch": 0.49638273045507586, "grad_norm": 1.5345444218866053, "learning_rate": 5.930744346979563e-06, "loss": 0.4811, "step": 21270 }, { "epoch": 0.49661610268378065, "grad_norm": 1.6138147642304013, "learning_rate": 5.9267421165337055e-06, "loss": 0.4771, "step": 21280 }, { "epoch": 0.49684947491248543, "grad_norm": 1.5700712794848999, "learning_rate": 5.922739271092332e-06, "loss": 0.495, "step": 21290 }, { "epoch": 0.4970828471411902, "grad_norm": 1.5653220775635581, "learning_rate": 5.91873581331177e-06, "loss": 0.48, "step": 21300 }, { "epoch": 0.497316219369895, "grad_norm": 1.5326281561075183, "learning_rate": 5.914731745848754e-06, "loss": 0.4621, "step": 21310 }, { "epoch": 0.4975495915985998, "grad_norm": 1.5648081347576264, "learning_rate": 5.910727071360427e-06, "loss": 0.4891, "step": 21320 }, { "epoch": 0.4977829638273045, "grad_norm": 1.7227894295372626, "learning_rate": 5.9067217925043285e-06, "loss": 0.4582, "step": 21330 }, { "epoch": 0.4980163360560093, "grad_norm": 2.2057200852578567, "learning_rate": 5.902715911938403e-06, "loss": 0.4827, "step": 21340 }, { "epoch": 0.4982497082847141, "grad_norm": 1.441224238717328, "learning_rate": 5.898709432320997e-06, "loss": 0.4585, "step": 21350 }, { "epoch": 0.4984830805134189, "grad_norm": 1.6415010321067338, "learning_rate": 5.894702356310848e-06, "loss": 0.4779, "step": 21360 }, { "epoch": 0.4987164527421237, "grad_norm": 1.8480170572833037, "learning_rate": 5.890694686567091e-06, "loss": 0.4757, "step": 21370 }, { "epoch": 0.49894982497082846, "grad_norm": 1.5360596581644967, "learning_rate": 5.886686425749261e-06, "loss": 0.4856, "step": 21380 }, { "epoch": 0.49918319719953325, "grad_norm": 1.5600607807846454, "learning_rate": 5.8826775765172775e-06, "loss": 0.4635, "step": 21390 }, { "epoch": 0.49941656942823803, "grad_norm": 1.5200687257592267, "learning_rate": 5.878668141531453e-06, "loss": 0.4592, "step": 21400 }, { "epoch": 0.4996499416569428, "grad_norm": 1.6594968034513267, "learning_rate": 5.874658123452492e-06, "loss": 0.4667, "step": 21410 }, { "epoch": 0.4998833138856476, "grad_norm": 1.8418435240876667, "learning_rate": 5.870647524941484e-06, "loss": 0.5003, "step": 21420 }, { "epoch": 0.5001166861143523, "grad_norm": 1.4267972738281778, "learning_rate": 5.866636348659901e-06, "loss": 0.4641, "step": 21430 }, { "epoch": 0.5003500583430571, "grad_norm": 1.704669624932715, "learning_rate": 5.862624597269599e-06, "loss": 0.4856, "step": 21440 }, { "epoch": 0.5005834305717619, "grad_norm": 1.466990184368295, "learning_rate": 5.858612273432821e-06, "loss": 0.484, "step": 21450 }, { "epoch": 0.5008168028004667, "grad_norm": 1.468092062940701, "learning_rate": 5.8545993798121845e-06, "loss": 0.4787, "step": 21460 }, { "epoch": 0.5010501750291715, "grad_norm": 1.5613892151265303, "learning_rate": 5.850585919070685e-06, "loss": 0.4866, "step": 21470 }, { "epoch": 0.5012835472578763, "grad_norm": 1.6396304324153006, "learning_rate": 5.8465718938716965e-06, "loss": 0.4912, "step": 21480 }, { "epoch": 0.5015169194865811, "grad_norm": 1.6875253123972411, "learning_rate": 5.84255730687897e-06, "loss": 0.4853, "step": 21490 }, { "epoch": 0.5017502917152858, "grad_norm": 1.5365520667048875, "learning_rate": 5.838542160756622e-06, "loss": 0.4671, "step": 21500 }, { "epoch": 0.5019836639439906, "grad_norm": 1.5827629915501988, "learning_rate": 5.834526458169147e-06, "loss": 0.472, "step": 21510 }, { "epoch": 0.5022170361726954, "grad_norm": 1.28126165533037, "learning_rate": 5.830510201781405e-06, "loss": 0.4861, "step": 21520 }, { "epoch": 0.5024504084014002, "grad_norm": 1.755936326426827, "learning_rate": 5.8264933942586275e-06, "loss": 0.4826, "step": 21530 }, { "epoch": 0.502683780630105, "grad_norm": 1.8166837168397214, "learning_rate": 5.822476038266406e-06, "loss": 0.4879, "step": 21540 }, { "epoch": 0.5029171528588098, "grad_norm": 1.71744794540839, "learning_rate": 5.818458136470698e-06, "loss": 0.4473, "step": 21550 }, { "epoch": 0.5031505250875146, "grad_norm": 1.9100749606057894, "learning_rate": 5.814439691537828e-06, "loss": 0.4662, "step": 21560 }, { "epoch": 0.5033838973162194, "grad_norm": 1.2962531705091511, "learning_rate": 5.810420706134474e-06, "loss": 0.4665, "step": 21570 }, { "epoch": 0.5036172695449241, "grad_norm": 1.6027646062880887, "learning_rate": 5.806401182927676e-06, "loss": 0.5027, "step": 21580 }, { "epoch": 0.5038506417736289, "grad_norm": 1.536482970736376, "learning_rate": 5.802381124584831e-06, "loss": 0.4508, "step": 21590 }, { "epoch": 0.5040840140023337, "grad_norm": 1.4739290235522764, "learning_rate": 5.798360533773693e-06, "loss": 0.4835, "step": 21600 }, { "epoch": 0.5043173862310385, "grad_norm": 1.545837622873865, "learning_rate": 5.794339413162363e-06, "loss": 0.4593, "step": 21610 }, { "epoch": 0.5045507584597433, "grad_norm": 1.5897146264992286, "learning_rate": 5.7903177654192985e-06, "loss": 0.4724, "step": 21620 }, { "epoch": 0.5047841306884481, "grad_norm": 1.6055188412630963, "learning_rate": 5.786295593213308e-06, "loss": 0.4662, "step": 21630 }, { "epoch": 0.5050175029171529, "grad_norm": 1.9420924353496267, "learning_rate": 5.782272899213546e-06, "loss": 0.4778, "step": 21640 }, { "epoch": 0.5052508751458576, "grad_norm": 1.5878831081654936, "learning_rate": 5.778249686089511e-06, "loss": 0.472, "step": 21650 }, { "epoch": 0.5054842473745624, "grad_norm": 1.5161998066756839, "learning_rate": 5.774225956511049e-06, "loss": 0.4744, "step": 21660 }, { "epoch": 0.5057176196032672, "grad_norm": 1.64443568622009, "learning_rate": 5.77020171314835e-06, "loss": 0.4595, "step": 21670 }, { "epoch": 0.505950991831972, "grad_norm": 1.4291505978573427, "learning_rate": 5.76617695867194e-06, "loss": 0.4921, "step": 21680 }, { "epoch": 0.5061843640606768, "grad_norm": 1.2698182834122584, "learning_rate": 5.762151695752688e-06, "loss": 0.4439, "step": 21690 }, { "epoch": 0.5064177362893816, "grad_norm": 1.4451354763200364, "learning_rate": 5.758125927061801e-06, "loss": 0.4613, "step": 21700 }, { "epoch": 0.5066511085180864, "grad_norm": 1.5882742664039462, "learning_rate": 5.754099655270819e-06, "loss": 0.4833, "step": 21710 }, { "epoch": 0.5068844807467912, "grad_norm": 1.8341143756208722, "learning_rate": 5.750072883051616e-06, "loss": 0.4984, "step": 21720 }, { "epoch": 0.5071178529754959, "grad_norm": 1.4635206208669023, "learning_rate": 5.7460456130764006e-06, "loss": 0.48, "step": 21730 }, { "epoch": 0.5073512252042007, "grad_norm": 1.650614175558713, "learning_rate": 5.742017848017711e-06, "loss": 0.4814, "step": 21740 }, { "epoch": 0.5075845974329055, "grad_norm": 1.709781839062892, "learning_rate": 5.737989590548412e-06, "loss": 0.5037, "step": 21750 }, { "epoch": 0.5078179696616103, "grad_norm": 1.5572504228100854, "learning_rate": 5.733960843341695e-06, "loss": 0.4635, "step": 21760 }, { "epoch": 0.5080513418903151, "grad_norm": 1.2864207661340374, "learning_rate": 5.72993160907108e-06, "loss": 0.5054, "step": 21770 }, { "epoch": 0.5082847141190199, "grad_norm": 1.590221359379389, "learning_rate": 5.7259018904104065e-06, "loss": 0.4565, "step": 21780 }, { "epoch": 0.5085180863477247, "grad_norm": 1.851770360908581, "learning_rate": 5.721871690033838e-06, "loss": 0.4683, "step": 21790 }, { "epoch": 0.5087514585764294, "grad_norm": 1.5448361660631513, "learning_rate": 5.717841010615856e-06, "loss": 0.4647, "step": 21800 }, { "epoch": 0.5089848308051342, "grad_norm": 1.5570929817583905, "learning_rate": 5.7138098548312584e-06, "loss": 0.4889, "step": 21810 }, { "epoch": 0.509218203033839, "grad_norm": 1.5735217818784824, "learning_rate": 5.709778225355166e-06, "loss": 0.4584, "step": 21820 }, { "epoch": 0.5094515752625438, "grad_norm": 1.4990794836418009, "learning_rate": 5.705746124863004e-06, "loss": 0.4918, "step": 21830 }, { "epoch": 0.5096849474912486, "grad_norm": 1.521289401519192, "learning_rate": 5.701713556030519e-06, "loss": 0.4659, "step": 21840 }, { "epoch": 0.5099183197199533, "grad_norm": 1.7798518867430162, "learning_rate": 5.697680521533764e-06, "loss": 0.4825, "step": 21850 }, { "epoch": 0.5101516919486581, "grad_norm": 1.868868027327476, "learning_rate": 5.693647024049101e-06, "loss": 0.5063, "step": 21860 }, { "epoch": 0.5103850641773628, "grad_norm": 1.6225760760456265, "learning_rate": 5.689613066253201e-06, "loss": 0.4625, "step": 21870 }, { "epoch": 0.5106184364060676, "grad_norm": 1.6178775876775868, "learning_rate": 5.6855786508230396e-06, "loss": 0.4568, "step": 21880 }, { "epoch": 0.5108518086347724, "grad_norm": 1.4823730817808067, "learning_rate": 5.681543780435894e-06, "loss": 0.4548, "step": 21890 }, { "epoch": 0.5110851808634772, "grad_norm": 1.4440017219271224, "learning_rate": 5.677508457769347e-06, "loss": 0.4826, "step": 21900 }, { "epoch": 0.511318553092182, "grad_norm": 1.62435672677255, "learning_rate": 5.67347268550128e-06, "loss": 0.4436, "step": 21910 }, { "epoch": 0.5115519253208868, "grad_norm": 1.8750409243350818, "learning_rate": 5.669436466309872e-06, "loss": 0.4799, "step": 21920 }, { "epoch": 0.5117852975495916, "grad_norm": 1.536099177372227, "learning_rate": 5.6653998028736e-06, "loss": 0.4534, "step": 21930 }, { "epoch": 0.5120186697782964, "grad_norm": 1.6810895301885027, "learning_rate": 5.661362697871235e-06, "loss": 0.4701, "step": 21940 }, { "epoch": 0.5122520420070011, "grad_norm": 1.470920848750823, "learning_rate": 5.657325153981841e-06, "loss": 0.4932, "step": 21950 }, { "epoch": 0.5124854142357059, "grad_norm": 1.7394769053017187, "learning_rate": 5.6532871738847705e-06, "loss": 0.4713, "step": 21960 }, { "epoch": 0.5127187864644107, "grad_norm": 1.6342400705880042, "learning_rate": 5.649248760259672e-06, "loss": 0.5042, "step": 21970 }, { "epoch": 0.5129521586931155, "grad_norm": 1.7925357928835146, "learning_rate": 5.645209915786477e-06, "loss": 0.4682, "step": 21980 }, { "epoch": 0.5131855309218203, "grad_norm": 1.661364846865691, "learning_rate": 5.641170643145404e-06, "loss": 0.4671, "step": 21990 }, { "epoch": 0.5134189031505251, "grad_norm": 1.482846358932161, "learning_rate": 5.637130945016953e-06, "loss": 0.4853, "step": 22000 }, { "epoch": 0.5136522753792299, "grad_norm": 1.5427509968312239, "learning_rate": 5.6330908240819116e-06, "loss": 0.4839, "step": 22010 }, { "epoch": 0.5138856476079346, "grad_norm": 1.7118082447413012, "learning_rate": 5.629050283021345e-06, "loss": 0.4489, "step": 22020 }, { "epoch": 0.5141190198366394, "grad_norm": 1.6945182946169395, "learning_rate": 5.625009324516597e-06, "loss": 0.4864, "step": 22030 }, { "epoch": 0.5143523920653442, "grad_norm": 1.802596284764978, "learning_rate": 5.620967951249287e-06, "loss": 0.4846, "step": 22040 }, { "epoch": 0.514585764294049, "grad_norm": 1.463009213289786, "learning_rate": 5.616926165901315e-06, "loss": 0.472, "step": 22050 }, { "epoch": 0.5148191365227538, "grad_norm": 1.4015311885217407, "learning_rate": 5.61288397115485e-06, "loss": 0.4564, "step": 22060 }, { "epoch": 0.5150525087514586, "grad_norm": 1.5694254452264047, "learning_rate": 5.608841369692331e-06, "loss": 0.449, "step": 22070 }, { "epoch": 0.5152858809801634, "grad_norm": 1.3957901876211767, "learning_rate": 5.604798364196472e-06, "loss": 0.4845, "step": 22080 }, { "epoch": 0.5155192532088682, "grad_norm": 1.591198236693787, "learning_rate": 5.600754957350252e-06, "loss": 0.4705, "step": 22090 }, { "epoch": 0.5157526254375729, "grad_norm": 1.7639218706783193, "learning_rate": 5.596711151836917e-06, "loss": 0.4878, "step": 22100 }, { "epoch": 0.5159859976662777, "grad_norm": 1.2692190653921553, "learning_rate": 5.592666950339979e-06, "loss": 0.4745, "step": 22110 }, { "epoch": 0.5162193698949825, "grad_norm": 1.5738037885986862, "learning_rate": 5.588622355543208e-06, "loss": 0.4915, "step": 22120 }, { "epoch": 0.5164527421236873, "grad_norm": 1.3215417975392998, "learning_rate": 5.584577370130643e-06, "loss": 0.4634, "step": 22130 }, { "epoch": 0.5166861143523921, "grad_norm": 1.6435747216028667, "learning_rate": 5.580531996786572e-06, "loss": 0.455, "step": 22140 }, { "epoch": 0.5169194865810969, "grad_norm": 1.6698587220136676, "learning_rate": 5.576486238195551e-06, "loss": 0.4665, "step": 22150 }, { "epoch": 0.5171528588098017, "grad_norm": 1.6009178273492233, "learning_rate": 5.572440097042384e-06, "loss": 0.4606, "step": 22160 }, { "epoch": 0.5173862310385065, "grad_norm": 1.7639166025540784, "learning_rate": 5.568393576012133e-06, "loss": 0.4741, "step": 22170 }, { "epoch": 0.5176196032672112, "grad_norm": 1.4473836679976355, "learning_rate": 5.564346677790109e-06, "loss": 0.4777, "step": 22180 }, { "epoch": 0.517852975495916, "grad_norm": 1.6965982051198687, "learning_rate": 5.560299405061874e-06, "loss": 0.483, "step": 22190 }, { "epoch": 0.5180863477246208, "grad_norm": 1.5796571128764796, "learning_rate": 5.556251760513243e-06, "loss": 0.501, "step": 22200 }, { "epoch": 0.5183197199533256, "grad_norm": 1.458252695475878, "learning_rate": 5.5522037468302725e-06, "loss": 0.4453, "step": 22210 }, { "epoch": 0.5185530921820304, "grad_norm": 1.5252033727864482, "learning_rate": 5.5481553666992636e-06, "loss": 0.4799, "step": 22220 }, { "epoch": 0.5187864644107352, "grad_norm": 1.4060902753081923, "learning_rate": 5.5441066228067655e-06, "loss": 0.4886, "step": 22230 }, { "epoch": 0.51901983663944, "grad_norm": 1.4760543505781656, "learning_rate": 5.540057517839565e-06, "loss": 0.4785, "step": 22240 }, { "epoch": 0.5192532088681447, "grad_norm": 1.5497950648278682, "learning_rate": 5.53600805448469e-06, "loss": 0.4709, "step": 22250 }, { "epoch": 0.5194865810968494, "grad_norm": 1.299606995963547, "learning_rate": 5.531958235429404e-06, "loss": 0.4787, "step": 22260 }, { "epoch": 0.5197199533255542, "grad_norm": 1.6267966543182801, "learning_rate": 5.527908063361211e-06, "loss": 0.4392, "step": 22270 }, { "epoch": 0.519953325554259, "grad_norm": 1.594023575659021, "learning_rate": 5.523857540967842e-06, "loss": 0.4916, "step": 22280 }, { "epoch": 0.5201866977829638, "grad_norm": 1.6641787869566778, "learning_rate": 5.519806670937269e-06, "loss": 0.4641, "step": 22290 }, { "epoch": 0.5204200700116686, "grad_norm": 1.400383403046175, "learning_rate": 5.515755455957689e-06, "loss": 0.4503, "step": 22300 }, { "epoch": 0.5206534422403734, "grad_norm": 1.4523824416278854, "learning_rate": 5.5117038987175306e-06, "loss": 0.4468, "step": 22310 }, { "epoch": 0.5208868144690781, "grad_norm": 1.820473617437514, "learning_rate": 5.5076520019054465e-06, "loss": 0.4789, "step": 22320 }, { "epoch": 0.5211201866977829, "grad_norm": 1.534087784811894, "learning_rate": 5.503599768210318e-06, "loss": 0.4574, "step": 22330 }, { "epoch": 0.5213535589264877, "grad_norm": 1.465515561499401, "learning_rate": 5.499547200321251e-06, "loss": 0.4637, "step": 22340 }, { "epoch": 0.5215869311551925, "grad_norm": 1.7190435130617356, "learning_rate": 5.4954943009275665e-06, "loss": 0.5079, "step": 22350 }, { "epoch": 0.5218203033838973, "grad_norm": 1.518724377454095, "learning_rate": 5.491441072718813e-06, "loss": 0.4689, "step": 22360 }, { "epoch": 0.5220536756126021, "grad_norm": 1.5409725162722756, "learning_rate": 5.487387518384753e-06, "loss": 0.46, "step": 22370 }, { "epoch": 0.5222870478413069, "grad_norm": 1.6192659161058165, "learning_rate": 5.483333640615366e-06, "loss": 0.4563, "step": 22380 }, { "epoch": 0.5225204200700116, "grad_norm": 1.6994607393328927, "learning_rate": 5.479279442100847e-06, "loss": 0.5102, "step": 22390 }, { "epoch": 0.5227537922987164, "grad_norm": 1.573565261440138, "learning_rate": 5.475224925531604e-06, "loss": 0.4698, "step": 22400 }, { "epoch": 0.5229871645274212, "grad_norm": 1.5269885588276546, "learning_rate": 5.4711700935982535e-06, "loss": 0.4542, "step": 22410 }, { "epoch": 0.523220536756126, "grad_norm": 1.4473100958635277, "learning_rate": 5.4671149489916276e-06, "loss": 0.4684, "step": 22420 }, { "epoch": 0.5234539089848308, "grad_norm": 3.1863901808987576, "learning_rate": 5.4630594944027575e-06, "loss": 0.4731, "step": 22430 }, { "epoch": 0.5236872812135356, "grad_norm": 1.5231544453174761, "learning_rate": 5.4590037325228854e-06, "loss": 0.4725, "step": 22440 }, { "epoch": 0.5239206534422404, "grad_norm": 1.8617140551374294, "learning_rate": 5.454947666043456e-06, "loss": 0.4939, "step": 22450 }, { "epoch": 0.5241540256709452, "grad_norm": 1.558357662909367, "learning_rate": 5.4508912976561175e-06, "loss": 0.4559, "step": 22460 }, { "epoch": 0.5243873978996499, "grad_norm": 1.6044593011953343, "learning_rate": 5.446834630052717e-06, "loss": 0.4424, "step": 22470 }, { "epoch": 0.5246207701283547, "grad_norm": 1.589440457579395, "learning_rate": 5.442777665925299e-06, "loss": 0.4675, "step": 22480 }, { "epoch": 0.5248541423570595, "grad_norm": 1.7153768188948273, "learning_rate": 5.438720407966109e-06, "loss": 0.4728, "step": 22490 }, { "epoch": 0.5250875145857643, "grad_norm": 1.6669382026460802, "learning_rate": 5.434662858867583e-06, "loss": 0.4814, "step": 22500 }, { "epoch": 0.5253208868144691, "grad_norm": 1.708817881369932, "learning_rate": 5.4306050213223516e-06, "loss": 0.4783, "step": 22510 }, { "epoch": 0.5255542590431739, "grad_norm": 1.6680033415034012, "learning_rate": 5.4265468980232385e-06, "loss": 0.4496, "step": 22520 }, { "epoch": 0.5257876312718787, "grad_norm": 1.5779032462805775, "learning_rate": 5.422488491663254e-06, "loss": 0.4405, "step": 22530 }, { "epoch": 0.5260210035005835, "grad_norm": 1.726325108512117, "learning_rate": 5.4184298049355985e-06, "loss": 0.4502, "step": 22540 }, { "epoch": 0.5262543757292882, "grad_norm": 1.8196503605918468, "learning_rate": 5.414370840533658e-06, "loss": 0.4739, "step": 22550 }, { "epoch": 0.526487747957993, "grad_norm": 1.7823349656506184, "learning_rate": 5.4103116011510045e-06, "loss": 0.4674, "step": 22560 }, { "epoch": 0.5267211201866978, "grad_norm": 1.8079985565983532, "learning_rate": 5.406252089481389e-06, "loss": 0.4843, "step": 22570 }, { "epoch": 0.5269544924154026, "grad_norm": 1.7368960056224652, "learning_rate": 5.402192308218742e-06, "loss": 0.4823, "step": 22580 }, { "epoch": 0.5271878646441074, "grad_norm": 1.6924959575780532, "learning_rate": 5.398132260057182e-06, "loss": 0.4642, "step": 22590 }, { "epoch": 0.5274212368728122, "grad_norm": 1.749665530969434, "learning_rate": 5.394071947690993e-06, "loss": 0.4702, "step": 22600 }, { "epoch": 0.527654609101517, "grad_norm": 1.5029937914206384, "learning_rate": 5.390011373814642e-06, "loss": 0.4495, "step": 22610 }, { "epoch": 0.5278879813302217, "grad_norm": 1.6249619300837297, "learning_rate": 5.385950541122766e-06, "loss": 0.4818, "step": 22620 }, { "epoch": 0.5281213535589265, "grad_norm": 1.2652973815637643, "learning_rate": 5.381889452310177e-06, "loss": 0.5136, "step": 22630 }, { "epoch": 0.5283547257876313, "grad_norm": 1.5131083428670113, "learning_rate": 5.377828110071853e-06, "loss": 0.484, "step": 22640 }, { "epoch": 0.5285880980163361, "grad_norm": 1.5547250709668583, "learning_rate": 5.373766517102944e-06, "loss": 0.4776, "step": 22650 }, { "epoch": 0.5288214702450409, "grad_norm": 1.4658458094879656, "learning_rate": 5.369704676098764e-06, "loss": 0.4875, "step": 22660 }, { "epoch": 0.5290548424737457, "grad_norm": 1.4565974695922352, "learning_rate": 5.3656425897547895e-06, "loss": 0.4542, "step": 22670 }, { "epoch": 0.5292882147024504, "grad_norm": 1.468114004982631, "learning_rate": 5.361580260766664e-06, "loss": 0.4619, "step": 22680 }, { "epoch": 0.5295215869311551, "grad_norm": 1.726787310035152, "learning_rate": 5.357517691830192e-06, "loss": 0.4896, "step": 22690 }, { "epoch": 0.5297549591598599, "grad_norm": 1.447327100031297, "learning_rate": 5.353454885641336e-06, "loss": 0.4841, "step": 22700 }, { "epoch": 0.5299883313885647, "grad_norm": 1.6673769809133667, "learning_rate": 5.349391844896212e-06, "loss": 0.4684, "step": 22710 }, { "epoch": 0.5302217036172695, "grad_norm": 1.5967095746350612, "learning_rate": 5.3453285722910975e-06, "loss": 0.4829, "step": 22720 }, { "epoch": 0.5304550758459743, "grad_norm": 1.7337467960052344, "learning_rate": 5.341265070522423e-06, "loss": 0.4953, "step": 22730 }, { "epoch": 0.5306884480746791, "grad_norm": 1.6461051041001826, "learning_rate": 5.3372013422867676e-06, "loss": 0.4784, "step": 22740 }, { "epoch": 0.5309218203033839, "grad_norm": 1.6032335934450548, "learning_rate": 5.333137390280863e-06, "loss": 0.4928, "step": 22750 }, { "epoch": 0.5311551925320886, "grad_norm": 1.6978700121332442, "learning_rate": 5.329073217201589e-06, "loss": 0.4579, "step": 22760 }, { "epoch": 0.5313885647607934, "grad_norm": 1.528267894229959, "learning_rate": 5.325008825745975e-06, "loss": 0.4523, "step": 22770 }, { "epoch": 0.5316219369894982, "grad_norm": 1.660413186932253, "learning_rate": 5.320944218611188e-06, "loss": 0.4724, "step": 22780 }, { "epoch": 0.531855309218203, "grad_norm": 1.578380349914356, "learning_rate": 5.316879398494547e-06, "loss": 0.4705, "step": 22790 }, { "epoch": 0.5320886814469078, "grad_norm": 1.6733841757361585, "learning_rate": 5.3128143680935055e-06, "loss": 0.4672, "step": 22800 }, { "epoch": 0.5323220536756126, "grad_norm": 1.595641258884535, "learning_rate": 5.30874913010566e-06, "loss": 0.4527, "step": 22810 }, { "epoch": 0.5325554259043174, "grad_norm": 1.5183593834889073, "learning_rate": 5.304683687228745e-06, "loss": 0.5056, "step": 22820 }, { "epoch": 0.5327887981330222, "grad_norm": 1.7115537224468993, "learning_rate": 5.300618042160626e-06, "loss": 0.4659, "step": 22830 }, { "epoch": 0.5330221703617269, "grad_norm": 1.8948103716594056, "learning_rate": 5.296552197599312e-06, "loss": 0.4766, "step": 22840 }, { "epoch": 0.5332555425904317, "grad_norm": 1.8479181246704384, "learning_rate": 5.292486156242934e-06, "loss": 0.5126, "step": 22850 }, { "epoch": 0.5334889148191365, "grad_norm": 1.7199298142860238, "learning_rate": 5.288419920789761e-06, "loss": 0.4698, "step": 22860 }, { "epoch": 0.5337222870478413, "grad_norm": 1.6929652129841546, "learning_rate": 5.284353493938189e-06, "loss": 0.4633, "step": 22870 }, { "epoch": 0.5339556592765461, "grad_norm": 1.5723087724606362, "learning_rate": 5.280286878386739e-06, "loss": 0.4666, "step": 22880 }, { "epoch": 0.5341890315052509, "grad_norm": 1.124960109910678, "learning_rate": 5.276220076834058e-06, "loss": 0.4624, "step": 22890 }, { "epoch": 0.5344224037339557, "grad_norm": 1.6479909187227006, "learning_rate": 5.272153091978918e-06, "loss": 0.4765, "step": 22900 }, { "epoch": 0.5346557759626605, "grad_norm": 1.6091710388690068, "learning_rate": 5.268085926520211e-06, "loss": 0.4857, "step": 22910 }, { "epoch": 0.5348891481913652, "grad_norm": 1.5608380678865257, "learning_rate": 5.264018583156951e-06, "loss": 0.4903, "step": 22920 }, { "epoch": 0.53512252042007, "grad_norm": 1.4432201462774341, "learning_rate": 5.259951064588267e-06, "loss": 0.4594, "step": 22930 }, { "epoch": 0.5353558926487748, "grad_norm": 1.7082117654552411, "learning_rate": 5.255883373513405e-06, "loss": 0.4493, "step": 22940 }, { "epoch": 0.5355892648774796, "grad_norm": 1.4517314519141749, "learning_rate": 5.251815512631728e-06, "loss": 0.4552, "step": 22950 }, { "epoch": 0.5358226371061844, "grad_norm": 1.7998029062143441, "learning_rate": 5.247747484642709e-06, "loss": 0.468, "step": 22960 }, { "epoch": 0.5360560093348892, "grad_norm": 1.4824941511906633, "learning_rate": 5.243679292245934e-06, "loss": 0.468, "step": 22970 }, { "epoch": 0.536289381563594, "grad_norm": 1.6473523656340006, "learning_rate": 5.2396109381410965e-06, "loss": 0.4792, "step": 22980 }, { "epoch": 0.5365227537922987, "grad_norm": 1.6487463145141081, "learning_rate": 5.235542425027996e-06, "loss": 0.4719, "step": 22990 }, { "epoch": 0.5367561260210035, "grad_norm": 1.4240247152009902, "learning_rate": 5.231473755606541e-06, "loss": 0.4362, "step": 23000 }, { "epoch": 0.5369894982497083, "grad_norm": 1.8033807084126678, "learning_rate": 5.227404932576742e-06, "loss": 0.4697, "step": 23010 }, { "epoch": 0.5372228704784131, "grad_norm": 1.5802211444838505, "learning_rate": 5.223335958638711e-06, "loss": 0.4696, "step": 23020 }, { "epoch": 0.5374562427071179, "grad_norm": 1.4987694826063351, "learning_rate": 5.219266836492661e-06, "loss": 0.4209, "step": 23030 }, { "epoch": 0.5376896149358227, "grad_norm": 1.6330833239865312, "learning_rate": 5.215197568838902e-06, "loss": 0.496, "step": 23040 }, { "epoch": 0.5379229871645275, "grad_norm": 1.5647613436402998, "learning_rate": 5.211128158377844e-06, "loss": 0.4579, "step": 23050 }, { "epoch": 0.5381563593932323, "grad_norm": 1.464501036177597, "learning_rate": 5.207058607809986e-06, "loss": 0.4712, "step": 23060 }, { "epoch": 0.538389731621937, "grad_norm": 1.8415902106876652, "learning_rate": 5.202988919835925e-06, "loss": 0.4606, "step": 23070 }, { "epoch": 0.5386231038506418, "grad_norm": 1.8310620896620153, "learning_rate": 5.1989190971563475e-06, "loss": 0.4722, "step": 23080 }, { "epoch": 0.5388564760793465, "grad_norm": 2.0553393712739547, "learning_rate": 5.194849142472029e-06, "loss": 0.4839, "step": 23090 }, { "epoch": 0.5390898483080513, "grad_norm": 1.4368303353848906, "learning_rate": 5.190779058483832e-06, "loss": 0.4594, "step": 23100 }, { "epoch": 0.5393232205367561, "grad_norm": 1.712270401827161, "learning_rate": 5.186708847892709e-06, "loss": 0.4651, "step": 23110 }, { "epoch": 0.5395565927654609, "grad_norm": 1.3137596217903174, "learning_rate": 5.182638513399688e-06, "loss": 0.4587, "step": 23120 }, { "epoch": 0.5397899649941656, "grad_norm": 1.571825307681819, "learning_rate": 5.17856805770589e-06, "loss": 0.4667, "step": 23130 }, { "epoch": 0.5400233372228704, "grad_norm": 1.792916687180707, "learning_rate": 5.174497483512506e-06, "loss": 0.4576, "step": 23140 }, { "epoch": 0.5402567094515752, "grad_norm": 1.6309939410912757, "learning_rate": 5.170426793520813e-06, "loss": 0.4749, "step": 23150 }, { "epoch": 0.54049008168028, "grad_norm": 1.7962392937986755, "learning_rate": 5.166355990432164e-06, "loss": 0.4882, "step": 23160 }, { "epoch": 0.5407234539089848, "grad_norm": 1.8480451567236553, "learning_rate": 5.162285076947982e-06, "loss": 0.5046, "step": 23170 }, { "epoch": 0.5409568261376896, "grad_norm": 1.5754938476220788, "learning_rate": 5.158214055769768e-06, "loss": 0.4905, "step": 23180 }, { "epoch": 0.5411901983663944, "grad_norm": 1.8566571993377459, "learning_rate": 5.154142929599094e-06, "loss": 0.4586, "step": 23190 }, { "epoch": 0.5414235705950992, "grad_norm": 1.5117488201523068, "learning_rate": 5.150071701137601e-06, "loss": 0.4708, "step": 23200 }, { "epoch": 0.5416569428238039, "grad_norm": 1.7389311427663823, "learning_rate": 5.146000373086997e-06, "loss": 0.4396, "step": 23210 }, { "epoch": 0.5418903150525087, "grad_norm": 1.5244369042804715, "learning_rate": 5.141928948149055e-06, "loss": 0.4519, "step": 23220 }, { "epoch": 0.5421236872812135, "grad_norm": 1.341357102003452, "learning_rate": 5.137857429025618e-06, "loss": 0.47, "step": 23230 }, { "epoch": 0.5423570595099183, "grad_norm": 1.4900651194749397, "learning_rate": 5.133785818418584e-06, "loss": 0.459, "step": 23240 }, { "epoch": 0.5425904317386231, "grad_norm": 1.6910040912225863, "learning_rate": 5.129714119029918e-06, "loss": 0.4725, "step": 23250 }, { "epoch": 0.5428238039673279, "grad_norm": 1.365508388911062, "learning_rate": 5.12564233356164e-06, "loss": 0.4678, "step": 23260 }, { "epoch": 0.5430571761960327, "grad_norm": 1.6940146340136801, "learning_rate": 5.121570464715828e-06, "loss": 0.4619, "step": 23270 }, { "epoch": 0.5432905484247375, "grad_norm": 2.025718708021807, "learning_rate": 5.1174985151946155e-06, "loss": 0.4637, "step": 23280 }, { "epoch": 0.5435239206534422, "grad_norm": 1.5615748234571103, "learning_rate": 5.113426487700193e-06, "loss": 0.4724, "step": 23290 }, { "epoch": 0.543757292882147, "grad_norm": 1.5165002497142201, "learning_rate": 5.1093543849347935e-06, "loss": 0.4437, "step": 23300 }, { "epoch": 0.5439906651108518, "grad_norm": 1.50182710357171, "learning_rate": 5.105282209600712e-06, "loss": 0.4702, "step": 23310 }, { "epoch": 0.5442240373395566, "grad_norm": 1.508064529819712, "learning_rate": 5.10120996440028e-06, "loss": 0.4835, "step": 23320 }, { "epoch": 0.5444574095682614, "grad_norm": 1.4741598020901148, "learning_rate": 5.097137652035885e-06, "loss": 0.4826, "step": 23330 }, { "epoch": 0.5446907817969662, "grad_norm": 1.614030038649043, "learning_rate": 5.093065275209954e-06, "loss": 0.4524, "step": 23340 }, { "epoch": 0.544924154025671, "grad_norm": 1.6834588214264365, "learning_rate": 5.088992836624958e-06, "loss": 0.4899, "step": 23350 }, { "epoch": 0.5451575262543757, "grad_norm": 1.5194784712185807, "learning_rate": 5.084920338983407e-06, "loss": 0.4718, "step": 23360 }, { "epoch": 0.5453908984830805, "grad_norm": 1.6213459779583703, "learning_rate": 5.080847784987853e-06, "loss": 0.4498, "step": 23370 }, { "epoch": 0.5456242707117853, "grad_norm": 1.5925317115807425, "learning_rate": 5.076775177340886e-06, "loss": 0.456, "step": 23380 }, { "epoch": 0.5458576429404901, "grad_norm": 1.7430549020420483, "learning_rate": 5.072702518745127e-06, "loss": 0.4705, "step": 23390 }, { "epoch": 0.5460910151691949, "grad_norm": 1.439264858054237, "learning_rate": 5.068629811903236e-06, "loss": 0.4458, "step": 23400 }, { "epoch": 0.5463243873978997, "grad_norm": 4.275903448146921, "learning_rate": 5.064557059517903e-06, "loss": 0.4827, "step": 23410 }, { "epoch": 0.5465577596266045, "grad_norm": 1.5681964620821334, "learning_rate": 5.060484264291846e-06, "loss": 0.4682, "step": 23420 }, { "epoch": 0.5467911318553093, "grad_norm": 1.5860054446236078, "learning_rate": 5.056411428927814e-06, "loss": 0.4655, "step": 23430 }, { "epoch": 0.547024504084014, "grad_norm": 1.5871674024192854, "learning_rate": 5.052338556128584e-06, "loss": 0.4785, "step": 23440 }, { "epoch": 0.5472578763127188, "grad_norm": 1.5644294732575381, "learning_rate": 5.048265648596954e-06, "loss": 0.472, "step": 23450 }, { "epoch": 0.5474912485414236, "grad_norm": 1.5463499301622468, "learning_rate": 5.044192709035747e-06, "loss": 0.4849, "step": 23460 }, { "epoch": 0.5477246207701284, "grad_norm": 2.4304067293877316, "learning_rate": 5.0401197401478075e-06, "loss": 0.4938, "step": 23470 }, { "epoch": 0.5479579929988332, "grad_norm": 1.5877859345344467, "learning_rate": 5.036046744636003e-06, "loss": 0.4695, "step": 23480 }, { "epoch": 0.548191365227538, "grad_norm": 1.8904892480990292, "learning_rate": 5.031973725203207e-06, "loss": 0.4251, "step": 23490 }, { "epoch": 0.5484247374562428, "grad_norm": 1.468755985044099, "learning_rate": 5.0279006845523215e-06, "loss": 0.4487, "step": 23500 }, { "epoch": 0.5486581096849474, "grad_norm": 1.6162822034341573, "learning_rate": 5.023827625386259e-06, "loss": 0.4598, "step": 23510 }, { "epoch": 0.5488914819136522, "grad_norm": 1.579241221803778, "learning_rate": 5.019754550407941e-06, "loss": 0.469, "step": 23520 }, { "epoch": 0.549124854142357, "grad_norm": 1.7465287705467385, "learning_rate": 5.0156814623202996e-06, "loss": 0.4679, "step": 23530 }, { "epoch": 0.5493582263710618, "grad_norm": 1.8096064227678115, "learning_rate": 5.011608363826281e-06, "loss": 0.464, "step": 23540 }, { "epoch": 0.5495915985997666, "grad_norm": 1.3505624693986769, "learning_rate": 5.007535257628836e-06, "loss": 0.4672, "step": 23550 }, { "epoch": 0.5498249708284714, "grad_norm": 1.3665553035499416, "learning_rate": 5.0034621464309155e-06, "loss": 0.453, "step": 23560 }, { "epoch": 0.5500583430571762, "grad_norm": 1.987744271042441, "learning_rate": 4.999389032935479e-06, "loss": 0.4923, "step": 23570 }, { "epoch": 0.550291715285881, "grad_norm": 1.53363387237399, "learning_rate": 4.995315919845487e-06, "loss": 0.4445, "step": 23580 }, { "epoch": 0.5505250875145857, "grad_norm": 1.489531043127412, "learning_rate": 4.991242809863899e-06, "loss": 0.453, "step": 23590 }, { "epoch": 0.5507584597432905, "grad_norm": 1.481370255183648, "learning_rate": 4.987169705693671e-06, "loss": 0.457, "step": 23600 }, { "epoch": 0.5509918319719953, "grad_norm": 1.843621518691417, "learning_rate": 4.9830966100377565e-06, "loss": 0.454, "step": 23610 }, { "epoch": 0.5512252042007001, "grad_norm": 1.5679454529278412, "learning_rate": 4.979023525599104e-06, "loss": 0.4895, "step": 23620 }, { "epoch": 0.5514585764294049, "grad_norm": 1.2556915580129644, "learning_rate": 4.974950455080655e-06, "loss": 0.4374, "step": 23630 }, { "epoch": 0.5516919486581097, "grad_norm": 1.723922732716804, "learning_rate": 4.970877401185338e-06, "loss": 0.4691, "step": 23640 }, { "epoch": 0.5519253208868145, "grad_norm": 1.561601855911686, "learning_rate": 4.966804366616075e-06, "loss": 0.4691, "step": 23650 }, { "epoch": 0.5521586931155192, "grad_norm": 1.5569850413615371, "learning_rate": 4.962731354075774e-06, "loss": 0.4571, "step": 23660 }, { "epoch": 0.552392065344224, "grad_norm": 1.807702295418931, "learning_rate": 4.958658366267324e-06, "loss": 0.4443, "step": 23670 }, { "epoch": 0.5526254375729288, "grad_norm": 1.5990412519160002, "learning_rate": 4.954585405893605e-06, "loss": 0.4671, "step": 23680 }, { "epoch": 0.5528588098016336, "grad_norm": 1.4881956963620024, "learning_rate": 4.9505124756574745e-06, "loss": 0.4412, "step": 23690 }, { "epoch": 0.5530921820303384, "grad_norm": 1.4738816016100358, "learning_rate": 4.946439578261769e-06, "loss": 0.4538, "step": 23700 }, { "epoch": 0.5533255542590432, "grad_norm": 1.717594356008125, "learning_rate": 4.942366716409304e-06, "loss": 0.4804, "step": 23710 }, { "epoch": 0.553558926487748, "grad_norm": 1.4676836145567695, "learning_rate": 4.938293892802875e-06, "loss": 0.4688, "step": 23720 }, { "epoch": 0.5537922987164527, "grad_norm": 1.6937774377638402, "learning_rate": 4.934221110145249e-06, "loss": 0.4827, "step": 23730 }, { "epoch": 0.5540256709451575, "grad_norm": 1.5558625600381615, "learning_rate": 4.930148371139163e-06, "loss": 0.4424, "step": 23740 }, { "epoch": 0.5542590431738623, "grad_norm": 1.7574151591787481, "learning_rate": 4.926075678487331e-06, "loss": 0.4745, "step": 23750 }, { "epoch": 0.5544924154025671, "grad_norm": 1.8811331135127178, "learning_rate": 4.922003034892433e-06, "loss": 0.4646, "step": 23760 }, { "epoch": 0.5547257876312719, "grad_norm": 1.5972388881275594, "learning_rate": 4.917930443057115e-06, "loss": 0.4879, "step": 23770 }, { "epoch": 0.5549591598599767, "grad_norm": 1.500395163925958, "learning_rate": 4.9138579056839916e-06, "loss": 0.4989, "step": 23780 }, { "epoch": 0.5551925320886815, "grad_norm": 1.5035495132752261, "learning_rate": 4.909785425475641e-06, "loss": 0.4513, "step": 23790 }, { "epoch": 0.5554259043173863, "grad_norm": 1.600011568758636, "learning_rate": 4.905713005134603e-06, "loss": 0.4716, "step": 23800 }, { "epoch": 0.555659276546091, "grad_norm": 1.6030212158067048, "learning_rate": 4.901640647363375e-06, "loss": 0.4676, "step": 23810 }, { "epoch": 0.5558926487747958, "grad_norm": 1.5928330953767704, "learning_rate": 4.897568354864416e-06, "loss": 0.4699, "step": 23820 }, { "epoch": 0.5561260210035006, "grad_norm": 1.5245874167853577, "learning_rate": 4.893496130340142e-06, "loss": 0.4742, "step": 23830 }, { "epoch": 0.5563593932322054, "grad_norm": 1.7676292779485796, "learning_rate": 4.8894239764929205e-06, "loss": 0.4879, "step": 23840 }, { "epoch": 0.5565927654609102, "grad_norm": 1.5732348189234455, "learning_rate": 4.885351896025076e-06, "loss": 0.454, "step": 23850 }, { "epoch": 0.556826137689615, "grad_norm": 1.5897368717292573, "learning_rate": 4.881279891638882e-06, "loss": 0.4504, "step": 23860 }, { "epoch": 0.5570595099183198, "grad_norm": 2.7200030195784537, "learning_rate": 4.877207966036564e-06, "loss": 0.4623, "step": 23870 }, { "epoch": 0.5572928821470245, "grad_norm": 1.6361015518286963, "learning_rate": 4.87313612192029e-06, "loss": 0.4807, "step": 23880 }, { "epoch": 0.5575262543757293, "grad_norm": 1.7126799781329092, "learning_rate": 4.86906436199218e-06, "loss": 0.4497, "step": 23890 }, { "epoch": 0.5577596266044341, "grad_norm": 1.5993618070352715, "learning_rate": 4.864992688954295e-06, "loss": 0.4723, "step": 23900 }, { "epoch": 0.5579929988331389, "grad_norm": 1.5272754092202938, "learning_rate": 4.860921105508637e-06, "loss": 0.4511, "step": 23910 }, { "epoch": 0.5582263710618437, "grad_norm": 1.6159284616009666, "learning_rate": 4.85684961435715e-06, "loss": 0.4307, "step": 23920 }, { "epoch": 0.5584597432905484, "grad_norm": 1.664088834263047, "learning_rate": 4.852778218201722e-06, "loss": 0.4441, "step": 23930 }, { "epoch": 0.5586931155192532, "grad_norm": 1.646347135533881, "learning_rate": 4.848706919744166e-06, "loss": 0.4924, "step": 23940 }, { "epoch": 0.558926487747958, "grad_norm": 1.7797190977867199, "learning_rate": 4.844635721686244e-06, "loss": 0.4565, "step": 23950 }, { "epoch": 0.5591598599766627, "grad_norm": 1.8689168968068295, "learning_rate": 4.840564626729639e-06, "loss": 0.4756, "step": 23960 }, { "epoch": 0.5593932322053675, "grad_norm": 2.310598049190883, "learning_rate": 4.836493637575973e-06, "loss": 0.4541, "step": 23970 }, { "epoch": 0.5596266044340723, "grad_norm": 1.4993235834279781, "learning_rate": 4.832422756926798e-06, "loss": 0.4688, "step": 23980 }, { "epoch": 0.5598599766627771, "grad_norm": 1.5837244580464487, "learning_rate": 4.828351987483588e-06, "loss": 0.4583, "step": 23990 }, { "epoch": 0.5600933488914819, "grad_norm": 1.6247051389627192, "learning_rate": 4.824281331947749e-06, "loss": 0.4555, "step": 24000 }, { "epoch": 0.5603267211201867, "grad_norm": 1.6705833984630343, "learning_rate": 4.8202107930206105e-06, "loss": 0.4555, "step": 24010 }, { "epoch": 0.5605600933488915, "grad_norm": 1.4806108806002667, "learning_rate": 4.816140373403424e-06, "loss": 0.4413, "step": 24020 }, { "epoch": 0.5607934655775962, "grad_norm": 1.7852365318138337, "learning_rate": 4.812070075797359e-06, "loss": 0.4787, "step": 24030 }, { "epoch": 0.561026837806301, "grad_norm": 1.742281643779623, "learning_rate": 4.807999902903507e-06, "loss": 0.4613, "step": 24040 }, { "epoch": 0.5612602100350058, "grad_norm": 1.5915129393246932, "learning_rate": 4.8039298574228796e-06, "loss": 0.4862, "step": 24050 }, { "epoch": 0.5614935822637106, "grad_norm": 1.902826156060591, "learning_rate": 4.799859942056396e-06, "loss": 0.478, "step": 24060 }, { "epoch": 0.5617269544924154, "grad_norm": 1.473170552236364, "learning_rate": 4.7957901595048955e-06, "loss": 0.467, "step": 24070 }, { "epoch": 0.5619603267211202, "grad_norm": 1.3525135137648132, "learning_rate": 4.7917205124691284e-06, "loss": 0.4709, "step": 24080 }, { "epoch": 0.562193698949825, "grad_norm": 1.5197375734904879, "learning_rate": 4.7876510036497515e-06, "loss": 0.4691, "step": 24090 }, { "epoch": 0.5624270711785297, "grad_norm": 1.5506376832713291, "learning_rate": 4.7835816357473344e-06, "loss": 0.4554, "step": 24100 }, { "epoch": 0.5626604434072345, "grad_norm": 1.5124858604421196, "learning_rate": 4.7795124114623495e-06, "loss": 0.4639, "step": 24110 }, { "epoch": 0.5628938156359393, "grad_norm": 1.511781556477571, "learning_rate": 4.775443333495178e-06, "loss": 0.4583, "step": 24120 }, { "epoch": 0.5631271878646441, "grad_norm": 1.5687528791990752, "learning_rate": 4.771374404546098e-06, "loss": 0.4681, "step": 24130 }, { "epoch": 0.5633605600933489, "grad_norm": 1.4735926665753887, "learning_rate": 4.767305627315295e-06, "loss": 0.4653, "step": 24140 }, { "epoch": 0.5635939323220537, "grad_norm": 1.8340931069273279, "learning_rate": 4.763237004502852e-06, "loss": 0.4471, "step": 24150 }, { "epoch": 0.5638273045507585, "grad_norm": 1.4285544902157163, "learning_rate": 4.759168538808744e-06, "loss": 0.4883, "step": 24160 }, { "epoch": 0.5640606767794633, "grad_norm": 1.748478730104028, "learning_rate": 4.75510023293285e-06, "loss": 0.4734, "step": 24170 }, { "epoch": 0.564294049008168, "grad_norm": 1.5976346687322565, "learning_rate": 4.751032089574939e-06, "loss": 0.468, "step": 24180 }, { "epoch": 0.5645274212368728, "grad_norm": 1.900681469044529, "learning_rate": 4.7469641114346714e-06, "loss": 0.4674, "step": 24190 }, { "epoch": 0.5647607934655776, "grad_norm": 1.4198689477701392, "learning_rate": 4.742896301211599e-06, "loss": 0.479, "step": 24200 }, { "epoch": 0.5649941656942824, "grad_norm": 1.8334218085695255, "learning_rate": 4.738828661605163e-06, "loss": 0.4514, "step": 24210 }, { "epoch": 0.5652275379229872, "grad_norm": 1.6643340917848943, "learning_rate": 4.7347611953146904e-06, "loss": 0.4561, "step": 24220 }, { "epoch": 0.565460910151692, "grad_norm": 1.3491933398141218, "learning_rate": 4.730693905039392e-06, "loss": 0.448, "step": 24230 }, { "epoch": 0.5656942823803968, "grad_norm": 1.6785155554290334, "learning_rate": 4.726626793478364e-06, "loss": 0.4408, "step": 24240 }, { "epoch": 0.5659276546091015, "grad_norm": 1.4839933235886957, "learning_rate": 4.722559863330584e-06, "loss": 0.4453, "step": 24250 }, { "epoch": 0.5661610268378063, "grad_norm": 1.4236151804069148, "learning_rate": 4.718493117294907e-06, "loss": 0.4459, "step": 24260 }, { "epoch": 0.5663943990665111, "grad_norm": 1.7810190029888315, "learning_rate": 4.714426558070067e-06, "loss": 0.4846, "step": 24270 }, { "epoch": 0.5666277712952159, "grad_norm": 1.8317841062937483, "learning_rate": 4.710360188354675e-06, "loss": 0.4491, "step": 24280 }, { "epoch": 0.5668611435239207, "grad_norm": 1.7563798542973947, "learning_rate": 4.706294010847217e-06, "loss": 0.474, "step": 24290 }, { "epoch": 0.5670945157526255, "grad_norm": 1.5222115762619495, "learning_rate": 4.702228028246045e-06, "loss": 0.4663, "step": 24300 }, { "epoch": 0.5673278879813303, "grad_norm": 1.4173481245510524, "learning_rate": 4.698162243249392e-06, "loss": 0.4338, "step": 24310 }, { "epoch": 0.5675612602100351, "grad_norm": 1.7577984749975863, "learning_rate": 4.694096658555351e-06, "loss": 0.4734, "step": 24320 }, { "epoch": 0.5677946324387398, "grad_norm": 1.6389131673408148, "learning_rate": 4.690031276861886e-06, "loss": 0.4545, "step": 24330 }, { "epoch": 0.5680280046674445, "grad_norm": 1.7707520658532747, "learning_rate": 4.685966100866826e-06, "loss": 0.4656, "step": 24340 }, { "epoch": 0.5682613768961493, "grad_norm": 1.722931639436991, "learning_rate": 4.681901133267864e-06, "loss": 0.4556, "step": 24350 }, { "epoch": 0.5684947491248541, "grad_norm": 1.5622773920763928, "learning_rate": 4.677836376762553e-06, "loss": 0.4461, "step": 24360 }, { "epoch": 0.5687281213535589, "grad_norm": 1.473236108755791, "learning_rate": 4.6737718340483064e-06, "loss": 0.4568, "step": 24370 }, { "epoch": 0.5689614935822637, "grad_norm": 1.4373809727676155, "learning_rate": 4.669707507822398e-06, "loss": 0.4788, "step": 24380 }, { "epoch": 0.5691948658109685, "grad_norm": 3.0049765208555708, "learning_rate": 4.665643400781953e-06, "loss": 0.4733, "step": 24390 }, { "epoch": 0.5694282380396732, "grad_norm": 1.562817658103015, "learning_rate": 4.661579515623957e-06, "loss": 0.4707, "step": 24400 }, { "epoch": 0.569661610268378, "grad_norm": 1.4143648771700505, "learning_rate": 4.657515855045246e-06, "loss": 0.4485, "step": 24410 }, { "epoch": 0.5698949824970828, "grad_norm": 1.884685915358589, "learning_rate": 4.6534524217425045e-06, "loss": 0.4717, "step": 24420 }, { "epoch": 0.5701283547257876, "grad_norm": 1.3504258728828684, "learning_rate": 4.64938921841227e-06, "loss": 0.4656, "step": 24430 }, { "epoch": 0.5703617269544924, "grad_norm": 1.3881903990661928, "learning_rate": 4.6453262477509255e-06, "loss": 0.4417, "step": 24440 }, { "epoch": 0.5705950991831972, "grad_norm": 1.504395895044618, "learning_rate": 4.641263512454697e-06, "loss": 0.4796, "step": 24450 }, { "epoch": 0.570828471411902, "grad_norm": 1.6329802461818408, "learning_rate": 4.6372010152196604e-06, "loss": 0.4379, "step": 24460 }, { "epoch": 0.5710618436406067, "grad_norm": 1.6597017038386754, "learning_rate": 4.633138758741731e-06, "loss": 0.4616, "step": 24470 }, { "epoch": 0.5712952158693115, "grad_norm": 1.4300630143108388, "learning_rate": 4.629076745716659e-06, "loss": 0.4541, "step": 24480 }, { "epoch": 0.5715285880980163, "grad_norm": 1.481019375931577, "learning_rate": 4.625014978840041e-06, "loss": 0.4633, "step": 24490 }, { "epoch": 0.5717619603267211, "grad_norm": 1.5130537887957174, "learning_rate": 4.620953460807307e-06, "loss": 0.4871, "step": 24500 }, { "epoch": 0.5719953325554259, "grad_norm": 1.438349771006125, "learning_rate": 4.616892194313721e-06, "loss": 0.4685, "step": 24510 }, { "epoch": 0.5722287047841307, "grad_norm": 1.5700194302224404, "learning_rate": 4.61283118205438e-06, "loss": 0.4804, "step": 24520 }, { "epoch": 0.5724620770128355, "grad_norm": 1.6708956449368215, "learning_rate": 4.608770426724215e-06, "loss": 0.4931, "step": 24530 }, { "epoch": 0.5726954492415403, "grad_norm": 1.8654501353617265, "learning_rate": 4.604709931017984e-06, "loss": 0.4626, "step": 24540 }, { "epoch": 0.572928821470245, "grad_norm": 1.5206902158267037, "learning_rate": 4.600649697630272e-06, "loss": 0.4681, "step": 24550 }, { "epoch": 0.5731621936989498, "grad_norm": 1.5376243320000753, "learning_rate": 4.596589729255493e-06, "loss": 0.4518, "step": 24560 }, { "epoch": 0.5733955659276546, "grad_norm": 1.4805904093804083, "learning_rate": 4.592530028587883e-06, "loss": 0.4484, "step": 24570 }, { "epoch": 0.5736289381563594, "grad_norm": 1.5620025743857981, "learning_rate": 4.5884705983215e-06, "loss": 0.4594, "step": 24580 }, { "epoch": 0.5738623103850642, "grad_norm": 1.5198643633001152, "learning_rate": 4.584411441150225e-06, "loss": 0.4817, "step": 24590 }, { "epoch": 0.574095682613769, "grad_norm": 1.6459552178594627, "learning_rate": 4.580352559767753e-06, "loss": 0.4844, "step": 24600 }, { "epoch": 0.5743290548424738, "grad_norm": 1.8267045867723308, "learning_rate": 4.5762939568676025e-06, "loss": 0.4543, "step": 24610 }, { "epoch": 0.5745624270711785, "grad_norm": 1.8373529575067606, "learning_rate": 4.572235635143102e-06, "loss": 0.4503, "step": 24620 }, { "epoch": 0.5747957992998833, "grad_norm": 1.6039537260587786, "learning_rate": 4.5681775972873946e-06, "loss": 0.4625, "step": 24630 }, { "epoch": 0.5750291715285881, "grad_norm": 1.578379568291334, "learning_rate": 4.5641198459934375e-06, "loss": 0.4994, "step": 24640 }, { "epoch": 0.5752625437572929, "grad_norm": 1.5818736800786894, "learning_rate": 4.560062383953994e-06, "loss": 0.4703, "step": 24650 }, { "epoch": 0.5754959159859977, "grad_norm": 1.4841228943891707, "learning_rate": 4.556005213861639e-06, "loss": 0.4635, "step": 24660 }, { "epoch": 0.5757292882147025, "grad_norm": 1.6230702394315635, "learning_rate": 4.5519483384087495e-06, "loss": 0.48, "step": 24670 }, { "epoch": 0.5759626604434073, "grad_norm": 1.7415071431599836, "learning_rate": 4.547891760287512e-06, "loss": 0.4812, "step": 24680 }, { "epoch": 0.5761960326721121, "grad_norm": 1.5923704398719152, "learning_rate": 4.543835482189911e-06, "loss": 0.4768, "step": 24690 }, { "epoch": 0.5764294049008168, "grad_norm": 1.575333271391385, "learning_rate": 4.5397795068077375e-06, "loss": 0.4627, "step": 24700 }, { "epoch": 0.5766627771295216, "grad_norm": 1.6093319473091643, "learning_rate": 4.535723836832573e-06, "loss": 0.4306, "step": 24710 }, { "epoch": 0.5768961493582264, "grad_norm": 1.573829635563104, "learning_rate": 4.531668474955805e-06, "loss": 0.4594, "step": 24720 }, { "epoch": 0.5771295215869312, "grad_norm": 1.650627852706191, "learning_rate": 4.5276134238686125e-06, "loss": 0.455, "step": 24730 }, { "epoch": 0.577362893815636, "grad_norm": 1.8086739923211406, "learning_rate": 4.5235586862619674e-06, "loss": 0.4531, "step": 24740 }, { "epoch": 0.5775962660443408, "grad_norm": 1.4816910062123247, "learning_rate": 4.5195042648266354e-06, "loss": 0.4764, "step": 24750 }, { "epoch": 0.5778296382730455, "grad_norm": 1.4403829821300134, "learning_rate": 4.5154501622531736e-06, "loss": 0.45, "step": 24760 }, { "epoch": 0.5780630105017502, "grad_norm": 1.39553700574931, "learning_rate": 4.5113963812319236e-06, "loss": 0.4845, "step": 24770 }, { "epoch": 0.578296382730455, "grad_norm": 1.3409314186603756, "learning_rate": 4.507342924453017e-06, "loss": 0.433, "step": 24780 }, { "epoch": 0.5785297549591598, "grad_norm": 1.648560978256431, "learning_rate": 4.503289794606368e-06, "loss": 0.4904, "step": 24790 }, { "epoch": 0.5787631271878646, "grad_norm": 1.8034430000434685, "learning_rate": 4.4992369943816775e-06, "loss": 0.4603, "step": 24800 }, { "epoch": 0.5789964994165694, "grad_norm": 1.5881332626682931, "learning_rate": 4.495184526468422e-06, "loss": 0.4514, "step": 24810 }, { "epoch": 0.5792298716452742, "grad_norm": 1.650597739217934, "learning_rate": 4.491132393555861e-06, "loss": 0.4619, "step": 24820 }, { "epoch": 0.579463243873979, "grad_norm": 1.520587404019462, "learning_rate": 4.487080598333034e-06, "loss": 0.4383, "step": 24830 }, { "epoch": 0.5796966161026837, "grad_norm": 1.5046062775859712, "learning_rate": 4.4830291434887495e-06, "loss": 0.5002, "step": 24840 }, { "epoch": 0.5799299883313885, "grad_norm": 1.5206791681506235, "learning_rate": 4.478978031711597e-06, "loss": 0.4945, "step": 24850 }, { "epoch": 0.5801633605600933, "grad_norm": 1.600055388128127, "learning_rate": 4.4749272656899365e-06, "loss": 0.4612, "step": 24860 }, { "epoch": 0.5803967327887981, "grad_norm": 1.5233287246910636, "learning_rate": 4.470876848111894e-06, "loss": 0.4918, "step": 24870 }, { "epoch": 0.5806301050175029, "grad_norm": 1.5477285000196674, "learning_rate": 4.46682678166537e-06, "loss": 0.4675, "step": 24880 }, { "epoch": 0.5808634772462077, "grad_norm": 1.64580597652462, "learning_rate": 4.4627770690380305e-06, "loss": 0.4642, "step": 24890 }, { "epoch": 0.5810968494749125, "grad_norm": 1.6686969992305563, "learning_rate": 4.458727712917305e-06, "loss": 0.48, "step": 24900 }, { "epoch": 0.5813302217036173, "grad_norm": 1.6165993660714992, "learning_rate": 4.454678715990389e-06, "loss": 0.4573, "step": 24910 }, { "epoch": 0.581563593932322, "grad_norm": 1.5835665097136422, "learning_rate": 4.450630080944235e-06, "loss": 0.4765, "step": 24920 }, { "epoch": 0.5817969661610268, "grad_norm": 1.5421167834464935, "learning_rate": 4.446581810465563e-06, "loss": 0.4694, "step": 24930 }, { "epoch": 0.5820303383897316, "grad_norm": 1.8925235120344592, "learning_rate": 4.442533907240843e-06, "loss": 0.4426, "step": 24940 }, { "epoch": 0.5822637106184364, "grad_norm": 1.759554408451435, "learning_rate": 4.438486373956306e-06, "loss": 0.4589, "step": 24950 }, { "epoch": 0.5824970828471412, "grad_norm": 1.7943627645639826, "learning_rate": 4.4344392132979355e-06, "loss": 0.4812, "step": 24960 }, { "epoch": 0.582730455075846, "grad_norm": 1.5717608681669317, "learning_rate": 4.430392427951471e-06, "loss": 0.4577, "step": 24970 }, { "epoch": 0.5829638273045508, "grad_norm": 1.3805547810488774, "learning_rate": 4.426346020602398e-06, "loss": 0.4774, "step": 24980 }, { "epoch": 0.5831971995332556, "grad_norm": 1.4172718792722951, "learning_rate": 4.422299993935954e-06, "loss": 0.473, "step": 24990 }, { "epoch": 0.5834305717619603, "grad_norm": 1.4015556171613959, "learning_rate": 4.418254350637126e-06, "loss": 0.4341, "step": 25000 }, { "epoch": 0.5836639439906651, "grad_norm": 1.8866699946756604, "learning_rate": 4.4142090933906404e-06, "loss": 0.4552, "step": 25010 }, { "epoch": 0.5838973162193699, "grad_norm": 1.3131257629005284, "learning_rate": 4.410164224880973e-06, "loss": 0.467, "step": 25020 }, { "epoch": 0.5841306884480747, "grad_norm": 1.567191436981886, "learning_rate": 4.4061197477923415e-06, "loss": 0.4541, "step": 25030 }, { "epoch": 0.5843640606767795, "grad_norm": 1.3363754676239306, "learning_rate": 4.402075664808699e-06, "loss": 0.4695, "step": 25040 }, { "epoch": 0.5845974329054843, "grad_norm": 1.6308624089031527, "learning_rate": 4.398031978613742e-06, "loss": 0.4716, "step": 25050 }, { "epoch": 0.5848308051341891, "grad_norm": 2.0581627246082985, "learning_rate": 4.393988691890902e-06, "loss": 0.4843, "step": 25060 }, { "epoch": 0.5850641773628938, "grad_norm": 1.7245095887237911, "learning_rate": 4.389945807323346e-06, "loss": 0.478, "step": 25070 }, { "epoch": 0.5852975495915986, "grad_norm": 1.5103255624203615, "learning_rate": 4.385903327593971e-06, "loss": 0.4569, "step": 25080 }, { "epoch": 0.5855309218203034, "grad_norm": 1.3725143065184413, "learning_rate": 4.381861255385409e-06, "loss": 0.4993, "step": 25090 }, { "epoch": 0.5857642940490082, "grad_norm": 1.6529387461591938, "learning_rate": 4.3778195933800206e-06, "loss": 0.4878, "step": 25100 }, { "epoch": 0.585997666277713, "grad_norm": 1.327703088389983, "learning_rate": 4.373778344259892e-06, "loss": 0.4317, "step": 25110 }, { "epoch": 0.5862310385064178, "grad_norm": 1.4136938124011924, "learning_rate": 4.369737510706842e-06, "loss": 0.468, "step": 25120 }, { "epoch": 0.5864644107351226, "grad_norm": 1.6037174231667541, "learning_rate": 4.365697095402404e-06, "loss": 0.446, "step": 25130 }, { "epoch": 0.5866977829638274, "grad_norm": 1.400617120779616, "learning_rate": 4.3616571010278405e-06, "loss": 0.4457, "step": 25140 }, { "epoch": 0.5869311551925321, "grad_norm": 1.4894768276213803, "learning_rate": 4.357617530264134e-06, "loss": 0.4371, "step": 25150 }, { "epoch": 0.5871645274212369, "grad_norm": 1.5940407665941498, "learning_rate": 4.353578385791983e-06, "loss": 0.4775, "step": 25160 }, { "epoch": 0.5873978996499416, "grad_norm": 2.0598132920244607, "learning_rate": 4.349539670291807e-06, "loss": 0.4882, "step": 25170 }, { "epoch": 0.5876312718786464, "grad_norm": 1.587247346070053, "learning_rate": 4.345501386443738e-06, "loss": 0.4666, "step": 25180 }, { "epoch": 0.5878646441073512, "grad_norm": 1.432514842770501, "learning_rate": 4.341463536927621e-06, "loss": 0.4585, "step": 25190 }, { "epoch": 0.588098016336056, "grad_norm": 1.6148873759571194, "learning_rate": 4.3374261244230155e-06, "loss": 0.4536, "step": 25200 }, { "epoch": 0.5883313885647607, "grad_norm": 1.5291840255389202, "learning_rate": 4.3333891516091895e-06, "loss": 0.4419, "step": 25210 }, { "epoch": 0.5885647607934655, "grad_norm": 1.7924166565059874, "learning_rate": 4.32935262116512e-06, "loss": 0.4703, "step": 25220 }, { "epoch": 0.5887981330221703, "grad_norm": 1.4386706183362519, "learning_rate": 4.325316535769487e-06, "loss": 0.434, "step": 25230 }, { "epoch": 0.5890315052508751, "grad_norm": 1.799420798134498, "learning_rate": 4.321280898100681e-06, "loss": 0.4397, "step": 25240 }, { "epoch": 0.5892648774795799, "grad_norm": 1.652884517596566, "learning_rate": 4.317245710836791e-06, "loss": 0.4743, "step": 25250 }, { "epoch": 0.5894982497082847, "grad_norm": 1.9070914779958892, "learning_rate": 4.313210976655609e-06, "loss": 0.419, "step": 25260 }, { "epoch": 0.5897316219369895, "grad_norm": 1.4775165567398658, "learning_rate": 4.309176698234625e-06, "loss": 0.4596, "step": 25270 }, { "epoch": 0.5899649941656943, "grad_norm": 1.5885354188415124, "learning_rate": 4.305142878251026e-06, "loss": 0.4522, "step": 25280 }, { "epoch": 0.590198366394399, "grad_norm": 1.1882515781731464, "learning_rate": 4.301109519381701e-06, "loss": 0.4671, "step": 25290 }, { "epoch": 0.5904317386231038, "grad_norm": 1.3780264582302377, "learning_rate": 4.2970766243032215e-06, "loss": 0.4258, "step": 25300 }, { "epoch": 0.5906651108518086, "grad_norm": 1.6304777464877231, "learning_rate": 4.293044195691862e-06, "loss": 0.4596, "step": 25310 }, { "epoch": 0.5908984830805134, "grad_norm": 1.384629248484643, "learning_rate": 4.2890122362235835e-06, "loss": 0.4606, "step": 25320 }, { "epoch": 0.5911318553092182, "grad_norm": 1.4120350511871194, "learning_rate": 4.284980748574032e-06, "loss": 0.4656, "step": 25330 }, { "epoch": 0.591365227537923, "grad_norm": 1.5967596106859643, "learning_rate": 4.2809497354185466e-06, "loss": 0.4771, "step": 25340 }, { "epoch": 0.5915985997666278, "grad_norm": 1.7012316458138221, "learning_rate": 4.276919199432148e-06, "loss": 0.441, "step": 25350 }, { "epoch": 0.5918319719953326, "grad_norm": 1.5850434649701992, "learning_rate": 4.2728891432895424e-06, "loss": 0.4588, "step": 25360 }, { "epoch": 0.5920653442240373, "grad_norm": 1.6205308954593125, "learning_rate": 4.268859569665113e-06, "loss": 0.4776, "step": 25370 }, { "epoch": 0.5922987164527421, "grad_norm": 1.3278030581322635, "learning_rate": 4.264830481232929e-06, "loss": 0.4434, "step": 25380 }, { "epoch": 0.5925320886814469, "grad_norm": 1.7806579670253915, "learning_rate": 4.260801880666734e-06, "loss": 0.4528, "step": 25390 }, { "epoch": 0.5927654609101517, "grad_norm": 1.4323841296146942, "learning_rate": 4.256773770639946e-06, "loss": 0.4607, "step": 25400 }, { "epoch": 0.5929988331388565, "grad_norm": 1.476123835772557, "learning_rate": 4.252746153825662e-06, "loss": 0.432, "step": 25410 }, { "epoch": 0.5932322053675613, "grad_norm": 1.8955337601629625, "learning_rate": 4.2487190328966505e-06, "loss": 0.4867, "step": 25420 }, { "epoch": 0.5934655775962661, "grad_norm": 1.731145217269464, "learning_rate": 4.244692410525347e-06, "loss": 0.4716, "step": 25430 }, { "epoch": 0.5936989498249708, "grad_norm": 1.6356646758682114, "learning_rate": 4.240666289383862e-06, "loss": 0.4745, "step": 25440 }, { "epoch": 0.5939323220536756, "grad_norm": 1.8070475091592397, "learning_rate": 4.23664067214397e-06, "loss": 0.4845, "step": 25450 }, { "epoch": 0.5941656942823804, "grad_norm": 1.5930046870508587, "learning_rate": 4.23261556147711e-06, "loss": 0.4265, "step": 25460 }, { "epoch": 0.5943990665110852, "grad_norm": 1.7870972362478121, "learning_rate": 4.228590960054388e-06, "loss": 0.4683, "step": 25470 }, { "epoch": 0.59463243873979, "grad_norm": 1.4899273624151153, "learning_rate": 4.224566870546569e-06, "loss": 0.4724, "step": 25480 }, { "epoch": 0.5948658109684948, "grad_norm": 2.2664924371109545, "learning_rate": 4.220543295624081e-06, "loss": 0.4248, "step": 25490 }, { "epoch": 0.5950991831971996, "grad_norm": 1.5120190777477591, "learning_rate": 4.21652023795701e-06, "loss": 0.4876, "step": 25500 }, { "epoch": 0.5953325554259044, "grad_norm": 1.5543360038335852, "learning_rate": 4.212497700215097e-06, "loss": 0.4696, "step": 25510 }, { "epoch": 0.5955659276546091, "grad_norm": 1.6011304008419194, "learning_rate": 4.208475685067739e-06, "loss": 0.4554, "step": 25520 }, { "epoch": 0.5957992998833139, "grad_norm": 1.74355393927657, "learning_rate": 4.204454195183987e-06, "loss": 0.4502, "step": 25530 }, { "epoch": 0.5960326721120187, "grad_norm": 1.5046723487475608, "learning_rate": 4.200433233232543e-06, "loss": 0.4771, "step": 25540 }, { "epoch": 0.5962660443407235, "grad_norm": 1.7282855693762316, "learning_rate": 4.1964128018817566e-06, "loss": 0.4583, "step": 25550 }, { "epoch": 0.5964994165694283, "grad_norm": 1.3110278865282035, "learning_rate": 4.192392903799627e-06, "loss": 0.4643, "step": 25560 }, { "epoch": 0.5967327887981331, "grad_norm": 1.5896798983084104, "learning_rate": 4.188373541653804e-06, "loss": 0.4386, "step": 25570 }, { "epoch": 0.5969661610268379, "grad_norm": 1.8343398227444527, "learning_rate": 4.184354718111571e-06, "loss": 0.4544, "step": 25580 }, { "epoch": 0.5971995332555425, "grad_norm": 1.6496465530398905, "learning_rate": 4.180336435839863e-06, "loss": 0.453, "step": 25590 }, { "epoch": 0.5974329054842473, "grad_norm": 1.8229578790904595, "learning_rate": 4.176318697505255e-06, "loss": 0.4616, "step": 25600 }, { "epoch": 0.5976662777129521, "grad_norm": 1.6685239894283508, "learning_rate": 4.172301505773958e-06, "loss": 0.4712, "step": 25610 }, { "epoch": 0.5978996499416569, "grad_norm": 1.8639195448021602, "learning_rate": 4.168284863311819e-06, "loss": 0.4692, "step": 25620 }, { "epoch": 0.5981330221703617, "grad_norm": 1.54702111605793, "learning_rate": 4.164268772784325e-06, "loss": 0.4894, "step": 25630 }, { "epoch": 0.5983663943990665, "grad_norm": 1.8528844391346522, "learning_rate": 4.160253236856596e-06, "loss": 0.4507, "step": 25640 }, { "epoch": 0.5985997666277713, "grad_norm": 1.5654571355815645, "learning_rate": 4.156238258193379e-06, "loss": 0.4756, "step": 25650 }, { "epoch": 0.598833138856476, "grad_norm": 1.5135640307655136, "learning_rate": 4.1522238394590576e-06, "loss": 0.4443, "step": 25660 }, { "epoch": 0.5990665110851808, "grad_norm": 1.586553965576647, "learning_rate": 4.148209983317639e-06, "loss": 0.4524, "step": 25670 }, { "epoch": 0.5992998833138856, "grad_norm": 1.5047704652419405, "learning_rate": 4.144196692432762e-06, "loss": 0.4526, "step": 25680 }, { "epoch": 0.5995332555425904, "grad_norm": 1.8667191654730866, "learning_rate": 4.1401839694676845e-06, "loss": 0.4729, "step": 25690 }, { "epoch": 0.5997666277712952, "grad_norm": 1.6688022247230216, "learning_rate": 4.136171817085291e-06, "loss": 0.4696, "step": 25700 }, { "epoch": 0.6, "grad_norm": 1.7519859184584265, "learning_rate": 4.132160237948089e-06, "loss": 0.4638, "step": 25710 }, { "epoch": 0.6002333722287048, "grad_norm": 1.3183669807421328, "learning_rate": 4.128149234718199e-06, "loss": 0.4579, "step": 25720 }, { "epoch": 0.6004667444574096, "grad_norm": 1.6055185681247894, "learning_rate": 4.124138810057368e-06, "loss": 0.4291, "step": 25730 }, { "epoch": 0.6007001166861143, "grad_norm": 1.6476986920046979, "learning_rate": 4.120128966626952e-06, "loss": 0.4699, "step": 25740 }, { "epoch": 0.6009334889148191, "grad_norm": 1.5482304261536297, "learning_rate": 4.1161197070879244e-06, "loss": 0.465, "step": 25750 }, { "epoch": 0.6011668611435239, "grad_norm": 1.867814226675801, "learning_rate": 4.112111034100871e-06, "loss": 0.4673, "step": 25760 }, { "epoch": 0.6014002333722287, "grad_norm": 1.5604242382742046, "learning_rate": 4.108102950325988e-06, "loss": 0.4516, "step": 25770 }, { "epoch": 0.6016336056009335, "grad_norm": 1.7620777562734222, "learning_rate": 4.104095458423082e-06, "loss": 0.4531, "step": 25780 }, { "epoch": 0.6018669778296383, "grad_norm": 1.9118992603108973, "learning_rate": 4.1000885610515625e-06, "loss": 0.5055, "step": 25790 }, { "epoch": 0.6021003500583431, "grad_norm": 1.5978392944324324, "learning_rate": 4.09608226087045e-06, "loss": 0.4565, "step": 25800 }, { "epoch": 0.6023337222870478, "grad_norm": 1.5217720705108346, "learning_rate": 4.092076560538366e-06, "loss": 0.4833, "step": 25810 }, { "epoch": 0.6025670945157526, "grad_norm": 1.8115659385023348, "learning_rate": 4.0880714627135345e-06, "loss": 0.4584, "step": 25820 }, { "epoch": 0.6028004667444574, "grad_norm": 1.816641868879112, "learning_rate": 4.084066970053777e-06, "loss": 0.4855, "step": 25830 }, { "epoch": 0.6030338389731622, "grad_norm": 1.5388945104322203, "learning_rate": 4.080063085216517e-06, "loss": 0.4363, "step": 25840 }, { "epoch": 0.603267211201867, "grad_norm": 1.558244770826234, "learning_rate": 4.076059810858772e-06, "loss": 0.4421, "step": 25850 }, { "epoch": 0.6035005834305718, "grad_norm": 1.527535197269356, "learning_rate": 4.072057149637158e-06, "loss": 0.4761, "step": 25860 }, { "epoch": 0.6037339556592766, "grad_norm": 1.7401822254436694, "learning_rate": 4.06805510420788e-06, "loss": 0.4819, "step": 25870 }, { "epoch": 0.6039673278879814, "grad_norm": 1.56659944434314, "learning_rate": 4.064053677226737e-06, "loss": 0.4715, "step": 25880 }, { "epoch": 0.6042007001166861, "grad_norm": 1.470561074489095, "learning_rate": 4.060052871349116e-06, "loss": 0.4379, "step": 25890 }, { "epoch": 0.6044340723453909, "grad_norm": 1.7746162588535916, "learning_rate": 4.056052689229995e-06, "loss": 0.4639, "step": 25900 }, { "epoch": 0.6046674445740957, "grad_norm": 1.6457862232455414, "learning_rate": 4.052053133523931e-06, "loss": 0.4641, "step": 25910 }, { "epoch": 0.6049008168028005, "grad_norm": 1.4150368912718356, "learning_rate": 4.048054206885074e-06, "loss": 0.4623, "step": 25920 }, { "epoch": 0.6051341890315053, "grad_norm": 1.5885049590427507, "learning_rate": 4.044055911967152e-06, "loss": 0.4639, "step": 25930 }, { "epoch": 0.6053675612602101, "grad_norm": 1.729448722516464, "learning_rate": 4.040058251423472e-06, "loss": 0.4982, "step": 25940 }, { "epoch": 0.6056009334889149, "grad_norm": 1.671488046145837, "learning_rate": 4.036061227906924e-06, "loss": 0.4319, "step": 25950 }, { "epoch": 0.6058343057176196, "grad_norm": 1.5585637697184933, "learning_rate": 4.032064844069975e-06, "loss": 0.4529, "step": 25960 }, { "epoch": 0.6060676779463244, "grad_norm": 1.499528084530496, "learning_rate": 4.028069102564662e-06, "loss": 0.4649, "step": 25970 }, { "epoch": 0.6063010501750292, "grad_norm": 1.7760716099659064, "learning_rate": 4.024074006042603e-06, "loss": 0.4635, "step": 25980 }, { "epoch": 0.606534422403734, "grad_norm": 1.545151866659962, "learning_rate": 4.020079557154984e-06, "loss": 0.4524, "step": 25990 }, { "epoch": 0.6067677946324388, "grad_norm": 1.6309793870889437, "learning_rate": 4.016085758552563e-06, "loss": 0.4449, "step": 26000 }, { "epoch": 0.6070011668611435, "grad_norm": 1.7129102617936491, "learning_rate": 4.012092612885663e-06, "loss": 0.4879, "step": 26010 }, { "epoch": 0.6072345390898483, "grad_norm": 1.6825732822462818, "learning_rate": 4.008100122804178e-06, "loss": 0.4452, "step": 26020 }, { "epoch": 0.607467911318553, "grad_norm": 1.591762403379104, "learning_rate": 4.004108290957565e-06, "loss": 0.4717, "step": 26030 }, { "epoch": 0.6077012835472578, "grad_norm": 1.6951281590258385, "learning_rate": 4.000117119994844e-06, "loss": 0.4701, "step": 26040 }, { "epoch": 0.6079346557759626, "grad_norm": 1.7592878103389593, "learning_rate": 3.996126612564597e-06, "loss": 0.4519, "step": 26050 }, { "epoch": 0.6081680280046674, "grad_norm": 2.076407337015625, "learning_rate": 3.992136771314964e-06, "loss": 0.4856, "step": 26060 }, { "epoch": 0.6084014002333722, "grad_norm": 1.66851871194818, "learning_rate": 3.988147598893648e-06, "loss": 0.4495, "step": 26070 }, { "epoch": 0.608634772462077, "grad_norm": 1.6449301327111083, "learning_rate": 3.9841590979479e-06, "loss": 0.4616, "step": 26080 }, { "epoch": 0.6088681446907818, "grad_norm": 1.5658683562927145, "learning_rate": 3.980171271124533e-06, "loss": 0.4844, "step": 26090 }, { "epoch": 0.6091015169194866, "grad_norm": 1.714887165065019, "learning_rate": 3.976184121069908e-06, "loss": 0.4467, "step": 26100 }, { "epoch": 0.6093348891481913, "grad_norm": 1.4448235027100642, "learning_rate": 3.972197650429938e-06, "loss": 0.4322, "step": 26110 }, { "epoch": 0.6095682613768961, "grad_norm": 1.491471119696059, "learning_rate": 3.968211861850087e-06, "loss": 0.4643, "step": 26120 }, { "epoch": 0.6098016336056009, "grad_norm": 1.6170791035050085, "learning_rate": 3.964226757975366e-06, "loss": 0.4479, "step": 26130 }, { "epoch": 0.6100350058343057, "grad_norm": 1.708824992326832, "learning_rate": 3.960242341450327e-06, "loss": 0.4705, "step": 26140 }, { "epoch": 0.6102683780630105, "grad_norm": 1.5333116315266537, "learning_rate": 3.956258614919071e-06, "loss": 0.481, "step": 26150 }, { "epoch": 0.6105017502917153, "grad_norm": 1.8210075049604635, "learning_rate": 3.952275581025241e-06, "loss": 0.4402, "step": 26160 }, { "epoch": 0.6107351225204201, "grad_norm": 1.935369242045982, "learning_rate": 3.948293242412019e-06, "loss": 0.4529, "step": 26170 }, { "epoch": 0.6109684947491248, "grad_norm": 1.5804610619306654, "learning_rate": 3.944311601722122e-06, "loss": 0.4598, "step": 26180 }, { "epoch": 0.6112018669778296, "grad_norm": 1.568672758697002, "learning_rate": 3.940330661597813e-06, "loss": 0.4675, "step": 26190 }, { "epoch": 0.6114352392065344, "grad_norm": 1.6069919626942886, "learning_rate": 3.936350424680879e-06, "loss": 0.4873, "step": 26200 }, { "epoch": 0.6116686114352392, "grad_norm": 1.518552127408209, "learning_rate": 3.932370893612647e-06, "loss": 0.4794, "step": 26210 }, { "epoch": 0.611901983663944, "grad_norm": 1.5145282988477453, "learning_rate": 3.928392071033977e-06, "loss": 0.4649, "step": 26220 }, { "epoch": 0.6121353558926488, "grad_norm": 1.7559497070566903, "learning_rate": 3.924413959585254e-06, "loss": 0.447, "step": 26230 }, { "epoch": 0.6123687281213536, "grad_norm": 1.5764742539116825, "learning_rate": 3.920436561906394e-06, "loss": 0.5116, "step": 26240 }, { "epoch": 0.6126021003500584, "grad_norm": 1.6199482714582467, "learning_rate": 3.916459880636838e-06, "loss": 0.4475, "step": 26250 }, { "epoch": 0.6128354725787631, "grad_norm": 1.7483005747820792, "learning_rate": 3.912483918415552e-06, "loss": 0.4388, "step": 26260 }, { "epoch": 0.6130688448074679, "grad_norm": 1.3718426736935156, "learning_rate": 3.908508677881028e-06, "loss": 0.436, "step": 26270 }, { "epoch": 0.6133022170361727, "grad_norm": 1.6786475138251695, "learning_rate": 3.904534161671274e-06, "loss": 0.4754, "step": 26280 }, { "epoch": 0.6135355892648775, "grad_norm": 1.636102672085474, "learning_rate": 3.900560372423817e-06, "loss": 0.4556, "step": 26290 }, { "epoch": 0.6137689614935823, "grad_norm": 1.5653866358373452, "learning_rate": 3.896587312775708e-06, "loss": 0.4718, "step": 26300 }, { "epoch": 0.6140023337222871, "grad_norm": 1.6059284108296301, "learning_rate": 3.892614985363508e-06, "loss": 0.4714, "step": 26310 }, { "epoch": 0.6142357059509919, "grad_norm": 1.8315289630324931, "learning_rate": 3.888643392823296e-06, "loss": 0.4433, "step": 26320 }, { "epoch": 0.6144690781796966, "grad_norm": 1.5530719533141917, "learning_rate": 3.884672537790658e-06, "loss": 0.4382, "step": 26330 }, { "epoch": 0.6147024504084014, "grad_norm": 1.668729313326983, "learning_rate": 3.8807024229006955e-06, "loss": 0.4672, "step": 26340 }, { "epoch": 0.6149358226371062, "grad_norm": 1.5792342032707956, "learning_rate": 3.876733050788018e-06, "loss": 0.4476, "step": 26350 }, { "epoch": 0.615169194865811, "grad_norm": 1.5458787816776016, "learning_rate": 3.872764424086741e-06, "loss": 0.4574, "step": 26360 }, { "epoch": 0.6154025670945158, "grad_norm": 1.6240212223719952, "learning_rate": 3.868796545430484e-06, "loss": 0.469, "step": 26370 }, { "epoch": 0.6156359393232206, "grad_norm": 1.6939173471586013, "learning_rate": 3.864829417452373e-06, "loss": 0.4369, "step": 26380 }, { "epoch": 0.6158693115519254, "grad_norm": 1.5114012927366938, "learning_rate": 3.860863042785037e-06, "loss": 0.4575, "step": 26390 }, { "epoch": 0.6161026837806302, "grad_norm": 1.6792087421773976, "learning_rate": 3.856897424060599e-06, "loss": 0.462, "step": 26400 }, { "epoch": 0.6163360560093349, "grad_norm": 1.6004500855708368, "learning_rate": 3.852932563910684e-06, "loss": 0.459, "step": 26410 }, { "epoch": 0.6165694282380396, "grad_norm": 1.8108771322698314, "learning_rate": 3.848968464966416e-06, "loss": 0.4619, "step": 26420 }, { "epoch": 0.6168028004667444, "grad_norm": 1.7280333314772673, "learning_rate": 3.84500512985841e-06, "loss": 0.4451, "step": 26430 }, { "epoch": 0.6170361726954492, "grad_norm": 1.5203419405706213, "learning_rate": 3.841042561216776e-06, "loss": 0.4565, "step": 26440 }, { "epoch": 0.617269544924154, "grad_norm": 1.616241987167004, "learning_rate": 3.8370807616711144e-06, "loss": 0.5064, "step": 26450 }, { "epoch": 0.6175029171528588, "grad_norm": 1.3977392401699205, "learning_rate": 3.833119733850518e-06, "loss": 0.4588, "step": 26460 }, { "epoch": 0.6177362893815636, "grad_norm": 1.611954345207514, "learning_rate": 3.829159480383561e-06, "loss": 0.4637, "step": 26470 }, { "epoch": 0.6179696616102683, "grad_norm": 1.732687459774861, "learning_rate": 3.825200003898312e-06, "loss": 0.4555, "step": 26480 }, { "epoch": 0.6182030338389731, "grad_norm": 1.5064975535645095, "learning_rate": 3.8212413070223184e-06, "loss": 0.5008, "step": 26490 }, { "epoch": 0.6184364060676779, "grad_norm": 1.5972935355728688, "learning_rate": 3.817283392382612e-06, "loss": 0.4338, "step": 26500 }, { "epoch": 0.6186697782963827, "grad_norm": 1.54623771106108, "learning_rate": 3.8133262626057045e-06, "loss": 0.4446, "step": 26510 }, { "epoch": 0.6189031505250875, "grad_norm": 1.8363069161286099, "learning_rate": 3.8093699203175895e-06, "loss": 0.4771, "step": 26520 }, { "epoch": 0.6191365227537923, "grad_norm": 1.5789226175287892, "learning_rate": 3.8054143681437335e-06, "loss": 0.4664, "step": 26530 }, { "epoch": 0.6193698949824971, "grad_norm": 1.6294224914261506, "learning_rate": 3.8014596087090826e-06, "loss": 0.4393, "step": 26540 }, { "epoch": 0.6196032672112018, "grad_norm": 1.8455630070404834, "learning_rate": 3.7975056446380553e-06, "loss": 0.4535, "step": 26550 }, { "epoch": 0.6198366394399066, "grad_norm": 1.5025577071714609, "learning_rate": 3.793552478554544e-06, "loss": 0.4439, "step": 26560 }, { "epoch": 0.6200700116686114, "grad_norm": 1.7328987347290679, "learning_rate": 3.7896001130819093e-06, "loss": 0.4426, "step": 26570 }, { "epoch": 0.6203033838973162, "grad_norm": 1.6291470938631052, "learning_rate": 3.785648550842979e-06, "loss": 0.4588, "step": 26580 }, { "epoch": 0.620536756126021, "grad_norm": 1.8602554353160619, "learning_rate": 3.781697794460052e-06, "loss": 0.4485, "step": 26590 }, { "epoch": 0.6207701283547258, "grad_norm": 1.7057031576170403, "learning_rate": 3.7777478465548887e-06, "loss": 0.4722, "step": 26600 }, { "epoch": 0.6210035005834306, "grad_norm": 1.692808829774122, "learning_rate": 3.7737987097487187e-06, "loss": 0.482, "step": 26610 }, { "epoch": 0.6212368728121354, "grad_norm": 1.378188111916514, "learning_rate": 3.7698503866622255e-06, "loss": 0.4418, "step": 26620 }, { "epoch": 0.6214702450408401, "grad_norm": 1.4703803777775613, "learning_rate": 3.7659028799155585e-06, "loss": 0.45, "step": 26630 }, { "epoch": 0.6217036172695449, "grad_norm": 1.744211374734605, "learning_rate": 3.761956192128323e-06, "loss": 0.4461, "step": 26640 }, { "epoch": 0.6219369894982497, "grad_norm": 1.484249736632367, "learning_rate": 3.7580103259195812e-06, "loss": 0.4209, "step": 26650 }, { "epoch": 0.6221703617269545, "grad_norm": 1.3599034036068243, "learning_rate": 3.75406528390785e-06, "loss": 0.4326, "step": 26660 }, { "epoch": 0.6224037339556593, "grad_norm": 1.8247253891206061, "learning_rate": 3.7501210687111008e-06, "loss": 0.4478, "step": 26670 }, { "epoch": 0.6226371061843641, "grad_norm": 1.8314276262829488, "learning_rate": 3.746177682946753e-06, "loss": 0.4784, "step": 26680 }, { "epoch": 0.6228704784130689, "grad_norm": 1.455822463400235, "learning_rate": 3.742235129231678e-06, "loss": 0.4456, "step": 26690 }, { "epoch": 0.6231038506417736, "grad_norm": 1.400063660304275, "learning_rate": 3.7382934101821954e-06, "loss": 0.4702, "step": 26700 }, { "epoch": 0.6233372228704784, "grad_norm": 1.5932912514149487, "learning_rate": 3.7343525284140716e-06, "loss": 0.4684, "step": 26710 }, { "epoch": 0.6235705950991832, "grad_norm": 1.7992994036167578, "learning_rate": 3.7304124865425124e-06, "loss": 0.4989, "step": 26720 }, { "epoch": 0.623803967327888, "grad_norm": 1.3479797647291156, "learning_rate": 3.726473287182172e-06, "loss": 0.4691, "step": 26730 }, { "epoch": 0.6240373395565928, "grad_norm": 1.5871718542282536, "learning_rate": 3.722534932947144e-06, "loss": 0.4519, "step": 26740 }, { "epoch": 0.6242707117852976, "grad_norm": 1.7290779369951113, "learning_rate": 3.7185974264509574e-06, "loss": 0.4739, "step": 26750 }, { "epoch": 0.6245040840140024, "grad_norm": 2.2954568246751395, "learning_rate": 3.714660770306584e-06, "loss": 0.4528, "step": 26760 }, { "epoch": 0.6247374562427072, "grad_norm": 1.652865834985576, "learning_rate": 3.710724967126429e-06, "loss": 0.4697, "step": 26770 }, { "epoch": 0.6249708284714119, "grad_norm": 1.499358350050315, "learning_rate": 3.7067900195223315e-06, "loss": 0.4672, "step": 26780 }, { "epoch": 0.6252042007001167, "grad_norm": 1.444981065741424, "learning_rate": 3.702855930105562e-06, "loss": 0.4843, "step": 26790 }, { "epoch": 0.6254375729288215, "grad_norm": 1.481926331871984, "learning_rate": 3.6989227014868224e-06, "loss": 0.4495, "step": 26800 }, { "epoch": 0.6256709451575263, "grad_norm": 1.5795715015368939, "learning_rate": 3.6949903362762444e-06, "loss": 0.4658, "step": 26810 }, { "epoch": 0.6259043173862311, "grad_norm": 1.4973525879297285, "learning_rate": 3.6910588370833842e-06, "loss": 0.4633, "step": 26820 }, { "epoch": 0.6261376896149359, "grad_norm": 1.607247649993037, "learning_rate": 3.6871282065172253e-06, "loss": 0.4685, "step": 26830 }, { "epoch": 0.6263710618436406, "grad_norm": 1.7087821409545587, "learning_rate": 3.6831984471861754e-06, "loss": 0.4588, "step": 26840 }, { "epoch": 0.6266044340723453, "grad_norm": 1.6955809929769876, "learning_rate": 3.6792695616980607e-06, "loss": 0.4701, "step": 26850 }, { "epoch": 0.6268378063010501, "grad_norm": 1.5007445019495544, "learning_rate": 3.675341552660131e-06, "loss": 0.4707, "step": 26860 }, { "epoch": 0.6270711785297549, "grad_norm": 1.504408952289226, "learning_rate": 3.671414422679053e-06, "loss": 0.4379, "step": 26870 }, { "epoch": 0.6273045507584597, "grad_norm": 1.4153483202842374, "learning_rate": 3.6674881743609103e-06, "loss": 0.4409, "step": 26880 }, { "epoch": 0.6275379229871645, "grad_norm": 1.5106677497722978, "learning_rate": 3.6635628103112007e-06, "loss": 0.4428, "step": 26890 }, { "epoch": 0.6277712952158693, "grad_norm": 1.6167250381990632, "learning_rate": 3.6596383331348363e-06, "loss": 0.4569, "step": 26900 }, { "epoch": 0.6280046674445741, "grad_norm": 1.7054102098322483, "learning_rate": 3.6557147454361407e-06, "loss": 0.4421, "step": 26910 }, { "epoch": 0.6282380396732788, "grad_norm": 1.628684327057359, "learning_rate": 3.651792049818846e-06, "loss": 0.4545, "step": 26920 }, { "epoch": 0.6284714119019836, "grad_norm": 1.714974767633596, "learning_rate": 3.647870248886093e-06, "loss": 0.4298, "step": 26930 }, { "epoch": 0.6287047841306884, "grad_norm": 1.599000426042323, "learning_rate": 3.64394934524043e-06, "loss": 0.4526, "step": 26940 }, { "epoch": 0.6289381563593932, "grad_norm": 1.6348320715201303, "learning_rate": 3.640029341483806e-06, "loss": 0.4393, "step": 26950 }, { "epoch": 0.629171528588098, "grad_norm": 1.5480527664016792, "learning_rate": 3.6361102402175787e-06, "loss": 0.4601, "step": 26960 }, { "epoch": 0.6294049008168028, "grad_norm": 1.7350691227616144, "learning_rate": 3.6321920440424995e-06, "loss": 0.4512, "step": 26970 }, { "epoch": 0.6296382730455076, "grad_norm": 1.7702454128947864, "learning_rate": 3.628274755558727e-06, "loss": 0.4319, "step": 26980 }, { "epoch": 0.6298716452742124, "grad_norm": 1.6092807476521929, "learning_rate": 3.624358377365812e-06, "loss": 0.4542, "step": 26990 }, { "epoch": 0.6301050175029171, "grad_norm": 1.571926203204574, "learning_rate": 3.620442912062705e-06, "loss": 0.4528, "step": 27000 }, { "epoch": 0.6303383897316219, "grad_norm": 1.6378597393947176, "learning_rate": 3.616528362247746e-06, "loss": 0.4711, "step": 27010 }, { "epoch": 0.6305717619603267, "grad_norm": 1.2201766007881933, "learning_rate": 3.612614730518671e-06, "loss": 0.4505, "step": 27020 }, { "epoch": 0.6308051341890315, "grad_norm": 1.6913007085497167, "learning_rate": 3.608702019472608e-06, "loss": 0.4524, "step": 27030 }, { "epoch": 0.6310385064177363, "grad_norm": 1.5195316429036405, "learning_rate": 3.604790231706069e-06, "loss": 0.4576, "step": 27040 }, { "epoch": 0.6312718786464411, "grad_norm": 1.8040840196295511, "learning_rate": 3.600879369814958e-06, "loss": 0.4267, "step": 27050 }, { "epoch": 0.6315052508751459, "grad_norm": 1.57364163390637, "learning_rate": 3.596969436394563e-06, "loss": 0.4301, "step": 27060 }, { "epoch": 0.6317386231038506, "grad_norm": 1.6227888974356828, "learning_rate": 3.5930604340395537e-06, "loss": 0.476, "step": 27070 }, { "epoch": 0.6319719953325554, "grad_norm": 1.610733052157478, "learning_rate": 3.5891523653439854e-06, "loss": 0.4516, "step": 27080 }, { "epoch": 0.6322053675612602, "grad_norm": 1.869626068723344, "learning_rate": 3.5852452329012928e-06, "loss": 0.4538, "step": 27090 }, { "epoch": 0.632438739789965, "grad_norm": 1.6273485455679586, "learning_rate": 3.5813390393042883e-06, "loss": 0.4554, "step": 27100 }, { "epoch": 0.6326721120186698, "grad_norm": 1.9529173948973, "learning_rate": 3.5774337871451612e-06, "loss": 0.476, "step": 27110 }, { "epoch": 0.6329054842473746, "grad_norm": 1.8850202209974274, "learning_rate": 3.5735294790154775e-06, "loss": 0.4799, "step": 27120 }, { "epoch": 0.6331388564760794, "grad_norm": 1.5664723885712681, "learning_rate": 3.5696261175061766e-06, "loss": 0.463, "step": 27130 }, { "epoch": 0.6333722287047842, "grad_norm": 1.5436376699416596, "learning_rate": 3.5657237052075666e-06, "loss": 0.4619, "step": 27140 }, { "epoch": 0.6336056009334889, "grad_norm": 1.7845579261555493, "learning_rate": 3.5618222447093293e-06, "loss": 0.4762, "step": 27150 }, { "epoch": 0.6338389731621937, "grad_norm": 1.7069800654866185, "learning_rate": 3.557921738600514e-06, "loss": 0.4554, "step": 27160 }, { "epoch": 0.6340723453908985, "grad_norm": 1.5931506128909283, "learning_rate": 3.554022189469537e-06, "loss": 0.4397, "step": 27170 }, { "epoch": 0.6343057176196033, "grad_norm": 1.5558341051378854, "learning_rate": 3.550123599904177e-06, "loss": 0.4664, "step": 27180 }, { "epoch": 0.6345390898483081, "grad_norm": 1.544351458194548, "learning_rate": 3.5462259724915788e-06, "loss": 0.4449, "step": 27190 }, { "epoch": 0.6347724620770129, "grad_norm": 1.4476146081455181, "learning_rate": 3.542329309818249e-06, "loss": 0.4581, "step": 27200 }, { "epoch": 0.6350058343057177, "grad_norm": 1.4937014550415189, "learning_rate": 3.53843361447005e-06, "loss": 0.4572, "step": 27210 }, { "epoch": 0.6352392065344225, "grad_norm": 1.697940574809226, "learning_rate": 3.5345388890322065e-06, "loss": 0.4787, "step": 27220 }, { "epoch": 0.6354725787631272, "grad_norm": 1.6302057245776698, "learning_rate": 3.530645136089298e-06, "loss": 0.4777, "step": 27230 }, { "epoch": 0.635705950991832, "grad_norm": 1.4283958565124018, "learning_rate": 3.5267523582252583e-06, "loss": 0.4486, "step": 27240 }, { "epoch": 0.6359393232205367, "grad_norm": 1.5710221275289475, "learning_rate": 3.5228605580233743e-06, "loss": 0.4622, "step": 27250 }, { "epoch": 0.6361726954492415, "grad_norm": 1.4690261554101955, "learning_rate": 3.518969738066285e-06, "loss": 0.4566, "step": 27260 }, { "epoch": 0.6364060676779463, "grad_norm": 1.417787192008187, "learning_rate": 3.5150799009359786e-06, "loss": 0.484, "step": 27270 }, { "epoch": 0.6366394399066511, "grad_norm": 1.8527687552249295, "learning_rate": 3.511191049213789e-06, "loss": 0.4522, "step": 27280 }, { "epoch": 0.6368728121353558, "grad_norm": 1.7680991985807637, "learning_rate": 3.5073031854803984e-06, "loss": 0.4696, "step": 27290 }, { "epoch": 0.6371061843640606, "grad_norm": 1.4931033271992278, "learning_rate": 3.503416312315835e-06, "loss": 0.4694, "step": 27300 }, { "epoch": 0.6373395565927654, "grad_norm": 1.691486994885885, "learning_rate": 3.499530432299464e-06, "loss": 0.4484, "step": 27310 }, { "epoch": 0.6375729288214702, "grad_norm": 1.2051475641599887, "learning_rate": 3.495645548009999e-06, "loss": 0.4703, "step": 27320 }, { "epoch": 0.637806301050175, "grad_norm": 1.564630650203105, "learning_rate": 3.4917616620254836e-06, "loss": 0.485, "step": 27330 }, { "epoch": 0.6380396732788798, "grad_norm": 1.856718948761169, "learning_rate": 3.487878776923308e-06, "loss": 0.4825, "step": 27340 }, { "epoch": 0.6382730455075846, "grad_norm": 1.6951482086835103, "learning_rate": 3.4839968952801937e-06, "loss": 0.4729, "step": 27350 }, { "epoch": 0.6385064177362894, "grad_norm": 1.670035617332232, "learning_rate": 3.480116019672196e-06, "loss": 0.4609, "step": 27360 }, { "epoch": 0.6387397899649941, "grad_norm": 1.5690386074582863, "learning_rate": 3.4762361526747034e-06, "loss": 0.4667, "step": 27370 }, { "epoch": 0.6389731621936989, "grad_norm": 2.6640914858031692, "learning_rate": 3.4723572968624368e-06, "loss": 0.4491, "step": 27380 }, { "epoch": 0.6392065344224037, "grad_norm": 2.0312209216816592, "learning_rate": 3.4684794548094447e-06, "loss": 0.4344, "step": 27390 }, { "epoch": 0.6394399066511085, "grad_norm": 1.5774539651205801, "learning_rate": 3.4646026290890998e-06, "loss": 0.4587, "step": 27400 }, { "epoch": 0.6396732788798133, "grad_norm": 1.7163704385252674, "learning_rate": 3.460726822274105e-06, "loss": 0.4619, "step": 27410 }, { "epoch": 0.6399066511085181, "grad_norm": 1.8160137100622507, "learning_rate": 3.4568520369364855e-06, "loss": 0.4442, "step": 27420 }, { "epoch": 0.6401400233372229, "grad_norm": 1.5043734623304152, "learning_rate": 3.4529782756475883e-06, "loss": 0.457, "step": 27430 }, { "epoch": 0.6403733955659276, "grad_norm": 1.9115132346003763, "learning_rate": 3.4491055409780794e-06, "loss": 0.4707, "step": 27440 }, { "epoch": 0.6406067677946324, "grad_norm": 1.6946424273337866, "learning_rate": 3.4452338354979475e-06, "loss": 0.4818, "step": 27450 }, { "epoch": 0.6408401400233372, "grad_norm": 1.939103463467201, "learning_rate": 3.4413631617764936e-06, "loss": 0.4421, "step": 27460 }, { "epoch": 0.641073512252042, "grad_norm": 1.9923419313568356, "learning_rate": 3.437493522382337e-06, "loss": 0.4743, "step": 27470 }, { "epoch": 0.6413068844807468, "grad_norm": 1.4687073727999522, "learning_rate": 3.43362491988341e-06, "loss": 0.4747, "step": 27480 }, { "epoch": 0.6415402567094516, "grad_norm": 1.5165959542206608, "learning_rate": 3.4297573568469576e-06, "loss": 0.4228, "step": 27490 }, { "epoch": 0.6417736289381564, "grad_norm": 1.6575564359261006, "learning_rate": 3.425890835839532e-06, "loss": 0.4752, "step": 27500 }, { "epoch": 0.6420070011668612, "grad_norm": 1.413572469114562, "learning_rate": 3.422025359426997e-06, "loss": 0.4466, "step": 27510 }, { "epoch": 0.6422403733955659, "grad_norm": 1.8012337300004122, "learning_rate": 3.418160930174523e-06, "loss": 0.4542, "step": 27520 }, { "epoch": 0.6424737456242707, "grad_norm": 1.5440813502289408, "learning_rate": 3.4142975506465837e-06, "loss": 0.4646, "step": 27530 }, { "epoch": 0.6427071178529755, "grad_norm": 1.54375948329193, "learning_rate": 3.410435223406957e-06, "loss": 0.438, "step": 27540 }, { "epoch": 0.6429404900816803, "grad_norm": 1.7578812561514576, "learning_rate": 3.406573951018723e-06, "loss": 0.4765, "step": 27550 }, { "epoch": 0.6431738623103851, "grad_norm": 1.691041603834478, "learning_rate": 3.402713736044262e-06, "loss": 0.4747, "step": 27560 }, { "epoch": 0.6434072345390899, "grad_norm": 1.615943719356788, "learning_rate": 3.398854581045251e-06, "loss": 0.4546, "step": 27570 }, { "epoch": 0.6436406067677947, "grad_norm": 1.68943723596673, "learning_rate": 3.3949964885826647e-06, "loss": 0.4489, "step": 27580 }, { "epoch": 0.6438739789964995, "grad_norm": 1.4325133124476606, "learning_rate": 3.3911394612167734e-06, "loss": 0.4399, "step": 27590 }, { "epoch": 0.6441073512252042, "grad_norm": 1.7453092225486873, "learning_rate": 3.3872835015071393e-06, "loss": 0.4667, "step": 27600 }, { "epoch": 0.644340723453909, "grad_norm": 2.200556352009573, "learning_rate": 3.383428612012616e-06, "loss": 0.4582, "step": 27610 }, { "epoch": 0.6445740956826138, "grad_norm": 1.5825173591989268, "learning_rate": 3.3795747952913484e-06, "loss": 0.4546, "step": 27620 }, { "epoch": 0.6448074679113186, "grad_norm": 1.6989476074209209, "learning_rate": 3.375722053900768e-06, "loss": 0.4704, "step": 27630 }, { "epoch": 0.6450408401400234, "grad_norm": 1.808759213030928, "learning_rate": 3.3718703903975937e-06, "loss": 0.4566, "step": 27640 }, { "epoch": 0.6452742123687282, "grad_norm": 1.5671907986403262, "learning_rate": 3.368019807337828e-06, "loss": 0.4433, "step": 27650 }, { "epoch": 0.645507584597433, "grad_norm": 1.7386165069150685, "learning_rate": 3.3641703072767577e-06, "loss": 0.4612, "step": 27660 }, { "epoch": 0.6457409568261376, "grad_norm": 1.6260004383752662, "learning_rate": 3.36032189276895e-06, "loss": 0.4574, "step": 27670 }, { "epoch": 0.6459743290548424, "grad_norm": 1.5911076037679093, "learning_rate": 3.356474566368252e-06, "loss": 0.4817, "step": 27680 }, { "epoch": 0.6462077012835472, "grad_norm": 2.0596771906322813, "learning_rate": 3.352628330627791e-06, "loss": 0.4748, "step": 27690 }, { "epoch": 0.646441073512252, "grad_norm": 1.6908290710452183, "learning_rate": 3.3487831880999643e-06, "loss": 0.4689, "step": 27700 }, { "epoch": 0.6466744457409568, "grad_norm": 1.701719159436704, "learning_rate": 3.3449391413364495e-06, "loss": 0.4308, "step": 27710 }, { "epoch": 0.6469078179696616, "grad_norm": 2.159278036523414, "learning_rate": 3.341096192888195e-06, "loss": 0.4699, "step": 27720 }, { "epoch": 0.6471411901983664, "grad_norm": 1.573613242245588, "learning_rate": 3.3372543453054207e-06, "loss": 0.4717, "step": 27730 }, { "epoch": 0.6473745624270711, "grad_norm": 2.3325676722918685, "learning_rate": 3.3334136011376173e-06, "loss": 0.4418, "step": 27740 }, { "epoch": 0.6476079346557759, "grad_norm": 1.6977970413507542, "learning_rate": 3.3295739629335388e-06, "loss": 0.4674, "step": 27750 }, { "epoch": 0.6478413068844807, "grad_norm": 1.9929460074086216, "learning_rate": 3.3257354332412094e-06, "loss": 0.4413, "step": 27760 }, { "epoch": 0.6480746791131855, "grad_norm": 1.67623888729291, "learning_rate": 3.3218980146079195e-06, "loss": 0.4593, "step": 27770 }, { "epoch": 0.6483080513418903, "grad_norm": 1.5350027332341396, "learning_rate": 3.318061709580214e-06, "loss": 0.4292, "step": 27780 }, { "epoch": 0.6485414235705951, "grad_norm": 1.8114808059554754, "learning_rate": 3.314226520703908e-06, "loss": 0.4631, "step": 27790 }, { "epoch": 0.6487747957992999, "grad_norm": 1.775181216626135, "learning_rate": 3.3103924505240703e-06, "loss": 0.4455, "step": 27800 }, { "epoch": 0.6490081680280047, "grad_norm": 1.4886557394229774, "learning_rate": 3.3065595015850297e-06, "loss": 0.4535, "step": 27810 }, { "epoch": 0.6492415402567094, "grad_norm": 1.6398618131816862, "learning_rate": 3.3027276764303696e-06, "loss": 0.4729, "step": 27820 }, { "epoch": 0.6494749124854142, "grad_norm": 1.746307718289886, "learning_rate": 3.2988969776029285e-06, "loss": 0.4452, "step": 27830 }, { "epoch": 0.649708284714119, "grad_norm": 1.5801987138487836, "learning_rate": 3.295067407644798e-06, "loss": 0.4532, "step": 27840 }, { "epoch": 0.6499416569428238, "grad_norm": 1.5507876784695707, "learning_rate": 3.291238969097318e-06, "loss": 0.452, "step": 27850 }, { "epoch": 0.6501750291715286, "grad_norm": 1.5802823887918824, "learning_rate": 3.28741166450108e-06, "loss": 0.4528, "step": 27860 }, { "epoch": 0.6504084014002334, "grad_norm": 1.8486864647495937, "learning_rate": 3.2835854963959236e-06, "loss": 0.4713, "step": 27870 }, { "epoch": 0.6506417736289382, "grad_norm": 1.750447455564302, "learning_rate": 3.279760467320932e-06, "loss": 0.449, "step": 27880 }, { "epoch": 0.650875145857643, "grad_norm": 1.6837453881250102, "learning_rate": 3.275936579814433e-06, "loss": 0.4833, "step": 27890 }, { "epoch": 0.6511085180863477, "grad_norm": 1.8086076686538026, "learning_rate": 3.272113836413998e-06, "loss": 0.4821, "step": 27900 }, { "epoch": 0.6513418903150525, "grad_norm": 1.7675066584365164, "learning_rate": 3.2682922396564387e-06, "loss": 0.45, "step": 27910 }, { "epoch": 0.6515752625437573, "grad_norm": 1.9854606394537457, "learning_rate": 3.264471792077805e-06, "loss": 0.439, "step": 27920 }, { "epoch": 0.6518086347724621, "grad_norm": 1.9806727599024079, "learning_rate": 3.260652496213385e-06, "loss": 0.4457, "step": 27930 }, { "epoch": 0.6520420070011669, "grad_norm": 1.664454795592271, "learning_rate": 3.2568343545977027e-06, "loss": 0.4445, "step": 27940 }, { "epoch": 0.6522753792298717, "grad_norm": 1.647218665114568, "learning_rate": 3.253017369764516e-06, "loss": 0.4428, "step": 27950 }, { "epoch": 0.6525087514585765, "grad_norm": 1.6885129949658615, "learning_rate": 3.249201544246815e-06, "loss": 0.4555, "step": 27960 }, { "epoch": 0.6527421236872812, "grad_norm": 1.7786072048590376, "learning_rate": 3.245386880576819e-06, "loss": 0.4514, "step": 27970 }, { "epoch": 0.652975495915986, "grad_norm": 1.524089447312398, "learning_rate": 3.2415733812859803e-06, "loss": 0.4736, "step": 27980 }, { "epoch": 0.6532088681446908, "grad_norm": 1.72851537385386, "learning_rate": 3.237761048904973e-06, "loss": 0.4492, "step": 27990 }, { "epoch": 0.6534422403733956, "grad_norm": 1.3954909116065173, "learning_rate": 3.233949885963701e-06, "loss": 0.4153, "step": 28000 }, { "epoch": 0.6536756126021004, "grad_norm": 1.7114783654094259, "learning_rate": 3.2301398949912923e-06, "loss": 0.4549, "step": 28010 }, { "epoch": 0.6539089848308052, "grad_norm": 1.922288507932278, "learning_rate": 3.2263310785160927e-06, "loss": 0.4568, "step": 28020 }, { "epoch": 0.65414235705951, "grad_norm": 1.7107695196521353, "learning_rate": 3.2225234390656743e-06, "loss": 0.4343, "step": 28030 }, { "epoch": 0.6543757292882147, "grad_norm": 1.6866562163998815, "learning_rate": 3.2187169791668245e-06, "loss": 0.4748, "step": 28040 }, { "epoch": 0.6546091015169195, "grad_norm": 1.6947024245537021, "learning_rate": 3.21491170134555e-06, "loss": 0.4511, "step": 28050 }, { "epoch": 0.6548424737456243, "grad_norm": 1.5418272427051791, "learning_rate": 3.21110760812707e-06, "loss": 0.4446, "step": 28060 }, { "epoch": 0.6550758459743291, "grad_norm": 1.5775637872582657, "learning_rate": 3.2073047020358215e-06, "loss": 0.4682, "step": 28070 }, { "epoch": 0.6553092182030339, "grad_norm": 1.7762622503945424, "learning_rate": 3.203502985595449e-06, "loss": 0.4386, "step": 28080 }, { "epoch": 0.6555425904317386, "grad_norm": 1.9267169586276423, "learning_rate": 3.199702461328813e-06, "loss": 0.4571, "step": 28090 }, { "epoch": 0.6557759626604434, "grad_norm": 1.7005496920919525, "learning_rate": 3.195903131757979e-06, "loss": 0.4769, "step": 28100 }, { "epoch": 0.6560093348891481, "grad_norm": 1.6955105457504636, "learning_rate": 3.192104999404221e-06, "loss": 0.4561, "step": 28110 }, { "epoch": 0.6562427071178529, "grad_norm": 1.847907932239725, "learning_rate": 3.1883080667880175e-06, "loss": 0.4427, "step": 28120 }, { "epoch": 0.6564760793465577, "grad_norm": 1.656963303382239, "learning_rate": 3.1845123364290544e-06, "loss": 0.4703, "step": 28130 }, { "epoch": 0.6567094515752625, "grad_norm": 1.6635579147835666, "learning_rate": 3.1807178108462133e-06, "loss": 0.4762, "step": 28140 }, { "epoch": 0.6569428238039673, "grad_norm": 1.5946442161517929, "learning_rate": 3.1769244925575827e-06, "loss": 0.4933, "step": 28150 }, { "epoch": 0.6571761960326721, "grad_norm": 1.8492403639435624, "learning_rate": 3.1731323840804473e-06, "loss": 0.4648, "step": 28160 }, { "epoch": 0.6574095682613769, "grad_norm": 2.1588896381311984, "learning_rate": 3.1693414879312866e-06, "loss": 0.441, "step": 28170 }, { "epoch": 0.6576429404900817, "grad_norm": 1.7324594269599147, "learning_rate": 3.165551806625781e-06, "loss": 0.4451, "step": 28180 }, { "epoch": 0.6578763127187864, "grad_norm": 1.4300147199933428, "learning_rate": 3.1617633426787992e-06, "loss": 0.4469, "step": 28190 }, { "epoch": 0.6581096849474912, "grad_norm": 1.6722526448285335, "learning_rate": 3.1579760986044067e-06, "loss": 0.4627, "step": 28200 }, { "epoch": 0.658343057176196, "grad_norm": 1.6546337664584525, "learning_rate": 3.154190076915855e-06, "loss": 0.4569, "step": 28210 }, { "epoch": 0.6585764294049008, "grad_norm": 1.8209193075885675, "learning_rate": 3.150405280125588e-06, "loss": 0.4331, "step": 28220 }, { "epoch": 0.6588098016336056, "grad_norm": 1.6492403044368946, "learning_rate": 3.1466217107452364e-06, "loss": 0.4616, "step": 28230 }, { "epoch": 0.6590431738623104, "grad_norm": 1.567119806189386, "learning_rate": 3.142839371285613e-06, "loss": 0.4669, "step": 28240 }, { "epoch": 0.6592765460910152, "grad_norm": 1.6337440373415475, "learning_rate": 3.139058264256718e-06, "loss": 0.4389, "step": 28250 }, { "epoch": 0.65950991831972, "grad_norm": 2.312507503176711, "learning_rate": 3.1352783921677327e-06, "loss": 0.4544, "step": 28260 }, { "epoch": 0.6597432905484247, "grad_norm": 1.7869068687280996, "learning_rate": 3.13149975752702e-06, "loss": 0.4417, "step": 28270 }, { "epoch": 0.6599766627771295, "grad_norm": 1.4626879144356306, "learning_rate": 3.1277223628421173e-06, "loss": 0.4462, "step": 28280 }, { "epoch": 0.6602100350058343, "grad_norm": 1.781765688666515, "learning_rate": 3.1239462106197448e-06, "loss": 0.4582, "step": 28290 }, { "epoch": 0.6604434072345391, "grad_norm": 1.6937773649469843, "learning_rate": 3.1201713033657954e-06, "loss": 0.4616, "step": 28300 }, { "epoch": 0.6606767794632439, "grad_norm": 1.6222567342831173, "learning_rate": 3.116397643585335e-06, "loss": 0.4454, "step": 28310 }, { "epoch": 0.6609101516919487, "grad_norm": 1.4983849110718315, "learning_rate": 3.1126252337826026e-06, "loss": 0.4597, "step": 28320 }, { "epoch": 0.6611435239206535, "grad_norm": 1.5904772714808848, "learning_rate": 3.10885407646101e-06, "loss": 0.4618, "step": 28330 }, { "epoch": 0.6613768961493582, "grad_norm": 1.4124421154174407, "learning_rate": 3.1050841741231324e-06, "loss": 0.4588, "step": 28340 }, { "epoch": 0.661610268378063, "grad_norm": 1.7677059442685568, "learning_rate": 3.1013155292707174e-06, "loss": 0.4403, "step": 28350 }, { "epoch": 0.6618436406067678, "grad_norm": 1.808707478339427, "learning_rate": 3.0975481444046756e-06, "loss": 0.4231, "step": 28360 }, { "epoch": 0.6620770128354726, "grad_norm": 1.4234908239319897, "learning_rate": 3.0937820220250838e-06, "loss": 0.4528, "step": 28370 }, { "epoch": 0.6623103850641774, "grad_norm": 1.504397031978422, "learning_rate": 3.090017164631176e-06, "loss": 0.4528, "step": 28380 }, { "epoch": 0.6625437572928822, "grad_norm": 1.5586623363685221, "learning_rate": 3.086253574721352e-06, "loss": 0.4307, "step": 28390 }, { "epoch": 0.662777129521587, "grad_norm": 1.4511325267004225, "learning_rate": 3.0824912547931684e-06, "loss": 0.4399, "step": 28400 }, { "epoch": 0.6630105017502917, "grad_norm": 1.8409801029072745, "learning_rate": 3.078730207343338e-06, "loss": 0.4521, "step": 28410 }, { "epoch": 0.6632438739789965, "grad_norm": 1.6823554287265348, "learning_rate": 3.0749704348677314e-06, "loss": 0.4443, "step": 28420 }, { "epoch": 0.6634772462077013, "grad_norm": 1.5343628874355573, "learning_rate": 3.0712119398613704e-06, "loss": 0.4553, "step": 28430 }, { "epoch": 0.6637106184364061, "grad_norm": 1.7274185581873571, "learning_rate": 3.067454724818435e-06, "loss": 0.4912, "step": 28440 }, { "epoch": 0.6639439906651109, "grad_norm": 1.4710899081359237, "learning_rate": 3.0636987922322457e-06, "loss": 0.4481, "step": 28450 }, { "epoch": 0.6641773628938157, "grad_norm": 1.8365083246621308, "learning_rate": 3.0599441445952805e-06, "loss": 0.4587, "step": 28460 }, { "epoch": 0.6644107351225205, "grad_norm": 1.6335903865946904, "learning_rate": 3.05619078439916e-06, "loss": 0.4606, "step": 28470 }, { "epoch": 0.6646441073512253, "grad_norm": 1.6416277815676714, "learning_rate": 3.0524387141346543e-06, "loss": 0.4639, "step": 28480 }, { "epoch": 0.66487747957993, "grad_norm": 1.978921767289313, "learning_rate": 3.0486879362916762e-06, "loss": 0.4802, "step": 28490 }, { "epoch": 0.6651108518086347, "grad_norm": 1.7984851999860965, "learning_rate": 3.0449384533592774e-06, "loss": 0.4585, "step": 28500 }, { "epoch": 0.6653442240373395, "grad_norm": 1.4287533493094837, "learning_rate": 3.0411902678256544e-06, "loss": 0.4505, "step": 28510 }, { "epoch": 0.6655775962660443, "grad_norm": 1.6099671400148199, "learning_rate": 3.0374433821781424e-06, "loss": 0.4378, "step": 28520 }, { "epoch": 0.6658109684947491, "grad_norm": 1.5499410226496224, "learning_rate": 3.0336977989032113e-06, "loss": 0.4671, "step": 28530 }, { "epoch": 0.6660443407234539, "grad_norm": 1.7862719430266174, "learning_rate": 3.029953520486468e-06, "loss": 0.439, "step": 28540 }, { "epoch": 0.6662777129521587, "grad_norm": 1.6939058172554695, "learning_rate": 3.0262105494126566e-06, "loss": 0.4495, "step": 28550 }, { "epoch": 0.6665110851808634, "grad_norm": 1.9001870622265473, "learning_rate": 3.022468888165647e-06, "loss": 0.4762, "step": 28560 }, { "epoch": 0.6667444574095682, "grad_norm": 1.6551582752364835, "learning_rate": 3.018728539228447e-06, "loss": 0.4814, "step": 28570 }, { "epoch": 0.666977829638273, "grad_norm": 1.6669645560970294, "learning_rate": 3.014989505083188e-06, "loss": 0.4087, "step": 28580 }, { "epoch": 0.6672112018669778, "grad_norm": 1.4076248541437824, "learning_rate": 3.0112517882111334e-06, "loss": 0.4689, "step": 28590 }, { "epoch": 0.6674445740956826, "grad_norm": 1.6922401481939928, "learning_rate": 3.0075153910926693e-06, "loss": 0.451, "step": 28600 }, { "epoch": 0.6676779463243874, "grad_norm": 1.6898913738768453, "learning_rate": 3.0037803162073055e-06, "loss": 0.4498, "step": 28610 }, { "epoch": 0.6679113185530922, "grad_norm": 1.2836876783798339, "learning_rate": 3.0000465660336774e-06, "loss": 0.4395, "step": 28620 }, { "epoch": 0.668144690781797, "grad_norm": 1.54335221457223, "learning_rate": 2.996314143049539e-06, "loss": 0.444, "step": 28630 }, { "epoch": 0.6683780630105017, "grad_norm": 1.3166262797907526, "learning_rate": 2.9925830497317642e-06, "loss": 0.4413, "step": 28640 }, { "epoch": 0.6686114352392065, "grad_norm": 1.5738511033084526, "learning_rate": 2.988853288556346e-06, "loss": 0.4649, "step": 28650 }, { "epoch": 0.6688448074679113, "grad_norm": 1.6344813472522195, "learning_rate": 2.985124861998391e-06, "loss": 0.4407, "step": 28660 }, { "epoch": 0.6690781796966161, "grad_norm": 1.6213649236047805, "learning_rate": 2.981397772532119e-06, "loss": 0.4402, "step": 28670 }, { "epoch": 0.6693115519253209, "grad_norm": 1.5966326649552391, "learning_rate": 2.9776720226308674e-06, "loss": 0.4435, "step": 28680 }, { "epoch": 0.6695449241540257, "grad_norm": 1.6072328708558228, "learning_rate": 2.9739476147670817e-06, "loss": 0.4959, "step": 28690 }, { "epoch": 0.6697782963827305, "grad_norm": 1.8987802488291317, "learning_rate": 2.970224551412315e-06, "loss": 0.4453, "step": 28700 }, { "epoch": 0.6700116686114352, "grad_norm": 1.7331293462458346, "learning_rate": 2.966502835037231e-06, "loss": 0.4694, "step": 28710 }, { "epoch": 0.67024504084014, "grad_norm": 1.7005791970269903, "learning_rate": 2.9627824681116e-06, "loss": 0.4458, "step": 28720 }, { "epoch": 0.6704784130688448, "grad_norm": 1.5992604128975267, "learning_rate": 2.959063453104294e-06, "loss": 0.4256, "step": 28730 }, { "epoch": 0.6707117852975496, "grad_norm": 1.5977055315627768, "learning_rate": 2.9553457924832886e-06, "loss": 0.4539, "step": 28740 }, { "epoch": 0.6709451575262544, "grad_norm": 1.714483194038755, "learning_rate": 2.951629488715662e-06, "loss": 0.4447, "step": 28750 }, { "epoch": 0.6711785297549592, "grad_norm": 1.6231187516240309, "learning_rate": 2.9479145442675934e-06, "loss": 0.4292, "step": 28760 }, { "epoch": 0.671411901983664, "grad_norm": 1.5066555429013366, "learning_rate": 2.9442009616043543e-06, "loss": 0.4533, "step": 28770 }, { "epoch": 0.6716452742123687, "grad_norm": 1.4205630802887685, "learning_rate": 2.9404887431903177e-06, "loss": 0.4497, "step": 28780 }, { "epoch": 0.6718786464410735, "grad_norm": 1.8942060576675737, "learning_rate": 2.936777891488951e-06, "loss": 0.4282, "step": 28790 }, { "epoch": 0.6721120186697783, "grad_norm": 1.6666966167809087, "learning_rate": 2.93306840896281e-06, "loss": 0.4639, "step": 28800 }, { "epoch": 0.6723453908984831, "grad_norm": 1.724203614631454, "learning_rate": 2.929360298073547e-06, "loss": 0.4581, "step": 28810 }, { "epoch": 0.6725787631271879, "grad_norm": 1.6822344462999164, "learning_rate": 2.9256535612819046e-06, "loss": 0.4709, "step": 28820 }, { "epoch": 0.6728121353558927, "grad_norm": 1.4995115164302806, "learning_rate": 2.9219482010477063e-06, "loss": 0.4536, "step": 28830 }, { "epoch": 0.6730455075845975, "grad_norm": 1.4936404231402673, "learning_rate": 2.91824421982987e-06, "loss": 0.4376, "step": 28840 }, { "epoch": 0.6732788798133023, "grad_norm": 1.5734529289068042, "learning_rate": 2.914541620086393e-06, "loss": 0.4566, "step": 28850 }, { "epoch": 0.673512252042007, "grad_norm": 1.7352739380610593, "learning_rate": 2.9108404042743614e-06, "loss": 0.4294, "step": 28860 }, { "epoch": 0.6737456242707118, "grad_norm": 1.447144992212216, "learning_rate": 2.9071405748499384e-06, "loss": 0.4416, "step": 28870 }, { "epoch": 0.6739789964994166, "grad_norm": 1.7624144811043656, "learning_rate": 2.9034421342683665e-06, "loss": 0.4515, "step": 28880 }, { "epoch": 0.6742123687281214, "grad_norm": 1.85134941112931, "learning_rate": 2.8997450849839724e-06, "loss": 0.4621, "step": 28890 }, { "epoch": 0.6744457409568262, "grad_norm": 1.790482899990464, "learning_rate": 2.896049429450154e-06, "loss": 0.4611, "step": 28900 }, { "epoch": 0.674679113185531, "grad_norm": 1.626471392238775, "learning_rate": 2.892355170119383e-06, "loss": 0.4688, "step": 28910 }, { "epoch": 0.6749124854142357, "grad_norm": 1.44779138927403, "learning_rate": 2.8886623094432125e-06, "loss": 0.4479, "step": 28920 }, { "epoch": 0.6751458576429404, "grad_norm": 1.5987232121147597, "learning_rate": 2.8849708498722605e-06, "loss": 0.438, "step": 28930 }, { "epoch": 0.6753792298716452, "grad_norm": 1.5663120060386475, "learning_rate": 2.8812807938562147e-06, "loss": 0.4365, "step": 28940 }, { "epoch": 0.67561260210035, "grad_norm": 1.694774731274599, "learning_rate": 2.877592143843838e-06, "loss": 0.4585, "step": 28950 }, { "epoch": 0.6758459743290548, "grad_norm": 1.5115914237866643, "learning_rate": 2.8739049022829547e-06, "loss": 0.4819, "step": 28960 }, { "epoch": 0.6760793465577596, "grad_norm": 1.7564553038750492, "learning_rate": 2.8702190716204536e-06, "loss": 0.467, "step": 28970 }, { "epoch": 0.6763127187864644, "grad_norm": 1.4285604478729188, "learning_rate": 2.866534654302293e-06, "loss": 0.4631, "step": 28980 }, { "epoch": 0.6765460910151692, "grad_norm": 1.6451883080858622, "learning_rate": 2.8628516527734886e-06, "loss": 0.4664, "step": 28990 }, { "epoch": 0.676779463243874, "grad_norm": 1.567511572047203, "learning_rate": 2.8591700694781156e-06, "loss": 0.4408, "step": 29000 }, { "epoch": 0.6770128354725787, "grad_norm": 1.48097779968077, "learning_rate": 2.8554899068593133e-06, "loss": 0.4317, "step": 29010 }, { "epoch": 0.6772462077012835, "grad_norm": 1.5042113493884, "learning_rate": 2.851811167359272e-06, "loss": 0.448, "step": 29020 }, { "epoch": 0.6774795799299883, "grad_norm": 1.5261716776165253, "learning_rate": 2.848133853419245e-06, "loss": 0.4542, "step": 29030 }, { "epoch": 0.6777129521586931, "grad_norm": 1.743571545945903, "learning_rate": 2.8444579674795326e-06, "loss": 0.4275, "step": 29040 }, { "epoch": 0.6779463243873979, "grad_norm": 1.5474558774775033, "learning_rate": 2.840783511979489e-06, "loss": 0.4742, "step": 29050 }, { "epoch": 0.6781796966161027, "grad_norm": 1.8966772599065878, "learning_rate": 2.837110489357523e-06, "loss": 0.4667, "step": 29060 }, { "epoch": 0.6784130688448075, "grad_norm": 2.0657129969479393, "learning_rate": 2.833438902051089e-06, "loss": 0.4653, "step": 29070 }, { "epoch": 0.6786464410735122, "grad_norm": 1.5576320772988215, "learning_rate": 2.8297687524966876e-06, "loss": 0.4571, "step": 29080 }, { "epoch": 0.678879813302217, "grad_norm": 1.7811797946131982, "learning_rate": 2.826100043129871e-06, "loss": 0.449, "step": 29090 }, { "epoch": 0.6791131855309218, "grad_norm": 1.5781527963801474, "learning_rate": 2.822432776385231e-06, "loss": 0.4666, "step": 29100 }, { "epoch": 0.6793465577596266, "grad_norm": 1.8817348458298808, "learning_rate": 2.8187669546964e-06, "loss": 0.4648, "step": 29110 }, { "epoch": 0.6795799299883314, "grad_norm": 1.7500545420341005, "learning_rate": 2.8151025804960598e-06, "loss": 0.4526, "step": 29120 }, { "epoch": 0.6798133022170362, "grad_norm": 1.7941869001308144, "learning_rate": 2.811439656215924e-06, "loss": 0.435, "step": 29130 }, { "epoch": 0.680046674445741, "grad_norm": 1.4866523219225465, "learning_rate": 2.8077781842867445e-06, "loss": 0.4486, "step": 29140 }, { "epoch": 0.6802800466744457, "grad_norm": 1.5416952099746375, "learning_rate": 2.804118167138315e-06, "loss": 0.4796, "step": 29150 }, { "epoch": 0.6805134189031505, "grad_norm": 1.5614108653281087, "learning_rate": 2.8004596071994587e-06, "loss": 0.4769, "step": 29160 }, { "epoch": 0.6807467911318553, "grad_norm": 1.4076026864033564, "learning_rate": 2.7968025068980316e-06, "loss": 0.4415, "step": 29170 }, { "epoch": 0.6809801633605601, "grad_norm": 1.5332970511657227, "learning_rate": 2.793146868660926e-06, "loss": 0.458, "step": 29180 }, { "epoch": 0.6812135355892649, "grad_norm": 1.6588327273215375, "learning_rate": 2.7894926949140588e-06, "loss": 0.4778, "step": 29190 }, { "epoch": 0.6814469078179697, "grad_norm": 1.6850371495351573, "learning_rate": 2.785839988082377e-06, "loss": 0.4453, "step": 29200 }, { "epoch": 0.6816802800466745, "grad_norm": 1.6185096320686203, "learning_rate": 2.7821887505898554e-06, "loss": 0.4743, "step": 29210 }, { "epoch": 0.6819136522753793, "grad_norm": 2.3591828740534404, "learning_rate": 2.7785389848594883e-06, "loss": 0.4214, "step": 29220 }, { "epoch": 0.682147024504084, "grad_norm": 1.7244203327773007, "learning_rate": 2.774890693313303e-06, "loss": 0.4565, "step": 29230 }, { "epoch": 0.6823803967327888, "grad_norm": 1.5468939369871788, "learning_rate": 2.7712438783723383e-06, "loss": 0.4434, "step": 29240 }, { "epoch": 0.6826137689614936, "grad_norm": 1.6534116689514302, "learning_rate": 2.7675985424566613e-06, "loss": 0.4261, "step": 29250 }, { "epoch": 0.6828471411901984, "grad_norm": 1.7001313056467924, "learning_rate": 2.7639546879853528e-06, "loss": 0.4195, "step": 29260 }, { "epoch": 0.6830805134189032, "grad_norm": 1.8752029626669602, "learning_rate": 2.7603123173765106e-06, "loss": 0.4424, "step": 29270 }, { "epoch": 0.683313885647608, "grad_norm": 1.553638222733711, "learning_rate": 2.756671433047251e-06, "loss": 0.4532, "step": 29280 }, { "epoch": 0.6835472578763128, "grad_norm": 1.5761103821476166, "learning_rate": 2.753032037413702e-06, "loss": 0.4284, "step": 29290 }, { "epoch": 0.6837806301050176, "grad_norm": 1.5937255461313857, "learning_rate": 2.749394132891e-06, "loss": 0.4463, "step": 29300 }, { "epoch": 0.6840140023337223, "grad_norm": 1.680113791643556, "learning_rate": 2.7457577218932995e-06, "loss": 0.4393, "step": 29310 }, { "epoch": 0.6842473745624271, "grad_norm": 1.7210302375180662, "learning_rate": 2.7421228068337603e-06, "loss": 0.4587, "step": 29320 }, { "epoch": 0.6844807467911318, "grad_norm": 1.5571138842793113, "learning_rate": 2.7384893901245435e-06, "loss": 0.4559, "step": 29330 }, { "epoch": 0.6847141190198366, "grad_norm": 1.6534451728140571, "learning_rate": 2.7348574741768287e-06, "loss": 0.4605, "step": 29340 }, { "epoch": 0.6849474912485414, "grad_norm": 1.5558970555529152, "learning_rate": 2.7312270614007885e-06, "loss": 0.466, "step": 29350 }, { "epoch": 0.6851808634772462, "grad_norm": 1.8146665909663235, "learning_rate": 2.7275981542056005e-06, "loss": 0.4433, "step": 29360 }, { "epoch": 0.685414235705951, "grad_norm": 1.4202724958520043, "learning_rate": 2.723970754999448e-06, "loss": 0.449, "step": 29370 }, { "epoch": 0.6856476079346557, "grad_norm": 1.4827419490267655, "learning_rate": 2.7203448661895087e-06, "loss": 0.467, "step": 29380 }, { "epoch": 0.6858809801633605, "grad_norm": 1.8234455533430345, "learning_rate": 2.7167204901819577e-06, "loss": 0.4582, "step": 29390 }, { "epoch": 0.6861143523920653, "grad_norm": 1.5457653303508438, "learning_rate": 2.71309762938197e-06, "loss": 0.4474, "step": 29400 }, { "epoch": 0.6863477246207701, "grad_norm": 1.6048983220209916, "learning_rate": 2.709476286193711e-06, "loss": 0.4188, "step": 29410 }, { "epoch": 0.6865810968494749, "grad_norm": 1.6878117098337013, "learning_rate": 2.7058564630203444e-06, "loss": 0.438, "step": 29420 }, { "epoch": 0.6868144690781797, "grad_norm": 1.5985203822625196, "learning_rate": 2.7022381622640196e-06, "loss": 0.4729, "step": 29430 }, { "epoch": 0.6870478413068845, "grad_norm": 1.6763557386426569, "learning_rate": 2.6986213863258758e-06, "loss": 0.4479, "step": 29440 }, { "epoch": 0.6872812135355892, "grad_norm": 1.6604857424827753, "learning_rate": 2.6950061376060467e-06, "loss": 0.4519, "step": 29450 }, { "epoch": 0.687514585764294, "grad_norm": 1.6960273013479092, "learning_rate": 2.6913924185036454e-06, "loss": 0.4544, "step": 29460 }, { "epoch": 0.6877479579929988, "grad_norm": 1.7917857906307832, "learning_rate": 2.687780231416772e-06, "loss": 0.4512, "step": 29470 }, { "epoch": 0.6879813302217036, "grad_norm": 1.560564712630037, "learning_rate": 2.6841695787425134e-06, "loss": 0.4561, "step": 29480 }, { "epoch": 0.6882147024504084, "grad_norm": 1.8869495801718337, "learning_rate": 2.6805604628769342e-06, "loss": 0.455, "step": 29490 }, { "epoch": 0.6884480746791132, "grad_norm": 1.7166115403048403, "learning_rate": 2.6769528862150785e-06, "loss": 0.4991, "step": 29500 }, { "epoch": 0.688681446907818, "grad_norm": 1.6936348164010167, "learning_rate": 2.6733468511509753e-06, "loss": 0.442, "step": 29510 }, { "epoch": 0.6889148191365227, "grad_norm": 1.391230744592368, "learning_rate": 2.669742360077623e-06, "loss": 0.4717, "step": 29520 }, { "epoch": 0.6891481913652275, "grad_norm": 1.4768240221045243, "learning_rate": 2.666139415386998e-06, "loss": 0.4536, "step": 29530 }, { "epoch": 0.6893815635939323, "grad_norm": 1.4182680528941398, "learning_rate": 2.6625380194700545e-06, "loss": 0.416, "step": 29540 }, { "epoch": 0.6896149358226371, "grad_norm": 1.651648296808515, "learning_rate": 2.658938174716713e-06, "loss": 0.4928, "step": 29550 }, { "epoch": 0.6898483080513419, "grad_norm": 1.7565879920195904, "learning_rate": 2.6553398835158682e-06, "loss": 0.4297, "step": 29560 }, { "epoch": 0.6900816802800467, "grad_norm": 1.7622983884331123, "learning_rate": 2.6517431482553823e-06, "loss": 0.4475, "step": 29570 }, { "epoch": 0.6903150525087515, "grad_norm": 1.7489456240803474, "learning_rate": 2.6481479713220844e-06, "loss": 0.4684, "step": 29580 }, { "epoch": 0.6905484247374563, "grad_norm": 1.3977788657014647, "learning_rate": 2.644554355101773e-06, "loss": 0.4153, "step": 29590 }, { "epoch": 0.690781796966161, "grad_norm": 1.7282580025802843, "learning_rate": 2.6409623019792075e-06, "loss": 0.4561, "step": 29600 }, { "epoch": 0.6910151691948658, "grad_norm": 1.8109498048803596, "learning_rate": 2.637371814338109e-06, "loss": 0.4674, "step": 29610 }, { "epoch": 0.6912485414235706, "grad_norm": 1.5769780414506471, "learning_rate": 2.6337828945611645e-06, "loss": 0.4435, "step": 29620 }, { "epoch": 0.6914819136522754, "grad_norm": 1.4847980178450837, "learning_rate": 2.6301955450300155e-06, "loss": 0.4592, "step": 29630 }, { "epoch": 0.6917152858809802, "grad_norm": 1.7455583639439436, "learning_rate": 2.6266097681252667e-06, "loss": 0.4576, "step": 29640 }, { "epoch": 0.691948658109685, "grad_norm": 1.7230583751893613, "learning_rate": 2.623025566226474e-06, "loss": 0.4799, "step": 29650 }, { "epoch": 0.6921820303383898, "grad_norm": 1.7972434523235759, "learning_rate": 2.6194429417121494e-06, "loss": 0.4599, "step": 29660 }, { "epoch": 0.6924154025670946, "grad_norm": 1.7638573634100827, "learning_rate": 2.615861896959762e-06, "loss": 0.485, "step": 29670 }, { "epoch": 0.6926487747957993, "grad_norm": 1.9706796537728486, "learning_rate": 2.6122824343457283e-06, "loss": 0.4356, "step": 29680 }, { "epoch": 0.6928821470245041, "grad_norm": 1.745567820897287, "learning_rate": 2.608704556245414e-06, "loss": 0.4579, "step": 29690 }, { "epoch": 0.6931155192532089, "grad_norm": 1.545586996476741, "learning_rate": 2.6051282650331387e-06, "loss": 0.439, "step": 29700 }, { "epoch": 0.6933488914819137, "grad_norm": 1.8458080356956188, "learning_rate": 2.6015535630821647e-06, "loss": 0.4794, "step": 29710 }, { "epoch": 0.6935822637106185, "grad_norm": 1.373182302632005, "learning_rate": 2.5979804527646986e-06, "loss": 0.438, "step": 29720 }, { "epoch": 0.6938156359393233, "grad_norm": 1.7289246070893751, "learning_rate": 2.5944089364518966e-06, "loss": 0.4463, "step": 29730 }, { "epoch": 0.6940490081680281, "grad_norm": 1.8991727385129304, "learning_rate": 2.5908390165138507e-06, "loss": 0.4623, "step": 29740 }, { "epoch": 0.6942823803967327, "grad_norm": 1.6358210980914663, "learning_rate": 2.587270695319596e-06, "loss": 0.4642, "step": 29750 }, { "epoch": 0.6945157526254375, "grad_norm": 1.5233636964232609, "learning_rate": 2.5837039752371083e-06, "loss": 0.4306, "step": 29760 }, { "epoch": 0.6947491248541423, "grad_norm": 1.6998617819452995, "learning_rate": 2.5801388586332992e-06, "loss": 0.4461, "step": 29770 }, { "epoch": 0.6949824970828471, "grad_norm": 1.5473643795682146, "learning_rate": 2.576575347874014e-06, "loss": 0.466, "step": 29780 }, { "epoch": 0.6952158693115519, "grad_norm": 1.6811179509773584, "learning_rate": 2.573013445324039e-06, "loss": 0.4408, "step": 29790 }, { "epoch": 0.6954492415402567, "grad_norm": 1.7243261134916317, "learning_rate": 2.5694531533470844e-06, "loss": 0.4641, "step": 29800 }, { "epoch": 0.6956826137689615, "grad_norm": 1.5367562375031183, "learning_rate": 2.5658944743058013e-06, "loss": 0.4539, "step": 29810 }, { "epoch": 0.6959159859976662, "grad_norm": 1.3689020860724759, "learning_rate": 2.562337410561762e-06, "loss": 0.4317, "step": 29820 }, { "epoch": 0.696149358226371, "grad_norm": 2.0723250190101306, "learning_rate": 2.55878196447547e-06, "loss": 0.478, "step": 29830 }, { "epoch": 0.6963827304550758, "grad_norm": 1.4901724273881694, "learning_rate": 2.555228138406357e-06, "loss": 0.4488, "step": 29840 }, { "epoch": 0.6966161026837806, "grad_norm": 1.6340818369521843, "learning_rate": 2.551675934712779e-06, "loss": 0.4093, "step": 29850 }, { "epoch": 0.6968494749124854, "grad_norm": 1.634664810337273, "learning_rate": 2.5481253557520115e-06, "loss": 0.4523, "step": 29860 }, { "epoch": 0.6970828471411902, "grad_norm": 2.022136045213168, "learning_rate": 2.544576403880258e-06, "loss": 0.4546, "step": 29870 }, { "epoch": 0.697316219369895, "grad_norm": 1.683900500492198, "learning_rate": 2.5410290814526386e-06, "loss": 0.4478, "step": 29880 }, { "epoch": 0.6975495915985997, "grad_norm": 2.425874477703, "learning_rate": 2.5374833908231893e-06, "loss": 0.4179, "step": 29890 }, { "epoch": 0.6977829638273045, "grad_norm": 1.7152074363644887, "learning_rate": 2.5339393343448713e-06, "loss": 0.4438, "step": 29900 }, { "epoch": 0.6980163360560093, "grad_norm": 1.4752664187956184, "learning_rate": 2.530396914369555e-06, "loss": 0.4304, "step": 29910 }, { "epoch": 0.6982497082847141, "grad_norm": 1.6143023619672077, "learning_rate": 2.5268561332480234e-06, "loss": 0.4585, "step": 29920 }, { "epoch": 0.6984830805134189, "grad_norm": 1.700915313427522, "learning_rate": 2.52331699332998e-06, "loss": 0.4547, "step": 29930 }, { "epoch": 0.6987164527421237, "grad_norm": 1.5494703187679995, "learning_rate": 2.5197794969640316e-06, "loss": 0.4453, "step": 29940 }, { "epoch": 0.6989498249708285, "grad_norm": 1.830786879066266, "learning_rate": 2.516243646497697e-06, "loss": 0.4499, "step": 29950 }, { "epoch": 0.6991831971995333, "grad_norm": 1.5922553060376168, "learning_rate": 2.5127094442774012e-06, "loss": 0.4376, "step": 29960 }, { "epoch": 0.699416569428238, "grad_norm": 1.8039791099455829, "learning_rate": 2.509176892648478e-06, "loss": 0.4829, "step": 29970 }, { "epoch": 0.6996499416569428, "grad_norm": 1.4225428375280076, "learning_rate": 2.5056459939551657e-06, "loss": 0.4437, "step": 29980 }, { "epoch": 0.6998833138856476, "grad_norm": 2.065532069676699, "learning_rate": 2.502116750540604e-06, "loss": 0.4632, "step": 29990 }, { "epoch": 0.7001166861143524, "grad_norm": 1.7731992775343617, "learning_rate": 2.4985891647468325e-06, "loss": 0.4592, "step": 30000 }, { "epoch": 0.7003500583430572, "grad_norm": 3.8624892215349407, "learning_rate": 2.4950632389147976e-06, "loss": 0.4559, "step": 30010 }, { "epoch": 0.700583430571762, "grad_norm": 1.6492263565486505, "learning_rate": 2.491538975384335e-06, "loss": 0.4443, "step": 30020 }, { "epoch": 0.7008168028004668, "grad_norm": 1.9986911207713978, "learning_rate": 2.488016376494186e-06, "loss": 0.4454, "step": 30030 }, { "epoch": 0.7010501750291716, "grad_norm": 1.725926708529928, "learning_rate": 2.484495444581981e-06, "loss": 0.4286, "step": 30040 }, { "epoch": 0.7012835472578763, "grad_norm": 1.7100988789698062, "learning_rate": 2.4809761819842454e-06, "loss": 0.4535, "step": 30050 }, { "epoch": 0.7015169194865811, "grad_norm": 1.7795671637473656, "learning_rate": 2.4774585910364e-06, "loss": 0.4515, "step": 30060 }, { "epoch": 0.7017502917152859, "grad_norm": 1.4832539615285534, "learning_rate": 2.473942674072754e-06, "loss": 0.4337, "step": 30070 }, { "epoch": 0.7019836639439907, "grad_norm": 1.3728773485435752, "learning_rate": 2.470428433426502e-06, "loss": 0.4483, "step": 30080 }, { "epoch": 0.7022170361726955, "grad_norm": 1.5081122822907227, "learning_rate": 2.4669158714297347e-06, "loss": 0.4274, "step": 30090 }, { "epoch": 0.7024504084014003, "grad_norm": 1.3960482831352936, "learning_rate": 2.463404990413422e-06, "loss": 0.4523, "step": 30100 }, { "epoch": 0.7026837806301051, "grad_norm": 1.6387199846726923, "learning_rate": 2.4598957927074186e-06, "loss": 0.4692, "step": 30110 }, { "epoch": 0.7029171528588098, "grad_norm": 1.5515105906660664, "learning_rate": 2.456388280640467e-06, "loss": 0.4472, "step": 30120 }, { "epoch": 0.7031505250875146, "grad_norm": 1.7027741392753044, "learning_rate": 2.452882456540187e-06, "loss": 0.4613, "step": 30130 }, { "epoch": 0.7033838973162194, "grad_norm": 1.6120827437791811, "learning_rate": 2.449378322733077e-06, "loss": 0.4501, "step": 30140 }, { "epoch": 0.7036172695449242, "grad_norm": 1.5591716930257593, "learning_rate": 2.445875881544519e-06, "loss": 0.4625, "step": 30150 }, { "epoch": 0.703850641773629, "grad_norm": 1.2677572568771487, "learning_rate": 2.442375135298768e-06, "loss": 0.4296, "step": 30160 }, { "epoch": 0.7040840140023337, "grad_norm": 1.619147278457216, "learning_rate": 2.438876086318952e-06, "loss": 0.4325, "step": 30170 }, { "epoch": 0.7043173862310385, "grad_norm": 1.6817636609086468, "learning_rate": 2.4353787369270796e-06, "loss": 0.4199, "step": 30180 }, { "epoch": 0.7045507584597432, "grad_norm": 1.993276583569401, "learning_rate": 2.4318830894440243e-06, "loss": 0.4463, "step": 30190 }, { "epoch": 0.704784130688448, "grad_norm": 1.5638646842973003, "learning_rate": 2.428389146189537e-06, "loss": 0.4282, "step": 30200 }, { "epoch": 0.7050175029171528, "grad_norm": 1.6876320759705796, "learning_rate": 2.4248969094822315e-06, "loss": 0.4595, "step": 30210 }, { "epoch": 0.7052508751458576, "grad_norm": 1.9548185732959078, "learning_rate": 2.4214063816395903e-06, "loss": 0.4805, "step": 30220 }, { "epoch": 0.7054842473745624, "grad_norm": 1.334653259856731, "learning_rate": 2.417917564977967e-06, "loss": 0.427, "step": 30230 }, { "epoch": 0.7057176196032672, "grad_norm": 2.036857475940195, "learning_rate": 2.414430461812574e-06, "loss": 0.4258, "step": 30240 }, { "epoch": 0.705950991831972, "grad_norm": 1.4651614736917369, "learning_rate": 2.410945074457487e-06, "loss": 0.4401, "step": 30250 }, { "epoch": 0.7061843640606768, "grad_norm": 1.9447884608762742, "learning_rate": 2.4074614052256474e-06, "loss": 0.4368, "step": 30260 }, { "epoch": 0.7064177362893815, "grad_norm": 1.8935715132250721, "learning_rate": 2.4039794564288526e-06, "loss": 0.4493, "step": 30270 }, { "epoch": 0.7066511085180863, "grad_norm": 2.11820647903269, "learning_rate": 2.4004992303777573e-06, "loss": 0.4608, "step": 30280 }, { "epoch": 0.7068844807467911, "grad_norm": 1.6525443205604535, "learning_rate": 2.3970207293818787e-06, "loss": 0.446, "step": 30290 }, { "epoch": 0.7071178529754959, "grad_norm": 1.6673393252665283, "learning_rate": 2.393543955749583e-06, "loss": 0.4485, "step": 30300 }, { "epoch": 0.7073512252042007, "grad_norm": 1.7182521162139748, "learning_rate": 2.3900689117880936e-06, "loss": 0.4271, "step": 30310 }, { "epoch": 0.7075845974329055, "grad_norm": 2.5330677389143252, "learning_rate": 2.3865955998034853e-06, "loss": 0.4501, "step": 30320 }, { "epoch": 0.7078179696616103, "grad_norm": 1.6527722543848973, "learning_rate": 2.3831240221006802e-06, "loss": 0.427, "step": 30330 }, { "epoch": 0.708051341890315, "grad_norm": 1.7334789702802895, "learning_rate": 2.3796541809834574e-06, "loss": 0.4407, "step": 30340 }, { "epoch": 0.7082847141190198, "grad_norm": 1.7511668563379152, "learning_rate": 2.376186078754436e-06, "loss": 0.4771, "step": 30350 }, { "epoch": 0.7085180863477246, "grad_norm": 1.6396991356350352, "learning_rate": 2.3727197177150834e-06, "loss": 0.4313, "step": 30360 }, { "epoch": 0.7087514585764294, "grad_norm": 1.6890676190327725, "learning_rate": 2.369255100165715e-06, "loss": 0.44, "step": 30370 }, { "epoch": 0.7089848308051342, "grad_norm": 1.5092443276275143, "learning_rate": 2.3657922284054853e-06, "loss": 0.4329, "step": 30380 }, { "epoch": 0.709218203033839, "grad_norm": 1.7573861063219252, "learning_rate": 2.3623311047323883e-06, "loss": 0.4568, "step": 30390 }, { "epoch": 0.7094515752625438, "grad_norm": 1.6115423937431326, "learning_rate": 2.358871731443265e-06, "loss": 0.4577, "step": 30400 }, { "epoch": 0.7096849474912486, "grad_norm": 1.7822604543822165, "learning_rate": 2.355414110833788e-06, "loss": 0.4455, "step": 30410 }, { "epoch": 0.7099183197199533, "grad_norm": 1.7750628805571902, "learning_rate": 2.351958245198472e-06, "loss": 0.4262, "step": 30420 }, { "epoch": 0.7101516919486581, "grad_norm": 1.7649869736365127, "learning_rate": 2.348504136830664e-06, "loss": 0.4675, "step": 30430 }, { "epoch": 0.7103850641773629, "grad_norm": 1.5170364314455098, "learning_rate": 2.3450517880225427e-06, "loss": 0.4352, "step": 30440 }, { "epoch": 0.7106184364060677, "grad_norm": 1.8127832420623755, "learning_rate": 2.3416012010651264e-06, "loss": 0.4479, "step": 30450 }, { "epoch": 0.7108518086347725, "grad_norm": 1.4702854034567236, "learning_rate": 2.3381523782482566e-06, "loss": 0.428, "step": 30460 }, { "epoch": 0.7110851808634773, "grad_norm": 1.6218578851122634, "learning_rate": 2.3347053218606075e-06, "loss": 0.4383, "step": 30470 }, { "epoch": 0.7113185530921821, "grad_norm": 1.50444669964424, "learning_rate": 2.3312600341896834e-06, "loss": 0.4336, "step": 30480 }, { "epoch": 0.7115519253208868, "grad_norm": 1.538374105927535, "learning_rate": 2.3278165175218105e-06, "loss": 0.4621, "step": 30490 }, { "epoch": 0.7117852975495916, "grad_norm": 1.46294810662543, "learning_rate": 2.3243747741421397e-06, "loss": 0.4514, "step": 30500 }, { "epoch": 0.7120186697782964, "grad_norm": 1.7529118000873236, "learning_rate": 2.3209348063346508e-06, "loss": 0.4389, "step": 30510 }, { "epoch": 0.7122520420070012, "grad_norm": 1.545649367324308, "learning_rate": 2.31749661638214e-06, "loss": 0.4586, "step": 30520 }, { "epoch": 0.712485414235706, "grad_norm": 1.6896432089513094, "learning_rate": 2.3140602065662227e-06, "loss": 0.4733, "step": 30530 }, { "epoch": 0.7127187864644108, "grad_norm": 1.8253585784542081, "learning_rate": 2.3106255791673395e-06, "loss": 0.4723, "step": 30540 }, { "epoch": 0.7129521586931156, "grad_norm": 1.4527591530172765, "learning_rate": 2.3071927364647424e-06, "loss": 0.4263, "step": 30550 }, { "epoch": 0.7131855309218204, "grad_norm": 1.4142100308950878, "learning_rate": 2.3037616807364987e-06, "loss": 0.4462, "step": 30560 }, { "epoch": 0.7134189031505251, "grad_norm": 1.7898921389430624, "learning_rate": 2.3003324142594965e-06, "loss": 0.4376, "step": 30570 }, { "epoch": 0.7136522753792298, "grad_norm": 1.6741387712336926, "learning_rate": 2.296904939309427e-06, "loss": 0.4539, "step": 30580 }, { "epoch": 0.7138856476079346, "grad_norm": 1.5963078892403038, "learning_rate": 2.2934792581608017e-06, "loss": 0.4273, "step": 30590 }, { "epoch": 0.7141190198366394, "grad_norm": 1.7148349699219925, "learning_rate": 2.290055373086936e-06, "loss": 0.4782, "step": 30600 }, { "epoch": 0.7143523920653442, "grad_norm": 1.7011503292438706, "learning_rate": 2.286633286359954e-06, "loss": 0.4486, "step": 30610 }, { "epoch": 0.714585764294049, "grad_norm": 1.6874047768809162, "learning_rate": 2.2832130002507892e-06, "loss": 0.4768, "step": 30620 }, { "epoch": 0.7148191365227538, "grad_norm": 1.6527593688095932, "learning_rate": 2.279794517029177e-06, "loss": 0.4447, "step": 30630 }, { "epoch": 0.7150525087514585, "grad_norm": 1.6633514661876088, "learning_rate": 2.2763778389636565e-06, "loss": 0.4476, "step": 30640 }, { "epoch": 0.7152858809801633, "grad_norm": 1.7479094944608382, "learning_rate": 2.272962968321573e-06, "loss": 0.4426, "step": 30650 }, { "epoch": 0.7155192532088681, "grad_norm": 1.6415064645161541, "learning_rate": 2.269549907369069e-06, "loss": 0.457, "step": 30660 }, { "epoch": 0.7157526254375729, "grad_norm": 1.444174097932717, "learning_rate": 2.266138658371083e-06, "loss": 0.4505, "step": 30670 }, { "epoch": 0.7159859976662777, "grad_norm": 1.5036341001973752, "learning_rate": 2.2627292235913596e-06, "loss": 0.4468, "step": 30680 }, { "epoch": 0.7162193698949825, "grad_norm": 2.018377146720263, "learning_rate": 2.259321605292432e-06, "loss": 0.4387, "step": 30690 }, { "epoch": 0.7164527421236873, "grad_norm": 1.7289750643705375, "learning_rate": 2.2559158057356307e-06, "loss": 0.4595, "step": 30700 }, { "epoch": 0.716686114352392, "grad_norm": 1.9753277787365868, "learning_rate": 2.2525118271810785e-06, "loss": 0.4371, "step": 30710 }, { "epoch": 0.7169194865810968, "grad_norm": 1.63105862625361, "learning_rate": 2.2491096718876886e-06, "loss": 0.4459, "step": 30720 }, { "epoch": 0.7171528588098016, "grad_norm": 1.620738029763525, "learning_rate": 2.2457093421131705e-06, "loss": 0.4282, "step": 30730 }, { "epoch": 0.7173862310385064, "grad_norm": 1.630598166500978, "learning_rate": 2.242310840114015e-06, "loss": 0.4418, "step": 30740 }, { "epoch": 0.7176196032672112, "grad_norm": 1.737061062071082, "learning_rate": 2.238914168145501e-06, "loss": 0.4573, "step": 30750 }, { "epoch": 0.717852975495916, "grad_norm": 1.5177112039202323, "learning_rate": 2.2355193284616984e-06, "loss": 0.4557, "step": 30760 }, { "epoch": 0.7180863477246208, "grad_norm": 1.6404515166658034, "learning_rate": 2.2321263233154555e-06, "loss": 0.4426, "step": 30770 }, { "epoch": 0.7183197199533256, "grad_norm": 1.5667860879159983, "learning_rate": 2.2287351549584036e-06, "loss": 0.4642, "step": 30780 }, { "epoch": 0.7185530921820303, "grad_norm": 1.8776990846080663, "learning_rate": 2.2253458256409594e-06, "loss": 0.442, "step": 30790 }, { "epoch": 0.7187864644107351, "grad_norm": 1.6492189016967012, "learning_rate": 2.221958337612314e-06, "loss": 0.4359, "step": 30800 }, { "epoch": 0.7190198366394399, "grad_norm": 1.7109732105585989, "learning_rate": 2.2185726931204415e-06, "loss": 0.4479, "step": 30810 }, { "epoch": 0.7192532088681447, "grad_norm": 1.546895818232732, "learning_rate": 2.2151888944120893e-06, "loss": 0.4664, "step": 30820 }, { "epoch": 0.7194865810968495, "grad_norm": 1.5527915060341166, "learning_rate": 2.2118069437327776e-06, "loss": 0.452, "step": 30830 }, { "epoch": 0.7197199533255543, "grad_norm": 1.8972964106988406, "learning_rate": 2.2084268433268086e-06, "loss": 0.4452, "step": 30840 }, { "epoch": 0.7199533255542591, "grad_norm": 1.7897582670310623, "learning_rate": 2.205048595437248e-06, "loss": 0.4589, "step": 30850 }, { "epoch": 0.7201866977829638, "grad_norm": 1.849553174919371, "learning_rate": 2.201672202305935e-06, "loss": 0.4495, "step": 30860 }, { "epoch": 0.7204200700116686, "grad_norm": 1.8954532275080913, "learning_rate": 2.1982976661734812e-06, "loss": 0.4633, "step": 30870 }, { "epoch": 0.7206534422403734, "grad_norm": 1.6253255049487372, "learning_rate": 2.1949249892792624e-06, "loss": 0.4433, "step": 30880 }, { "epoch": 0.7208868144690782, "grad_norm": 1.811771320169456, "learning_rate": 2.191554173861418e-06, "loss": 0.4519, "step": 30890 }, { "epoch": 0.721120186697783, "grad_norm": 1.753534140862676, "learning_rate": 2.1881852221568604e-06, "loss": 0.4382, "step": 30900 }, { "epoch": 0.7213535589264878, "grad_norm": 1.6676077028634386, "learning_rate": 2.184818136401258e-06, "loss": 0.4372, "step": 30910 }, { "epoch": 0.7215869311551926, "grad_norm": 1.47751154865847, "learning_rate": 2.1814529188290423e-06, "loss": 0.4655, "step": 30920 }, { "epoch": 0.7218203033838974, "grad_norm": 1.866642847162604, "learning_rate": 2.1780895716734085e-06, "loss": 0.4593, "step": 30930 }, { "epoch": 0.7220536756126021, "grad_norm": 1.8590021584371739, "learning_rate": 2.1747280971663063e-06, "loss": 0.4387, "step": 30940 }, { "epoch": 0.7222870478413069, "grad_norm": 1.7386170178634173, "learning_rate": 2.1713684975384432e-06, "loss": 0.4365, "step": 30950 }, { "epoch": 0.7225204200700117, "grad_norm": 1.681812843632489, "learning_rate": 2.168010775019287e-06, "loss": 0.443, "step": 30960 }, { "epoch": 0.7227537922987165, "grad_norm": 2.0464772597605974, "learning_rate": 2.164654931837053e-06, "loss": 0.4582, "step": 30970 }, { "epoch": 0.7229871645274213, "grad_norm": 1.9828415075923305, "learning_rate": 2.161300970218717e-06, "loss": 0.4648, "step": 30980 }, { "epoch": 0.7232205367561261, "grad_norm": 1.863469334508281, "learning_rate": 2.15794889239e-06, "loss": 0.4401, "step": 30990 }, { "epoch": 0.7234539089848308, "grad_norm": 1.650281516948381, "learning_rate": 2.1545987005753726e-06, "loss": 0.4472, "step": 31000 }, { "epoch": 0.7236872812135355, "grad_norm": 1.7202509663142365, "learning_rate": 2.1512503969980616e-06, "loss": 0.4306, "step": 31010 }, { "epoch": 0.7239206534422403, "grad_norm": 1.665035656942143, "learning_rate": 2.1479039838800316e-06, "loss": 0.4403, "step": 31020 }, { "epoch": 0.7241540256709451, "grad_norm": 1.843029208735399, "learning_rate": 2.144559463441996e-06, "loss": 0.4293, "step": 31030 }, { "epoch": 0.7243873978996499, "grad_norm": 1.683915826654137, "learning_rate": 2.141216837903415e-06, "loss": 0.4402, "step": 31040 }, { "epoch": 0.7246207701283547, "grad_norm": 1.7535338205361692, "learning_rate": 2.137876109482489e-06, "loss": 0.4518, "step": 31050 }, { "epoch": 0.7248541423570595, "grad_norm": 2.0682164507736407, "learning_rate": 2.134537280396158e-06, "loss": 0.46, "step": 31060 }, { "epoch": 0.7250875145857643, "grad_norm": 1.7318139474566991, "learning_rate": 2.131200352860103e-06, "loss": 0.4291, "step": 31070 }, { "epoch": 0.725320886814469, "grad_norm": 1.7265086656528896, "learning_rate": 2.127865329088743e-06, "loss": 0.4449, "step": 31080 }, { "epoch": 0.7255542590431738, "grad_norm": 1.9528620355900335, "learning_rate": 2.124532211295235e-06, "loss": 0.457, "step": 31090 }, { "epoch": 0.7257876312718786, "grad_norm": 1.594818020546835, "learning_rate": 2.1212010016914697e-06, "loss": 0.4673, "step": 31100 }, { "epoch": 0.7260210035005834, "grad_norm": 1.7778067860464197, "learning_rate": 2.1178717024880694e-06, "loss": 0.4718, "step": 31110 }, { "epoch": 0.7262543757292882, "grad_norm": 1.549110453229561, "learning_rate": 2.114544315894396e-06, "loss": 0.4686, "step": 31120 }, { "epoch": 0.726487747957993, "grad_norm": 1.7843429586884187, "learning_rate": 2.111218844118533e-06, "loss": 0.4434, "step": 31130 }, { "epoch": 0.7267211201866978, "grad_norm": 1.9196273984785612, "learning_rate": 2.1078952893672967e-06, "loss": 0.4587, "step": 31140 }, { "epoch": 0.7269544924154026, "grad_norm": 1.6552145420637212, "learning_rate": 2.1045736538462356e-06, "loss": 0.4248, "step": 31150 }, { "epoch": 0.7271878646441073, "grad_norm": 1.6193581666644903, "learning_rate": 2.101253939759619e-06, "loss": 0.4291, "step": 31160 }, { "epoch": 0.7274212368728121, "grad_norm": 1.7689425319962628, "learning_rate": 2.097936149310442e-06, "loss": 0.4514, "step": 31170 }, { "epoch": 0.7276546091015169, "grad_norm": 1.9545740669203706, "learning_rate": 2.094620284700426e-06, "loss": 0.4928, "step": 31180 }, { "epoch": 0.7278879813302217, "grad_norm": 1.9109718362629742, "learning_rate": 2.0913063481300123e-06, "loss": 0.4515, "step": 31190 }, { "epoch": 0.7281213535589265, "grad_norm": 1.5861019557301244, "learning_rate": 2.0879943417983606e-06, "loss": 0.4583, "step": 31200 }, { "epoch": 0.7283547257876313, "grad_norm": 1.6935430148506538, "learning_rate": 2.084684267903356e-06, "loss": 0.4689, "step": 31210 }, { "epoch": 0.7285880980163361, "grad_norm": 1.885935509581261, "learning_rate": 2.081376128641593e-06, "loss": 0.46, "step": 31220 }, { "epoch": 0.7288214702450408, "grad_norm": 1.6614736634873246, "learning_rate": 2.0780699262083905e-06, "loss": 0.4531, "step": 31230 }, { "epoch": 0.7290548424737456, "grad_norm": 1.8975931145390252, "learning_rate": 2.074765662797777e-06, "loss": 0.475, "step": 31240 }, { "epoch": 0.7292882147024504, "grad_norm": 2.812976882493586, "learning_rate": 2.0714633406024924e-06, "loss": 0.4714, "step": 31250 }, { "epoch": 0.7295215869311552, "grad_norm": 1.612694541899149, "learning_rate": 2.068162961813996e-06, "loss": 0.445, "step": 31260 }, { "epoch": 0.72975495915986, "grad_norm": 1.465284078198934, "learning_rate": 2.0648645286224506e-06, "loss": 0.4459, "step": 31270 }, { "epoch": 0.7299883313885648, "grad_norm": 1.527039210719968, "learning_rate": 2.061568043216728e-06, "loss": 0.4599, "step": 31280 }, { "epoch": 0.7302217036172696, "grad_norm": 1.5451872295301927, "learning_rate": 2.0582735077844135e-06, "loss": 0.4481, "step": 31290 }, { "epoch": 0.7304550758459744, "grad_norm": 1.5456163814486892, "learning_rate": 2.054980924511793e-06, "loss": 0.4209, "step": 31300 }, { "epoch": 0.7306884480746791, "grad_norm": 1.7251117580687154, "learning_rate": 2.051690295583856e-06, "loss": 0.4328, "step": 31310 }, { "epoch": 0.7309218203033839, "grad_norm": 1.750247562538584, "learning_rate": 2.0484016231843e-06, "loss": 0.4369, "step": 31320 }, { "epoch": 0.7311551925320887, "grad_norm": 1.9815328995936856, "learning_rate": 2.0451149094955218e-06, "loss": 0.4783, "step": 31330 }, { "epoch": 0.7313885647607935, "grad_norm": 1.5339070299990651, "learning_rate": 2.041830156698615e-06, "loss": 0.4537, "step": 31340 }, { "epoch": 0.7316219369894983, "grad_norm": 1.8208731420067794, "learning_rate": 2.0385473669733795e-06, "loss": 0.4414, "step": 31350 }, { "epoch": 0.7318553092182031, "grad_norm": 1.6559269486073966, "learning_rate": 2.0352665424983038e-06, "loss": 0.4488, "step": 31360 }, { "epoch": 0.7320886814469079, "grad_norm": 1.552353875202478, "learning_rate": 2.0319876854505795e-06, "loss": 0.4396, "step": 31370 }, { "epoch": 0.7323220536756126, "grad_norm": 1.4984642311144099, "learning_rate": 2.0287107980060886e-06, "loss": 0.4305, "step": 31380 }, { "epoch": 0.7325554259043174, "grad_norm": 1.8602040968530533, "learning_rate": 2.0254358823394053e-06, "loss": 0.4573, "step": 31390 }, { "epoch": 0.7327887981330222, "grad_norm": 1.4888510365868841, "learning_rate": 2.0221629406238e-06, "loss": 0.4486, "step": 31400 }, { "epoch": 0.7330221703617269, "grad_norm": 1.9061464776312924, "learning_rate": 2.0188919750312297e-06, "loss": 0.4413, "step": 31410 }, { "epoch": 0.7332555425904317, "grad_norm": 1.8053342558834098, "learning_rate": 2.015622987732337e-06, "loss": 0.4357, "step": 31420 }, { "epoch": 0.7334889148191365, "grad_norm": 1.942210405735305, "learning_rate": 2.01235598089646e-06, "loss": 0.4185, "step": 31430 }, { "epoch": 0.7337222870478413, "grad_norm": 1.7648962353937607, "learning_rate": 2.009090956691615e-06, "loss": 0.4492, "step": 31440 }, { "epoch": 0.733955659276546, "grad_norm": 1.6578233968326825, "learning_rate": 2.0058279172845064e-06, "loss": 0.4221, "step": 31450 }, { "epoch": 0.7341890315052508, "grad_norm": 1.5914397208524675, "learning_rate": 2.00256686484052e-06, "loss": 0.43, "step": 31460 }, { "epoch": 0.7344224037339556, "grad_norm": 1.6203716099142138, "learning_rate": 1.999307801523721e-06, "loss": 0.4435, "step": 31470 }, { "epoch": 0.7346557759626604, "grad_norm": 1.6962470612961913, "learning_rate": 1.9960507294968616e-06, "loss": 0.4428, "step": 31480 }, { "epoch": 0.7348891481913652, "grad_norm": 7.334083624933461, "learning_rate": 1.992795650921366e-06, "loss": 0.449, "step": 31490 }, { "epoch": 0.73512252042007, "grad_norm": 1.6895456822070782, "learning_rate": 1.989542567957335e-06, "loss": 0.4438, "step": 31500 }, { "epoch": 0.7353558926487748, "grad_norm": 1.7139016387977213, "learning_rate": 1.986291482763552e-06, "loss": 0.444, "step": 31510 }, { "epoch": 0.7355892648774796, "grad_norm": 1.9194763975470976, "learning_rate": 1.9830423974974682e-06, "loss": 0.4366, "step": 31520 }, { "epoch": 0.7358226371061843, "grad_norm": 1.6884126411025786, "learning_rate": 1.9797953143152083e-06, "loss": 0.4491, "step": 31530 }, { "epoch": 0.7360560093348891, "grad_norm": 1.7623575842613572, "learning_rate": 1.9765502353715724e-06, "loss": 0.4338, "step": 31540 }, { "epoch": 0.7362893815635939, "grad_norm": 1.7661833611538396, "learning_rate": 1.973307162820028e-06, "loss": 0.4762, "step": 31550 }, { "epoch": 0.7365227537922987, "grad_norm": 2.2508346374149237, "learning_rate": 1.9700660988127074e-06, "loss": 0.4438, "step": 31560 }, { "epoch": 0.7367561260210035, "grad_norm": 1.9081398433608658, "learning_rate": 1.966827045500418e-06, "loss": 0.4303, "step": 31570 }, { "epoch": 0.7369894982497083, "grad_norm": 1.600327226975638, "learning_rate": 1.9635900050326266e-06, "loss": 0.4649, "step": 31580 }, { "epoch": 0.7372228704784131, "grad_norm": 1.4596135476037484, "learning_rate": 1.9603549795574656e-06, "loss": 0.4554, "step": 31590 }, { "epoch": 0.7374562427071178, "grad_norm": 1.9595287454342547, "learning_rate": 1.9571219712217325e-06, "loss": 0.4546, "step": 31600 }, { "epoch": 0.7376896149358226, "grad_norm": 1.4727721928439559, "learning_rate": 1.9538909821708827e-06, "loss": 0.4298, "step": 31610 }, { "epoch": 0.7379229871645274, "grad_norm": 1.8992027158099054, "learning_rate": 1.950662014549037e-06, "loss": 0.4358, "step": 31620 }, { "epoch": 0.7381563593932322, "grad_norm": 1.6554785513684829, "learning_rate": 1.9474350704989686e-06, "loss": 0.4506, "step": 31630 }, { "epoch": 0.738389731621937, "grad_norm": 1.6688875975649526, "learning_rate": 1.944210152162109e-06, "loss": 0.4813, "step": 31640 }, { "epoch": 0.7386231038506418, "grad_norm": 1.7904376509630413, "learning_rate": 1.94098726167855e-06, "loss": 0.4468, "step": 31650 }, { "epoch": 0.7388564760793466, "grad_norm": 1.9100326803057857, "learning_rate": 1.937766401187034e-06, "loss": 0.4685, "step": 31660 }, { "epoch": 0.7390898483080514, "grad_norm": 1.5679015508706076, "learning_rate": 1.9345475728249534e-06, "loss": 0.4372, "step": 31670 }, { "epoch": 0.7393232205367561, "grad_norm": 1.7544122675550045, "learning_rate": 1.9313307787283608e-06, "loss": 0.4439, "step": 31680 }, { "epoch": 0.7395565927654609, "grad_norm": 1.4037060056845048, "learning_rate": 1.928116021031951e-06, "loss": 0.4341, "step": 31690 }, { "epoch": 0.7397899649941657, "grad_norm": 1.6053925872524106, "learning_rate": 1.924903301869068e-06, "loss": 0.4436, "step": 31700 }, { "epoch": 0.7400233372228705, "grad_norm": 1.6717040616857597, "learning_rate": 1.9216926233717087e-06, "loss": 0.454, "step": 31710 }, { "epoch": 0.7402567094515753, "grad_norm": 1.6453816881219643, "learning_rate": 1.918483987670511e-06, "loss": 0.4483, "step": 31720 }, { "epoch": 0.7404900816802801, "grad_norm": 1.6556057643115176, "learning_rate": 1.915277396894755e-06, "loss": 0.4551, "step": 31730 }, { "epoch": 0.7407234539089849, "grad_norm": 1.5730602681032477, "learning_rate": 1.912072853172372e-06, "loss": 0.4633, "step": 31740 }, { "epoch": 0.7409568261376897, "grad_norm": 1.6374915266643302, "learning_rate": 1.9088703586299273e-06, "loss": 0.469, "step": 31750 }, { "epoch": 0.7411901983663944, "grad_norm": 1.6853685605959263, "learning_rate": 1.9056699153926278e-06, "loss": 0.4482, "step": 31760 }, { "epoch": 0.7414235705950992, "grad_norm": 2.0325634289601906, "learning_rate": 1.9024715255843229e-06, "loss": 0.4513, "step": 31770 }, { "epoch": 0.741656942823804, "grad_norm": 1.5776106143961575, "learning_rate": 1.8992751913274944e-06, "loss": 0.4672, "step": 31780 }, { "epoch": 0.7418903150525088, "grad_norm": 1.494286640898615, "learning_rate": 1.8960809147432646e-06, "loss": 0.438, "step": 31790 }, { "epoch": 0.7421236872812136, "grad_norm": 1.602052730383239, "learning_rate": 1.8928886979513867e-06, "loss": 0.4259, "step": 31800 }, { "epoch": 0.7423570595099184, "grad_norm": 1.5970096803864884, "learning_rate": 1.8896985430702487e-06, "loss": 0.4372, "step": 31810 }, { "epoch": 0.7425904317386232, "grad_norm": 1.4367479919940591, "learning_rate": 1.8865104522168693e-06, "loss": 0.4555, "step": 31820 }, { "epoch": 0.7428238039673278, "grad_norm": 1.444708811328196, "learning_rate": 1.8833244275068963e-06, "loss": 0.4448, "step": 31830 }, { "epoch": 0.7430571761960326, "grad_norm": 1.9856583430579748, "learning_rate": 1.8801404710546122e-06, "loss": 0.4388, "step": 31840 }, { "epoch": 0.7432905484247374, "grad_norm": 1.5345100646814547, "learning_rate": 1.8769585849729216e-06, "loss": 0.443, "step": 31850 }, { "epoch": 0.7435239206534422, "grad_norm": 1.4826372295881722, "learning_rate": 1.873778771373354e-06, "loss": 0.451, "step": 31860 }, { "epoch": 0.743757292882147, "grad_norm": 1.272131131346104, "learning_rate": 1.8706010323660695e-06, "loss": 0.4405, "step": 31870 }, { "epoch": 0.7439906651108518, "grad_norm": 1.6896674062166546, "learning_rate": 1.8674253700598465e-06, "loss": 0.4478, "step": 31880 }, { "epoch": 0.7442240373395566, "grad_norm": 1.5432169390146508, "learning_rate": 1.8642517865620863e-06, "loss": 0.4391, "step": 31890 }, { "epoch": 0.7444574095682613, "grad_norm": 1.8495776367362802, "learning_rate": 1.8610802839788129e-06, "loss": 0.462, "step": 31900 }, { "epoch": 0.7446907817969661, "grad_norm": 1.6446426837758363, "learning_rate": 1.8579108644146672e-06, "loss": 0.4216, "step": 31910 }, { "epoch": 0.7449241540256709, "grad_norm": 2.63410671270773, "learning_rate": 1.8547435299729061e-06, "loss": 0.4541, "step": 31920 }, { "epoch": 0.7451575262543757, "grad_norm": 1.730192458264575, "learning_rate": 1.8515782827554092e-06, "loss": 0.4283, "step": 31930 }, { "epoch": 0.7453908984830805, "grad_norm": 1.4677243983562316, "learning_rate": 1.848415124862664e-06, "loss": 0.4381, "step": 31940 }, { "epoch": 0.7456242707117853, "grad_norm": 1.8010485921004238, "learning_rate": 1.8452540583937733e-06, "loss": 0.4363, "step": 31950 }, { "epoch": 0.7458576429404901, "grad_norm": 2.3178604525912014, "learning_rate": 1.8420950854464564e-06, "loss": 0.4664, "step": 31960 }, { "epoch": 0.7460910151691948, "grad_norm": 1.5481114720388294, "learning_rate": 1.8389382081170388e-06, "loss": 0.4566, "step": 31970 }, { "epoch": 0.7463243873978996, "grad_norm": 1.62677064967276, "learning_rate": 1.8357834285004534e-06, "loss": 0.4543, "step": 31980 }, { "epoch": 0.7465577596266044, "grad_norm": 1.556177841222934, "learning_rate": 1.8326307486902488e-06, "loss": 0.44, "step": 31990 }, { "epoch": 0.7467911318553092, "grad_norm": 1.666600048302156, "learning_rate": 1.8294801707785714e-06, "loss": 0.4426, "step": 32000 }, { "epoch": 0.747024504084014, "grad_norm": 1.5257118959448281, "learning_rate": 1.8263316968561807e-06, "loss": 0.4452, "step": 32010 }, { "epoch": 0.7472578763127188, "grad_norm": 1.4947312855570187, "learning_rate": 1.8231853290124336e-06, "loss": 0.4645, "step": 32020 }, { "epoch": 0.7474912485414236, "grad_norm": 1.6294201214004607, "learning_rate": 1.820041069335291e-06, "loss": 0.4595, "step": 32030 }, { "epoch": 0.7477246207701284, "grad_norm": 2.1360255482577237, "learning_rate": 1.8168989199113184e-06, "loss": 0.4466, "step": 32040 }, { "epoch": 0.7479579929988331, "grad_norm": 1.686130557314086, "learning_rate": 1.813758882825677e-06, "loss": 0.4874, "step": 32050 }, { "epoch": 0.7481913652275379, "grad_norm": 1.4803635938665132, "learning_rate": 1.810620960162126e-06, "loss": 0.4405, "step": 32060 }, { "epoch": 0.7484247374562427, "grad_norm": 2.1138689347055157, "learning_rate": 1.8074851540030252e-06, "loss": 0.4404, "step": 32070 }, { "epoch": 0.7486581096849475, "grad_norm": 1.7195155675392935, "learning_rate": 1.804351466429327e-06, "loss": 0.4648, "step": 32080 }, { "epoch": 0.7488914819136523, "grad_norm": 1.4856503291474323, "learning_rate": 1.801219899520576e-06, "loss": 0.4305, "step": 32090 }, { "epoch": 0.7491248541423571, "grad_norm": 1.5739699793539659, "learning_rate": 1.7980904553549155e-06, "loss": 0.4561, "step": 32100 }, { "epoch": 0.7493582263710619, "grad_norm": 1.661232655291369, "learning_rate": 1.7949631360090752e-06, "loss": 0.428, "step": 32110 }, { "epoch": 0.7495915985997667, "grad_norm": 1.9508522573471998, "learning_rate": 1.791837943558374e-06, "loss": 0.4583, "step": 32120 }, { "epoch": 0.7498249708284714, "grad_norm": 1.6470926989456036, "learning_rate": 1.7887148800767246e-06, "loss": 0.4334, "step": 32130 }, { "epoch": 0.7500583430571762, "grad_norm": 1.7650822859933408, "learning_rate": 1.785593947636623e-06, "loss": 0.4602, "step": 32140 }, { "epoch": 0.750291715285881, "grad_norm": 1.7492655934661296, "learning_rate": 1.782475148309149e-06, "loss": 0.4576, "step": 32150 }, { "epoch": 0.7505250875145858, "grad_norm": 1.5370622422790716, "learning_rate": 1.7793584841639737e-06, "loss": 0.4099, "step": 32160 }, { "epoch": 0.7507584597432906, "grad_norm": 1.6626942608581434, "learning_rate": 1.7762439572693441e-06, "loss": 0.4476, "step": 32170 }, { "epoch": 0.7509918319719954, "grad_norm": 1.68598372734298, "learning_rate": 1.7731315696920948e-06, "loss": 0.447, "step": 32180 }, { "epoch": 0.7512252042007002, "grad_norm": 1.7339344970329664, "learning_rate": 1.7700213234976377e-06, "loss": 0.4131, "step": 32190 }, { "epoch": 0.751458576429405, "grad_norm": 1.7065864011969476, "learning_rate": 1.7669132207499594e-06, "loss": 0.4417, "step": 32200 }, { "epoch": 0.7516919486581097, "grad_norm": 1.979594692069075, "learning_rate": 1.7638072635116339e-06, "loss": 0.4544, "step": 32210 }, { "epoch": 0.7519253208868145, "grad_norm": 1.471052525494272, "learning_rate": 1.7607034538438018e-06, "loss": 0.4274, "step": 32220 }, { "epoch": 0.7521586931155193, "grad_norm": 1.5723505202193928, "learning_rate": 1.7576017938061869e-06, "loss": 0.4373, "step": 32230 }, { "epoch": 0.7523920653442241, "grad_norm": 1.7229132730718957, "learning_rate": 1.7545022854570804e-06, "loss": 0.4392, "step": 32240 }, { "epoch": 0.7526254375729288, "grad_norm": 1.9189305052878265, "learning_rate": 1.7514049308533458e-06, "loss": 0.4621, "step": 32250 }, { "epoch": 0.7528588098016336, "grad_norm": 1.6033767071551097, "learning_rate": 1.7483097320504232e-06, "loss": 0.4426, "step": 32260 }, { "epoch": 0.7530921820303383, "grad_norm": 1.3984021791891628, "learning_rate": 1.745216691102316e-06, "loss": 0.459, "step": 32270 }, { "epoch": 0.7533255542590431, "grad_norm": 1.6547648108499406, "learning_rate": 1.7421258100615963e-06, "loss": 0.4602, "step": 32280 }, { "epoch": 0.7535589264877479, "grad_norm": 1.864302580781874, "learning_rate": 1.739037090979407e-06, "loss": 0.4569, "step": 32290 }, { "epoch": 0.7537922987164527, "grad_norm": 1.8748835337601677, "learning_rate": 1.7359505359054524e-06, "loss": 0.4678, "step": 32300 }, { "epoch": 0.7540256709451575, "grad_norm": 2.0591116611491707, "learning_rate": 1.7328661468880003e-06, "loss": 0.4503, "step": 32310 }, { "epoch": 0.7542590431738623, "grad_norm": 1.6930888728904663, "learning_rate": 1.7297839259738858e-06, "loss": 0.4309, "step": 32320 }, { "epoch": 0.7544924154025671, "grad_norm": 2.00319502536697, "learning_rate": 1.7267038752085002e-06, "loss": 0.4605, "step": 32330 }, { "epoch": 0.7547257876312718, "grad_norm": 1.534898851499904, "learning_rate": 1.723625996635796e-06, "loss": 0.4344, "step": 32340 }, { "epoch": 0.7549591598599766, "grad_norm": 1.6161228897308964, "learning_rate": 1.7205502922982869e-06, "loss": 0.4585, "step": 32350 }, { "epoch": 0.7551925320886814, "grad_norm": 1.8597056316671954, "learning_rate": 1.7174767642370405e-06, "loss": 0.4321, "step": 32360 }, { "epoch": 0.7554259043173862, "grad_norm": 1.7499222151988996, "learning_rate": 1.7144054144916793e-06, "loss": 0.4346, "step": 32370 }, { "epoch": 0.755659276546091, "grad_norm": 1.6937938033161724, "learning_rate": 1.7113362451003869e-06, "loss": 0.4652, "step": 32380 }, { "epoch": 0.7558926487747958, "grad_norm": 2.0683097549280736, "learning_rate": 1.7082692580998905e-06, "loss": 0.4643, "step": 32390 }, { "epoch": 0.7561260210035006, "grad_norm": 1.7231498141339956, "learning_rate": 1.7052044555254782e-06, "loss": 0.4351, "step": 32400 }, { "epoch": 0.7563593932322054, "grad_norm": 1.6269201176858126, "learning_rate": 1.7021418394109823e-06, "loss": 0.4544, "step": 32410 }, { "epoch": 0.7565927654609101, "grad_norm": 1.859566284499256, "learning_rate": 1.699081411788785e-06, "loss": 0.4284, "step": 32420 }, { "epoch": 0.7568261376896149, "grad_norm": 1.5670669172520764, "learning_rate": 1.6960231746898193e-06, "loss": 0.4422, "step": 32430 }, { "epoch": 0.7570595099183197, "grad_norm": 1.5856380701544583, "learning_rate": 1.6929671301435618e-06, "loss": 0.4664, "step": 32440 }, { "epoch": 0.7572928821470245, "grad_norm": 1.5633591307877888, "learning_rate": 1.6899132801780332e-06, "loss": 0.4349, "step": 32450 }, { "epoch": 0.7575262543757293, "grad_norm": 1.690662257256743, "learning_rate": 1.6868616268198023e-06, "loss": 0.4358, "step": 32460 }, { "epoch": 0.7577596266044341, "grad_norm": 1.9053408630856274, "learning_rate": 1.6838121720939766e-06, "loss": 0.4343, "step": 32470 }, { "epoch": 0.7579929988331389, "grad_norm": 1.745204963110116, "learning_rate": 1.6807649180242026e-06, "loss": 0.4228, "step": 32480 }, { "epoch": 0.7582263710618437, "grad_norm": 1.7121072868494425, "learning_rate": 1.6777198666326727e-06, "loss": 0.4301, "step": 32490 }, { "epoch": 0.7584597432905484, "grad_norm": 1.6151449076832811, "learning_rate": 1.674677019940113e-06, "loss": 0.4599, "step": 32500 }, { "epoch": 0.7586931155192532, "grad_norm": 1.5608815061863468, "learning_rate": 1.6716363799657852e-06, "loss": 0.4742, "step": 32510 }, { "epoch": 0.758926487747958, "grad_norm": 1.5463921637014808, "learning_rate": 1.6685979487274928e-06, "loss": 0.4484, "step": 32520 }, { "epoch": 0.7591598599766628, "grad_norm": 1.4830282722170873, "learning_rate": 1.6655617282415681e-06, "loss": 0.4519, "step": 32530 }, { "epoch": 0.7593932322053676, "grad_norm": 1.866047277113839, "learning_rate": 1.6625277205228768e-06, "loss": 0.451, "step": 32540 }, { "epoch": 0.7596266044340724, "grad_norm": 1.9113383351819602, "learning_rate": 1.6594959275848194e-06, "loss": 0.4373, "step": 32550 }, { "epoch": 0.7598599766627772, "grad_norm": 1.9641831196369903, "learning_rate": 1.6564663514393238e-06, "loss": 0.4249, "step": 32560 }, { "epoch": 0.760093348891482, "grad_norm": 1.5412023837098365, "learning_rate": 1.6534389940968471e-06, "loss": 0.4463, "step": 32570 }, { "epoch": 0.7603267211201867, "grad_norm": 1.5602825162897491, "learning_rate": 1.6504138575663752e-06, "loss": 0.4592, "step": 32580 }, { "epoch": 0.7605600933488915, "grad_norm": 1.9813213576303998, "learning_rate": 1.6473909438554175e-06, "loss": 0.424, "step": 32590 }, { "epoch": 0.7607934655775963, "grad_norm": 1.7715651777059012, "learning_rate": 1.6443702549700136e-06, "loss": 0.4436, "step": 32600 }, { "epoch": 0.7610268378063011, "grad_norm": 1.8130281773497396, "learning_rate": 1.6413517929147205e-06, "loss": 0.4545, "step": 32610 }, { "epoch": 0.7612602100350059, "grad_norm": 1.6619770265045581, "learning_rate": 1.638335559692623e-06, "loss": 0.4403, "step": 32620 }, { "epoch": 0.7614935822637107, "grad_norm": 1.590568012220062, "learning_rate": 1.6353215573053232e-06, "loss": 0.4581, "step": 32630 }, { "epoch": 0.7617269544924155, "grad_norm": 1.5557163368709368, "learning_rate": 1.632309787752942e-06, "loss": 0.4191, "step": 32640 }, { "epoch": 0.7619603267211202, "grad_norm": 1.8662773154865089, "learning_rate": 1.6293002530341233e-06, "loss": 0.447, "step": 32650 }, { "epoch": 0.7621936989498249, "grad_norm": 1.7472442908967496, "learning_rate": 1.6262929551460238e-06, "loss": 0.4357, "step": 32660 }, { "epoch": 0.7624270711785297, "grad_norm": 1.55278855316495, "learning_rate": 1.6232878960843157e-06, "loss": 0.434, "step": 32670 }, { "epoch": 0.7626604434072345, "grad_norm": 1.4664105531261056, "learning_rate": 1.6202850778431895e-06, "loss": 0.4477, "step": 32680 }, { "epoch": 0.7628938156359393, "grad_norm": 2.418483666994212, "learning_rate": 1.6172845024153444e-06, "loss": 0.4531, "step": 32690 }, { "epoch": 0.7631271878646441, "grad_norm": 1.4808233343685253, "learning_rate": 1.6142861717919915e-06, "loss": 0.4668, "step": 32700 }, { "epoch": 0.7633605600933488, "grad_norm": 1.7387417901496636, "learning_rate": 1.6112900879628574e-06, "loss": 0.4431, "step": 32710 }, { "epoch": 0.7635939323220536, "grad_norm": 1.9114105605198821, "learning_rate": 1.608296252916171e-06, "loss": 0.4646, "step": 32720 }, { "epoch": 0.7638273045507584, "grad_norm": 1.6342038337879974, "learning_rate": 1.6053046686386708e-06, "loss": 0.4536, "step": 32730 }, { "epoch": 0.7640606767794632, "grad_norm": 1.545722647318359, "learning_rate": 1.6023153371156057e-06, "loss": 0.4479, "step": 32740 }, { "epoch": 0.764294049008168, "grad_norm": 1.53150691680675, "learning_rate": 1.5993282603307254e-06, "loss": 0.4403, "step": 32750 }, { "epoch": 0.7645274212368728, "grad_norm": 1.551291373422764, "learning_rate": 1.596343440266282e-06, "loss": 0.4321, "step": 32760 }, { "epoch": 0.7647607934655776, "grad_norm": 1.8423107967789054, "learning_rate": 1.5933608789030365e-06, "loss": 0.4459, "step": 32770 }, { "epoch": 0.7649941656942824, "grad_norm": 1.7122754036134373, "learning_rate": 1.5903805782202435e-06, "loss": 0.4253, "step": 32780 }, { "epoch": 0.7652275379229871, "grad_norm": 1.9592302970288271, "learning_rate": 1.5874025401956643e-06, "loss": 0.4599, "step": 32790 }, { "epoch": 0.7654609101516919, "grad_norm": 1.4945916670947066, "learning_rate": 1.584426766805554e-06, "loss": 0.4441, "step": 32800 }, { "epoch": 0.7656942823803967, "grad_norm": 1.3117534015057215, "learning_rate": 1.581453260024664e-06, "loss": 0.4222, "step": 32810 }, { "epoch": 0.7659276546091015, "grad_norm": 1.5725916105944424, "learning_rate": 1.578482021826247e-06, "loss": 0.4642, "step": 32820 }, { "epoch": 0.7661610268378063, "grad_norm": 1.6616657158795245, "learning_rate": 1.5755130541820457e-06, "loss": 0.4553, "step": 32830 }, { "epoch": 0.7663943990665111, "grad_norm": 1.8437307046823512, "learning_rate": 1.5725463590622952e-06, "loss": 0.4356, "step": 32840 }, { "epoch": 0.7666277712952159, "grad_norm": 1.750828320751209, "learning_rate": 1.569581938435728e-06, "loss": 0.4569, "step": 32850 }, { "epoch": 0.7668611435239207, "grad_norm": 1.8742919365999475, "learning_rate": 1.5666197942695627e-06, "loss": 0.4223, "step": 32860 }, { "epoch": 0.7670945157526254, "grad_norm": 1.44393788661124, "learning_rate": 1.5636599285295068e-06, "loss": 0.4175, "step": 32870 }, { "epoch": 0.7673278879813302, "grad_norm": 1.69630242432608, "learning_rate": 1.5607023431797597e-06, "loss": 0.452, "step": 32880 }, { "epoch": 0.767561260210035, "grad_norm": 1.5986820873725145, "learning_rate": 1.5577470401830041e-06, "loss": 0.4306, "step": 32890 }, { "epoch": 0.7677946324387398, "grad_norm": 1.9113096092108528, "learning_rate": 1.5547940215004075e-06, "loss": 0.437, "step": 32900 }, { "epoch": 0.7680280046674446, "grad_norm": 1.6247419789659554, "learning_rate": 1.5518432890916269e-06, "loss": 0.4252, "step": 32910 }, { "epoch": 0.7682613768961494, "grad_norm": 1.3786264821828011, "learning_rate": 1.548894844914796e-06, "loss": 0.4393, "step": 32920 }, { "epoch": 0.7684947491248542, "grad_norm": 1.8314894184573578, "learning_rate": 1.545948690926533e-06, "loss": 0.4181, "step": 32930 }, { "epoch": 0.768728121353559, "grad_norm": 1.4773821629635875, "learning_rate": 1.5430048290819355e-06, "loss": 0.4286, "step": 32940 }, { "epoch": 0.7689614935822637, "grad_norm": 1.6289691731270526, "learning_rate": 1.540063261334579e-06, "loss": 0.443, "step": 32950 }, { "epoch": 0.7691948658109685, "grad_norm": 1.7105808285027202, "learning_rate": 1.5371239896365202e-06, "loss": 0.4375, "step": 32960 }, { "epoch": 0.7694282380396733, "grad_norm": 1.5302931602462753, "learning_rate": 1.5341870159382894e-06, "loss": 0.4183, "step": 32970 }, { "epoch": 0.7696616102683781, "grad_norm": 1.4981757878354074, "learning_rate": 1.5312523421888898e-06, "loss": 0.4301, "step": 32980 }, { "epoch": 0.7698949824970829, "grad_norm": 1.6777264890282648, "learning_rate": 1.5283199703358037e-06, "loss": 0.4354, "step": 32990 }, { "epoch": 0.7701283547257877, "grad_norm": 1.7584786694485581, "learning_rate": 1.5253899023249808e-06, "loss": 0.4422, "step": 33000 }, { "epoch": 0.7703617269544925, "grad_norm": 2.0333385514216107, "learning_rate": 1.5224621401008472e-06, "loss": 0.4323, "step": 33010 }, { "epoch": 0.7705950991831972, "grad_norm": 1.5551324245284541, "learning_rate": 1.5195366856062933e-06, "loss": 0.4226, "step": 33020 }, { "epoch": 0.770828471411902, "grad_norm": 1.5179949888659474, "learning_rate": 1.5166135407826788e-06, "loss": 0.4733, "step": 33030 }, { "epoch": 0.7710618436406068, "grad_norm": 1.532113910944492, "learning_rate": 1.513692707569837e-06, "loss": 0.4344, "step": 33040 }, { "epoch": 0.7712952158693116, "grad_norm": 1.6214749764656184, "learning_rate": 1.51077418790606e-06, "loss": 0.4495, "step": 33050 }, { "epoch": 0.7715285880980164, "grad_norm": 1.750797511930396, "learning_rate": 1.5078579837281054e-06, "loss": 0.4259, "step": 33060 }, { "epoch": 0.7717619603267212, "grad_norm": 1.7691180277443117, "learning_rate": 1.5049440969712004e-06, "loss": 0.4505, "step": 33070 }, { "epoch": 0.7719953325554259, "grad_norm": 1.3230553005497543, "learning_rate": 1.502032529569027e-06, "loss": 0.449, "step": 33080 }, { "epoch": 0.7722287047841306, "grad_norm": 1.3831442020710154, "learning_rate": 1.49912328345373e-06, "loss": 0.4377, "step": 33090 }, { "epoch": 0.7724620770128354, "grad_norm": 1.7227341735958306, "learning_rate": 1.4962163605559183e-06, "loss": 0.451, "step": 33100 }, { "epoch": 0.7726954492415402, "grad_norm": 1.8677471037480704, "learning_rate": 1.4933117628046534e-06, "loss": 0.4431, "step": 33110 }, { "epoch": 0.772928821470245, "grad_norm": 1.6271452600902117, "learning_rate": 1.4904094921274552e-06, "loss": 0.4642, "step": 33120 }, { "epoch": 0.7731621936989498, "grad_norm": 1.6997579862519203, "learning_rate": 1.4875095504503023e-06, "loss": 0.4337, "step": 33130 }, { "epoch": 0.7733955659276546, "grad_norm": 3.162185594059901, "learning_rate": 1.4846119396976243e-06, "loss": 0.4332, "step": 33140 }, { "epoch": 0.7736289381563594, "grad_norm": 1.7370391023688816, "learning_rate": 1.481716661792304e-06, "loss": 0.4543, "step": 33150 }, { "epoch": 0.7738623103850641, "grad_norm": 1.9110563645071406, "learning_rate": 1.4788237186556804e-06, "loss": 0.4581, "step": 33160 }, { "epoch": 0.7740956826137689, "grad_norm": 1.7401284901736636, "learning_rate": 1.4759331122075366e-06, "loss": 0.4435, "step": 33170 }, { "epoch": 0.7743290548424737, "grad_norm": 1.5455516334460748, "learning_rate": 1.4730448443661115e-06, "loss": 0.423, "step": 33180 }, { "epoch": 0.7745624270711785, "grad_norm": 1.7311286763414444, "learning_rate": 1.4701589170480885e-06, "loss": 0.4444, "step": 33190 }, { "epoch": 0.7747957992998833, "grad_norm": 1.8796810601339118, "learning_rate": 1.4672753321685956e-06, "loss": 0.4614, "step": 33200 }, { "epoch": 0.7750291715285881, "grad_norm": 1.6239201595336774, "learning_rate": 1.4643940916412136e-06, "loss": 0.4525, "step": 33210 }, { "epoch": 0.7752625437572929, "grad_norm": 1.444033782048839, "learning_rate": 1.4615151973779602e-06, "loss": 0.446, "step": 33220 }, { "epoch": 0.7754959159859977, "grad_norm": 1.4867070355998653, "learning_rate": 1.458638651289298e-06, "loss": 0.4347, "step": 33230 }, { "epoch": 0.7757292882147024, "grad_norm": 1.5038386244852346, "learning_rate": 1.4557644552841355e-06, "loss": 0.4519, "step": 33240 }, { "epoch": 0.7759626604434072, "grad_norm": 1.5323355645367422, "learning_rate": 1.4528926112698167e-06, "loss": 0.4509, "step": 33250 }, { "epoch": 0.776196032672112, "grad_norm": 1.5659694466341134, "learning_rate": 1.4500231211521254e-06, "loss": 0.454, "step": 33260 }, { "epoch": 0.7764294049008168, "grad_norm": 1.6227176548356277, "learning_rate": 1.4471559868352875e-06, "loss": 0.462, "step": 33270 }, { "epoch": 0.7766627771295216, "grad_norm": 1.5340134098990215, "learning_rate": 1.4442912102219603e-06, "loss": 0.4531, "step": 33280 }, { "epoch": 0.7768961493582264, "grad_norm": 1.5120272243128052, "learning_rate": 1.4414287932132382e-06, "loss": 0.434, "step": 33290 }, { "epoch": 0.7771295215869312, "grad_norm": 1.7219506941114566, "learning_rate": 1.4385687377086532e-06, "loss": 0.4391, "step": 33300 }, { "epoch": 0.777362893815636, "grad_norm": 1.5983576738825296, "learning_rate": 1.4357110456061652e-06, "loss": 0.4063, "step": 33310 }, { "epoch": 0.7775962660443407, "grad_norm": 1.970511667366199, "learning_rate": 1.4328557188021685e-06, "loss": 0.4409, "step": 33320 }, { "epoch": 0.7778296382730455, "grad_norm": 1.829285429291136, "learning_rate": 1.4300027591914862e-06, "loss": 0.4448, "step": 33330 }, { "epoch": 0.7780630105017503, "grad_norm": 1.9114929432229641, "learning_rate": 1.4271521686673707e-06, "loss": 0.4491, "step": 33340 }, { "epoch": 0.7782963827304551, "grad_norm": 1.8771333163200337, "learning_rate": 1.4243039491215061e-06, "loss": 0.4608, "step": 33350 }, { "epoch": 0.7785297549591599, "grad_norm": 1.5896516081296095, "learning_rate": 1.4214581024439972e-06, "loss": 0.4409, "step": 33360 }, { "epoch": 0.7787631271878647, "grad_norm": 1.8987556647390536, "learning_rate": 1.4186146305233768e-06, "loss": 0.4383, "step": 33370 }, { "epoch": 0.7789964994165695, "grad_norm": 1.9109160242613128, "learning_rate": 1.415773535246604e-06, "loss": 0.4647, "step": 33380 }, { "epoch": 0.7792298716452742, "grad_norm": 1.588878493973422, "learning_rate": 1.4129348184990554e-06, "loss": 0.4395, "step": 33390 }, { "epoch": 0.779463243873979, "grad_norm": 1.8242934848719308, "learning_rate": 1.4100984821645364e-06, "loss": 0.4568, "step": 33400 }, { "epoch": 0.7796966161026838, "grad_norm": 1.615758791994213, "learning_rate": 1.4072645281252668e-06, "loss": 0.4404, "step": 33410 }, { "epoch": 0.7799299883313886, "grad_norm": 1.7006470759156311, "learning_rate": 1.4044329582618848e-06, "loss": 0.4376, "step": 33420 }, { "epoch": 0.7801633605600934, "grad_norm": 1.8472410555745828, "learning_rate": 1.4016037744534538e-06, "loss": 0.4534, "step": 33430 }, { "epoch": 0.7803967327887982, "grad_norm": 1.3963655219535727, "learning_rate": 1.3987769785774463e-06, "loss": 0.4574, "step": 33440 }, { "epoch": 0.780630105017503, "grad_norm": 1.772448933181963, "learning_rate": 1.3959525725097516e-06, "loss": 0.4419, "step": 33450 }, { "epoch": 0.7808634772462077, "grad_norm": 2.0274551829836334, "learning_rate": 1.3931305581246773e-06, "loss": 0.436, "step": 33460 }, { "epoch": 0.7810968494749125, "grad_norm": 1.657218230133447, "learning_rate": 1.3903109372949403e-06, "loss": 0.4426, "step": 33470 }, { "epoch": 0.7813302217036173, "grad_norm": 1.6664398923058543, "learning_rate": 1.3874937118916671e-06, "loss": 0.4575, "step": 33480 }, { "epoch": 0.781563593932322, "grad_norm": 1.5724129829801046, "learning_rate": 1.3846788837844006e-06, "loss": 0.443, "step": 33490 }, { "epoch": 0.7817969661610268, "grad_norm": 1.7176597077532265, "learning_rate": 1.3818664548410882e-06, "loss": 0.4552, "step": 33500 }, { "epoch": 0.7820303383897316, "grad_norm": 1.9452284940671607, "learning_rate": 1.3790564269280842e-06, "loss": 0.4557, "step": 33510 }, { "epoch": 0.7822637106184364, "grad_norm": 1.6436056810698674, "learning_rate": 1.3762488019101555e-06, "loss": 0.4727, "step": 33520 }, { "epoch": 0.7824970828471411, "grad_norm": 1.7091305838979154, "learning_rate": 1.3734435816504693e-06, "loss": 0.4377, "step": 33530 }, { "epoch": 0.7827304550758459, "grad_norm": 1.7353242348695435, "learning_rate": 1.3706407680105959e-06, "loss": 0.4467, "step": 33540 }, { "epoch": 0.7829638273045507, "grad_norm": 1.7584133770594381, "learning_rate": 1.3678403628505148e-06, "loss": 0.4358, "step": 33550 }, { "epoch": 0.7831971995332555, "grad_norm": 1.5342632740124003, "learning_rate": 1.3650423680286007e-06, "loss": 0.4311, "step": 33560 }, { "epoch": 0.7834305717619603, "grad_norm": 2.0648956577785023, "learning_rate": 1.3622467854016336e-06, "loss": 0.441, "step": 33570 }, { "epoch": 0.7836639439906651, "grad_norm": 1.5195232195751731, "learning_rate": 1.3594536168247896e-06, "loss": 0.4389, "step": 33580 }, { "epoch": 0.7838973162193699, "grad_norm": 1.7239961552938206, "learning_rate": 1.3566628641516427e-06, "loss": 0.4512, "step": 33590 }, { "epoch": 0.7841306884480747, "grad_norm": 1.6948241865240765, "learning_rate": 1.353874529234167e-06, "loss": 0.4504, "step": 33600 }, { "epoch": 0.7843640606767794, "grad_norm": 1.7183567133227882, "learning_rate": 1.351088613922729e-06, "loss": 0.434, "step": 33610 }, { "epoch": 0.7845974329054842, "grad_norm": 1.7695841549391085, "learning_rate": 1.3483051200660891e-06, "loss": 0.4439, "step": 33620 }, { "epoch": 0.784830805134189, "grad_norm": 1.8436272783810528, "learning_rate": 1.3455240495114048e-06, "loss": 0.4624, "step": 33630 }, { "epoch": 0.7850641773628938, "grad_norm": 1.8472116460897623, "learning_rate": 1.3427454041042215e-06, "loss": 0.4463, "step": 33640 }, { "epoch": 0.7852975495915986, "grad_norm": 1.9081522757739007, "learning_rate": 1.3399691856884756e-06, "loss": 0.453, "step": 33650 }, { "epoch": 0.7855309218203034, "grad_norm": 1.8764164006911543, "learning_rate": 1.3371953961064965e-06, "loss": 0.4537, "step": 33660 }, { "epoch": 0.7857642940490082, "grad_norm": 1.9264965959967177, "learning_rate": 1.3344240371989974e-06, "loss": 0.4546, "step": 33670 }, { "epoch": 0.785997666277713, "grad_norm": 1.5089650825802352, "learning_rate": 1.3316551108050812e-06, "loss": 0.4554, "step": 33680 }, { "epoch": 0.7862310385064177, "grad_norm": 1.6481075687352083, "learning_rate": 1.328888618762235e-06, "loss": 0.4289, "step": 33690 }, { "epoch": 0.7864644107351225, "grad_norm": 1.603827575158557, "learning_rate": 1.3261245629063302e-06, "loss": 0.4696, "step": 33700 }, { "epoch": 0.7866977829638273, "grad_norm": 1.5502257082830349, "learning_rate": 1.3233629450716252e-06, "loss": 0.4263, "step": 33710 }, { "epoch": 0.7869311551925321, "grad_norm": 1.824159574972525, "learning_rate": 1.3206037670907557e-06, "loss": 0.4393, "step": 33720 }, { "epoch": 0.7871645274212369, "grad_norm": 1.8692475786749543, "learning_rate": 1.3178470307947395e-06, "loss": 0.4245, "step": 33730 }, { "epoch": 0.7873978996499417, "grad_norm": 1.9827822027694852, "learning_rate": 1.3150927380129774e-06, "loss": 0.4661, "step": 33740 }, { "epoch": 0.7876312718786465, "grad_norm": 1.626621758775696, "learning_rate": 1.3123408905732454e-06, "loss": 0.4613, "step": 33750 }, { "epoch": 0.7878646441073512, "grad_norm": 1.6627643526710367, "learning_rate": 1.3095914903016953e-06, "loss": 0.4282, "step": 33760 }, { "epoch": 0.788098016336056, "grad_norm": 1.3903478925228085, "learning_rate": 1.3068445390228606e-06, "loss": 0.4486, "step": 33770 }, { "epoch": 0.7883313885647608, "grad_norm": 1.652538500151449, "learning_rate": 1.3041000385596447e-06, "loss": 0.4465, "step": 33780 }, { "epoch": 0.7885647607934656, "grad_norm": 1.53090613263564, "learning_rate": 1.301357990733324e-06, "loss": 0.4222, "step": 33790 }, { "epoch": 0.7887981330221704, "grad_norm": 1.5427768830422577, "learning_rate": 1.298618397363553e-06, "loss": 0.4617, "step": 33800 }, { "epoch": 0.7890315052508752, "grad_norm": 1.7653578141682353, "learning_rate": 1.2958812602683508e-06, "loss": 0.44, "step": 33810 }, { "epoch": 0.78926487747958, "grad_norm": 1.6705111364141683, "learning_rate": 1.293146581264112e-06, "loss": 0.4255, "step": 33820 }, { "epoch": 0.7894982497082847, "grad_norm": 1.5475556912419313, "learning_rate": 1.2904143621655968e-06, "loss": 0.4325, "step": 33830 }, { "epoch": 0.7897316219369895, "grad_norm": 1.570637729341904, "learning_rate": 1.287684604785931e-06, "loss": 0.4359, "step": 33840 }, { "epoch": 0.7899649941656943, "grad_norm": 1.6458599917438028, "learning_rate": 1.2849573109366132e-06, "loss": 0.4129, "step": 33850 }, { "epoch": 0.7901983663943991, "grad_norm": 1.4118160473665244, "learning_rate": 1.2822324824275018e-06, "loss": 0.4205, "step": 33860 }, { "epoch": 0.7904317386231039, "grad_norm": 1.7937829421426594, "learning_rate": 1.2795101210668192e-06, "loss": 0.4658, "step": 33870 }, { "epoch": 0.7906651108518087, "grad_norm": 1.746528978448144, "learning_rate": 1.2767902286611555e-06, "loss": 0.427, "step": 33880 }, { "epoch": 0.7908984830805135, "grad_norm": 1.5631815348714608, "learning_rate": 1.2740728070154568e-06, "loss": 0.465, "step": 33890 }, { "epoch": 0.7911318553092183, "grad_norm": 1.7432616045296032, "learning_rate": 1.271357857933031e-06, "loss": 0.448, "step": 33900 }, { "epoch": 0.7913652275379229, "grad_norm": 1.3677528300176562, "learning_rate": 1.2686453832155487e-06, "loss": 0.415, "step": 33910 }, { "epoch": 0.7915985997666277, "grad_norm": 1.5909693092343153, "learning_rate": 1.265935384663034e-06, "loss": 0.4501, "step": 33920 }, { "epoch": 0.7918319719953325, "grad_norm": 1.944648042052084, "learning_rate": 1.2632278640738688e-06, "loss": 0.4571, "step": 33930 }, { "epoch": 0.7920653442240373, "grad_norm": 1.7526145466889524, "learning_rate": 1.2605228232447935e-06, "loss": 0.4535, "step": 33940 }, { "epoch": 0.7922987164527421, "grad_norm": 1.8743119673984163, "learning_rate": 1.2578202639708986e-06, "loss": 0.4594, "step": 33950 }, { "epoch": 0.7925320886814469, "grad_norm": 1.7271418713672753, "learning_rate": 1.2551201880456327e-06, "loss": 0.4327, "step": 33960 }, { "epoch": 0.7927654609101517, "grad_norm": 2.0900566700867045, "learning_rate": 1.2524225972607917e-06, "loss": 0.4497, "step": 33970 }, { "epoch": 0.7929988331388564, "grad_norm": 1.4115569432382729, "learning_rate": 1.2497274934065235e-06, "loss": 0.4453, "step": 33980 }, { "epoch": 0.7932322053675612, "grad_norm": 1.5152099403103698, "learning_rate": 1.2470348782713288e-06, "loss": 0.4772, "step": 33990 }, { "epoch": 0.793465577596266, "grad_norm": 1.8396115424908093, "learning_rate": 1.2443447536420539e-06, "loss": 0.4693, "step": 34000 }, { "epoch": 0.7936989498249708, "grad_norm": 1.4624897324098187, "learning_rate": 1.2416571213038908e-06, "loss": 0.4088, "step": 34010 }, { "epoch": 0.7939323220536756, "grad_norm": 1.6611110338020778, "learning_rate": 1.238971983040383e-06, "loss": 0.4536, "step": 34020 }, { "epoch": 0.7941656942823804, "grad_norm": 1.3303178308791193, "learning_rate": 1.2362893406334136e-06, "loss": 0.4239, "step": 34030 }, { "epoch": 0.7943990665110852, "grad_norm": 1.6322396015438836, "learning_rate": 1.2336091958632108e-06, "loss": 0.4149, "step": 34040 }, { "epoch": 0.79463243873979, "grad_norm": 1.88969574376014, "learning_rate": 1.2309315505083486e-06, "loss": 0.4493, "step": 34050 }, { "epoch": 0.7948658109684947, "grad_norm": 1.5288843199250082, "learning_rate": 1.2282564063457375e-06, "loss": 0.439, "step": 34060 }, { "epoch": 0.7950991831971995, "grad_norm": 1.9685224833484938, "learning_rate": 1.225583765150632e-06, "loss": 0.4281, "step": 34070 }, { "epoch": 0.7953325554259043, "grad_norm": 2.2339399396065174, "learning_rate": 1.2229136286966226e-06, "loss": 0.4516, "step": 34080 }, { "epoch": 0.7955659276546091, "grad_norm": 2.1534312508381945, "learning_rate": 1.2202459987556387e-06, "loss": 0.424, "step": 34090 }, { "epoch": 0.7957992998833139, "grad_norm": 1.6813354361737334, "learning_rate": 1.2175808770979485e-06, "loss": 0.4595, "step": 34100 }, { "epoch": 0.7960326721120187, "grad_norm": 1.8395544143240934, "learning_rate": 1.2149182654921538e-06, "loss": 0.4134, "step": 34110 }, { "epoch": 0.7962660443407235, "grad_norm": 1.491624524916013, "learning_rate": 1.212258165705188e-06, "loss": 0.4596, "step": 34120 }, { "epoch": 0.7964994165694282, "grad_norm": 4.48116005300001, "learning_rate": 1.2096005795023248e-06, "loss": 0.425, "step": 34130 }, { "epoch": 0.796732788798133, "grad_norm": 1.7462751549153364, "learning_rate": 1.2069455086471626e-06, "loss": 0.4416, "step": 34140 }, { "epoch": 0.7969661610268378, "grad_norm": 1.460447958750302, "learning_rate": 1.204292954901633e-06, "loss": 0.4345, "step": 34150 }, { "epoch": 0.7971995332555426, "grad_norm": 3.5202842861113264, "learning_rate": 1.201642920026e-06, "loss": 0.4504, "step": 34160 }, { "epoch": 0.7974329054842474, "grad_norm": 1.774192827938174, "learning_rate": 1.1989954057788528e-06, "loss": 0.4598, "step": 34170 }, { "epoch": 0.7976662777129522, "grad_norm": 1.6348256912709296, "learning_rate": 1.1963504139171072e-06, "loss": 0.4387, "step": 34180 }, { "epoch": 0.797899649941657, "grad_norm": 1.591478747097417, "learning_rate": 1.19370794619601e-06, "loss": 0.4376, "step": 34190 }, { "epoch": 0.7981330221703618, "grad_norm": 1.7217544636686304, "learning_rate": 1.1910680043691258e-06, "loss": 0.4406, "step": 34200 }, { "epoch": 0.7983663943990665, "grad_norm": 1.600557976295341, "learning_rate": 1.1884305901883508e-06, "loss": 0.4432, "step": 34210 }, { "epoch": 0.7985997666277713, "grad_norm": 1.8724032470301255, "learning_rate": 1.1857957054038987e-06, "loss": 0.4783, "step": 34220 }, { "epoch": 0.7988331388564761, "grad_norm": 1.8450976780123665, "learning_rate": 1.1831633517643031e-06, "loss": 0.4239, "step": 34230 }, { "epoch": 0.7990665110851809, "grad_norm": 1.6623033536870364, "learning_rate": 1.1805335310164252e-06, "loss": 0.4244, "step": 34240 }, { "epoch": 0.7992998833138857, "grad_norm": 1.7174476458190704, "learning_rate": 1.1779062449054384e-06, "loss": 0.4538, "step": 34250 }, { "epoch": 0.7995332555425905, "grad_norm": 1.762979008241302, "learning_rate": 1.1752814951748353e-06, "loss": 0.4758, "step": 34260 }, { "epoch": 0.7997666277712953, "grad_norm": 1.9059608267868011, "learning_rate": 1.1726592835664302e-06, "loss": 0.4351, "step": 34270 }, { "epoch": 0.8, "grad_norm": 1.7344801455038563, "learning_rate": 1.1700396118203466e-06, "loss": 0.4266, "step": 34280 }, { "epoch": 0.8002333722287048, "grad_norm": 1.6946584607865258, "learning_rate": 1.167422481675025e-06, "loss": 0.4267, "step": 34290 }, { "epoch": 0.8004667444574096, "grad_norm": 1.561336889408889, "learning_rate": 1.1648078948672225e-06, "loss": 0.439, "step": 34300 }, { "epoch": 0.8007001166861144, "grad_norm": 2.0500152220561807, "learning_rate": 1.1621958531320037e-06, "loss": 0.4367, "step": 34310 }, { "epoch": 0.8009334889148192, "grad_norm": 1.658519814897295, "learning_rate": 1.1595863582027445e-06, "loss": 0.4431, "step": 34320 }, { "epoch": 0.8011668611435239, "grad_norm": 1.8442822879855185, "learning_rate": 1.156979411811136e-06, "loss": 0.4522, "step": 34330 }, { "epoch": 0.8014002333722287, "grad_norm": 1.3191923030540265, "learning_rate": 1.1543750156871724e-06, "loss": 0.4467, "step": 34340 }, { "epoch": 0.8016336056009334, "grad_norm": 1.6296120351309582, "learning_rate": 1.1517731715591562e-06, "loss": 0.4198, "step": 34350 }, { "epoch": 0.8018669778296382, "grad_norm": 1.677697708098435, "learning_rate": 1.1491738811537006e-06, "loss": 0.4178, "step": 34360 }, { "epoch": 0.802100350058343, "grad_norm": 1.5874460998408364, "learning_rate": 1.1465771461957187e-06, "loss": 0.4417, "step": 34370 }, { "epoch": 0.8023337222870478, "grad_norm": 1.7460956640417258, "learning_rate": 1.1439829684084325e-06, "loss": 0.4552, "step": 34380 }, { "epoch": 0.8025670945157526, "grad_norm": 1.7487858231816669, "learning_rate": 1.1413913495133632e-06, "loss": 0.4002, "step": 34390 }, { "epoch": 0.8028004667444574, "grad_norm": 1.60044464591183, "learning_rate": 1.1388022912303353e-06, "loss": 0.4258, "step": 34400 }, { "epoch": 0.8030338389731622, "grad_norm": 1.6346434833202743, "learning_rate": 1.1362157952774766e-06, "loss": 0.4324, "step": 34410 }, { "epoch": 0.803267211201867, "grad_norm": 1.590145725205023, "learning_rate": 1.1336318633712102e-06, "loss": 0.4444, "step": 34420 }, { "epoch": 0.8035005834305717, "grad_norm": 1.380147904215541, "learning_rate": 1.1310504972262598e-06, "loss": 0.4408, "step": 34430 }, { "epoch": 0.8037339556592765, "grad_norm": 2.044963335284779, "learning_rate": 1.1284716985556465e-06, "loss": 0.4512, "step": 34440 }, { "epoch": 0.8039673278879813, "grad_norm": 1.7214375596505935, "learning_rate": 1.1258954690706865e-06, "loss": 0.4608, "step": 34450 }, { "epoch": 0.8042007001166861, "grad_norm": 1.6219645898679549, "learning_rate": 1.123321810480994e-06, "loss": 0.4254, "step": 34460 }, { "epoch": 0.8044340723453909, "grad_norm": 1.6196009475375694, "learning_rate": 1.1207507244944738e-06, "loss": 0.4108, "step": 34470 }, { "epoch": 0.8046674445740957, "grad_norm": 1.64786931152471, "learning_rate": 1.1181822128173241e-06, "loss": 0.4329, "step": 34480 }, { "epoch": 0.8049008168028005, "grad_norm": 1.842401015470303, "learning_rate": 1.1156162771540374e-06, "loss": 0.4666, "step": 34490 }, { "epoch": 0.8051341890315052, "grad_norm": 1.7409835359475676, "learning_rate": 1.1130529192073935e-06, "loss": 0.4391, "step": 34500 }, { "epoch": 0.80536756126021, "grad_norm": 1.9531501990487858, "learning_rate": 1.1104921406784614e-06, "loss": 0.4372, "step": 34510 }, { "epoch": 0.8056009334889148, "grad_norm": 2.127132448242381, "learning_rate": 1.107933943266603e-06, "loss": 0.4438, "step": 34520 }, { "epoch": 0.8058343057176196, "grad_norm": 1.8984001563144925, "learning_rate": 1.105378328669463e-06, "loss": 0.4653, "step": 34530 }, { "epoch": 0.8060676779463244, "grad_norm": 1.8951885712186984, "learning_rate": 1.1028252985829712e-06, "loss": 0.4566, "step": 34540 }, { "epoch": 0.8063010501750292, "grad_norm": 1.813130398385063, "learning_rate": 1.1002748547013474e-06, "loss": 0.4643, "step": 34550 }, { "epoch": 0.806534422403734, "grad_norm": 1.7267544447319718, "learning_rate": 1.0977269987170913e-06, "loss": 0.4329, "step": 34560 }, { "epoch": 0.8067677946324388, "grad_norm": 1.8933239749354154, "learning_rate": 1.0951817323209841e-06, "loss": 0.4505, "step": 34570 }, { "epoch": 0.8070011668611435, "grad_norm": 1.7774989810386168, "learning_rate": 1.0926390572020934e-06, "loss": 0.4425, "step": 34580 }, { "epoch": 0.8072345390898483, "grad_norm": 1.689708123953324, "learning_rate": 1.090098975047762e-06, "loss": 0.4311, "step": 34590 }, { "epoch": 0.8074679113185531, "grad_norm": 1.507638961541856, "learning_rate": 1.0875614875436163e-06, "loss": 0.4758, "step": 34600 }, { "epoch": 0.8077012835472579, "grad_norm": 2.221361182583598, "learning_rate": 1.0850265963735584e-06, "loss": 0.4264, "step": 34610 }, { "epoch": 0.8079346557759627, "grad_norm": 1.6983700697634276, "learning_rate": 1.082494303219766e-06, "loss": 0.4113, "step": 34620 }, { "epoch": 0.8081680280046675, "grad_norm": 1.752125009362814, "learning_rate": 1.0799646097626976e-06, "loss": 0.4086, "step": 34630 }, { "epoch": 0.8084014002333723, "grad_norm": 1.4700129027739168, "learning_rate": 1.077437517681082e-06, "loss": 0.4524, "step": 34640 }, { "epoch": 0.808634772462077, "grad_norm": 1.5718852085307176, "learning_rate": 1.0749130286519216e-06, "loss": 0.4719, "step": 34650 }, { "epoch": 0.8088681446907818, "grad_norm": 1.686255651377725, "learning_rate": 1.0723911443504969e-06, "loss": 0.434, "step": 34660 }, { "epoch": 0.8091015169194866, "grad_norm": 1.9159028842979886, "learning_rate": 1.0698718664503538e-06, "loss": 0.4348, "step": 34670 }, { "epoch": 0.8093348891481914, "grad_norm": 1.7207683837534824, "learning_rate": 1.0673551966233091e-06, "loss": 0.4554, "step": 34680 }, { "epoch": 0.8095682613768962, "grad_norm": 1.4074109569823687, "learning_rate": 1.0648411365394546e-06, "loss": 0.4409, "step": 34690 }, { "epoch": 0.809801633605601, "grad_norm": 1.690621063193446, "learning_rate": 1.0623296878671436e-06, "loss": 0.4601, "step": 34700 }, { "epoch": 0.8100350058343058, "grad_norm": 1.9160083438569415, "learning_rate": 1.0598208522729981e-06, "loss": 0.4574, "step": 34710 }, { "epoch": 0.8102683780630106, "grad_norm": 2.0135968918161904, "learning_rate": 1.0573146314219095e-06, "loss": 0.441, "step": 34720 }, { "epoch": 0.8105017502917153, "grad_norm": 1.7791071303939467, "learning_rate": 1.0548110269770307e-06, "loss": 0.442, "step": 34730 }, { "epoch": 0.81073512252042, "grad_norm": 1.64077165128372, "learning_rate": 1.0523100405997777e-06, "loss": 0.4733, "step": 34740 }, { "epoch": 0.8109684947491248, "grad_norm": 1.7014008653442338, "learning_rate": 1.0498116739498327e-06, "loss": 0.4655, "step": 34750 }, { "epoch": 0.8112018669778296, "grad_norm": 1.6394912042357954, "learning_rate": 1.0473159286851348e-06, "loss": 0.4162, "step": 34760 }, { "epoch": 0.8114352392065344, "grad_norm": 1.707070263133109, "learning_rate": 1.0448228064618893e-06, "loss": 0.4149, "step": 34770 }, { "epoch": 0.8116686114352392, "grad_norm": 1.9189741871089243, "learning_rate": 1.042332308934555e-06, "loss": 0.4611, "step": 34780 }, { "epoch": 0.811901983663944, "grad_norm": 1.8636546881879135, "learning_rate": 1.0398444377558515e-06, "loss": 0.437, "step": 34790 }, { "epoch": 0.8121353558926487, "grad_norm": 1.7384536084950375, "learning_rate": 1.037359194576757e-06, "loss": 0.4743, "step": 34800 }, { "epoch": 0.8123687281213535, "grad_norm": 1.7084241097650024, "learning_rate": 1.0348765810465033e-06, "loss": 0.4307, "step": 34810 }, { "epoch": 0.8126021003500583, "grad_norm": 1.7809217847450831, "learning_rate": 1.0323965988125784e-06, "loss": 0.4454, "step": 34820 }, { "epoch": 0.8128354725787631, "grad_norm": 1.5660819119549112, "learning_rate": 1.0299192495207234e-06, "loss": 0.4206, "step": 34830 }, { "epoch": 0.8130688448074679, "grad_norm": 1.611756105753879, "learning_rate": 1.027444534814931e-06, "loss": 0.4475, "step": 34840 }, { "epoch": 0.8133022170361727, "grad_norm": 1.8380741210079807, "learning_rate": 1.0249724563374503e-06, "loss": 0.48, "step": 34850 }, { "epoch": 0.8135355892648775, "grad_norm": 1.570574262158669, "learning_rate": 1.0225030157287763e-06, "loss": 0.4295, "step": 34860 }, { "epoch": 0.8137689614935822, "grad_norm": 1.5400330032158591, "learning_rate": 1.0200362146276532e-06, "loss": 0.4379, "step": 34870 }, { "epoch": 0.814002333722287, "grad_norm": 1.6962454855236941, "learning_rate": 1.0175720546710789e-06, "loss": 0.4558, "step": 34880 }, { "epoch": 0.8142357059509918, "grad_norm": 1.6322784004878936, "learning_rate": 1.0151105374942927e-06, "loss": 0.4183, "step": 34890 }, { "epoch": 0.8144690781796966, "grad_norm": 1.7489344616104077, "learning_rate": 1.0126516647307822e-06, "loss": 0.4673, "step": 34900 }, { "epoch": 0.8147024504084014, "grad_norm": 1.7792799439507048, "learning_rate": 1.0101954380122818e-06, "loss": 0.4428, "step": 34910 }, { "epoch": 0.8149358226371062, "grad_norm": 1.5637956104895485, "learning_rate": 1.0077418589687689e-06, "loss": 0.4302, "step": 34920 }, { "epoch": 0.815169194865811, "grad_norm": 1.6362169924283165, "learning_rate": 1.0052909292284606e-06, "loss": 0.4284, "step": 34930 }, { "epoch": 0.8154025670945158, "grad_norm": 1.6320701456212658, "learning_rate": 1.002842650417823e-06, "loss": 0.4353, "step": 34940 }, { "epoch": 0.8156359393232205, "grad_norm": 2.114502053333622, "learning_rate": 1.0003970241615563e-06, "loss": 0.4535, "step": 34950 }, { "epoch": 0.8158693115519253, "grad_norm": 1.8996129976605778, "learning_rate": 9.979540520826026e-07, "loss": 0.4537, "step": 34960 }, { "epoch": 0.8161026837806301, "grad_norm": 1.7598974181237195, "learning_rate": 9.955137358021455e-07, "loss": 0.4353, "step": 34970 }, { "epoch": 0.8163360560093349, "grad_norm": 2.196088882666483, "learning_rate": 9.930760769396008e-07, "loss": 0.4592, "step": 34980 }, { "epoch": 0.8165694282380397, "grad_norm": 1.4795462238634902, "learning_rate": 9.906410771126267e-07, "loss": 0.4312, "step": 34990 }, { "epoch": 0.8168028004667445, "grad_norm": 1.7821870714383743, "learning_rate": 9.882087379371125e-07, "loss": 0.4546, "step": 35000 }, { "epoch": 0.8170361726954493, "grad_norm": 1.6658668359177902, "learning_rate": 9.857790610271816e-07, "loss": 0.4147, "step": 35010 }, { "epoch": 0.817269544924154, "grad_norm": 2.036298434824041, "learning_rate": 9.833520479951957e-07, "loss": 0.4417, "step": 35020 }, { "epoch": 0.8175029171528588, "grad_norm": 1.487485608151358, "learning_rate": 9.809277004517437e-07, "loss": 0.4401, "step": 35030 }, { "epoch": 0.8177362893815636, "grad_norm": 1.767205370184394, "learning_rate": 9.785060200056462e-07, "loss": 0.4302, "step": 35040 }, { "epoch": 0.8179696616102684, "grad_norm": 1.8784227443762602, "learning_rate": 9.760870082639572e-07, "loss": 0.4571, "step": 35050 }, { "epoch": 0.8182030338389732, "grad_norm": 1.9121234407494376, "learning_rate": 9.736706668319568e-07, "loss": 0.4661, "step": 35060 }, { "epoch": 0.818436406067678, "grad_norm": 1.6296955173784327, "learning_rate": 9.712569973131524e-07, "loss": 0.4193, "step": 35070 }, { "epoch": 0.8186697782963828, "grad_norm": 1.5882598308523757, "learning_rate": 9.68846001309282e-07, "loss": 0.4221, "step": 35080 }, { "epoch": 0.8189031505250876, "grad_norm": 1.3085428778432464, "learning_rate": 9.664376804203063e-07, "loss": 0.4217, "step": 35090 }, { "epoch": 0.8191365227537923, "grad_norm": 1.662210468948592, "learning_rate": 9.6403203624441e-07, "loss": 0.4551, "step": 35100 }, { "epoch": 0.8193698949824971, "grad_norm": 1.64216374081178, "learning_rate": 9.616290703780056e-07, "loss": 0.4326, "step": 35110 }, { "epoch": 0.8196032672112019, "grad_norm": 1.5560440306257868, "learning_rate": 9.592287844157245e-07, "loss": 0.4278, "step": 35120 }, { "epoch": 0.8198366394399067, "grad_norm": 1.64756586259806, "learning_rate": 9.5683117995042e-07, "loss": 0.4149, "step": 35130 }, { "epoch": 0.8200700116686115, "grad_norm": 1.968403540754329, "learning_rate": 9.544362585731687e-07, "loss": 0.4796, "step": 35140 }, { "epoch": 0.8203033838973163, "grad_norm": 1.955417063032757, "learning_rate": 9.52044021873263e-07, "loss": 0.4399, "step": 35150 }, { "epoch": 0.820536756126021, "grad_norm": 1.4648075549989719, "learning_rate": 9.496544714382178e-07, "loss": 0.4501, "step": 35160 }, { "epoch": 0.8207701283547257, "grad_norm": 1.591160442039066, "learning_rate": 9.472676088537619e-07, "loss": 0.4457, "step": 35170 }, { "epoch": 0.8210035005834305, "grad_norm": 1.7765407045297272, "learning_rate": 9.448834357038422e-07, "loss": 0.4215, "step": 35180 }, { "epoch": 0.8212368728121353, "grad_norm": 1.5798754001640407, "learning_rate": 9.425019535706193e-07, "loss": 0.4415, "step": 35190 }, { "epoch": 0.8214702450408401, "grad_norm": 2.148705097839861, "learning_rate": 9.401231640344683e-07, "loss": 0.417, "step": 35200 }, { "epoch": 0.8217036172695449, "grad_norm": 1.8004015322082516, "learning_rate": 9.377470686739804e-07, "loss": 0.424, "step": 35210 }, { "epoch": 0.8219369894982497, "grad_norm": 1.6323300866302537, "learning_rate": 9.35373669065956e-07, "loss": 0.4789, "step": 35220 }, { "epoch": 0.8221703617269545, "grad_norm": 1.6230141071121447, "learning_rate": 9.330029667854057e-07, "loss": 0.4403, "step": 35230 }, { "epoch": 0.8224037339556592, "grad_norm": 1.6019919551527837, "learning_rate": 9.306349634055545e-07, "loss": 0.4497, "step": 35240 }, { "epoch": 0.822637106184364, "grad_norm": 1.7629094977495838, "learning_rate": 9.28269660497832e-07, "loss": 0.4389, "step": 35250 }, { "epoch": 0.8228704784130688, "grad_norm": 1.8413065291165431, "learning_rate": 9.259070596318759e-07, "loss": 0.4343, "step": 35260 }, { "epoch": 0.8231038506417736, "grad_norm": 1.6481507473763766, "learning_rate": 9.235471623755349e-07, "loss": 0.4616, "step": 35270 }, { "epoch": 0.8233372228704784, "grad_norm": 1.8457354508329484, "learning_rate": 9.211899702948596e-07, "loss": 0.4783, "step": 35280 }, { "epoch": 0.8235705950991832, "grad_norm": 1.7803223816529892, "learning_rate": 9.188354849541059e-07, "loss": 0.438, "step": 35290 }, { "epoch": 0.823803967327888, "grad_norm": 1.984405437543074, "learning_rate": 9.164837079157362e-07, "loss": 0.4186, "step": 35300 }, { "epoch": 0.8240373395565928, "grad_norm": 1.782277888843724, "learning_rate": 9.141346407404134e-07, "loss": 0.4327, "step": 35310 }, { "epoch": 0.8242707117852975, "grad_norm": 1.4186539230051654, "learning_rate": 9.117882849869997e-07, "loss": 0.4582, "step": 35320 }, { "epoch": 0.8245040840140023, "grad_norm": 1.436909005684373, "learning_rate": 9.094446422125642e-07, "loss": 0.4337, "step": 35330 }, { "epoch": 0.8247374562427071, "grad_norm": 2.0839769185599444, "learning_rate": 9.071037139723704e-07, "loss": 0.4429, "step": 35340 }, { "epoch": 0.8249708284714119, "grad_norm": 1.7865679422174754, "learning_rate": 9.047655018198814e-07, "loss": 0.4289, "step": 35350 }, { "epoch": 0.8252042007001167, "grad_norm": 1.959103058479247, "learning_rate": 9.02430007306761e-07, "loss": 0.4176, "step": 35360 }, { "epoch": 0.8254375729288215, "grad_norm": 1.5600149743630418, "learning_rate": 9.000972319828644e-07, "loss": 0.4429, "step": 35370 }, { "epoch": 0.8256709451575263, "grad_norm": 1.9173335038351291, "learning_rate": 8.977671773962482e-07, "loss": 0.4301, "step": 35380 }, { "epoch": 0.825904317386231, "grad_norm": 1.7585435975436736, "learning_rate": 8.954398450931584e-07, "loss": 0.4661, "step": 35390 }, { "epoch": 0.8261376896149358, "grad_norm": 2.012377538092993, "learning_rate": 8.931152366180351e-07, "loss": 0.4112, "step": 35400 }, { "epoch": 0.8263710618436406, "grad_norm": 1.5694553302245582, "learning_rate": 8.907933535135155e-07, "loss": 0.4048, "step": 35410 }, { "epoch": 0.8266044340723454, "grad_norm": 1.8345579689691296, "learning_rate": 8.884741973204231e-07, "loss": 0.432, "step": 35420 }, { "epoch": 0.8268378063010502, "grad_norm": 1.6128863395059978, "learning_rate": 8.861577695777723e-07, "loss": 0.445, "step": 35430 }, { "epoch": 0.827071178529755, "grad_norm": 1.7939203097572096, "learning_rate": 8.838440718227714e-07, "loss": 0.4371, "step": 35440 }, { "epoch": 0.8273045507584598, "grad_norm": 1.9087904569818575, "learning_rate": 8.815331055908116e-07, "loss": 0.4328, "step": 35450 }, { "epoch": 0.8275379229871646, "grad_norm": 1.8512838108585248, "learning_rate": 8.792248724154723e-07, "loss": 0.4296, "step": 35460 }, { "epoch": 0.8277712952158693, "grad_norm": 1.5329738809296276, "learning_rate": 8.769193738285242e-07, "loss": 0.4554, "step": 35470 }, { "epoch": 0.8280046674445741, "grad_norm": 1.4680809225168496, "learning_rate": 8.74616611359918e-07, "loss": 0.4103, "step": 35480 }, { "epoch": 0.8282380396732789, "grad_norm": 1.6098657251717234, "learning_rate": 8.723165865377887e-07, "loss": 0.4394, "step": 35490 }, { "epoch": 0.8284714119019837, "grad_norm": 1.6906840714149256, "learning_rate": 8.700193008884583e-07, "loss": 0.4576, "step": 35500 }, { "epoch": 0.8287047841306885, "grad_norm": 1.554040666599688, "learning_rate": 8.677247559364288e-07, "loss": 0.4165, "step": 35510 }, { "epoch": 0.8289381563593933, "grad_norm": 1.590198156460828, "learning_rate": 8.654329532043821e-07, "loss": 0.4421, "step": 35520 }, { "epoch": 0.8291715285880981, "grad_norm": 1.6682054050802975, "learning_rate": 8.631438942131842e-07, "loss": 0.4392, "step": 35530 }, { "epoch": 0.8294049008168028, "grad_norm": 1.4563531895711836, "learning_rate": 8.608575804818753e-07, "loss": 0.4451, "step": 35540 }, { "epoch": 0.8296382730455076, "grad_norm": 1.7300399417762398, "learning_rate": 8.585740135276804e-07, "loss": 0.4497, "step": 35550 }, { "epoch": 0.8298716452742124, "grad_norm": 2.059874819727118, "learning_rate": 8.562931948659941e-07, "loss": 0.4643, "step": 35560 }, { "epoch": 0.8301050175029171, "grad_norm": 1.65484041813224, "learning_rate": 8.540151260103907e-07, "loss": 0.4377, "step": 35570 }, { "epoch": 0.8303383897316219, "grad_norm": 1.492066259057131, "learning_rate": 8.51739808472623e-07, "loss": 0.417, "step": 35580 }, { "epoch": 0.8305717619603267, "grad_norm": 2.1274436699424712, "learning_rate": 8.494672437626117e-07, "loss": 0.4514, "step": 35590 }, { "epoch": 0.8308051341890315, "grad_norm": 1.707646570524451, "learning_rate": 8.47197433388457e-07, "loss": 0.426, "step": 35600 }, { "epoch": 0.8310385064177362, "grad_norm": 1.7566324591164026, "learning_rate": 8.449303788564267e-07, "loss": 0.4161, "step": 35610 }, { "epoch": 0.831271878646441, "grad_norm": 1.6340327227970461, "learning_rate": 8.426660816709598e-07, "loss": 0.4643, "step": 35620 }, { "epoch": 0.8315052508751458, "grad_norm": 1.5841745655285702, "learning_rate": 8.404045433346702e-07, "loss": 0.4329, "step": 35630 }, { "epoch": 0.8317386231038506, "grad_norm": 1.6817893615591246, "learning_rate": 8.381457653483355e-07, "loss": 0.4275, "step": 35640 }, { "epoch": 0.8319719953325554, "grad_norm": 1.8867302796732337, "learning_rate": 8.358897492109036e-07, "loss": 0.4256, "step": 35650 }, { "epoch": 0.8322053675612602, "grad_norm": 1.6758800532141673, "learning_rate": 8.336364964194921e-07, "loss": 0.4194, "step": 35660 }, { "epoch": 0.832438739789965, "grad_norm": 1.4650969828443217, "learning_rate": 8.313860084693803e-07, "loss": 0.422, "step": 35670 }, { "epoch": 0.8326721120186698, "grad_norm": 1.7321240677024923, "learning_rate": 8.291382868540148e-07, "loss": 0.4312, "step": 35680 }, { "epoch": 0.8329054842473745, "grad_norm": 1.846198669368528, "learning_rate": 8.268933330650092e-07, "loss": 0.4542, "step": 35690 }, { "epoch": 0.8331388564760793, "grad_norm": 1.583665187931786, "learning_rate": 8.246511485921355e-07, "loss": 0.4419, "step": 35700 }, { "epoch": 0.8333722287047841, "grad_norm": 1.770086595645971, "learning_rate": 8.224117349233296e-07, "loss": 0.4238, "step": 35710 }, { "epoch": 0.8336056009334889, "grad_norm": 1.668706096957047, "learning_rate": 8.201750935446912e-07, "loss": 0.4337, "step": 35720 }, { "epoch": 0.8338389731621937, "grad_norm": 1.650142099604922, "learning_rate": 8.179412259404779e-07, "loss": 0.4428, "step": 35730 }, { "epoch": 0.8340723453908985, "grad_norm": 1.7736029255615902, "learning_rate": 8.157101335931051e-07, "loss": 0.4519, "step": 35740 }, { "epoch": 0.8343057176196033, "grad_norm": 1.623352257511747, "learning_rate": 8.134818179831511e-07, "loss": 0.4426, "step": 35750 }, { "epoch": 0.834539089848308, "grad_norm": 1.7706256880449003, "learning_rate": 8.112562805893464e-07, "loss": 0.4306, "step": 35760 }, { "epoch": 0.8347724620770128, "grad_norm": 1.6170403574963277, "learning_rate": 8.090335228885831e-07, "loss": 0.4315, "step": 35770 }, { "epoch": 0.8350058343057176, "grad_norm": 1.8219011100744635, "learning_rate": 8.06813546355904e-07, "loss": 0.4409, "step": 35780 }, { "epoch": 0.8352392065344224, "grad_norm": 1.7718818628184194, "learning_rate": 8.045963524645073e-07, "loss": 0.4193, "step": 35790 }, { "epoch": 0.8354725787631272, "grad_norm": 1.7586417283601947, "learning_rate": 8.023819426857482e-07, "loss": 0.4353, "step": 35800 }, { "epoch": 0.835705950991832, "grad_norm": 1.7336010993345852, "learning_rate": 8.001703184891296e-07, "loss": 0.4628, "step": 35810 }, { "epoch": 0.8359393232205368, "grad_norm": 1.8911862715086327, "learning_rate": 7.979614813423076e-07, "loss": 0.4255, "step": 35820 }, { "epoch": 0.8361726954492416, "grad_norm": 1.8227312012668817, "learning_rate": 7.957554327110906e-07, "loss": 0.4435, "step": 35830 }, { "epoch": 0.8364060676779463, "grad_norm": 1.9757688633661328, "learning_rate": 7.935521740594343e-07, "loss": 0.4302, "step": 35840 }, { "epoch": 0.8366394399066511, "grad_norm": 1.6038577776867664, "learning_rate": 7.913517068494425e-07, "loss": 0.4458, "step": 35850 }, { "epoch": 0.8368728121353559, "grad_norm": 2.064398870507383, "learning_rate": 7.891540325413694e-07, "loss": 0.4774, "step": 35860 }, { "epoch": 0.8371061843640607, "grad_norm": 1.753255574437102, "learning_rate": 7.869591525936132e-07, "loss": 0.439, "step": 35870 }, { "epoch": 0.8373395565927655, "grad_norm": 1.7225427397551916, "learning_rate": 7.847670684627179e-07, "loss": 0.4413, "step": 35880 }, { "epoch": 0.8375729288214703, "grad_norm": 1.5811495621902485, "learning_rate": 7.825777816033747e-07, "loss": 0.4623, "step": 35890 }, { "epoch": 0.8378063010501751, "grad_norm": 1.85307473512966, "learning_rate": 7.80391293468416e-07, "loss": 0.4307, "step": 35900 }, { "epoch": 0.8380396732788798, "grad_norm": 1.6290631499144625, "learning_rate": 7.782076055088162e-07, "loss": 0.4433, "step": 35910 }, { "epoch": 0.8382730455075846, "grad_norm": 1.5945928359243509, "learning_rate": 7.760267191736948e-07, "loss": 0.4389, "step": 35920 }, { "epoch": 0.8385064177362894, "grad_norm": 1.6148928607514912, "learning_rate": 7.738486359103092e-07, "loss": 0.4619, "step": 35930 }, { "epoch": 0.8387397899649942, "grad_norm": 2.3165159076950546, "learning_rate": 7.716733571640583e-07, "loss": 0.4429, "step": 35940 }, { "epoch": 0.838973162193699, "grad_norm": 1.9809271678927962, "learning_rate": 7.695008843784785e-07, "loss": 0.4686, "step": 35950 }, { "epoch": 0.8392065344224038, "grad_norm": 1.9885416405073615, "learning_rate": 7.673312189952442e-07, "loss": 0.4356, "step": 35960 }, { "epoch": 0.8394399066511086, "grad_norm": 1.5471345894666793, "learning_rate": 7.6516436245417e-07, "loss": 0.4825, "step": 35970 }, { "epoch": 0.8396732788798134, "grad_norm": 1.6442943369896887, "learning_rate": 7.630003161932009e-07, "loss": 0.4711, "step": 35980 }, { "epoch": 0.839906651108518, "grad_norm": 1.6323429122083126, "learning_rate": 7.608390816484229e-07, "loss": 0.4562, "step": 35990 }, { "epoch": 0.8401400233372228, "grad_norm": 1.6090961378425286, "learning_rate": 7.586806602540519e-07, "loss": 0.4424, "step": 36000 }, { "epoch": 0.8403733955659276, "grad_norm": 1.933782180743435, "learning_rate": 7.565250534424373e-07, "loss": 0.4624, "step": 36010 }, { "epoch": 0.8406067677946324, "grad_norm": 1.813432297141774, "learning_rate": 7.543722626440642e-07, "loss": 0.4444, "step": 36020 }, { "epoch": 0.8408401400233372, "grad_norm": 1.8074539306167345, "learning_rate": 7.522222892875453e-07, "loss": 0.4301, "step": 36030 }, { "epoch": 0.841073512252042, "grad_norm": 1.6408877135672046, "learning_rate": 7.500751347996233e-07, "loss": 0.4614, "step": 36040 }, { "epoch": 0.8413068844807468, "grad_norm": 1.6216253683876158, "learning_rate": 7.47930800605175e-07, "loss": 0.4472, "step": 36050 }, { "epoch": 0.8415402567094515, "grad_norm": 1.6468490195533483, "learning_rate": 7.457892881272005e-07, "loss": 0.4362, "step": 36060 }, { "epoch": 0.8417736289381563, "grad_norm": 1.7412692058870634, "learning_rate": 7.436505987868281e-07, "loss": 0.4561, "step": 36070 }, { "epoch": 0.8420070011668611, "grad_norm": 1.909265495168902, "learning_rate": 7.415147340033169e-07, "loss": 0.4334, "step": 36080 }, { "epoch": 0.8422403733955659, "grad_norm": 1.6321894633477056, "learning_rate": 7.393816951940463e-07, "loss": 0.4475, "step": 36090 }, { "epoch": 0.8424737456242707, "grad_norm": 1.7901315549462435, "learning_rate": 7.372514837745226e-07, "loss": 0.4401, "step": 36100 }, { "epoch": 0.8427071178529755, "grad_norm": 1.652991658579844, "learning_rate": 7.351241011583776e-07, "loss": 0.4293, "step": 36110 }, { "epoch": 0.8429404900816803, "grad_norm": 1.7490755146248929, "learning_rate": 7.329995487573627e-07, "loss": 0.42, "step": 36120 }, { "epoch": 0.843173862310385, "grad_norm": 1.6448881256148549, "learning_rate": 7.308778279813516e-07, "loss": 0.4352, "step": 36130 }, { "epoch": 0.8434072345390898, "grad_norm": 1.7338806777687006, "learning_rate": 7.287589402383427e-07, "loss": 0.4372, "step": 36140 }, { "epoch": 0.8436406067677946, "grad_norm": 1.7581467750967485, "learning_rate": 7.266428869344483e-07, "loss": 0.4206, "step": 36150 }, { "epoch": 0.8438739789964994, "grad_norm": 1.594184765045754, "learning_rate": 7.245296694739057e-07, "loss": 0.4478, "step": 36160 }, { "epoch": 0.8441073512252042, "grad_norm": 2.679467930857972, "learning_rate": 7.224192892590665e-07, "loss": 0.4151, "step": 36170 }, { "epoch": 0.844340723453909, "grad_norm": 1.7793438158783714, "learning_rate": 7.203117476903987e-07, "loss": 0.4288, "step": 36180 }, { "epoch": 0.8445740956826138, "grad_norm": 1.684449309973923, "learning_rate": 7.18207046166492e-07, "loss": 0.4292, "step": 36190 }, { "epoch": 0.8448074679113186, "grad_norm": 1.6720505229995162, "learning_rate": 7.161051860840451e-07, "loss": 0.4387, "step": 36200 }, { "epoch": 0.8450408401400233, "grad_norm": 2.0658326159369875, "learning_rate": 7.140061688378736e-07, "loss": 0.4492, "step": 36210 }, { "epoch": 0.8452742123687281, "grad_norm": 1.7798320976390565, "learning_rate": 7.119099958209086e-07, "loss": 0.4487, "step": 36220 }, { "epoch": 0.8455075845974329, "grad_norm": 1.750125793519646, "learning_rate": 7.098166684241914e-07, "loss": 0.4337, "step": 36230 }, { "epoch": 0.8457409568261377, "grad_norm": 1.7733198260418603, "learning_rate": 7.077261880368735e-07, "loss": 0.4291, "step": 36240 }, { "epoch": 0.8459743290548425, "grad_norm": 1.6074714141568347, "learning_rate": 7.05638556046222e-07, "loss": 0.4515, "step": 36250 }, { "epoch": 0.8462077012835473, "grad_norm": 1.7034678107055927, "learning_rate": 7.035537738376086e-07, "loss": 0.435, "step": 36260 }, { "epoch": 0.8464410735122521, "grad_norm": 1.901404437128302, "learning_rate": 7.014718427945161e-07, "loss": 0.4503, "step": 36270 }, { "epoch": 0.8466744457409568, "grad_norm": 1.8243110604055173, "learning_rate": 6.993927642985365e-07, "loss": 0.4534, "step": 36280 }, { "epoch": 0.8469078179696616, "grad_norm": 1.7209399225113649, "learning_rate": 6.973165397293669e-07, "loss": 0.4346, "step": 36290 }, { "epoch": 0.8471411901983664, "grad_norm": 1.7575972823615595, "learning_rate": 6.952431704648105e-07, "loss": 0.4568, "step": 36300 }, { "epoch": 0.8473745624270712, "grad_norm": 1.6099792425884099, "learning_rate": 6.931726578807768e-07, "loss": 0.4647, "step": 36310 }, { "epoch": 0.847607934655776, "grad_norm": 1.8418656529342803, "learning_rate": 6.911050033512779e-07, "loss": 0.4445, "step": 36320 }, { "epoch": 0.8478413068844808, "grad_norm": 1.768154511936669, "learning_rate": 6.890402082484327e-07, "loss": 0.4207, "step": 36330 }, { "epoch": 0.8480746791131856, "grad_norm": 1.5329683472539937, "learning_rate": 6.869782739424591e-07, "loss": 0.4543, "step": 36340 }, { "epoch": 0.8483080513418904, "grad_norm": 1.6845064414685171, "learning_rate": 6.849192018016764e-07, "loss": 0.4437, "step": 36350 }, { "epoch": 0.8485414235705951, "grad_norm": 1.6268747519870224, "learning_rate": 6.828629931925085e-07, "loss": 0.4638, "step": 36360 }, { "epoch": 0.8487747957992999, "grad_norm": 1.9739737683725584, "learning_rate": 6.808096494794747e-07, "loss": 0.4252, "step": 36370 }, { "epoch": 0.8490081680280047, "grad_norm": 1.480437190988228, "learning_rate": 6.787591720251946e-07, "loss": 0.4331, "step": 36380 }, { "epoch": 0.8492415402567095, "grad_norm": 1.5978653139656682, "learning_rate": 6.767115621903869e-07, "loss": 0.4372, "step": 36390 }, { "epoch": 0.8494749124854143, "grad_norm": 1.7277285159819702, "learning_rate": 6.746668213338653e-07, "loss": 0.4332, "step": 36400 }, { "epoch": 0.849708284714119, "grad_norm": 1.4963634575562463, "learning_rate": 6.726249508125426e-07, "loss": 0.4405, "step": 36410 }, { "epoch": 0.8499416569428238, "grad_norm": 1.8493135273658243, "learning_rate": 6.705859519814234e-07, "loss": 0.4073, "step": 36420 }, { "epoch": 0.8501750291715285, "grad_norm": 1.6463627353713437, "learning_rate": 6.685498261936074e-07, "loss": 0.422, "step": 36430 }, { "epoch": 0.8504084014002333, "grad_norm": 1.7206616487117254, "learning_rate": 6.665165748002899e-07, "loss": 0.4503, "step": 36440 }, { "epoch": 0.8506417736289381, "grad_norm": 1.813958649106947, "learning_rate": 6.644861991507573e-07, "loss": 0.433, "step": 36450 }, { "epoch": 0.8508751458576429, "grad_norm": 1.8792454014008715, "learning_rate": 6.624587005923861e-07, "loss": 0.4617, "step": 36460 }, { "epoch": 0.8511085180863477, "grad_norm": 1.9606138254682413, "learning_rate": 6.604340804706472e-07, "loss": 0.441, "step": 36470 }, { "epoch": 0.8513418903150525, "grad_norm": 1.5636222631715029, "learning_rate": 6.58412340129097e-07, "loss": 0.4411, "step": 36480 }, { "epoch": 0.8515752625437573, "grad_norm": 1.7076566507872517, "learning_rate": 6.563934809093836e-07, "loss": 0.4498, "step": 36490 }, { "epoch": 0.851808634772462, "grad_norm": 1.742505896005506, "learning_rate": 6.54377504151244e-07, "loss": 0.4579, "step": 36500 }, { "epoch": 0.8520420070011668, "grad_norm": 1.8385809915665074, "learning_rate": 6.523644111924993e-07, "loss": 0.4399, "step": 36510 }, { "epoch": 0.8522753792298716, "grad_norm": 1.7537101846837178, "learning_rate": 6.503542033690586e-07, "loss": 0.4124, "step": 36520 }, { "epoch": 0.8525087514585764, "grad_norm": 1.4929489825973703, "learning_rate": 6.483468820149169e-07, "loss": 0.449, "step": 36530 }, { "epoch": 0.8527421236872812, "grad_norm": 1.9787856118529203, "learning_rate": 6.463424484621528e-07, "loss": 0.4515, "step": 36540 }, { "epoch": 0.852975495915986, "grad_norm": 1.7402189733812938, "learning_rate": 6.443409040409293e-07, "loss": 0.444, "step": 36550 }, { "epoch": 0.8532088681446908, "grad_norm": 1.465833214112972, "learning_rate": 6.423422500794918e-07, "loss": 0.4111, "step": 36560 }, { "epoch": 0.8534422403733956, "grad_norm": 1.5210274745308958, "learning_rate": 6.40346487904166e-07, "loss": 0.4057, "step": 36570 }, { "epoch": 0.8536756126021003, "grad_norm": 1.6065476829787197, "learning_rate": 6.383536188393619e-07, "loss": 0.4384, "step": 36580 }, { "epoch": 0.8539089848308051, "grad_norm": 1.9168986558461534, "learning_rate": 6.363636442075666e-07, "loss": 0.4233, "step": 36590 }, { "epoch": 0.8541423570595099, "grad_norm": 1.6238005578629289, "learning_rate": 6.343765653293471e-07, "loss": 0.4295, "step": 36600 }, { "epoch": 0.8543757292882147, "grad_norm": 1.8480738379184696, "learning_rate": 6.323923835233515e-07, "loss": 0.4039, "step": 36610 }, { "epoch": 0.8546091015169195, "grad_norm": 1.944923684609954, "learning_rate": 6.304111001063007e-07, "loss": 0.4599, "step": 36620 }, { "epoch": 0.8548424737456243, "grad_norm": 1.5405371329748456, "learning_rate": 6.284327163929949e-07, "loss": 0.4223, "step": 36630 }, { "epoch": 0.8550758459743291, "grad_norm": 1.6944107741844188, "learning_rate": 6.26457233696311e-07, "loss": 0.454, "step": 36640 }, { "epoch": 0.8553092182030338, "grad_norm": 1.7653299539632574, "learning_rate": 6.244846533271981e-07, "loss": 0.4295, "step": 36650 }, { "epoch": 0.8555425904317386, "grad_norm": 1.8774385634411008, "learning_rate": 6.225149765946808e-07, "loss": 0.4512, "step": 36660 }, { "epoch": 0.8557759626604434, "grad_norm": 1.5737943056167303, "learning_rate": 6.205482048058581e-07, "loss": 0.4304, "step": 36670 }, { "epoch": 0.8560093348891482, "grad_norm": 1.594054374123155, "learning_rate": 6.185843392658986e-07, "loss": 0.4392, "step": 36680 }, { "epoch": 0.856242707117853, "grad_norm": 1.6768967635369927, "learning_rate": 6.166233812780437e-07, "loss": 0.441, "step": 36690 }, { "epoch": 0.8564760793465578, "grad_norm": 1.8699319979996685, "learning_rate": 6.146653321436053e-07, "loss": 0.4543, "step": 36700 }, { "epoch": 0.8567094515752626, "grad_norm": 1.8685179390322975, "learning_rate": 6.127101931619634e-07, "loss": 0.4485, "step": 36710 }, { "epoch": 0.8569428238039674, "grad_norm": 1.6462674328424025, "learning_rate": 6.107579656305701e-07, "loss": 0.4317, "step": 36720 }, { "epoch": 0.8571761960326721, "grad_norm": 1.7815299173016725, "learning_rate": 6.088086508449426e-07, "loss": 0.443, "step": 36730 }, { "epoch": 0.8574095682613769, "grad_norm": 1.4783533469659613, "learning_rate": 6.068622500986654e-07, "loss": 0.4146, "step": 36740 }, { "epoch": 0.8576429404900817, "grad_norm": 1.7077162601758935, "learning_rate": 6.04918764683392e-07, "loss": 0.4426, "step": 36750 }, { "epoch": 0.8578763127187865, "grad_norm": 1.7276866299981906, "learning_rate": 6.029781958888375e-07, "loss": 0.4226, "step": 36760 }, { "epoch": 0.8581096849474913, "grad_norm": 1.8105219835693238, "learning_rate": 6.010405450027824e-07, "loss": 0.4476, "step": 36770 }, { "epoch": 0.8583430571761961, "grad_norm": 1.5968723929403237, "learning_rate": 5.991058133110733e-07, "loss": 0.467, "step": 36780 }, { "epoch": 0.8585764294049009, "grad_norm": 2.012163798399968, "learning_rate": 5.971740020976163e-07, "loss": 0.4366, "step": 36790 }, { "epoch": 0.8588098016336057, "grad_norm": 1.8520071721737883, "learning_rate": 5.952451126443832e-07, "loss": 0.4145, "step": 36800 }, { "epoch": 0.8590431738623104, "grad_norm": 1.924336166944489, "learning_rate": 5.933191462314037e-07, "loss": 0.4502, "step": 36810 }, { "epoch": 0.8592765460910151, "grad_norm": 1.960337611695549, "learning_rate": 5.913961041367666e-07, "loss": 0.4479, "step": 36820 }, { "epoch": 0.8595099183197199, "grad_norm": 1.7651215710479076, "learning_rate": 5.894759876366257e-07, "loss": 0.4406, "step": 36830 }, { "epoch": 0.8597432905484247, "grad_norm": 1.7328661314081553, "learning_rate": 5.875587980051883e-07, "loss": 0.4643, "step": 36840 }, { "epoch": 0.8599766627771295, "grad_norm": 1.8609136867180862, "learning_rate": 5.856445365147195e-07, "loss": 0.4649, "step": 36850 }, { "epoch": 0.8602100350058343, "grad_norm": 1.6778238192692763, "learning_rate": 5.837332044355459e-07, "loss": 0.4328, "step": 36860 }, { "epoch": 0.860443407234539, "grad_norm": 1.624567470002248, "learning_rate": 5.818248030360446e-07, "loss": 0.4319, "step": 36870 }, { "epoch": 0.8606767794632438, "grad_norm": 1.7482800655158983, "learning_rate": 5.799193335826503e-07, "loss": 0.4453, "step": 36880 }, { "epoch": 0.8609101516919486, "grad_norm": 1.7897281586099123, "learning_rate": 5.780167973398537e-07, "loss": 0.4493, "step": 36890 }, { "epoch": 0.8611435239206534, "grad_norm": 1.4934364515674283, "learning_rate": 5.761171955701961e-07, "loss": 0.4078, "step": 36900 }, { "epoch": 0.8613768961493582, "grad_norm": 1.906511125689699, "learning_rate": 5.742205295342712e-07, "loss": 0.4129, "step": 36910 }, { "epoch": 0.861610268378063, "grad_norm": 1.7296057062477785, "learning_rate": 5.723268004907285e-07, "loss": 0.4238, "step": 36920 }, { "epoch": 0.8618436406067678, "grad_norm": 2.1680805041805855, "learning_rate": 5.704360096962646e-07, "loss": 0.4145, "step": 36930 }, { "epoch": 0.8620770128354726, "grad_norm": 1.8161445036052681, "learning_rate": 5.685481584056269e-07, "loss": 0.4484, "step": 36940 }, { "epoch": 0.8623103850641773, "grad_norm": 1.8547545922686437, "learning_rate": 5.666632478716145e-07, "loss": 0.4495, "step": 36950 }, { "epoch": 0.8625437572928821, "grad_norm": 1.6127078288320433, "learning_rate": 5.647812793450713e-07, "loss": 0.4262, "step": 36960 }, { "epoch": 0.8627771295215869, "grad_norm": 1.4858314905535128, "learning_rate": 5.629022540748924e-07, "loss": 0.414, "step": 36970 }, { "epoch": 0.8630105017502917, "grad_norm": 1.84966503190314, "learning_rate": 5.61026173308018e-07, "loss": 0.4596, "step": 36980 }, { "epoch": 0.8632438739789965, "grad_norm": 1.9904462811154853, "learning_rate": 5.591530382894328e-07, "loss": 0.4097, "step": 36990 }, { "epoch": 0.8634772462077013, "grad_norm": 1.985669121155058, "learning_rate": 5.572828502621708e-07, "loss": 0.4394, "step": 37000 }, { "epoch": 0.8637106184364061, "grad_norm": 1.5369441374213628, "learning_rate": 5.554156104673058e-07, "loss": 0.3981, "step": 37010 }, { "epoch": 0.8639439906651109, "grad_norm": 1.9182484421805797, "learning_rate": 5.535513201439574e-07, "loss": 0.4448, "step": 37020 }, { "epoch": 0.8641773628938156, "grad_norm": 1.5566887668736846, "learning_rate": 5.516899805292891e-07, "loss": 0.4581, "step": 37030 }, { "epoch": 0.8644107351225204, "grad_norm": 1.795076790388636, "learning_rate": 5.498315928585035e-07, "loss": 0.4202, "step": 37040 }, { "epoch": 0.8646441073512252, "grad_norm": 1.8331571257362484, "learning_rate": 5.479761583648463e-07, "loss": 0.4441, "step": 37050 }, { "epoch": 0.86487747957993, "grad_norm": 1.7953299127554194, "learning_rate": 5.461236782796019e-07, "loss": 0.4372, "step": 37060 }, { "epoch": 0.8651108518086348, "grad_norm": 2.111905497404163, "learning_rate": 5.442741538320951e-07, "loss": 0.4334, "step": 37070 }, { "epoch": 0.8653442240373396, "grad_norm": 1.635103685665052, "learning_rate": 5.424275862496903e-07, "loss": 0.455, "step": 37080 }, { "epoch": 0.8655775962660444, "grad_norm": 1.6111259528030173, "learning_rate": 5.40583976757788e-07, "loss": 0.4575, "step": 37090 }, { "epoch": 0.8658109684947491, "grad_norm": 1.5654520819968272, "learning_rate": 5.387433265798247e-07, "loss": 0.4404, "step": 37100 }, { "epoch": 0.8660443407234539, "grad_norm": 1.8185117219447042, "learning_rate": 5.369056369372777e-07, "loss": 0.4485, "step": 37110 }, { "epoch": 0.8662777129521587, "grad_norm": 1.8939615304667852, "learning_rate": 5.350709090496542e-07, "loss": 0.4481, "step": 37120 }, { "epoch": 0.8665110851808635, "grad_norm": 1.6729494249664711, "learning_rate": 5.332391441344986e-07, "loss": 0.4405, "step": 37130 }, { "epoch": 0.8667444574095683, "grad_norm": 1.661177571966288, "learning_rate": 5.314103434073898e-07, "loss": 0.4663, "step": 37140 }, { "epoch": 0.8669778296382731, "grad_norm": 1.6578428138715497, "learning_rate": 5.295845080819384e-07, "loss": 0.434, "step": 37150 }, { "epoch": 0.8672112018669779, "grad_norm": 1.848688248098128, "learning_rate": 5.27761639369786e-07, "loss": 0.4322, "step": 37160 }, { "epoch": 0.8674445740956827, "grad_norm": 1.9868526481200763, "learning_rate": 5.259417384806081e-07, "loss": 0.4456, "step": 37170 }, { "epoch": 0.8676779463243874, "grad_norm": 1.7395790522327188, "learning_rate": 5.241248066221088e-07, "loss": 0.4532, "step": 37180 }, { "epoch": 0.8679113185530922, "grad_norm": 1.599956840796844, "learning_rate": 5.223108450000242e-07, "loss": 0.4665, "step": 37190 }, { "epoch": 0.868144690781797, "grad_norm": 1.6908798950095558, "learning_rate": 5.204998548181161e-07, "loss": 0.4515, "step": 37200 }, { "epoch": 0.8683780630105018, "grad_norm": 2.0301763609487793, "learning_rate": 5.186918372781757e-07, "loss": 0.4494, "step": 37210 }, { "epoch": 0.8686114352392066, "grad_norm": 1.7637801458812243, "learning_rate": 5.168867935800237e-07, "loss": 0.4293, "step": 37220 }, { "epoch": 0.8688448074679114, "grad_norm": 1.704458956785101, "learning_rate": 5.150847249215046e-07, "loss": 0.4477, "step": 37230 }, { "epoch": 0.869078179696616, "grad_norm": 1.5560648116964204, "learning_rate": 5.132856324984881e-07, "loss": 0.4096, "step": 37240 }, { "epoch": 0.8693115519253208, "grad_norm": 1.7197682398042533, "learning_rate": 5.114895175048729e-07, "loss": 0.4415, "step": 37250 }, { "epoch": 0.8695449241540256, "grad_norm": 1.727040203938477, "learning_rate": 5.096963811325772e-07, "loss": 0.378, "step": 37260 }, { "epoch": 0.8697782963827304, "grad_norm": 3.8447806128210633, "learning_rate": 5.07906224571545e-07, "loss": 0.4283, "step": 37270 }, { "epoch": 0.8700116686114352, "grad_norm": 1.5718381992398371, "learning_rate": 5.061190490097434e-07, "loss": 0.4549, "step": 37280 }, { "epoch": 0.87024504084014, "grad_norm": 1.6299869847335848, "learning_rate": 5.043348556331601e-07, "loss": 0.4526, "step": 37290 }, { "epoch": 0.8704784130688448, "grad_norm": 1.975182768515627, "learning_rate": 5.025536456258018e-07, "loss": 0.46, "step": 37300 }, { "epoch": 0.8707117852975496, "grad_norm": 1.4834755234639292, "learning_rate": 5.007754201697013e-07, "loss": 0.4355, "step": 37310 }, { "epoch": 0.8709451575262543, "grad_norm": 1.9208599327935554, "learning_rate": 4.990001804449046e-07, "loss": 0.452, "step": 37320 }, { "epoch": 0.8711785297549591, "grad_norm": 1.495911502213688, "learning_rate": 4.972279276294783e-07, "loss": 0.4541, "step": 37330 }, { "epoch": 0.8714119019836639, "grad_norm": 1.5877463015583755, "learning_rate": 4.954586628995106e-07, "loss": 0.4288, "step": 37340 }, { "epoch": 0.8716452742123687, "grad_norm": 1.839307534429881, "learning_rate": 4.936923874291e-07, "loss": 0.4451, "step": 37350 }, { "epoch": 0.8718786464410735, "grad_norm": 1.7896785202690384, "learning_rate": 4.919291023903677e-07, "loss": 0.4694, "step": 37360 }, { "epoch": 0.8721120186697783, "grad_norm": 2.0366338920105855, "learning_rate": 4.901688089534462e-07, "loss": 0.4478, "step": 37370 }, { "epoch": 0.8723453908984831, "grad_norm": 1.585199818843076, "learning_rate": 4.884115082864838e-07, "loss": 0.4618, "step": 37380 }, { "epoch": 0.8725787631271879, "grad_norm": 1.7189722511815684, "learning_rate": 4.866572015556448e-07, "loss": 0.4324, "step": 37390 }, { "epoch": 0.8728121353558926, "grad_norm": 2.0080376962488744, "learning_rate": 4.849058899251042e-07, "loss": 0.4157, "step": 37400 }, { "epoch": 0.8730455075845974, "grad_norm": 1.494243469572288, "learning_rate": 4.831575745570488e-07, "loss": 0.4379, "step": 37410 }, { "epoch": 0.8732788798133022, "grad_norm": 1.6481383447625613, "learning_rate": 4.814122566116813e-07, "loss": 0.4469, "step": 37420 }, { "epoch": 0.873512252042007, "grad_norm": 1.5127587865728875, "learning_rate": 4.796699372472102e-07, "loss": 0.4345, "step": 37430 }, { "epoch": 0.8737456242707118, "grad_norm": 1.5041199984081866, "learning_rate": 4.779306176198578e-07, "loss": 0.4459, "step": 37440 }, { "epoch": 0.8739789964994166, "grad_norm": 1.4852422896981505, "learning_rate": 4.761942988838536e-07, "loss": 0.4238, "step": 37450 }, { "epoch": 0.8742123687281214, "grad_norm": 1.7905833760649645, "learning_rate": 4.744609821914353e-07, "loss": 0.4411, "step": 37460 }, { "epoch": 0.8744457409568261, "grad_norm": 1.5543185141783635, "learning_rate": 4.727306686928523e-07, "loss": 0.4553, "step": 37470 }, { "epoch": 0.8746791131855309, "grad_norm": 1.737378598582436, "learning_rate": 4.710033595363567e-07, "loss": 0.4252, "step": 37480 }, { "epoch": 0.8749124854142357, "grad_norm": 1.93085883160132, "learning_rate": 4.69279055868207e-07, "loss": 0.4357, "step": 37490 }, { "epoch": 0.8751458576429405, "grad_norm": 1.6053819754206633, "learning_rate": 4.675577588326713e-07, "loss": 0.4431, "step": 37500 }, { "epoch": 0.8753792298716453, "grad_norm": 1.8665731321388108, "learning_rate": 4.658394695720192e-07, "loss": 0.4426, "step": 37510 }, { "epoch": 0.8756126021003501, "grad_norm": 1.4955436329432867, "learning_rate": 4.6412418922652303e-07, "loss": 0.4424, "step": 37520 }, { "epoch": 0.8758459743290549, "grad_norm": 1.7755504793501877, "learning_rate": 4.6241191893446336e-07, "loss": 0.4339, "step": 37530 }, { "epoch": 0.8760793465577597, "grad_norm": 1.6455444788665465, "learning_rate": 4.607026598321185e-07, "loss": 0.436, "step": 37540 }, { "epoch": 0.8763127187864644, "grad_norm": 1.796441576202534, "learning_rate": 4.589964130537694e-07, "loss": 0.4531, "step": 37550 }, { "epoch": 0.8765460910151692, "grad_norm": 1.4662991218140555, "learning_rate": 4.5729317973170097e-07, "loss": 0.4311, "step": 37560 }, { "epoch": 0.876779463243874, "grad_norm": 1.77271021302659, "learning_rate": 4.5559296099619345e-07, "loss": 0.4409, "step": 37570 }, { "epoch": 0.8770128354725788, "grad_norm": 1.9438948201778865, "learning_rate": 4.5389575797553263e-07, "loss": 0.4438, "step": 37580 }, { "epoch": 0.8772462077012836, "grad_norm": 1.8451886401437751, "learning_rate": 4.5220157179599764e-07, "loss": 0.4094, "step": 37590 }, { "epoch": 0.8774795799299884, "grad_norm": 1.5735135166325136, "learning_rate": 4.505104035818675e-07, "loss": 0.4459, "step": 37600 }, { "epoch": 0.8777129521586932, "grad_norm": 1.9062742935641837, "learning_rate": 4.4882225445541996e-07, "loss": 0.4525, "step": 37610 }, { "epoch": 0.877946324387398, "grad_norm": 2.1453623002045283, "learning_rate": 4.471371255369272e-07, "loss": 0.4475, "step": 37620 }, { "epoch": 0.8781796966161027, "grad_norm": 1.7830888034350552, "learning_rate": 4.454550179446576e-07, "loss": 0.4283, "step": 37630 }, { "epoch": 0.8784130688448075, "grad_norm": 1.4199292719873773, "learning_rate": 4.43775932794876e-07, "loss": 0.4265, "step": 37640 }, { "epoch": 0.8786464410735122, "grad_norm": 1.7647935939731039, "learning_rate": 4.4209987120183983e-07, "loss": 0.4527, "step": 37650 }, { "epoch": 0.878879813302217, "grad_norm": 1.7390213469916203, "learning_rate": 4.404268342778001e-07, "loss": 0.4168, "step": 37660 }, { "epoch": 0.8791131855309218, "grad_norm": 1.763569997328972, "learning_rate": 4.387568231330025e-07, "loss": 0.4463, "step": 37670 }, { "epoch": 0.8793465577596266, "grad_norm": 1.5347794401261952, "learning_rate": 4.3708983887568225e-07, "loss": 0.437, "step": 37680 }, { "epoch": 0.8795799299883313, "grad_norm": 1.7471263646283575, "learning_rate": 4.3542588261206666e-07, "loss": 0.4246, "step": 37690 }, { "epoch": 0.8798133022170361, "grad_norm": 1.7669520544493944, "learning_rate": 4.337649554463763e-07, "loss": 0.4585, "step": 37700 }, { "epoch": 0.8800466744457409, "grad_norm": 1.7646040235137201, "learning_rate": 4.3210705848081734e-07, "loss": 0.4467, "step": 37710 }, { "epoch": 0.8802800466744457, "grad_norm": 1.4921816626838278, "learning_rate": 4.304521928155875e-07, "loss": 0.4309, "step": 37720 }, { "epoch": 0.8805134189031505, "grad_norm": 1.6951042666649423, "learning_rate": 4.2880035954887323e-07, "loss": 0.4581, "step": 37730 }, { "epoch": 0.8807467911318553, "grad_norm": 1.8162688653671182, "learning_rate": 4.2715155977684663e-07, "loss": 0.413, "step": 37740 }, { "epoch": 0.8809801633605601, "grad_norm": 1.6254523263977643, "learning_rate": 4.255057945936697e-07, "loss": 0.4393, "step": 37750 }, { "epoch": 0.8812135355892649, "grad_norm": 1.68160106699203, "learning_rate": 4.2386306509148823e-07, "loss": 0.4367, "step": 37760 }, { "epoch": 0.8814469078179696, "grad_norm": 2.5339943816780317, "learning_rate": 4.222233723604335e-07, "loss": 0.4651, "step": 37770 }, { "epoch": 0.8816802800466744, "grad_norm": 1.9588761304221884, "learning_rate": 4.205867174886241e-07, "loss": 0.4634, "step": 37780 }, { "epoch": 0.8819136522753792, "grad_norm": 1.685406454149353, "learning_rate": 4.189531015621595e-07, "loss": 0.4457, "step": 37790 }, { "epoch": 0.882147024504084, "grad_norm": 1.7528414318077907, "learning_rate": 4.173225256651242e-07, "loss": 0.4586, "step": 37800 }, { "epoch": 0.8823803967327888, "grad_norm": 1.509485106517874, "learning_rate": 4.156949908795849e-07, "loss": 0.3947, "step": 37810 }, { "epoch": 0.8826137689614936, "grad_norm": 1.5932080665328316, "learning_rate": 4.1407049828558977e-07, "loss": 0.4406, "step": 37820 }, { "epoch": 0.8828471411901984, "grad_norm": 2.298789860982981, "learning_rate": 4.124490489611699e-07, "loss": 0.4239, "step": 37830 }, { "epoch": 0.8830805134189031, "grad_norm": 1.803339890188662, "learning_rate": 4.1083064398233506e-07, "loss": 0.4266, "step": 37840 }, { "epoch": 0.8833138856476079, "grad_norm": 1.9082089821440766, "learning_rate": 4.092152844230746e-07, "loss": 0.4267, "step": 37850 }, { "epoch": 0.8835472578763127, "grad_norm": 1.8424077218364672, "learning_rate": 4.0760297135535865e-07, "loss": 0.4391, "step": 37860 }, { "epoch": 0.8837806301050175, "grad_norm": 1.6633230363128264, "learning_rate": 4.059937058491342e-07, "loss": 0.454, "step": 37870 }, { "epoch": 0.8840140023337223, "grad_norm": 1.5093334243712617, "learning_rate": 4.043874889723254e-07, "loss": 0.4265, "step": 37880 }, { "epoch": 0.8842473745624271, "grad_norm": 2.1199375399724243, "learning_rate": 4.0278432179083614e-07, "loss": 0.4248, "step": 37890 }, { "epoch": 0.8844807467911319, "grad_norm": 1.7655621614053774, "learning_rate": 4.0118420536854277e-07, "loss": 0.4516, "step": 37900 }, { "epoch": 0.8847141190198367, "grad_norm": 1.7848150131439482, "learning_rate": 3.9958714076729855e-07, "loss": 0.4417, "step": 37910 }, { "epoch": 0.8849474912485414, "grad_norm": 1.8890635872325912, "learning_rate": 3.9799312904693357e-07, "loss": 0.4236, "step": 37920 }, { "epoch": 0.8851808634772462, "grad_norm": 1.4484304211523042, "learning_rate": 3.964021712652499e-07, "loss": 0.401, "step": 37930 }, { "epoch": 0.885414235705951, "grad_norm": 1.8111719345689485, "learning_rate": 3.948142684780215e-07, "loss": 0.4312, "step": 37940 }, { "epoch": 0.8856476079346558, "grad_norm": 1.5942463205143298, "learning_rate": 3.932294217389987e-07, "loss": 0.4281, "step": 37950 }, { "epoch": 0.8858809801633606, "grad_norm": 2.047067145517154, "learning_rate": 3.916476320999013e-07, "loss": 0.4435, "step": 37960 }, { "epoch": 0.8861143523920654, "grad_norm": 1.814662380385057, "learning_rate": 3.9006890061042137e-07, "loss": 0.4443, "step": 37970 }, { "epoch": 0.8863477246207702, "grad_norm": 1.400242943429172, "learning_rate": 3.8849322831822135e-07, "loss": 0.4321, "step": 37980 }, { "epoch": 0.886581096849475, "grad_norm": 1.9198495566782425, "learning_rate": 3.8692061626893205e-07, "loss": 0.4551, "step": 37990 }, { "epoch": 0.8868144690781797, "grad_norm": 1.6101789190549678, "learning_rate": 3.853510655061571e-07, "loss": 0.4307, "step": 38000 }, { "epoch": 0.8870478413068845, "grad_norm": 1.5859252142173421, "learning_rate": 3.837845770714649e-07, "loss": 0.4305, "step": 38010 }, { "epoch": 0.8872812135355893, "grad_norm": 1.7104748292541612, "learning_rate": 3.822211520043922e-07, "loss": 0.4433, "step": 38020 }, { "epoch": 0.8875145857642941, "grad_norm": 1.7540600204520174, "learning_rate": 3.806607913424465e-07, "loss": 0.4379, "step": 38030 }, { "epoch": 0.8877479579929989, "grad_norm": 1.7727489207270914, "learning_rate": 3.791034961210971e-07, "loss": 0.412, "step": 38040 }, { "epoch": 0.8879813302217037, "grad_norm": 1.7473830057996287, "learning_rate": 3.7754926737378127e-07, "loss": 0.45, "step": 38050 }, { "epoch": 0.8882147024504085, "grad_norm": 1.8108440949179858, "learning_rate": 3.7599810613190123e-07, "loss": 0.4313, "step": 38060 }, { "epoch": 0.8884480746791131, "grad_norm": 1.7734352522150711, "learning_rate": 3.744500134248241e-07, "loss": 0.4315, "step": 38070 }, { "epoch": 0.8886814469078179, "grad_norm": 1.8053067488826, "learning_rate": 3.7290499027987817e-07, "loss": 0.4336, "step": 38080 }, { "epoch": 0.8889148191365227, "grad_norm": 1.803351299950865, "learning_rate": 3.713630377223587e-07, "loss": 0.4536, "step": 38090 }, { "epoch": 0.8891481913652275, "grad_norm": 1.7314835736043095, "learning_rate": 3.6982415677552007e-07, "loss": 0.4206, "step": 38100 }, { "epoch": 0.8893815635939323, "grad_norm": 1.7732082902386754, "learning_rate": 3.682883484605787e-07, "loss": 0.4516, "step": 38110 }, { "epoch": 0.8896149358226371, "grad_norm": 1.8568320961039773, "learning_rate": 3.667556137967143e-07, "loss": 0.4235, "step": 38120 }, { "epoch": 0.8898483080513419, "grad_norm": 1.6766730098291334, "learning_rate": 3.652259538010633e-07, "loss": 0.4407, "step": 38130 }, { "epoch": 0.8900816802800466, "grad_norm": 1.4639669718701962, "learning_rate": 3.636993694887253e-07, "loss": 0.4167, "step": 38140 }, { "epoch": 0.8903150525087514, "grad_norm": 1.7624360646294404, "learning_rate": 3.6217586187275665e-07, "loss": 0.4491, "step": 38150 }, { "epoch": 0.8905484247374562, "grad_norm": 1.6199023135283173, "learning_rate": 3.60655431964172e-07, "loss": 0.4656, "step": 38160 }, { "epoch": 0.890781796966161, "grad_norm": 1.8220017537668607, "learning_rate": 3.591380807719447e-07, "loss": 0.4601, "step": 38170 }, { "epoch": 0.8910151691948658, "grad_norm": 1.5141249295288621, "learning_rate": 3.576238093030049e-07, "loss": 0.4251, "step": 38180 }, { "epoch": 0.8912485414235706, "grad_norm": 1.7729953457741368, "learning_rate": 3.5611261856223765e-07, "loss": 0.3965, "step": 38190 }, { "epoch": 0.8914819136522754, "grad_norm": 1.5292261150296622, "learning_rate": 3.5460450955248524e-07, "loss": 0.429, "step": 38200 }, { "epoch": 0.8917152858809801, "grad_norm": 1.9147439673855466, "learning_rate": 3.5309948327454327e-07, "loss": 0.427, "step": 38210 }, { "epoch": 0.8919486581096849, "grad_norm": 1.6608997585128733, "learning_rate": 3.515975407271638e-07, "loss": 0.4395, "step": 38220 }, { "epoch": 0.8921820303383897, "grad_norm": 1.6666275383601057, "learning_rate": 3.500986829070502e-07, "loss": 0.4081, "step": 38230 }, { "epoch": 0.8924154025670945, "grad_norm": 1.649265042516169, "learning_rate": 3.4860291080885956e-07, "loss": 0.4305, "step": 38240 }, { "epoch": 0.8926487747957993, "grad_norm": 1.8323450251659534, "learning_rate": 3.471102254252029e-07, "loss": 0.4359, "step": 38250 }, { "epoch": 0.8928821470245041, "grad_norm": 1.7017798979334373, "learning_rate": 3.4562062774664116e-07, "loss": 0.4288, "step": 38260 }, { "epoch": 0.8931155192532089, "grad_norm": 1.6273082761880882, "learning_rate": 3.441341187616848e-07, "loss": 0.4393, "step": 38270 }, { "epoch": 0.8933488914819137, "grad_norm": 1.9214029731042386, "learning_rate": 3.4265069945679794e-07, "loss": 0.4416, "step": 38280 }, { "epoch": 0.8935822637106184, "grad_norm": 1.4662731100424569, "learning_rate": 3.411703708163927e-07, "loss": 0.4539, "step": 38290 }, { "epoch": 0.8938156359393232, "grad_norm": 1.7262342499580374, "learning_rate": 3.3969313382282933e-07, "loss": 0.444, "step": 38300 }, { "epoch": 0.894049008168028, "grad_norm": 1.829035106473237, "learning_rate": 3.3821898945641863e-07, "loss": 0.4334, "step": 38310 }, { "epoch": 0.8942823803967328, "grad_norm": 1.6830992531921893, "learning_rate": 3.367479386954176e-07, "loss": 0.4501, "step": 38320 }, { "epoch": 0.8945157526254376, "grad_norm": 1.8914673166134797, "learning_rate": 3.352799825160286e-07, "loss": 0.4554, "step": 38330 }, { "epoch": 0.8947491248541424, "grad_norm": 1.836652155815649, "learning_rate": 3.3381512189240516e-07, "loss": 0.4551, "step": 38340 }, { "epoch": 0.8949824970828472, "grad_norm": 1.7221351364869792, "learning_rate": 3.3235335779664146e-07, "loss": 0.4366, "step": 38350 }, { "epoch": 0.895215869311552, "grad_norm": 1.606871050498548, "learning_rate": 3.3089469119878094e-07, "loss": 0.4308, "step": 38360 }, { "epoch": 0.8954492415402567, "grad_norm": 1.7506908869675128, "learning_rate": 3.2943912306680815e-07, "loss": 0.4297, "step": 38370 }, { "epoch": 0.8956826137689615, "grad_norm": 1.8447717455468713, "learning_rate": 3.2798665436665325e-07, "loss": 0.4387, "step": 38380 }, { "epoch": 0.8959159859976663, "grad_norm": 2.031763975687663, "learning_rate": 3.265372860621896e-07, "loss": 0.4197, "step": 38390 }, { "epoch": 0.8961493582263711, "grad_norm": 1.4499578441269405, "learning_rate": 3.2509101911523343e-07, "loss": 0.4474, "step": 38400 }, { "epoch": 0.8963827304550759, "grad_norm": 2.0437136511347584, "learning_rate": 3.2364785448554037e-07, "loss": 0.4456, "step": 38410 }, { "epoch": 0.8966161026837807, "grad_norm": 2.129409074952179, "learning_rate": 3.2220779313081096e-07, "loss": 0.4434, "step": 38420 }, { "epoch": 0.8968494749124855, "grad_norm": 1.5671701611619748, "learning_rate": 3.2077083600668414e-07, "loss": 0.4201, "step": 38430 }, { "epoch": 0.8970828471411902, "grad_norm": 1.7996976249636065, "learning_rate": 3.1933698406673817e-07, "loss": 0.4401, "step": 38440 }, { "epoch": 0.897316219369895, "grad_norm": 2.169129598048179, "learning_rate": 3.179062382624931e-07, "loss": 0.4488, "step": 38450 }, { "epoch": 0.8975495915985998, "grad_norm": 1.3965243997771937, "learning_rate": 3.1647859954340667e-07, "loss": 0.4403, "step": 38460 }, { "epoch": 0.8977829638273046, "grad_norm": 1.7482835492715738, "learning_rate": 3.150540688568732e-07, "loss": 0.4128, "step": 38470 }, { "epoch": 0.8980163360560094, "grad_norm": 1.4619254436045943, "learning_rate": 3.1363264714822706e-07, "loss": 0.4524, "step": 38480 }, { "epoch": 0.8982497082847141, "grad_norm": 1.4605647089860754, "learning_rate": 3.1221433536073764e-07, "loss": 0.4271, "step": 38490 }, { "epoch": 0.8984830805134189, "grad_norm": 1.672971069939739, "learning_rate": 3.107991344356104e-07, "loss": 0.44, "step": 38500 }, { "epoch": 0.8987164527421236, "grad_norm": 1.8184753979400687, "learning_rate": 3.0938704531198793e-07, "loss": 0.4442, "step": 38510 }, { "epoch": 0.8989498249708284, "grad_norm": 1.6228251243062508, "learning_rate": 3.079780689269468e-07, "loss": 0.4452, "step": 38520 }, { "epoch": 0.8991831971995332, "grad_norm": 1.7220074043694178, "learning_rate": 3.0657220621549856e-07, "loss": 0.4147, "step": 38530 }, { "epoch": 0.899416569428238, "grad_norm": 1.626142452282592, "learning_rate": 3.051694581105874e-07, "loss": 0.4214, "step": 38540 }, { "epoch": 0.8996499416569428, "grad_norm": 1.5499659082518809, "learning_rate": 3.0376982554309156e-07, "loss": 0.4466, "step": 38550 }, { "epoch": 0.8998833138856476, "grad_norm": 1.6890196129454715, "learning_rate": 3.023733094418219e-07, "loss": 0.4332, "step": 38560 }, { "epoch": 0.9001166861143524, "grad_norm": 1.9542159558866008, "learning_rate": 3.0097991073351876e-07, "loss": 0.4542, "step": 38570 }, { "epoch": 0.9003500583430571, "grad_norm": 1.71535580276011, "learning_rate": 2.995896303428586e-07, "loss": 0.4622, "step": 38580 }, { "epoch": 0.9005834305717619, "grad_norm": 1.8706697637546608, "learning_rate": 2.9820246919244347e-07, "loss": 0.4448, "step": 38590 }, { "epoch": 0.9008168028004667, "grad_norm": 1.806553434217712, "learning_rate": 2.9681842820280806e-07, "loss": 0.4464, "step": 38600 }, { "epoch": 0.9010501750291715, "grad_norm": 1.613949552825515, "learning_rate": 2.954375082924177e-07, "loss": 0.443, "step": 38610 }, { "epoch": 0.9012835472578763, "grad_norm": 1.638569103248583, "learning_rate": 2.940597103776632e-07, "loss": 0.4525, "step": 38620 }, { "epoch": 0.9015169194865811, "grad_norm": 1.8969623217804066, "learning_rate": 2.9268503537286496e-07, "loss": 0.4286, "step": 38630 }, { "epoch": 0.9017502917152859, "grad_norm": 1.6378462792058537, "learning_rate": 2.9131348419027316e-07, "loss": 0.4149, "step": 38640 }, { "epoch": 0.9019836639439907, "grad_norm": 1.6265731077646206, "learning_rate": 2.89945057740062e-07, "loss": 0.4511, "step": 38650 }, { "epoch": 0.9022170361726954, "grad_norm": 1.7633747517683238, "learning_rate": 2.885797569303328e-07, "loss": 0.4408, "step": 38660 }, { "epoch": 0.9024504084014002, "grad_norm": 1.7358097848760126, "learning_rate": 2.8721758266711417e-07, "loss": 0.436, "step": 38670 }, { "epoch": 0.902683780630105, "grad_norm": 1.7150149788622773, "learning_rate": 2.8585853585435806e-07, "loss": 0.4644, "step": 38680 }, { "epoch": 0.9029171528588098, "grad_norm": 1.8617042209962875, "learning_rate": 2.845026173939419e-07, "loss": 0.4426, "step": 38690 }, { "epoch": 0.9031505250875146, "grad_norm": 1.5548784794063044, "learning_rate": 2.831498281856676e-07, "loss": 0.4338, "step": 38700 }, { "epoch": 0.9033838973162194, "grad_norm": 1.7979960050650985, "learning_rate": 2.8180016912725984e-07, "loss": 0.4348, "step": 38710 }, { "epoch": 0.9036172695449242, "grad_norm": 1.5192410620111523, "learning_rate": 2.8045364111436435e-07, "loss": 0.4718, "step": 38720 }, { "epoch": 0.903850641773629, "grad_norm": 1.6895284784460498, "learning_rate": 2.791102450405536e-07, "loss": 0.4213, "step": 38730 }, { "epoch": 0.9040840140023337, "grad_norm": 1.7694092299174788, "learning_rate": 2.7776998179731673e-07, "loss": 0.4351, "step": 38740 }, { "epoch": 0.9043173862310385, "grad_norm": 1.5276971259376508, "learning_rate": 2.7643285227406727e-07, "loss": 0.4163, "step": 38750 }, { "epoch": 0.9045507584597433, "grad_norm": 1.5860760979136506, "learning_rate": 2.7509885735813813e-07, "loss": 0.4508, "step": 38760 }, { "epoch": 0.9047841306884481, "grad_norm": 3.9017823672952696, "learning_rate": 2.7376799793478014e-07, "loss": 0.4431, "step": 38770 }, { "epoch": 0.9050175029171529, "grad_norm": 1.5926208189684423, "learning_rate": 2.724402748871674e-07, "loss": 0.4035, "step": 38780 }, { "epoch": 0.9052508751458577, "grad_norm": 1.2279295908123193, "learning_rate": 2.7111568909639006e-07, "loss": 0.3873, "step": 38790 }, { "epoch": 0.9054842473745625, "grad_norm": 1.8169241640996083, "learning_rate": 2.6979424144145506e-07, "loss": 0.4239, "step": 38800 }, { "epoch": 0.9057176196032672, "grad_norm": 4.976364358854301, "learning_rate": 2.6847593279929083e-07, "loss": 0.4554, "step": 38810 }, { "epoch": 0.905950991831972, "grad_norm": 1.8389959595201049, "learning_rate": 2.671607640447388e-07, "loss": 0.416, "step": 38820 }, { "epoch": 0.9061843640606768, "grad_norm": 1.6235511403690328, "learning_rate": 2.658487360505585e-07, "loss": 0.4509, "step": 38830 }, { "epoch": 0.9064177362893816, "grad_norm": 1.6618666532480288, "learning_rate": 2.645398496874263e-07, "loss": 0.4557, "step": 38840 }, { "epoch": 0.9066511085180864, "grad_norm": 1.778575473573449, "learning_rate": 2.6323410582393184e-07, "loss": 0.4425, "step": 38850 }, { "epoch": 0.9068844807467912, "grad_norm": 1.605174004443482, "learning_rate": 2.619315053265792e-07, "loss": 0.441, "step": 38860 }, { "epoch": 0.907117852975496, "grad_norm": 1.8404266570806378, "learning_rate": 2.6063204905978855e-07, "loss": 0.4739, "step": 38870 }, { "epoch": 0.9073512252042008, "grad_norm": 1.9269290424877423, "learning_rate": 2.5933573788589184e-07, "loss": 0.4343, "step": 38880 }, { "epoch": 0.9075845974329055, "grad_norm": 1.4182300571175352, "learning_rate": 2.5804257266513364e-07, "loss": 0.4322, "step": 38890 }, { "epoch": 0.9078179696616102, "grad_norm": 1.7486279209662603, "learning_rate": 2.5675255425567316e-07, "loss": 0.428, "step": 38900 }, { "epoch": 0.908051341890315, "grad_norm": 1.7172053774449012, "learning_rate": 2.5546568351357827e-07, "loss": 0.4342, "step": 38910 }, { "epoch": 0.9082847141190198, "grad_norm": 1.5526819108678136, "learning_rate": 2.5418196129283145e-07, "loss": 0.4167, "step": 38920 }, { "epoch": 0.9085180863477246, "grad_norm": 2.0672530151882254, "learning_rate": 2.5290138844532244e-07, "loss": 0.4289, "step": 38930 }, { "epoch": 0.9087514585764294, "grad_norm": 4.603024566590288, "learning_rate": 2.516239658208514e-07, "loss": 0.4571, "step": 38940 }, { "epoch": 0.9089848308051341, "grad_norm": 1.6070249993590975, "learning_rate": 2.5034969426713076e-07, "loss": 0.439, "step": 38950 }, { "epoch": 0.9092182030338389, "grad_norm": 1.7475837681623017, "learning_rate": 2.490785746297797e-07, "loss": 0.4538, "step": 38960 }, { "epoch": 0.9094515752625437, "grad_norm": 1.5982058244038155, "learning_rate": 2.478106077523257e-07, "loss": 0.4353, "step": 38970 }, { "epoch": 0.9096849474912485, "grad_norm": 1.881941054375047, "learning_rate": 2.465457944762051e-07, "loss": 0.4384, "step": 38980 }, { "epoch": 0.9099183197199533, "grad_norm": 1.64827264137957, "learning_rate": 2.4528413564076036e-07, "loss": 0.4509, "step": 38990 }, { "epoch": 0.9101516919486581, "grad_norm": 1.7355508437538063, "learning_rate": 2.440256320832418e-07, "loss": 0.431, "step": 39000 }, { "epoch": 0.9103850641773629, "grad_norm": 1.7749310718580602, "learning_rate": 2.427702846388047e-07, "loss": 0.4122, "step": 39010 }, { "epoch": 0.9106184364060677, "grad_norm": 1.6959072807763502, "learning_rate": 2.415180941405099e-07, "loss": 0.4427, "step": 39020 }, { "epoch": 0.9108518086347724, "grad_norm": 2.048177884468545, "learning_rate": 2.4026906141932536e-07, "loss": 0.4703, "step": 39030 }, { "epoch": 0.9110851808634772, "grad_norm": 1.662452110187981, "learning_rate": 2.39023187304121e-07, "loss": 0.4653, "step": 39040 }, { "epoch": 0.911318553092182, "grad_norm": 1.7373839258867978, "learning_rate": 2.3778047262167093e-07, "loss": 0.4393, "step": 39050 }, { "epoch": 0.9115519253208868, "grad_norm": 1.8288017694052001, "learning_rate": 2.3654091819665436e-07, "loss": 0.4445, "step": 39060 }, { "epoch": 0.9117852975495916, "grad_norm": 1.783017361220902, "learning_rate": 2.3530452485165168e-07, "loss": 0.4555, "step": 39070 }, { "epoch": 0.9120186697782964, "grad_norm": 1.6853052950435068, "learning_rate": 2.3407129340714596e-07, "loss": 0.4353, "step": 39080 }, { "epoch": 0.9122520420070012, "grad_norm": 1.7730886209014813, "learning_rate": 2.3284122468152314e-07, "loss": 0.4276, "step": 39090 }, { "epoch": 0.912485414235706, "grad_norm": 1.493235633711758, "learning_rate": 2.3161431949106806e-07, "loss": 0.411, "step": 39100 }, { "epoch": 0.9127187864644107, "grad_norm": 1.5932385366556387, "learning_rate": 2.303905786499683e-07, "loss": 0.4464, "step": 39110 }, { "epoch": 0.9129521586931155, "grad_norm": 1.8646867541636418, "learning_rate": 2.2917000297031088e-07, "loss": 0.4514, "step": 39120 }, { "epoch": 0.9131855309218203, "grad_norm": 1.7245998154451176, "learning_rate": 2.279525932620813e-07, "loss": 0.4255, "step": 39130 }, { "epoch": 0.9134189031505251, "grad_norm": 1.740158345630619, "learning_rate": 2.267383503331666e-07, "loss": 0.4484, "step": 39140 }, { "epoch": 0.9136522753792299, "grad_norm": 1.6827961520080117, "learning_rate": 2.2552727498934946e-07, "loss": 0.4278, "step": 39150 }, { "epoch": 0.9138856476079347, "grad_norm": 1.8313355198776946, "learning_rate": 2.2431936803431198e-07, "loss": 0.4214, "step": 39160 }, { "epoch": 0.9141190198366395, "grad_norm": 2.2235878251000125, "learning_rate": 2.2311463026963465e-07, "loss": 0.4361, "step": 39170 }, { "epoch": 0.9143523920653442, "grad_norm": 2.4228361165158785, "learning_rate": 2.219130624947924e-07, "loss": 0.4454, "step": 39180 }, { "epoch": 0.914585764294049, "grad_norm": 1.7065553965905735, "learning_rate": 2.207146655071579e-07, "loss": 0.4465, "step": 39190 }, { "epoch": 0.9148191365227538, "grad_norm": 1.658879456230197, "learning_rate": 2.1951944010200122e-07, "loss": 0.4626, "step": 39200 }, { "epoch": 0.9150525087514586, "grad_norm": 1.5680088029854768, "learning_rate": 2.1832738707248447e-07, "loss": 0.4308, "step": 39210 }, { "epoch": 0.9152858809801634, "grad_norm": 1.8016665924198116, "learning_rate": 2.17138507209666e-07, "loss": 0.4466, "step": 39220 }, { "epoch": 0.9155192532088682, "grad_norm": 1.7349593563799612, "learning_rate": 2.159528013025003e-07, "loss": 0.4264, "step": 39230 }, { "epoch": 0.915752625437573, "grad_norm": 1.5816345564630203, "learning_rate": 2.1477027013783292e-07, "loss": 0.4059, "step": 39240 }, { "epoch": 0.9159859976662778, "grad_norm": 1.968879550728003, "learning_rate": 2.1359091450040282e-07, "loss": 0.4547, "step": 39250 }, { "epoch": 0.9162193698949825, "grad_norm": 1.7713112004619624, "learning_rate": 2.1241473517284338e-07, "loss": 0.4262, "step": 39260 }, { "epoch": 0.9164527421236873, "grad_norm": 1.6691426336377357, "learning_rate": 2.1124173293567972e-07, "loss": 0.4294, "step": 39270 }, { "epoch": 0.9166861143523921, "grad_norm": 1.647931250089369, "learning_rate": 2.100719085673264e-07, "loss": 0.4199, "step": 39280 }, { "epoch": 0.9169194865810969, "grad_norm": 1.2408482886667995, "learning_rate": 2.089052628440924e-07, "loss": 0.4205, "step": 39290 }, { "epoch": 0.9171528588098017, "grad_norm": 1.8451976666985956, "learning_rate": 2.0774179654017513e-07, "loss": 0.4097, "step": 39300 }, { "epoch": 0.9173862310385065, "grad_norm": 2.1662263755881126, "learning_rate": 2.065815104276625e-07, "loss": 0.4131, "step": 39310 }, { "epoch": 0.9176196032672111, "grad_norm": 1.852754941956357, "learning_rate": 2.054244052765325e-07, "loss": 0.4426, "step": 39320 }, { "epoch": 0.9178529754959159, "grad_norm": 1.6703838687540733, "learning_rate": 2.042704818546515e-07, "loss": 0.4505, "step": 39330 }, { "epoch": 0.9180863477246207, "grad_norm": 1.3951745579917747, "learning_rate": 2.031197409277752e-07, "loss": 0.4195, "step": 39340 }, { "epoch": 0.9183197199533255, "grad_norm": 1.7636713960725812, "learning_rate": 2.0197218325954726e-07, "loss": 0.4331, "step": 39350 }, { "epoch": 0.9185530921820303, "grad_norm": 1.616302274610223, "learning_rate": 2.0082780961149738e-07, "loss": 0.4455, "step": 39360 }, { "epoch": 0.9187864644107351, "grad_norm": 1.7793044848658073, "learning_rate": 1.9968662074304534e-07, "loss": 0.4416, "step": 39370 }, { "epoch": 0.9190198366394399, "grad_norm": 1.9516228633328914, "learning_rate": 1.9854861741149422e-07, "loss": 0.4333, "step": 39380 }, { "epoch": 0.9192532088681447, "grad_norm": 1.8927979166520437, "learning_rate": 1.9741380037203605e-07, "loss": 0.4206, "step": 39390 }, { "epoch": 0.9194865810968494, "grad_norm": 1.8771121289476589, "learning_rate": 1.9628217037774622e-07, "loss": 0.4545, "step": 39400 }, { "epoch": 0.9197199533255542, "grad_norm": 1.8886194933974814, "learning_rate": 1.9515372817958511e-07, "loss": 0.4249, "step": 39410 }, { "epoch": 0.919953325554259, "grad_norm": 1.6831382494351639, "learning_rate": 1.9402847452639984e-07, "loss": 0.4408, "step": 39420 }, { "epoch": 0.9201866977829638, "grad_norm": 1.6144303137262521, "learning_rate": 1.9290641016491972e-07, "loss": 0.4404, "step": 39430 }, { "epoch": 0.9204200700116686, "grad_norm": 2.0062276613272103, "learning_rate": 1.9178753583975752e-07, "loss": 0.4637, "step": 39440 }, { "epoch": 0.9206534422403734, "grad_norm": 1.8037533606703413, "learning_rate": 1.906718522934109e-07, "loss": 0.4375, "step": 39450 }, { "epoch": 0.9208868144690782, "grad_norm": 1.6568227124834078, "learning_rate": 1.895593602662582e-07, "loss": 0.438, "step": 39460 }, { "epoch": 0.921120186697783, "grad_norm": 1.7497648454015444, "learning_rate": 1.8845006049655946e-07, "loss": 0.4301, "step": 39470 }, { "epoch": 0.9213535589264877, "grad_norm": 1.6769960462828373, "learning_rate": 1.873439537204591e-07, "loss": 0.4531, "step": 39480 }, { "epoch": 0.9215869311551925, "grad_norm": 1.820909371135041, "learning_rate": 1.8624104067198e-07, "loss": 0.4355, "step": 39490 }, { "epoch": 0.9218203033838973, "grad_norm": 1.9317669154415384, "learning_rate": 1.851413220830256e-07, "loss": 0.448, "step": 39500 }, { "epoch": 0.9220536756126021, "grad_norm": 1.7346949860906953, "learning_rate": 1.840447986833821e-07, "loss": 0.4246, "step": 39510 }, { "epoch": 0.9222870478413069, "grad_norm": 2.173862522744535, "learning_rate": 1.82951471200713e-07, "loss": 0.4519, "step": 39520 }, { "epoch": 0.9225204200700117, "grad_norm": 1.8647103975779062, "learning_rate": 1.8186134036056015e-07, "loss": 0.4433, "step": 39530 }, { "epoch": 0.9227537922987165, "grad_norm": 1.9647884791704289, "learning_rate": 1.8077440688634763e-07, "loss": 0.446, "step": 39540 }, { "epoch": 0.9229871645274212, "grad_norm": 1.7688156898932355, "learning_rate": 1.7969067149937403e-07, "loss": 0.4591, "step": 39550 }, { "epoch": 0.923220536756126, "grad_norm": 3.5753076427844253, "learning_rate": 1.786101349188185e-07, "loss": 0.4238, "step": 39560 }, { "epoch": 0.9234539089848308, "grad_norm": 1.6591395059862444, "learning_rate": 1.7753279786173528e-07, "loss": 0.4411, "step": 39570 }, { "epoch": 0.9236872812135356, "grad_norm": 1.6973979544416848, "learning_rate": 1.7645866104305576e-07, "loss": 0.4379, "step": 39580 }, { "epoch": 0.9239206534422404, "grad_norm": 1.8143057282936013, "learning_rate": 1.7538772517558976e-07, "loss": 0.4153, "step": 39590 }, { "epoch": 0.9241540256709452, "grad_norm": 1.7503404711798085, "learning_rate": 1.7431999097001938e-07, "loss": 0.4541, "step": 39600 }, { "epoch": 0.92438739789965, "grad_norm": 1.7405050503641917, "learning_rate": 1.7325545913490505e-07, "loss": 0.4474, "step": 39610 }, { "epoch": 0.9246207701283548, "grad_norm": 2.1551321713786846, "learning_rate": 1.7219413037668054e-07, "loss": 0.4471, "step": 39620 }, { "epoch": 0.9248541423570595, "grad_norm": 1.9645359143245964, "learning_rate": 1.711360053996547e-07, "loss": 0.4329, "step": 39630 }, { "epoch": 0.9250875145857643, "grad_norm": 1.418655374288999, "learning_rate": 1.7008108490600916e-07, "loss": 0.4112, "step": 39640 }, { "epoch": 0.9253208868144691, "grad_norm": 1.691342372490125, "learning_rate": 1.6902936959580174e-07, "loss": 0.4333, "step": 39650 }, { "epoch": 0.9255542590431739, "grad_norm": 1.6128960562212102, "learning_rate": 1.679808601669597e-07, "loss": 0.4419, "step": 39660 }, { "epoch": 0.9257876312718787, "grad_norm": 1.4129641676961855, "learning_rate": 1.6693555731528532e-07, "loss": 0.4109, "step": 39670 }, { "epoch": 0.9260210035005835, "grad_norm": 1.6460454465129182, "learning_rate": 1.6589346173445265e-07, "loss": 0.4318, "step": 39680 }, { "epoch": 0.9262543757292883, "grad_norm": 1.5949188341044875, "learning_rate": 1.648545741160057e-07, "loss": 0.441, "step": 39690 }, { "epoch": 0.926487747957993, "grad_norm": 1.5562219123024819, "learning_rate": 1.6381889514936188e-07, "loss": 0.4207, "step": 39700 }, { "epoch": 0.9267211201866978, "grad_norm": 1.6876791548906136, "learning_rate": 1.6278642552180813e-07, "loss": 0.4555, "step": 39710 }, { "epoch": 0.9269544924154026, "grad_norm": 1.915006787090959, "learning_rate": 1.6175716591850133e-07, "loss": 0.4467, "step": 39720 }, { "epoch": 0.9271878646441073, "grad_norm": 1.8882069741547038, "learning_rate": 1.6073111702246902e-07, "loss": 0.454, "step": 39730 }, { "epoch": 0.9274212368728121, "grad_norm": 2.302162624474689, "learning_rate": 1.5970827951460822e-07, "loss": 0.4465, "step": 39740 }, { "epoch": 0.9276546091015169, "grad_norm": 1.6778050964482618, "learning_rate": 1.5868865407368373e-07, "loss": 0.4474, "step": 39750 }, { "epoch": 0.9278879813302217, "grad_norm": 1.703631500781939, "learning_rate": 1.576722413763293e-07, "loss": 0.4236, "step": 39760 }, { "epoch": 0.9281213535589264, "grad_norm": 1.8325535035032117, "learning_rate": 1.5665904209704652e-07, "loss": 0.4479, "step": 39770 }, { "epoch": 0.9283547257876312, "grad_norm": 1.6567827786309628, "learning_rate": 1.5564905690820642e-07, "loss": 0.4281, "step": 39780 }, { "epoch": 0.928588098016336, "grad_norm": 2.250305230201566, "learning_rate": 1.5464228648004342e-07, "loss": 0.4411, "step": 39790 }, { "epoch": 0.9288214702450408, "grad_norm": 1.9823139727812855, "learning_rate": 1.5363873148066144e-07, "loss": 0.4653, "step": 39800 }, { "epoch": 0.9290548424737456, "grad_norm": 1.8211540073184587, "learning_rate": 1.526383925760305e-07, "loss": 0.4523, "step": 39810 }, { "epoch": 0.9292882147024504, "grad_norm": 1.6301692124263403, "learning_rate": 1.5164127042998466e-07, "loss": 0.4349, "step": 39820 }, { "epoch": 0.9295215869311552, "grad_norm": 2.3328831566297747, "learning_rate": 1.5064736570422456e-07, "loss": 0.4438, "step": 39830 }, { "epoch": 0.92975495915986, "grad_norm": 1.9184744642653273, "learning_rate": 1.4965667905831594e-07, "loss": 0.4628, "step": 39840 }, { "epoch": 0.9299883313885647, "grad_norm": 1.7151971571154254, "learning_rate": 1.4866921114968847e-07, "loss": 0.4445, "step": 39850 }, { "epoch": 0.9302217036172695, "grad_norm": 1.771678623515809, "learning_rate": 1.4768496263363463e-07, "loss": 0.4374, "step": 39860 }, { "epoch": 0.9304550758459743, "grad_norm": 1.6206378562779413, "learning_rate": 1.4670393416331419e-07, "loss": 0.419, "step": 39870 }, { "epoch": 0.9306884480746791, "grad_norm": 1.8380265160120266, "learning_rate": 1.4572612638974582e-07, "loss": 0.4158, "step": 39880 }, { "epoch": 0.9309218203033839, "grad_norm": 1.7013166829019646, "learning_rate": 1.4475153996181324e-07, "loss": 0.4283, "step": 39890 }, { "epoch": 0.9311551925320887, "grad_norm": 1.7831646800402134, "learning_rate": 1.4378017552626245e-07, "loss": 0.4208, "step": 39900 }, { "epoch": 0.9313885647607935, "grad_norm": 1.6588414445448063, "learning_rate": 1.4281203372770002e-07, "loss": 0.4084, "step": 39910 }, { "epoch": 0.9316219369894982, "grad_norm": 1.9876321150285354, "learning_rate": 1.4184711520859428e-07, "loss": 0.4401, "step": 39920 }, { "epoch": 0.931855309218203, "grad_norm": 1.9208092143130802, "learning_rate": 1.4088542060927635e-07, "loss": 0.4484, "step": 39930 }, { "epoch": 0.9320886814469078, "grad_norm": 1.6707301369562233, "learning_rate": 1.399269505679357e-07, "loss": 0.4717, "step": 39940 }, { "epoch": 0.9323220536756126, "grad_norm": 1.7604478239906456, "learning_rate": 1.3897170572062357e-07, "loss": 0.4264, "step": 39950 }, { "epoch": 0.9325554259043174, "grad_norm": 1.6977893169945995, "learning_rate": 1.3801968670124898e-07, "loss": 0.4497, "step": 39960 }, { "epoch": 0.9327887981330222, "grad_norm": 1.8442427205993088, "learning_rate": 1.370708941415827e-07, "loss": 0.416, "step": 39970 }, { "epoch": 0.933022170361727, "grad_norm": 1.8444023398256293, "learning_rate": 1.3612532867125218e-07, "loss": 0.4374, "step": 39980 }, { "epoch": 0.9332555425904318, "grad_norm": 1.7816006746013522, "learning_rate": 1.3518299091774545e-07, "loss": 0.4469, "step": 39990 }, { "epoch": 0.9334889148191365, "grad_norm": 1.7825936205848438, "learning_rate": 1.3424388150640565e-07, "loss": 0.4458, "step": 40000 }, { "epoch": 0.9337222870478413, "grad_norm": 1.898175010050762, "learning_rate": 1.3330800106043707e-07, "loss": 0.4198, "step": 40010 }, { "epoch": 0.9339556592765461, "grad_norm": 1.6886665670826126, "learning_rate": 1.3237535020089897e-07, "loss": 0.4006, "step": 40020 }, { "epoch": 0.9341890315052509, "grad_norm": 1.6543070354871032, "learning_rate": 1.3144592954670744e-07, "loss": 0.4292, "step": 40030 }, { "epoch": 0.9344224037339557, "grad_norm": 1.7155468288985214, "learning_rate": 1.3051973971463628e-07, "loss": 0.4369, "step": 40040 }, { "epoch": 0.9346557759626605, "grad_norm": 1.822805610057077, "learning_rate": 1.2959678131931442e-07, "loss": 0.4308, "step": 40050 }, { "epoch": 0.9348891481913653, "grad_norm": 1.4370532999595076, "learning_rate": 1.2867705497322581e-07, "loss": 0.4319, "step": 40060 }, { "epoch": 0.93512252042007, "grad_norm": 1.7414616986783071, "learning_rate": 1.2776056128671054e-07, "loss": 0.4579, "step": 40070 }, { "epoch": 0.9353558926487748, "grad_norm": 1.7156456863265304, "learning_rate": 1.2684730086796326e-07, "loss": 0.4531, "step": 40080 }, { "epoch": 0.9355892648774796, "grad_norm": 1.4342300727584147, "learning_rate": 1.2593727432303305e-07, "loss": 0.4267, "step": 40090 }, { "epoch": 0.9358226371061844, "grad_norm": 1.799498027889698, "learning_rate": 1.2503048225582293e-07, "loss": 0.4655, "step": 40100 }, { "epoch": 0.9360560093348892, "grad_norm": 1.7950229409580083, "learning_rate": 1.2412692526808823e-07, "loss": 0.4485, "step": 40110 }, { "epoch": 0.936289381563594, "grad_norm": 3.634786234195023, "learning_rate": 1.2322660395944042e-07, "loss": 0.431, "step": 40120 }, { "epoch": 0.9365227537922988, "grad_norm": 1.9050631482916458, "learning_rate": 1.2232951892734047e-07, "loss": 0.4402, "step": 40130 }, { "epoch": 0.9367561260210036, "grad_norm": 1.6580861256407204, "learning_rate": 1.2143567076710326e-07, "loss": 0.4301, "step": 40140 }, { "epoch": 0.9369894982497082, "grad_norm": 1.5903475439668093, "learning_rate": 1.2054506007189603e-07, "loss": 0.4309, "step": 40150 }, { "epoch": 0.937222870478413, "grad_norm": 1.7207275560799562, "learning_rate": 1.1965768743273653e-07, "loss": 0.4379, "step": 40160 }, { "epoch": 0.9374562427071178, "grad_norm": 1.903879101298436, "learning_rate": 1.1877355343849539e-07, "loss": 0.4335, "step": 40170 }, { "epoch": 0.9376896149358226, "grad_norm": 1.627553869050655, "learning_rate": 1.1789265867589216e-07, "loss": 0.443, "step": 40180 }, { "epoch": 0.9379229871645274, "grad_norm": 1.7769785691183613, "learning_rate": 1.1701500372949759e-07, "loss": 0.4485, "step": 40190 }, { "epoch": 0.9381563593932322, "grad_norm": 1.7374623433373357, "learning_rate": 1.1614058918173298e-07, "loss": 0.4529, "step": 40200 }, { "epoch": 0.938389731621937, "grad_norm": 1.7644907535265033, "learning_rate": 1.1526941561286809e-07, "loss": 0.4237, "step": 40210 }, { "epoch": 0.9386231038506417, "grad_norm": 1.8810336745632434, "learning_rate": 1.144014836010221e-07, "loss": 0.4154, "step": 40220 }, { "epoch": 0.9388564760793465, "grad_norm": 1.8342647901791898, "learning_rate": 1.1353679372216487e-07, "loss": 0.4145, "step": 40230 }, { "epoch": 0.9390898483080513, "grad_norm": 1.7686431721327458, "learning_rate": 1.1267534655011291e-07, "loss": 0.4246, "step": 40240 }, { "epoch": 0.9393232205367561, "grad_norm": 1.831213007680744, "learning_rate": 1.1181714265653066e-07, "loss": 0.4492, "step": 40250 }, { "epoch": 0.9395565927654609, "grad_norm": 1.6297975075405997, "learning_rate": 1.1096218261093195e-07, "loss": 0.4296, "step": 40260 }, { "epoch": 0.9397899649941657, "grad_norm": 1.6559532545456253, "learning_rate": 1.1011046698067685e-07, "loss": 0.4363, "step": 40270 }, { "epoch": 0.9400233372228705, "grad_norm": 1.9427507737438856, "learning_rate": 1.0926199633097156e-07, "loss": 0.4422, "step": 40280 }, { "epoch": 0.9402567094515752, "grad_norm": 1.8641688445438835, "learning_rate": 1.0841677122487126e-07, "loss": 0.4363, "step": 40290 }, { "epoch": 0.94049008168028, "grad_norm": 2.382673656111318, "learning_rate": 1.0757479222327505e-07, "loss": 0.413, "step": 40300 }, { "epoch": 0.9407234539089848, "grad_norm": 1.6315946027893486, "learning_rate": 1.0673605988492875e-07, "loss": 0.4239, "step": 40310 }, { "epoch": 0.9409568261376896, "grad_norm": 1.6619711012754501, "learning_rate": 1.0590057476642435e-07, "loss": 0.4525, "step": 40320 }, { "epoch": 0.9411901983663944, "grad_norm": 1.5373915678432395, "learning_rate": 1.0506833742219724e-07, "loss": 0.437, "step": 40330 }, { "epoch": 0.9414235705950992, "grad_norm": 1.7920466314131362, "learning_rate": 1.0423934840452954e-07, "loss": 0.4465, "step": 40340 }, { "epoch": 0.941656942823804, "grad_norm": 1.6548917462484185, "learning_rate": 1.0341360826354674e-07, "loss": 0.4458, "step": 40350 }, { "epoch": 0.9418903150525088, "grad_norm": 1.8462578652319257, "learning_rate": 1.0259111754721773e-07, "loss": 0.4346, "step": 40360 }, { "epoch": 0.9421236872812135, "grad_norm": 1.7702673332969114, "learning_rate": 1.0177187680135648e-07, "loss": 0.4083, "step": 40370 }, { "epoch": 0.9423570595099183, "grad_norm": 1.6835585703239837, "learning_rate": 1.0095588656961975e-07, "loss": 0.4392, "step": 40380 }, { "epoch": 0.9425904317386231, "grad_norm": 1.6047150029532695, "learning_rate": 1.001431473935055e-07, "loss": 0.4248, "step": 40390 }, { "epoch": 0.9428238039673279, "grad_norm": 1.7072160271435528, "learning_rate": 9.933365981235731e-08, "loss": 0.4591, "step": 40400 }, { "epoch": 0.9430571761960327, "grad_norm": 1.9037398555130736, "learning_rate": 9.852742436335827e-08, "loss": 0.4605, "step": 40410 }, { "epoch": 0.9432905484247375, "grad_norm": 1.8144811024391172, "learning_rate": 9.772444158153537e-08, "loss": 0.4351, "step": 40420 }, { "epoch": 0.9435239206534423, "grad_norm": 1.3429562726193807, "learning_rate": 9.692471199975573e-08, "loss": 0.4268, "step": 40430 }, { "epoch": 0.943757292882147, "grad_norm": 1.657418592666933, "learning_rate": 9.612823614872757e-08, "loss": 0.427, "step": 40440 }, { "epoch": 0.9439906651108518, "grad_norm": 1.8266807282313537, "learning_rate": 9.533501455700145e-08, "loss": 0.4461, "step": 40450 }, { "epoch": 0.9442240373395566, "grad_norm": 1.7201329994573074, "learning_rate": 9.454504775096628e-08, "loss": 0.4298, "step": 40460 }, { "epoch": 0.9444574095682614, "grad_norm": 1.736396184712989, "learning_rate": 9.37583362548522e-08, "loss": 0.4633, "step": 40470 }, { "epoch": 0.9446907817969662, "grad_norm": 1.8646607325999442, "learning_rate": 9.29748805907299e-08, "loss": 0.4387, "step": 40480 }, { "epoch": 0.944924154025671, "grad_norm": 1.9009656471813825, "learning_rate": 9.219468127850795e-08, "loss": 0.4352, "step": 40490 }, { "epoch": 0.9451575262543758, "grad_norm": 1.8790713812711006, "learning_rate": 9.141773883593441e-08, "loss": 0.4357, "step": 40500 }, { "epoch": 0.9453908984830806, "grad_norm": 1.789081880310555, "learning_rate": 9.06440537785963e-08, "loss": 0.4259, "step": 40510 }, { "epoch": 0.9456242707117853, "grad_norm": 1.6918964187183316, "learning_rate": 8.987362661991961e-08, "loss": 0.4387, "step": 40520 }, { "epoch": 0.9458576429404901, "grad_norm": 1.663820751019136, "learning_rate": 8.91064578711659e-08, "loss": 0.434, "step": 40530 }, { "epoch": 0.9460910151691949, "grad_norm": 1.8303098737371315, "learning_rate": 8.834254804143794e-08, "loss": 0.4385, "step": 40540 }, { "epoch": 0.9463243873978997, "grad_norm": 1.8250345325935031, "learning_rate": 8.758189763767355e-08, "loss": 0.4366, "step": 40550 }, { "epoch": 0.9465577596266045, "grad_norm": 1.54015979823238, "learning_rate": 8.682450716464785e-08, "loss": 0.4232, "step": 40560 }, { "epoch": 0.9467911318553092, "grad_norm": 1.9444687300355512, "learning_rate": 8.607037712497323e-08, "loss": 0.4528, "step": 40570 }, { "epoch": 0.947024504084014, "grad_norm": 1.756634911273042, "learning_rate": 8.531950801909717e-08, "loss": 0.4603, "step": 40580 }, { "epoch": 0.9472578763127187, "grad_norm": 1.6626837141938158, "learning_rate": 8.45719003453055e-08, "loss": 0.4357, "step": 40590 }, { "epoch": 0.9474912485414235, "grad_norm": 1.7395013628445888, "learning_rate": 8.382755459971747e-08, "loss": 0.425, "step": 40600 }, { "epoch": 0.9477246207701283, "grad_norm": 1.753637692819224, "learning_rate": 8.3086471276288e-08, "loss": 0.4571, "step": 40610 }, { "epoch": 0.9479579929988331, "grad_norm": 1.7148276473216892, "learning_rate": 8.234865086680921e-08, "loss": 0.4416, "step": 40620 }, { "epoch": 0.9481913652275379, "grad_norm": 1.583460070892623, "learning_rate": 8.161409386090446e-08, "loss": 0.4199, "step": 40630 }, { "epoch": 0.9484247374562427, "grad_norm": 1.7106703441679312, "learning_rate": 8.088280074603382e-08, "loss": 0.4464, "step": 40640 }, { "epoch": 0.9486581096849475, "grad_norm": 1.6757996437114897, "learning_rate": 8.015477200749133e-08, "loss": 0.418, "step": 40650 }, { "epoch": 0.9488914819136522, "grad_norm": 1.528734197842929, "learning_rate": 7.943000812840385e-08, "loss": 0.4036, "step": 40660 }, { "epoch": 0.949124854142357, "grad_norm": 1.7052942527890353, "learning_rate": 7.87085095897322e-08, "loss": 0.4695, "step": 40670 }, { "epoch": 0.9493582263710618, "grad_norm": 1.8589489863055781, "learning_rate": 7.799027687027006e-08, "loss": 0.4476, "step": 40680 }, { "epoch": 0.9495915985997666, "grad_norm": 1.6295538724596508, "learning_rate": 7.727531044664449e-08, "loss": 0.443, "step": 40690 }, { "epoch": 0.9498249708284714, "grad_norm": 2.0579260351183133, "learning_rate": 7.65636107933132e-08, "loss": 0.4395, "step": 40700 }, { "epoch": 0.9500583430571762, "grad_norm": 1.723455391009297, "learning_rate": 7.585517838256951e-08, "loss": 0.4753, "step": 40710 }, { "epoch": 0.950291715285881, "grad_norm": 1.832614079497975, "learning_rate": 7.5150013684534e-08, "loss": 0.45, "step": 40720 }, { "epoch": 0.9505250875145858, "grad_norm": 1.8500583196916645, "learning_rate": 7.444811716716294e-08, "loss": 0.4286, "step": 40730 }, { "epoch": 0.9507584597432905, "grad_norm": 1.7154618555573862, "learning_rate": 7.374948929624093e-08, "loss": 0.4023, "step": 40740 }, { "epoch": 0.9509918319719953, "grad_norm": 1.6160508046011712, "learning_rate": 7.30541305353849e-08, "loss": 0.4319, "step": 40750 }, { "epoch": 0.9512252042007001, "grad_norm": 1.8759465852317085, "learning_rate": 7.23620413460424e-08, "loss": 0.427, "step": 40760 }, { "epoch": 0.9514585764294049, "grad_norm": 1.516382397892737, "learning_rate": 7.167322218749051e-08, "loss": 0.4626, "step": 40770 }, { "epoch": 0.9516919486581097, "grad_norm": 1.546043403490193, "learning_rate": 7.098767351683634e-08, "loss": 0.4255, "step": 40780 }, { "epoch": 0.9519253208868145, "grad_norm": 1.5667285017413695, "learning_rate": 7.030539578901819e-08, "loss": 0.4177, "step": 40790 }, { "epoch": 0.9521586931155193, "grad_norm": 1.72305206382575, "learning_rate": 6.962638945680111e-08, "loss": 0.4024, "step": 40800 }, { "epoch": 0.952392065344224, "grad_norm": 1.3851312732599574, "learning_rate": 6.895065497078135e-08, "loss": 0.4044, "step": 40810 }, { "epoch": 0.9526254375729288, "grad_norm": 1.6547745385851855, "learning_rate": 6.827819277938242e-08, "loss": 0.4474, "step": 40820 }, { "epoch": 0.9528588098016336, "grad_norm": 1.4834921133091852, "learning_rate": 6.760900332885789e-08, "loss": 0.4279, "step": 40830 }, { "epoch": 0.9530921820303384, "grad_norm": 1.5972972450890226, "learning_rate": 6.694308706328866e-08, "loss": 0.4646, "step": 40840 }, { "epoch": 0.9533255542590432, "grad_norm": 1.4764230577045478, "learning_rate": 6.628044442458347e-08, "loss": 0.4157, "step": 40850 }, { "epoch": 0.953558926487748, "grad_norm": 1.798699203028504, "learning_rate": 6.562107585247835e-08, "loss": 0.4101, "step": 40860 }, { "epoch": 0.9537922987164528, "grad_norm": 1.8162952101127614, "learning_rate": 6.496498178453769e-08, "loss": 0.4776, "step": 40870 }, { "epoch": 0.9540256709451576, "grad_norm": 1.6681085760398933, "learning_rate": 6.431216265615215e-08, "loss": 0.4209, "step": 40880 }, { "epoch": 0.9542590431738623, "grad_norm": 2.158315071208241, "learning_rate": 6.366261890053794e-08, "loss": 0.4509, "step": 40890 }, { "epoch": 0.9544924154025671, "grad_norm": 1.879764040384886, "learning_rate": 6.301635094874026e-08, "loss": 0.4383, "step": 40900 }, { "epoch": 0.9547257876312719, "grad_norm": 1.6403995974581451, "learning_rate": 6.237335922962884e-08, "loss": 0.4104, "step": 40910 }, { "epoch": 0.9549591598599767, "grad_norm": 1.7641840635472463, "learning_rate": 6.173364416989847e-08, "loss": 0.4596, "step": 40920 }, { "epoch": 0.9551925320886815, "grad_norm": 1.6078596793336954, "learning_rate": 6.109720619407234e-08, "loss": 0.4423, "step": 40930 }, { "epoch": 0.9554259043173863, "grad_norm": 1.7230743010668585, "learning_rate": 6.046404572449593e-08, "loss": 0.4372, "step": 40940 }, { "epoch": 0.9556592765460911, "grad_norm": 1.729407970621475, "learning_rate": 5.98341631813415e-08, "loss": 0.4649, "step": 40950 }, { "epoch": 0.9558926487747959, "grad_norm": 1.9845692041092586, "learning_rate": 5.920755898260577e-08, "loss": 0.4561, "step": 40960 }, { "epoch": 0.9561260210035006, "grad_norm": 1.6256125161380734, "learning_rate": 5.8584233544108914e-08, "loss": 0.4446, "step": 40970 }, { "epoch": 0.9563593932322053, "grad_norm": 1.5556486069249411, "learning_rate": 5.7964187279496155e-08, "loss": 0.4347, "step": 40980 }, { "epoch": 0.9565927654609101, "grad_norm": 1.9483527223215955, "learning_rate": 5.734742060023724e-08, "loss": 0.4369, "step": 40990 }, { "epoch": 0.9568261376896149, "grad_norm": 1.6175365890492388, "learning_rate": 5.6733933915624205e-08, "loss": 0.4476, "step": 41000 }, { "epoch": 0.9570595099183197, "grad_norm": 1.7372534006055511, "learning_rate": 5.612372763277363e-08, "loss": 0.4159, "step": 41010 }, { "epoch": 0.9572928821470245, "grad_norm": 1.8853085374354386, "learning_rate": 5.5516802156623807e-08, "loss": 0.4259, "step": 41020 }, { "epoch": 0.9575262543757292, "grad_norm": 1.7870772499545822, "learning_rate": 5.491315788993701e-08, "loss": 0.4602, "step": 41030 }, { "epoch": 0.957759626604434, "grad_norm": 1.818165822024644, "learning_rate": 5.431279523329835e-08, "loss": 0.4625, "step": 41040 }, { "epoch": 0.9579929988331388, "grad_norm": 2.117901834659644, "learning_rate": 5.3715714585113596e-08, "loss": 0.4535, "step": 41050 }, { "epoch": 0.9582263710618436, "grad_norm": 1.5504708400546008, "learning_rate": 5.31219163416119e-08, "loss": 0.4389, "step": 41060 }, { "epoch": 0.9584597432905484, "grad_norm": 1.7800405876818217, "learning_rate": 5.253140089684472e-08, "loss": 0.4501, "step": 41070 }, { "epoch": 0.9586931155192532, "grad_norm": 1.704282410532937, "learning_rate": 5.194416864268303e-08, "loss": 0.4456, "step": 41080 }, { "epoch": 0.958926487747958, "grad_norm": 2.0399996530321625, "learning_rate": 5.1360219968820124e-08, "loss": 0.4452, "step": 41090 }, { "epoch": 0.9591598599766628, "grad_norm": 1.6006752174004661, "learning_rate": 5.077955526277101e-08, "loss": 0.4175, "step": 41100 }, { "epoch": 0.9593932322053675, "grad_norm": 1.7235753532982374, "learning_rate": 5.020217490987023e-08, "loss": 0.4587, "step": 41110 }, { "epoch": 0.9596266044340723, "grad_norm": 1.9076434303248455, "learning_rate": 4.962807929327351e-08, "loss": 0.4554, "step": 41120 }, { "epoch": 0.9598599766627771, "grad_norm": 1.9400402348161052, "learning_rate": 4.905726879395667e-08, "loss": 0.4688, "step": 41130 }, { "epoch": 0.9600933488914819, "grad_norm": 1.6322549263469124, "learning_rate": 4.8489743790714475e-08, "loss": 0.4527, "step": 41140 }, { "epoch": 0.9603267211201867, "grad_norm": 1.8335024224468535, "learning_rate": 4.792550466016343e-08, "loss": 0.4451, "step": 41150 }, { "epoch": 0.9605600933488915, "grad_norm": 1.5963657717254907, "learning_rate": 4.7364551776737935e-08, "loss": 0.4355, "step": 41160 }, { "epoch": 0.9607934655775963, "grad_norm": 1.7230423745408912, "learning_rate": 4.680688551269186e-08, "loss": 0.3946, "step": 41170 }, { "epoch": 0.961026837806301, "grad_norm": 1.3939664818236406, "learning_rate": 4.6252506238098624e-08, "loss": 0.4534, "step": 41180 }, { "epoch": 0.9612602100350058, "grad_norm": 1.6937683666544048, "learning_rate": 4.5701414320849516e-08, "loss": 0.4271, "step": 41190 }, { "epoch": 0.9614935822637106, "grad_norm": 1.9148375533182935, "learning_rate": 4.515361012665531e-08, "loss": 0.4547, "step": 41200 }, { "epoch": 0.9617269544924154, "grad_norm": 2.527322015980741, "learning_rate": 4.4609094019044116e-08, "loss": 0.4349, "step": 41210 }, { "epoch": 0.9619603267211202, "grad_norm": 1.8737979630007724, "learning_rate": 4.4067866359361885e-08, "loss": 0.4579, "step": 41220 }, { "epoch": 0.962193698949825, "grad_norm": 1.6738356926833953, "learning_rate": 4.352992750677354e-08, "loss": 0.4589, "step": 41230 }, { "epoch": 0.9624270711785298, "grad_norm": 1.7156208182050297, "learning_rate": 4.299527781826074e-08, "loss": 0.445, "step": 41240 }, { "epoch": 0.9626604434072346, "grad_norm": 1.7246946648733905, "learning_rate": 4.24639176486219e-08, "loss": 0.4332, "step": 41250 }, { "epoch": 0.9628938156359393, "grad_norm": 1.7137300757192488, "learning_rate": 4.1935847350473845e-08, "loss": 0.4398, "step": 41260 }, { "epoch": 0.9631271878646441, "grad_norm": 1.7347144541563417, "learning_rate": 4.1411067274249036e-08, "loss": 0.4355, "step": 41270 }, { "epoch": 0.9633605600933489, "grad_norm": 1.5017422838091363, "learning_rate": 4.088957776819613e-08, "loss": 0.4145, "step": 41280 }, { "epoch": 0.9635939323220537, "grad_norm": 1.7451064818909896, "learning_rate": 4.0371379178382184e-08, "loss": 0.434, "step": 41290 }, { "epoch": 0.9638273045507585, "grad_norm": 1.6800084101586568, "learning_rate": 3.985647184868824e-08, "loss": 0.4552, "step": 41300 }, { "epoch": 0.9640606767794633, "grad_norm": 1.6624210597435725, "learning_rate": 3.9344856120812627e-08, "loss": 0.4256, "step": 41310 }, { "epoch": 0.9642940490081681, "grad_norm": 1.9121380105891, "learning_rate": 3.883653233426821e-08, "loss": 0.4135, "step": 41320 }, { "epoch": 0.9645274212368729, "grad_norm": 1.7442479228753, "learning_rate": 3.833150082638348e-08, "loss": 0.438, "step": 41330 }, { "epoch": 0.9647607934655776, "grad_norm": 1.5736828997896968, "learning_rate": 3.7829761932303696e-08, "loss": 0.3961, "step": 41340 }, { "epoch": 0.9649941656942824, "grad_norm": 1.8553935959961063, "learning_rate": 3.733131598498751e-08, "loss": 0.4265, "step": 41350 }, { "epoch": 0.9652275379229872, "grad_norm": 1.7663148404956657, "learning_rate": 3.6836163315207564e-08, "loss": 0.4398, "step": 41360 }, { "epoch": 0.965460910151692, "grad_norm": 1.6954104134744692, "learning_rate": 3.634430425155377e-08, "loss": 0.4482, "step": 41370 }, { "epoch": 0.9656942823803968, "grad_norm": 1.7463614149950073, "learning_rate": 3.585573912042839e-08, "loss": 0.4444, "step": 41380 }, { "epoch": 0.9659276546091016, "grad_norm": 1.9337476841870078, "learning_rate": 3.5370468246047615e-08, "loss": 0.4651, "step": 41390 }, { "epoch": 0.9661610268378062, "grad_norm": 1.7006282014834497, "learning_rate": 3.488849195044275e-08, "loss": 0.4491, "step": 41400 }, { "epoch": 0.966394399066511, "grad_norm": 1.8906672199494414, "learning_rate": 3.440981055345793e-08, "loss": 0.4449, "step": 41410 }, { "epoch": 0.9666277712952158, "grad_norm": 1.8683672382795289, "learning_rate": 3.393442437275074e-08, "loss": 0.4501, "step": 41420 }, { "epoch": 0.9668611435239206, "grad_norm": 1.5883918772995078, "learning_rate": 3.346233372379326e-08, "loss": 0.4391, "step": 41430 }, { "epoch": 0.9670945157526254, "grad_norm": 1.8640445331366162, "learning_rate": 3.2993538919868783e-08, "loss": 0.41, "step": 41440 }, { "epoch": 0.9673278879813302, "grad_norm": 1.5779191700682764, "learning_rate": 3.2528040272074015e-08, "loss": 0.428, "step": 41450 }, { "epoch": 0.967561260210035, "grad_norm": 1.4282855985511589, "learning_rate": 3.206583808931962e-08, "loss": 0.412, "step": 41460 }, { "epoch": 0.9677946324387398, "grad_norm": 1.8323684908022138, "learning_rate": 3.160693267832749e-08, "loss": 0.4092, "step": 41470 }, { "epoch": 0.9680280046674445, "grad_norm": 1.6882892741331985, "learning_rate": 3.115132434363122e-08, "loss": 0.4383, "step": 41480 }, { "epoch": 0.9682613768961493, "grad_norm": 2.098817248345456, "learning_rate": 3.0699013387577306e-08, "loss": 0.4272, "step": 41490 }, { "epoch": 0.9684947491248541, "grad_norm": 1.5781714195277068, "learning_rate": 3.025000011032453e-08, "loss": 0.438, "step": 41500 }, { "epoch": 0.9687281213535589, "grad_norm": 1.8631133950954122, "learning_rate": 2.980428480984232e-08, "loss": 0.4378, "step": 41510 }, { "epoch": 0.9689614935822637, "grad_norm": 1.7355841117054887, "learning_rate": 2.936186778191241e-08, "loss": 0.436, "step": 41520 }, { "epoch": 0.9691948658109685, "grad_norm": 1.5845238259445866, "learning_rate": 2.8922749320126063e-08, "loss": 0.4399, "step": 41530 }, { "epoch": 0.9694282380396733, "grad_norm": 1.549485766664584, "learning_rate": 2.84869297158874e-08, "loss": 0.4173, "step": 41540 }, { "epoch": 0.969661610268378, "grad_norm": 1.7944006869718652, "learning_rate": 2.8054409258411742e-08, "loss": 0.4564, "step": 41550 }, { "epoch": 0.9698949824970828, "grad_norm": 1.565769682110497, "learning_rate": 2.7625188234722267e-08, "loss": 0.435, "step": 41560 }, { "epoch": 0.9701283547257876, "grad_norm": 1.5938941232305037, "learning_rate": 2.7199266929655578e-08, "loss": 0.4468, "step": 41570 }, { "epoch": 0.9703617269544924, "grad_norm": 1.5804119800928744, "learning_rate": 2.6776645625856688e-08, "loss": 0.4558, "step": 41580 }, { "epoch": 0.9705950991831972, "grad_norm": 2.253632414502836, "learning_rate": 2.6357324603782376e-08, "loss": 0.4134, "step": 41590 }, { "epoch": 0.970828471411902, "grad_norm": 1.9196523049143601, "learning_rate": 2.5941304141697265e-08, "loss": 0.4348, "step": 41600 }, { "epoch": 0.9710618436406068, "grad_norm": 2.0301717163257806, "learning_rate": 2.5528584515677744e-08, "loss": 0.4659, "step": 41610 }, { "epoch": 0.9712952158693116, "grad_norm": 1.6216301459703064, "learning_rate": 2.5119165999608063e-08, "loss": 0.4372, "step": 41620 }, { "epoch": 0.9715285880980163, "grad_norm": 2.1036701398800393, "learning_rate": 2.471304886518311e-08, "loss": 0.4468, "step": 41630 }, { "epoch": 0.9717619603267211, "grad_norm": 1.7393211998533766, "learning_rate": 2.4310233381905635e-08, "loss": 0.4328, "step": 41640 }, { "epoch": 0.9719953325554259, "grad_norm": 1.6144847113708973, "learning_rate": 2.3910719817088478e-08, "loss": 0.4429, "step": 41650 }, { "epoch": 0.9722287047841307, "grad_norm": 1.5899829483979102, "learning_rate": 2.3514508435853457e-08, "loss": 0.4474, "step": 41660 }, { "epoch": 0.9724620770128355, "grad_norm": 1.8405855119992147, "learning_rate": 2.3121599501129687e-08, "loss": 0.447, "step": 41670 }, { "epoch": 0.9726954492415403, "grad_norm": 1.602313748517096, "learning_rate": 2.2731993273655827e-08, "loss": 0.4154, "step": 41680 }, { "epoch": 0.9729288214702451, "grad_norm": 1.6953923400953421, "learning_rate": 2.2345690011978393e-08, "loss": 0.4261, "step": 41690 }, { "epoch": 0.9731621936989499, "grad_norm": 1.5345303465695674, "learning_rate": 2.196268997245232e-08, "loss": 0.4433, "step": 41700 }, { "epoch": 0.9733955659276546, "grad_norm": 1.5432060089565625, "learning_rate": 2.1582993409240415e-08, "loss": 0.4197, "step": 41710 }, { "epoch": 0.9736289381563594, "grad_norm": 2.0401628246383368, "learning_rate": 2.1206600574312787e-08, "loss": 0.4626, "step": 41720 }, { "epoch": 0.9738623103850642, "grad_norm": 1.6390849241741194, "learning_rate": 2.083351171744852e-08, "loss": 0.4411, "step": 41730 }, { "epoch": 0.974095682613769, "grad_norm": 1.6835016917932033, "learning_rate": 2.0463727086232344e-08, "loss": 0.4344, "step": 41740 }, { "epoch": 0.9743290548424738, "grad_norm": 1.8250673850100043, "learning_rate": 2.0097246926056857e-08, "loss": 0.4281, "step": 41750 }, { "epoch": 0.9745624270711786, "grad_norm": 1.5458669987469418, "learning_rate": 1.973407148012252e-08, "loss": 0.4307, "step": 41760 }, { "epoch": 0.9747957992998834, "grad_norm": 1.4924432957679405, "learning_rate": 1.9374200989436543e-08, "loss": 0.4265, "step": 41770 }, { "epoch": 0.9750291715285881, "grad_norm": 1.6058232852482204, "learning_rate": 1.9017635692812343e-08, "loss": 0.4175, "step": 41780 }, { "epoch": 0.9752625437572929, "grad_norm": 1.7270079239964864, "learning_rate": 1.8664375826870084e-08, "loss": 0.437, "step": 41790 }, { "epoch": 0.9754959159859977, "grad_norm": 1.5775344867212235, "learning_rate": 1.831442162603725e-08, "loss": 0.4528, "step": 41800 }, { "epoch": 0.9757292882147024, "grad_norm": 1.6624776822286462, "learning_rate": 1.7967773322545845e-08, "loss": 0.4111, "step": 41810 }, { "epoch": 0.9759626604434072, "grad_norm": 1.8936029882307792, "learning_rate": 1.7624431146436305e-08, "loss": 0.4472, "step": 41820 }, { "epoch": 0.976196032672112, "grad_norm": 1.7098157462737067, "learning_rate": 1.7284395325553594e-08, "loss": 0.4139, "step": 41830 }, { "epoch": 0.9764294049008168, "grad_norm": 1.7221943395691346, "learning_rate": 1.6947666085548875e-08, "loss": 0.4179, "step": 41840 }, { "epoch": 0.9766627771295215, "grad_norm": 1.8432063415090922, "learning_rate": 1.6614243649879514e-08, "loss": 0.4576, "step": 41850 }, { "epoch": 0.9768961493582263, "grad_norm": 1.838697485480748, "learning_rate": 1.628412823980796e-08, "loss": 0.4459, "step": 41860 }, { "epoch": 0.9771295215869311, "grad_norm": 2.713042720234328, "learning_rate": 1.5957320074401205e-08, "loss": 0.4634, "step": 41870 }, { "epoch": 0.9773628938156359, "grad_norm": 1.6648816838499878, "learning_rate": 1.5633819370533542e-08, "loss": 0.4295, "step": 41880 }, { "epoch": 0.9775962660443407, "grad_norm": 1.6934466321821018, "learning_rate": 1.531362634288325e-08, "loss": 0.4548, "step": 41890 }, { "epoch": 0.9778296382730455, "grad_norm": 1.6617429592464785, "learning_rate": 1.499674120393313e-08, "loss": 0.4196, "step": 41900 }, { "epoch": 0.9780630105017503, "grad_norm": 1.7788159396652081, "learning_rate": 1.4683164163972197e-08, "loss": 0.4247, "step": 41910 }, { "epoch": 0.978296382730455, "grad_norm": 1.4602833229868002, "learning_rate": 1.4372895431092327e-08, "loss": 0.4445, "step": 41920 }, { "epoch": 0.9785297549591598, "grad_norm": 1.9043778389880128, "learning_rate": 1.406593521119215e-08, "loss": 0.4808, "step": 41930 }, { "epoch": 0.9787631271878646, "grad_norm": 1.7350683534182396, "learning_rate": 1.3762283707972612e-08, "loss": 0.4355, "step": 41940 }, { "epoch": 0.9789964994165694, "grad_norm": 1.5792883295177211, "learning_rate": 1.3461941122940304e-08, "loss": 0.4445, "step": 41950 }, { "epoch": 0.9792298716452742, "grad_norm": 1.637005404254888, "learning_rate": 1.316490765540579e-08, "loss": 0.4591, "step": 41960 }, { "epoch": 0.979463243873979, "grad_norm": 2.15723310628949, "learning_rate": 1.287118350248362e-08, "loss": 0.4556, "step": 41970 }, { "epoch": 0.9796966161026838, "grad_norm": 1.9227431480539283, "learning_rate": 1.2580768859091762e-08, "loss": 0.4253, "step": 41980 }, { "epoch": 0.9799299883313886, "grad_norm": 2.3167079654609894, "learning_rate": 1.229366391795217e-08, "loss": 0.4359, "step": 41990 }, { "epoch": 0.9801633605600933, "grad_norm": 1.6988781561177693, "learning_rate": 1.2009868869591323e-08, "loss": 0.4491, "step": 42000 }, { "epoch": 0.9803967327887981, "grad_norm": 1.70670255143934, "learning_rate": 1.1729383902338021e-08, "loss": 0.4226, "step": 42010 }, { "epoch": 0.9806301050175029, "grad_norm": 1.7616394645481317, "learning_rate": 1.1452209202325037e-08, "loss": 0.4396, "step": 42020 }, { "epoch": 0.9808634772462077, "grad_norm": 1.6771570085444993, "learning_rate": 1.1178344953488018e-08, "loss": 0.4463, "step": 42030 }, { "epoch": 0.9810968494749125, "grad_norm": 1.4934374551645169, "learning_rate": 1.0907791337567142e-08, "loss": 0.4448, "step": 42040 }, { "epoch": 0.9813302217036173, "grad_norm": 1.6328624372060896, "learning_rate": 1.0640548534103235e-08, "loss": 0.4198, "step": 42050 }, { "epoch": 0.9815635939323221, "grad_norm": 1.643237432225707, "learning_rate": 1.0376616720441657e-08, "loss": 0.4514, "step": 42060 }, { "epoch": 0.9817969661610269, "grad_norm": 1.7206736436553114, "learning_rate": 1.0115996071731193e-08, "loss": 0.4493, "step": 42070 }, { "epoch": 0.9820303383897316, "grad_norm": 1.668703479573972, "learning_rate": 9.858686760921277e-09, "loss": 0.4385, "step": 42080 }, { "epoch": 0.9822637106184364, "grad_norm": 1.7312469531777155, "learning_rate": 9.604688958765319e-09, "loss": 0.4111, "step": 42090 }, { "epoch": 0.9824970828471412, "grad_norm": 1.6687348368978956, "learning_rate": 9.354002833819043e-09, "loss": 0.4533, "step": 42100 }, { "epoch": 0.982730455075846, "grad_norm": 1.8722426746974252, "learning_rate": 9.106628552440489e-09, "loss": 0.4296, "step": 42110 }, { "epoch": 0.9829638273045508, "grad_norm": 2.051553685475174, "learning_rate": 8.862566278788897e-09, "loss": 0.4533, "step": 42120 }, { "epoch": 0.9831971995332556, "grad_norm": 1.6588080194129007, "learning_rate": 8.621816174827491e-09, "loss": 0.432, "step": 42130 }, { "epoch": 0.9834305717619604, "grad_norm": 1.815538459846712, "learning_rate": 8.384378400319582e-09, "loss": 0.4184, "step": 42140 }, { "epoch": 0.9836639439906651, "grad_norm": 1.60180748504032, "learning_rate": 8.150253112831907e-09, "loss": 0.4087, "step": 42150 }, { "epoch": 0.9838973162193699, "grad_norm": 1.796076505036259, "learning_rate": 7.919440467732408e-09, "loss": 0.4356, "step": 42160 }, { "epoch": 0.9841306884480747, "grad_norm": 1.9644878315895935, "learning_rate": 7.69194061819023e-09, "loss": 0.442, "step": 42170 }, { "epoch": 0.9843640606767795, "grad_norm": 1.7254099632574504, "learning_rate": 7.467753715177383e-09, "loss": 0.4405, "step": 42180 }, { "epoch": 0.9845974329054843, "grad_norm": 1.9325096022715356, "learning_rate": 7.246879907465976e-09, "loss": 0.4123, "step": 42190 }, { "epoch": 0.9848308051341891, "grad_norm": 1.666863377499646, "learning_rate": 7.02931934162987e-09, "loss": 0.4665, "step": 42200 }, { "epoch": 0.9850641773628939, "grad_norm": 1.9329948794056537, "learning_rate": 6.8150721620452444e-09, "loss": 0.4376, "step": 42210 }, { "epoch": 0.9852975495915987, "grad_norm": 2.441276769878383, "learning_rate": 6.60413851088837e-09, "loss": 0.417, "step": 42220 }, { "epoch": 0.9855309218203033, "grad_norm": 1.4716678659894018, "learning_rate": 6.3965185281367194e-09, "loss": 0.4211, "step": 42230 }, { "epoch": 0.9857642940490081, "grad_norm": 1.3368366520894355, "learning_rate": 6.192212351568971e-09, "loss": 0.433, "step": 42240 }, { "epoch": 0.9859976662777129, "grad_norm": 1.7468136410356268, "learning_rate": 5.991220116765562e-09, "loss": 0.4363, "step": 42250 }, { "epoch": 0.9862310385064177, "grad_norm": 1.8124310508413717, "learning_rate": 5.793541957105908e-09, "loss": 0.4335, "step": 42260 }, { "epoch": 0.9864644107351225, "grad_norm": 1.838978358895405, "learning_rate": 5.599178003772298e-09, "loss": 0.4499, "step": 42270 }, { "epoch": 0.9866977829638273, "grad_norm": 1.5035887820046243, "learning_rate": 5.408128385746003e-09, "loss": 0.4414, "step": 42280 }, { "epoch": 0.986931155192532, "grad_norm": 1.4433114682452988, "learning_rate": 5.220393229809495e-09, "loss": 0.4324, "step": 42290 }, { "epoch": 0.9871645274212368, "grad_norm": 1.5962029694761144, "learning_rate": 5.035972660545896e-09, "loss": 0.4269, "step": 42300 }, { "epoch": 0.9873978996499416, "grad_norm": 1.5881342197760253, "learning_rate": 4.854866800338975e-09, "loss": 0.4327, "step": 42310 }, { "epoch": 0.9876312718786464, "grad_norm": 1.7211166843172943, "learning_rate": 4.677075769371486e-09, "loss": 0.4514, "step": 42320 }, { "epoch": 0.9878646441073512, "grad_norm": 1.481107147290988, "learning_rate": 4.502599685628495e-09, "loss": 0.3913, "step": 42330 }, { "epoch": 0.988098016336056, "grad_norm": 1.9010663490716844, "learning_rate": 4.3314386648929395e-09, "loss": 0.4539, "step": 42340 }, { "epoch": 0.9883313885647608, "grad_norm": 3.1629519055092206, "learning_rate": 4.163592820750073e-09, "loss": 0.4407, "step": 42350 }, { "epoch": 0.9885647607934656, "grad_norm": 1.7508247883569457, "learning_rate": 3.9990622645830204e-09, "loss": 0.4615, "step": 42360 }, { "epoch": 0.9887981330221703, "grad_norm": 1.7684740504983998, "learning_rate": 3.837847105577219e-09, "loss": 0.4196, "step": 42370 }, { "epoch": 0.9890315052508751, "grad_norm": 1.553141078719589, "learning_rate": 3.6799474507154263e-09, "loss": 0.4237, "step": 42380 }, { "epoch": 0.9892648774795799, "grad_norm": 1.5255501700095377, "learning_rate": 3.5253634047821563e-09, "loss": 0.4378, "step": 42390 }, { "epoch": 0.9894982497082847, "grad_norm": 1.7390061999327318, "learning_rate": 3.3740950703614605e-09, "loss": 0.4508, "step": 42400 }, { "epoch": 0.9897316219369895, "grad_norm": 1.9279642834920074, "learning_rate": 3.226142547835265e-09, "loss": 0.4589, "step": 42410 }, { "epoch": 0.9899649941656943, "grad_norm": 1.6236704924039207, "learning_rate": 3.0815059353872523e-09, "loss": 0.4462, "step": 42420 }, { "epoch": 0.9901983663943991, "grad_norm": 1.8498520154555076, "learning_rate": 2.940185328999534e-09, "loss": 0.4266, "step": 42430 }, { "epoch": 0.9904317386231039, "grad_norm": 1.7280203052209084, "learning_rate": 2.8021808224543146e-09, "loss": 0.4373, "step": 42440 }, { "epoch": 0.9906651108518086, "grad_norm": 1.4859383709012686, "learning_rate": 2.6674925073322256e-09, "loss": 0.4341, "step": 42450 }, { "epoch": 0.9908984830805134, "grad_norm": 1.5619429699229905, "learning_rate": 2.536120473013437e-09, "loss": 0.4412, "step": 42460 }, { "epoch": 0.9911318553092182, "grad_norm": 1.7458295483054675, "learning_rate": 2.408064806678767e-09, "loss": 0.4447, "step": 42470 }, { "epoch": 0.991365227537923, "grad_norm": 1.91960979042732, "learning_rate": 2.2833255933069066e-09, "loss": 0.4524, "step": 42480 }, { "epoch": 0.9915985997666278, "grad_norm": 1.5766079797054027, "learning_rate": 2.161902915676084e-09, "loss": 0.449, "step": 42490 }, { "epoch": 0.9918319719953326, "grad_norm": 1.8813381314303723, "learning_rate": 2.043796854362956e-09, "loss": 0.4398, "step": 42500 }, { "epoch": 0.9920653442240374, "grad_norm": 4.239783366813281, "learning_rate": 1.9290074877448272e-09, "loss": 0.4136, "step": 42510 }, { "epoch": 0.9922987164527421, "grad_norm": 1.7653172625628724, "learning_rate": 1.8175348919968749e-09, "loss": 0.4476, "step": 42520 }, { "epoch": 0.9925320886814469, "grad_norm": 1.80484121885195, "learning_rate": 1.7093791410932592e-09, "loss": 0.434, "step": 42530 }, { "epoch": 0.9927654609101517, "grad_norm": 1.2764848830001894, "learning_rate": 1.604540306807123e-09, "loss": 0.4426, "step": 42540 }, { "epoch": 0.9929988331388565, "grad_norm": 1.6335345366674872, "learning_rate": 1.5030184587105923e-09, "loss": 0.4228, "step": 42550 }, { "epoch": 0.9932322053675613, "grad_norm": 1.5999630437032129, "learning_rate": 1.4048136641747756e-09, "loss": 0.4382, "step": 42560 }, { "epoch": 0.9934655775962661, "grad_norm": 2.047431711881157, "learning_rate": 1.3099259883697645e-09, "loss": 0.4537, "step": 42570 }, { "epoch": 0.9936989498249709, "grad_norm": 1.510161970480389, "learning_rate": 1.2183554942635235e-09, "loss": 0.4066, "step": 42580 }, { "epoch": 0.9939323220536757, "grad_norm": 2.409307328614469, "learning_rate": 1.1301022426229991e-09, "loss": 0.4361, "step": 42590 }, { "epoch": 0.9941656942823804, "grad_norm": 1.5768018645583348, "learning_rate": 1.0451662920141216e-09, "loss": 0.4374, "step": 42600 }, { "epoch": 0.9943990665110852, "grad_norm": 1.6916077664274163, "learning_rate": 9.635476988018034e-10, "loss": 0.429, "step": 42610 }, { "epoch": 0.99463243873979, "grad_norm": 2.218234140527449, "learning_rate": 8.852465171477198e-10, "loss": 0.4569, "step": 42620 }, { "epoch": 0.9948658109684948, "grad_norm": 1.5122967757012156, "learning_rate": 8.102627990147493e-10, "loss": 0.4312, "step": 42630 }, { "epoch": 0.9950991831971996, "grad_norm": 1.8675798150776972, "learning_rate": 7.385965941619777e-10, "loss": 0.436, "step": 42640 }, { "epoch": 0.9953325554259043, "grad_norm": 1.4756371225314695, "learning_rate": 6.702479501480285e-10, "loss": 0.4271, "step": 42650 }, { "epoch": 0.995565927654609, "grad_norm": 1.5765591625171624, "learning_rate": 6.052169123293983e-10, "loss": 0.4517, "step": 42660 }, { "epoch": 0.9957992998833138, "grad_norm": 1.8252037057239765, "learning_rate": 5.435035238621212e-10, "loss": 0.425, "step": 42670 }, { "epoch": 0.9960326721120186, "grad_norm": 1.698067741910492, "learning_rate": 4.851078256995489e-10, "loss": 0.4283, "step": 42680 }, { "epoch": 0.9962660443407234, "grad_norm": 2.0623536222122745, "learning_rate": 4.300298565934613e-10, "loss": 0.4316, "step": 42690 }, { "epoch": 0.9964994165694282, "grad_norm": 1.7103059452233673, "learning_rate": 3.782696530940655e-10, "loss": 0.4238, "step": 42700 }, { "epoch": 0.996732788798133, "grad_norm": 1.8083164206545608, "learning_rate": 3.2982724955055166e-10, "loss": 0.4446, "step": 42710 }, { "epoch": 0.9969661610268378, "grad_norm": 1.5456038202085278, "learning_rate": 2.8470267810887244e-10, "loss": 0.4329, "step": 42720 }, { "epoch": 0.9971995332555426, "grad_norm": 1.8128115964678453, "learning_rate": 2.4289596871507335e-10, "loss": 0.4269, "step": 42730 }, { "epoch": 0.9974329054842473, "grad_norm": 1.7780522789026876, "learning_rate": 2.044071491119626e-10, "loss": 0.4503, "step": 42740 }, { "epoch": 0.9976662777129521, "grad_norm": 1.6159566772243383, "learning_rate": 1.6923624484133094e-10, "loss": 0.4359, "step": 42750 }, { "epoch": 0.9978996499416569, "grad_norm": 1.9882260059825276, "learning_rate": 1.373832792422869e-10, "loss": 0.4494, "step": 42760 }, { "epoch": 0.9981330221703617, "grad_norm": 1.5690354303297414, "learning_rate": 1.0884827345403192e-10, "loss": 0.4515, "step": 42770 }, { "epoch": 0.9983663943990665, "grad_norm": 1.7957591019103287, "learning_rate": 8.363124641197484e-11, "loss": 0.4176, "step": 42780 }, { "epoch": 0.9985997666277713, "grad_norm": 1.905704962289021, "learning_rate": 6.173221484995217e-11, "loss": 0.4295, "step": 42790 }, { "epoch": 0.9988331388564761, "grad_norm": 2.274673295517274, "learning_rate": 4.315119330133843e-11, "loss": 0.4352, "step": 42800 }, { "epoch": 0.9990665110851809, "grad_norm": 1.8432841251710659, "learning_rate": 2.7888194096270528e-11, "loss": 0.4479, "step": 42810 }, { "epoch": 0.9992998833138856, "grad_norm": 1.9433016629247757, "learning_rate": 1.5943227363313106e-11, "loss": 0.4527, "step": 42820 }, { "epoch": 0.9995332555425904, "grad_norm": 1.8384669007411703, "learning_rate": 7.31630102890346e-12, "loss": 0.4212, "step": 42830 }, { "epoch": 0.9997666277712952, "grad_norm": 1.5108613292519795, "learning_rate": 2.0074208190168366e-12, "loss": 0.4495, "step": 42840 }, { "epoch": 1.0, "grad_norm": 2.0685159530459276, "learning_rate": 1.659025583578e-14, "loss": 0.4416, "step": 42850 }, { "epoch": 1.0, "step": 42850, "total_flos": 4464185935790080.0, "train_loss": 0.4821569056661154, "train_runtime": 141866.3462, "train_samples_per_second": 2.416, "train_steps_per_second": 0.302 } ], "logging_steps": 10, "max_steps": 42850, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4464185935790080.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }