{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 33556, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011920371915603766, "grad_norm": 121.50506591796875, "learning_rate": 5.952380952380952e-07, "loss": 16.0295, "step": 2 }, { "epoch": 0.00023840743831207532, "grad_norm": 110.85853576660156, "learning_rate": 1.1904761904761904e-06, "loss": 13.6977, "step": 4 }, { "epoch": 0.000357611157468113, "grad_norm": 123.97708129882812, "learning_rate": 1.7857142857142857e-06, "loss": 13.7236, "step": 6 }, { "epoch": 0.00047681487662415065, "grad_norm": 109.41820526123047, "learning_rate": 2.3809523809523808e-06, "loss": 13.1326, "step": 8 }, { "epoch": 0.0005960185957801883, "grad_norm": 100.62677001953125, "learning_rate": 2.9761904761904763e-06, "loss": 12.0103, "step": 10 }, { "epoch": 0.000715222314936226, "grad_norm": 73.8111343383789, "learning_rate": 3.5714285714285714e-06, "loss": 10.8356, "step": 12 }, { "epoch": 0.0008344260340922637, "grad_norm": 56.965576171875, "learning_rate": 4.166666666666667e-06, "loss": 9.5838, "step": 14 }, { "epoch": 0.0009536297532483013, "grad_norm": 45.88716506958008, "learning_rate": 4.7619047619047615e-06, "loss": 8.6006, "step": 16 }, { "epoch": 0.001072833472404339, "grad_norm": 35.14827346801758, "learning_rate": 5.357142857142857e-06, "loss": 7.6035, "step": 18 }, { "epoch": 0.0011920371915603767, "grad_norm": 25.799957275390625, "learning_rate": 5.9523809523809525e-06, "loss": 6.9105, "step": 20 }, { "epoch": 0.0013112409107164144, "grad_norm": 21.964374542236328, "learning_rate": 6.547619047619048e-06, "loss": 6.5332, "step": 22 }, { "epoch": 0.001430444629872452, "grad_norm": 19.061328887939453, "learning_rate": 7.142857142857143e-06, "loss": 6.3125, "step": 24 }, { "epoch": 0.0015496483490284897, "grad_norm": 15.475618362426758, "learning_rate": 7.738095238095238e-06, "loss": 5.7392, "step": 26 }, { "epoch": 0.0016688520681845274, "grad_norm": 16.83841896057129, "learning_rate": 8.333333333333334e-06, "loss": 5.4979, "step": 28 }, { "epoch": 0.0017880557873405651, "grad_norm": 9.515942573547363, "learning_rate": 8.92857142857143e-06, "loss": 5.5463, "step": 30 }, { "epoch": 0.0019072595064966026, "grad_norm": 11.77018928527832, "learning_rate": 9.523809523809523e-06, "loss": 5.2776, "step": 32 }, { "epoch": 0.0020264632256526403, "grad_norm": 8.007991790771484, "learning_rate": 1.011904761904762e-05, "loss": 5.0032, "step": 34 }, { "epoch": 0.002145666944808678, "grad_norm": 14.048178672790527, "learning_rate": 1.0714285714285714e-05, "loss": 5.0198, "step": 36 }, { "epoch": 0.0022648706639647157, "grad_norm": 8.482051849365234, "learning_rate": 1.130952380952381e-05, "loss": 4.9066, "step": 38 }, { "epoch": 0.0023840743831207534, "grad_norm": 6.235928535461426, "learning_rate": 1.1904761904761905e-05, "loss": 4.8064, "step": 40 }, { "epoch": 0.002503278102276791, "grad_norm": 5.929011344909668, "learning_rate": 1.25e-05, "loss": 4.9267, "step": 42 }, { "epoch": 0.0026224818214328287, "grad_norm": 6.369486331939697, "learning_rate": 1.3095238095238096e-05, "loss": 4.8895, "step": 44 }, { "epoch": 0.0027416855405888664, "grad_norm": 6.4098968505859375, "learning_rate": 1.3690476190476192e-05, "loss": 4.5647, "step": 46 }, { "epoch": 0.002860889259744904, "grad_norm": 6.036733627319336, "learning_rate": 1.4285714285714285e-05, "loss": 4.4067, "step": 48 }, { "epoch": 0.002980092978900942, "grad_norm": 7.251827239990234, "learning_rate": 1.4880952380952381e-05, "loss": 4.5829, "step": 50 }, { "epoch": 0.0030992966980569795, "grad_norm": 9.668909072875977, "learning_rate": 1.5476190476190476e-05, "loss": 4.3507, "step": 52 }, { "epoch": 0.003218500417213017, "grad_norm": 6.095469951629639, "learning_rate": 1.6071428571428572e-05, "loss": 4.288, "step": 54 }, { "epoch": 0.003337704136369055, "grad_norm": 6.457520484924316, "learning_rate": 1.6666666666666667e-05, "loss": 4.1759, "step": 56 }, { "epoch": 0.0034569078555250926, "grad_norm": 6.53852653503418, "learning_rate": 1.7261904761904763e-05, "loss": 4.1771, "step": 58 }, { "epoch": 0.0035761115746811302, "grad_norm": 6.1460795402526855, "learning_rate": 1.785714285714286e-05, "loss": 4.1286, "step": 60 }, { "epoch": 0.0036953152938371675, "grad_norm": 6.458080291748047, "learning_rate": 1.8452380952380954e-05, "loss": 4.0036, "step": 62 }, { "epoch": 0.003814519012993205, "grad_norm": 7.6938934326171875, "learning_rate": 1.9047619047619046e-05, "loss": 3.8599, "step": 64 }, { "epoch": 0.003933722732149243, "grad_norm": 8.15690803527832, "learning_rate": 1.9642857142857145e-05, "loss": 3.838, "step": 66 }, { "epoch": 0.004052926451305281, "grad_norm": 7.685982704162598, "learning_rate": 2.023809523809524e-05, "loss": 3.6339, "step": 68 }, { "epoch": 0.004172130170461318, "grad_norm": 6.831955432891846, "learning_rate": 2.0833333333333336e-05, "loss": 3.5921, "step": 70 }, { "epoch": 0.004291333889617356, "grad_norm": 7.538076400756836, "learning_rate": 2.1428571428571428e-05, "loss": 3.5376, "step": 72 }, { "epoch": 0.004410537608773394, "grad_norm": 7.233453273773193, "learning_rate": 2.2023809523809524e-05, "loss": 3.5104, "step": 74 }, { "epoch": 0.004529741327929431, "grad_norm": 9.339142799377441, "learning_rate": 2.261904761904762e-05, "loss": 3.4606, "step": 76 }, { "epoch": 0.004648945047085469, "grad_norm": 6.777206897735596, "learning_rate": 2.3214285714285715e-05, "loss": 3.2513, "step": 78 }, { "epoch": 0.004768148766241507, "grad_norm": 7.460524559020996, "learning_rate": 2.380952380952381e-05, "loss": 3.1966, "step": 80 }, { "epoch": 0.004887352485397544, "grad_norm": 6.421921253204346, "learning_rate": 2.4404761904761906e-05, "loss": 3.165, "step": 82 }, { "epoch": 0.005006556204553582, "grad_norm": 6.740740776062012, "learning_rate": 2.5e-05, "loss": 3.0655, "step": 84 }, { "epoch": 0.00512575992370962, "grad_norm": 6.041343688964844, "learning_rate": 2.5595238095238093e-05, "loss": 3.0973, "step": 86 }, { "epoch": 0.0052449636428656575, "grad_norm": 6.115782260894775, "learning_rate": 2.6190476190476192e-05, "loss": 3.1126, "step": 88 }, { "epoch": 0.005364167362021695, "grad_norm": 5.956474781036377, "learning_rate": 2.6785714285714288e-05, "loss": 3.0306, "step": 90 }, { "epoch": 0.005483371081177733, "grad_norm": 6.920962333679199, "learning_rate": 2.7380952380952383e-05, "loss": 2.9867, "step": 92 }, { "epoch": 0.0056025748003337705, "grad_norm": 6.542290687561035, "learning_rate": 2.797619047619048e-05, "loss": 2.8389, "step": 94 }, { "epoch": 0.005721778519489808, "grad_norm": 6.258520126342773, "learning_rate": 2.857142857142857e-05, "loss": 2.963, "step": 96 }, { "epoch": 0.005840982238645846, "grad_norm": 5.472183704376221, "learning_rate": 2.916666666666667e-05, "loss": 2.7756, "step": 98 }, { "epoch": 0.005960185957801884, "grad_norm": 5.66027307510376, "learning_rate": 2.9761904761904762e-05, "loss": 2.8274, "step": 100 }, { "epoch": 0.006079389676957921, "grad_norm": 5.976613998413086, "learning_rate": 3.0357142857142857e-05, "loss": 2.7036, "step": 102 }, { "epoch": 0.006198593396113959, "grad_norm": 5.60283088684082, "learning_rate": 3.095238095238095e-05, "loss": 2.724, "step": 104 }, { "epoch": 0.006317797115269997, "grad_norm": 5.856503963470459, "learning_rate": 3.154761904761905e-05, "loss": 2.5999, "step": 106 }, { "epoch": 0.006437000834426034, "grad_norm": 5.530018329620361, "learning_rate": 3.2142857142857144e-05, "loss": 2.6808, "step": 108 }, { "epoch": 0.006556204553582072, "grad_norm": 5.797548770904541, "learning_rate": 3.273809523809524e-05, "loss": 2.6626, "step": 110 }, { "epoch": 0.00667540827273811, "grad_norm": 5.2610764503479, "learning_rate": 3.3333333333333335e-05, "loss": 2.6186, "step": 112 }, { "epoch": 0.006794611991894147, "grad_norm": 5.613731861114502, "learning_rate": 3.392857142857143e-05, "loss": 2.519, "step": 114 }, { "epoch": 0.006913815711050185, "grad_norm": 5.41996955871582, "learning_rate": 3.4523809523809526e-05, "loss": 2.5644, "step": 116 }, { "epoch": 0.007033019430206223, "grad_norm": 5.902002334594727, "learning_rate": 3.511904761904762e-05, "loss": 2.6353, "step": 118 }, { "epoch": 0.0071522231493622605, "grad_norm": 6.296456336975098, "learning_rate": 3.571428571428572e-05, "loss": 2.5221, "step": 120 }, { "epoch": 0.007271426868518298, "grad_norm": 5.528128623962402, "learning_rate": 3.630952380952381e-05, "loss": 2.5491, "step": 122 }, { "epoch": 0.007390630587674335, "grad_norm": 5.898657321929932, "learning_rate": 3.690476190476191e-05, "loss": 2.6672, "step": 124 }, { "epoch": 0.007509834306830373, "grad_norm": 5.141305923461914, "learning_rate": 3.7500000000000003e-05, "loss": 2.5785, "step": 126 }, { "epoch": 0.00762903802598641, "grad_norm": 5.338465213775635, "learning_rate": 3.809523809523809e-05, "loss": 2.5329, "step": 128 }, { "epoch": 0.007748241745142448, "grad_norm": 5.396016597747803, "learning_rate": 3.8690476190476195e-05, "loss": 2.5043, "step": 130 }, { "epoch": 0.007867445464298486, "grad_norm": 5.133545398712158, "learning_rate": 3.928571428571429e-05, "loss": 2.4818, "step": 132 }, { "epoch": 0.007986649183454524, "grad_norm": 5.784792423248291, "learning_rate": 3.9880952380952386e-05, "loss": 2.359, "step": 134 }, { "epoch": 0.008105852902610561, "grad_norm": 5.402016639709473, "learning_rate": 4.047619047619048e-05, "loss": 2.454, "step": 136 }, { "epoch": 0.0082250566217666, "grad_norm": 5.288558483123779, "learning_rate": 4.107142857142857e-05, "loss": 2.5077, "step": 138 }, { "epoch": 0.008344260340922637, "grad_norm": 5.180827617645264, "learning_rate": 4.166666666666667e-05, "loss": 2.3765, "step": 140 }, { "epoch": 0.008463464060078675, "grad_norm": 5.20369815826416, "learning_rate": 4.226190476190476e-05, "loss": 2.4738, "step": 142 }, { "epoch": 0.008582667779234712, "grad_norm": 5.0655999183654785, "learning_rate": 4.2857142857142856e-05, "loss": 2.3455, "step": 144 }, { "epoch": 0.00870187149839075, "grad_norm": 5.737236976623535, "learning_rate": 4.345238095238096e-05, "loss": 2.4775, "step": 146 }, { "epoch": 0.008821075217546787, "grad_norm": 5.026365756988525, "learning_rate": 4.404761904761905e-05, "loss": 2.543, "step": 148 }, { "epoch": 0.008940278936702826, "grad_norm": 4.954567909240723, "learning_rate": 4.464285714285715e-05, "loss": 2.3105, "step": 150 }, { "epoch": 0.009059482655858863, "grad_norm": 5.220465660095215, "learning_rate": 4.523809523809524e-05, "loss": 2.4091, "step": 152 }, { "epoch": 0.009178686375014901, "grad_norm": 5.081100940704346, "learning_rate": 4.5833333333333334e-05, "loss": 2.4632, "step": 154 }, { "epoch": 0.009297890094170938, "grad_norm": 4.961655616760254, "learning_rate": 4.642857142857143e-05, "loss": 2.3466, "step": 156 }, { "epoch": 0.009417093813326977, "grad_norm": 5.073526859283447, "learning_rate": 4.7023809523809525e-05, "loss": 2.5058, "step": 158 }, { "epoch": 0.009536297532483013, "grad_norm": 4.781959056854248, "learning_rate": 4.761904761904762e-05, "loss": 2.3583, "step": 160 }, { "epoch": 0.009655501251639052, "grad_norm": 4.797377586364746, "learning_rate": 4.8214285714285716e-05, "loss": 2.217, "step": 162 }, { "epoch": 0.009774704970795089, "grad_norm": 5.302143573760986, "learning_rate": 4.880952380952381e-05, "loss": 2.3562, "step": 164 }, { "epoch": 0.009893908689951126, "grad_norm": 6.446615219116211, "learning_rate": 4.940476190476191e-05, "loss": 2.4365, "step": 166 }, { "epoch": 0.010013112409107164, "grad_norm": 4.935101509094238, "learning_rate": 5e-05, "loss": 2.2708, "step": 168 }, { "epoch": 0.010132316128263201, "grad_norm": 5.098577499389648, "learning_rate": 5.05952380952381e-05, "loss": 2.2395, "step": 170 }, { "epoch": 0.01025151984741924, "grad_norm": 5.2309722900390625, "learning_rate": 5.119047619047619e-05, "loss": 2.2782, "step": 172 }, { "epoch": 0.010370723566575276, "grad_norm": 4.807340621948242, "learning_rate": 5.1785714285714296e-05, "loss": 2.3685, "step": 174 }, { "epoch": 0.010489927285731315, "grad_norm": 5.194328784942627, "learning_rate": 5.2380952380952384e-05, "loss": 2.2101, "step": 176 }, { "epoch": 0.010609131004887352, "grad_norm": 5.2026753425598145, "learning_rate": 5.297619047619048e-05, "loss": 2.3184, "step": 178 }, { "epoch": 0.01072833472404339, "grad_norm": 5.302525997161865, "learning_rate": 5.3571428571428575e-05, "loss": 2.2856, "step": 180 }, { "epoch": 0.010847538443199427, "grad_norm": 5.0395894050598145, "learning_rate": 5.4166666666666664e-05, "loss": 2.2912, "step": 182 }, { "epoch": 0.010966742162355466, "grad_norm": 4.843148231506348, "learning_rate": 5.4761904761904766e-05, "loss": 2.0943, "step": 184 }, { "epoch": 0.011085945881511503, "grad_norm": 5.440023422241211, "learning_rate": 5.535714285714286e-05, "loss": 2.3585, "step": 186 }, { "epoch": 0.011205149600667541, "grad_norm": 4.829185485839844, "learning_rate": 5.595238095238096e-05, "loss": 2.1963, "step": 188 }, { "epoch": 0.011324353319823578, "grad_norm": 4.823942184448242, "learning_rate": 5.6547619047619046e-05, "loss": 2.2165, "step": 190 }, { "epoch": 0.011443557038979616, "grad_norm": 5.110050201416016, "learning_rate": 5.714285714285714e-05, "loss": 2.293, "step": 192 }, { "epoch": 0.011562760758135653, "grad_norm": 5.096272945404053, "learning_rate": 5.773809523809524e-05, "loss": 2.2458, "step": 194 }, { "epoch": 0.011681964477291692, "grad_norm": 5.159486293792725, "learning_rate": 5.833333333333334e-05, "loss": 2.1815, "step": 196 }, { "epoch": 0.011801168196447729, "grad_norm": 4.797835350036621, "learning_rate": 5.8928571428571435e-05, "loss": 2.1261, "step": 198 }, { "epoch": 0.011920371915603767, "grad_norm": 4.9112420082092285, "learning_rate": 5.9523809523809524e-05, "loss": 2.271, "step": 200 }, { "epoch": 0.012039575634759804, "grad_norm": 4.79754114151001, "learning_rate": 6.011904761904762e-05, "loss": 2.1241, "step": 202 }, { "epoch": 0.012158779353915843, "grad_norm": 4.747973918914795, "learning_rate": 6.0714285714285715e-05, "loss": 2.2473, "step": 204 }, { "epoch": 0.01227798307307188, "grad_norm": 4.543067932128906, "learning_rate": 6.130952380952381e-05, "loss": 2.153, "step": 206 }, { "epoch": 0.012397186792227918, "grad_norm": 5.071972846984863, "learning_rate": 6.19047619047619e-05, "loss": 2.246, "step": 208 }, { "epoch": 0.012516390511383955, "grad_norm": 5.960568428039551, "learning_rate": 6.25e-05, "loss": 2.1066, "step": 210 }, { "epoch": 0.012635594230539993, "grad_norm": 4.816952705383301, "learning_rate": 6.30952380952381e-05, "loss": 2.1825, "step": 212 }, { "epoch": 0.01275479794969603, "grad_norm": 4.414649963378906, "learning_rate": 6.369047619047619e-05, "loss": 2.2468, "step": 214 }, { "epoch": 0.012874001668852069, "grad_norm": 4.6052422523498535, "learning_rate": 6.428571428571429e-05, "loss": 2.2037, "step": 216 }, { "epoch": 0.012993205388008106, "grad_norm": 4.583548069000244, "learning_rate": 6.488095238095238e-05, "loss": 2.197, "step": 218 }, { "epoch": 0.013112409107164144, "grad_norm": 4.7177042961120605, "learning_rate": 6.547619047619048e-05, "loss": 2.2937, "step": 220 }, { "epoch": 0.013231612826320181, "grad_norm": 4.7831807136535645, "learning_rate": 6.607142857142857e-05, "loss": 2.2202, "step": 222 }, { "epoch": 0.01335081654547622, "grad_norm": 4.942572116851807, "learning_rate": 6.666666666666667e-05, "loss": 2.1736, "step": 224 }, { "epoch": 0.013470020264632256, "grad_norm": 4.845131874084473, "learning_rate": 6.726190476190477e-05, "loss": 2.1754, "step": 226 }, { "epoch": 0.013589223983788295, "grad_norm": 4.915685653686523, "learning_rate": 6.785714285714286e-05, "loss": 2.1783, "step": 228 }, { "epoch": 0.013708427702944332, "grad_norm": 5.197696685791016, "learning_rate": 6.845238095238096e-05, "loss": 2.1984, "step": 230 }, { "epoch": 0.01382763142210037, "grad_norm": 4.638832092285156, "learning_rate": 6.904761904761905e-05, "loss": 2.2506, "step": 232 }, { "epoch": 0.013946835141256407, "grad_norm": 4.965087890625, "learning_rate": 6.964285714285715e-05, "loss": 2.1042, "step": 234 }, { "epoch": 0.014066038860412446, "grad_norm": 4.8698201179504395, "learning_rate": 7.023809523809524e-05, "loss": 2.2558, "step": 236 }, { "epoch": 0.014185242579568482, "grad_norm": 4.577701568603516, "learning_rate": 7.083333333333334e-05, "loss": 2.2403, "step": 238 }, { "epoch": 0.014304446298724521, "grad_norm": 4.556105136871338, "learning_rate": 7.142857142857143e-05, "loss": 2.0918, "step": 240 }, { "epoch": 0.014423650017880558, "grad_norm": 5.1342573165893555, "learning_rate": 7.202380952380953e-05, "loss": 2.2796, "step": 242 }, { "epoch": 0.014542853737036596, "grad_norm": 4.4098711013793945, "learning_rate": 7.261904761904762e-05, "loss": 2.1222, "step": 244 }, { "epoch": 0.014662057456192633, "grad_norm": 4.606745719909668, "learning_rate": 7.321428571428571e-05, "loss": 2.2614, "step": 246 }, { "epoch": 0.01478126117534867, "grad_norm": 4.363879680633545, "learning_rate": 7.380952380952382e-05, "loss": 2.2211, "step": 248 }, { "epoch": 0.014900464894504709, "grad_norm": 5.428186416625977, "learning_rate": 7.440476190476191e-05, "loss": 2.1354, "step": 250 }, { "epoch": 0.015019668613660745, "grad_norm": 4.616875648498535, "learning_rate": 7.500000000000001e-05, "loss": 2.1453, "step": 252 }, { "epoch": 0.015138872332816784, "grad_norm": 4.1831841468811035, "learning_rate": 7.55952380952381e-05, "loss": 2.1566, "step": 254 }, { "epoch": 0.01525807605197282, "grad_norm": 4.248276710510254, "learning_rate": 7.619047619047618e-05, "loss": 2.1122, "step": 256 }, { "epoch": 0.01537727977112886, "grad_norm": 4.470058441162109, "learning_rate": 7.67857142857143e-05, "loss": 2.166, "step": 258 }, { "epoch": 0.015496483490284896, "grad_norm": 4.867092609405518, "learning_rate": 7.738095238095239e-05, "loss": 2.2181, "step": 260 }, { "epoch": 0.015615687209440935, "grad_norm": 4.998398780822754, "learning_rate": 7.797619047619048e-05, "loss": 2.1745, "step": 262 }, { "epoch": 0.01573489092859697, "grad_norm": 4.46311092376709, "learning_rate": 7.857142857142858e-05, "loss": 1.9947, "step": 264 }, { "epoch": 0.01585409464775301, "grad_norm": 4.693447589874268, "learning_rate": 7.916666666666666e-05, "loss": 2.1929, "step": 266 }, { "epoch": 0.01597329836690905, "grad_norm": 4.494935512542725, "learning_rate": 7.976190476190477e-05, "loss": 2.0718, "step": 268 }, { "epoch": 0.016092502086065084, "grad_norm": 4.053395748138428, "learning_rate": 8.035714285714287e-05, "loss": 2.1305, "step": 270 }, { "epoch": 0.016211705805221122, "grad_norm": 4.168095588684082, "learning_rate": 8.095238095238096e-05, "loss": 2.2346, "step": 272 }, { "epoch": 0.01633090952437716, "grad_norm": 4.409247398376465, "learning_rate": 8.154761904761904e-05, "loss": 2.2228, "step": 274 }, { "epoch": 0.0164501132435332, "grad_norm": 4.415099620819092, "learning_rate": 8.214285714285714e-05, "loss": 2.0837, "step": 276 }, { "epoch": 0.016569316962689234, "grad_norm": 4.49747896194458, "learning_rate": 8.273809523809524e-05, "loss": 2.0639, "step": 278 }, { "epoch": 0.016688520681845273, "grad_norm": 4.162817001342773, "learning_rate": 8.333333333333334e-05, "loss": 2.0206, "step": 280 }, { "epoch": 0.01680772440100131, "grad_norm": 4.283680438995361, "learning_rate": 8.392857142857144e-05, "loss": 2.026, "step": 282 }, { "epoch": 0.01692692812015735, "grad_norm": 4.816770553588867, "learning_rate": 8.452380952380952e-05, "loss": 2.0113, "step": 284 }, { "epoch": 0.017046131839313385, "grad_norm": 4.8433990478515625, "learning_rate": 8.511904761904762e-05, "loss": 1.9689, "step": 286 }, { "epoch": 0.017165335558469424, "grad_norm": 4.29586935043335, "learning_rate": 8.571428571428571e-05, "loss": 1.9463, "step": 288 }, { "epoch": 0.017284539277625462, "grad_norm": 4.348787307739258, "learning_rate": 8.630952380952382e-05, "loss": 2.0937, "step": 290 }, { "epoch": 0.0174037429967815, "grad_norm": 4.270557403564453, "learning_rate": 8.690476190476192e-05, "loss": 2.0741, "step": 292 }, { "epoch": 0.017522946715937536, "grad_norm": 4.226287364959717, "learning_rate": 8.75e-05, "loss": 2.0923, "step": 294 }, { "epoch": 0.017642150435093575, "grad_norm": 4.277609348297119, "learning_rate": 8.80952380952381e-05, "loss": 2.0188, "step": 296 }, { "epoch": 0.017761354154249613, "grad_norm": 4.310199737548828, "learning_rate": 8.869047619047619e-05, "loss": 2.1085, "step": 298 }, { "epoch": 0.01788055787340565, "grad_norm": 4.2167134284973145, "learning_rate": 8.92857142857143e-05, "loss": 1.9473, "step": 300 }, { "epoch": 0.017999761592561687, "grad_norm": 4.304976463317871, "learning_rate": 8.988095238095238e-05, "loss": 2.0005, "step": 302 }, { "epoch": 0.018118965311717725, "grad_norm": 4.19467306137085, "learning_rate": 9.047619047619048e-05, "loss": 2.1255, "step": 304 }, { "epoch": 0.018238169030873764, "grad_norm": 4.663391590118408, "learning_rate": 9.107142857142857e-05, "loss": 2.0653, "step": 306 }, { "epoch": 0.018357372750029802, "grad_norm": 4.67908239364624, "learning_rate": 9.166666666666667e-05, "loss": 2.1058, "step": 308 }, { "epoch": 0.018476576469185838, "grad_norm": 5.005115509033203, "learning_rate": 9.226190476190478e-05, "loss": 2.0625, "step": 310 }, { "epoch": 0.018595780188341876, "grad_norm": 4.4068403244018555, "learning_rate": 9.285714285714286e-05, "loss": 2.2295, "step": 312 }, { "epoch": 0.018714983907497915, "grad_norm": 4.328132152557373, "learning_rate": 9.345238095238095e-05, "loss": 2.1263, "step": 314 }, { "epoch": 0.018834187626653953, "grad_norm": 4.454077243804932, "learning_rate": 9.404761904761905e-05, "loss": 2.0945, "step": 316 }, { "epoch": 0.01895339134580999, "grad_norm": 4.451523303985596, "learning_rate": 9.464285714285715e-05, "loss": 2.0691, "step": 318 }, { "epoch": 0.019072595064966027, "grad_norm": 4.381167411804199, "learning_rate": 9.523809523809524e-05, "loss": 2.022, "step": 320 }, { "epoch": 0.019191798784122065, "grad_norm": 4.444738388061523, "learning_rate": 9.583333333333334e-05, "loss": 2.0732, "step": 322 }, { "epoch": 0.019311002503278104, "grad_norm": 4.356651306152344, "learning_rate": 9.642857142857143e-05, "loss": 2.0575, "step": 324 }, { "epoch": 0.01943020622243414, "grad_norm": 5.674497604370117, "learning_rate": 9.702380952380953e-05, "loss": 2.1728, "step": 326 }, { "epoch": 0.019549409941590178, "grad_norm": 4.5333251953125, "learning_rate": 9.761904761904762e-05, "loss": 1.9931, "step": 328 }, { "epoch": 0.019668613660746216, "grad_norm": 4.250735282897949, "learning_rate": 9.821428571428572e-05, "loss": 2.0263, "step": 330 }, { "epoch": 0.01978781737990225, "grad_norm": 4.260344505310059, "learning_rate": 9.880952380952381e-05, "loss": 2.0743, "step": 332 }, { "epoch": 0.01990702109905829, "grad_norm": 4.2949066162109375, "learning_rate": 9.940476190476191e-05, "loss": 2.013, "step": 334 }, { "epoch": 0.02002622481821433, "grad_norm": 4.104248523712158, "learning_rate": 0.0001, "loss": 2.0529, "step": 336 }, { "epoch": 0.020145428537370367, "grad_norm": 4.452236175537109, "learning_rate": 9.999999910566447e-05, "loss": 2.2044, "step": 338 }, { "epoch": 0.020264632256526402, "grad_norm": 4.214008331298828, "learning_rate": 9.999999642265789e-05, "loss": 2.171, "step": 340 }, { "epoch": 0.02038383597568244, "grad_norm": 4.2079997062683105, "learning_rate": 9.999999195098037e-05, "loss": 2.1326, "step": 342 }, { "epoch": 0.02050303969483848, "grad_norm": 3.983579397201538, "learning_rate": 9.999998569063206e-05, "loss": 2.0528, "step": 344 }, { "epoch": 0.020622243413994518, "grad_norm": 4.492088317871094, "learning_rate": 9.99999776416132e-05, "loss": 2.047, "step": 346 }, { "epoch": 0.020741447133150553, "grad_norm": 4.252041816711426, "learning_rate": 9.999996780392404e-05, "loss": 2.0547, "step": 348 }, { "epoch": 0.02086065085230659, "grad_norm": 4.31392240524292, "learning_rate": 9.999995617756498e-05, "loss": 2.1434, "step": 350 }, { "epoch": 0.02097985457146263, "grad_norm": 4.520379543304443, "learning_rate": 9.999994276253642e-05, "loss": 2.063, "step": 352 }, { "epoch": 0.02109905829061867, "grad_norm": 4.003165245056152, "learning_rate": 9.999992755883882e-05, "loss": 1.8828, "step": 354 }, { "epoch": 0.021218262009774704, "grad_norm": 4.410750389099121, "learning_rate": 9.999991056647274e-05, "loss": 2.0729, "step": 356 }, { "epoch": 0.021337465728930742, "grad_norm": 4.490664958953857, "learning_rate": 9.999989178543879e-05, "loss": 2.0459, "step": 358 }, { "epoch": 0.02145666944808678, "grad_norm": 4.508906364440918, "learning_rate": 9.999987121573764e-05, "loss": 2.0083, "step": 360 }, { "epoch": 0.02157587316724282, "grad_norm": 4.066667079925537, "learning_rate": 9.999984885737002e-05, "loss": 2.0824, "step": 362 }, { "epoch": 0.021695076886398854, "grad_norm": 4.2700018882751465, "learning_rate": 9.999982471033673e-05, "loss": 2.1612, "step": 364 }, { "epoch": 0.021814280605554893, "grad_norm": 4.437095642089844, "learning_rate": 9.999979877463865e-05, "loss": 2.0952, "step": 366 }, { "epoch": 0.02193348432471093, "grad_norm": 3.9456911087036133, "learning_rate": 9.999977105027669e-05, "loss": 2.0013, "step": 368 }, { "epoch": 0.02205268804386697, "grad_norm": 4.232547283172607, "learning_rate": 9.999974153725184e-05, "loss": 1.9426, "step": 370 }, { "epoch": 0.022171891763023005, "grad_norm": 4.270139694213867, "learning_rate": 9.999971023556517e-05, "loss": 2.0443, "step": 372 }, { "epoch": 0.022291095482179044, "grad_norm": 4.107934951782227, "learning_rate": 9.999967714521779e-05, "loss": 2.0425, "step": 374 }, { "epoch": 0.022410299201335082, "grad_norm": 4.0284223556518555, "learning_rate": 9.999964226621088e-05, "loss": 2.0926, "step": 376 }, { "epoch": 0.02252950292049112, "grad_norm": 4.1788530349731445, "learning_rate": 9.99996055985457e-05, "loss": 1.9729, "step": 378 }, { "epoch": 0.022648706639647156, "grad_norm": 4.2001447677612305, "learning_rate": 9.999956714222355e-05, "loss": 2.0065, "step": 380 }, { "epoch": 0.022767910358803194, "grad_norm": 4.310724258422852, "learning_rate": 9.999952689724583e-05, "loss": 1.9837, "step": 382 }, { "epoch": 0.022887114077959233, "grad_norm": 4.270334720611572, "learning_rate": 9.999948486361394e-05, "loss": 2.1719, "step": 384 }, { "epoch": 0.02300631779711527, "grad_norm": 4.309887409210205, "learning_rate": 9.999944104132944e-05, "loss": 2.0184, "step": 386 }, { "epoch": 0.023125521516271307, "grad_norm": 4.216723442077637, "learning_rate": 9.999939543039382e-05, "loss": 1.9099, "step": 388 }, { "epoch": 0.023244725235427345, "grad_norm": 4.276523590087891, "learning_rate": 9.999934803080876e-05, "loss": 2.0397, "step": 390 }, { "epoch": 0.023363928954583384, "grad_norm": 4.156580448150635, "learning_rate": 9.999929884257599e-05, "loss": 2.0732, "step": 392 }, { "epoch": 0.023483132673739422, "grad_norm": 3.883631944656372, "learning_rate": 9.999924786569718e-05, "loss": 1.8978, "step": 394 }, { "epoch": 0.023602336392895457, "grad_norm": 4.137846946716309, "learning_rate": 9.999919510017423e-05, "loss": 1.9789, "step": 396 }, { "epoch": 0.023721540112051496, "grad_norm": 4.162403583526611, "learning_rate": 9.999914054600901e-05, "loss": 2.1091, "step": 398 }, { "epoch": 0.023840743831207534, "grad_norm": 3.9970157146453857, "learning_rate": 9.999908420320344e-05, "loss": 2.1142, "step": 400 }, { "epoch": 0.023959947550363573, "grad_norm": 4.671848297119141, "learning_rate": 9.999902607175957e-05, "loss": 2.0595, "step": 402 }, { "epoch": 0.024079151269519608, "grad_norm": 3.9333367347717285, "learning_rate": 9.999896615167947e-05, "loss": 1.8963, "step": 404 }, { "epoch": 0.024198354988675647, "grad_norm": 4.755949974060059, "learning_rate": 9.999890444296526e-05, "loss": 2.0077, "step": 406 }, { "epoch": 0.024317558707831685, "grad_norm": 4.286973476409912, "learning_rate": 9.99988409456192e-05, "loss": 2.1007, "step": 408 }, { "epoch": 0.024436762426987724, "grad_norm": 4.054914474487305, "learning_rate": 9.99987756596435e-05, "loss": 1.8409, "step": 410 }, { "epoch": 0.02455596614614376, "grad_norm": 4.119385242462158, "learning_rate": 9.999870858504055e-05, "loss": 2.077, "step": 412 }, { "epoch": 0.024675169865299797, "grad_norm": 4.788358688354492, "learning_rate": 9.999863972181271e-05, "loss": 2.0435, "step": 414 }, { "epoch": 0.024794373584455836, "grad_norm": 3.9225261211395264, "learning_rate": 9.999856906996247e-05, "loss": 2.0755, "step": 416 }, { "epoch": 0.02491357730361187, "grad_norm": 5.088071346282959, "learning_rate": 9.999849662949233e-05, "loss": 2.0157, "step": 418 }, { "epoch": 0.02503278102276791, "grad_norm": 4.089044570922852, "learning_rate": 9.999842240040491e-05, "loss": 1.9659, "step": 420 }, { "epoch": 0.025151984741923948, "grad_norm": 4.620054244995117, "learning_rate": 9.999834638270285e-05, "loss": 2.0322, "step": 422 }, { "epoch": 0.025271188461079987, "grad_norm": 3.943803310394287, "learning_rate": 9.999826857638887e-05, "loss": 2.0364, "step": 424 }, { "epoch": 0.025390392180236022, "grad_norm": 4.00911283493042, "learning_rate": 9.999818898146576e-05, "loss": 1.8323, "step": 426 }, { "epoch": 0.02550959589939206, "grad_norm": 3.925180435180664, "learning_rate": 9.999810759793637e-05, "loss": 1.9724, "step": 428 }, { "epoch": 0.0256287996185481, "grad_norm": 3.878373384475708, "learning_rate": 9.99980244258036e-05, "loss": 1.9464, "step": 430 }, { "epoch": 0.025748003337704137, "grad_norm": 4.273764610290527, "learning_rate": 9.99979394650704e-05, "loss": 2.1609, "step": 432 }, { "epoch": 0.025867207056860173, "grad_norm": 3.837289810180664, "learning_rate": 9.999785271573988e-05, "loss": 2.1223, "step": 434 }, { "epoch": 0.02598641077601621, "grad_norm": 3.7536025047302246, "learning_rate": 9.999776417781509e-05, "loss": 1.9499, "step": 436 }, { "epoch": 0.02610561449517225, "grad_norm": 4.123745918273926, "learning_rate": 9.999767385129921e-05, "loss": 1.9476, "step": 438 }, { "epoch": 0.026224818214328288, "grad_norm": 3.668419599533081, "learning_rate": 9.999758173619547e-05, "loss": 1.9528, "step": 440 }, { "epoch": 0.026344021933484323, "grad_norm": 3.7763912677764893, "learning_rate": 9.999748783250716e-05, "loss": 1.9542, "step": 442 }, { "epoch": 0.026463225652640362, "grad_norm": 4.292168617248535, "learning_rate": 9.999739214023765e-05, "loss": 2.017, "step": 444 }, { "epoch": 0.0265824293717964, "grad_norm": 4.245398044586182, "learning_rate": 9.999729465939036e-05, "loss": 1.931, "step": 446 }, { "epoch": 0.02670163309095244, "grad_norm": 3.793325901031494, "learning_rate": 9.999719538996878e-05, "loss": 1.939, "step": 448 }, { "epoch": 0.026820836810108474, "grad_norm": 3.8308868408203125, "learning_rate": 9.999709433197646e-05, "loss": 1.9768, "step": 450 }, { "epoch": 0.026940040529264513, "grad_norm": 4.208525657653809, "learning_rate": 9.999699148541698e-05, "loss": 2.1803, "step": 452 }, { "epoch": 0.02705924424842055, "grad_norm": 4.270174503326416, "learning_rate": 9.999688685029408e-05, "loss": 1.9843, "step": 454 }, { "epoch": 0.02717844796757659, "grad_norm": 4.054558277130127, "learning_rate": 9.999678042661149e-05, "loss": 1.8099, "step": 456 }, { "epoch": 0.027297651686732625, "grad_norm": 4.595709323883057, "learning_rate": 9.999667221437299e-05, "loss": 2.0192, "step": 458 }, { "epoch": 0.027416855405888663, "grad_norm": 3.8418993949890137, "learning_rate": 9.999656221358245e-05, "loss": 1.9321, "step": 460 }, { "epoch": 0.027536059125044702, "grad_norm": 3.4978368282318115, "learning_rate": 9.999645042424382e-05, "loss": 2.0943, "step": 462 }, { "epoch": 0.02765526284420074, "grad_norm": 3.5452005863189697, "learning_rate": 9.999633684636112e-05, "loss": 1.9146, "step": 464 }, { "epoch": 0.027774466563356776, "grad_norm": 4.016761779785156, "learning_rate": 9.999622147993836e-05, "loss": 1.9475, "step": 466 }, { "epoch": 0.027893670282512814, "grad_norm": 4.090212345123291, "learning_rate": 9.999610432497971e-05, "loss": 2.0226, "step": 468 }, { "epoch": 0.028012874001668853, "grad_norm": 4.127773761749268, "learning_rate": 9.999598538148936e-05, "loss": 1.8531, "step": 470 }, { "epoch": 0.02813207772082489, "grad_norm": 3.692826271057129, "learning_rate": 9.999586464947152e-05, "loss": 1.8801, "step": 472 }, { "epoch": 0.028251281439980926, "grad_norm": 3.6499807834625244, "learning_rate": 9.999574212893058e-05, "loss": 1.8802, "step": 474 }, { "epoch": 0.028370485159136965, "grad_norm": 3.82844614982605, "learning_rate": 9.999561781987087e-05, "loss": 2.0072, "step": 476 }, { "epoch": 0.028489688878293003, "grad_norm": 4.183495998382568, "learning_rate": 9.999549172229685e-05, "loss": 2.1327, "step": 478 }, { "epoch": 0.028608892597449042, "grad_norm": 3.8257932662963867, "learning_rate": 9.999536383621304e-05, "loss": 2.013, "step": 480 }, { "epoch": 0.028728096316605077, "grad_norm": 3.720527172088623, "learning_rate": 9.9995234161624e-05, "loss": 1.8964, "step": 482 }, { "epoch": 0.028847300035761116, "grad_norm": 3.9114062786102295, "learning_rate": 9.999510269853438e-05, "loss": 2.0217, "step": 484 }, { "epoch": 0.028966503754917154, "grad_norm": 4.005721092224121, "learning_rate": 9.999496944694888e-05, "loss": 2.0487, "step": 486 }, { "epoch": 0.029085707474073193, "grad_norm": 4.493856906890869, "learning_rate": 9.999483440687227e-05, "loss": 2.0874, "step": 488 }, { "epoch": 0.029204911193229228, "grad_norm": 3.784331798553467, "learning_rate": 9.999469757830938e-05, "loss": 1.8757, "step": 490 }, { "epoch": 0.029324114912385266, "grad_norm": 4.934168815612793, "learning_rate": 9.999455896126511e-05, "loss": 2.2296, "step": 492 }, { "epoch": 0.029443318631541305, "grad_norm": 5.689783096313477, "learning_rate": 9.999441855574439e-05, "loss": 2.0036, "step": 494 }, { "epoch": 0.02956252235069734, "grad_norm": 4.025609493255615, "learning_rate": 9.999427636175228e-05, "loss": 1.9804, "step": 496 }, { "epoch": 0.02968172606985338, "grad_norm": 4.076634883880615, "learning_rate": 9.999413237929385e-05, "loss": 2.0361, "step": 498 }, { "epoch": 0.029800929789009417, "grad_norm": 4.094231128692627, "learning_rate": 9.999398660837425e-05, "loss": 2.1403, "step": 500 }, { "epoch": 0.029920133508165456, "grad_norm": 3.6801645755767822, "learning_rate": 9.999383904899869e-05, "loss": 1.8414, "step": 502 }, { "epoch": 0.03003933722732149, "grad_norm": 4.137254238128662, "learning_rate": 9.999368970117245e-05, "loss": 2.0883, "step": 504 }, { "epoch": 0.03015854094647753, "grad_norm": 3.8041841983795166, "learning_rate": 9.999353856490089e-05, "loss": 1.8697, "step": 506 }, { "epoch": 0.030277744665633568, "grad_norm": 4.5356340408325195, "learning_rate": 9.999338564018941e-05, "loss": 1.8547, "step": 508 }, { "epoch": 0.030396948384789606, "grad_norm": 3.7681736946105957, "learning_rate": 9.999323092704347e-05, "loss": 1.9116, "step": 510 }, { "epoch": 0.03051615210394564, "grad_norm": 4.213504791259766, "learning_rate": 9.999307442546859e-05, "loss": 2.0183, "step": 512 }, { "epoch": 0.03063535582310168, "grad_norm": 4.05720329284668, "learning_rate": 9.999291613547041e-05, "loss": 2.0087, "step": 514 }, { "epoch": 0.03075455954225772, "grad_norm": 3.546346426010132, "learning_rate": 9.999275605705456e-05, "loss": 1.9092, "step": 516 }, { "epoch": 0.030873763261413757, "grad_norm": 4.351977825164795, "learning_rate": 9.99925941902268e-05, "loss": 1.956, "step": 518 }, { "epoch": 0.030992966980569792, "grad_norm": 3.8898580074310303, "learning_rate": 9.999243053499287e-05, "loss": 1.999, "step": 520 }, { "epoch": 0.03111217069972583, "grad_norm": 3.746230125427246, "learning_rate": 9.999226509135866e-05, "loss": 1.8562, "step": 522 }, { "epoch": 0.03123137441888187, "grad_norm": 4.225787162780762, "learning_rate": 9.99920978593301e-05, "loss": 1.9769, "step": 524 }, { "epoch": 0.031350578138037904, "grad_norm": 3.8328192234039307, "learning_rate": 9.999192883891313e-05, "loss": 1.886, "step": 526 }, { "epoch": 0.03146978185719394, "grad_norm": 3.947373628616333, "learning_rate": 9.999175803011383e-05, "loss": 1.9288, "step": 528 }, { "epoch": 0.03158898557634998, "grad_norm": 3.722130298614502, "learning_rate": 9.999158543293829e-05, "loss": 1.7995, "step": 530 }, { "epoch": 0.03170818929550602, "grad_norm": 4.145242214202881, "learning_rate": 9.999141104739271e-05, "loss": 1.9221, "step": 532 }, { "epoch": 0.03182739301466206, "grad_norm": 4.1413373947143555, "learning_rate": 9.999123487348329e-05, "loss": 1.9761, "step": 534 }, { "epoch": 0.0319465967338181, "grad_norm": 4.195774555206299, "learning_rate": 9.999105691121638e-05, "loss": 2.0058, "step": 536 }, { "epoch": 0.032065800452974136, "grad_norm": 3.729015588760376, "learning_rate": 9.999087716059832e-05, "loss": 1.9504, "step": 538 }, { "epoch": 0.03218500417213017, "grad_norm": 3.524674415588379, "learning_rate": 9.999069562163554e-05, "loss": 1.7871, "step": 540 }, { "epoch": 0.032304207891286206, "grad_norm": 4.159297466278076, "learning_rate": 9.999051229433451e-05, "loss": 1.8986, "step": 542 }, { "epoch": 0.032423411610442245, "grad_norm": 4.346841335296631, "learning_rate": 9.999032717870185e-05, "loss": 2.0131, "step": 544 }, { "epoch": 0.03254261532959828, "grad_norm": 4.111457347869873, "learning_rate": 9.999014027474414e-05, "loss": 1.9433, "step": 546 }, { "epoch": 0.03266181904875432, "grad_norm": 4.044780254364014, "learning_rate": 9.998995158246805e-05, "loss": 2.0054, "step": 548 }, { "epoch": 0.03278102276791036, "grad_norm": 4.020186424255371, "learning_rate": 9.998976110188038e-05, "loss": 2.067, "step": 550 }, { "epoch": 0.0329002264870664, "grad_norm": 4.010038375854492, "learning_rate": 9.99895688329879e-05, "loss": 1.8961, "step": 552 }, { "epoch": 0.03301943020622244, "grad_norm": 3.755471706390381, "learning_rate": 9.998937477579751e-05, "loss": 1.9858, "step": 554 }, { "epoch": 0.03313863392537847, "grad_norm": 3.981943368911743, "learning_rate": 9.998917893031616e-05, "loss": 1.8551, "step": 556 }, { "epoch": 0.03325783764453451, "grad_norm": 3.88299298286438, "learning_rate": 9.998898129655083e-05, "loss": 1.8901, "step": 558 }, { "epoch": 0.033377041363690546, "grad_norm": 4.115593910217285, "learning_rate": 9.99887818745086e-05, "loss": 1.8272, "step": 560 }, { "epoch": 0.033496245082846585, "grad_norm": 4.787846088409424, "learning_rate": 9.998858066419661e-05, "loss": 1.9643, "step": 562 }, { "epoch": 0.03361544880200262, "grad_norm": 3.9095871448516846, "learning_rate": 9.998837766562206e-05, "loss": 2.1697, "step": 564 }, { "epoch": 0.03373465252115866, "grad_norm": 3.6306307315826416, "learning_rate": 9.99881728787922e-05, "loss": 1.9494, "step": 566 }, { "epoch": 0.0338538562403147, "grad_norm": 4.106777191162109, "learning_rate": 9.998796630371439e-05, "loss": 1.9746, "step": 568 }, { "epoch": 0.03397305995947074, "grad_norm": 3.8930914402008057, "learning_rate": 9.998775794039597e-05, "loss": 1.798, "step": 570 }, { "epoch": 0.03409226367862677, "grad_norm": 3.985443115234375, "learning_rate": 9.99875477888444e-05, "loss": 1.9318, "step": 572 }, { "epoch": 0.03421146739778281, "grad_norm": 3.8978285789489746, "learning_rate": 9.998733584906726e-05, "loss": 1.9347, "step": 574 }, { "epoch": 0.03433067111693885, "grad_norm": 4.048766613006592, "learning_rate": 9.998712212107205e-05, "loss": 1.8802, "step": 576 }, { "epoch": 0.034449874836094886, "grad_norm": 3.9987893104553223, "learning_rate": 9.998690660486647e-05, "loss": 1.8155, "step": 578 }, { "epoch": 0.034569078555250925, "grad_norm": 3.7978057861328125, "learning_rate": 9.99866893004582e-05, "loss": 1.8257, "step": 580 }, { "epoch": 0.03468828227440696, "grad_norm": 4.041508674621582, "learning_rate": 9.998647020785503e-05, "loss": 2.018, "step": 582 }, { "epoch": 0.034807485993563, "grad_norm": 4.477261543273926, "learning_rate": 9.99862493270648e-05, "loss": 1.9215, "step": 584 }, { "epoch": 0.03492668971271903, "grad_norm": 3.855879068374634, "learning_rate": 9.99860266580954e-05, "loss": 2.0123, "step": 586 }, { "epoch": 0.03504589343187507, "grad_norm": 3.653276205062866, "learning_rate": 9.998580220095482e-05, "loss": 1.9389, "step": 588 }, { "epoch": 0.03516509715103111, "grad_norm": 3.664243698120117, "learning_rate": 9.998557595565105e-05, "loss": 1.8671, "step": 590 }, { "epoch": 0.03528430087018715, "grad_norm": 3.936702013015747, "learning_rate": 9.998534792219221e-05, "loss": 1.9632, "step": 592 }, { "epoch": 0.03540350458934319, "grad_norm": 3.733975648880005, "learning_rate": 9.998511810058644e-05, "loss": 1.858, "step": 594 }, { "epoch": 0.035522708308499226, "grad_norm": 3.9623677730560303, "learning_rate": 9.9984886490842e-05, "loss": 1.8401, "step": 596 }, { "epoch": 0.035641912027655265, "grad_norm": 3.8693408966064453, "learning_rate": 9.998465309296711e-05, "loss": 1.9721, "step": 598 }, { "epoch": 0.0357611157468113, "grad_norm": 4.1588592529296875, "learning_rate": 9.99844179069702e-05, "loss": 1.8468, "step": 600 }, { "epoch": 0.035880319465967335, "grad_norm": 3.67008113861084, "learning_rate": 9.998418093285962e-05, "loss": 1.9686, "step": 602 }, { "epoch": 0.035999523185123374, "grad_norm": 3.765162229537964, "learning_rate": 9.998394217064387e-05, "loss": 1.7751, "step": 604 }, { "epoch": 0.03611872690427941, "grad_norm": 4.207659721374512, "learning_rate": 9.998370162033148e-05, "loss": 2.009, "step": 606 }, { "epoch": 0.03623793062343545, "grad_norm": 3.963036060333252, "learning_rate": 9.998345928193108e-05, "loss": 1.9777, "step": 608 }, { "epoch": 0.03635713434259149, "grad_norm": 3.9007999897003174, "learning_rate": 9.998321515545132e-05, "loss": 1.9073, "step": 610 }, { "epoch": 0.03647633806174753, "grad_norm": 3.6975529193878174, "learning_rate": 9.998296924090095e-05, "loss": 1.8315, "step": 612 }, { "epoch": 0.036595541780903566, "grad_norm": 3.9848759174346924, "learning_rate": 9.998272153828873e-05, "loss": 1.7704, "step": 614 }, { "epoch": 0.036714745500059605, "grad_norm": 3.7332348823547363, "learning_rate": 9.998247204762357e-05, "loss": 1.7354, "step": 616 }, { "epoch": 0.036833949219215636, "grad_norm": 4.034224987030029, "learning_rate": 9.998222076891437e-05, "loss": 1.9241, "step": 618 }, { "epoch": 0.036953152938371675, "grad_norm": 3.8335888385772705, "learning_rate": 9.998196770217012e-05, "loss": 1.8793, "step": 620 }, { "epoch": 0.037072356657527714, "grad_norm": 4.0255045890808105, "learning_rate": 9.998171284739986e-05, "loss": 1.8685, "step": 622 }, { "epoch": 0.03719156037668375, "grad_norm": 3.929762601852417, "learning_rate": 9.998145620461273e-05, "loss": 1.9563, "step": 624 }, { "epoch": 0.03731076409583979, "grad_norm": 3.7175371646881104, "learning_rate": 9.99811977738179e-05, "loss": 1.9196, "step": 626 }, { "epoch": 0.03742996781499583, "grad_norm": 3.9324283599853516, "learning_rate": 9.998093755502463e-05, "loss": 1.7888, "step": 628 }, { "epoch": 0.03754917153415187, "grad_norm": 3.446769952774048, "learning_rate": 9.99806755482422e-05, "loss": 1.8325, "step": 630 }, { "epoch": 0.037668375253307906, "grad_norm": 3.537858247756958, "learning_rate": 9.998041175348e-05, "loss": 1.7931, "step": 632 }, { "epoch": 0.03778757897246394, "grad_norm": 3.346661329269409, "learning_rate": 9.998014617074746e-05, "loss": 1.8449, "step": 634 }, { "epoch": 0.03790678269161998, "grad_norm": 4.0200419425964355, "learning_rate": 9.99798788000541e-05, "loss": 1.9551, "step": 636 }, { "epoch": 0.038025986410776015, "grad_norm": 4.163057804107666, "learning_rate": 9.997960964140947e-05, "loss": 1.8472, "step": 638 }, { "epoch": 0.038145190129932054, "grad_norm": 3.9334471225738525, "learning_rate": 9.997933869482319e-05, "loss": 1.862, "step": 640 }, { "epoch": 0.03826439384908809, "grad_norm": 3.604588270187378, "learning_rate": 9.997906596030498e-05, "loss": 1.9827, "step": 642 }, { "epoch": 0.03838359756824413, "grad_norm": 3.863837957382202, "learning_rate": 9.997879143786456e-05, "loss": 1.8988, "step": 644 }, { "epoch": 0.03850280128740017, "grad_norm": 3.965510129928589, "learning_rate": 9.997851512751177e-05, "loss": 2.0578, "step": 646 }, { "epoch": 0.03862200500655621, "grad_norm": 3.8054611682891846, "learning_rate": 9.99782370292565e-05, "loss": 1.976, "step": 648 }, { "epoch": 0.03874120872571224, "grad_norm": 3.6208248138427734, "learning_rate": 9.997795714310869e-05, "loss": 1.8175, "step": 650 }, { "epoch": 0.03886041244486828, "grad_norm": 3.6987156867980957, "learning_rate": 9.997767546907837e-05, "loss": 1.7983, "step": 652 }, { "epoch": 0.03897961616402432, "grad_norm": 4.122995853424072, "learning_rate": 9.997739200717559e-05, "loss": 1.8905, "step": 654 }, { "epoch": 0.039098819883180355, "grad_norm": 3.7570419311523438, "learning_rate": 9.99771067574105e-05, "loss": 1.8945, "step": 656 }, { "epoch": 0.039218023602336394, "grad_norm": 3.9117379188537598, "learning_rate": 9.99768197197933e-05, "loss": 1.9658, "step": 658 }, { "epoch": 0.03933722732149243, "grad_norm": 3.4835267066955566, "learning_rate": 9.997653089433428e-05, "loss": 1.7373, "step": 660 }, { "epoch": 0.03945643104064847, "grad_norm": 3.7922916412353516, "learning_rate": 9.997624028104375e-05, "loss": 1.8872, "step": 662 }, { "epoch": 0.0395756347598045, "grad_norm": 4.020468711853027, "learning_rate": 9.997594787993211e-05, "loss": 1.8308, "step": 664 }, { "epoch": 0.03969483847896054, "grad_norm": 4.367739677429199, "learning_rate": 9.997565369100983e-05, "loss": 1.9742, "step": 666 }, { "epoch": 0.03981404219811658, "grad_norm": 3.851945638656616, "learning_rate": 9.997535771428742e-05, "loss": 1.9222, "step": 668 }, { "epoch": 0.03993324591727262, "grad_norm": 3.8889737129211426, "learning_rate": 9.997505994977548e-05, "loss": 1.9163, "step": 670 }, { "epoch": 0.04005244963642866, "grad_norm": 4.18920373916626, "learning_rate": 9.997476039748466e-05, "loss": 1.9672, "step": 672 }, { "epoch": 0.040171653355584695, "grad_norm": 3.826341152191162, "learning_rate": 9.997445905742567e-05, "loss": 1.7877, "step": 674 }, { "epoch": 0.040290857074740734, "grad_norm": 3.8386824131011963, "learning_rate": 9.99741559296093e-05, "loss": 1.7206, "step": 676 }, { "epoch": 0.04041006079389677, "grad_norm": 4.40521764755249, "learning_rate": 9.997385101404638e-05, "loss": 1.9755, "step": 678 }, { "epoch": 0.040529264513052804, "grad_norm": 3.651841163635254, "learning_rate": 9.997354431074784e-05, "loss": 1.7704, "step": 680 }, { "epoch": 0.04064846823220884, "grad_norm": 3.867647171020508, "learning_rate": 9.997323581972461e-05, "loss": 1.8241, "step": 682 }, { "epoch": 0.04076767195136488, "grad_norm": 3.76701283454895, "learning_rate": 9.997292554098779e-05, "loss": 1.8484, "step": 684 }, { "epoch": 0.04088687567052092, "grad_norm": 3.934725522994995, "learning_rate": 9.997261347454841e-05, "loss": 1.9475, "step": 686 }, { "epoch": 0.04100607938967696, "grad_norm": 3.898554563522339, "learning_rate": 9.997229962041768e-05, "loss": 2.0182, "step": 688 }, { "epoch": 0.041125283108833, "grad_norm": 3.7962496280670166, "learning_rate": 9.997198397860681e-05, "loss": 1.8032, "step": 690 }, { "epoch": 0.041244486827989035, "grad_norm": 3.613719940185547, "learning_rate": 9.997166654912711e-05, "loss": 1.8542, "step": 692 }, { "epoch": 0.041363690547145074, "grad_norm": 3.71389102935791, "learning_rate": 9.997134733198989e-05, "loss": 1.7995, "step": 694 }, { "epoch": 0.041482894266301105, "grad_norm": 3.9173388481140137, "learning_rate": 9.997102632720663e-05, "loss": 1.8936, "step": 696 }, { "epoch": 0.041602097985457144, "grad_norm": 3.7893924713134766, "learning_rate": 9.997070353478878e-05, "loss": 1.8157, "step": 698 }, { "epoch": 0.04172130170461318, "grad_norm": 3.657656192779541, "learning_rate": 9.997037895474788e-05, "loss": 1.8777, "step": 700 }, { "epoch": 0.04184050542376922, "grad_norm": 4.204029560089111, "learning_rate": 9.997005258709556e-05, "loss": 1.8848, "step": 702 }, { "epoch": 0.04195970914292526, "grad_norm": 4.150764465332031, "learning_rate": 9.99697244318435e-05, "loss": 1.9288, "step": 704 }, { "epoch": 0.0420789128620813, "grad_norm": 3.922637939453125, "learning_rate": 9.996939448900341e-05, "loss": 1.9001, "step": 706 }, { "epoch": 0.04219811658123734, "grad_norm": 3.6897096633911133, "learning_rate": 9.996906275858711e-05, "loss": 1.8096, "step": 708 }, { "epoch": 0.042317320300393375, "grad_norm": 3.906731605529785, "learning_rate": 9.996872924060647e-05, "loss": 2.0527, "step": 710 }, { "epoch": 0.04243652401954941, "grad_norm": 3.814807891845703, "learning_rate": 9.996839393507342e-05, "loss": 1.8374, "step": 712 }, { "epoch": 0.042555727738705446, "grad_norm": 3.8117659091949463, "learning_rate": 9.996805684199997e-05, "loss": 1.8369, "step": 714 }, { "epoch": 0.042674931457861484, "grad_norm": 3.3465750217437744, "learning_rate": 9.996771796139814e-05, "loss": 1.7518, "step": 716 }, { "epoch": 0.04279413517701752, "grad_norm": 4.134846210479736, "learning_rate": 9.99673772932801e-05, "loss": 1.9011, "step": 718 }, { "epoch": 0.04291333889617356, "grad_norm": 4.281081199645996, "learning_rate": 9.996703483765799e-05, "loss": 1.9606, "step": 720 }, { "epoch": 0.0430325426153296, "grad_norm": 3.7926313877105713, "learning_rate": 9.996669059454409e-05, "loss": 1.8251, "step": 722 }, { "epoch": 0.04315174633448564, "grad_norm": 3.68916392326355, "learning_rate": 9.99663445639507e-05, "loss": 1.8347, "step": 724 }, { "epoch": 0.04327095005364168, "grad_norm": 4.421775817871094, "learning_rate": 9.996599674589022e-05, "loss": 1.9194, "step": 726 }, { "epoch": 0.04339015377279771, "grad_norm": 3.7389602661132812, "learning_rate": 9.996564714037508e-05, "loss": 1.9651, "step": 728 }, { "epoch": 0.04350935749195375, "grad_norm": 3.9194064140319824, "learning_rate": 9.99652957474178e-05, "loss": 1.8898, "step": 730 }, { "epoch": 0.043628561211109786, "grad_norm": 3.86000657081604, "learning_rate": 9.99649425670309e-05, "loss": 1.8009, "step": 732 }, { "epoch": 0.043747764930265824, "grad_norm": 3.8095295429229736, "learning_rate": 9.996458759922708e-05, "loss": 1.8679, "step": 734 }, { "epoch": 0.04386696864942186, "grad_norm": 3.9861080646514893, "learning_rate": 9.9964230844019e-05, "loss": 1.9085, "step": 736 }, { "epoch": 0.0439861723685779, "grad_norm": 3.5536251068115234, "learning_rate": 9.996387230141944e-05, "loss": 1.7742, "step": 738 }, { "epoch": 0.04410537608773394, "grad_norm": 3.8289072513580322, "learning_rate": 9.99635119714412e-05, "loss": 1.8445, "step": 740 }, { "epoch": 0.04422457980688998, "grad_norm": 3.687739133834839, "learning_rate": 9.996314985409721e-05, "loss": 1.9523, "step": 742 }, { "epoch": 0.04434378352604601, "grad_norm": 3.773627281188965, "learning_rate": 9.996278594940039e-05, "loss": 1.9188, "step": 744 }, { "epoch": 0.04446298724520205, "grad_norm": 3.4975404739379883, "learning_rate": 9.996242025736377e-05, "loss": 1.8382, "step": 746 }, { "epoch": 0.04458219096435809, "grad_norm": 3.71069073677063, "learning_rate": 9.996205277800044e-05, "loss": 1.8684, "step": 748 }, { "epoch": 0.044701394683514126, "grad_norm": 4.074552536010742, "learning_rate": 9.996168351132352e-05, "loss": 1.9744, "step": 750 }, { "epoch": 0.044820598402670164, "grad_norm": 3.6034059524536133, "learning_rate": 9.996131245734625e-05, "loss": 1.6966, "step": 752 }, { "epoch": 0.0449398021218262, "grad_norm": 3.566887855529785, "learning_rate": 9.99609396160819e-05, "loss": 1.8389, "step": 754 }, { "epoch": 0.04505900584098224, "grad_norm": 3.527583599090576, "learning_rate": 9.996056498754381e-05, "loss": 1.6927, "step": 756 }, { "epoch": 0.04517820956013827, "grad_norm": 3.5187318325042725, "learning_rate": 9.996018857174533e-05, "loss": 1.872, "step": 758 }, { "epoch": 0.04529741327929431, "grad_norm": 4.067770004272461, "learning_rate": 9.995981036870001e-05, "loss": 1.9225, "step": 760 }, { "epoch": 0.04541661699845035, "grad_norm": 3.694514751434326, "learning_rate": 9.995943037842132e-05, "loss": 1.9903, "step": 762 }, { "epoch": 0.04553582071760639, "grad_norm": 3.5118587017059326, "learning_rate": 9.995904860092288e-05, "loss": 1.8263, "step": 764 }, { "epoch": 0.04565502443676243, "grad_norm": 3.7378005981445312, "learning_rate": 9.995866503621834e-05, "loss": 1.8798, "step": 766 }, { "epoch": 0.045774228155918466, "grad_norm": 3.9164252281188965, "learning_rate": 9.99582796843214e-05, "loss": 1.7218, "step": 768 }, { "epoch": 0.045893431875074504, "grad_norm": 3.647066354751587, "learning_rate": 9.995789254524587e-05, "loss": 1.8104, "step": 770 }, { "epoch": 0.04601263559423054, "grad_norm": 3.614243984222412, "learning_rate": 9.995750361900561e-05, "loss": 1.8769, "step": 772 }, { "epoch": 0.046131839313386575, "grad_norm": 3.5727996826171875, "learning_rate": 9.995711290561451e-05, "loss": 1.7465, "step": 774 }, { "epoch": 0.04625104303254261, "grad_norm": 3.378617525100708, "learning_rate": 9.995672040508655e-05, "loss": 1.7697, "step": 776 }, { "epoch": 0.04637024675169865, "grad_norm": 4.0992751121521, "learning_rate": 9.99563261174358e-05, "loss": 1.8911, "step": 778 }, { "epoch": 0.04648945047085469, "grad_norm": 4.2164225578308105, "learning_rate": 9.995593004267631e-05, "loss": 1.8917, "step": 780 }, { "epoch": 0.04660865419001073, "grad_norm": 4.017842769622803, "learning_rate": 9.995553218082229e-05, "loss": 1.8643, "step": 782 }, { "epoch": 0.04672785790916677, "grad_norm": 3.7039167881011963, "learning_rate": 9.995513253188797e-05, "loss": 1.8071, "step": 784 }, { "epoch": 0.046847061628322806, "grad_norm": 3.6898577213287354, "learning_rate": 9.995473109588763e-05, "loss": 1.8414, "step": 786 }, { "epoch": 0.046966265347478844, "grad_norm": 3.8253822326660156, "learning_rate": 9.995432787283565e-05, "loss": 1.897, "step": 788 }, { "epoch": 0.047085469066634876, "grad_norm": 4.436020374298096, "learning_rate": 9.995392286274642e-05, "loss": 1.8466, "step": 790 }, { "epoch": 0.047204672785790915, "grad_norm": 3.691279888153076, "learning_rate": 9.995351606563449e-05, "loss": 1.7705, "step": 792 }, { "epoch": 0.04732387650494695, "grad_norm": 3.701401472091675, "learning_rate": 9.995310748151437e-05, "loss": 1.7921, "step": 794 }, { "epoch": 0.04744308022410299, "grad_norm": 3.548257350921631, "learning_rate": 9.995269711040068e-05, "loss": 1.7781, "step": 796 }, { "epoch": 0.04756228394325903, "grad_norm": 3.6858410835266113, "learning_rate": 9.995228495230808e-05, "loss": 1.857, "step": 798 }, { "epoch": 0.04768148766241507, "grad_norm": 3.3474960327148438, "learning_rate": 9.995187100725136e-05, "loss": 1.7365, "step": 800 }, { "epoch": 0.04780069138157111, "grad_norm": 3.5009377002716064, "learning_rate": 9.99514552752453e-05, "loss": 1.7881, "step": 802 }, { "epoch": 0.047919895100727146, "grad_norm": 4.080969333648682, "learning_rate": 9.995103775630476e-05, "loss": 1.8294, "step": 804 }, { "epoch": 0.04803909881988318, "grad_norm": 3.835939884185791, "learning_rate": 9.995061845044472e-05, "loss": 1.8197, "step": 806 }, { "epoch": 0.048158302539039216, "grad_norm": 3.813527822494507, "learning_rate": 9.995019735768014e-05, "loss": 1.777, "step": 808 }, { "epoch": 0.048277506258195255, "grad_norm": 3.9161291122436523, "learning_rate": 9.99497744780261e-05, "loss": 1.8329, "step": 810 }, { "epoch": 0.04839670997735129, "grad_norm": 3.9254884719848633, "learning_rate": 9.994934981149772e-05, "loss": 1.9529, "step": 812 }, { "epoch": 0.04851591369650733, "grad_norm": 3.522062301635742, "learning_rate": 9.994892335811021e-05, "loss": 1.7565, "step": 814 }, { "epoch": 0.04863511741566337, "grad_norm": 3.8852336406707764, "learning_rate": 9.994849511787881e-05, "loss": 1.8815, "step": 816 }, { "epoch": 0.04875432113481941, "grad_norm": 3.971338987350464, "learning_rate": 9.994806509081884e-05, "loss": 1.8765, "step": 818 }, { "epoch": 0.04887352485397545, "grad_norm": 3.4861109256744385, "learning_rate": 9.994763327694569e-05, "loss": 1.8983, "step": 820 }, { "epoch": 0.04899272857313148, "grad_norm": 3.403838634490967, "learning_rate": 9.99471996762748e-05, "loss": 1.7268, "step": 822 }, { "epoch": 0.04911193229228752, "grad_norm": 3.697918176651001, "learning_rate": 9.994676428882168e-05, "loss": 1.88, "step": 824 }, { "epoch": 0.049231136011443556, "grad_norm": 3.857123613357544, "learning_rate": 9.994632711460192e-05, "loss": 1.7663, "step": 826 }, { "epoch": 0.049350339730599595, "grad_norm": 3.8153762817382812, "learning_rate": 9.994588815363114e-05, "loss": 1.9672, "step": 828 }, { "epoch": 0.04946954344975563, "grad_norm": 3.2994906902313232, "learning_rate": 9.994544740592506e-05, "loss": 1.7124, "step": 830 }, { "epoch": 0.04958874716891167, "grad_norm": 3.7279748916625977, "learning_rate": 9.994500487149945e-05, "loss": 1.8864, "step": 832 }, { "epoch": 0.04970795088806771, "grad_norm": 3.7360031604766846, "learning_rate": 9.994456055037011e-05, "loss": 2.0099, "step": 834 }, { "epoch": 0.04982715460722374, "grad_norm": 3.575025796890259, "learning_rate": 9.994411444255297e-05, "loss": 1.9056, "step": 836 }, { "epoch": 0.04994635832637978, "grad_norm": 3.97017502784729, "learning_rate": 9.994366654806398e-05, "loss": 1.9719, "step": 838 }, { "epoch": 0.05006556204553582, "grad_norm": 3.485664129257202, "learning_rate": 9.994321686691915e-05, "loss": 1.8353, "step": 840 }, { "epoch": 0.05018476576469186, "grad_norm": 3.8267781734466553, "learning_rate": 9.994276539913456e-05, "loss": 1.7878, "step": 842 }, { "epoch": 0.050303969483847896, "grad_norm": 3.779309034347534, "learning_rate": 9.994231214472638e-05, "loss": 1.7437, "step": 844 }, { "epoch": 0.050423173203003935, "grad_norm": 3.966761350631714, "learning_rate": 9.994185710371082e-05, "loss": 1.8075, "step": 846 }, { "epoch": 0.05054237692215997, "grad_norm": 3.594508171081543, "learning_rate": 9.994140027610417e-05, "loss": 1.9554, "step": 848 }, { "epoch": 0.05066158064131601, "grad_norm": 3.6486332416534424, "learning_rate": 9.994094166192274e-05, "loss": 1.8938, "step": 850 }, { "epoch": 0.050780784360472044, "grad_norm": 3.3717823028564453, "learning_rate": 9.994048126118296e-05, "loss": 1.7696, "step": 852 }, { "epoch": 0.05089998807962808, "grad_norm": 3.659984827041626, "learning_rate": 9.994001907390131e-05, "loss": 1.9393, "step": 854 }, { "epoch": 0.05101919179878412, "grad_norm": 3.4614334106445312, "learning_rate": 9.99395551000943e-05, "loss": 1.6926, "step": 856 }, { "epoch": 0.05113839551794016, "grad_norm": 4.0653910636901855, "learning_rate": 9.993908933977854e-05, "loss": 1.8882, "step": 858 }, { "epoch": 0.0512575992370962, "grad_norm": 3.571371078491211, "learning_rate": 9.993862179297069e-05, "loss": 1.8311, "step": 860 }, { "epoch": 0.051376802956252236, "grad_norm": 4.019344806671143, "learning_rate": 9.993815245968747e-05, "loss": 1.9202, "step": 862 }, { "epoch": 0.051496006675408275, "grad_norm": 3.553947687149048, "learning_rate": 9.993768133994567e-05, "loss": 1.8766, "step": 864 }, { "epoch": 0.05161521039456431, "grad_norm": 3.9357428550720215, "learning_rate": 9.993720843376217e-05, "loss": 1.9242, "step": 866 }, { "epoch": 0.051734414113720345, "grad_norm": 3.5692825317382812, "learning_rate": 9.993673374115384e-05, "loss": 1.7867, "step": 868 }, { "epoch": 0.051853617832876384, "grad_norm": 3.616994857788086, "learning_rate": 9.993625726213771e-05, "loss": 1.757, "step": 870 }, { "epoch": 0.05197282155203242, "grad_norm": 3.72725510597229, "learning_rate": 9.99357789967308e-05, "loss": 1.793, "step": 872 }, { "epoch": 0.05209202527118846, "grad_norm": 3.3693716526031494, "learning_rate": 9.993529894495021e-05, "loss": 1.8274, "step": 874 }, { "epoch": 0.0522112289903445, "grad_norm": 3.5625436305999756, "learning_rate": 9.993481710681313e-05, "loss": 1.7243, "step": 876 }, { "epoch": 0.05233043270950054, "grad_norm": 3.9876627922058105, "learning_rate": 9.993433348233678e-05, "loss": 1.8309, "step": 878 }, { "epoch": 0.052449636428656576, "grad_norm": 3.5387027263641357, "learning_rate": 9.99338480715385e-05, "loss": 1.7539, "step": 880 }, { "epoch": 0.052568840147812615, "grad_norm": 3.562152624130249, "learning_rate": 9.99333608744356e-05, "loss": 1.7705, "step": 882 }, { "epoch": 0.05268804386696865, "grad_norm": 3.401320457458496, "learning_rate": 9.993287189104556e-05, "loss": 1.6985, "step": 884 }, { "epoch": 0.052807247586124685, "grad_norm": 3.529999256134033, "learning_rate": 9.993238112138583e-05, "loss": 1.8182, "step": 886 }, { "epoch": 0.052926451305280724, "grad_norm": 3.926300048828125, "learning_rate": 9.9931888565474e-05, "loss": 1.8697, "step": 888 }, { "epoch": 0.05304565502443676, "grad_norm": 3.303511381149292, "learning_rate": 9.993139422332766e-05, "loss": 1.8213, "step": 890 }, { "epoch": 0.0531648587435928, "grad_norm": 3.819309711456299, "learning_rate": 9.993089809496453e-05, "loss": 1.873, "step": 892 }, { "epoch": 0.05328406246274884, "grad_norm": 4.1472554206848145, "learning_rate": 9.993040018040231e-05, "loss": 1.9204, "step": 894 }, { "epoch": 0.05340326618190488, "grad_norm": 3.6811506748199463, "learning_rate": 9.992990047965887e-05, "loss": 1.8029, "step": 896 }, { "epoch": 0.053522469901060916, "grad_norm": 3.7124621868133545, "learning_rate": 9.992939899275204e-05, "loss": 1.8154, "step": 898 }, { "epoch": 0.05364167362021695, "grad_norm": 3.735851526260376, "learning_rate": 9.992889571969978e-05, "loss": 1.8408, "step": 900 }, { "epoch": 0.05376087733937299, "grad_norm": 3.6281087398529053, "learning_rate": 9.992839066052008e-05, "loss": 1.8678, "step": 902 }, { "epoch": 0.053880081058529025, "grad_norm": 3.6980764865875244, "learning_rate": 9.992788381523104e-05, "loss": 1.9158, "step": 904 }, { "epoch": 0.053999284777685064, "grad_norm": 3.967695713043213, "learning_rate": 9.992737518385076e-05, "loss": 1.8351, "step": 906 }, { "epoch": 0.0541184884968411, "grad_norm": 3.925257682800293, "learning_rate": 9.992686476639745e-05, "loss": 1.8789, "step": 908 }, { "epoch": 0.05423769221599714, "grad_norm": 3.7681806087493896, "learning_rate": 9.992635256288936e-05, "loss": 1.7656, "step": 910 }, { "epoch": 0.05435689593515318, "grad_norm": 3.802093267440796, "learning_rate": 9.992583857334481e-05, "loss": 1.7965, "step": 912 }, { "epoch": 0.05447609965430921, "grad_norm": 3.676847457885742, "learning_rate": 9.99253227977822e-05, "loss": 1.8855, "step": 914 }, { "epoch": 0.05459530337346525, "grad_norm": 3.3771257400512695, "learning_rate": 9.992480523621999e-05, "loss": 1.7397, "step": 916 }, { "epoch": 0.05471450709262129, "grad_norm": 3.88736629486084, "learning_rate": 9.992428588867667e-05, "loss": 1.8386, "step": 918 }, { "epoch": 0.05483371081177733, "grad_norm": 3.5050785541534424, "learning_rate": 9.992376475517081e-05, "loss": 1.7935, "step": 920 }, { "epoch": 0.054952914530933365, "grad_norm": 4.313678741455078, "learning_rate": 9.99232418357211e-05, "loss": 1.9285, "step": 922 }, { "epoch": 0.055072118250089404, "grad_norm": 4.013608455657959, "learning_rate": 9.99227171303462e-05, "loss": 1.8951, "step": 924 }, { "epoch": 0.05519132196924544, "grad_norm": 3.541102647781372, "learning_rate": 9.992219063906491e-05, "loss": 1.8487, "step": 926 }, { "epoch": 0.05531052568840148, "grad_norm": 3.809490919113159, "learning_rate": 9.992166236189605e-05, "loss": 2.0524, "step": 928 }, { "epoch": 0.05542972940755751, "grad_norm": 3.7029149532318115, "learning_rate": 9.992113229885852e-05, "loss": 1.9346, "step": 930 }, { "epoch": 0.05554893312671355, "grad_norm": 3.8153576850891113, "learning_rate": 9.992060044997128e-05, "loss": 1.9552, "step": 932 }, { "epoch": 0.05566813684586959, "grad_norm": 4.296749114990234, "learning_rate": 9.992006681525338e-05, "loss": 1.7736, "step": 934 }, { "epoch": 0.05578734056502563, "grad_norm": 3.9364700317382812, "learning_rate": 9.991953139472388e-05, "loss": 1.8532, "step": 936 }, { "epoch": 0.05590654428418167, "grad_norm": 3.9413812160491943, "learning_rate": 9.991899418840193e-05, "loss": 1.8589, "step": 938 }, { "epoch": 0.056025748003337705, "grad_norm": 3.272742986679077, "learning_rate": 9.991845519630678e-05, "loss": 1.7166, "step": 940 }, { "epoch": 0.056144951722493744, "grad_norm": 3.5168800354003906, "learning_rate": 9.99179144184577e-05, "loss": 1.7771, "step": 942 }, { "epoch": 0.05626415544164978, "grad_norm": 3.864030361175537, "learning_rate": 9.991737185487403e-05, "loss": 1.8549, "step": 944 }, { "epoch": 0.056383359160805814, "grad_norm": 3.8456621170043945, "learning_rate": 9.991682750557515e-05, "loss": 1.7874, "step": 946 }, { "epoch": 0.05650256287996185, "grad_norm": 3.9575977325439453, "learning_rate": 9.991628137058057e-05, "loss": 1.7603, "step": 948 }, { "epoch": 0.05662176659911789, "grad_norm": 3.593646764755249, "learning_rate": 9.991573344990984e-05, "loss": 1.7789, "step": 950 }, { "epoch": 0.05674097031827393, "grad_norm": 3.555929183959961, "learning_rate": 9.991518374358252e-05, "loss": 1.7524, "step": 952 }, { "epoch": 0.05686017403742997, "grad_norm": 3.7276995182037354, "learning_rate": 9.991463225161831e-05, "loss": 1.8145, "step": 954 }, { "epoch": 0.05697937775658601, "grad_norm": 3.6785635948181152, "learning_rate": 9.991407897403691e-05, "loss": 1.871, "step": 956 }, { "epoch": 0.057098581475742045, "grad_norm": 3.703866720199585, "learning_rate": 9.991352391085811e-05, "loss": 1.7782, "step": 958 }, { "epoch": 0.057217785194898084, "grad_norm": 3.687192440032959, "learning_rate": 9.991296706210181e-05, "loss": 1.7882, "step": 960 }, { "epoch": 0.057336988914054116, "grad_norm": 3.9526002407073975, "learning_rate": 9.99124084277879e-05, "loss": 1.834, "step": 962 }, { "epoch": 0.057456192633210154, "grad_norm": 3.62290096282959, "learning_rate": 9.991184800793636e-05, "loss": 1.8968, "step": 964 }, { "epoch": 0.05757539635236619, "grad_norm": 3.7782137393951416, "learning_rate": 9.991128580256724e-05, "loss": 1.9465, "step": 966 }, { "epoch": 0.05769460007152223, "grad_norm": 4.29383659362793, "learning_rate": 9.991072181170067e-05, "loss": 1.9567, "step": 968 }, { "epoch": 0.05781380379067827, "grad_norm": 4.026127338409424, "learning_rate": 9.99101560353568e-05, "loss": 1.9486, "step": 970 }, { "epoch": 0.05793300750983431, "grad_norm": 3.5634799003601074, "learning_rate": 9.990958847355589e-05, "loss": 1.7063, "step": 972 }, { "epoch": 0.05805221122899035, "grad_norm": 3.2677130699157715, "learning_rate": 9.990901912631825e-05, "loss": 1.7335, "step": 974 }, { "epoch": 0.058171414948146385, "grad_norm": 3.6377756595611572, "learning_rate": 9.990844799366421e-05, "loss": 1.7514, "step": 976 }, { "epoch": 0.05829061866730242, "grad_norm": 3.6083273887634277, "learning_rate": 9.990787507561424e-05, "loss": 1.8575, "step": 978 }, { "epoch": 0.058409822386458456, "grad_norm": 3.2262775897979736, "learning_rate": 9.990730037218882e-05, "loss": 1.7294, "step": 980 }, { "epoch": 0.058529026105614494, "grad_norm": 3.46047043800354, "learning_rate": 9.99067238834085e-05, "loss": 1.7746, "step": 982 }, { "epoch": 0.05864822982477053, "grad_norm": 4.813882350921631, "learning_rate": 9.990614560929392e-05, "loss": 1.7416, "step": 984 }, { "epoch": 0.05876743354392657, "grad_norm": 3.699923515319824, "learning_rate": 9.990556554986577e-05, "loss": 1.7607, "step": 986 }, { "epoch": 0.05888663726308261, "grad_norm": 3.4346272945404053, "learning_rate": 9.990498370514478e-05, "loss": 1.7349, "step": 988 }, { "epoch": 0.05900584098223865, "grad_norm": 3.622134208679199, "learning_rate": 9.990440007515176e-05, "loss": 1.8268, "step": 990 }, { "epoch": 0.05912504470139468, "grad_norm": 3.656534194946289, "learning_rate": 9.990381465990762e-05, "loss": 1.8283, "step": 992 }, { "epoch": 0.05924424842055072, "grad_norm": 3.869019031524658, "learning_rate": 9.990322745943328e-05, "loss": 1.8796, "step": 994 }, { "epoch": 0.05936345213970676, "grad_norm": 3.5469677448272705, "learning_rate": 9.990263847374976e-05, "loss": 1.8488, "step": 996 }, { "epoch": 0.059482655858862796, "grad_norm": 3.8248910903930664, "learning_rate": 9.990204770287811e-05, "loss": 1.8356, "step": 998 }, { "epoch": 0.059601859578018834, "grad_norm": 3.514333486557007, "learning_rate": 9.990145514683948e-05, "loss": 1.8984, "step": 1000 }, { "epoch": 0.05972106329717487, "grad_norm": 3.6628236770629883, "learning_rate": 9.990086080565506e-05, "loss": 1.6875, "step": 1002 }, { "epoch": 0.05984026701633091, "grad_norm": 3.7326371669769287, "learning_rate": 9.990026467934612e-05, "loss": 1.9041, "step": 1004 }, { "epoch": 0.05995947073548695, "grad_norm": 3.6256396770477295, "learning_rate": 9.989966676793398e-05, "loss": 1.9606, "step": 1006 }, { "epoch": 0.06007867445464298, "grad_norm": 3.3569083213806152, "learning_rate": 9.989906707144004e-05, "loss": 1.789, "step": 1008 }, { "epoch": 0.06019787817379902, "grad_norm": 3.516369104385376, "learning_rate": 9.989846558988574e-05, "loss": 1.6888, "step": 1010 }, { "epoch": 0.06031708189295506, "grad_norm": 3.6158735752105713, "learning_rate": 9.989786232329259e-05, "loss": 1.8141, "step": 1012 }, { "epoch": 0.0604362856121111, "grad_norm": 4.084103584289551, "learning_rate": 9.989725727168217e-05, "loss": 1.7988, "step": 1014 }, { "epoch": 0.060555489331267136, "grad_norm": 5.1263275146484375, "learning_rate": 9.989665043507615e-05, "loss": 1.8793, "step": 1016 }, { "epoch": 0.060674693050423174, "grad_norm": 3.5846827030181885, "learning_rate": 9.989604181349624e-05, "loss": 1.7009, "step": 1018 }, { "epoch": 0.06079389676957921, "grad_norm": 4.350070953369141, "learning_rate": 9.989543140696418e-05, "loss": 1.8294, "step": 1020 }, { "epoch": 0.06091310048873525, "grad_norm": 3.414588451385498, "learning_rate": 9.989481921550181e-05, "loss": 1.7753, "step": 1022 }, { "epoch": 0.06103230420789128, "grad_norm": 3.6540069580078125, "learning_rate": 9.989420523913107e-05, "loss": 1.7715, "step": 1024 }, { "epoch": 0.06115150792704732, "grad_norm": 3.8252127170562744, "learning_rate": 9.989358947787389e-05, "loss": 1.6736, "step": 1026 }, { "epoch": 0.06127071164620336, "grad_norm": 3.843416690826416, "learning_rate": 9.989297193175228e-05, "loss": 1.8592, "step": 1028 }, { "epoch": 0.0613899153653594, "grad_norm": 3.635848045349121, "learning_rate": 9.989235260078838e-05, "loss": 1.743, "step": 1030 }, { "epoch": 0.06150911908451544, "grad_norm": 3.7918972969055176, "learning_rate": 9.989173148500432e-05, "loss": 1.9597, "step": 1032 }, { "epoch": 0.061628322803671476, "grad_norm": 3.60603666305542, "learning_rate": 9.989110858442232e-05, "loss": 1.9236, "step": 1034 }, { "epoch": 0.061747526522827514, "grad_norm": 3.318141460418701, "learning_rate": 9.989048389906468e-05, "loss": 1.6342, "step": 1036 }, { "epoch": 0.06186673024198355, "grad_norm": 3.4963765144348145, "learning_rate": 9.988985742895372e-05, "loss": 1.7679, "step": 1038 }, { "epoch": 0.061985933961139585, "grad_norm": 3.5454185009002686, "learning_rate": 9.988922917411185e-05, "loss": 1.6942, "step": 1040 }, { "epoch": 0.06210513768029562, "grad_norm": 3.778968572616577, "learning_rate": 9.988859913456159e-05, "loss": 1.8256, "step": 1042 }, { "epoch": 0.06222434139945166, "grad_norm": 3.735098361968994, "learning_rate": 9.988796731032541e-05, "loss": 1.9177, "step": 1044 }, { "epoch": 0.0623435451186077, "grad_norm": 3.61877703666687, "learning_rate": 9.988733370142598e-05, "loss": 1.8521, "step": 1046 }, { "epoch": 0.06246274883776374, "grad_norm": 3.7278854846954346, "learning_rate": 9.988669830788593e-05, "loss": 1.9146, "step": 1048 }, { "epoch": 0.06258195255691977, "grad_norm": 3.5517444610595703, "learning_rate": 9.988606112972799e-05, "loss": 1.8413, "step": 1050 }, { "epoch": 0.06270115627607581, "grad_norm": 3.8435776233673096, "learning_rate": 9.988542216697495e-05, "loss": 1.8414, "step": 1052 }, { "epoch": 0.06282035999523185, "grad_norm": 3.7683281898498535, "learning_rate": 9.98847814196497e-05, "loss": 1.7264, "step": 1054 }, { "epoch": 0.06293956371438789, "grad_norm": 4.004631996154785, "learning_rate": 9.988413888777512e-05, "loss": 1.7055, "step": 1056 }, { "epoch": 0.06305876743354392, "grad_norm": 3.592968463897705, "learning_rate": 9.988349457137423e-05, "loss": 1.6644, "step": 1058 }, { "epoch": 0.06317797115269996, "grad_norm": 3.209564208984375, "learning_rate": 9.988284847047005e-05, "loss": 1.6262, "step": 1060 }, { "epoch": 0.063297174871856, "grad_norm": 3.2618496417999268, "learning_rate": 9.988220058508572e-05, "loss": 1.92, "step": 1062 }, { "epoch": 0.06341637859101204, "grad_norm": 3.7120251655578613, "learning_rate": 9.988155091524439e-05, "loss": 1.7304, "step": 1064 }, { "epoch": 0.06353558231016808, "grad_norm": 3.237438440322876, "learning_rate": 9.988089946096933e-05, "loss": 1.6457, "step": 1066 }, { "epoch": 0.06365478602932412, "grad_norm": 3.3386600017547607, "learning_rate": 9.988024622228383e-05, "loss": 1.8396, "step": 1068 }, { "epoch": 0.06377398974848016, "grad_norm": 3.235105514526367, "learning_rate": 9.987959119921125e-05, "loss": 1.7075, "step": 1070 }, { "epoch": 0.0638931934676362, "grad_norm": 3.3713080883026123, "learning_rate": 9.987893439177505e-05, "loss": 1.6504, "step": 1072 }, { "epoch": 0.06401239718679223, "grad_norm": 3.8551156520843506, "learning_rate": 9.987827579999869e-05, "loss": 1.8779, "step": 1074 }, { "epoch": 0.06413160090594827, "grad_norm": 3.336139678955078, "learning_rate": 9.987761542390574e-05, "loss": 1.7687, "step": 1076 }, { "epoch": 0.0642508046251043, "grad_norm": 3.6591739654541016, "learning_rate": 9.987695326351986e-05, "loss": 1.9188, "step": 1078 }, { "epoch": 0.06437000834426033, "grad_norm": 3.3875620365142822, "learning_rate": 9.987628931886469e-05, "loss": 1.7101, "step": 1080 }, { "epoch": 0.06448921206341637, "grad_norm": 3.580967426300049, "learning_rate": 9.987562358996401e-05, "loss": 1.8128, "step": 1082 }, { "epoch": 0.06460841578257241, "grad_norm": 3.641923189163208, "learning_rate": 9.987495607684162e-05, "loss": 2.0039, "step": 1084 }, { "epoch": 0.06472761950172845, "grad_norm": 3.2823033332824707, "learning_rate": 9.987428677952141e-05, "loss": 1.6739, "step": 1086 }, { "epoch": 0.06484682322088449, "grad_norm": 3.3813345432281494, "learning_rate": 9.987361569802731e-05, "loss": 1.7168, "step": 1088 }, { "epoch": 0.06496602694004053, "grad_norm": 3.2854068279266357, "learning_rate": 9.987294283238336e-05, "loss": 1.7608, "step": 1090 }, { "epoch": 0.06508523065919657, "grad_norm": 3.6483232975006104, "learning_rate": 9.987226818261358e-05, "loss": 1.7545, "step": 1092 }, { "epoch": 0.0652044343783526, "grad_norm": 3.5432775020599365, "learning_rate": 9.987159174874215e-05, "loss": 1.8473, "step": 1094 }, { "epoch": 0.06532363809750864, "grad_norm": 3.4211320877075195, "learning_rate": 9.987091353079323e-05, "loss": 1.8315, "step": 1096 }, { "epoch": 0.06544284181666468, "grad_norm": 3.4620048999786377, "learning_rate": 9.98702335287911e-05, "loss": 1.7428, "step": 1098 }, { "epoch": 0.06556204553582072, "grad_norm": 3.483276128768921, "learning_rate": 9.986955174276011e-05, "loss": 1.8987, "step": 1100 }, { "epoch": 0.06568124925497676, "grad_norm": 3.771211624145508, "learning_rate": 9.986886817272462e-05, "loss": 1.7877, "step": 1102 }, { "epoch": 0.0658004529741328, "grad_norm": 3.341813802719116, "learning_rate": 9.986818281870909e-05, "loss": 1.721, "step": 1104 }, { "epoch": 0.06591965669328884, "grad_norm": 3.499447822570801, "learning_rate": 9.986749568073803e-05, "loss": 1.8581, "step": 1106 }, { "epoch": 0.06603886041244487, "grad_norm": 3.45731520652771, "learning_rate": 9.986680675883604e-05, "loss": 1.7587, "step": 1108 }, { "epoch": 0.0661580641316009, "grad_norm": 3.719034433364868, "learning_rate": 9.986611605302774e-05, "loss": 1.7599, "step": 1110 }, { "epoch": 0.06627726785075694, "grad_norm": 3.8845632076263428, "learning_rate": 9.986542356333787e-05, "loss": 1.8297, "step": 1112 }, { "epoch": 0.06639647156991298, "grad_norm": 3.710519313812256, "learning_rate": 9.986472928979119e-05, "loss": 1.9252, "step": 1114 }, { "epoch": 0.06651567528906902, "grad_norm": 3.4144201278686523, "learning_rate": 9.986403323241251e-05, "loss": 1.7123, "step": 1116 }, { "epoch": 0.06663487900822505, "grad_norm": 3.5341315269470215, "learning_rate": 9.986333539122677e-05, "loss": 1.8001, "step": 1118 }, { "epoch": 0.06675408272738109, "grad_norm": 3.7582271099090576, "learning_rate": 9.986263576625892e-05, "loss": 1.7659, "step": 1120 }, { "epoch": 0.06687328644653713, "grad_norm": 3.4154446125030518, "learning_rate": 9.986193435753397e-05, "loss": 1.7792, "step": 1122 }, { "epoch": 0.06699249016569317, "grad_norm": 3.4495387077331543, "learning_rate": 9.986123116507704e-05, "loss": 1.8192, "step": 1124 }, { "epoch": 0.06711169388484921, "grad_norm": 3.911012887954712, "learning_rate": 9.986052618891327e-05, "loss": 1.8643, "step": 1126 }, { "epoch": 0.06723089760400525, "grad_norm": 3.9185376167297363, "learning_rate": 9.985981942906787e-05, "loss": 1.7103, "step": 1128 }, { "epoch": 0.06735010132316128, "grad_norm": 3.8976593017578125, "learning_rate": 9.985911088556616e-05, "loss": 1.9389, "step": 1130 }, { "epoch": 0.06746930504231732, "grad_norm": 3.499690294265747, "learning_rate": 9.985840055843345e-05, "loss": 1.9238, "step": 1132 }, { "epoch": 0.06758850876147336, "grad_norm": 3.4203085899353027, "learning_rate": 9.985768844769516e-05, "loss": 1.8087, "step": 1134 }, { "epoch": 0.0677077124806294, "grad_norm": 3.5953428745269775, "learning_rate": 9.985697455337677e-05, "loss": 1.8619, "step": 1136 }, { "epoch": 0.06782691619978544, "grad_norm": 3.7423336505889893, "learning_rate": 9.985625887550381e-05, "loss": 2.0257, "step": 1138 }, { "epoch": 0.06794611991894148, "grad_norm": 3.6879429817199707, "learning_rate": 9.98555414141019e-05, "loss": 1.872, "step": 1140 }, { "epoch": 0.0680653236380975, "grad_norm": 3.6937875747680664, "learning_rate": 9.985482216919669e-05, "loss": 1.8286, "step": 1142 }, { "epoch": 0.06818452735725354, "grad_norm": 3.709442138671875, "learning_rate": 9.985410114081393e-05, "loss": 1.9799, "step": 1144 }, { "epoch": 0.06830373107640958, "grad_norm": 3.8401966094970703, "learning_rate": 9.985337832897937e-05, "loss": 1.6496, "step": 1146 }, { "epoch": 0.06842293479556562, "grad_norm": 3.5123889446258545, "learning_rate": 9.985265373371891e-05, "loss": 1.7221, "step": 1148 }, { "epoch": 0.06854213851472166, "grad_norm": 3.4847071170806885, "learning_rate": 9.985192735505846e-05, "loss": 1.789, "step": 1150 }, { "epoch": 0.0686613422338777, "grad_norm": 3.909097194671631, "learning_rate": 9.985119919302399e-05, "loss": 1.8568, "step": 1152 }, { "epoch": 0.06878054595303373, "grad_norm": 3.6898012161254883, "learning_rate": 9.985046924764157e-05, "loss": 1.8335, "step": 1154 }, { "epoch": 0.06889974967218977, "grad_norm": 3.8115742206573486, "learning_rate": 9.984973751893732e-05, "loss": 1.86, "step": 1156 }, { "epoch": 0.06901895339134581, "grad_norm": 3.4137120246887207, "learning_rate": 9.984900400693737e-05, "loss": 1.8177, "step": 1158 }, { "epoch": 0.06913815711050185, "grad_norm": 3.417830228805542, "learning_rate": 9.9848268711668e-05, "loss": 1.6834, "step": 1160 }, { "epoch": 0.06925736082965789, "grad_norm": 3.5088119506835938, "learning_rate": 9.984753163315552e-05, "loss": 1.8328, "step": 1162 }, { "epoch": 0.06937656454881393, "grad_norm": 3.3067991733551025, "learning_rate": 9.984679277142626e-05, "loss": 1.7713, "step": 1164 }, { "epoch": 0.06949576826796997, "grad_norm": 3.666386365890503, "learning_rate": 9.984605212650669e-05, "loss": 1.9376, "step": 1166 }, { "epoch": 0.069614971987126, "grad_norm": 3.8203041553497314, "learning_rate": 9.984530969842327e-05, "loss": 1.8376, "step": 1168 }, { "epoch": 0.06973417570628204, "grad_norm": 3.838639974594116, "learning_rate": 9.98445654872026e-05, "loss": 1.8972, "step": 1170 }, { "epoch": 0.06985337942543807, "grad_norm": 2.9467432498931885, "learning_rate": 9.984381949287127e-05, "loss": 1.7213, "step": 1172 }, { "epoch": 0.0699725831445941, "grad_norm": 3.7433695793151855, "learning_rate": 9.984307171545597e-05, "loss": 1.7782, "step": 1174 }, { "epoch": 0.07009178686375014, "grad_norm": 3.4375503063201904, "learning_rate": 9.984232215498347e-05, "loss": 1.6845, "step": 1176 }, { "epoch": 0.07021099058290618, "grad_norm": 3.7470409870147705, "learning_rate": 9.984157081148058e-05, "loss": 1.7564, "step": 1178 }, { "epoch": 0.07033019430206222, "grad_norm": 3.5575122833251953, "learning_rate": 9.984081768497415e-05, "loss": 1.7716, "step": 1180 }, { "epoch": 0.07044939802121826, "grad_norm": 3.3193256855010986, "learning_rate": 9.984006277549116e-05, "loss": 1.7145, "step": 1182 }, { "epoch": 0.0705686017403743, "grad_norm": 3.48976469039917, "learning_rate": 9.983930608305857e-05, "loss": 1.8958, "step": 1184 }, { "epoch": 0.07068780545953034, "grad_norm": 3.723472833633423, "learning_rate": 9.983854760770353e-05, "loss": 1.6984, "step": 1186 }, { "epoch": 0.07080700917868638, "grad_norm": 3.605712890625, "learning_rate": 9.983778734945308e-05, "loss": 1.8301, "step": 1188 }, { "epoch": 0.07092621289784241, "grad_norm": 3.6623265743255615, "learning_rate": 9.983702530833448e-05, "loss": 1.7316, "step": 1190 }, { "epoch": 0.07104541661699845, "grad_norm": 3.516927480697632, "learning_rate": 9.983626148437495e-05, "loss": 1.6905, "step": 1192 }, { "epoch": 0.07116462033615449, "grad_norm": 3.5900490283966064, "learning_rate": 9.983549587760183e-05, "loss": 1.8688, "step": 1194 }, { "epoch": 0.07128382405531053, "grad_norm": 3.663727283477783, "learning_rate": 9.983472848804253e-05, "loss": 1.8098, "step": 1196 }, { "epoch": 0.07140302777446657, "grad_norm": 3.5920753479003906, "learning_rate": 9.983395931572449e-05, "loss": 1.7462, "step": 1198 }, { "epoch": 0.0715222314936226, "grad_norm": 3.514986991882324, "learning_rate": 9.98331883606752e-05, "loss": 1.7611, "step": 1200 }, { "epoch": 0.07164143521277865, "grad_norm": 4.374067783355713, "learning_rate": 9.983241562292227e-05, "loss": 1.9222, "step": 1202 }, { "epoch": 0.07176063893193467, "grad_norm": 3.5253233909606934, "learning_rate": 9.983164110249335e-05, "loss": 1.7442, "step": 1204 }, { "epoch": 0.07187984265109071, "grad_norm": 4.039009094238281, "learning_rate": 9.98308647994161e-05, "loss": 1.8445, "step": 1206 }, { "epoch": 0.07199904637024675, "grad_norm": 3.624509811401367, "learning_rate": 9.983008671371834e-05, "loss": 1.6837, "step": 1208 }, { "epoch": 0.07211825008940279, "grad_norm": 3.7301676273345947, "learning_rate": 9.982930684542789e-05, "loss": 1.8225, "step": 1210 }, { "epoch": 0.07223745380855882, "grad_norm": 3.2725844383239746, "learning_rate": 9.982852519457263e-05, "loss": 1.7847, "step": 1212 }, { "epoch": 0.07235665752771486, "grad_norm": 3.4499568939208984, "learning_rate": 9.982774176118055e-05, "loss": 1.78, "step": 1214 }, { "epoch": 0.0724758612468709, "grad_norm": 3.282174825668335, "learning_rate": 9.982695654527965e-05, "loss": 1.7061, "step": 1216 }, { "epoch": 0.07259506496602694, "grad_norm": 3.683845043182373, "learning_rate": 9.982616954689804e-05, "loss": 1.7958, "step": 1218 }, { "epoch": 0.07271426868518298, "grad_norm": 3.6483757495880127, "learning_rate": 9.982538076606388e-05, "loss": 1.7077, "step": 1220 }, { "epoch": 0.07283347240433902, "grad_norm": 3.6903293132781982, "learning_rate": 9.982459020280534e-05, "loss": 1.8683, "step": 1222 }, { "epoch": 0.07295267612349506, "grad_norm": 3.4415857791900635, "learning_rate": 9.982379785715077e-05, "loss": 1.6704, "step": 1224 }, { "epoch": 0.0730718798426511, "grad_norm": 3.7454817295074463, "learning_rate": 9.982300372912847e-05, "loss": 1.744, "step": 1226 }, { "epoch": 0.07319108356180713, "grad_norm": 3.5526676177978516, "learning_rate": 9.982220781876686e-05, "loss": 1.8452, "step": 1228 }, { "epoch": 0.07331028728096317, "grad_norm": 3.5075430870056152, "learning_rate": 9.98214101260944e-05, "loss": 1.7916, "step": 1230 }, { "epoch": 0.07342949100011921, "grad_norm": 3.263101816177368, "learning_rate": 9.982061065113966e-05, "loss": 1.7085, "step": 1232 }, { "epoch": 0.07354869471927525, "grad_norm": 3.312952756881714, "learning_rate": 9.981980939393118e-05, "loss": 1.6782, "step": 1234 }, { "epoch": 0.07366789843843127, "grad_norm": 3.7838549613952637, "learning_rate": 9.98190063544977e-05, "loss": 1.8228, "step": 1236 }, { "epoch": 0.07378710215758731, "grad_norm": 3.847461223602295, "learning_rate": 9.98182015328679e-05, "loss": 1.8246, "step": 1238 }, { "epoch": 0.07390630587674335, "grad_norm": 3.792604446411133, "learning_rate": 9.981739492907056e-05, "loss": 2.0125, "step": 1240 }, { "epoch": 0.07402550959589939, "grad_norm": 3.2333269119262695, "learning_rate": 9.981658654313457e-05, "loss": 1.7052, "step": 1242 }, { "epoch": 0.07414471331505543, "grad_norm": 3.712606430053711, "learning_rate": 9.981577637508883e-05, "loss": 1.918, "step": 1244 }, { "epoch": 0.07426391703421147, "grad_norm": 3.2563059329986572, "learning_rate": 9.981496442496234e-05, "loss": 1.6897, "step": 1246 }, { "epoch": 0.0743831207533675, "grad_norm": 3.1528546810150146, "learning_rate": 9.981415069278411e-05, "loss": 1.7142, "step": 1248 }, { "epoch": 0.07450232447252354, "grad_norm": 3.6652863025665283, "learning_rate": 9.98133351785833e-05, "loss": 1.7622, "step": 1250 }, { "epoch": 0.07462152819167958, "grad_norm": 3.4871182441711426, "learning_rate": 9.981251788238903e-05, "loss": 1.735, "step": 1252 }, { "epoch": 0.07474073191083562, "grad_norm": 4.178586483001709, "learning_rate": 9.981169880423058e-05, "loss": 1.9561, "step": 1254 }, { "epoch": 0.07485993562999166, "grad_norm": 3.3862648010253906, "learning_rate": 9.981087794413721e-05, "loss": 1.7706, "step": 1256 }, { "epoch": 0.0749791393491477, "grad_norm": 3.7896416187286377, "learning_rate": 9.981005530213834e-05, "loss": 1.8424, "step": 1258 }, { "epoch": 0.07509834306830374, "grad_norm": 3.3379554748535156, "learning_rate": 9.980923087826336e-05, "loss": 1.7024, "step": 1260 }, { "epoch": 0.07521754678745977, "grad_norm": 3.3996353149414062, "learning_rate": 9.980840467254176e-05, "loss": 1.8047, "step": 1262 }, { "epoch": 0.07533675050661581, "grad_norm": 3.5422980785369873, "learning_rate": 9.980757668500311e-05, "loss": 1.8684, "step": 1264 }, { "epoch": 0.07545595422577184, "grad_norm": 3.314535617828369, "learning_rate": 9.980674691567704e-05, "loss": 1.677, "step": 1266 }, { "epoch": 0.07557515794492788, "grad_norm": 3.5688118934631348, "learning_rate": 9.980591536459322e-05, "loss": 1.8406, "step": 1268 }, { "epoch": 0.07569436166408391, "grad_norm": 3.203021287918091, "learning_rate": 9.980508203178139e-05, "loss": 1.6707, "step": 1270 }, { "epoch": 0.07581356538323995, "grad_norm": 3.5121963024139404, "learning_rate": 9.980424691727137e-05, "loss": 1.7319, "step": 1272 }, { "epoch": 0.07593276910239599, "grad_norm": 3.731142044067383, "learning_rate": 9.980341002109304e-05, "loss": 1.7058, "step": 1274 }, { "epoch": 0.07605197282155203, "grad_norm": 3.4342949390411377, "learning_rate": 9.980257134327632e-05, "loss": 1.8059, "step": 1276 }, { "epoch": 0.07617117654070807, "grad_norm": 3.491163492202759, "learning_rate": 9.980173088385123e-05, "loss": 1.7887, "step": 1278 }, { "epoch": 0.07629038025986411, "grad_norm": 3.654867649078369, "learning_rate": 9.980088864284785e-05, "loss": 1.8383, "step": 1280 }, { "epoch": 0.07640958397902015, "grad_norm": 3.47200870513916, "learning_rate": 9.980004462029627e-05, "loss": 1.6902, "step": 1282 }, { "epoch": 0.07652878769817618, "grad_norm": 3.6370792388916016, "learning_rate": 9.97991988162267e-05, "loss": 1.6764, "step": 1284 }, { "epoch": 0.07664799141733222, "grad_norm": 3.8195700645446777, "learning_rate": 9.979835123066944e-05, "loss": 1.801, "step": 1286 }, { "epoch": 0.07676719513648826, "grad_norm": 3.9889931678771973, "learning_rate": 9.979750186365473e-05, "loss": 1.7133, "step": 1288 }, { "epoch": 0.0768863988556443, "grad_norm": 4.398395538330078, "learning_rate": 9.979665071521301e-05, "loss": 1.8691, "step": 1290 }, { "epoch": 0.07700560257480034, "grad_norm": 3.0416085720062256, "learning_rate": 9.979579778537474e-05, "loss": 1.7023, "step": 1292 }, { "epoch": 0.07712480629395638, "grad_norm": 3.773315191268921, "learning_rate": 9.979494307417038e-05, "loss": 1.8401, "step": 1294 }, { "epoch": 0.07724401001311242, "grad_norm": 3.598823070526123, "learning_rate": 9.979408658163054e-05, "loss": 1.8711, "step": 1296 }, { "epoch": 0.07736321373226844, "grad_norm": 3.635864734649658, "learning_rate": 9.979322830778586e-05, "loss": 1.7637, "step": 1298 }, { "epoch": 0.07748241745142448, "grad_norm": 3.7948806285858154, "learning_rate": 9.979236825266703e-05, "loss": 1.82, "step": 1300 }, { "epoch": 0.07760162117058052, "grad_norm": 3.63915753364563, "learning_rate": 9.979150641630484e-05, "loss": 1.646, "step": 1302 }, { "epoch": 0.07772082488973656, "grad_norm": 3.6633965969085693, "learning_rate": 9.979064279873009e-05, "loss": 1.7235, "step": 1304 }, { "epoch": 0.0778400286088926, "grad_norm": 3.50696063041687, "learning_rate": 9.97897773999737e-05, "loss": 1.8597, "step": 1306 }, { "epoch": 0.07795923232804863, "grad_norm": 3.2794313430786133, "learning_rate": 9.978891022006662e-05, "loss": 1.5614, "step": 1308 }, { "epoch": 0.07807843604720467, "grad_norm": 3.342439651489258, "learning_rate": 9.978804125903986e-05, "loss": 1.6063, "step": 1310 }, { "epoch": 0.07819763976636071, "grad_norm": 3.5072388648986816, "learning_rate": 9.978717051692453e-05, "loss": 1.7843, "step": 1312 }, { "epoch": 0.07831684348551675, "grad_norm": 3.4176290035247803, "learning_rate": 9.978629799375177e-05, "loss": 1.8615, "step": 1314 }, { "epoch": 0.07843604720467279, "grad_norm": 3.646674394607544, "learning_rate": 9.978542368955277e-05, "loss": 1.9695, "step": 1316 }, { "epoch": 0.07855525092382883, "grad_norm": 3.4913547039031982, "learning_rate": 9.978454760435882e-05, "loss": 1.7693, "step": 1318 }, { "epoch": 0.07867445464298486, "grad_norm": 3.979012966156006, "learning_rate": 9.978366973820128e-05, "loss": 1.7575, "step": 1320 }, { "epoch": 0.0787936583621409, "grad_norm": 3.669872522354126, "learning_rate": 9.978279009111156e-05, "loss": 1.7682, "step": 1322 }, { "epoch": 0.07891286208129694, "grad_norm": 3.485316514968872, "learning_rate": 9.978190866312108e-05, "loss": 1.7673, "step": 1324 }, { "epoch": 0.07903206580045298, "grad_norm": 3.5757508277893066, "learning_rate": 9.97810254542614e-05, "loss": 1.725, "step": 1326 }, { "epoch": 0.079151269519609, "grad_norm": 3.757988929748535, "learning_rate": 9.978014046456412e-05, "loss": 1.7626, "step": 1328 }, { "epoch": 0.07927047323876504, "grad_norm": 3.3967325687408447, "learning_rate": 9.97792536940609e-05, "loss": 1.8744, "step": 1330 }, { "epoch": 0.07938967695792108, "grad_norm": 3.47346830368042, "learning_rate": 9.977836514278347e-05, "loss": 1.6545, "step": 1332 }, { "epoch": 0.07950888067707712, "grad_norm": 3.8784379959106445, "learning_rate": 9.977747481076359e-05, "loss": 1.8167, "step": 1334 }, { "epoch": 0.07962808439623316, "grad_norm": 3.455589532852173, "learning_rate": 9.977658269803311e-05, "loss": 1.6291, "step": 1336 }, { "epoch": 0.0797472881153892, "grad_norm": 3.5574207305908203, "learning_rate": 9.977568880462397e-05, "loss": 1.958, "step": 1338 }, { "epoch": 0.07986649183454524, "grad_norm": 3.4736530780792236, "learning_rate": 9.977479313056815e-05, "loss": 1.8002, "step": 1340 }, { "epoch": 0.07998569555370127, "grad_norm": 3.582127094268799, "learning_rate": 9.977389567589766e-05, "loss": 1.8842, "step": 1342 }, { "epoch": 0.08010489927285731, "grad_norm": 3.3700969219207764, "learning_rate": 9.977299644064463e-05, "loss": 1.7929, "step": 1344 }, { "epoch": 0.08022410299201335, "grad_norm": 3.362180709838867, "learning_rate": 9.977209542484121e-05, "loss": 1.6389, "step": 1346 }, { "epoch": 0.08034330671116939, "grad_norm": 3.773305654525757, "learning_rate": 9.977119262851967e-05, "loss": 1.7301, "step": 1348 }, { "epoch": 0.08046251043032543, "grad_norm": 3.8943870067596436, "learning_rate": 9.977028805171225e-05, "loss": 1.9031, "step": 1350 }, { "epoch": 0.08058171414948147, "grad_norm": 3.8066773414611816, "learning_rate": 9.976938169445134e-05, "loss": 1.741, "step": 1352 }, { "epoch": 0.0807009178686375, "grad_norm": 3.58738374710083, "learning_rate": 9.976847355676938e-05, "loss": 1.7452, "step": 1354 }, { "epoch": 0.08082012158779354, "grad_norm": 3.702650785446167, "learning_rate": 9.976756363869883e-05, "loss": 1.8143, "step": 1356 }, { "epoch": 0.08093932530694958, "grad_norm": 3.6582441329956055, "learning_rate": 9.976665194027225e-05, "loss": 1.8999, "step": 1358 }, { "epoch": 0.08105852902610561, "grad_norm": 3.596874713897705, "learning_rate": 9.976573846152226e-05, "loss": 1.6849, "step": 1360 }, { "epoch": 0.08117773274526165, "grad_norm": 3.343517780303955, "learning_rate": 9.976482320248154e-05, "loss": 1.6102, "step": 1362 }, { "epoch": 0.08129693646441769, "grad_norm": 3.8453445434570312, "learning_rate": 9.97639061631828e-05, "loss": 1.8322, "step": 1364 }, { "epoch": 0.08141614018357372, "grad_norm": 3.4844932556152344, "learning_rate": 9.97629873436589e-05, "loss": 1.852, "step": 1366 }, { "epoch": 0.08153534390272976, "grad_norm": 3.4822773933410645, "learning_rate": 9.976206674394267e-05, "loss": 1.9348, "step": 1368 }, { "epoch": 0.0816545476218858, "grad_norm": 3.5315051078796387, "learning_rate": 9.976114436406706e-05, "loss": 1.6915, "step": 1370 }, { "epoch": 0.08177375134104184, "grad_norm": 3.366960287094116, "learning_rate": 9.976022020406505e-05, "loss": 1.7148, "step": 1372 }, { "epoch": 0.08189295506019788, "grad_norm": 3.8711090087890625, "learning_rate": 9.975929426396971e-05, "loss": 1.8062, "step": 1374 }, { "epoch": 0.08201215877935392, "grad_norm": 3.314504384994507, "learning_rate": 9.975836654381416e-05, "loss": 1.621, "step": 1376 }, { "epoch": 0.08213136249850995, "grad_norm": 3.542593002319336, "learning_rate": 9.97574370436316e-05, "loss": 1.5616, "step": 1378 }, { "epoch": 0.082250566217666, "grad_norm": 3.5327260494232178, "learning_rate": 9.975650576345528e-05, "loss": 1.755, "step": 1380 }, { "epoch": 0.08236976993682203, "grad_norm": 3.35787034034729, "learning_rate": 9.975557270331851e-05, "loss": 1.6562, "step": 1382 }, { "epoch": 0.08248897365597807, "grad_norm": 3.142829656600952, "learning_rate": 9.975463786325465e-05, "loss": 1.6164, "step": 1384 }, { "epoch": 0.08260817737513411, "grad_norm": 3.6782703399658203, "learning_rate": 9.975370124329719e-05, "loss": 1.7807, "step": 1386 }, { "epoch": 0.08272738109429015, "grad_norm": 3.6840991973876953, "learning_rate": 9.975276284347958e-05, "loss": 1.7354, "step": 1388 }, { "epoch": 0.08284658481344619, "grad_norm": 3.878715753555298, "learning_rate": 9.975182266383542e-05, "loss": 1.6777, "step": 1390 }, { "epoch": 0.08296578853260221, "grad_norm": 3.4451255798339844, "learning_rate": 9.975088070439834e-05, "loss": 1.7291, "step": 1392 }, { "epoch": 0.08308499225175825, "grad_norm": 3.8603785037994385, "learning_rate": 9.974993696520204e-05, "loss": 1.792, "step": 1394 }, { "epoch": 0.08320419597091429, "grad_norm": 3.6420559883117676, "learning_rate": 9.974899144628027e-05, "loss": 1.6976, "step": 1396 }, { "epoch": 0.08332339969007033, "grad_norm": 3.647757053375244, "learning_rate": 9.974804414766688e-05, "loss": 2.0348, "step": 1398 }, { "epoch": 0.08344260340922637, "grad_norm": 3.681328535079956, "learning_rate": 9.974709506939572e-05, "loss": 1.7728, "step": 1400 }, { "epoch": 0.0835618071283824, "grad_norm": 3.46156907081604, "learning_rate": 9.974614421150076e-05, "loss": 1.6765, "step": 1402 }, { "epoch": 0.08368101084753844, "grad_norm": 3.530211925506592, "learning_rate": 9.974519157401602e-05, "loss": 1.5881, "step": 1404 }, { "epoch": 0.08380021456669448, "grad_norm": 3.9834890365600586, "learning_rate": 9.974423715697559e-05, "loss": 1.7467, "step": 1406 }, { "epoch": 0.08391941828585052, "grad_norm": 3.755049228668213, "learning_rate": 9.974328096041358e-05, "loss": 1.8433, "step": 1408 }, { "epoch": 0.08403862200500656, "grad_norm": 3.8339991569519043, "learning_rate": 9.974232298436424e-05, "loss": 1.7898, "step": 1410 }, { "epoch": 0.0841578257241626, "grad_norm": 3.9891273975372314, "learning_rate": 9.974136322886179e-05, "loss": 1.8549, "step": 1412 }, { "epoch": 0.08427702944331864, "grad_norm": 3.4567134380340576, "learning_rate": 9.97404016939406e-05, "loss": 1.7399, "step": 1414 }, { "epoch": 0.08439623316247467, "grad_norm": 3.2299134731292725, "learning_rate": 9.973943837963506e-05, "loss": 1.7492, "step": 1416 }, { "epoch": 0.08451543688163071, "grad_norm": 3.767427921295166, "learning_rate": 9.973847328597963e-05, "loss": 1.6724, "step": 1418 }, { "epoch": 0.08463464060078675, "grad_norm": 3.735825777053833, "learning_rate": 9.973750641300883e-05, "loss": 1.8367, "step": 1420 }, { "epoch": 0.08475384431994278, "grad_norm": 3.388457775115967, "learning_rate": 9.973653776075724e-05, "loss": 1.7954, "step": 1422 }, { "epoch": 0.08487304803909881, "grad_norm": 3.462625741958618, "learning_rate": 9.973556732925953e-05, "loss": 1.8003, "step": 1424 }, { "epoch": 0.08499225175825485, "grad_norm": 3.358743667602539, "learning_rate": 9.97345951185504e-05, "loss": 1.796, "step": 1426 }, { "epoch": 0.08511145547741089, "grad_norm": 3.627706289291382, "learning_rate": 9.973362112866464e-05, "loss": 1.9184, "step": 1428 }, { "epoch": 0.08523065919656693, "grad_norm": 3.5576324462890625, "learning_rate": 9.973264535963711e-05, "loss": 1.8212, "step": 1430 }, { "epoch": 0.08534986291572297, "grad_norm": 3.4222915172576904, "learning_rate": 9.973166781150267e-05, "loss": 1.7529, "step": 1432 }, { "epoch": 0.085469066634879, "grad_norm": 3.2059643268585205, "learning_rate": 9.973068848429634e-05, "loss": 1.734, "step": 1434 }, { "epoch": 0.08558827035403505, "grad_norm": 3.210353136062622, "learning_rate": 9.972970737805311e-05, "loss": 1.7681, "step": 1436 }, { "epoch": 0.08570747407319108, "grad_norm": 3.583374261856079, "learning_rate": 9.972872449280812e-05, "loss": 1.8045, "step": 1438 }, { "epoch": 0.08582667779234712, "grad_norm": 3.6153831481933594, "learning_rate": 9.972773982859649e-05, "loss": 1.7968, "step": 1440 }, { "epoch": 0.08594588151150316, "grad_norm": 3.5514678955078125, "learning_rate": 9.972675338545347e-05, "loss": 1.7141, "step": 1442 }, { "epoch": 0.0860650852306592, "grad_norm": 3.242830753326416, "learning_rate": 9.972576516341436e-05, "loss": 1.7587, "step": 1444 }, { "epoch": 0.08618428894981524, "grad_norm": 3.6983964443206787, "learning_rate": 9.972477516251448e-05, "loss": 1.7795, "step": 1446 }, { "epoch": 0.08630349266897128, "grad_norm": 3.285928726196289, "learning_rate": 9.972378338278925e-05, "loss": 1.6696, "step": 1448 }, { "epoch": 0.08642269638812732, "grad_norm": 3.5635788440704346, "learning_rate": 9.972278982427418e-05, "loss": 1.824, "step": 1450 }, { "epoch": 0.08654190010728335, "grad_norm": 3.4196910858154297, "learning_rate": 9.972179448700479e-05, "loss": 1.7364, "step": 1452 }, { "epoch": 0.08666110382643938, "grad_norm": 3.6765685081481934, "learning_rate": 9.97207973710167e-05, "loss": 1.7834, "step": 1454 }, { "epoch": 0.08678030754559542, "grad_norm": 3.9781832695007324, "learning_rate": 9.971979847634553e-05, "loss": 1.9106, "step": 1456 }, { "epoch": 0.08689951126475146, "grad_norm": 3.8840088844299316, "learning_rate": 9.97187978030271e-05, "loss": 1.7384, "step": 1458 }, { "epoch": 0.0870187149839075, "grad_norm": 3.8144822120666504, "learning_rate": 9.971779535109714e-05, "loss": 1.5908, "step": 1460 }, { "epoch": 0.08713791870306353, "grad_norm": 3.7436625957489014, "learning_rate": 9.971679112059154e-05, "loss": 1.8143, "step": 1462 }, { "epoch": 0.08725712242221957, "grad_norm": 3.280153274536133, "learning_rate": 9.97157851115462e-05, "loss": 1.8224, "step": 1464 }, { "epoch": 0.08737632614137561, "grad_norm": 3.3518266677856445, "learning_rate": 9.971477732399715e-05, "loss": 1.7053, "step": 1466 }, { "epoch": 0.08749552986053165, "grad_norm": 3.4488675594329834, "learning_rate": 9.97137677579804e-05, "loss": 1.8116, "step": 1468 }, { "epoch": 0.08761473357968769, "grad_norm": 3.6042869091033936, "learning_rate": 9.97127564135321e-05, "loss": 1.7381, "step": 1470 }, { "epoch": 0.08773393729884373, "grad_norm": 3.392010450363159, "learning_rate": 9.971174329068839e-05, "loss": 1.9255, "step": 1472 }, { "epoch": 0.08785314101799976, "grad_norm": 3.7100117206573486, "learning_rate": 9.971072838948555e-05, "loss": 1.7445, "step": 1474 }, { "epoch": 0.0879723447371558, "grad_norm": 3.4481704235076904, "learning_rate": 9.970971170995988e-05, "loss": 1.7155, "step": 1476 }, { "epoch": 0.08809154845631184, "grad_norm": 3.5754690170288086, "learning_rate": 9.970869325214772e-05, "loss": 1.6698, "step": 1478 }, { "epoch": 0.08821075217546788, "grad_norm": 3.4560627937316895, "learning_rate": 9.970767301608554e-05, "loss": 1.6701, "step": 1480 }, { "epoch": 0.08832995589462392, "grad_norm": 3.317169427871704, "learning_rate": 9.970665100180983e-05, "loss": 1.7863, "step": 1482 }, { "epoch": 0.08844915961377996, "grad_norm": 3.524965524673462, "learning_rate": 9.970562720935712e-05, "loss": 1.6894, "step": 1484 }, { "epoch": 0.08856836333293598, "grad_norm": 3.6242361068725586, "learning_rate": 9.970460163876408e-05, "loss": 1.7705, "step": 1486 }, { "epoch": 0.08868756705209202, "grad_norm": 3.1650779247283936, "learning_rate": 9.970357429006738e-05, "loss": 1.6626, "step": 1488 }, { "epoch": 0.08880677077124806, "grad_norm": 3.4861302375793457, "learning_rate": 9.970254516330376e-05, "loss": 1.7556, "step": 1490 }, { "epoch": 0.0889259744904041, "grad_norm": 3.4656131267547607, "learning_rate": 9.970151425851004e-05, "loss": 1.8112, "step": 1492 }, { "epoch": 0.08904517820956014, "grad_norm": 3.4824657440185547, "learning_rate": 9.970048157572312e-05, "loss": 1.7688, "step": 1494 }, { "epoch": 0.08916438192871617, "grad_norm": 3.4366374015808105, "learning_rate": 9.96994471149799e-05, "loss": 1.8313, "step": 1496 }, { "epoch": 0.08928358564787221, "grad_norm": 3.384801149368286, "learning_rate": 9.969841087631742e-05, "loss": 1.6503, "step": 1498 }, { "epoch": 0.08940278936702825, "grad_norm": 3.6648294925689697, "learning_rate": 9.969737285977275e-05, "loss": 1.6766, "step": 1500 }, { "epoch": 0.08952199308618429, "grad_norm": 3.55092716217041, "learning_rate": 9.969633306538302e-05, "loss": 1.8865, "step": 1502 }, { "epoch": 0.08964119680534033, "grad_norm": 3.51655650138855, "learning_rate": 9.969529149318541e-05, "loss": 1.6674, "step": 1504 }, { "epoch": 0.08976040052449637, "grad_norm": 3.174532890319824, "learning_rate": 9.96942481432172e-05, "loss": 1.7015, "step": 1506 }, { "epoch": 0.0898796042436524, "grad_norm": 3.3113887310028076, "learning_rate": 9.96932030155157e-05, "loss": 1.7542, "step": 1508 }, { "epoch": 0.08999880796280844, "grad_norm": 3.513577461242676, "learning_rate": 9.96921561101183e-05, "loss": 1.8592, "step": 1510 }, { "epoch": 0.09011801168196448, "grad_norm": 3.580087184906006, "learning_rate": 9.969110742706246e-05, "loss": 1.8279, "step": 1512 }, { "epoch": 0.09023721540112052, "grad_norm": 3.546623706817627, "learning_rate": 9.96900569663857e-05, "loss": 1.735, "step": 1514 }, { "epoch": 0.09035641912027655, "grad_norm": 3.162435293197632, "learning_rate": 9.968900472812559e-05, "loss": 1.7253, "step": 1516 }, { "epoch": 0.09047562283943258, "grad_norm": 3.7489843368530273, "learning_rate": 9.968795071231975e-05, "loss": 1.9047, "step": 1518 }, { "epoch": 0.09059482655858862, "grad_norm": 3.571848154067993, "learning_rate": 9.968689491900593e-05, "loss": 1.7966, "step": 1520 }, { "epoch": 0.09071403027774466, "grad_norm": 3.3313937187194824, "learning_rate": 9.968583734822188e-05, "loss": 1.7117, "step": 1522 }, { "epoch": 0.0908332339969007, "grad_norm": 3.6510493755340576, "learning_rate": 9.968477800000542e-05, "loss": 1.8002, "step": 1524 }, { "epoch": 0.09095243771605674, "grad_norm": 3.2988204956054688, "learning_rate": 9.968371687439446e-05, "loss": 1.8651, "step": 1526 }, { "epoch": 0.09107164143521278, "grad_norm": 3.4793357849121094, "learning_rate": 9.968265397142695e-05, "loss": 1.8352, "step": 1528 }, { "epoch": 0.09119084515436882, "grad_norm": 3.574395179748535, "learning_rate": 9.968158929114092e-05, "loss": 1.7599, "step": 1530 }, { "epoch": 0.09131004887352485, "grad_norm": 3.5748822689056396, "learning_rate": 9.968052283357446e-05, "loss": 1.782, "step": 1532 }, { "epoch": 0.09142925259268089, "grad_norm": 3.2783772945404053, "learning_rate": 9.967945459876571e-05, "loss": 1.7137, "step": 1534 }, { "epoch": 0.09154845631183693, "grad_norm": 3.6078343391418457, "learning_rate": 9.967838458675291e-05, "loss": 1.7895, "step": 1536 }, { "epoch": 0.09166766003099297, "grad_norm": 3.8707895278930664, "learning_rate": 9.96773127975743e-05, "loss": 1.8086, "step": 1538 }, { "epoch": 0.09178686375014901, "grad_norm": 3.4192657470703125, "learning_rate": 9.967623923126826e-05, "loss": 1.736, "step": 1540 }, { "epoch": 0.09190606746930505, "grad_norm": 3.5482983589172363, "learning_rate": 9.967516388787319e-05, "loss": 1.8622, "step": 1542 }, { "epoch": 0.09202527118846109, "grad_norm": 3.7126691341400146, "learning_rate": 9.967408676742751e-05, "loss": 1.6863, "step": 1544 }, { "epoch": 0.09214447490761712, "grad_norm": 3.80886173248291, "learning_rate": 9.96730078699698e-05, "loss": 1.8234, "step": 1546 }, { "epoch": 0.09226367862677315, "grad_norm": 3.4129858016967773, "learning_rate": 9.967192719553866e-05, "loss": 1.8052, "step": 1548 }, { "epoch": 0.09238288234592919, "grad_norm": 3.194262742996216, "learning_rate": 9.967084474417272e-05, "loss": 1.7544, "step": 1550 }, { "epoch": 0.09250208606508523, "grad_norm": 3.668684244155884, "learning_rate": 9.966976051591073e-05, "loss": 1.7585, "step": 1552 }, { "epoch": 0.09262128978424126, "grad_norm": 3.5179789066314697, "learning_rate": 9.966867451079145e-05, "loss": 1.6725, "step": 1554 }, { "epoch": 0.0927404935033973, "grad_norm": 3.7375104427337646, "learning_rate": 9.966758672885374e-05, "loss": 1.939, "step": 1556 }, { "epoch": 0.09285969722255334, "grad_norm": 3.765428304672241, "learning_rate": 9.966649717013653e-05, "loss": 1.676, "step": 1558 }, { "epoch": 0.09297890094170938, "grad_norm": 3.443650245666504, "learning_rate": 9.966540583467877e-05, "loss": 1.6827, "step": 1560 }, { "epoch": 0.09309810466086542, "grad_norm": 3.7900826930999756, "learning_rate": 9.966431272251952e-05, "loss": 1.7976, "step": 1562 }, { "epoch": 0.09321730838002146, "grad_norm": 3.1369621753692627, "learning_rate": 9.966321783369788e-05, "loss": 1.6686, "step": 1564 }, { "epoch": 0.0933365120991775, "grad_norm": 3.854391098022461, "learning_rate": 9.966212116825303e-05, "loss": 1.8085, "step": 1566 }, { "epoch": 0.09345571581833353, "grad_norm": 3.4513425827026367, "learning_rate": 9.966102272622418e-05, "loss": 1.7881, "step": 1568 }, { "epoch": 0.09357491953748957, "grad_norm": 3.2307186126708984, "learning_rate": 9.965992250765062e-05, "loss": 1.6931, "step": 1570 }, { "epoch": 0.09369412325664561, "grad_norm": 3.2324209213256836, "learning_rate": 9.965882051257173e-05, "loss": 1.8859, "step": 1572 }, { "epoch": 0.09381332697580165, "grad_norm": 3.2629523277282715, "learning_rate": 9.965771674102693e-05, "loss": 1.7738, "step": 1574 }, { "epoch": 0.09393253069495769, "grad_norm": 3.7953572273254395, "learning_rate": 9.965661119305569e-05, "loss": 1.7236, "step": 1576 }, { "epoch": 0.09405173441411371, "grad_norm": 3.7325174808502197, "learning_rate": 9.965550386869759e-05, "loss": 1.6849, "step": 1578 }, { "epoch": 0.09417093813326975, "grad_norm": 3.5052075386047363, "learning_rate": 9.965439476799221e-05, "loss": 1.7732, "step": 1580 }, { "epoch": 0.09429014185242579, "grad_norm": 3.17322039604187, "learning_rate": 9.965328389097924e-05, "loss": 1.5824, "step": 1582 }, { "epoch": 0.09440934557158183, "grad_norm": 3.6295242309570312, "learning_rate": 9.96521712376984e-05, "loss": 1.9074, "step": 1584 }, { "epoch": 0.09452854929073787, "grad_norm": 3.7566850185394287, "learning_rate": 9.965105680818954e-05, "loss": 1.9483, "step": 1586 }, { "epoch": 0.0946477530098939, "grad_norm": 3.3706283569335938, "learning_rate": 9.964994060249248e-05, "loss": 1.7541, "step": 1588 }, { "epoch": 0.09476695672904994, "grad_norm": 3.510632276535034, "learning_rate": 9.964882262064718e-05, "loss": 1.723, "step": 1590 }, { "epoch": 0.09488616044820598, "grad_norm": 3.4004416465759277, "learning_rate": 9.964770286269363e-05, "loss": 1.6332, "step": 1592 }, { "epoch": 0.09500536416736202, "grad_norm": 3.6042771339416504, "learning_rate": 9.964658132867186e-05, "loss": 1.7854, "step": 1594 }, { "epoch": 0.09512456788651806, "grad_norm": 2.9847424030303955, "learning_rate": 9.964545801862201e-05, "loss": 1.5774, "step": 1596 }, { "epoch": 0.0952437716056741, "grad_norm": 3.605445384979248, "learning_rate": 9.964433293258429e-05, "loss": 1.666, "step": 1598 }, { "epoch": 0.09536297532483014, "grad_norm": 3.337280511856079, "learning_rate": 9.964320607059892e-05, "loss": 1.5577, "step": 1600 }, { "epoch": 0.09548217904398618, "grad_norm": 3.4570605754852295, "learning_rate": 9.96420774327062e-05, "loss": 1.8153, "step": 1602 }, { "epoch": 0.09560138276314221, "grad_norm": 3.469172477722168, "learning_rate": 9.964094701894653e-05, "loss": 1.8469, "step": 1604 }, { "epoch": 0.09572058648229825, "grad_norm": 3.6258788108825684, "learning_rate": 9.963981482936034e-05, "loss": 1.6985, "step": 1606 }, { "epoch": 0.09583979020145429, "grad_norm": 3.71018648147583, "learning_rate": 9.963868086398814e-05, "loss": 1.8215, "step": 1608 }, { "epoch": 0.09595899392061032, "grad_norm": 3.404317855834961, "learning_rate": 9.963754512287048e-05, "loss": 1.6509, "step": 1610 }, { "epoch": 0.09607819763976636, "grad_norm": 3.7699508666992188, "learning_rate": 9.9636407606048e-05, "loss": 1.8011, "step": 1612 }, { "epoch": 0.0961974013589224, "grad_norm": 3.3621344566345215, "learning_rate": 9.96352683135614e-05, "loss": 1.7025, "step": 1614 }, { "epoch": 0.09631660507807843, "grad_norm": 3.3212685585021973, "learning_rate": 9.963412724545141e-05, "loss": 1.7377, "step": 1616 }, { "epoch": 0.09643580879723447, "grad_norm": 3.3342528343200684, "learning_rate": 9.963298440175888e-05, "loss": 1.7035, "step": 1618 }, { "epoch": 0.09655501251639051, "grad_norm": 3.571611166000366, "learning_rate": 9.963183978252468e-05, "loss": 1.5749, "step": 1620 }, { "epoch": 0.09667421623554655, "grad_norm": 3.885051727294922, "learning_rate": 9.963069338778974e-05, "loss": 1.7736, "step": 1622 }, { "epoch": 0.09679341995470259, "grad_norm": 3.3324685096740723, "learning_rate": 9.962954521759512e-05, "loss": 1.8014, "step": 1624 }, { "epoch": 0.09691262367385862, "grad_norm": 3.409446954727173, "learning_rate": 9.962839527198183e-05, "loss": 1.8483, "step": 1626 }, { "epoch": 0.09703182739301466, "grad_norm": 3.2565219402313232, "learning_rate": 9.962724355099106e-05, "loss": 1.6643, "step": 1628 }, { "epoch": 0.0971510311121707, "grad_norm": 3.324794292449951, "learning_rate": 9.962609005466397e-05, "loss": 1.7448, "step": 1630 }, { "epoch": 0.09727023483132674, "grad_norm": 3.5139787197113037, "learning_rate": 9.962493478304187e-05, "loss": 1.7145, "step": 1632 }, { "epoch": 0.09738943855048278, "grad_norm": 3.6599738597869873, "learning_rate": 9.962377773616604e-05, "loss": 1.6759, "step": 1634 }, { "epoch": 0.09750864226963882, "grad_norm": 3.429868698120117, "learning_rate": 9.962261891407791e-05, "loss": 1.6488, "step": 1636 }, { "epoch": 0.09762784598879486, "grad_norm": 3.5038788318634033, "learning_rate": 9.962145831681893e-05, "loss": 1.7197, "step": 1638 }, { "epoch": 0.0977470497079509, "grad_norm": 3.5144076347351074, "learning_rate": 9.962029594443058e-05, "loss": 1.6632, "step": 1640 }, { "epoch": 0.09786625342710692, "grad_norm": 5.4111127853393555, "learning_rate": 9.961913179695449e-05, "loss": 1.6274, "step": 1642 }, { "epoch": 0.09798545714626296, "grad_norm": 3.2178304195404053, "learning_rate": 9.961796587443228e-05, "loss": 1.7424, "step": 1644 }, { "epoch": 0.098104660865419, "grad_norm": 3.438911199569702, "learning_rate": 9.961679817690566e-05, "loss": 1.6282, "step": 1646 }, { "epoch": 0.09822386458457504, "grad_norm": 3.339799404144287, "learning_rate": 9.961562870441641e-05, "loss": 1.5929, "step": 1648 }, { "epoch": 0.09834306830373107, "grad_norm": 3.609057903289795, "learning_rate": 9.961445745700637e-05, "loss": 1.7595, "step": 1650 }, { "epoch": 0.09846227202288711, "grad_norm": 3.3149733543395996, "learning_rate": 9.961328443471742e-05, "loss": 1.5912, "step": 1652 }, { "epoch": 0.09858147574204315, "grad_norm": 3.351043939590454, "learning_rate": 9.961210963759155e-05, "loss": 1.7362, "step": 1654 }, { "epoch": 0.09870067946119919, "grad_norm": 3.2802305221557617, "learning_rate": 9.961093306567075e-05, "loss": 1.7061, "step": 1656 }, { "epoch": 0.09881988318035523, "grad_norm": 3.723987102508545, "learning_rate": 9.960975471899717e-05, "loss": 1.832, "step": 1658 }, { "epoch": 0.09893908689951127, "grad_norm": 3.656142234802246, "learning_rate": 9.96085745976129e-05, "loss": 1.6897, "step": 1660 }, { "epoch": 0.0990582906186673, "grad_norm": 3.180243492126465, "learning_rate": 9.960739270156018e-05, "loss": 1.6723, "step": 1662 }, { "epoch": 0.09917749433782334, "grad_norm": 3.030062675476074, "learning_rate": 9.960620903088129e-05, "loss": 1.5449, "step": 1664 }, { "epoch": 0.09929669805697938, "grad_norm": 3.801417112350464, "learning_rate": 9.960502358561859e-05, "loss": 1.6751, "step": 1666 }, { "epoch": 0.09941590177613542, "grad_norm": 3.857790231704712, "learning_rate": 9.960383636581447e-05, "loss": 1.6668, "step": 1668 }, { "epoch": 0.09953510549529146, "grad_norm": 3.3846182823181152, "learning_rate": 9.96026473715114e-05, "loss": 1.6007, "step": 1670 }, { "epoch": 0.09965430921444748, "grad_norm": 3.308237075805664, "learning_rate": 9.960145660275193e-05, "loss": 1.6138, "step": 1672 }, { "epoch": 0.09977351293360352, "grad_norm": 3.467362403869629, "learning_rate": 9.960026405957865e-05, "loss": 1.7597, "step": 1674 }, { "epoch": 0.09989271665275956, "grad_norm": 3.3984901905059814, "learning_rate": 9.959906974203422e-05, "loss": 1.7648, "step": 1676 }, { "epoch": 0.1000119203719156, "grad_norm": 3.507359266281128, "learning_rate": 9.959787365016135e-05, "loss": 1.8353, "step": 1678 }, { "epoch": 0.10013112409107164, "grad_norm": 3.4426660537719727, "learning_rate": 9.959667578400284e-05, "loss": 1.659, "step": 1680 }, { "epoch": 0.10025032781022768, "grad_norm": 3.37581729888916, "learning_rate": 9.959547614360155e-05, "loss": 1.6606, "step": 1682 }, { "epoch": 0.10036953152938372, "grad_norm": 4.226818561553955, "learning_rate": 9.959427472900041e-05, "loss": 1.6779, "step": 1684 }, { "epoch": 0.10048873524853975, "grad_norm": 3.972050666809082, "learning_rate": 9.959307154024234e-05, "loss": 1.9374, "step": 1686 }, { "epoch": 0.10060793896769579, "grad_norm": 3.3915321826934814, "learning_rate": 9.959186657737045e-05, "loss": 1.681, "step": 1688 }, { "epoch": 0.10072714268685183, "grad_norm": 3.5887503623962402, "learning_rate": 9.959065984042781e-05, "loss": 1.8375, "step": 1690 }, { "epoch": 0.10084634640600787, "grad_norm": 3.075599193572998, "learning_rate": 9.958945132945758e-05, "loss": 1.7722, "step": 1692 }, { "epoch": 0.10096555012516391, "grad_norm": 3.4977991580963135, "learning_rate": 9.958824104450301e-05, "loss": 1.8045, "step": 1694 }, { "epoch": 0.10108475384431995, "grad_norm": 3.44573974609375, "learning_rate": 9.95870289856074e-05, "loss": 1.6387, "step": 1696 }, { "epoch": 0.10120395756347599, "grad_norm": 4.026516437530518, "learning_rate": 9.958581515281411e-05, "loss": 1.7529, "step": 1698 }, { "epoch": 0.10132316128263202, "grad_norm": 3.5956671237945557, "learning_rate": 9.958459954616655e-05, "loss": 1.7107, "step": 1700 }, { "epoch": 0.10144236500178806, "grad_norm": 3.5036942958831787, "learning_rate": 9.958338216570822e-05, "loss": 1.7173, "step": 1702 }, { "epoch": 0.10156156872094409, "grad_norm": 3.390690803527832, "learning_rate": 9.958216301148266e-05, "loss": 1.8144, "step": 1704 }, { "epoch": 0.10168077244010013, "grad_norm": 3.5691752433776855, "learning_rate": 9.958094208353348e-05, "loss": 1.6819, "step": 1706 }, { "epoch": 0.10179997615925616, "grad_norm": 3.2744674682617188, "learning_rate": 9.957971938190436e-05, "loss": 1.6488, "step": 1708 }, { "epoch": 0.1019191798784122, "grad_norm": 3.228999376296997, "learning_rate": 9.957849490663904e-05, "loss": 1.6936, "step": 1710 }, { "epoch": 0.10203838359756824, "grad_norm": 3.7009201049804688, "learning_rate": 9.957726865778134e-05, "loss": 1.6387, "step": 1712 }, { "epoch": 0.10215758731672428, "grad_norm": 3.3828113079071045, "learning_rate": 9.957604063537511e-05, "loss": 1.6908, "step": 1714 }, { "epoch": 0.10227679103588032, "grad_norm": 3.341613292694092, "learning_rate": 9.957481083946428e-05, "loss": 1.6723, "step": 1716 }, { "epoch": 0.10239599475503636, "grad_norm": 3.7579102516174316, "learning_rate": 9.957357927009283e-05, "loss": 1.7117, "step": 1718 }, { "epoch": 0.1025151984741924, "grad_norm": 3.812056064605713, "learning_rate": 9.957234592730487e-05, "loss": 1.6957, "step": 1720 }, { "epoch": 0.10263440219334843, "grad_norm": 3.480839490890503, "learning_rate": 9.957111081114445e-05, "loss": 1.7309, "step": 1722 }, { "epoch": 0.10275360591250447, "grad_norm": 3.5303592681884766, "learning_rate": 9.956987392165582e-05, "loss": 1.6129, "step": 1724 }, { "epoch": 0.10287280963166051, "grad_norm": 3.7383885383605957, "learning_rate": 9.956863525888318e-05, "loss": 1.8375, "step": 1726 }, { "epoch": 0.10299201335081655, "grad_norm": 3.6120355129241943, "learning_rate": 9.956739482287088e-05, "loss": 1.8224, "step": 1728 }, { "epoch": 0.10311121706997259, "grad_norm": 3.610412120819092, "learning_rate": 9.956615261366326e-05, "loss": 1.9062, "step": 1730 }, { "epoch": 0.10323042078912863, "grad_norm": 3.355319023132324, "learning_rate": 9.956490863130477e-05, "loss": 1.6555, "step": 1732 }, { "epoch": 0.10334962450828467, "grad_norm": 3.2773566246032715, "learning_rate": 9.956366287583991e-05, "loss": 1.7686, "step": 1734 }, { "epoch": 0.10346882822744069, "grad_norm": 3.3851256370544434, "learning_rate": 9.956241534731326e-05, "loss": 1.8735, "step": 1736 }, { "epoch": 0.10358803194659673, "grad_norm": 4.050483703613281, "learning_rate": 9.956116604576944e-05, "loss": 1.7292, "step": 1738 }, { "epoch": 0.10370723566575277, "grad_norm": 3.6238460540771484, "learning_rate": 9.955991497125311e-05, "loss": 1.6992, "step": 1740 }, { "epoch": 0.1038264393849088, "grad_norm": 3.357663631439209, "learning_rate": 9.95586621238091e-05, "loss": 1.6857, "step": 1742 }, { "epoch": 0.10394564310406484, "grad_norm": 3.347565174102783, "learning_rate": 9.955740750348215e-05, "loss": 1.7817, "step": 1744 }, { "epoch": 0.10406484682322088, "grad_norm": 3.3387134075164795, "learning_rate": 9.955615111031718e-05, "loss": 1.6514, "step": 1746 }, { "epoch": 0.10418405054237692, "grad_norm": 3.3899261951446533, "learning_rate": 9.955489294435914e-05, "loss": 1.6362, "step": 1748 }, { "epoch": 0.10430325426153296, "grad_norm": 3.209803819656372, "learning_rate": 9.955363300565301e-05, "loss": 1.85, "step": 1750 }, { "epoch": 0.104422457980689, "grad_norm": 3.6403088569641113, "learning_rate": 9.95523712942439e-05, "loss": 1.8052, "step": 1752 }, { "epoch": 0.10454166169984504, "grad_norm": 3.556954860687256, "learning_rate": 9.955110781017692e-05, "loss": 1.7028, "step": 1754 }, { "epoch": 0.10466086541900108, "grad_norm": 3.561685562133789, "learning_rate": 9.954984255349727e-05, "loss": 1.782, "step": 1756 }, { "epoch": 0.10478006913815711, "grad_norm": 3.6883599758148193, "learning_rate": 9.954857552425024e-05, "loss": 1.7361, "step": 1758 }, { "epoch": 0.10489927285731315, "grad_norm": 3.5283539295196533, "learning_rate": 9.954730672248111e-05, "loss": 1.8567, "step": 1760 }, { "epoch": 0.10501847657646919, "grad_norm": 3.405984401702881, "learning_rate": 9.954603614823531e-05, "loss": 1.707, "step": 1762 }, { "epoch": 0.10513768029562523, "grad_norm": 3.216557502746582, "learning_rate": 9.954476380155828e-05, "loss": 1.6697, "step": 1764 }, { "epoch": 0.10525688401478125, "grad_norm": 3.224238872528076, "learning_rate": 9.954348968249551e-05, "loss": 1.6906, "step": 1766 }, { "epoch": 0.1053760877339373, "grad_norm": 3.442073106765747, "learning_rate": 9.954221379109263e-05, "loss": 1.7592, "step": 1768 }, { "epoch": 0.10549529145309333, "grad_norm": 3.5882503986358643, "learning_rate": 9.954093612739523e-05, "loss": 1.7164, "step": 1770 }, { "epoch": 0.10561449517224937, "grad_norm": 3.4438583850860596, "learning_rate": 9.953965669144907e-05, "loss": 1.7819, "step": 1772 }, { "epoch": 0.10573369889140541, "grad_norm": 3.286425828933716, "learning_rate": 9.953837548329987e-05, "loss": 1.684, "step": 1774 }, { "epoch": 0.10585290261056145, "grad_norm": 3.5444655418395996, "learning_rate": 9.95370925029935e-05, "loss": 1.7222, "step": 1776 }, { "epoch": 0.10597210632971749, "grad_norm": 3.279332160949707, "learning_rate": 9.953580775057583e-05, "loss": 1.5956, "step": 1778 }, { "epoch": 0.10609131004887352, "grad_norm": 3.335007905960083, "learning_rate": 9.953452122609283e-05, "loss": 1.6225, "step": 1780 }, { "epoch": 0.10621051376802956, "grad_norm": 3.719616413116455, "learning_rate": 9.953323292959054e-05, "loss": 1.7445, "step": 1782 }, { "epoch": 0.1063297174871856, "grad_norm": 3.402519941329956, "learning_rate": 9.953194286111502e-05, "loss": 1.8148, "step": 1784 }, { "epoch": 0.10644892120634164, "grad_norm": 3.0419936180114746, "learning_rate": 9.953065102071244e-05, "loss": 1.7216, "step": 1786 }, { "epoch": 0.10656812492549768, "grad_norm": 3.1532535552978516, "learning_rate": 9.952935740842901e-05, "loss": 1.5965, "step": 1788 }, { "epoch": 0.10668732864465372, "grad_norm": 4.080580711364746, "learning_rate": 9.9528062024311e-05, "loss": 1.8223, "step": 1790 }, { "epoch": 0.10680653236380976, "grad_norm": 3.4030680656433105, "learning_rate": 9.952676486840475e-05, "loss": 1.787, "step": 1792 }, { "epoch": 0.1069257360829658, "grad_norm": 3.46463942527771, "learning_rate": 9.952546594075666e-05, "loss": 1.7306, "step": 1794 }, { "epoch": 0.10704493980212183, "grad_norm": 3.6165833473205566, "learning_rate": 9.952416524141321e-05, "loss": 1.8512, "step": 1796 }, { "epoch": 0.10716414352127786, "grad_norm": 3.6449384689331055, "learning_rate": 9.952286277042092e-05, "loss": 1.7676, "step": 1798 }, { "epoch": 0.1072833472404339, "grad_norm": 3.1501736640930176, "learning_rate": 9.952155852782639e-05, "loss": 1.7426, "step": 1800 }, { "epoch": 0.10740255095958993, "grad_norm": 3.4605674743652344, "learning_rate": 9.952025251367627e-05, "loss": 1.7435, "step": 1802 }, { "epoch": 0.10752175467874597, "grad_norm": 3.1857547760009766, "learning_rate": 9.95189447280173e-05, "loss": 1.6244, "step": 1804 }, { "epoch": 0.10764095839790201, "grad_norm": 3.359135866165161, "learning_rate": 9.951763517089624e-05, "loss": 1.7692, "step": 1806 }, { "epoch": 0.10776016211705805, "grad_norm": 3.713874101638794, "learning_rate": 9.951632384235995e-05, "loss": 1.7057, "step": 1808 }, { "epoch": 0.10787936583621409, "grad_norm": 3.6781468391418457, "learning_rate": 9.951501074245533e-05, "loss": 1.7191, "step": 1810 }, { "epoch": 0.10799856955537013, "grad_norm": 3.140148878097534, "learning_rate": 9.951369587122937e-05, "loss": 1.6402, "step": 1812 }, { "epoch": 0.10811777327452617, "grad_norm": 3.548614978790283, "learning_rate": 9.95123792287291e-05, "loss": 1.7429, "step": 1814 }, { "epoch": 0.1082369769936822, "grad_norm": 3.4340994358062744, "learning_rate": 9.951106081500161e-05, "loss": 1.7566, "step": 1816 }, { "epoch": 0.10835618071283824, "grad_norm": 3.5486536026000977, "learning_rate": 9.950974063009408e-05, "loss": 1.8327, "step": 1818 }, { "epoch": 0.10847538443199428, "grad_norm": 3.3267314434051514, "learning_rate": 9.950841867405374e-05, "loss": 1.6308, "step": 1820 }, { "epoch": 0.10859458815115032, "grad_norm": 3.483440399169922, "learning_rate": 9.950709494692786e-05, "loss": 1.6816, "step": 1822 }, { "epoch": 0.10871379187030636, "grad_norm": 3.5145411491394043, "learning_rate": 9.950576944876382e-05, "loss": 1.6309, "step": 1824 }, { "epoch": 0.1088329955894624, "grad_norm": 3.5372071266174316, "learning_rate": 9.950444217960902e-05, "loss": 1.7674, "step": 1826 }, { "epoch": 0.10895219930861842, "grad_norm": 3.4969711303710938, "learning_rate": 9.950311313951096e-05, "loss": 1.8397, "step": 1828 }, { "epoch": 0.10907140302777446, "grad_norm": 3.4098939895629883, "learning_rate": 9.950178232851715e-05, "loss": 1.7202, "step": 1830 }, { "epoch": 0.1091906067469305, "grad_norm": 3.305128574371338, "learning_rate": 9.950044974667521e-05, "loss": 1.7734, "step": 1832 }, { "epoch": 0.10930981046608654, "grad_norm": 3.5323798656463623, "learning_rate": 9.949911539403283e-05, "loss": 1.8069, "step": 1834 }, { "epoch": 0.10942901418524258, "grad_norm": 3.4210221767425537, "learning_rate": 9.949777927063775e-05, "loss": 1.6627, "step": 1836 }, { "epoch": 0.10954821790439861, "grad_norm": 3.5854709148406982, "learning_rate": 9.949644137653773e-05, "loss": 1.8991, "step": 1838 }, { "epoch": 0.10966742162355465, "grad_norm": 3.274210214614868, "learning_rate": 9.949510171178065e-05, "loss": 1.5698, "step": 1840 }, { "epoch": 0.10978662534271069, "grad_norm": 3.4235482215881348, "learning_rate": 9.949376027641443e-05, "loss": 1.6641, "step": 1842 }, { "epoch": 0.10990582906186673, "grad_norm": 3.3966550827026367, "learning_rate": 9.949241707048709e-05, "loss": 1.6621, "step": 1844 }, { "epoch": 0.11002503278102277, "grad_norm": 3.3617868423461914, "learning_rate": 9.949107209404665e-05, "loss": 1.8084, "step": 1846 }, { "epoch": 0.11014423650017881, "grad_norm": 3.409029483795166, "learning_rate": 9.94897253471412e-05, "loss": 1.802, "step": 1848 }, { "epoch": 0.11026344021933485, "grad_norm": 2.974686861038208, "learning_rate": 9.948837682981897e-05, "loss": 1.6485, "step": 1850 }, { "epoch": 0.11038264393849088, "grad_norm": 3.478619337081909, "learning_rate": 9.948702654212819e-05, "loss": 1.7154, "step": 1852 }, { "epoch": 0.11050184765764692, "grad_norm": 3.751108169555664, "learning_rate": 9.948567448411713e-05, "loss": 1.7105, "step": 1854 }, { "epoch": 0.11062105137680296, "grad_norm": 3.4699466228485107, "learning_rate": 9.94843206558342e-05, "loss": 1.6619, "step": 1856 }, { "epoch": 0.110740255095959, "grad_norm": 3.3925888538360596, "learning_rate": 9.948296505732779e-05, "loss": 1.615, "step": 1858 }, { "epoch": 0.11085945881511503, "grad_norm": 3.5669193267822266, "learning_rate": 9.948160768864642e-05, "loss": 1.8116, "step": 1860 }, { "epoch": 0.11097866253427106, "grad_norm": 3.129812479019165, "learning_rate": 9.948024854983866e-05, "loss": 1.6589, "step": 1862 }, { "epoch": 0.1110978662534271, "grad_norm": 3.0907034873962402, "learning_rate": 9.94788876409531e-05, "loss": 1.6308, "step": 1864 }, { "epoch": 0.11121706997258314, "grad_norm": 4.0583696365356445, "learning_rate": 9.947752496203844e-05, "loss": 1.7367, "step": 1866 }, { "epoch": 0.11133627369173918, "grad_norm": 3.7000677585601807, "learning_rate": 9.947616051314343e-05, "loss": 1.6938, "step": 1868 }, { "epoch": 0.11145547741089522, "grad_norm": 3.489556312561035, "learning_rate": 9.947479429431687e-05, "loss": 1.615, "step": 1870 }, { "epoch": 0.11157468113005126, "grad_norm": 3.1975183486938477, "learning_rate": 9.947342630560765e-05, "loss": 1.6106, "step": 1872 }, { "epoch": 0.1116938848492073, "grad_norm": 3.67930006980896, "learning_rate": 9.94720565470647e-05, "loss": 1.6573, "step": 1874 }, { "epoch": 0.11181308856836333, "grad_norm": 3.574110269546509, "learning_rate": 9.947068501873701e-05, "loss": 1.5659, "step": 1876 }, { "epoch": 0.11193229228751937, "grad_norm": 3.465013265609741, "learning_rate": 9.946931172067368e-05, "loss": 1.7533, "step": 1878 }, { "epoch": 0.11205149600667541, "grad_norm": 3.319779396057129, "learning_rate": 9.946793665292379e-05, "loss": 1.7924, "step": 1880 }, { "epoch": 0.11217069972583145, "grad_norm": 3.19730806350708, "learning_rate": 9.946655981553656e-05, "loss": 1.651, "step": 1882 }, { "epoch": 0.11228990344498749, "grad_norm": 3.3580920696258545, "learning_rate": 9.946518120856123e-05, "loss": 1.7917, "step": 1884 }, { "epoch": 0.11240910716414353, "grad_norm": 3.385403633117676, "learning_rate": 9.946380083204714e-05, "loss": 1.7329, "step": 1886 }, { "epoch": 0.11252831088329956, "grad_norm": 3.5336432456970215, "learning_rate": 9.946241868604364e-05, "loss": 1.7771, "step": 1888 }, { "epoch": 0.1126475146024556, "grad_norm": 3.6844112873077393, "learning_rate": 9.94610347706002e-05, "loss": 1.8655, "step": 1890 }, { "epoch": 0.11276671832161163, "grad_norm": 3.6113383769989014, "learning_rate": 9.945964908576631e-05, "loss": 1.7077, "step": 1892 }, { "epoch": 0.11288592204076767, "grad_norm": 3.1706459522247314, "learning_rate": 9.945826163159155e-05, "loss": 1.5964, "step": 1894 }, { "epoch": 0.1130051257599237, "grad_norm": 3.5695066452026367, "learning_rate": 9.945687240812556e-05, "loss": 1.7324, "step": 1896 }, { "epoch": 0.11312432947907974, "grad_norm": 3.412782669067383, "learning_rate": 9.945548141541802e-05, "loss": 1.894, "step": 1898 }, { "epoch": 0.11324353319823578, "grad_norm": 3.8303656578063965, "learning_rate": 9.945408865351869e-05, "loss": 1.6533, "step": 1900 }, { "epoch": 0.11336273691739182, "grad_norm": 3.7388250827789307, "learning_rate": 9.945269412247741e-05, "loss": 1.845, "step": 1902 }, { "epoch": 0.11348194063654786, "grad_norm": 3.1500649452209473, "learning_rate": 9.945129782234408e-05, "loss": 1.5339, "step": 1904 }, { "epoch": 0.1136011443557039, "grad_norm": 3.41607403755188, "learning_rate": 9.944989975316862e-05, "loss": 1.8463, "step": 1906 }, { "epoch": 0.11372034807485994, "grad_norm": 3.374551773071289, "learning_rate": 9.944849991500105e-05, "loss": 1.6939, "step": 1908 }, { "epoch": 0.11383955179401598, "grad_norm": 3.4755256175994873, "learning_rate": 9.944709830789145e-05, "loss": 1.7902, "step": 1910 }, { "epoch": 0.11395875551317201, "grad_norm": 3.6185927391052246, "learning_rate": 9.944569493188997e-05, "loss": 1.6827, "step": 1912 }, { "epoch": 0.11407795923232805, "grad_norm": 3.523472309112549, "learning_rate": 9.944428978704681e-05, "loss": 1.5976, "step": 1914 }, { "epoch": 0.11419716295148409, "grad_norm": 3.491861343383789, "learning_rate": 9.944288287341221e-05, "loss": 1.7464, "step": 1916 }, { "epoch": 0.11431636667064013, "grad_norm": 3.2838351726531982, "learning_rate": 9.944147419103655e-05, "loss": 1.6494, "step": 1918 }, { "epoch": 0.11443557038979617, "grad_norm": 3.2097740173339844, "learning_rate": 9.944006373997017e-05, "loss": 1.6749, "step": 1920 }, { "epoch": 0.11455477410895219, "grad_norm": 3.2876529693603516, "learning_rate": 9.943865152026357e-05, "loss": 1.8266, "step": 1922 }, { "epoch": 0.11467397782810823, "grad_norm": 3.8941004276275635, "learning_rate": 9.943723753196726e-05, "loss": 1.6745, "step": 1924 }, { "epoch": 0.11479318154726427, "grad_norm": 3.254788637161255, "learning_rate": 9.943582177513179e-05, "loss": 1.6372, "step": 1926 }, { "epoch": 0.11491238526642031, "grad_norm": 3.2021636962890625, "learning_rate": 9.943440424980785e-05, "loss": 1.8122, "step": 1928 }, { "epoch": 0.11503158898557635, "grad_norm": 3.436769723892212, "learning_rate": 9.943298495604612e-05, "loss": 1.6225, "step": 1930 }, { "epoch": 0.11515079270473239, "grad_norm": 3.3997082710266113, "learning_rate": 9.94315638938974e-05, "loss": 1.5441, "step": 1932 }, { "epoch": 0.11526999642388842, "grad_norm": 3.67645001411438, "learning_rate": 9.943014106341251e-05, "loss": 1.6919, "step": 1934 }, { "epoch": 0.11538920014304446, "grad_norm": 3.2066595554351807, "learning_rate": 9.942871646464233e-05, "loss": 1.5171, "step": 1936 }, { "epoch": 0.1155084038622005, "grad_norm": 3.345633029937744, "learning_rate": 9.942729009763787e-05, "loss": 1.6501, "step": 1938 }, { "epoch": 0.11562760758135654, "grad_norm": 3.521437168121338, "learning_rate": 9.942586196245012e-05, "loss": 1.6423, "step": 1940 }, { "epoch": 0.11574681130051258, "grad_norm": 3.444786310195923, "learning_rate": 9.942443205913017e-05, "loss": 1.7258, "step": 1942 }, { "epoch": 0.11586601501966862, "grad_norm": 3.360020875930786, "learning_rate": 9.942300038772918e-05, "loss": 1.6448, "step": 1944 }, { "epoch": 0.11598521873882466, "grad_norm": 3.9691171646118164, "learning_rate": 9.942156694829839e-05, "loss": 1.7131, "step": 1946 }, { "epoch": 0.1161044224579807, "grad_norm": 3.4833900928497314, "learning_rate": 9.942013174088904e-05, "loss": 1.8274, "step": 1948 }, { "epoch": 0.11622362617713673, "grad_norm": 3.6458117961883545, "learning_rate": 9.941869476555248e-05, "loss": 1.7519, "step": 1950 }, { "epoch": 0.11634282989629277, "grad_norm": 3.6676666736602783, "learning_rate": 9.941725602234013e-05, "loss": 1.6924, "step": 1952 }, { "epoch": 0.1164620336154488, "grad_norm": 3.9696342945098877, "learning_rate": 9.941581551130346e-05, "loss": 1.8548, "step": 1954 }, { "epoch": 0.11658123733460483, "grad_norm": 3.4733967781066895, "learning_rate": 9.941437323249399e-05, "loss": 1.7171, "step": 1956 }, { "epoch": 0.11670044105376087, "grad_norm": 3.364511251449585, "learning_rate": 9.941292918596332e-05, "loss": 1.7501, "step": 1958 }, { "epoch": 0.11681964477291691, "grad_norm": 3.6049249172210693, "learning_rate": 9.941148337176311e-05, "loss": 1.6826, "step": 1960 }, { "epoch": 0.11693884849207295, "grad_norm": 3.5651447772979736, "learning_rate": 9.941003578994509e-05, "loss": 1.7247, "step": 1962 }, { "epoch": 0.11705805221122899, "grad_norm": 2.9510080814361572, "learning_rate": 9.940858644056102e-05, "loss": 1.5425, "step": 1964 }, { "epoch": 0.11717725593038503, "grad_norm": 3.160367488861084, "learning_rate": 9.940713532366278e-05, "loss": 1.6029, "step": 1966 }, { "epoch": 0.11729645964954107, "grad_norm": 3.6745691299438477, "learning_rate": 9.940568243930224e-05, "loss": 1.7481, "step": 1968 }, { "epoch": 0.1174156633686971, "grad_norm": 3.7236459255218506, "learning_rate": 9.940422778753142e-05, "loss": 1.6841, "step": 1970 }, { "epoch": 0.11753486708785314, "grad_norm": 3.328451633453369, "learning_rate": 9.940277136840232e-05, "loss": 1.7145, "step": 1972 }, { "epoch": 0.11765407080700918, "grad_norm": 3.5320537090301514, "learning_rate": 9.940131318196709e-05, "loss": 1.6697, "step": 1974 }, { "epoch": 0.11777327452616522, "grad_norm": 3.2120022773742676, "learning_rate": 9.939985322827783e-05, "loss": 1.7107, "step": 1976 }, { "epoch": 0.11789247824532126, "grad_norm": 3.1344070434570312, "learning_rate": 9.939839150738681e-05, "loss": 1.7258, "step": 1978 }, { "epoch": 0.1180116819644773, "grad_norm": 3.1034059524536133, "learning_rate": 9.939692801934631e-05, "loss": 1.6114, "step": 1980 }, { "epoch": 0.11813088568363334, "grad_norm": 3.5711371898651123, "learning_rate": 9.939546276420869e-05, "loss": 1.7734, "step": 1982 }, { "epoch": 0.11825008940278936, "grad_norm": 4.348093032836914, "learning_rate": 9.939399574202636e-05, "loss": 1.9755, "step": 1984 }, { "epoch": 0.1183692931219454, "grad_norm": 3.6294617652893066, "learning_rate": 9.939252695285181e-05, "loss": 1.7245, "step": 1986 }, { "epoch": 0.11848849684110144, "grad_norm": 3.1456336975097656, "learning_rate": 9.939105639673757e-05, "loss": 1.6104, "step": 1988 }, { "epoch": 0.11860770056025748, "grad_norm": 3.281660318374634, "learning_rate": 9.938958407373624e-05, "loss": 1.7197, "step": 1990 }, { "epoch": 0.11872690427941351, "grad_norm": 3.2393622398376465, "learning_rate": 9.938810998390052e-05, "loss": 1.6821, "step": 1992 }, { "epoch": 0.11884610799856955, "grad_norm": 3.4545412063598633, "learning_rate": 9.938663412728311e-05, "loss": 1.8219, "step": 1994 }, { "epoch": 0.11896531171772559, "grad_norm": 3.4243781566619873, "learning_rate": 9.938515650393684e-05, "loss": 1.7022, "step": 1996 }, { "epoch": 0.11908451543688163, "grad_norm": 3.425262689590454, "learning_rate": 9.938367711391455e-05, "loss": 1.7219, "step": 1998 }, { "epoch": 0.11920371915603767, "grad_norm": 3.1155765056610107, "learning_rate": 9.938219595726916e-05, "loss": 1.5884, "step": 2000 }, { "epoch": 0.11932292287519371, "grad_norm": 3.1735453605651855, "learning_rate": 9.938071303405366e-05, "loss": 1.6875, "step": 2002 }, { "epoch": 0.11944212659434975, "grad_norm": 3.040654420852661, "learning_rate": 9.937922834432111e-05, "loss": 1.5426, "step": 2004 }, { "epoch": 0.11956133031350578, "grad_norm": 3.2452969551086426, "learning_rate": 9.93777418881246e-05, "loss": 1.6883, "step": 2006 }, { "epoch": 0.11968053403266182, "grad_norm": 3.7112669944763184, "learning_rate": 9.937625366551732e-05, "loss": 1.7808, "step": 2008 }, { "epoch": 0.11979973775181786, "grad_norm": 3.273932695388794, "learning_rate": 9.937476367655252e-05, "loss": 1.4719, "step": 2010 }, { "epoch": 0.1199189414709739, "grad_norm": 3.683868885040283, "learning_rate": 9.937327192128348e-05, "loss": 1.7942, "step": 2012 }, { "epoch": 0.12003814519012994, "grad_norm": 3.5545945167541504, "learning_rate": 9.937177839976357e-05, "loss": 1.7922, "step": 2014 }, { "epoch": 0.12015734890928596, "grad_norm": 3.58520770072937, "learning_rate": 9.937028311204624e-05, "loss": 1.7665, "step": 2016 }, { "epoch": 0.120276552628442, "grad_norm": 3.7973697185516357, "learning_rate": 9.936878605818493e-05, "loss": 1.5885, "step": 2018 }, { "epoch": 0.12039575634759804, "grad_norm": 3.161785125732422, "learning_rate": 9.936728723823327e-05, "loss": 1.7519, "step": 2020 }, { "epoch": 0.12051496006675408, "grad_norm": 3.337517499923706, "learning_rate": 9.93657866522448e-05, "loss": 1.7059, "step": 2022 }, { "epoch": 0.12063416378591012, "grad_norm": 3.8723981380462646, "learning_rate": 9.936428430027327e-05, "loss": 1.7684, "step": 2024 }, { "epoch": 0.12075336750506616, "grad_norm": 3.293447256088257, "learning_rate": 9.936278018237239e-05, "loss": 1.8539, "step": 2026 }, { "epoch": 0.1208725712242222, "grad_norm": 3.307431697845459, "learning_rate": 9.936127429859595e-05, "loss": 1.5729, "step": 2028 }, { "epoch": 0.12099177494337823, "grad_norm": 3.231950044631958, "learning_rate": 9.935976664899785e-05, "loss": 1.7247, "step": 2030 }, { "epoch": 0.12111097866253427, "grad_norm": 3.2279698848724365, "learning_rate": 9.935825723363201e-05, "loss": 1.6626, "step": 2032 }, { "epoch": 0.12123018238169031, "grad_norm": 3.692302942276001, "learning_rate": 9.935674605255245e-05, "loss": 1.8231, "step": 2034 }, { "epoch": 0.12134938610084635, "grad_norm": 3.7381303310394287, "learning_rate": 9.935523310581319e-05, "loss": 1.7837, "step": 2036 }, { "epoch": 0.12146858982000239, "grad_norm": 3.438990592956543, "learning_rate": 9.935371839346839e-05, "loss": 1.6655, "step": 2038 }, { "epoch": 0.12158779353915843, "grad_norm": 3.5696167945861816, "learning_rate": 9.935220191557221e-05, "loss": 1.8109, "step": 2040 }, { "epoch": 0.12170699725831446, "grad_norm": 3.3362269401550293, "learning_rate": 9.935068367217892e-05, "loss": 1.7416, "step": 2042 }, { "epoch": 0.1218262009774705, "grad_norm": 3.366258144378662, "learning_rate": 9.934916366334282e-05, "loss": 1.7383, "step": 2044 }, { "epoch": 0.12194540469662654, "grad_norm": 4.022164344787598, "learning_rate": 9.934764188911827e-05, "loss": 1.8238, "step": 2046 }, { "epoch": 0.12206460841578257, "grad_norm": 2.980452060699463, "learning_rate": 9.934611834955976e-05, "loss": 1.598, "step": 2048 }, { "epoch": 0.1221838121349386, "grad_norm": 3.5217597484588623, "learning_rate": 9.934459304472175e-05, "loss": 1.6801, "step": 2050 }, { "epoch": 0.12230301585409464, "grad_norm": 3.967456579208374, "learning_rate": 9.93430659746588e-05, "loss": 1.8028, "step": 2052 }, { "epoch": 0.12242221957325068, "grad_norm": 3.076724052429199, "learning_rate": 9.934153713942557e-05, "loss": 1.8547, "step": 2054 }, { "epoch": 0.12254142329240672, "grad_norm": 3.3855772018432617, "learning_rate": 9.934000653907673e-05, "loss": 1.7127, "step": 2056 }, { "epoch": 0.12266062701156276, "grad_norm": 3.5960500240325928, "learning_rate": 9.933847417366704e-05, "loss": 1.7294, "step": 2058 }, { "epoch": 0.1227798307307188, "grad_norm": 3.5840511322021484, "learning_rate": 9.933694004325133e-05, "loss": 1.7473, "step": 2060 }, { "epoch": 0.12289903444987484, "grad_norm": 3.4078891277313232, "learning_rate": 9.933540414788445e-05, "loss": 1.777, "step": 2062 }, { "epoch": 0.12301823816903087, "grad_norm": 3.616844415664673, "learning_rate": 9.933386648762139e-05, "loss": 1.6822, "step": 2064 }, { "epoch": 0.12313744188818691, "grad_norm": 3.0425920486450195, "learning_rate": 9.933232706251711e-05, "loss": 1.6505, "step": 2066 }, { "epoch": 0.12325664560734295, "grad_norm": 3.4365592002868652, "learning_rate": 9.933078587262673e-05, "loss": 1.6501, "step": 2068 }, { "epoch": 0.12337584932649899, "grad_norm": 3.257793664932251, "learning_rate": 9.932924291800533e-05, "loss": 1.733, "step": 2070 }, { "epoch": 0.12349505304565503, "grad_norm": 3.0407397747039795, "learning_rate": 9.932769819870812e-05, "loss": 1.6461, "step": 2072 }, { "epoch": 0.12361425676481107, "grad_norm": 3.216494083404541, "learning_rate": 9.93261517147904e-05, "loss": 1.7751, "step": 2074 }, { "epoch": 0.1237334604839671, "grad_norm": 3.384068727493286, "learning_rate": 9.932460346630747e-05, "loss": 1.6026, "step": 2076 }, { "epoch": 0.12385266420312313, "grad_norm": 3.875842571258545, "learning_rate": 9.93230534533147e-05, "loss": 1.8655, "step": 2078 }, { "epoch": 0.12397186792227917, "grad_norm": 3.4703381061553955, "learning_rate": 9.932150167586756e-05, "loss": 1.7864, "step": 2080 }, { "epoch": 0.12409107164143521, "grad_norm": 3.6109187602996826, "learning_rate": 9.931994813402153e-05, "loss": 1.7373, "step": 2082 }, { "epoch": 0.12421027536059125, "grad_norm": 3.398914337158203, "learning_rate": 9.931839282783222e-05, "loss": 1.7327, "step": 2084 }, { "epoch": 0.12432947907974728, "grad_norm": 3.356861114501953, "learning_rate": 9.931683575735526e-05, "loss": 1.6303, "step": 2086 }, { "epoch": 0.12444868279890332, "grad_norm": 3.506976366043091, "learning_rate": 9.931527692264635e-05, "loss": 1.8476, "step": 2088 }, { "epoch": 0.12456788651805936, "grad_norm": 3.5592753887176514, "learning_rate": 9.931371632376127e-05, "loss": 1.591, "step": 2090 }, { "epoch": 0.1246870902372154, "grad_norm": 3.301497220993042, "learning_rate": 9.93121539607558e-05, "loss": 1.8114, "step": 2092 }, { "epoch": 0.12480629395637144, "grad_norm": 3.4363224506378174, "learning_rate": 9.931058983368589e-05, "loss": 1.6291, "step": 2094 }, { "epoch": 0.12492549767552748, "grad_norm": 3.2775492668151855, "learning_rate": 9.930902394260747e-05, "loss": 1.6535, "step": 2096 }, { "epoch": 0.12504470139468352, "grad_norm": 3.208679676055908, "learning_rate": 9.930745628757652e-05, "loss": 1.5779, "step": 2098 }, { "epoch": 0.12516390511383954, "grad_norm": 3.3363912105560303, "learning_rate": 9.930588686864918e-05, "loss": 1.7858, "step": 2100 }, { "epoch": 0.1252831088329956, "grad_norm": 3.6206283569335938, "learning_rate": 9.930431568588157e-05, "loss": 1.7052, "step": 2102 }, { "epoch": 0.12540231255215162, "grad_norm": 3.822934150695801, "learning_rate": 9.930274273932988e-05, "loss": 1.7208, "step": 2104 }, { "epoch": 0.12552151627130767, "grad_norm": 3.3782858848571777, "learning_rate": 9.930116802905041e-05, "loss": 1.8083, "step": 2106 }, { "epoch": 0.1256407199904637, "grad_norm": 3.07191801071167, "learning_rate": 9.929959155509947e-05, "loss": 1.6705, "step": 2108 }, { "epoch": 0.12575992370961975, "grad_norm": 3.2976081371307373, "learning_rate": 9.929801331753346e-05, "loss": 1.6514, "step": 2110 }, { "epoch": 0.12587912742877577, "grad_norm": 4.050699710845947, "learning_rate": 9.929643331640886e-05, "loss": 1.7677, "step": 2112 }, { "epoch": 0.12599833114793182, "grad_norm": 3.246612071990967, "learning_rate": 9.929485155178215e-05, "loss": 1.5457, "step": 2114 }, { "epoch": 0.12611753486708785, "grad_norm": 3.6563327312469482, "learning_rate": 9.929326802370996e-05, "loss": 1.6727, "step": 2116 }, { "epoch": 0.1262367385862439, "grad_norm": 3.3438384532928467, "learning_rate": 9.92916827322489e-05, "loss": 1.7601, "step": 2118 }, { "epoch": 0.12635594230539993, "grad_norm": 3.2082998752593994, "learning_rate": 9.929009567745571e-05, "loss": 1.8425, "step": 2120 }, { "epoch": 0.12647514602455598, "grad_norm": 3.3496880531311035, "learning_rate": 9.928850685938716e-05, "loss": 1.7366, "step": 2122 }, { "epoch": 0.126594349743712, "grad_norm": 3.3726274967193604, "learning_rate": 9.928691627810006e-05, "loss": 1.7512, "step": 2124 }, { "epoch": 0.12671355346286803, "grad_norm": 3.3224639892578125, "learning_rate": 9.928532393365136e-05, "loss": 1.6964, "step": 2126 }, { "epoch": 0.12683275718202408, "grad_norm": 3.3483335971832275, "learning_rate": 9.928372982609797e-05, "loss": 1.7166, "step": 2128 }, { "epoch": 0.1269519609011801, "grad_norm": 3.566387414932251, "learning_rate": 9.928213395549695e-05, "loss": 1.6744, "step": 2130 }, { "epoch": 0.12707116462033616, "grad_norm": 3.350903034210205, "learning_rate": 9.928053632190537e-05, "loss": 1.4955, "step": 2132 }, { "epoch": 0.12719036833949218, "grad_norm": 3.328598737716675, "learning_rate": 9.927893692538041e-05, "loss": 1.747, "step": 2134 }, { "epoch": 0.12730957205864823, "grad_norm": 3.6242523193359375, "learning_rate": 9.927733576597926e-05, "loss": 1.7854, "step": 2136 }, { "epoch": 0.12742877577780426, "grad_norm": 3.7558135986328125, "learning_rate": 9.927573284375922e-05, "loss": 1.7052, "step": 2138 }, { "epoch": 0.1275479794969603, "grad_norm": 3.447221040725708, "learning_rate": 9.927412815877762e-05, "loss": 1.6901, "step": 2140 }, { "epoch": 0.12766718321611634, "grad_norm": 3.235630750656128, "learning_rate": 9.927252171109186e-05, "loss": 1.6364, "step": 2142 }, { "epoch": 0.1277863869352724, "grad_norm": 3.1439766883850098, "learning_rate": 9.927091350075941e-05, "loss": 1.6278, "step": 2144 }, { "epoch": 0.1279055906544284, "grad_norm": 3.422398328781128, "learning_rate": 9.926930352783781e-05, "loss": 1.6407, "step": 2146 }, { "epoch": 0.12802479437358447, "grad_norm": 3.4489948749542236, "learning_rate": 9.926769179238466e-05, "loss": 1.6297, "step": 2148 }, { "epoch": 0.1281439980927405, "grad_norm": 3.171137571334839, "learning_rate": 9.92660782944576e-05, "loss": 1.5343, "step": 2150 }, { "epoch": 0.12826320181189654, "grad_norm": 3.538090944290161, "learning_rate": 9.926446303411434e-05, "loss": 1.5958, "step": 2152 }, { "epoch": 0.12838240553105257, "grad_norm": 3.517216920852661, "learning_rate": 9.926284601141271e-05, "loss": 1.5423, "step": 2154 }, { "epoch": 0.1285016092502086, "grad_norm": 3.3155319690704346, "learning_rate": 9.926122722641051e-05, "loss": 1.6403, "step": 2156 }, { "epoch": 0.12862081296936465, "grad_norm": 3.2894363403320312, "learning_rate": 9.925960667916568e-05, "loss": 1.6805, "step": 2158 }, { "epoch": 0.12874001668852067, "grad_norm": 3.14568829536438, "learning_rate": 9.925798436973616e-05, "loss": 1.6882, "step": 2160 }, { "epoch": 0.12885922040767672, "grad_norm": 2.9227170944213867, "learning_rate": 9.925636029818002e-05, "loss": 1.5006, "step": 2162 }, { "epoch": 0.12897842412683275, "grad_norm": 3.1640400886535645, "learning_rate": 9.925473446455533e-05, "loss": 1.5697, "step": 2164 }, { "epoch": 0.1290976278459888, "grad_norm": 3.568995475769043, "learning_rate": 9.925310686892027e-05, "loss": 1.6864, "step": 2166 }, { "epoch": 0.12921683156514482, "grad_norm": 3.6862823963165283, "learning_rate": 9.925147751133306e-05, "loss": 1.7356, "step": 2168 }, { "epoch": 0.12933603528430088, "grad_norm": 3.219540596008301, "learning_rate": 9.9249846391852e-05, "loss": 1.6234, "step": 2170 }, { "epoch": 0.1294552390034569, "grad_norm": 2.988090991973877, "learning_rate": 9.924821351053542e-05, "loss": 1.4327, "step": 2172 }, { "epoch": 0.12957444272261295, "grad_norm": 3.2474513053894043, "learning_rate": 9.924657886744172e-05, "loss": 1.5904, "step": 2174 }, { "epoch": 0.12969364644176898, "grad_norm": 3.1921586990356445, "learning_rate": 9.924494246262944e-05, "loss": 1.6275, "step": 2176 }, { "epoch": 0.12981285016092503, "grad_norm": 3.4152116775512695, "learning_rate": 9.924330429615705e-05, "loss": 1.7081, "step": 2178 }, { "epoch": 0.12993205388008106, "grad_norm": 3.478074550628662, "learning_rate": 9.924166436808318e-05, "loss": 1.7253, "step": 2180 }, { "epoch": 0.1300512575992371, "grad_norm": 3.134094715118408, "learning_rate": 9.92400226784665e-05, "loss": 1.6425, "step": 2182 }, { "epoch": 0.13017046131839313, "grad_norm": 3.418165922164917, "learning_rate": 9.923837922736574e-05, "loss": 1.6377, "step": 2184 }, { "epoch": 0.13028966503754918, "grad_norm": 3.2226247787475586, "learning_rate": 9.923673401483968e-05, "loss": 1.5644, "step": 2186 }, { "epoch": 0.1304088687567052, "grad_norm": 3.5689356327056885, "learning_rate": 9.92350870409472e-05, "loss": 1.7965, "step": 2188 }, { "epoch": 0.13052807247586123, "grad_norm": 3.250588893890381, "learning_rate": 9.923343830574718e-05, "loss": 1.6397, "step": 2190 }, { "epoch": 0.1306472761950173, "grad_norm": 3.171823024749756, "learning_rate": 9.923178780929864e-05, "loss": 1.684, "step": 2192 }, { "epoch": 0.1307664799141733, "grad_norm": 3.584585666656494, "learning_rate": 9.92301355516606e-05, "loss": 1.7049, "step": 2194 }, { "epoch": 0.13088568363332936, "grad_norm": 3.8022522926330566, "learning_rate": 9.922848153289217e-05, "loss": 1.8985, "step": 2196 }, { "epoch": 0.1310048873524854, "grad_norm": 3.5255532264709473, "learning_rate": 9.922682575305253e-05, "loss": 1.5998, "step": 2198 }, { "epoch": 0.13112409107164144, "grad_norm": 3.139300584793091, "learning_rate": 9.92251682122009e-05, "loss": 1.6473, "step": 2200 }, { "epoch": 0.13124329479079747, "grad_norm": 3.2644548416137695, "learning_rate": 9.922350891039657e-05, "loss": 1.5992, "step": 2202 }, { "epoch": 0.13136249850995352, "grad_norm": 3.373943328857422, "learning_rate": 9.922184784769891e-05, "loss": 1.6045, "step": 2204 }, { "epoch": 0.13148170222910954, "grad_norm": 3.473144292831421, "learning_rate": 9.922018502416736e-05, "loss": 1.6868, "step": 2206 }, { "epoch": 0.1316009059482656, "grad_norm": 3.185955286026001, "learning_rate": 9.921852043986137e-05, "loss": 1.6771, "step": 2208 }, { "epoch": 0.13172010966742162, "grad_norm": 3.219651460647583, "learning_rate": 9.921685409484051e-05, "loss": 1.6198, "step": 2210 }, { "epoch": 0.13183931338657767, "grad_norm": 3.314180612564087, "learning_rate": 9.921518598916438e-05, "loss": 1.9109, "step": 2212 }, { "epoch": 0.1319585171057337, "grad_norm": 3.0807442665100098, "learning_rate": 9.921351612289267e-05, "loss": 1.7757, "step": 2214 }, { "epoch": 0.13207772082488975, "grad_norm": 3.3761098384857178, "learning_rate": 9.92118444960851e-05, "loss": 1.636, "step": 2216 }, { "epoch": 0.13219692454404577, "grad_norm": 3.2762296199798584, "learning_rate": 9.921017110880149e-05, "loss": 1.7114, "step": 2218 }, { "epoch": 0.1323161282632018, "grad_norm": 3.1869373321533203, "learning_rate": 9.920849596110167e-05, "loss": 1.7921, "step": 2220 }, { "epoch": 0.13243533198235785, "grad_norm": 3.196213960647583, "learning_rate": 9.920681905304559e-05, "loss": 1.7137, "step": 2222 }, { "epoch": 0.13255453570151388, "grad_norm": 3.3762924671173096, "learning_rate": 9.920514038469323e-05, "loss": 1.7807, "step": 2224 }, { "epoch": 0.13267373942066993, "grad_norm": 3.457260847091675, "learning_rate": 9.920345995610465e-05, "loss": 1.6661, "step": 2226 }, { "epoch": 0.13279294313982595, "grad_norm": 3.2863082885742188, "learning_rate": 9.920177776733995e-05, "loss": 1.6105, "step": 2228 }, { "epoch": 0.132912146858982, "grad_norm": 3.87615704536438, "learning_rate": 9.920009381845933e-05, "loss": 1.9194, "step": 2230 }, { "epoch": 0.13303135057813803, "grad_norm": 3.4466052055358887, "learning_rate": 9.919840810952302e-05, "loss": 1.5864, "step": 2232 }, { "epoch": 0.13315055429729408, "grad_norm": 3.4098732471466064, "learning_rate": 9.919672064059131e-05, "loss": 1.7161, "step": 2234 }, { "epoch": 0.1332697580164501, "grad_norm": 3.4825944900512695, "learning_rate": 9.919503141172457e-05, "loss": 1.542, "step": 2236 }, { "epoch": 0.13338896173560616, "grad_norm": 3.412724733352661, "learning_rate": 9.919334042298324e-05, "loss": 1.6195, "step": 2238 }, { "epoch": 0.13350816545476218, "grad_norm": 3.3583824634552, "learning_rate": 9.919164767442784e-05, "loss": 1.6436, "step": 2240 }, { "epoch": 0.13362736917391824, "grad_norm": 3.854823112487793, "learning_rate": 9.918995316611887e-05, "loss": 1.6258, "step": 2242 }, { "epoch": 0.13374657289307426, "grad_norm": 3.569323778152466, "learning_rate": 9.918825689811697e-05, "loss": 1.6347, "step": 2244 }, { "epoch": 0.13386577661223031, "grad_norm": 4.429409980773926, "learning_rate": 9.918655887048285e-05, "loss": 1.568, "step": 2246 }, { "epoch": 0.13398498033138634, "grad_norm": 2.911966323852539, "learning_rate": 9.91848590832772e-05, "loss": 1.6028, "step": 2248 }, { "epoch": 0.13410418405054236, "grad_norm": 3.375375509262085, "learning_rate": 9.918315753656087e-05, "loss": 1.6849, "step": 2250 }, { "epoch": 0.13422338776969842, "grad_norm": 3.62204909324646, "learning_rate": 9.918145423039471e-05, "loss": 1.7878, "step": 2252 }, { "epoch": 0.13434259148885444, "grad_norm": 3.815580129623413, "learning_rate": 9.917974916483967e-05, "loss": 2.0641, "step": 2254 }, { "epoch": 0.1344617952080105, "grad_norm": 3.6708991527557373, "learning_rate": 9.917804233995673e-05, "loss": 1.9504, "step": 2256 }, { "epoch": 0.13458099892716652, "grad_norm": 3.3917973041534424, "learning_rate": 9.917633375580695e-05, "loss": 1.7567, "step": 2258 }, { "epoch": 0.13470020264632257, "grad_norm": 3.120969295501709, "learning_rate": 9.917462341245147e-05, "loss": 1.5991, "step": 2260 }, { "epoch": 0.1348194063654786, "grad_norm": 3.3867509365081787, "learning_rate": 9.917291130995146e-05, "loss": 1.742, "step": 2262 }, { "epoch": 0.13493861008463465, "grad_norm": 3.5482876300811768, "learning_rate": 9.917119744836816e-05, "loss": 1.6183, "step": 2264 }, { "epoch": 0.13505781380379067, "grad_norm": 3.6385915279388428, "learning_rate": 9.91694818277629e-05, "loss": 1.7468, "step": 2266 }, { "epoch": 0.13517701752294672, "grad_norm": 3.0792338848114014, "learning_rate": 9.916776444819703e-05, "loss": 1.6599, "step": 2268 }, { "epoch": 0.13529622124210275, "grad_norm": 3.4039320945739746, "learning_rate": 9.916604530973199e-05, "loss": 1.6137, "step": 2270 }, { "epoch": 0.1354154249612588, "grad_norm": 3.5953097343444824, "learning_rate": 9.916432441242931e-05, "loss": 1.6866, "step": 2272 }, { "epoch": 0.13553462868041483, "grad_norm": 3.3772170543670654, "learning_rate": 9.916260175635053e-05, "loss": 1.6505, "step": 2274 }, { "epoch": 0.13565383239957088, "grad_norm": 3.4480502605438232, "learning_rate": 9.916087734155728e-05, "loss": 1.6621, "step": 2276 }, { "epoch": 0.1357730361187269, "grad_norm": 3.484278440475464, "learning_rate": 9.915915116811124e-05, "loss": 1.7022, "step": 2278 }, { "epoch": 0.13589223983788296, "grad_norm": 3.6837453842163086, "learning_rate": 9.915742323607416e-05, "loss": 1.5372, "step": 2280 }, { "epoch": 0.13601144355703898, "grad_norm": 3.176135301589966, "learning_rate": 9.915569354550787e-05, "loss": 1.7182, "step": 2282 }, { "epoch": 0.136130647276195, "grad_norm": 3.517517328262329, "learning_rate": 9.915396209647423e-05, "loss": 1.7315, "step": 2284 }, { "epoch": 0.13624985099535106, "grad_norm": 3.3685600757598877, "learning_rate": 9.91522288890352e-05, "loss": 1.6326, "step": 2286 }, { "epoch": 0.13636905471450708, "grad_norm": 3.559080123901367, "learning_rate": 9.915049392325275e-05, "loss": 1.8854, "step": 2288 }, { "epoch": 0.13648825843366313, "grad_norm": 3.2242205142974854, "learning_rate": 9.914875719918898e-05, "loss": 1.4464, "step": 2290 }, { "epoch": 0.13660746215281916, "grad_norm": 3.5074684619903564, "learning_rate": 9.914701871690599e-05, "loss": 1.6209, "step": 2292 }, { "epoch": 0.1367266658719752, "grad_norm": 3.4697513580322266, "learning_rate": 9.914527847646601e-05, "loss": 1.697, "step": 2294 }, { "epoch": 0.13684586959113124, "grad_norm": 4.183584690093994, "learning_rate": 9.914353647793125e-05, "loss": 1.5859, "step": 2296 }, { "epoch": 0.1369650733102873, "grad_norm": 3.1200709342956543, "learning_rate": 9.914179272136406e-05, "loss": 1.5802, "step": 2298 }, { "epoch": 0.1370842770294433, "grad_norm": 3.4828150272369385, "learning_rate": 9.91400472068268e-05, "loss": 1.8301, "step": 2300 }, { "epoch": 0.13720348074859937, "grad_norm": 2.974910020828247, "learning_rate": 9.913829993438192e-05, "loss": 1.7117, "step": 2302 }, { "epoch": 0.1373226844677554, "grad_norm": 3.21637225151062, "learning_rate": 9.913655090409192e-05, "loss": 1.7016, "step": 2304 }, { "epoch": 0.13744188818691144, "grad_norm": 3.229166269302368, "learning_rate": 9.913480011601938e-05, "loss": 1.605, "step": 2306 }, { "epoch": 0.13756109190606747, "grad_norm": 3.1430530548095703, "learning_rate": 9.913304757022695e-05, "loss": 1.5924, "step": 2308 }, { "epoch": 0.13768029562522352, "grad_norm": 3.4219741821289062, "learning_rate": 9.913129326677729e-05, "loss": 1.6357, "step": 2310 }, { "epoch": 0.13779949934437954, "grad_norm": 3.1348671913146973, "learning_rate": 9.912953720573315e-05, "loss": 1.4927, "step": 2312 }, { "epoch": 0.13791870306353557, "grad_norm": 3.517573356628418, "learning_rate": 9.912777938715738e-05, "loss": 1.8099, "step": 2314 }, { "epoch": 0.13803790678269162, "grad_norm": 2.968467950820923, "learning_rate": 9.912601981111286e-05, "loss": 1.603, "step": 2316 }, { "epoch": 0.13815711050184765, "grad_norm": 3.1023025512695312, "learning_rate": 9.912425847766251e-05, "loss": 1.5866, "step": 2318 }, { "epoch": 0.1382763142210037, "grad_norm": 3.5085339546203613, "learning_rate": 9.912249538686937e-05, "loss": 1.6717, "step": 2320 }, { "epoch": 0.13839551794015972, "grad_norm": 3.5876402854919434, "learning_rate": 9.912073053879652e-05, "loss": 1.6237, "step": 2322 }, { "epoch": 0.13851472165931578, "grad_norm": 3.379185914993286, "learning_rate": 9.911896393350704e-05, "loss": 1.5986, "step": 2324 }, { "epoch": 0.1386339253784718, "grad_norm": 3.360795259475708, "learning_rate": 9.911719557106416e-05, "loss": 1.7104, "step": 2326 }, { "epoch": 0.13875312909762785, "grad_norm": 3.434669256210327, "learning_rate": 9.911542545153116e-05, "loss": 1.6703, "step": 2328 }, { "epoch": 0.13887233281678388, "grad_norm": 3.4199275970458984, "learning_rate": 9.911365357497135e-05, "loss": 1.6639, "step": 2330 }, { "epoch": 0.13899153653593993, "grad_norm": 3.4693350791931152, "learning_rate": 9.911187994144811e-05, "loss": 1.8971, "step": 2332 }, { "epoch": 0.13911074025509595, "grad_norm": 3.2447891235351562, "learning_rate": 9.911010455102488e-05, "loss": 1.6081, "step": 2334 }, { "epoch": 0.139229943974252, "grad_norm": 3.512636423110962, "learning_rate": 9.910832740376518e-05, "loss": 1.6275, "step": 2336 }, { "epoch": 0.13934914769340803, "grad_norm": 3.504363536834717, "learning_rate": 9.910654849973258e-05, "loss": 1.8176, "step": 2338 }, { "epoch": 0.13946835141256408, "grad_norm": 3.4894635677337646, "learning_rate": 9.910476783899075e-05, "loss": 1.5377, "step": 2340 }, { "epoch": 0.1395875551317201, "grad_norm": 3.5242795944213867, "learning_rate": 9.910298542160334e-05, "loss": 1.9222, "step": 2342 }, { "epoch": 0.13970675885087613, "grad_norm": 3.4234671592712402, "learning_rate": 9.910120124763413e-05, "loss": 1.8162, "step": 2344 }, { "epoch": 0.1398259625700322, "grad_norm": 3.7556004524230957, "learning_rate": 9.909941531714699e-05, "loss": 1.7739, "step": 2346 }, { "epoch": 0.1399451662891882, "grad_norm": 3.875126838684082, "learning_rate": 9.909762763020573e-05, "loss": 1.7047, "step": 2348 }, { "epoch": 0.14006437000834426, "grad_norm": 3.3291714191436768, "learning_rate": 9.909583818687439e-05, "loss": 1.9815, "step": 2350 }, { "epoch": 0.1401835737275003, "grad_norm": 3.4249093532562256, "learning_rate": 9.909404698721689e-05, "loss": 1.6074, "step": 2352 }, { "epoch": 0.14030277744665634, "grad_norm": 3.393805503845215, "learning_rate": 9.909225403129739e-05, "loss": 1.5941, "step": 2354 }, { "epoch": 0.14042198116581237, "grad_norm": 3.45086407661438, "learning_rate": 9.909045931917998e-05, "loss": 1.5927, "step": 2356 }, { "epoch": 0.14054118488496842, "grad_norm": 3.8529481887817383, "learning_rate": 9.908866285092889e-05, "loss": 1.6863, "step": 2358 }, { "epoch": 0.14066038860412444, "grad_norm": 3.458120584487915, "learning_rate": 9.908686462660837e-05, "loss": 1.658, "step": 2360 }, { "epoch": 0.1407795923232805, "grad_norm": 3.3795711994171143, "learning_rate": 9.908506464628276e-05, "loss": 1.9416, "step": 2362 }, { "epoch": 0.14089879604243652, "grad_norm": 3.5310468673706055, "learning_rate": 9.908326291001643e-05, "loss": 1.6541, "step": 2364 }, { "epoch": 0.14101799976159257, "grad_norm": 3.104370355606079, "learning_rate": 9.908145941787386e-05, "loss": 1.598, "step": 2366 }, { "epoch": 0.1411372034807486, "grad_norm": 3.3069021701812744, "learning_rate": 9.907965416991955e-05, "loss": 1.6917, "step": 2368 }, { "epoch": 0.14125640719990465, "grad_norm": 3.5606918334960938, "learning_rate": 9.907784716621809e-05, "loss": 1.6494, "step": 2370 }, { "epoch": 0.14137561091906067, "grad_norm": 3.1182126998901367, "learning_rate": 9.907603840683411e-05, "loss": 1.701, "step": 2372 }, { "epoch": 0.14149481463821673, "grad_norm": 3.3112621307373047, "learning_rate": 9.907422789183234e-05, "loss": 1.5667, "step": 2374 }, { "epoch": 0.14161401835737275, "grad_norm": 3.4442481994628906, "learning_rate": 9.907241562127751e-05, "loss": 1.6754, "step": 2376 }, { "epoch": 0.14173322207652878, "grad_norm": 3.6803083419799805, "learning_rate": 9.90706015952345e-05, "loss": 1.6248, "step": 2378 }, { "epoch": 0.14185242579568483, "grad_norm": 3.451200246810913, "learning_rate": 9.906878581376816e-05, "loss": 1.5856, "step": 2380 }, { "epoch": 0.14197162951484085, "grad_norm": 3.4180846214294434, "learning_rate": 9.906696827694348e-05, "loss": 1.6014, "step": 2382 }, { "epoch": 0.1420908332339969, "grad_norm": 3.2151501178741455, "learning_rate": 9.906514898482544e-05, "loss": 1.6144, "step": 2384 }, { "epoch": 0.14221003695315293, "grad_norm": 3.7548165321350098, "learning_rate": 9.906332793747917e-05, "loss": 1.8192, "step": 2386 }, { "epoch": 0.14232924067230898, "grad_norm": 3.694478750228882, "learning_rate": 9.906150513496977e-05, "loss": 1.675, "step": 2388 }, { "epoch": 0.142448444391465, "grad_norm": 3.535451650619507, "learning_rate": 9.90596805773625e-05, "loss": 1.6816, "step": 2390 }, { "epoch": 0.14256764811062106, "grad_norm": 3.5727381706237793, "learning_rate": 9.905785426472257e-05, "loss": 1.6055, "step": 2392 }, { "epoch": 0.14268685182977708, "grad_norm": 3.27485728263855, "learning_rate": 9.905602619711535e-05, "loss": 1.7425, "step": 2394 }, { "epoch": 0.14280605554893314, "grad_norm": 3.6223998069763184, "learning_rate": 9.905419637460625e-05, "loss": 1.6168, "step": 2396 }, { "epoch": 0.14292525926808916, "grad_norm": 3.54744553565979, "learning_rate": 9.905236479726068e-05, "loss": 1.7081, "step": 2398 }, { "epoch": 0.1430444629872452, "grad_norm": 3.418964385986328, "learning_rate": 9.90505314651442e-05, "loss": 1.7283, "step": 2400 }, { "epoch": 0.14316366670640124, "grad_norm": 3.723985433578491, "learning_rate": 9.904869637832239e-05, "loss": 1.5766, "step": 2402 }, { "epoch": 0.1432828704255573, "grad_norm": 3.3465538024902344, "learning_rate": 9.904685953686089e-05, "loss": 1.5992, "step": 2404 }, { "epoch": 0.14340207414471332, "grad_norm": 3.3602826595306396, "learning_rate": 9.904502094082542e-05, "loss": 1.605, "step": 2406 }, { "epoch": 0.14352127786386934, "grad_norm": 3.155268669128418, "learning_rate": 9.904318059028172e-05, "loss": 1.7636, "step": 2408 }, { "epoch": 0.1436404815830254, "grad_norm": 3.113708734512329, "learning_rate": 9.904133848529568e-05, "loss": 1.6784, "step": 2410 }, { "epoch": 0.14375968530218142, "grad_norm": 3.5514976978302, "learning_rate": 9.903949462593316e-05, "loss": 1.5422, "step": 2412 }, { "epoch": 0.14387888902133747, "grad_norm": 3.464318037033081, "learning_rate": 9.903764901226012e-05, "loss": 1.5171, "step": 2414 }, { "epoch": 0.1439980927404935, "grad_norm": 3.161888360977173, "learning_rate": 9.903580164434261e-05, "loss": 1.5212, "step": 2416 }, { "epoch": 0.14411729645964955, "grad_norm": 3.4944674968719482, "learning_rate": 9.903395252224669e-05, "loss": 1.5877, "step": 2418 }, { "epoch": 0.14423650017880557, "grad_norm": 3.131819248199463, "learning_rate": 9.903210164603852e-05, "loss": 1.5686, "step": 2420 }, { "epoch": 0.14435570389796162, "grad_norm": 3.373889923095703, "learning_rate": 9.903024901578432e-05, "loss": 1.5705, "step": 2422 }, { "epoch": 0.14447490761711765, "grad_norm": 3.54888653755188, "learning_rate": 9.902839463155035e-05, "loss": 1.7104, "step": 2424 }, { "epoch": 0.1445941113362737, "grad_norm": 3.237995147705078, "learning_rate": 9.902653849340295e-05, "loss": 1.5907, "step": 2426 }, { "epoch": 0.14471331505542973, "grad_norm": 4.384692192077637, "learning_rate": 9.902468060140854e-05, "loss": 1.7712, "step": 2428 }, { "epoch": 0.14483251877458578, "grad_norm": 4.477648735046387, "learning_rate": 9.902282095563357e-05, "loss": 1.5685, "step": 2430 }, { "epoch": 0.1449517224937418, "grad_norm": 3.3440136909484863, "learning_rate": 9.902095955614455e-05, "loss": 1.4261, "step": 2432 }, { "epoch": 0.14507092621289785, "grad_norm": 3.533400297164917, "learning_rate": 9.901909640300809e-05, "loss": 1.6612, "step": 2434 }, { "epoch": 0.14519012993205388, "grad_norm": 3.678306818008423, "learning_rate": 9.901723149629084e-05, "loss": 1.5645, "step": 2436 }, { "epoch": 0.1453093336512099, "grad_norm": 3.539987802505493, "learning_rate": 9.901536483605952e-05, "loss": 1.5067, "step": 2438 }, { "epoch": 0.14542853737036596, "grad_norm": 3.2213852405548096, "learning_rate": 9.901349642238087e-05, "loss": 1.6182, "step": 2440 }, { "epoch": 0.14554774108952198, "grad_norm": 3.325028419494629, "learning_rate": 9.901162625532176e-05, "loss": 1.7105, "step": 2442 }, { "epoch": 0.14566694480867803, "grad_norm": 3.3727598190307617, "learning_rate": 9.900975433494909e-05, "loss": 1.5764, "step": 2444 }, { "epoch": 0.14578614852783406, "grad_norm": 3.623033285140991, "learning_rate": 9.900788066132983e-05, "loss": 1.8201, "step": 2446 }, { "epoch": 0.1459053522469901, "grad_norm": 2.957929849624634, "learning_rate": 9.900600523453098e-05, "loss": 1.4381, "step": 2448 }, { "epoch": 0.14602455596614614, "grad_norm": 3.782317876815796, "learning_rate": 9.900412805461967e-05, "loss": 1.7495, "step": 2450 }, { "epoch": 0.1461437596853022, "grad_norm": 3.029216766357422, "learning_rate": 9.900224912166302e-05, "loss": 1.6306, "step": 2452 }, { "epoch": 0.1462629634044582, "grad_norm": 2.9962265491485596, "learning_rate": 9.900036843572829e-05, "loss": 1.494, "step": 2454 }, { "epoch": 0.14638216712361427, "grad_norm": 3.0436153411865234, "learning_rate": 9.89984859968827e-05, "loss": 1.5944, "step": 2456 }, { "epoch": 0.1465013708427703, "grad_norm": 3.60076642036438, "learning_rate": 9.899660180519362e-05, "loss": 1.6049, "step": 2458 }, { "epoch": 0.14662057456192634, "grad_norm": 3.3755438327789307, "learning_rate": 9.899471586072846e-05, "loss": 1.642, "step": 2460 }, { "epoch": 0.14673977828108237, "grad_norm": 3.184661388397217, "learning_rate": 9.899282816355469e-05, "loss": 1.6195, "step": 2462 }, { "epoch": 0.14685898200023842, "grad_norm": 3.1015753746032715, "learning_rate": 9.899093871373981e-05, "loss": 1.67, "step": 2464 }, { "epoch": 0.14697818571939444, "grad_norm": 3.040264129638672, "learning_rate": 9.898904751135144e-05, "loss": 1.5939, "step": 2466 }, { "epoch": 0.1470973894385505, "grad_norm": 3.0282680988311768, "learning_rate": 9.898715455645722e-05, "loss": 1.5282, "step": 2468 }, { "epoch": 0.14721659315770652, "grad_norm": 3.6518566608428955, "learning_rate": 9.898525984912489e-05, "loss": 1.821, "step": 2470 }, { "epoch": 0.14733579687686255, "grad_norm": 3.4186201095581055, "learning_rate": 9.89833633894222e-05, "loss": 1.6316, "step": 2472 }, { "epoch": 0.1474550005960186, "grad_norm": 3.2734427452087402, "learning_rate": 9.898146517741702e-05, "loss": 1.7101, "step": 2474 }, { "epoch": 0.14757420431517462, "grad_norm": 3.3881895542144775, "learning_rate": 9.897956521317722e-05, "loss": 1.7906, "step": 2476 }, { "epoch": 0.14769340803433068, "grad_norm": 3.609528064727783, "learning_rate": 9.897766349677082e-05, "loss": 1.6248, "step": 2478 }, { "epoch": 0.1478126117534867, "grad_norm": 3.1370365619659424, "learning_rate": 9.89757600282658e-05, "loss": 1.6291, "step": 2480 }, { "epoch": 0.14793181547264275, "grad_norm": 3.3078243732452393, "learning_rate": 9.897385480773027e-05, "loss": 1.6274, "step": 2482 }, { "epoch": 0.14805101919179878, "grad_norm": 2.920330762863159, "learning_rate": 9.89719478352324e-05, "loss": 1.4879, "step": 2484 }, { "epoch": 0.14817022291095483, "grad_norm": 3.676748514175415, "learning_rate": 9.897003911084042e-05, "loss": 1.5834, "step": 2486 }, { "epoch": 0.14828942663011085, "grad_norm": 3.3859081268310547, "learning_rate": 9.896812863462257e-05, "loss": 1.6903, "step": 2488 }, { "epoch": 0.1484086303492669, "grad_norm": 3.4209065437316895, "learning_rate": 9.896621640664721e-05, "loss": 1.6039, "step": 2490 }, { "epoch": 0.14852783406842293, "grad_norm": 3.136169672012329, "learning_rate": 9.896430242698277e-05, "loss": 1.612, "step": 2492 }, { "epoch": 0.14864703778757898, "grad_norm": 3.7060582637786865, "learning_rate": 9.89623866956977e-05, "loss": 1.7029, "step": 2494 }, { "epoch": 0.148766241506735, "grad_norm": 3.0664069652557373, "learning_rate": 9.896046921286053e-05, "loss": 1.5132, "step": 2496 }, { "epoch": 0.14888544522589106, "grad_norm": 3.7840492725372314, "learning_rate": 9.895854997853987e-05, "loss": 1.5217, "step": 2498 }, { "epoch": 0.14900464894504709, "grad_norm": 3.6037209033966064, "learning_rate": 9.895662899280434e-05, "loss": 1.6499, "step": 2500 }, { "epoch": 0.1491238526642031, "grad_norm": 3.5279414653778076, "learning_rate": 9.89547062557227e-05, "loss": 1.738, "step": 2502 }, { "epoch": 0.14924305638335916, "grad_norm": 3.290250778198242, "learning_rate": 9.895278176736373e-05, "loss": 1.5676, "step": 2504 }, { "epoch": 0.1493622601025152, "grad_norm": 3.478375196456909, "learning_rate": 9.895085552779626e-05, "loss": 1.8225, "step": 2506 }, { "epoch": 0.14948146382167124, "grad_norm": 3.571981191635132, "learning_rate": 9.894892753708921e-05, "loss": 1.6883, "step": 2508 }, { "epoch": 0.14960066754082726, "grad_norm": 3.622737169265747, "learning_rate": 9.894699779531154e-05, "loss": 1.7798, "step": 2510 }, { "epoch": 0.14971987125998332, "grad_norm": 3.1986913681030273, "learning_rate": 9.894506630253228e-05, "loss": 1.7146, "step": 2512 }, { "epoch": 0.14983907497913934, "grad_norm": 3.446655511856079, "learning_rate": 9.894313305882054e-05, "loss": 1.6023, "step": 2514 }, { "epoch": 0.1499582786982954, "grad_norm": 3.08729887008667, "learning_rate": 9.894119806424549e-05, "loss": 1.5248, "step": 2516 }, { "epoch": 0.15007748241745142, "grad_norm": 3.492255687713623, "learning_rate": 9.893926131887632e-05, "loss": 1.7188, "step": 2518 }, { "epoch": 0.15019668613660747, "grad_norm": 3.3900210857391357, "learning_rate": 9.893732282278235e-05, "loss": 1.4816, "step": 2520 }, { "epoch": 0.1503158898557635, "grad_norm": 3.1167514324188232, "learning_rate": 9.893538257603288e-05, "loss": 1.6189, "step": 2522 }, { "epoch": 0.15043509357491955, "grad_norm": 3.2404680252075195, "learning_rate": 9.893344057869734e-05, "loss": 1.7802, "step": 2524 }, { "epoch": 0.15055429729407557, "grad_norm": 3.1253316402435303, "learning_rate": 9.893149683084521e-05, "loss": 1.6906, "step": 2526 }, { "epoch": 0.15067350101323163, "grad_norm": 3.0979135036468506, "learning_rate": 9.892955133254604e-05, "loss": 1.6993, "step": 2528 }, { "epoch": 0.15079270473238765, "grad_norm": 2.9504218101501465, "learning_rate": 9.892760408386939e-05, "loss": 1.7224, "step": 2530 }, { "epoch": 0.15091190845154367, "grad_norm": 3.3050973415374756, "learning_rate": 9.892565508488494e-05, "loss": 1.5841, "step": 2532 }, { "epoch": 0.15103111217069973, "grad_norm": 3.840404748916626, "learning_rate": 9.892370433566243e-05, "loss": 1.6064, "step": 2534 }, { "epoch": 0.15115031588985575, "grad_norm": 3.436929225921631, "learning_rate": 9.892175183627161e-05, "loss": 1.5107, "step": 2536 }, { "epoch": 0.1512695196090118, "grad_norm": 3.4134514331817627, "learning_rate": 9.891979758678234e-05, "loss": 1.5484, "step": 2538 }, { "epoch": 0.15138872332816783, "grad_norm": 3.4533698558807373, "learning_rate": 9.891784158726453e-05, "loss": 1.669, "step": 2540 }, { "epoch": 0.15150792704732388, "grad_norm": 3.2660441398620605, "learning_rate": 9.891588383778816e-05, "loss": 1.6943, "step": 2542 }, { "epoch": 0.1516271307664799, "grad_norm": 3.5938546657562256, "learning_rate": 9.891392433842327e-05, "loss": 1.53, "step": 2544 }, { "epoch": 0.15174633448563596, "grad_norm": 3.3040730953216553, "learning_rate": 9.891196308923994e-05, "loss": 1.7307, "step": 2546 }, { "epoch": 0.15186553820479198, "grad_norm": 3.377136468887329, "learning_rate": 9.891000009030835e-05, "loss": 1.805, "step": 2548 }, { "epoch": 0.15198474192394804, "grad_norm": 3.4472556114196777, "learning_rate": 9.89080353416987e-05, "loss": 1.5359, "step": 2550 }, { "epoch": 0.15210394564310406, "grad_norm": 3.2447564601898193, "learning_rate": 9.89060688434813e-05, "loss": 1.6599, "step": 2552 }, { "epoch": 0.1522231493622601, "grad_norm": 3.069822311401367, "learning_rate": 9.890410059572648e-05, "loss": 1.6069, "step": 2554 }, { "epoch": 0.15234235308141614, "grad_norm": 3.2300970554351807, "learning_rate": 9.890213059850466e-05, "loss": 1.666, "step": 2556 }, { "epoch": 0.1524615568005722, "grad_norm": 3.248227834701538, "learning_rate": 9.890015885188631e-05, "loss": 1.6978, "step": 2558 }, { "epoch": 0.15258076051972821, "grad_norm": 3.168769359588623, "learning_rate": 9.889818535594197e-05, "loss": 1.6863, "step": 2560 }, { "epoch": 0.15269996423888427, "grad_norm": 3.283676862716675, "learning_rate": 9.889621011074222e-05, "loss": 1.477, "step": 2562 }, { "epoch": 0.1528191679580403, "grad_norm": 3.5518903732299805, "learning_rate": 9.889423311635777e-05, "loss": 1.6346, "step": 2564 }, { "epoch": 0.15293837167719632, "grad_norm": 3.199092388153076, "learning_rate": 9.889225437285928e-05, "loss": 1.5312, "step": 2566 }, { "epoch": 0.15305757539635237, "grad_norm": 3.566997766494751, "learning_rate": 9.889027388031759e-05, "loss": 1.6497, "step": 2568 }, { "epoch": 0.1531767791155084, "grad_norm": 3.6696743965148926, "learning_rate": 9.888829163880351e-05, "loss": 1.8379, "step": 2570 }, { "epoch": 0.15329598283466445, "grad_norm": 3.6701202392578125, "learning_rate": 9.8886307648388e-05, "loss": 1.5432, "step": 2572 }, { "epoch": 0.15341518655382047, "grad_norm": 3.5292577743530273, "learning_rate": 9.888432190914196e-05, "loss": 1.899, "step": 2574 }, { "epoch": 0.15353439027297652, "grad_norm": 3.0382587909698486, "learning_rate": 9.88823344211365e-05, "loss": 1.6332, "step": 2576 }, { "epoch": 0.15365359399213255, "grad_norm": 3.3988895416259766, "learning_rate": 9.888034518444266e-05, "loss": 1.5834, "step": 2578 }, { "epoch": 0.1537727977112886, "grad_norm": 3.5434110164642334, "learning_rate": 9.887835419913168e-05, "loss": 1.6128, "step": 2580 }, { "epoch": 0.15389200143044462, "grad_norm": 3.1286284923553467, "learning_rate": 9.88763614652747e-05, "loss": 1.5151, "step": 2582 }, { "epoch": 0.15401120514960068, "grad_norm": 3.154369354248047, "learning_rate": 9.887436698294303e-05, "loss": 1.6097, "step": 2584 }, { "epoch": 0.1541304088687567, "grad_norm": 3.260528326034546, "learning_rate": 9.887237075220805e-05, "loss": 1.6019, "step": 2586 }, { "epoch": 0.15424961258791275, "grad_norm": 3.5835535526275635, "learning_rate": 9.887037277314115e-05, "loss": 1.6287, "step": 2588 }, { "epoch": 0.15436881630706878, "grad_norm": 3.1732382774353027, "learning_rate": 9.88683730458138e-05, "loss": 1.5323, "step": 2590 }, { "epoch": 0.15448802002622483, "grad_norm": 3.1766903400421143, "learning_rate": 9.886637157029757e-05, "loss": 1.6181, "step": 2592 }, { "epoch": 0.15460722374538086, "grad_norm": 3.3479065895080566, "learning_rate": 9.886436834666401e-05, "loss": 1.559, "step": 2594 }, { "epoch": 0.15472642746453688, "grad_norm": 3.577511787414551, "learning_rate": 9.886236337498481e-05, "loss": 1.8179, "step": 2596 }, { "epoch": 0.15484563118369293, "grad_norm": 3.0816762447357178, "learning_rate": 9.88603566553317e-05, "loss": 1.6409, "step": 2598 }, { "epoch": 0.15496483490284896, "grad_norm": 3.1805810928344727, "learning_rate": 9.885834818777646e-05, "loss": 1.7673, "step": 2600 }, { "epoch": 0.155084038622005, "grad_norm": 3.197600841522217, "learning_rate": 9.885633797239092e-05, "loss": 1.5972, "step": 2602 }, { "epoch": 0.15520324234116104, "grad_norm": 3.065690755844116, "learning_rate": 9.885432600924702e-05, "loss": 1.5556, "step": 2604 }, { "epoch": 0.1553224460603171, "grad_norm": 3.3069396018981934, "learning_rate": 9.885231229841674e-05, "loss": 1.7689, "step": 2606 }, { "epoch": 0.1554416497794731, "grad_norm": 3.1051974296569824, "learning_rate": 9.88502968399721e-05, "loss": 1.5112, "step": 2608 }, { "epoch": 0.15556085349862916, "grad_norm": 3.2284958362579346, "learning_rate": 9.884827963398517e-05, "loss": 1.6427, "step": 2610 }, { "epoch": 0.1556800572177852, "grad_norm": 3.168043375015259, "learning_rate": 9.884626068052818e-05, "loss": 1.5817, "step": 2612 }, { "epoch": 0.15579926093694124, "grad_norm": 3.08084774017334, "learning_rate": 9.88442399796733e-05, "loss": 1.5805, "step": 2614 }, { "epoch": 0.15591846465609727, "grad_norm": 3.405367374420166, "learning_rate": 9.884221753149286e-05, "loss": 1.5904, "step": 2616 }, { "epoch": 0.15603766837525332, "grad_norm": 3.2350730895996094, "learning_rate": 9.884019333605917e-05, "loss": 1.6125, "step": 2618 }, { "epoch": 0.15615687209440934, "grad_norm": 3.4423599243164062, "learning_rate": 9.883816739344465e-05, "loss": 1.6344, "step": 2620 }, { "epoch": 0.1562760758135654, "grad_norm": 3.4177117347717285, "learning_rate": 9.88361397037218e-05, "loss": 1.7922, "step": 2622 }, { "epoch": 0.15639527953272142, "grad_norm": 3.294095039367676, "learning_rate": 9.883411026696315e-05, "loss": 1.6678, "step": 2624 }, { "epoch": 0.15651448325187745, "grad_norm": 3.1219570636749268, "learning_rate": 9.883207908324126e-05, "loss": 1.5166, "step": 2626 }, { "epoch": 0.1566336869710335, "grad_norm": 3.2298431396484375, "learning_rate": 9.883004615262885e-05, "loss": 1.4979, "step": 2628 }, { "epoch": 0.15675289069018952, "grad_norm": 3.1785247325897217, "learning_rate": 9.882801147519861e-05, "loss": 1.4966, "step": 2630 }, { "epoch": 0.15687209440934557, "grad_norm": 3.4263429641723633, "learning_rate": 9.882597505102334e-05, "loss": 1.4812, "step": 2632 }, { "epoch": 0.1569912981285016, "grad_norm": 3.553084135055542, "learning_rate": 9.882393688017588e-05, "loss": 1.7314, "step": 2634 }, { "epoch": 0.15711050184765765, "grad_norm": 3.105832576751709, "learning_rate": 9.882189696272916e-05, "loss": 1.6128, "step": 2636 }, { "epoch": 0.15722970556681368, "grad_norm": 3.096172332763672, "learning_rate": 9.881985529875613e-05, "loss": 1.8047, "step": 2638 }, { "epoch": 0.15734890928596973, "grad_norm": 3.0455026626586914, "learning_rate": 9.881781188832985e-05, "loss": 1.6492, "step": 2640 }, { "epoch": 0.15746811300512575, "grad_norm": 3.519191265106201, "learning_rate": 9.881576673152341e-05, "loss": 1.6077, "step": 2642 }, { "epoch": 0.1575873167242818, "grad_norm": 3.0753512382507324, "learning_rate": 9.881371982840996e-05, "loss": 1.6244, "step": 2644 }, { "epoch": 0.15770652044343783, "grad_norm": 3.273916482925415, "learning_rate": 9.881167117906275e-05, "loss": 1.6949, "step": 2646 }, { "epoch": 0.15782572416259388, "grad_norm": 3.5469777584075928, "learning_rate": 9.880962078355506e-05, "loss": 1.5895, "step": 2648 }, { "epoch": 0.1579449278817499, "grad_norm": 3.3441617488861084, "learning_rate": 9.880756864196022e-05, "loss": 1.578, "step": 2650 }, { "epoch": 0.15806413160090596, "grad_norm": 3.4318313598632812, "learning_rate": 9.880551475435167e-05, "loss": 1.6181, "step": 2652 }, { "epoch": 0.15818333532006199, "grad_norm": 3.381779193878174, "learning_rate": 9.880345912080287e-05, "loss": 1.6388, "step": 2654 }, { "epoch": 0.158302539039218, "grad_norm": 3.4218122959136963, "learning_rate": 9.880140174138734e-05, "loss": 1.7001, "step": 2656 }, { "epoch": 0.15842174275837406, "grad_norm": 3.164663553237915, "learning_rate": 9.879934261617871e-05, "loss": 1.5981, "step": 2658 }, { "epoch": 0.1585409464775301, "grad_norm": 2.7820653915405273, "learning_rate": 9.879728174525062e-05, "loss": 1.7163, "step": 2660 }, { "epoch": 0.15866015019668614, "grad_norm": 3.4118452072143555, "learning_rate": 9.879521912867682e-05, "loss": 1.7433, "step": 2662 }, { "epoch": 0.15877935391584216, "grad_norm": 2.989683151245117, "learning_rate": 9.879315476653107e-05, "loss": 1.7841, "step": 2664 }, { "epoch": 0.15889855763499822, "grad_norm": 3.1140449047088623, "learning_rate": 9.879108865888725e-05, "loss": 1.6893, "step": 2666 }, { "epoch": 0.15901776135415424, "grad_norm": 3.303821563720703, "learning_rate": 9.878902080581922e-05, "loss": 1.5211, "step": 2668 }, { "epoch": 0.1591369650733103, "grad_norm": 3.3390989303588867, "learning_rate": 9.8786951207401e-05, "loss": 1.5308, "step": 2670 }, { "epoch": 0.15925616879246632, "grad_norm": 3.5691967010498047, "learning_rate": 9.878487986370661e-05, "loss": 1.8598, "step": 2672 }, { "epoch": 0.15937537251162237, "grad_norm": 3.567586898803711, "learning_rate": 9.878280677481016e-05, "loss": 1.6787, "step": 2674 }, { "epoch": 0.1594945762307784, "grad_norm": 3.3336470127105713, "learning_rate": 9.87807319407858e-05, "loss": 1.6254, "step": 2676 }, { "epoch": 0.15961377994993445, "grad_norm": 3.2213284969329834, "learning_rate": 9.877865536170775e-05, "loss": 1.5279, "step": 2678 }, { "epoch": 0.15973298366909047, "grad_norm": 3.07309889793396, "learning_rate": 9.87765770376503e-05, "loss": 1.4919, "step": 2680 }, { "epoch": 0.15985218738824652, "grad_norm": 3.1726841926574707, "learning_rate": 9.877449696868783e-05, "loss": 1.6173, "step": 2682 }, { "epoch": 0.15997139110740255, "grad_norm": 3.635042667388916, "learning_rate": 9.877241515489471e-05, "loss": 1.6133, "step": 2684 }, { "epoch": 0.1600905948265586, "grad_norm": 3.608236312866211, "learning_rate": 9.877033159634541e-05, "loss": 1.6136, "step": 2686 }, { "epoch": 0.16020979854571463, "grad_norm": 3.3275551795959473, "learning_rate": 9.87682462931145e-05, "loss": 1.7059, "step": 2688 }, { "epoch": 0.16032900226487065, "grad_norm": 3.5199053287506104, "learning_rate": 9.876615924527655e-05, "loss": 1.7241, "step": 2690 }, { "epoch": 0.1604482059840267, "grad_norm": 3.3687584400177, "learning_rate": 9.876407045290624e-05, "loss": 1.6615, "step": 2692 }, { "epoch": 0.16056740970318273, "grad_norm": 3.3089711666107178, "learning_rate": 9.876197991607828e-05, "loss": 1.6375, "step": 2694 }, { "epoch": 0.16068661342233878, "grad_norm": 3.4273841381073, "learning_rate": 9.875988763486746e-05, "loss": 1.7154, "step": 2696 }, { "epoch": 0.1608058171414948, "grad_norm": 3.3514463901519775, "learning_rate": 9.875779360934862e-05, "loss": 1.6336, "step": 2698 }, { "epoch": 0.16092502086065086, "grad_norm": 3.314821720123291, "learning_rate": 9.875569783959671e-05, "loss": 1.6227, "step": 2700 }, { "epoch": 0.16104422457980688, "grad_norm": 3.500047206878662, "learning_rate": 9.875360032568666e-05, "loss": 1.7895, "step": 2702 }, { "epoch": 0.16116342829896294, "grad_norm": 3.109715223312378, "learning_rate": 9.875150106769351e-05, "loss": 1.5876, "step": 2704 }, { "epoch": 0.16128263201811896, "grad_norm": 4.139030933380127, "learning_rate": 9.874940006569235e-05, "loss": 1.7323, "step": 2706 }, { "epoch": 0.161401835737275, "grad_norm": 4.0664286613464355, "learning_rate": 9.874729731975838e-05, "loss": 1.6665, "step": 2708 }, { "epoch": 0.16152103945643104, "grad_norm": 3.375335216522217, "learning_rate": 9.874519282996679e-05, "loss": 1.6501, "step": 2710 }, { "epoch": 0.1616402431755871, "grad_norm": 3.322829246520996, "learning_rate": 9.874308659639288e-05, "loss": 1.6573, "step": 2712 }, { "epoch": 0.16175944689474311, "grad_norm": 3.759446382522583, "learning_rate": 9.874097861911198e-05, "loss": 1.7412, "step": 2714 }, { "epoch": 0.16187865061389917, "grad_norm": 3.2163968086242676, "learning_rate": 9.873886889819953e-05, "loss": 1.5585, "step": 2716 }, { "epoch": 0.1619978543330552, "grad_norm": 3.135279893875122, "learning_rate": 9.873675743373094e-05, "loss": 1.5505, "step": 2718 }, { "epoch": 0.16211705805221122, "grad_norm": 3.3538811206817627, "learning_rate": 9.873464422578183e-05, "loss": 1.6783, "step": 2720 }, { "epoch": 0.16223626177136727, "grad_norm": 3.5925631523132324, "learning_rate": 9.873252927442773e-05, "loss": 1.7153, "step": 2722 }, { "epoch": 0.1623554654905233, "grad_norm": 3.178523540496826, "learning_rate": 9.873041257974432e-05, "loss": 1.6329, "step": 2724 }, { "epoch": 0.16247466920967935, "grad_norm": 3.165836811065674, "learning_rate": 9.872829414180733e-05, "loss": 1.5832, "step": 2726 }, { "epoch": 0.16259387292883537, "grad_norm": 3.143101930618286, "learning_rate": 9.872617396069252e-05, "loss": 1.5139, "step": 2728 }, { "epoch": 0.16271307664799142, "grad_norm": 3.153965473175049, "learning_rate": 9.872405203647577e-05, "loss": 1.56, "step": 2730 }, { "epoch": 0.16283228036714745, "grad_norm": 3.3204729557037354, "learning_rate": 9.872192836923296e-05, "loss": 1.6425, "step": 2732 }, { "epoch": 0.1629514840863035, "grad_norm": 3.324897050857544, "learning_rate": 9.871980295904008e-05, "loss": 1.6983, "step": 2734 }, { "epoch": 0.16307068780545952, "grad_norm": 3.2396018505096436, "learning_rate": 9.871767580597315e-05, "loss": 1.6896, "step": 2736 }, { "epoch": 0.16318989152461558, "grad_norm": 3.035999059677124, "learning_rate": 9.871554691010827e-05, "loss": 1.4862, "step": 2738 }, { "epoch": 0.1633090952437716, "grad_norm": 3.025545120239258, "learning_rate": 9.871341627152161e-05, "loss": 1.6613, "step": 2740 }, { "epoch": 0.16342829896292765, "grad_norm": 3.7628679275512695, "learning_rate": 9.871128389028937e-05, "loss": 1.982, "step": 2742 }, { "epoch": 0.16354750268208368, "grad_norm": 3.037651777267456, "learning_rate": 9.870914976648785e-05, "loss": 1.721, "step": 2744 }, { "epoch": 0.16366670640123973, "grad_norm": 3.4678497314453125, "learning_rate": 9.870701390019336e-05, "loss": 1.5009, "step": 2746 }, { "epoch": 0.16378591012039576, "grad_norm": 3.250636100769043, "learning_rate": 9.870487629148237e-05, "loss": 1.6483, "step": 2748 }, { "epoch": 0.16390511383955178, "grad_norm": 3.14042329788208, "learning_rate": 9.870273694043129e-05, "loss": 1.6016, "step": 2750 }, { "epoch": 0.16402431755870783, "grad_norm": 3.580308675765991, "learning_rate": 9.870059584711668e-05, "loss": 2.0166, "step": 2752 }, { "epoch": 0.16414352127786386, "grad_norm": 5.301333427429199, "learning_rate": 9.869845301161513e-05, "loss": 1.7236, "step": 2754 }, { "epoch": 0.1642627249970199, "grad_norm": 3.022714138031006, "learning_rate": 9.86963084340033e-05, "loss": 1.6438, "step": 2756 }, { "epoch": 0.16438192871617593, "grad_norm": 2.9199583530426025, "learning_rate": 9.86941621143579e-05, "loss": 1.6875, "step": 2758 }, { "epoch": 0.164501132435332, "grad_norm": 3.160698413848877, "learning_rate": 9.869201405275571e-05, "loss": 1.6054, "step": 2760 }, { "epoch": 0.164620336154488, "grad_norm": 3.2056212425231934, "learning_rate": 9.868986424927359e-05, "loss": 1.6629, "step": 2762 }, { "epoch": 0.16473953987364406, "grad_norm": 3.1508500576019287, "learning_rate": 9.868771270398842e-05, "loss": 1.66, "step": 2764 }, { "epoch": 0.1648587435928001, "grad_norm": 3.4855873584747314, "learning_rate": 9.86855594169772e-05, "loss": 1.652, "step": 2766 }, { "epoch": 0.16497794731195614, "grad_norm": 3.172001838684082, "learning_rate": 9.868340438831693e-05, "loss": 1.6127, "step": 2768 }, { "epoch": 0.16509715103111217, "grad_norm": 3.173006772994995, "learning_rate": 9.868124761808474e-05, "loss": 1.8746, "step": 2770 }, { "epoch": 0.16521635475026822, "grad_norm": 3.5850064754486084, "learning_rate": 9.867908910635774e-05, "loss": 1.614, "step": 2772 }, { "epoch": 0.16533555846942424, "grad_norm": 3.422652244567871, "learning_rate": 9.867692885321318e-05, "loss": 1.6485, "step": 2774 }, { "epoch": 0.1654547621885803, "grad_norm": 3.029264450073242, "learning_rate": 9.867476685872832e-05, "loss": 1.5229, "step": 2776 }, { "epoch": 0.16557396590773632, "grad_norm": 3.0300562381744385, "learning_rate": 9.867260312298052e-05, "loss": 1.6132, "step": 2778 }, { "epoch": 0.16569316962689237, "grad_norm": 3.4076991081237793, "learning_rate": 9.867043764604717e-05, "loss": 1.6599, "step": 2780 }, { "epoch": 0.1658123733460484, "grad_norm": 3.263192653656006, "learning_rate": 9.866827042800572e-05, "loss": 1.6242, "step": 2782 }, { "epoch": 0.16593157706520442, "grad_norm": 3.3069968223571777, "learning_rate": 9.866610146893375e-05, "loss": 1.5311, "step": 2784 }, { "epoch": 0.16605078078436047, "grad_norm": 3.14589524269104, "learning_rate": 9.86639307689088e-05, "loss": 1.5339, "step": 2786 }, { "epoch": 0.1661699845035165, "grad_norm": 3.2669289112091064, "learning_rate": 9.866175832800857e-05, "loss": 1.541, "step": 2788 }, { "epoch": 0.16628918822267255, "grad_norm": 3.112126111984253, "learning_rate": 9.865958414631074e-05, "loss": 1.6174, "step": 2790 }, { "epoch": 0.16640839194182858, "grad_norm": 3.146116256713867, "learning_rate": 9.865740822389309e-05, "loss": 1.443, "step": 2792 }, { "epoch": 0.16652759566098463, "grad_norm": 3.160033941268921, "learning_rate": 9.865523056083348e-05, "loss": 1.5475, "step": 2794 }, { "epoch": 0.16664679938014065, "grad_norm": 3.3180437088012695, "learning_rate": 9.865305115720978e-05, "loss": 1.6039, "step": 2796 }, { "epoch": 0.1667660030992967, "grad_norm": 3.1264166831970215, "learning_rate": 9.86508700131e-05, "loss": 1.4779, "step": 2798 }, { "epoch": 0.16688520681845273, "grad_norm": 3.566401481628418, "learning_rate": 9.864868712858212e-05, "loss": 1.7131, "step": 2800 }, { "epoch": 0.16700441053760878, "grad_norm": 3.370703935623169, "learning_rate": 9.864650250373428e-05, "loss": 1.6268, "step": 2802 }, { "epoch": 0.1671236142567648, "grad_norm": 3.464846611022949, "learning_rate": 9.864431613863458e-05, "loss": 1.655, "step": 2804 }, { "epoch": 0.16724281797592086, "grad_norm": 3.3292644023895264, "learning_rate": 9.864212803336125e-05, "loss": 1.6225, "step": 2806 }, { "epoch": 0.16736202169507688, "grad_norm": 3.3788392543792725, "learning_rate": 9.86399381879926e-05, "loss": 1.5415, "step": 2808 }, { "epoch": 0.16748122541423294, "grad_norm": 3.6341521739959717, "learning_rate": 9.863774660260691e-05, "loss": 1.6339, "step": 2810 }, { "epoch": 0.16760042913338896, "grad_norm": 3.6271443367004395, "learning_rate": 9.863555327728264e-05, "loss": 1.7106, "step": 2812 }, { "epoch": 0.167719632852545, "grad_norm": 3.408254861831665, "learning_rate": 9.863335821209822e-05, "loss": 1.6704, "step": 2814 }, { "epoch": 0.16783883657170104, "grad_norm": 3.1275203227996826, "learning_rate": 9.863116140713218e-05, "loss": 1.5017, "step": 2816 }, { "epoch": 0.16795804029085706, "grad_norm": 3.393876791000366, "learning_rate": 9.862896286246309e-05, "loss": 1.5699, "step": 2818 }, { "epoch": 0.16807724401001312, "grad_norm": 3.784426689147949, "learning_rate": 9.862676257816962e-05, "loss": 1.5715, "step": 2820 }, { "epoch": 0.16819644772916914, "grad_norm": 3.098666191101074, "learning_rate": 9.862456055433049e-05, "loss": 1.4908, "step": 2822 }, { "epoch": 0.1683156514483252, "grad_norm": 3.5284626483917236, "learning_rate": 9.862235679102445e-05, "loss": 1.7509, "step": 2824 }, { "epoch": 0.16843485516748122, "grad_norm": 3.4833970069885254, "learning_rate": 9.862015128833036e-05, "loss": 1.6702, "step": 2826 }, { "epoch": 0.16855405888663727, "grad_norm": 3.128997564315796, "learning_rate": 9.861794404632711e-05, "loss": 1.7576, "step": 2828 }, { "epoch": 0.1686732626057933, "grad_norm": 3.342858076095581, "learning_rate": 9.861573506509364e-05, "loss": 1.6953, "step": 2830 }, { "epoch": 0.16879246632494935, "grad_norm": 3.449270009994507, "learning_rate": 9.8613524344709e-05, "loss": 1.6406, "step": 2832 }, { "epoch": 0.16891167004410537, "grad_norm": 3.4475042819976807, "learning_rate": 9.861131188525225e-05, "loss": 1.7216, "step": 2834 }, { "epoch": 0.16903087376326142, "grad_norm": 3.689100980758667, "learning_rate": 9.860909768680258e-05, "loss": 1.7405, "step": 2836 }, { "epoch": 0.16915007748241745, "grad_norm": 3.6193251609802246, "learning_rate": 9.860688174943915e-05, "loss": 1.6418, "step": 2838 }, { "epoch": 0.1692692812015735, "grad_norm": 3.3080694675445557, "learning_rate": 9.860466407324125e-05, "loss": 1.6657, "step": 2840 }, { "epoch": 0.16938848492072953, "grad_norm": 3.4081554412841797, "learning_rate": 9.860244465828823e-05, "loss": 1.6619, "step": 2842 }, { "epoch": 0.16950768863988555, "grad_norm": 3.1808054447174072, "learning_rate": 9.860022350465947e-05, "loss": 1.5005, "step": 2844 }, { "epoch": 0.1696268923590416, "grad_norm": 3.579587697982788, "learning_rate": 9.859800061243443e-05, "loss": 1.5123, "step": 2846 }, { "epoch": 0.16974609607819763, "grad_norm": 3.0466012954711914, "learning_rate": 9.859577598169263e-05, "loss": 1.5358, "step": 2848 }, { "epoch": 0.16986529979735368, "grad_norm": 3.282266855239868, "learning_rate": 9.859354961251366e-05, "loss": 1.583, "step": 2850 }, { "epoch": 0.1699845035165097, "grad_norm": 3.4891586303710938, "learning_rate": 9.859132150497716e-05, "loss": 1.5271, "step": 2852 }, { "epoch": 0.17010370723566576, "grad_norm": 3.17948842048645, "learning_rate": 9.858909165916283e-05, "loss": 1.7115, "step": 2854 }, { "epoch": 0.17022291095482178, "grad_norm": 3.1886215209960938, "learning_rate": 9.858686007515044e-05, "loss": 1.7062, "step": 2856 }, { "epoch": 0.17034211467397783, "grad_norm": 3.2338767051696777, "learning_rate": 9.858462675301985e-05, "loss": 1.5165, "step": 2858 }, { "epoch": 0.17046131839313386, "grad_norm": 3.6987411975860596, "learning_rate": 9.858239169285091e-05, "loss": 1.6112, "step": 2860 }, { "epoch": 0.1705805221122899, "grad_norm": 3.096522569656372, "learning_rate": 9.85801548947236e-05, "loss": 1.7254, "step": 2862 }, { "epoch": 0.17069972583144594, "grad_norm": 3.4436984062194824, "learning_rate": 9.857791635871794e-05, "loss": 1.7298, "step": 2864 }, { "epoch": 0.170818929550602, "grad_norm": 3.222323417663574, "learning_rate": 9.857567608491399e-05, "loss": 1.5141, "step": 2866 }, { "epoch": 0.170938133269758, "grad_norm": 3.370968818664551, "learning_rate": 9.857343407339193e-05, "loss": 1.785, "step": 2868 }, { "epoch": 0.17105733698891407, "grad_norm": 3.1113836765289307, "learning_rate": 9.857119032423192e-05, "loss": 1.7266, "step": 2870 }, { "epoch": 0.1711765407080701, "grad_norm": 3.395111322402954, "learning_rate": 9.856894483751424e-05, "loss": 1.6575, "step": 2872 }, { "epoch": 0.17129574442722614, "grad_norm": 3.2687578201293945, "learning_rate": 9.856669761331925e-05, "loss": 1.5076, "step": 2874 }, { "epoch": 0.17141494814638217, "grad_norm": 3.3231289386749268, "learning_rate": 9.856444865172731e-05, "loss": 1.5842, "step": 2876 }, { "epoch": 0.1715341518655382, "grad_norm": 3.3990249633789062, "learning_rate": 9.856219795281887e-05, "loss": 1.7782, "step": 2878 }, { "epoch": 0.17165335558469424, "grad_norm": 3.8819656372070312, "learning_rate": 9.855994551667447e-05, "loss": 1.5453, "step": 2880 }, { "epoch": 0.17177255930385027, "grad_norm": 3.6016063690185547, "learning_rate": 9.855769134337466e-05, "loss": 1.6748, "step": 2882 }, { "epoch": 0.17189176302300632, "grad_norm": 3.033020496368408, "learning_rate": 9.855543543300012e-05, "loss": 1.572, "step": 2884 }, { "epoch": 0.17201096674216235, "grad_norm": 3.379474401473999, "learning_rate": 9.855317778563149e-05, "loss": 1.6351, "step": 2886 }, { "epoch": 0.1721301704613184, "grad_norm": 3.2419259548187256, "learning_rate": 9.855091840134958e-05, "loss": 1.5771, "step": 2888 }, { "epoch": 0.17224937418047442, "grad_norm": 3.28143048286438, "learning_rate": 9.854865728023521e-05, "loss": 1.6545, "step": 2890 }, { "epoch": 0.17236857789963048, "grad_norm": 3.0641531944274902, "learning_rate": 9.854639442236926e-05, "loss": 1.5651, "step": 2892 }, { "epoch": 0.1724877816187865, "grad_norm": 3.442991256713867, "learning_rate": 9.854412982783267e-05, "loss": 1.6737, "step": 2894 }, { "epoch": 0.17260698533794255, "grad_norm": 3.159721612930298, "learning_rate": 9.854186349670647e-05, "loss": 1.4146, "step": 2896 }, { "epoch": 0.17272618905709858, "grad_norm": 3.2295727729797363, "learning_rate": 9.853959542907174e-05, "loss": 1.5887, "step": 2898 }, { "epoch": 0.17284539277625463, "grad_norm": 3.094088077545166, "learning_rate": 9.853732562500959e-05, "loss": 1.5084, "step": 2900 }, { "epoch": 0.17296459649541066, "grad_norm": 3.5272510051727295, "learning_rate": 9.853505408460124e-05, "loss": 1.7363, "step": 2902 }, { "epoch": 0.1730838002145667, "grad_norm": 3.6915721893310547, "learning_rate": 9.853278080792794e-05, "loss": 1.7103, "step": 2904 }, { "epoch": 0.17320300393372273, "grad_norm": 3.1929514408111572, "learning_rate": 9.853050579507102e-05, "loss": 1.5587, "step": 2906 }, { "epoch": 0.17332220765287876, "grad_norm": 3.3810200691223145, "learning_rate": 9.852822904611187e-05, "loss": 1.583, "step": 2908 }, { "epoch": 0.1734414113720348, "grad_norm": 3.4208292961120605, "learning_rate": 9.852595056113192e-05, "loss": 1.7338, "step": 2910 }, { "epoch": 0.17356061509119083, "grad_norm": 3.009211540222168, "learning_rate": 9.852367034021269e-05, "loss": 1.625, "step": 2912 }, { "epoch": 0.1736798188103469, "grad_norm": 3.1599960327148438, "learning_rate": 9.852138838343576e-05, "loss": 1.6332, "step": 2914 }, { "epoch": 0.1737990225295029, "grad_norm": 3.337761163711548, "learning_rate": 9.851910469088275e-05, "loss": 1.7559, "step": 2916 }, { "epoch": 0.17391822624865896, "grad_norm": 3.1090075969696045, "learning_rate": 9.851681926263534e-05, "loss": 1.6118, "step": 2918 }, { "epoch": 0.174037429967815, "grad_norm": 3.2532289028167725, "learning_rate": 9.851453209877532e-05, "loss": 1.6906, "step": 2920 }, { "epoch": 0.17415663368697104, "grad_norm": 3.573869228363037, "learning_rate": 9.851224319938451e-05, "loss": 1.6254, "step": 2922 }, { "epoch": 0.17427583740612707, "grad_norm": 3.4856677055358887, "learning_rate": 9.850995256454475e-05, "loss": 1.5366, "step": 2924 }, { "epoch": 0.17439504112528312, "grad_norm": 3.1499783992767334, "learning_rate": 9.850766019433804e-05, "loss": 1.5785, "step": 2926 }, { "epoch": 0.17451424484443914, "grad_norm": 3.348296642303467, "learning_rate": 9.850536608884634e-05, "loss": 1.7593, "step": 2928 }, { "epoch": 0.1746334485635952, "grad_norm": 3.571582078933716, "learning_rate": 9.850307024815173e-05, "loss": 1.5737, "step": 2930 }, { "epoch": 0.17475265228275122, "grad_norm": 3.1500496864318848, "learning_rate": 9.850077267233636e-05, "loss": 1.5941, "step": 2932 }, { "epoch": 0.17487185600190727, "grad_norm": 3.237762451171875, "learning_rate": 9.849847336148241e-05, "loss": 1.4801, "step": 2934 }, { "epoch": 0.1749910597210633, "grad_norm": 3.770026683807373, "learning_rate": 9.849617231567212e-05, "loss": 1.5952, "step": 2936 }, { "epoch": 0.17511026344021932, "grad_norm": 3.5733373165130615, "learning_rate": 9.849386953498783e-05, "loss": 1.637, "step": 2938 }, { "epoch": 0.17522946715937537, "grad_norm": 3.723011016845703, "learning_rate": 9.84915650195119e-05, "loss": 1.632, "step": 2940 }, { "epoch": 0.1753486708785314, "grad_norm": 3.1347570419311523, "learning_rate": 9.848925876932678e-05, "loss": 1.7338, "step": 2942 }, { "epoch": 0.17546787459768745, "grad_norm": 3.0044212341308594, "learning_rate": 9.848695078451495e-05, "loss": 1.7421, "step": 2944 }, { "epoch": 0.17558707831684348, "grad_norm": 3.2650558948516846, "learning_rate": 9.848464106515902e-05, "loss": 1.5097, "step": 2946 }, { "epoch": 0.17570628203599953, "grad_norm": 4.337365627288818, "learning_rate": 9.848232961134158e-05, "loss": 1.6777, "step": 2948 }, { "epoch": 0.17582548575515555, "grad_norm": 3.4489355087280273, "learning_rate": 9.848001642314533e-05, "loss": 1.7208, "step": 2950 }, { "epoch": 0.1759446894743116, "grad_norm": 3.2645153999328613, "learning_rate": 9.847770150065303e-05, "loss": 1.5967, "step": 2952 }, { "epoch": 0.17606389319346763, "grad_norm": 3.286466121673584, "learning_rate": 9.847538484394748e-05, "loss": 1.8013, "step": 2954 }, { "epoch": 0.17618309691262368, "grad_norm": 3.094705820083618, "learning_rate": 9.847306645311155e-05, "loss": 1.6381, "step": 2956 }, { "epoch": 0.1763023006317797, "grad_norm": 3.2742342948913574, "learning_rate": 9.847074632822818e-05, "loss": 1.6708, "step": 2958 }, { "epoch": 0.17642150435093576, "grad_norm": 3.3660473823547363, "learning_rate": 9.846842446938039e-05, "loss": 1.6869, "step": 2960 }, { "epoch": 0.17654070807009178, "grad_norm": 3.514094829559326, "learning_rate": 9.84661008766512e-05, "loss": 1.6336, "step": 2962 }, { "epoch": 0.17665991178924784, "grad_norm": 3.4387471675872803, "learning_rate": 9.846377555012376e-05, "loss": 1.5675, "step": 2964 }, { "epoch": 0.17677911550840386, "grad_norm": 3.134000778198242, "learning_rate": 9.846144848988126e-05, "loss": 1.5067, "step": 2966 }, { "epoch": 0.1768983192275599, "grad_norm": 3.2872865200042725, "learning_rate": 9.845911969600694e-05, "loss": 1.5277, "step": 2968 }, { "epoch": 0.17701752294671594, "grad_norm": 3.594992160797119, "learning_rate": 9.845678916858411e-05, "loss": 1.5944, "step": 2970 }, { "epoch": 0.17713672666587196, "grad_norm": 3.2223708629608154, "learning_rate": 9.845445690769614e-05, "loss": 1.6072, "step": 2972 }, { "epoch": 0.17725593038502802, "grad_norm": 3.2728586196899414, "learning_rate": 9.845212291342645e-05, "loss": 1.5056, "step": 2974 }, { "epoch": 0.17737513410418404, "grad_norm": 3.252713203430176, "learning_rate": 9.844978718585855e-05, "loss": 1.5776, "step": 2976 }, { "epoch": 0.1774943378233401, "grad_norm": 3.43379807472229, "learning_rate": 9.8447449725076e-05, "loss": 1.8201, "step": 2978 }, { "epoch": 0.17761354154249612, "grad_norm": 3.1009504795074463, "learning_rate": 9.844511053116241e-05, "loss": 1.6258, "step": 2980 }, { "epoch": 0.17773274526165217, "grad_norm": 3.56827974319458, "learning_rate": 9.844276960420147e-05, "loss": 1.5424, "step": 2982 }, { "epoch": 0.1778519489808082, "grad_norm": 3.231050729751587, "learning_rate": 9.84404269442769e-05, "loss": 1.6803, "step": 2984 }, { "epoch": 0.17797115269996425, "grad_norm": 3.3307478427886963, "learning_rate": 9.843808255147253e-05, "loss": 1.672, "step": 2986 }, { "epoch": 0.17809035641912027, "grad_norm": 3.5645151138305664, "learning_rate": 9.84357364258722e-05, "loss": 1.6509, "step": 2988 }, { "epoch": 0.17820956013827632, "grad_norm": 3.139033317565918, "learning_rate": 9.843338856755988e-05, "loss": 1.7049, "step": 2990 }, { "epoch": 0.17832876385743235, "grad_norm": 3.437943935394287, "learning_rate": 9.843103897661953e-05, "loss": 1.6662, "step": 2992 }, { "epoch": 0.1784479675765884, "grad_norm": 3.272118330001831, "learning_rate": 9.842868765313523e-05, "loss": 1.6462, "step": 2994 }, { "epoch": 0.17856717129574443, "grad_norm": 3.54435658454895, "learning_rate": 9.842633459719104e-05, "loss": 1.6999, "step": 2996 }, { "epoch": 0.17868637501490048, "grad_norm": 3.502678155899048, "learning_rate": 9.842397980887119e-05, "loss": 1.5727, "step": 2998 }, { "epoch": 0.1788055787340565, "grad_norm": 3.3480684757232666, "learning_rate": 9.84216232882599e-05, "loss": 1.6511, "step": 3000 }, { "epoch": 0.17892478245321253, "grad_norm": 3.594221591949463, "learning_rate": 9.841926503544148e-05, "loss": 1.7434, "step": 3002 }, { "epoch": 0.17904398617236858, "grad_norm": 3.2991020679473877, "learning_rate": 9.841690505050027e-05, "loss": 1.6909, "step": 3004 }, { "epoch": 0.1791631898915246, "grad_norm": 3.403155565261841, "learning_rate": 9.841454333352072e-05, "loss": 1.7393, "step": 3006 }, { "epoch": 0.17928239361068066, "grad_norm": 3.0387754440307617, "learning_rate": 9.841217988458731e-05, "loss": 1.543, "step": 3008 }, { "epoch": 0.17940159732983668, "grad_norm": 3.184896945953369, "learning_rate": 9.840981470378458e-05, "loss": 1.65, "step": 3010 }, { "epoch": 0.17952080104899273, "grad_norm": 3.0707104206085205, "learning_rate": 9.840744779119714e-05, "loss": 1.6403, "step": 3012 }, { "epoch": 0.17964000476814876, "grad_norm": 3.260286331176758, "learning_rate": 9.840507914690968e-05, "loss": 1.7195, "step": 3014 }, { "epoch": 0.1797592084873048, "grad_norm": 3.0032241344451904, "learning_rate": 9.840270877100692e-05, "loss": 1.5946, "step": 3016 }, { "epoch": 0.17987841220646084, "grad_norm": 3.0453133583068848, "learning_rate": 9.840033666357364e-05, "loss": 1.4995, "step": 3018 }, { "epoch": 0.1799976159256169, "grad_norm": 3.4872989654541016, "learning_rate": 9.839796282469473e-05, "loss": 1.6155, "step": 3020 }, { "epoch": 0.1801168196447729, "grad_norm": 3.2864372730255127, "learning_rate": 9.83955872544551e-05, "loss": 1.6374, "step": 3022 }, { "epoch": 0.18023602336392897, "grad_norm": 3.0833566188812256, "learning_rate": 9.839320995293975e-05, "loss": 1.5378, "step": 3024 }, { "epoch": 0.180355227083085, "grad_norm": 3.224132776260376, "learning_rate": 9.839083092023367e-05, "loss": 1.6391, "step": 3026 }, { "epoch": 0.18047443080224104, "grad_norm": 3.516789674758911, "learning_rate": 9.838845015642202e-05, "loss": 1.6712, "step": 3028 }, { "epoch": 0.18059363452139707, "grad_norm": 3.850301742553711, "learning_rate": 9.838606766158996e-05, "loss": 1.6858, "step": 3030 }, { "epoch": 0.1807128382405531, "grad_norm": 3.5181081295013428, "learning_rate": 9.838368343582267e-05, "loss": 1.5702, "step": 3032 }, { "epoch": 0.18083204195970914, "grad_norm": 3.361384391784668, "learning_rate": 9.838129747920551e-05, "loss": 1.6487, "step": 3034 }, { "epoch": 0.18095124567886517, "grad_norm": 3.412649154663086, "learning_rate": 9.83789097918238e-05, "loss": 1.7885, "step": 3036 }, { "epoch": 0.18107044939802122, "grad_norm": 3.656994342803955, "learning_rate": 9.837652037376295e-05, "loss": 1.6496, "step": 3038 }, { "epoch": 0.18118965311717725, "grad_norm": 3.194084644317627, "learning_rate": 9.837412922510847e-05, "loss": 1.6802, "step": 3040 }, { "epoch": 0.1813088568363333, "grad_norm": 3.2890453338623047, "learning_rate": 9.837173634594586e-05, "loss": 1.5851, "step": 3042 }, { "epoch": 0.18142806055548932, "grad_norm": 3.1163811683654785, "learning_rate": 9.836934173636075e-05, "loss": 1.6136, "step": 3044 }, { "epoch": 0.18154726427464538, "grad_norm": 3.9580423831939697, "learning_rate": 9.836694539643877e-05, "loss": 1.5085, "step": 3046 }, { "epoch": 0.1816664679938014, "grad_norm": 3.4354021549224854, "learning_rate": 9.836454732626567e-05, "loss": 1.7041, "step": 3048 }, { "epoch": 0.18178567171295745, "grad_norm": 3.336216926574707, "learning_rate": 9.836214752592726e-05, "loss": 1.6028, "step": 3050 }, { "epoch": 0.18190487543211348, "grad_norm": 3.107112169265747, "learning_rate": 9.835974599550934e-05, "loss": 1.5836, "step": 3052 }, { "epoch": 0.18202407915126953, "grad_norm": 3.1826770305633545, "learning_rate": 9.835734273509786e-05, "loss": 1.5535, "step": 3054 }, { "epoch": 0.18214328287042555, "grad_norm": 2.710376501083374, "learning_rate": 9.835493774477877e-05, "loss": 1.4797, "step": 3056 }, { "epoch": 0.1822624865895816, "grad_norm": 3.378706455230713, "learning_rate": 9.835253102463812e-05, "loss": 1.6213, "step": 3058 }, { "epoch": 0.18238169030873763, "grad_norm": 3.0963261127471924, "learning_rate": 9.835012257476199e-05, "loss": 1.6034, "step": 3060 }, { "epoch": 0.18250089402789366, "grad_norm": 3.6250967979431152, "learning_rate": 9.834771239523655e-05, "loss": 1.7131, "step": 3062 }, { "epoch": 0.1826200977470497, "grad_norm": 3.275186777114868, "learning_rate": 9.834530048614802e-05, "loss": 1.6882, "step": 3064 }, { "epoch": 0.18273930146620573, "grad_norm": 3.5533785820007324, "learning_rate": 9.834288684758268e-05, "loss": 1.6468, "step": 3066 }, { "epoch": 0.18285850518536179, "grad_norm": 3.0165741443634033, "learning_rate": 9.834047147962686e-05, "loss": 1.5085, "step": 3068 }, { "epoch": 0.1829777089045178, "grad_norm": 3.0788755416870117, "learning_rate": 9.833805438236701e-05, "loss": 1.623, "step": 3070 }, { "epoch": 0.18309691262367386, "grad_norm": 3.188742160797119, "learning_rate": 9.833563555588953e-05, "loss": 1.8038, "step": 3072 }, { "epoch": 0.1832161163428299, "grad_norm": 3.185060739517212, "learning_rate": 9.833321500028102e-05, "loss": 1.6722, "step": 3074 }, { "epoch": 0.18333532006198594, "grad_norm": 3.297851324081421, "learning_rate": 9.833079271562802e-05, "loss": 1.7985, "step": 3076 }, { "epoch": 0.18345452378114196, "grad_norm": 3.46793270111084, "learning_rate": 9.83283687020172e-05, "loss": 1.5738, "step": 3078 }, { "epoch": 0.18357372750029802, "grad_norm": 3.277827024459839, "learning_rate": 9.83259429595353e-05, "loss": 1.6553, "step": 3080 }, { "epoch": 0.18369293121945404, "grad_norm": 3.167062997817993, "learning_rate": 9.832351548826905e-05, "loss": 1.6204, "step": 3082 }, { "epoch": 0.1838121349386101, "grad_norm": 3.1767256259918213, "learning_rate": 9.832108628830534e-05, "loss": 1.6351, "step": 3084 }, { "epoch": 0.18393133865776612, "grad_norm": 3.258890151977539, "learning_rate": 9.831865535973103e-05, "loss": 1.7498, "step": 3086 }, { "epoch": 0.18405054237692217, "grad_norm": 3.3142619132995605, "learning_rate": 9.83162227026331e-05, "loss": 1.7002, "step": 3088 }, { "epoch": 0.1841697460960782, "grad_norm": 3.2077324390411377, "learning_rate": 9.831378831709856e-05, "loss": 1.6521, "step": 3090 }, { "epoch": 0.18428894981523425, "grad_norm": 3.277979850769043, "learning_rate": 9.831135220321452e-05, "loss": 1.6351, "step": 3092 }, { "epoch": 0.18440815353439027, "grad_norm": 3.1203954219818115, "learning_rate": 9.830891436106811e-05, "loss": 1.6446, "step": 3094 }, { "epoch": 0.1845273572535463, "grad_norm": 3.1486971378326416, "learning_rate": 9.830647479074654e-05, "loss": 1.8411, "step": 3096 }, { "epoch": 0.18464656097270235, "grad_norm": 2.893900156021118, "learning_rate": 9.830403349233712e-05, "loss": 1.7211, "step": 3098 }, { "epoch": 0.18476576469185838, "grad_norm": 3.5631844997406006, "learning_rate": 9.830159046592712e-05, "loss": 1.5665, "step": 3100 }, { "epoch": 0.18488496841101443, "grad_norm": 3.6640844345092773, "learning_rate": 9.829914571160397e-05, "loss": 1.6271, "step": 3102 }, { "epoch": 0.18500417213017045, "grad_norm": 3.4025931358337402, "learning_rate": 9.829669922945513e-05, "loss": 1.6314, "step": 3104 }, { "epoch": 0.1851233758493265, "grad_norm": 3.198671340942383, "learning_rate": 9.829425101956812e-05, "loss": 1.6416, "step": 3106 }, { "epoch": 0.18524257956848253, "grad_norm": 3.562913656234741, "learning_rate": 9.829180108203052e-05, "loss": 1.5391, "step": 3108 }, { "epoch": 0.18536178328763858, "grad_norm": 3.752713203430176, "learning_rate": 9.828934941692993e-05, "loss": 1.7118, "step": 3110 }, { "epoch": 0.1854809870067946, "grad_norm": 3.2473721504211426, "learning_rate": 9.828689602435412e-05, "loss": 1.5768, "step": 3112 }, { "epoch": 0.18560019072595066, "grad_norm": 3.153852939605713, "learning_rate": 9.828444090439083e-05, "loss": 1.521, "step": 3114 }, { "epoch": 0.18571939444510668, "grad_norm": 3.369377374649048, "learning_rate": 9.828198405712787e-05, "loss": 1.6533, "step": 3116 }, { "epoch": 0.18583859816426274, "grad_norm": 3.451143503189087, "learning_rate": 9.827952548265316e-05, "loss": 1.7538, "step": 3118 }, { "epoch": 0.18595780188341876, "grad_norm": 3.432741403579712, "learning_rate": 9.827706518105463e-05, "loss": 1.5938, "step": 3120 }, { "epoch": 0.1860770056025748, "grad_norm": 3.4018032550811768, "learning_rate": 9.82746031524203e-05, "loss": 1.6559, "step": 3122 }, { "epoch": 0.18619620932173084, "grad_norm": 3.4102768898010254, "learning_rate": 9.827213939683824e-05, "loss": 1.6708, "step": 3124 }, { "epoch": 0.18631541304088686, "grad_norm": 2.783742666244507, "learning_rate": 9.826967391439661e-05, "loss": 1.6367, "step": 3126 }, { "epoch": 0.18643461676004291, "grad_norm": 3.3889482021331787, "learning_rate": 9.826720670518357e-05, "loss": 1.8539, "step": 3128 }, { "epoch": 0.18655382047919894, "grad_norm": 2.7397313117980957, "learning_rate": 9.826473776928741e-05, "loss": 1.5624, "step": 3130 }, { "epoch": 0.186673024198355, "grad_norm": 3.176417112350464, "learning_rate": 9.826226710679644e-05, "loss": 1.6656, "step": 3132 }, { "epoch": 0.18679222791751102, "grad_norm": 2.9250128269195557, "learning_rate": 9.825979471779906e-05, "loss": 1.7263, "step": 3134 }, { "epoch": 0.18691143163666707, "grad_norm": 3.3444976806640625, "learning_rate": 9.825732060238369e-05, "loss": 1.7256, "step": 3136 }, { "epoch": 0.1870306353558231, "grad_norm": 3.2286434173583984, "learning_rate": 9.825484476063887e-05, "loss": 1.6158, "step": 3138 }, { "epoch": 0.18714983907497915, "grad_norm": 3.346147298812866, "learning_rate": 9.825236719265314e-05, "loss": 1.7515, "step": 3140 }, { "epoch": 0.18726904279413517, "grad_norm": 3.894049882888794, "learning_rate": 9.824988789851514e-05, "loss": 1.676, "step": 3142 }, { "epoch": 0.18738824651329122, "grad_norm": 3.1326088905334473, "learning_rate": 9.824740687831355e-05, "loss": 1.6162, "step": 3144 }, { "epoch": 0.18750745023244725, "grad_norm": 3.330949544906616, "learning_rate": 9.824492413213718e-05, "loss": 1.5834, "step": 3146 }, { "epoch": 0.1876266539516033, "grad_norm": 3.614518880844116, "learning_rate": 9.824243966007476e-05, "loss": 1.4329, "step": 3148 }, { "epoch": 0.18774585767075933, "grad_norm": 3.4487454891204834, "learning_rate": 9.823995346221523e-05, "loss": 1.4857, "step": 3150 }, { "epoch": 0.18786506138991538, "grad_norm": 3.4425694942474365, "learning_rate": 9.823746553864752e-05, "loss": 1.7374, "step": 3152 }, { "epoch": 0.1879842651090714, "grad_norm": 3.546394109725952, "learning_rate": 9.823497588946063e-05, "loss": 1.4883, "step": 3154 }, { "epoch": 0.18810346882822743, "grad_norm": 3.3431365489959717, "learning_rate": 9.823248451474361e-05, "loss": 1.5601, "step": 3156 }, { "epoch": 0.18822267254738348, "grad_norm": 3.3936233520507812, "learning_rate": 9.82299914145856e-05, "loss": 1.6488, "step": 3158 }, { "epoch": 0.1883418762665395, "grad_norm": 3.149364948272705, "learning_rate": 9.822749658907578e-05, "loss": 1.5029, "step": 3160 }, { "epoch": 0.18846107998569556, "grad_norm": 2.9991862773895264, "learning_rate": 9.822500003830337e-05, "loss": 1.4947, "step": 3162 }, { "epoch": 0.18858028370485158, "grad_norm": 3.520718574523926, "learning_rate": 9.822250176235775e-05, "loss": 1.6507, "step": 3164 }, { "epoch": 0.18869948742400763, "grad_norm": 3.335380792617798, "learning_rate": 9.822000176132822e-05, "loss": 1.5439, "step": 3166 }, { "epoch": 0.18881869114316366, "grad_norm": 3.398329496383667, "learning_rate": 9.821750003530426e-05, "loss": 1.5443, "step": 3168 }, { "epoch": 0.1889378948623197, "grad_norm": 3.160707950592041, "learning_rate": 9.821499658437535e-05, "loss": 1.4184, "step": 3170 }, { "epoch": 0.18905709858147574, "grad_norm": 3.4853482246398926, "learning_rate": 9.821249140863102e-05, "loss": 1.612, "step": 3172 }, { "epoch": 0.1891763023006318, "grad_norm": 3.388920545578003, "learning_rate": 9.820998450816094e-05, "loss": 1.5527, "step": 3174 }, { "epoch": 0.1892955060197878, "grad_norm": 3.039548635482788, "learning_rate": 9.820747588305477e-05, "loss": 1.5379, "step": 3176 }, { "epoch": 0.18941470973894386, "grad_norm": 3.658850908279419, "learning_rate": 9.820496553340224e-05, "loss": 1.5744, "step": 3178 }, { "epoch": 0.1895339134580999, "grad_norm": 3.197697877883911, "learning_rate": 9.820245345929315e-05, "loss": 1.4726, "step": 3180 }, { "epoch": 0.18965311717725594, "grad_norm": 2.9863953590393066, "learning_rate": 9.81999396608174e-05, "loss": 1.7366, "step": 3182 }, { "epoch": 0.18977232089641197, "grad_norm": 3.0569310188293457, "learning_rate": 9.819742413806487e-05, "loss": 1.5463, "step": 3184 }, { "epoch": 0.18989152461556802, "grad_norm": 3.315061569213867, "learning_rate": 9.819490689112558e-05, "loss": 1.5838, "step": 3186 }, { "epoch": 0.19001072833472404, "grad_norm": 3.136634588241577, "learning_rate": 9.819238792008958e-05, "loss": 1.6093, "step": 3188 }, { "epoch": 0.19012993205388007, "grad_norm": 2.7996537685394287, "learning_rate": 9.818986722504697e-05, "loss": 1.4906, "step": 3190 }, { "epoch": 0.19024913577303612, "grad_norm": 3.13615345954895, "learning_rate": 9.818734480608794e-05, "loss": 1.5834, "step": 3192 }, { "epoch": 0.19036833949219215, "grad_norm": 4.194802284240723, "learning_rate": 9.81848206633027e-05, "loss": 1.6122, "step": 3194 }, { "epoch": 0.1904875432113482, "grad_norm": 3.5692734718322754, "learning_rate": 9.818229479678158e-05, "loss": 1.7051, "step": 3196 }, { "epoch": 0.19060674693050422, "grad_norm": 3.5917441844940186, "learning_rate": 9.81797672066149e-05, "loss": 1.5218, "step": 3198 }, { "epoch": 0.19072595064966028, "grad_norm": 3.2814629077911377, "learning_rate": 9.817723789289312e-05, "loss": 1.8011, "step": 3200 }, { "epoch": 0.1908451543688163, "grad_norm": 3.3416810035705566, "learning_rate": 9.81747068557067e-05, "loss": 1.7767, "step": 3202 }, { "epoch": 0.19096435808797235, "grad_norm": 3.0933988094329834, "learning_rate": 9.817217409514618e-05, "loss": 1.5326, "step": 3204 }, { "epoch": 0.19108356180712838, "grad_norm": 3.467771053314209, "learning_rate": 9.816963961130218e-05, "loss": 1.657, "step": 3206 }, { "epoch": 0.19120276552628443, "grad_norm": 3.2331786155700684, "learning_rate": 9.816710340426536e-05, "loss": 1.5094, "step": 3208 }, { "epoch": 0.19132196924544045, "grad_norm": 3.920698881149292, "learning_rate": 9.816456547412644e-05, "loss": 1.6194, "step": 3210 }, { "epoch": 0.1914411729645965, "grad_norm": 3.4486923217773438, "learning_rate": 9.816202582097623e-05, "loss": 1.5782, "step": 3212 }, { "epoch": 0.19156037668375253, "grad_norm": 2.752264976501465, "learning_rate": 9.815948444490557e-05, "loss": 1.4298, "step": 3214 }, { "epoch": 0.19167958040290858, "grad_norm": 3.151461362838745, "learning_rate": 9.815694134600538e-05, "loss": 1.731, "step": 3216 }, { "epoch": 0.1917987841220646, "grad_norm": 3.345426321029663, "learning_rate": 9.815439652436661e-05, "loss": 1.632, "step": 3218 }, { "epoch": 0.19191798784122063, "grad_norm": 3.3018558025360107, "learning_rate": 9.815184998008034e-05, "loss": 1.5645, "step": 3220 }, { "epoch": 0.19203719156037669, "grad_norm": 3.3624889850616455, "learning_rate": 9.814930171323764e-05, "loss": 1.6475, "step": 3222 }, { "epoch": 0.1921563952795327, "grad_norm": 3.2626662254333496, "learning_rate": 9.814675172392969e-05, "loss": 1.6997, "step": 3224 }, { "epoch": 0.19227559899868876, "grad_norm": 3.309507131576538, "learning_rate": 9.814420001224766e-05, "loss": 1.5731, "step": 3226 }, { "epoch": 0.1923948027178448, "grad_norm": 3.4603545665740967, "learning_rate": 9.81416465782829e-05, "loss": 1.5145, "step": 3228 }, { "epoch": 0.19251400643700084, "grad_norm": 3.0572705268859863, "learning_rate": 9.813909142212674e-05, "loss": 1.5001, "step": 3230 }, { "epoch": 0.19263321015615686, "grad_norm": 3.1770429611206055, "learning_rate": 9.813653454387055e-05, "loss": 1.6081, "step": 3232 }, { "epoch": 0.19275241387531292, "grad_norm": 3.463428020477295, "learning_rate": 9.813397594360583e-05, "loss": 1.6016, "step": 3234 }, { "epoch": 0.19287161759446894, "grad_norm": 3.186581611633301, "learning_rate": 9.813141562142409e-05, "loss": 1.5748, "step": 3236 }, { "epoch": 0.192990821313625, "grad_norm": 3.367102861404419, "learning_rate": 9.812885357741695e-05, "loss": 1.5495, "step": 3238 }, { "epoch": 0.19311002503278102, "grad_norm": 3.3424532413482666, "learning_rate": 9.812628981167604e-05, "loss": 1.6831, "step": 3240 }, { "epoch": 0.19322922875193707, "grad_norm": 3.0241944789886475, "learning_rate": 9.812372432429309e-05, "loss": 1.5871, "step": 3242 }, { "epoch": 0.1933484324710931, "grad_norm": 3.2533676624298096, "learning_rate": 9.812115711535986e-05, "loss": 1.5474, "step": 3244 }, { "epoch": 0.19346763619024915, "grad_norm": 2.8148889541625977, "learning_rate": 9.811858818496819e-05, "loss": 1.4771, "step": 3246 }, { "epoch": 0.19358683990940517, "grad_norm": 3.2723238468170166, "learning_rate": 9.811601753321e-05, "loss": 1.6205, "step": 3248 }, { "epoch": 0.1937060436285612, "grad_norm": 3.2936222553253174, "learning_rate": 9.811344516017722e-05, "loss": 1.528, "step": 3250 }, { "epoch": 0.19382524734771725, "grad_norm": 3.332009792327881, "learning_rate": 9.811087106596188e-05, "loss": 1.6725, "step": 3252 }, { "epoch": 0.19394445106687327, "grad_norm": 3.2259836196899414, "learning_rate": 9.810829525065611e-05, "loss": 1.6205, "step": 3254 }, { "epoch": 0.19406365478602933, "grad_norm": 3.205132246017456, "learning_rate": 9.810571771435198e-05, "loss": 1.5755, "step": 3256 }, { "epoch": 0.19418285850518535, "grad_norm": 3.098475456237793, "learning_rate": 9.810313845714174e-05, "loss": 1.4846, "step": 3258 }, { "epoch": 0.1943020622243414, "grad_norm": 3.2397255897521973, "learning_rate": 9.810055747911768e-05, "loss": 1.5926, "step": 3260 }, { "epoch": 0.19442126594349743, "grad_norm": 3.3983867168426514, "learning_rate": 9.809797478037208e-05, "loss": 1.594, "step": 3262 }, { "epoch": 0.19454046966265348, "grad_norm": 3.9808216094970703, "learning_rate": 9.809539036099737e-05, "loss": 1.7972, "step": 3264 }, { "epoch": 0.1946596733818095, "grad_norm": 2.9690189361572266, "learning_rate": 9.809280422108598e-05, "loss": 1.5887, "step": 3266 }, { "epoch": 0.19477887710096556, "grad_norm": 2.8225529193878174, "learning_rate": 9.809021636073043e-05, "loss": 1.5518, "step": 3268 }, { "epoch": 0.19489808082012158, "grad_norm": 3.1293060779571533, "learning_rate": 9.808762678002332e-05, "loss": 1.5482, "step": 3270 }, { "epoch": 0.19501728453927764, "grad_norm": 3.3872475624084473, "learning_rate": 9.808503547905727e-05, "loss": 1.7347, "step": 3272 }, { "epoch": 0.19513648825843366, "grad_norm": 3.112752676010132, "learning_rate": 9.808244245792497e-05, "loss": 1.4483, "step": 3274 }, { "epoch": 0.1952556919775897, "grad_norm": 3.282349109649658, "learning_rate": 9.807984771671919e-05, "loss": 1.6433, "step": 3276 }, { "epoch": 0.19537489569674574, "grad_norm": 3.003225088119507, "learning_rate": 9.807725125553275e-05, "loss": 1.5173, "step": 3278 }, { "epoch": 0.1954940994159018, "grad_norm": 3.17875599861145, "learning_rate": 9.807465307445855e-05, "loss": 1.5082, "step": 3280 }, { "epoch": 0.19561330313505781, "grad_norm": 3.519972324371338, "learning_rate": 9.807205317358952e-05, "loss": 1.5894, "step": 3282 }, { "epoch": 0.19573250685421384, "grad_norm": 3.1989221572875977, "learning_rate": 9.806945155301868e-05, "loss": 1.58, "step": 3284 }, { "epoch": 0.1958517105733699, "grad_norm": 3.4492616653442383, "learning_rate": 9.806684821283907e-05, "loss": 1.6066, "step": 3286 }, { "epoch": 0.19597091429252592, "grad_norm": 3.523961067199707, "learning_rate": 9.806424315314385e-05, "loss": 1.9249, "step": 3288 }, { "epoch": 0.19609011801168197, "grad_norm": 2.9786746501922607, "learning_rate": 9.80616363740262e-05, "loss": 1.6753, "step": 3290 }, { "epoch": 0.196209321730838, "grad_norm": 3.306519031524658, "learning_rate": 9.805902787557939e-05, "loss": 1.6288, "step": 3292 }, { "epoch": 0.19632852544999405, "grad_norm": 3.213555097579956, "learning_rate": 9.805641765789671e-05, "loss": 1.5542, "step": 3294 }, { "epoch": 0.19644772916915007, "grad_norm": 3.288299798965454, "learning_rate": 9.805380572107153e-05, "loss": 1.6622, "step": 3296 }, { "epoch": 0.19656693288830612, "grad_norm": 3.0995922088623047, "learning_rate": 9.805119206519732e-05, "loss": 1.5476, "step": 3298 }, { "epoch": 0.19668613660746215, "grad_norm": 2.9019172191619873, "learning_rate": 9.804857669036757e-05, "loss": 1.4989, "step": 3300 }, { "epoch": 0.1968053403266182, "grad_norm": 2.7685556411743164, "learning_rate": 9.804595959667584e-05, "loss": 1.5626, "step": 3302 }, { "epoch": 0.19692454404577422, "grad_norm": 3.4616990089416504, "learning_rate": 9.804334078421573e-05, "loss": 1.5624, "step": 3304 }, { "epoch": 0.19704374776493028, "grad_norm": 3.1639974117279053, "learning_rate": 9.804072025308095e-05, "loss": 1.5868, "step": 3306 }, { "epoch": 0.1971629514840863, "grad_norm": 3.6536779403686523, "learning_rate": 9.803809800336523e-05, "loss": 1.6181, "step": 3308 }, { "epoch": 0.19728215520324235, "grad_norm": 3.136460542678833, "learning_rate": 9.803547403516241e-05, "loss": 1.5918, "step": 3310 }, { "epoch": 0.19740135892239838, "grad_norm": 3.269526481628418, "learning_rate": 9.80328483485663e-05, "loss": 1.8216, "step": 3312 }, { "epoch": 0.1975205626415544, "grad_norm": 2.8923795223236084, "learning_rate": 9.803022094367088e-05, "loss": 1.5336, "step": 3314 }, { "epoch": 0.19763976636071046, "grad_norm": 3.5441036224365234, "learning_rate": 9.802759182057012e-05, "loss": 1.7596, "step": 3316 }, { "epoch": 0.19775897007986648, "grad_norm": 3.4919943809509277, "learning_rate": 9.802496097935807e-05, "loss": 1.7005, "step": 3318 }, { "epoch": 0.19787817379902253, "grad_norm": 3.144278049468994, "learning_rate": 9.802232842012887e-05, "loss": 1.5954, "step": 3320 }, { "epoch": 0.19799737751817856, "grad_norm": 3.3455913066864014, "learning_rate": 9.801969414297664e-05, "loss": 1.6526, "step": 3322 }, { "epoch": 0.1981165812373346, "grad_norm": 3.4899892807006836, "learning_rate": 9.801705814799568e-05, "loss": 1.5798, "step": 3324 }, { "epoch": 0.19823578495649063, "grad_norm": 3.1925013065338135, "learning_rate": 9.801442043528025e-05, "loss": 1.7371, "step": 3326 }, { "epoch": 0.1983549886756467, "grad_norm": 3.340785026550293, "learning_rate": 9.801178100492473e-05, "loss": 1.6599, "step": 3328 }, { "epoch": 0.1984741923948027, "grad_norm": 3.574087381362915, "learning_rate": 9.800913985702353e-05, "loss": 1.6767, "step": 3330 }, { "epoch": 0.19859339611395876, "grad_norm": 3.447324514389038, "learning_rate": 9.800649699167112e-05, "loss": 1.489, "step": 3332 }, { "epoch": 0.1987125998331148, "grad_norm": 3.274339199066162, "learning_rate": 9.800385240896208e-05, "loss": 1.5605, "step": 3334 }, { "epoch": 0.19883180355227084, "grad_norm": 2.9603545665740967, "learning_rate": 9.8001206108991e-05, "loss": 1.501, "step": 3336 }, { "epoch": 0.19895100727142687, "grad_norm": 3.5120575428009033, "learning_rate": 9.799855809185254e-05, "loss": 1.6908, "step": 3338 }, { "epoch": 0.19907021099058292, "grad_norm": 3.570380449295044, "learning_rate": 9.799590835764142e-05, "loss": 1.5166, "step": 3340 }, { "epoch": 0.19918941470973894, "grad_norm": 3.0926427841186523, "learning_rate": 9.799325690645245e-05, "loss": 1.6477, "step": 3342 }, { "epoch": 0.19930861842889497, "grad_norm": 3.1012516021728516, "learning_rate": 9.799060373838047e-05, "loss": 1.7827, "step": 3344 }, { "epoch": 0.19942782214805102, "grad_norm": 2.8943865299224854, "learning_rate": 9.79879488535204e-05, "loss": 1.5523, "step": 3346 }, { "epoch": 0.19954702586720705, "grad_norm": 3.3741543292999268, "learning_rate": 9.798529225196719e-05, "loss": 1.5494, "step": 3348 }, { "epoch": 0.1996662295863631, "grad_norm": 3.597294569015503, "learning_rate": 9.798263393381592e-05, "loss": 1.5842, "step": 3350 }, { "epoch": 0.19978543330551912, "grad_norm": 3.492034673690796, "learning_rate": 9.797997389916167e-05, "loss": 1.6466, "step": 3352 }, { "epoch": 0.19990463702467517, "grad_norm": 3.3269577026367188, "learning_rate": 9.797731214809958e-05, "loss": 1.7257, "step": 3354 }, { "epoch": 0.2000238407438312, "grad_norm": 3.1268513202667236, "learning_rate": 9.797464868072488e-05, "loss": 1.6084, "step": 3356 }, { "epoch": 0.20014304446298725, "grad_norm": 3.0762155055999756, "learning_rate": 9.797198349713284e-05, "loss": 1.5657, "step": 3358 }, { "epoch": 0.20026224818214328, "grad_norm": 3.2906200885772705, "learning_rate": 9.796931659741884e-05, "loss": 1.6133, "step": 3360 }, { "epoch": 0.20038145190129933, "grad_norm": 3.5236289501190186, "learning_rate": 9.796664798167825e-05, "loss": 1.6744, "step": 3362 }, { "epoch": 0.20050065562045535, "grad_norm": 3.2730679512023926, "learning_rate": 9.796397765000655e-05, "loss": 1.5752, "step": 3364 }, { "epoch": 0.2006198593396114, "grad_norm": 3.4672739505767822, "learning_rate": 9.796130560249926e-05, "loss": 1.7117, "step": 3366 }, { "epoch": 0.20073906305876743, "grad_norm": 3.307217597961426, "learning_rate": 9.795863183925195e-05, "loss": 1.5261, "step": 3368 }, { "epoch": 0.20085826677792348, "grad_norm": 3.1971232891082764, "learning_rate": 9.795595636036032e-05, "loss": 1.6312, "step": 3370 }, { "epoch": 0.2009774704970795, "grad_norm": 4.0129313468933105, "learning_rate": 9.795327916592004e-05, "loss": 1.6127, "step": 3372 }, { "epoch": 0.20109667421623556, "grad_norm": 3.18214750289917, "learning_rate": 9.795060025602689e-05, "loss": 1.5666, "step": 3374 }, { "epoch": 0.20121587793539158, "grad_norm": 3.398785352706909, "learning_rate": 9.794791963077672e-05, "loss": 1.5396, "step": 3376 }, { "epoch": 0.2013350816545476, "grad_norm": 3.2684857845306396, "learning_rate": 9.79452372902654e-05, "loss": 1.7737, "step": 3378 }, { "epoch": 0.20145428537370366, "grad_norm": 3.3796088695526123, "learning_rate": 9.794255323458891e-05, "loss": 1.5965, "step": 3380 }, { "epoch": 0.2015734890928597, "grad_norm": 3.349621295928955, "learning_rate": 9.793986746384326e-05, "loss": 1.5789, "step": 3382 }, { "epoch": 0.20169269281201574, "grad_norm": 5.040905952453613, "learning_rate": 9.793717997812451e-05, "loss": 1.6092, "step": 3384 }, { "epoch": 0.20181189653117176, "grad_norm": 3.1945126056671143, "learning_rate": 9.793449077752882e-05, "loss": 1.7068, "step": 3386 }, { "epoch": 0.20193110025032782, "grad_norm": 3.211031675338745, "learning_rate": 9.793179986215239e-05, "loss": 1.5409, "step": 3388 }, { "epoch": 0.20205030396948384, "grad_norm": 3.651949882507324, "learning_rate": 9.79291072320915e-05, "loss": 1.7242, "step": 3390 }, { "epoch": 0.2021695076886399, "grad_norm": 3.2636921405792236, "learning_rate": 9.792641288744243e-05, "loss": 1.5109, "step": 3392 }, { "epoch": 0.20228871140779592, "grad_norm": 2.9829936027526855, "learning_rate": 9.792371682830161e-05, "loss": 1.5971, "step": 3394 }, { "epoch": 0.20240791512695197, "grad_norm": 3.471949577331543, "learning_rate": 9.792101905476546e-05, "loss": 1.5735, "step": 3396 }, { "epoch": 0.202527118846108, "grad_norm": 3.101541042327881, "learning_rate": 9.79183195669305e-05, "loss": 1.6272, "step": 3398 }, { "epoch": 0.20264632256526405, "grad_norm": 3.255383014678955, "learning_rate": 9.791561836489331e-05, "loss": 1.6064, "step": 3400 }, { "epoch": 0.20276552628442007, "grad_norm": 3.434084892272949, "learning_rate": 9.79129154487505e-05, "loss": 1.5126, "step": 3402 }, { "epoch": 0.20288473000357612, "grad_norm": 3.1557135581970215, "learning_rate": 9.791021081859877e-05, "loss": 1.4478, "step": 3404 }, { "epoch": 0.20300393372273215, "grad_norm": 3.4861373901367188, "learning_rate": 9.790750447453487e-05, "loss": 1.6338, "step": 3406 }, { "epoch": 0.20312313744188817, "grad_norm": 3.335268020629883, "learning_rate": 9.790479641665564e-05, "loss": 1.5005, "step": 3408 }, { "epoch": 0.20324234116104423, "grad_norm": 3.5297069549560547, "learning_rate": 9.790208664505793e-05, "loss": 1.5978, "step": 3410 }, { "epoch": 0.20336154488020025, "grad_norm": 3.400726079940796, "learning_rate": 9.789937515983868e-05, "loss": 1.7276, "step": 3412 }, { "epoch": 0.2034807485993563, "grad_norm": 3.054853916168213, "learning_rate": 9.78966619610949e-05, "loss": 1.7544, "step": 3414 }, { "epoch": 0.20359995231851233, "grad_norm": 3.371655225753784, "learning_rate": 9.789394704892364e-05, "loss": 1.642, "step": 3416 }, { "epoch": 0.20371915603766838, "grad_norm": 3.3930561542510986, "learning_rate": 9.789123042342203e-05, "loss": 1.8016, "step": 3418 }, { "epoch": 0.2038383597568244, "grad_norm": 3.3046441078186035, "learning_rate": 9.788851208468726e-05, "loss": 1.7149, "step": 3420 }, { "epoch": 0.20395756347598046, "grad_norm": 3.4325754642486572, "learning_rate": 9.788579203281653e-05, "loss": 1.6199, "step": 3422 }, { "epoch": 0.20407676719513648, "grad_norm": 3.364133358001709, "learning_rate": 9.788307026790721e-05, "loss": 1.5234, "step": 3424 }, { "epoch": 0.20419597091429253, "grad_norm": 3.4539902210235596, "learning_rate": 9.788034679005664e-05, "loss": 1.5981, "step": 3426 }, { "epoch": 0.20431517463344856, "grad_norm": 3.1799488067626953, "learning_rate": 9.787762159936222e-05, "loss": 1.5984, "step": 3428 }, { "epoch": 0.2044343783526046, "grad_norm": 2.8675785064697266, "learning_rate": 9.787489469592148e-05, "loss": 1.5127, "step": 3430 }, { "epoch": 0.20455358207176064, "grad_norm": 3.369901180267334, "learning_rate": 9.787216607983196e-05, "loss": 1.5705, "step": 3432 }, { "epoch": 0.2046727857909167, "grad_norm": 2.9826948642730713, "learning_rate": 9.786943575119125e-05, "loss": 1.5733, "step": 3434 }, { "epoch": 0.20479198951007271, "grad_norm": 3.0612082481384277, "learning_rate": 9.786670371009706e-05, "loss": 1.4496, "step": 3436 }, { "epoch": 0.20491119322922874, "grad_norm": 3.207648277282715, "learning_rate": 9.786396995664709e-05, "loss": 1.6936, "step": 3438 }, { "epoch": 0.2050303969483848, "grad_norm": 3.3257274627685547, "learning_rate": 9.786123449093915e-05, "loss": 1.6032, "step": 3440 }, { "epoch": 0.20514960066754082, "grad_norm": 3.1841320991516113, "learning_rate": 9.785849731307109e-05, "loss": 1.6638, "step": 3442 }, { "epoch": 0.20526880438669687, "grad_norm": 3.3601596355438232, "learning_rate": 9.785575842314085e-05, "loss": 1.7218, "step": 3444 }, { "epoch": 0.2053880081058529, "grad_norm": 3.6045145988464355, "learning_rate": 9.785301782124639e-05, "loss": 1.5954, "step": 3446 }, { "epoch": 0.20550721182500895, "grad_norm": 3.0081779956817627, "learning_rate": 9.785027550748576e-05, "loss": 1.6979, "step": 3448 }, { "epoch": 0.20562641554416497, "grad_norm": 3.437130928039551, "learning_rate": 9.784753148195705e-05, "loss": 1.8635, "step": 3450 }, { "epoch": 0.20574561926332102, "grad_norm": 3.196760416030884, "learning_rate": 9.784478574475842e-05, "loss": 1.4126, "step": 3452 }, { "epoch": 0.20586482298247705, "grad_norm": 3.155945301055908, "learning_rate": 9.784203829598813e-05, "loss": 1.6428, "step": 3454 }, { "epoch": 0.2059840267016331, "grad_norm": 3.1132402420043945, "learning_rate": 9.783928913574442e-05, "loss": 1.7066, "step": 3456 }, { "epoch": 0.20610323042078912, "grad_norm": 3.578331470489502, "learning_rate": 9.783653826412567e-05, "loss": 1.6238, "step": 3458 }, { "epoch": 0.20622243413994518, "grad_norm": 3.016698122024536, "learning_rate": 9.783378568123026e-05, "loss": 1.6788, "step": 3460 }, { "epoch": 0.2063416378591012, "grad_norm": 3.2284300327301025, "learning_rate": 9.783103138715667e-05, "loss": 1.447, "step": 3462 }, { "epoch": 0.20646084157825725, "grad_norm": 3.2498373985290527, "learning_rate": 9.782827538200345e-05, "loss": 1.5622, "step": 3464 }, { "epoch": 0.20658004529741328, "grad_norm": 2.9709670543670654, "learning_rate": 9.782551766586919e-05, "loss": 1.6521, "step": 3466 }, { "epoch": 0.20669924901656933, "grad_norm": 2.9320313930511475, "learning_rate": 9.782275823885252e-05, "loss": 1.5631, "step": 3468 }, { "epoch": 0.20681845273572536, "grad_norm": 3.3416635990142822, "learning_rate": 9.781999710105215e-05, "loss": 1.5423, "step": 3470 }, { "epoch": 0.20693765645488138, "grad_norm": 3.4545059204101562, "learning_rate": 9.781723425256688e-05, "loss": 1.5799, "step": 3472 }, { "epoch": 0.20705686017403743, "grad_norm": 3.5979645252227783, "learning_rate": 9.781446969349552e-05, "loss": 1.5489, "step": 3474 }, { "epoch": 0.20717606389319346, "grad_norm": 3.51577091217041, "learning_rate": 9.781170342393702e-05, "loss": 1.6339, "step": 3476 }, { "epoch": 0.2072952676123495, "grad_norm": 2.823756694793701, "learning_rate": 9.780893544399028e-05, "loss": 1.58, "step": 3478 }, { "epoch": 0.20741447133150553, "grad_norm": 3.4005069732666016, "learning_rate": 9.780616575375434e-05, "loss": 1.7418, "step": 3480 }, { "epoch": 0.2075336750506616, "grad_norm": 3.3638458251953125, "learning_rate": 9.780339435332828e-05, "loss": 1.7985, "step": 3482 }, { "epoch": 0.2076528787698176, "grad_norm": 3.1903867721557617, "learning_rate": 9.780062124281127e-05, "loss": 1.423, "step": 3484 }, { "epoch": 0.20777208248897366, "grad_norm": 3.086381196975708, "learning_rate": 9.779784642230246e-05, "loss": 1.7542, "step": 3486 }, { "epoch": 0.2078912862081297, "grad_norm": 3.2528622150421143, "learning_rate": 9.779506989190115e-05, "loss": 1.5563, "step": 3488 }, { "epoch": 0.20801048992728574, "grad_norm": 3.0645523071289062, "learning_rate": 9.779229165170668e-05, "loss": 1.6369, "step": 3490 }, { "epoch": 0.20812969364644177, "grad_norm": 3.5641636848449707, "learning_rate": 9.77895117018184e-05, "loss": 1.5872, "step": 3492 }, { "epoch": 0.20824889736559782, "grad_norm": 3.4069206714630127, "learning_rate": 9.778673004233579e-05, "loss": 1.536, "step": 3494 }, { "epoch": 0.20836810108475384, "grad_norm": 3.022057294845581, "learning_rate": 9.778394667335834e-05, "loss": 1.6195, "step": 3496 }, { "epoch": 0.2084873048039099, "grad_norm": 3.197512626647949, "learning_rate": 9.778116159498562e-05, "loss": 1.5047, "step": 3498 }, { "epoch": 0.20860650852306592, "grad_norm": 3.0474367141723633, "learning_rate": 9.777837480731729e-05, "loss": 1.4634, "step": 3500 }, { "epoch": 0.20872571224222194, "grad_norm": 3.2189273834228516, "learning_rate": 9.777558631045299e-05, "loss": 1.6277, "step": 3502 }, { "epoch": 0.208844915961378, "grad_norm": 2.919283390045166, "learning_rate": 9.777279610449252e-05, "loss": 1.5199, "step": 3504 }, { "epoch": 0.20896411968053402, "grad_norm": 3.3085596561431885, "learning_rate": 9.777000418953568e-05, "loss": 1.6183, "step": 3506 }, { "epoch": 0.20908332339969007, "grad_norm": 3.225026845932007, "learning_rate": 9.776721056568235e-05, "loss": 1.5643, "step": 3508 }, { "epoch": 0.2092025271188461, "grad_norm": 2.9119553565979004, "learning_rate": 9.776441523303247e-05, "loss": 1.4559, "step": 3510 }, { "epoch": 0.20932173083800215, "grad_norm": 3.3324127197265625, "learning_rate": 9.776161819168602e-05, "loss": 1.5494, "step": 3512 }, { "epoch": 0.20944093455715818, "grad_norm": 3.2116665840148926, "learning_rate": 9.775881944174308e-05, "loss": 1.6937, "step": 3514 }, { "epoch": 0.20956013827631423, "grad_norm": 3.5113766193389893, "learning_rate": 9.775601898330377e-05, "loss": 1.6033, "step": 3516 }, { "epoch": 0.20967934199547025, "grad_norm": 3.4405219554901123, "learning_rate": 9.775321681646825e-05, "loss": 1.7546, "step": 3518 }, { "epoch": 0.2097985457146263, "grad_norm": 3.150426149368286, "learning_rate": 9.775041294133678e-05, "loss": 1.4337, "step": 3520 }, { "epoch": 0.20991774943378233, "grad_norm": 3.447502613067627, "learning_rate": 9.774760735800967e-05, "loss": 1.5056, "step": 3522 }, { "epoch": 0.21003695315293838, "grad_norm": 3.5024564266204834, "learning_rate": 9.774480006658729e-05, "loss": 1.6248, "step": 3524 }, { "epoch": 0.2101561568720944, "grad_norm": 3.1689071655273438, "learning_rate": 9.774199106717003e-05, "loss": 1.6746, "step": 3526 }, { "epoch": 0.21027536059125046, "grad_norm": 3.285257339477539, "learning_rate": 9.773918035985842e-05, "loss": 1.6242, "step": 3528 }, { "epoch": 0.21039456431040648, "grad_norm": 3.1465461254119873, "learning_rate": 9.773636794475298e-05, "loss": 1.5771, "step": 3530 }, { "epoch": 0.2105137680295625, "grad_norm": 3.2690794467926025, "learning_rate": 9.773355382195434e-05, "loss": 1.6101, "step": 3532 }, { "epoch": 0.21063297174871856, "grad_norm": 3.4934611320495605, "learning_rate": 9.773073799156315e-05, "loss": 1.638, "step": 3534 }, { "epoch": 0.2107521754678746, "grad_norm": 3.3178317546844482, "learning_rate": 9.772792045368016e-05, "loss": 1.6473, "step": 3536 }, { "epoch": 0.21087137918703064, "grad_norm": 3.34496808052063, "learning_rate": 9.772510120840616e-05, "loss": 1.6079, "step": 3538 }, { "epoch": 0.21099058290618666, "grad_norm": 3.3387398719787598, "learning_rate": 9.7722280255842e-05, "loss": 1.5544, "step": 3540 }, { "epoch": 0.21110978662534272, "grad_norm": 3.1578032970428467, "learning_rate": 9.771945759608858e-05, "loss": 1.6771, "step": 3542 }, { "epoch": 0.21122899034449874, "grad_norm": 3.3931567668914795, "learning_rate": 9.77166332292469e-05, "loss": 1.5728, "step": 3544 }, { "epoch": 0.2113481940636548, "grad_norm": 3.374657154083252, "learning_rate": 9.771380715541799e-05, "loss": 1.5494, "step": 3546 }, { "epoch": 0.21146739778281082, "grad_norm": 3.804443597793579, "learning_rate": 9.771097937470294e-05, "loss": 1.5558, "step": 3548 }, { "epoch": 0.21158660150196687, "grad_norm": 3.4939968585968018, "learning_rate": 9.770814988720294e-05, "loss": 1.6977, "step": 3550 }, { "epoch": 0.2117058052211229, "grad_norm": 3.2856180667877197, "learning_rate": 9.770531869301916e-05, "loss": 1.6111, "step": 3552 }, { "epoch": 0.21182500894027895, "grad_norm": 3.2428741455078125, "learning_rate": 9.770248579225292e-05, "loss": 1.6039, "step": 3554 }, { "epoch": 0.21194421265943497, "grad_norm": 3.26495623588562, "learning_rate": 9.769965118500555e-05, "loss": 1.7184, "step": 3556 }, { "epoch": 0.21206341637859102, "grad_norm": 3.2656733989715576, "learning_rate": 9.769681487137844e-05, "loss": 1.6717, "step": 3558 }, { "epoch": 0.21218262009774705, "grad_norm": 3.0063652992248535, "learning_rate": 9.76939768514731e-05, "loss": 1.6955, "step": 3560 }, { "epoch": 0.21230182381690307, "grad_norm": 3.353954315185547, "learning_rate": 9.769113712539101e-05, "loss": 1.6199, "step": 3562 }, { "epoch": 0.21242102753605913, "grad_norm": 3.124513626098633, "learning_rate": 9.768829569323376e-05, "loss": 1.5278, "step": 3564 }, { "epoch": 0.21254023125521515, "grad_norm": 3.146937370300293, "learning_rate": 9.768545255510302e-05, "loss": 1.6134, "step": 3566 }, { "epoch": 0.2126594349743712, "grad_norm": 2.8780033588409424, "learning_rate": 9.76826077111005e-05, "loss": 1.5017, "step": 3568 }, { "epoch": 0.21277863869352723, "grad_norm": 3.016238212585449, "learning_rate": 9.767976116132793e-05, "loss": 1.6187, "step": 3570 }, { "epoch": 0.21289784241268328, "grad_norm": 3.3327579498291016, "learning_rate": 9.767691290588719e-05, "loss": 1.5401, "step": 3572 }, { "epoch": 0.2130170461318393, "grad_norm": 3.041365623474121, "learning_rate": 9.767406294488016e-05, "loss": 1.4552, "step": 3574 }, { "epoch": 0.21313624985099536, "grad_norm": 3.8965189456939697, "learning_rate": 9.767121127840874e-05, "loss": 1.7196, "step": 3576 }, { "epoch": 0.21325545357015138, "grad_norm": 3.072492837905884, "learning_rate": 9.766835790657502e-05, "loss": 1.5342, "step": 3578 }, { "epoch": 0.21337465728930743, "grad_norm": 2.932629108428955, "learning_rate": 9.766550282948104e-05, "loss": 1.499, "step": 3580 }, { "epoch": 0.21349386100846346, "grad_norm": 2.998199462890625, "learning_rate": 9.766264604722894e-05, "loss": 1.4208, "step": 3582 }, { "epoch": 0.2136130647276195, "grad_norm": 3.280808687210083, "learning_rate": 9.765978755992092e-05, "loss": 1.5928, "step": 3584 }, { "epoch": 0.21373226844677554, "grad_norm": 3.2383134365081787, "learning_rate": 9.765692736765922e-05, "loss": 1.582, "step": 3586 }, { "epoch": 0.2138514721659316, "grad_norm": 3.2840747833251953, "learning_rate": 9.765406547054616e-05, "loss": 1.6444, "step": 3588 }, { "epoch": 0.2139706758850876, "grad_norm": 9.229419708251953, "learning_rate": 9.765120186868415e-05, "loss": 1.5155, "step": 3590 }, { "epoch": 0.21408987960424367, "grad_norm": 3.1449525356292725, "learning_rate": 9.764833656217561e-05, "loss": 1.5106, "step": 3592 }, { "epoch": 0.2142090833233997, "grad_norm": 3.130314588546753, "learning_rate": 9.764546955112303e-05, "loss": 1.6485, "step": 3594 }, { "epoch": 0.21432828704255572, "grad_norm": 3.12973690032959, "learning_rate": 9.764260083562902e-05, "loss": 1.5935, "step": 3596 }, { "epoch": 0.21444749076171177, "grad_norm": 3.1843159198760986, "learning_rate": 9.763973041579614e-05, "loss": 1.6255, "step": 3598 }, { "epoch": 0.2145666944808678, "grad_norm": 3.4111697673797607, "learning_rate": 9.763685829172712e-05, "loss": 1.6901, "step": 3600 }, { "epoch": 0.21468589820002384, "grad_norm": 3.200512409210205, "learning_rate": 9.763398446352469e-05, "loss": 1.6684, "step": 3602 }, { "epoch": 0.21480510191917987, "grad_norm": 3.1620161533355713, "learning_rate": 9.763110893129165e-05, "loss": 1.6614, "step": 3604 }, { "epoch": 0.21492430563833592, "grad_norm": 3.0218350887298584, "learning_rate": 9.762823169513088e-05, "loss": 1.5392, "step": 3606 }, { "epoch": 0.21504350935749195, "grad_norm": 2.8369102478027344, "learning_rate": 9.76253527551453e-05, "loss": 1.4885, "step": 3608 }, { "epoch": 0.215162713076648, "grad_norm": 3.054290771484375, "learning_rate": 9.762247211143791e-05, "loss": 1.6106, "step": 3610 }, { "epoch": 0.21528191679580402, "grad_norm": 3.3943331241607666, "learning_rate": 9.761958976411175e-05, "loss": 1.5411, "step": 3612 }, { "epoch": 0.21540112051496008, "grad_norm": 3.4875998497009277, "learning_rate": 9.761670571326993e-05, "loss": 1.3941, "step": 3614 }, { "epoch": 0.2155203242341161, "grad_norm": 3.451662540435791, "learning_rate": 9.761381995901565e-05, "loss": 1.7784, "step": 3616 }, { "epoch": 0.21563952795327215, "grad_norm": 3.081329345703125, "learning_rate": 9.76109325014521e-05, "loss": 1.6695, "step": 3618 }, { "epoch": 0.21575873167242818, "grad_norm": 3.1397294998168945, "learning_rate": 9.760804334068261e-05, "loss": 1.5397, "step": 3620 }, { "epoch": 0.21587793539158423, "grad_norm": 3.122957706451416, "learning_rate": 9.76051524768105e-05, "loss": 1.5824, "step": 3622 }, { "epoch": 0.21599713911074025, "grad_norm": 3.0398714542388916, "learning_rate": 9.760225990993922e-05, "loss": 1.6103, "step": 3624 }, { "epoch": 0.21611634282989628, "grad_norm": 2.9870617389678955, "learning_rate": 9.759936564017223e-05, "loss": 1.5975, "step": 3626 }, { "epoch": 0.21623554654905233, "grad_norm": 3.3082761764526367, "learning_rate": 9.759646966761307e-05, "loss": 1.6538, "step": 3628 }, { "epoch": 0.21635475026820836, "grad_norm": 3.1021924018859863, "learning_rate": 9.759357199236535e-05, "loss": 1.6908, "step": 3630 }, { "epoch": 0.2164739539873644, "grad_norm": 3.278897285461426, "learning_rate": 9.759067261453272e-05, "loss": 1.5489, "step": 3632 }, { "epoch": 0.21659315770652043, "grad_norm": 3.1781458854675293, "learning_rate": 9.758777153421889e-05, "loss": 1.4807, "step": 3634 }, { "epoch": 0.2167123614256765, "grad_norm": 3.2059848308563232, "learning_rate": 9.758486875152766e-05, "loss": 1.6313, "step": 3636 }, { "epoch": 0.2168315651448325, "grad_norm": 3.1820969581604004, "learning_rate": 9.758196426656286e-05, "loss": 1.3536, "step": 3638 }, { "epoch": 0.21695076886398856, "grad_norm": 3.7392659187316895, "learning_rate": 9.75790580794284e-05, "loss": 1.601, "step": 3640 }, { "epoch": 0.2170699725831446, "grad_norm": 3.361302137374878, "learning_rate": 9.757615019022824e-05, "loss": 1.7062, "step": 3642 }, { "epoch": 0.21718917630230064, "grad_norm": 3.4333200454711914, "learning_rate": 9.757324059906642e-05, "loss": 1.5895, "step": 3644 }, { "epoch": 0.21730838002145667, "grad_norm": 3.209749460220337, "learning_rate": 9.757032930604699e-05, "loss": 1.6087, "step": 3646 }, { "epoch": 0.21742758374061272, "grad_norm": 3.152958393096924, "learning_rate": 9.756741631127415e-05, "loss": 1.5024, "step": 3648 }, { "epoch": 0.21754678745976874, "grad_norm": 2.899167776107788, "learning_rate": 9.756450161485206e-05, "loss": 1.3834, "step": 3650 }, { "epoch": 0.2176659911789248, "grad_norm": 3.1885015964508057, "learning_rate": 9.756158521688501e-05, "loss": 1.7777, "step": 3652 }, { "epoch": 0.21778519489808082, "grad_norm": 3.2710111141204834, "learning_rate": 9.755866711747733e-05, "loss": 1.6841, "step": 3654 }, { "epoch": 0.21790439861723684, "grad_norm": 3.389787197113037, "learning_rate": 9.755574731673341e-05, "loss": 1.6568, "step": 3656 }, { "epoch": 0.2180236023363929, "grad_norm": 3.2813308238983154, "learning_rate": 9.755282581475769e-05, "loss": 1.5714, "step": 3658 }, { "epoch": 0.21814280605554892, "grad_norm": 3.148862838745117, "learning_rate": 9.754990261165469e-05, "loss": 1.5549, "step": 3660 }, { "epoch": 0.21826200977470497, "grad_norm": 3.1032071113586426, "learning_rate": 9.754697770752898e-05, "loss": 1.8596, "step": 3662 }, { "epoch": 0.218381213493861, "grad_norm": 2.8363449573516846, "learning_rate": 9.754405110248521e-05, "loss": 1.4343, "step": 3664 }, { "epoch": 0.21850041721301705, "grad_norm": 3.446896553039551, "learning_rate": 9.754112279662804e-05, "loss": 1.4313, "step": 3666 }, { "epoch": 0.21861962093217308, "grad_norm": 3.1653711795806885, "learning_rate": 9.753819279006226e-05, "loss": 1.5734, "step": 3668 }, { "epoch": 0.21873882465132913, "grad_norm": 3.199435234069824, "learning_rate": 9.753526108289268e-05, "loss": 1.561, "step": 3670 }, { "epoch": 0.21885802837048515, "grad_norm": 2.832686424255371, "learning_rate": 9.753232767522418e-05, "loss": 1.4657, "step": 3672 }, { "epoch": 0.2189772320896412, "grad_norm": 3.3176732063293457, "learning_rate": 9.752939256716167e-05, "loss": 1.5275, "step": 3674 }, { "epoch": 0.21909643580879723, "grad_norm": 3.1064858436584473, "learning_rate": 9.752645575881018e-05, "loss": 1.6884, "step": 3676 }, { "epoch": 0.21921563952795328, "grad_norm": 3.200119733810425, "learning_rate": 9.752351725027475e-05, "loss": 1.6182, "step": 3678 }, { "epoch": 0.2193348432471093, "grad_norm": 3.1647019386291504, "learning_rate": 9.752057704166051e-05, "loss": 1.7067, "step": 3680 }, { "epoch": 0.21945404696626536, "grad_norm": 3.274932861328125, "learning_rate": 9.751763513307264e-05, "loss": 1.544, "step": 3682 }, { "epoch": 0.21957325068542138, "grad_norm": 3.3199734687805176, "learning_rate": 9.751469152461638e-05, "loss": 1.5484, "step": 3684 }, { "epoch": 0.21969245440457744, "grad_norm": 3.1314947605133057, "learning_rate": 9.751174621639703e-05, "loss": 1.6331, "step": 3686 }, { "epoch": 0.21981165812373346, "grad_norm": 3.1165547370910645, "learning_rate": 9.750879920851996e-05, "loss": 1.6357, "step": 3688 }, { "epoch": 0.21993086184288949, "grad_norm": 3.3507862091064453, "learning_rate": 9.75058505010906e-05, "loss": 1.5215, "step": 3690 }, { "epoch": 0.22005006556204554, "grad_norm": 3.1464388370513916, "learning_rate": 9.750290009421442e-05, "loss": 1.618, "step": 3692 }, { "epoch": 0.22016926928120156, "grad_norm": 3.482578754425049, "learning_rate": 9.749994798799698e-05, "loss": 1.7289, "step": 3694 }, { "epoch": 0.22028847300035762, "grad_norm": 3.2914485931396484, "learning_rate": 9.749699418254388e-05, "loss": 1.5459, "step": 3696 }, { "epoch": 0.22040767671951364, "grad_norm": 3.3558993339538574, "learning_rate": 9.749403867796079e-05, "loss": 1.5194, "step": 3698 }, { "epoch": 0.2205268804386697, "grad_norm": 3.100008964538574, "learning_rate": 9.749108147435343e-05, "loss": 1.7473, "step": 3700 }, { "epoch": 0.22064608415782572, "grad_norm": 3.25327467918396, "learning_rate": 9.74881225718276e-05, "loss": 1.6013, "step": 3702 }, { "epoch": 0.22076528787698177, "grad_norm": 2.9329421520233154, "learning_rate": 9.748516197048915e-05, "loss": 1.4206, "step": 3704 }, { "epoch": 0.2208844915961378, "grad_norm": 3.1764466762542725, "learning_rate": 9.748219967044398e-05, "loss": 1.6094, "step": 3706 }, { "epoch": 0.22100369531529385, "grad_norm": 3.3824105262756348, "learning_rate": 9.747923567179808e-05, "loss": 1.6191, "step": 3708 }, { "epoch": 0.22112289903444987, "grad_norm": 3.3722424507141113, "learning_rate": 9.747626997465746e-05, "loss": 1.5389, "step": 3710 }, { "epoch": 0.22124210275360592, "grad_norm": 3.342087745666504, "learning_rate": 9.747330257912824e-05, "loss": 1.6496, "step": 3712 }, { "epoch": 0.22136130647276195, "grad_norm": 3.419100761413574, "learning_rate": 9.747033348531655e-05, "loss": 1.6778, "step": 3714 }, { "epoch": 0.221480510191918, "grad_norm": 3.241788864135742, "learning_rate": 9.746736269332861e-05, "loss": 1.6422, "step": 3716 }, { "epoch": 0.22159971391107403, "grad_norm": 3.4933981895446777, "learning_rate": 9.746439020327071e-05, "loss": 1.6677, "step": 3718 }, { "epoch": 0.22171891763023005, "grad_norm": 2.922450304031372, "learning_rate": 9.746141601524917e-05, "loss": 1.6817, "step": 3720 }, { "epoch": 0.2218381213493861, "grad_norm": 3.4315552711486816, "learning_rate": 9.745844012937038e-05, "loss": 1.5641, "step": 3722 }, { "epoch": 0.22195732506854213, "grad_norm": 3.198305368423462, "learning_rate": 9.745546254574081e-05, "loss": 1.6377, "step": 3724 }, { "epoch": 0.22207652878769818, "grad_norm": 3.11789608001709, "learning_rate": 9.745248326446699e-05, "loss": 1.6738, "step": 3726 }, { "epoch": 0.2221957325068542, "grad_norm": 3.108720302581787, "learning_rate": 9.744950228565549e-05, "loss": 1.4085, "step": 3728 }, { "epoch": 0.22231493622601026, "grad_norm": 3.4521677494049072, "learning_rate": 9.744651960941294e-05, "loss": 1.6342, "step": 3730 }, { "epoch": 0.22243413994516628, "grad_norm": 3.2862446308135986, "learning_rate": 9.744353523584604e-05, "loss": 1.6147, "step": 3732 }, { "epoch": 0.22255334366432233, "grad_norm": 3.035871744155884, "learning_rate": 9.744054916506156e-05, "loss": 1.5, "step": 3734 }, { "epoch": 0.22267254738347836, "grad_norm": 3.2548859119415283, "learning_rate": 9.743756139716633e-05, "loss": 1.5005, "step": 3736 }, { "epoch": 0.2227917511026344, "grad_norm": 3.3881068229675293, "learning_rate": 9.743457193226721e-05, "loss": 1.5168, "step": 3738 }, { "epoch": 0.22291095482179044, "grad_norm": 3.3839638233184814, "learning_rate": 9.743158077047117e-05, "loss": 1.6627, "step": 3740 }, { "epoch": 0.2230301585409465, "grad_norm": 3.2248668670654297, "learning_rate": 9.742858791188518e-05, "loss": 1.682, "step": 3742 }, { "epoch": 0.2231493622601025, "grad_norm": 3.1282126903533936, "learning_rate": 9.742559335661635e-05, "loss": 1.5745, "step": 3744 }, { "epoch": 0.22326856597925857, "grad_norm": 3.5690884590148926, "learning_rate": 9.742259710477177e-05, "loss": 1.5937, "step": 3746 }, { "epoch": 0.2233877696984146, "grad_norm": 3.0919408798217773, "learning_rate": 9.741959915645864e-05, "loss": 1.5827, "step": 3748 }, { "epoch": 0.22350697341757061, "grad_norm": 3.080469846725464, "learning_rate": 9.741659951178419e-05, "loss": 1.5332, "step": 3750 }, { "epoch": 0.22362617713672667, "grad_norm": 3.2658207416534424, "learning_rate": 9.741359817085575e-05, "loss": 1.5397, "step": 3752 }, { "epoch": 0.2237453808558827, "grad_norm": 3.1440205574035645, "learning_rate": 9.741059513378068e-05, "loss": 1.6174, "step": 3754 }, { "epoch": 0.22386458457503874, "grad_norm": 3.372804880142212, "learning_rate": 9.740759040066641e-05, "loss": 1.7548, "step": 3756 }, { "epoch": 0.22398378829419477, "grad_norm": 3.0693857669830322, "learning_rate": 9.740458397162043e-05, "loss": 1.6067, "step": 3758 }, { "epoch": 0.22410299201335082, "grad_norm": 4.772977352142334, "learning_rate": 9.740157584675029e-05, "loss": 1.4722, "step": 3760 }, { "epoch": 0.22422219573250685, "grad_norm": 3.097811460494995, "learning_rate": 9.73985660261636e-05, "loss": 1.5938, "step": 3762 }, { "epoch": 0.2243413994516629, "grad_norm": 3.420819044113159, "learning_rate": 9.739555450996802e-05, "loss": 1.6408, "step": 3764 }, { "epoch": 0.22446060317081892, "grad_norm": 3.3035826683044434, "learning_rate": 9.73925412982713e-05, "loss": 1.532, "step": 3766 }, { "epoch": 0.22457980688997498, "grad_norm": 3.482778310775757, "learning_rate": 9.738952639118121e-05, "loss": 1.5832, "step": 3768 }, { "epoch": 0.224699010609131, "grad_norm": 3.3296761512756348, "learning_rate": 9.738650978880564e-05, "loss": 1.6709, "step": 3770 }, { "epoch": 0.22481821432828705, "grad_norm": 3.3979008197784424, "learning_rate": 9.738349149125248e-05, "loss": 1.622, "step": 3772 }, { "epoch": 0.22493741804744308, "grad_norm": 3.0029447078704834, "learning_rate": 9.738047149862971e-05, "loss": 1.6352, "step": 3774 }, { "epoch": 0.22505662176659913, "grad_norm": 3.140397310256958, "learning_rate": 9.737744981104536e-05, "loss": 1.5459, "step": 3776 }, { "epoch": 0.22517582548575515, "grad_norm": 3.0679543018341064, "learning_rate": 9.737442642860753e-05, "loss": 1.4753, "step": 3778 }, { "epoch": 0.2252950292049112, "grad_norm": 3.4575185775756836, "learning_rate": 9.737140135142437e-05, "loss": 1.6391, "step": 3780 }, { "epoch": 0.22541423292406723, "grad_norm": 3.3181865215301514, "learning_rate": 9.736837457960413e-05, "loss": 1.7425, "step": 3782 }, { "epoch": 0.22553343664322326, "grad_norm": 3.1415789127349854, "learning_rate": 9.736534611325503e-05, "loss": 1.7034, "step": 3784 }, { "epoch": 0.2256526403623793, "grad_norm": 3.255277633666992, "learning_rate": 9.736231595248545e-05, "loss": 1.4812, "step": 3786 }, { "epoch": 0.22577184408153533, "grad_norm": 3.0430667400360107, "learning_rate": 9.73592840974038e-05, "loss": 1.4603, "step": 3788 }, { "epoch": 0.22589104780069139, "grad_norm": 2.9213318824768066, "learning_rate": 9.73562505481185e-05, "loss": 1.4779, "step": 3790 }, { "epoch": 0.2260102515198474, "grad_norm": 3.175231456756592, "learning_rate": 9.735321530473808e-05, "loss": 1.643, "step": 3792 }, { "epoch": 0.22612945523900346, "grad_norm": 3.2848308086395264, "learning_rate": 9.735017836737116e-05, "loss": 1.5734, "step": 3794 }, { "epoch": 0.2262486589581595, "grad_norm": 3.4932830333709717, "learning_rate": 9.734713973612633e-05, "loss": 1.5558, "step": 3796 }, { "epoch": 0.22636786267731554, "grad_norm": 2.952070474624634, "learning_rate": 9.734409941111234e-05, "loss": 1.4784, "step": 3798 }, { "epoch": 0.22648706639647156, "grad_norm": 3.212423324584961, "learning_rate": 9.73410573924379e-05, "loss": 1.6484, "step": 3800 }, { "epoch": 0.22660627011562762, "grad_norm": 3.116879463195801, "learning_rate": 9.733801368021187e-05, "loss": 1.5636, "step": 3802 }, { "epoch": 0.22672547383478364, "grad_norm": 3.151348352432251, "learning_rate": 9.733496827454313e-05, "loss": 1.6563, "step": 3804 }, { "epoch": 0.2268446775539397, "grad_norm": 3.3869564533233643, "learning_rate": 9.733192117554063e-05, "loss": 1.4943, "step": 3806 }, { "epoch": 0.22696388127309572, "grad_norm": 3.4352118968963623, "learning_rate": 9.732887238331333e-05, "loss": 1.6321, "step": 3808 }, { "epoch": 0.22708308499225177, "grad_norm": 3.4648802280426025, "learning_rate": 9.732582189797037e-05, "loss": 1.6739, "step": 3810 }, { "epoch": 0.2272022887114078, "grad_norm": 3.283060312271118, "learning_rate": 9.732276971962082e-05, "loss": 1.3326, "step": 3812 }, { "epoch": 0.22732149243056382, "grad_norm": 3.2837400436401367, "learning_rate": 9.73197158483739e-05, "loss": 1.5538, "step": 3814 }, { "epoch": 0.22744069614971987, "grad_norm": 3.355170965194702, "learning_rate": 9.731666028433882e-05, "loss": 1.6369, "step": 3816 }, { "epoch": 0.2275598998688759, "grad_norm": 3.2628891468048096, "learning_rate": 9.731360302762491e-05, "loss": 1.5782, "step": 3818 }, { "epoch": 0.22767910358803195, "grad_norm": 3.7422399520874023, "learning_rate": 9.731054407834155e-05, "loss": 1.7175, "step": 3820 }, { "epoch": 0.22779830730718797, "grad_norm": 3.3137588500976562, "learning_rate": 9.730748343659815e-05, "loss": 1.4625, "step": 3822 }, { "epoch": 0.22791751102634403, "grad_norm": 3.1076111793518066, "learning_rate": 9.73044211025042e-05, "loss": 1.5183, "step": 3824 }, { "epoch": 0.22803671474550005, "grad_norm": 3.371227502822876, "learning_rate": 9.730135707616927e-05, "loss": 1.3768, "step": 3826 }, { "epoch": 0.2281559184646561, "grad_norm": 3.2682037353515625, "learning_rate": 9.729829135770295e-05, "loss": 1.6206, "step": 3828 }, { "epoch": 0.22827512218381213, "grad_norm": 3.339808702468872, "learning_rate": 9.729522394721493e-05, "loss": 1.4942, "step": 3830 }, { "epoch": 0.22839432590296818, "grad_norm": 2.9476993083953857, "learning_rate": 9.729215484481491e-05, "loss": 1.5669, "step": 3832 }, { "epoch": 0.2285135296221242, "grad_norm": 2.9289133548736572, "learning_rate": 9.728908405061273e-05, "loss": 1.5208, "step": 3834 }, { "epoch": 0.22863273334128026, "grad_norm": 3.497271776199341, "learning_rate": 9.72860115647182e-05, "loss": 1.551, "step": 3836 }, { "epoch": 0.22875193706043628, "grad_norm": 3.114896774291992, "learning_rate": 9.728293738724124e-05, "loss": 1.4375, "step": 3838 }, { "epoch": 0.22887114077959234, "grad_norm": 3.315446138381958, "learning_rate": 9.727986151829185e-05, "loss": 1.4918, "step": 3840 }, { "epoch": 0.22899034449874836, "grad_norm": 3.345768928527832, "learning_rate": 9.727678395798003e-05, "loss": 1.6792, "step": 3842 }, { "epoch": 0.22910954821790439, "grad_norm": 3.2211930751800537, "learning_rate": 9.72737047064159e-05, "loss": 1.7221, "step": 3844 }, { "epoch": 0.22922875193706044, "grad_norm": 3.0168869495391846, "learning_rate": 9.727062376370961e-05, "loss": 1.6442, "step": 3846 }, { "epoch": 0.22934795565621646, "grad_norm": 3.071113348007202, "learning_rate": 9.726754112997137e-05, "loss": 1.5842, "step": 3848 }, { "epoch": 0.22946715937537251, "grad_norm": 3.055790662765503, "learning_rate": 9.726445680531147e-05, "loss": 1.613, "step": 3850 }, { "epoch": 0.22958636309452854, "grad_norm": 3.0160834789276123, "learning_rate": 9.726137078984023e-05, "loss": 1.5087, "step": 3852 }, { "epoch": 0.2297055668136846, "grad_norm": 3.1848976612091064, "learning_rate": 9.725828308366805e-05, "loss": 1.5565, "step": 3854 }, { "epoch": 0.22982477053284062, "grad_norm": 3.0971932411193848, "learning_rate": 9.725519368690538e-05, "loss": 1.5874, "step": 3856 }, { "epoch": 0.22994397425199667, "grad_norm": 3.2896289825439453, "learning_rate": 9.725210259966277e-05, "loss": 1.4234, "step": 3858 }, { "epoch": 0.2300631779711527, "grad_norm": 3.694059133529663, "learning_rate": 9.724900982205077e-05, "loss": 1.6574, "step": 3860 }, { "epoch": 0.23018238169030875, "grad_norm": 3.55314564704895, "learning_rate": 9.724591535418003e-05, "loss": 1.5468, "step": 3862 }, { "epoch": 0.23030158540946477, "grad_norm": 3.181727886199951, "learning_rate": 9.724281919616125e-05, "loss": 1.6629, "step": 3864 }, { "epoch": 0.23042078912862082, "grad_norm": 3.26986026763916, "learning_rate": 9.723972134810519e-05, "loss": 1.6835, "step": 3866 }, { "epoch": 0.23053999284777685, "grad_norm": 3.1929726600646973, "learning_rate": 9.723662181012265e-05, "loss": 1.5389, "step": 3868 }, { "epoch": 0.2306591965669329, "grad_norm": 3.1998653411865234, "learning_rate": 9.723352058232456e-05, "loss": 1.587, "step": 3870 }, { "epoch": 0.23077840028608892, "grad_norm": 3.1637837886810303, "learning_rate": 9.72304176648218e-05, "loss": 1.5607, "step": 3872 }, { "epoch": 0.23089760400524498, "grad_norm": 3.077854633331299, "learning_rate": 9.722731305772542e-05, "loss": 1.5623, "step": 3874 }, { "epoch": 0.231016807724401, "grad_norm": 3.623720645904541, "learning_rate": 9.722420676114647e-05, "loss": 1.7893, "step": 3876 }, { "epoch": 0.23113601144355703, "grad_norm": 3.3467187881469727, "learning_rate": 9.722109877519605e-05, "loss": 1.6066, "step": 3878 }, { "epoch": 0.23125521516271308, "grad_norm": 3.504910469055176, "learning_rate": 9.721798909998539e-05, "loss": 1.664, "step": 3880 }, { "epoch": 0.2313744188818691, "grad_norm": 3.302095651626587, "learning_rate": 9.721487773562567e-05, "loss": 1.5206, "step": 3882 }, { "epoch": 0.23149362260102516, "grad_norm": 3.410524845123291, "learning_rate": 9.721176468222825e-05, "loss": 1.5304, "step": 3884 }, { "epoch": 0.23161282632018118, "grad_norm": 2.8689091205596924, "learning_rate": 9.720864993990448e-05, "loss": 1.6816, "step": 3886 }, { "epoch": 0.23173203003933723, "grad_norm": 3.320760726928711, "learning_rate": 9.720553350876577e-05, "loss": 1.7852, "step": 3888 }, { "epoch": 0.23185123375849326, "grad_norm": 3.161815881729126, "learning_rate": 9.720241538892361e-05, "loss": 1.6339, "step": 3890 }, { "epoch": 0.2319704374776493, "grad_norm": 3.2224769592285156, "learning_rate": 9.719929558048954e-05, "loss": 1.5877, "step": 3892 }, { "epoch": 0.23208964119680534, "grad_norm": 3.28979229927063, "learning_rate": 9.71961740835752e-05, "loss": 1.5581, "step": 3894 }, { "epoch": 0.2322088449159614, "grad_norm": 3.338663339614868, "learning_rate": 9.719305089829223e-05, "loss": 1.5702, "step": 3896 }, { "epoch": 0.2323280486351174, "grad_norm": 3.242757558822632, "learning_rate": 9.718992602475236e-05, "loss": 1.4102, "step": 3898 }, { "epoch": 0.23244725235427346, "grad_norm": 3.0437026023864746, "learning_rate": 9.718679946306736e-05, "loss": 1.6874, "step": 3900 }, { "epoch": 0.2325664560734295, "grad_norm": 3.677067995071411, "learning_rate": 9.718367121334911e-05, "loss": 1.5488, "step": 3902 }, { "epoch": 0.23268565979258554, "grad_norm": 3.5956461429595947, "learning_rate": 9.71805412757095e-05, "loss": 1.6884, "step": 3904 }, { "epoch": 0.23280486351174157, "grad_norm": 3.330266237258911, "learning_rate": 9.71774096502605e-05, "loss": 1.5663, "step": 3906 }, { "epoch": 0.2329240672308976, "grad_norm": 3.005612373352051, "learning_rate": 9.717427633711416e-05, "loss": 1.55, "step": 3908 }, { "epoch": 0.23304327095005364, "grad_norm": 3.3087306022644043, "learning_rate": 9.717114133638252e-05, "loss": 1.5485, "step": 3910 }, { "epoch": 0.23316247466920967, "grad_norm": 3.1162362098693848, "learning_rate": 9.716800464817777e-05, "loss": 1.6327, "step": 3912 }, { "epoch": 0.23328167838836572, "grad_norm": 3.4408113956451416, "learning_rate": 9.716486627261212e-05, "loss": 1.7399, "step": 3914 }, { "epoch": 0.23340088210752175, "grad_norm": 3.1582367420196533, "learning_rate": 9.716172620979782e-05, "loss": 1.639, "step": 3916 }, { "epoch": 0.2335200858266778, "grad_norm": 3.4896976947784424, "learning_rate": 9.715858445984723e-05, "loss": 1.6713, "step": 3918 }, { "epoch": 0.23363928954583382, "grad_norm": 3.5474860668182373, "learning_rate": 9.715544102287271e-05, "loss": 1.5142, "step": 3920 }, { "epoch": 0.23375849326498988, "grad_norm": 3.519123077392578, "learning_rate": 9.715229589898672e-05, "loss": 1.6589, "step": 3922 }, { "epoch": 0.2338776969841459, "grad_norm": 3.2995355129241943, "learning_rate": 9.714914908830178e-05, "loss": 1.4963, "step": 3924 }, { "epoch": 0.23399690070330195, "grad_norm": 3.223140001296997, "learning_rate": 9.714600059093046e-05, "loss": 1.5412, "step": 3926 }, { "epoch": 0.23411610442245798, "grad_norm": 2.9476914405822754, "learning_rate": 9.714285040698539e-05, "loss": 1.4595, "step": 3928 }, { "epoch": 0.23423530814161403, "grad_norm": 3.289832830429077, "learning_rate": 9.713969853657925e-05, "loss": 1.6392, "step": 3930 }, { "epoch": 0.23435451186077005, "grad_norm": 3.560026168823242, "learning_rate": 9.713654497982482e-05, "loss": 1.6164, "step": 3932 }, { "epoch": 0.2344737155799261, "grad_norm": 3.723384380340576, "learning_rate": 9.71333897368349e-05, "loss": 1.5318, "step": 3934 }, { "epoch": 0.23459291929908213, "grad_norm": 3.3435721397399902, "learning_rate": 9.713023280772236e-05, "loss": 1.5632, "step": 3936 }, { "epoch": 0.23471212301823816, "grad_norm": 3.332396984100342, "learning_rate": 9.712707419260014e-05, "loss": 1.5068, "step": 3938 }, { "epoch": 0.2348313267373942, "grad_norm": 3.491363286972046, "learning_rate": 9.712391389158122e-05, "loss": 1.6496, "step": 3940 }, { "epoch": 0.23495053045655023, "grad_norm": 3.2280595302581787, "learning_rate": 9.712075190477869e-05, "loss": 1.4827, "step": 3942 }, { "epoch": 0.23506973417570629, "grad_norm": 3.1464462280273438, "learning_rate": 9.711758823230564e-05, "loss": 1.5824, "step": 3944 }, { "epoch": 0.2351889378948623, "grad_norm": 2.968881368637085, "learning_rate": 9.711442287427523e-05, "loss": 1.6576, "step": 3946 }, { "epoch": 0.23530814161401836, "grad_norm": 2.999349594116211, "learning_rate": 9.711125583080072e-05, "loss": 1.5258, "step": 3948 }, { "epoch": 0.2354273453331744, "grad_norm": 3.0488622188568115, "learning_rate": 9.710808710199541e-05, "loss": 1.6313, "step": 3950 }, { "epoch": 0.23554654905233044, "grad_norm": 3.4879074096679688, "learning_rate": 9.710491668797264e-05, "loss": 1.6595, "step": 3952 }, { "epoch": 0.23566575277148646, "grad_norm": 3.241469621658325, "learning_rate": 9.710174458884583e-05, "loss": 1.5448, "step": 3954 }, { "epoch": 0.23578495649064252, "grad_norm": 3.5449090003967285, "learning_rate": 9.709857080472846e-05, "loss": 1.5812, "step": 3956 }, { "epoch": 0.23590416020979854, "grad_norm": 3.558616876602173, "learning_rate": 9.709539533573406e-05, "loss": 1.5823, "step": 3958 }, { "epoch": 0.2360233639289546, "grad_norm": 3.549826145172119, "learning_rate": 9.709221818197624e-05, "loss": 1.5487, "step": 3960 }, { "epoch": 0.23614256764811062, "grad_norm": 3.2009549140930176, "learning_rate": 9.708903934356865e-05, "loss": 1.6469, "step": 3962 }, { "epoch": 0.23626177136726667, "grad_norm": 3.0523436069488525, "learning_rate": 9.708585882062502e-05, "loss": 1.4169, "step": 3964 }, { "epoch": 0.2363809750864227, "grad_norm": 3.366708517074585, "learning_rate": 9.708267661325909e-05, "loss": 1.6609, "step": 3966 }, { "epoch": 0.23650017880557872, "grad_norm": 3.356398105621338, "learning_rate": 9.707949272158475e-05, "loss": 1.6133, "step": 3968 }, { "epoch": 0.23661938252473477, "grad_norm": 3.3415486812591553, "learning_rate": 9.707630714571587e-05, "loss": 1.6899, "step": 3970 }, { "epoch": 0.2367385862438908, "grad_norm": 3.134153366088867, "learning_rate": 9.707311988576642e-05, "loss": 1.6563, "step": 3972 }, { "epoch": 0.23685778996304685, "grad_norm": 3.253305435180664, "learning_rate": 9.70699309418504e-05, "loss": 1.5643, "step": 3974 }, { "epoch": 0.23697699368220287, "grad_norm": 3.206498622894287, "learning_rate": 9.706674031408191e-05, "loss": 1.6222, "step": 3976 }, { "epoch": 0.23709619740135893, "grad_norm": 3.1538658142089844, "learning_rate": 9.706354800257507e-05, "loss": 1.6089, "step": 3978 }, { "epoch": 0.23721540112051495, "grad_norm": 3.2569446563720703, "learning_rate": 9.706035400744411e-05, "loss": 1.6319, "step": 3980 }, { "epoch": 0.237334604839671, "grad_norm": 3.3876919746398926, "learning_rate": 9.705715832880326e-05, "loss": 1.45, "step": 3982 }, { "epoch": 0.23745380855882703, "grad_norm": 2.981034278869629, "learning_rate": 9.705396096676685e-05, "loss": 1.4613, "step": 3984 }, { "epoch": 0.23757301227798308, "grad_norm": 3.230318784713745, "learning_rate": 9.705076192144926e-05, "loss": 1.5211, "step": 3986 }, { "epoch": 0.2376922159971391, "grad_norm": 3.479602813720703, "learning_rate": 9.704756119296495e-05, "loss": 1.668, "step": 3988 }, { "epoch": 0.23781141971629516, "grad_norm": 3.240710496902466, "learning_rate": 9.70443587814284e-05, "loss": 1.5457, "step": 3990 }, { "epoch": 0.23793062343545118, "grad_norm": 3.573698043823242, "learning_rate": 9.704115468695416e-05, "loss": 1.5389, "step": 3992 }, { "epoch": 0.23804982715460724, "grad_norm": 2.968106508255005, "learning_rate": 9.703794890965689e-05, "loss": 1.4812, "step": 3994 }, { "epoch": 0.23816903087376326, "grad_norm": 3.442652463912964, "learning_rate": 9.703474144965123e-05, "loss": 1.5653, "step": 3996 }, { "epoch": 0.2382882345929193, "grad_norm": 3.0942766666412354, "learning_rate": 9.703153230705195e-05, "loss": 1.5925, "step": 3998 }, { "epoch": 0.23840743831207534, "grad_norm": 3.40303897857666, "learning_rate": 9.702832148197385e-05, "loss": 1.5674, "step": 4000 }, { "epoch": 0.23852664203123136, "grad_norm": 3.3170621395111084, "learning_rate": 9.702510897453176e-05, "loss": 1.4872, "step": 4002 }, { "epoch": 0.23864584575038741, "grad_norm": 3.1437604427337646, "learning_rate": 9.702189478484065e-05, "loss": 1.5449, "step": 4004 }, { "epoch": 0.23876504946954344, "grad_norm": 3.20884370803833, "learning_rate": 9.701867891301549e-05, "loss": 1.5216, "step": 4006 }, { "epoch": 0.2388842531886995, "grad_norm": 3.298253297805786, "learning_rate": 9.701546135917128e-05, "loss": 1.6437, "step": 4008 }, { "epoch": 0.23900345690785552, "grad_norm": 3.2154226303100586, "learning_rate": 9.701224212342317e-05, "loss": 1.5024, "step": 4010 }, { "epoch": 0.23912266062701157, "grad_norm": 3.0712430477142334, "learning_rate": 9.700902120588631e-05, "loss": 1.5795, "step": 4012 }, { "epoch": 0.2392418643461676, "grad_norm": 3.1499202251434326, "learning_rate": 9.700579860667593e-05, "loss": 1.5474, "step": 4014 }, { "epoch": 0.23936106806532365, "grad_norm": 3.101085662841797, "learning_rate": 9.70025743259073e-05, "loss": 1.7097, "step": 4016 }, { "epoch": 0.23948027178447967, "grad_norm": 3.167114019393921, "learning_rate": 9.699934836369576e-05, "loss": 1.5226, "step": 4018 }, { "epoch": 0.23959947550363572, "grad_norm": 3.3208348751068115, "learning_rate": 9.699612072015674e-05, "loss": 1.5317, "step": 4020 }, { "epoch": 0.23971867922279175, "grad_norm": 3.1107053756713867, "learning_rate": 9.699289139540566e-05, "loss": 1.5138, "step": 4022 }, { "epoch": 0.2398378829419478, "grad_norm": 3.5704150199890137, "learning_rate": 9.698966038955809e-05, "loss": 1.7381, "step": 4024 }, { "epoch": 0.23995708666110382, "grad_norm": 3.5849499702453613, "learning_rate": 9.698642770272959e-05, "loss": 1.6263, "step": 4026 }, { "epoch": 0.24007629038025988, "grad_norm": 2.9011552333831787, "learning_rate": 9.698319333503581e-05, "loss": 1.4887, "step": 4028 }, { "epoch": 0.2401954940994159, "grad_norm": 3.1148698329925537, "learning_rate": 9.697995728659245e-05, "loss": 1.4675, "step": 4030 }, { "epoch": 0.24031469781857193, "grad_norm": 3.226850748062134, "learning_rate": 9.697671955751528e-05, "loss": 1.5673, "step": 4032 }, { "epoch": 0.24043390153772798, "grad_norm": 3.1542439460754395, "learning_rate": 9.697348014792012e-05, "loss": 1.6028, "step": 4034 }, { "epoch": 0.240553105256884, "grad_norm": 3.1755709648132324, "learning_rate": 9.697023905792286e-05, "loss": 1.5398, "step": 4036 }, { "epoch": 0.24067230897604006, "grad_norm": 3.691760301589966, "learning_rate": 9.696699628763943e-05, "loss": 1.7387, "step": 4038 }, { "epoch": 0.24079151269519608, "grad_norm": 3.5333878993988037, "learning_rate": 9.696375183718586e-05, "loss": 1.5666, "step": 4040 }, { "epoch": 0.24091071641435213, "grad_norm": 2.8770899772644043, "learning_rate": 9.69605057066782e-05, "loss": 1.3595, "step": 4042 }, { "epoch": 0.24102992013350816, "grad_norm": 3.15079402923584, "learning_rate": 9.695725789623258e-05, "loss": 1.5513, "step": 4044 }, { "epoch": 0.2411491238526642, "grad_norm": 3.060497999191284, "learning_rate": 9.695400840596518e-05, "loss": 1.5954, "step": 4046 }, { "epoch": 0.24126832757182023, "grad_norm": 3.0307822227478027, "learning_rate": 9.695075723599225e-05, "loss": 1.5521, "step": 4048 }, { "epoch": 0.2413875312909763, "grad_norm": 3.246253728866577, "learning_rate": 9.694750438643009e-05, "loss": 1.4723, "step": 4050 }, { "epoch": 0.2415067350101323, "grad_norm": 3.3459818363189697, "learning_rate": 9.694424985739507e-05, "loss": 1.631, "step": 4052 }, { "epoch": 0.24162593872928836, "grad_norm": 3.41745662689209, "learning_rate": 9.69409936490036e-05, "loss": 1.4616, "step": 4054 }, { "epoch": 0.2417451424484444, "grad_norm": 3.0982141494750977, "learning_rate": 9.69377357613722e-05, "loss": 1.5902, "step": 4056 }, { "epoch": 0.24186434616760044, "grad_norm": 3.3951172828674316, "learning_rate": 9.693447619461739e-05, "loss": 1.5151, "step": 4058 }, { "epoch": 0.24198354988675647, "grad_norm": 3.238283395767212, "learning_rate": 9.693121494885579e-05, "loss": 1.7107, "step": 4060 }, { "epoch": 0.2421027536059125, "grad_norm": 3.0567140579223633, "learning_rate": 9.692795202420403e-05, "loss": 1.6036, "step": 4062 }, { "epoch": 0.24222195732506854, "grad_norm": 3.7260921001434326, "learning_rate": 9.692468742077889e-05, "loss": 1.5469, "step": 4064 }, { "epoch": 0.24234116104422457, "grad_norm": 3.040503740310669, "learning_rate": 9.692142113869713e-05, "loss": 1.6014, "step": 4066 }, { "epoch": 0.24246036476338062, "grad_norm": 3.409562110900879, "learning_rate": 9.69181531780756e-05, "loss": 1.5699, "step": 4068 }, { "epoch": 0.24257956848253664, "grad_norm": 3.4131433963775635, "learning_rate": 9.691488353903119e-05, "loss": 1.5033, "step": 4070 }, { "epoch": 0.2426987722016927, "grad_norm": 3.531572103500366, "learning_rate": 9.691161222168088e-05, "loss": 1.578, "step": 4072 }, { "epoch": 0.24281797592084872, "grad_norm": 3.5140440464019775, "learning_rate": 9.690833922614168e-05, "loss": 1.5942, "step": 4074 }, { "epoch": 0.24293717964000477, "grad_norm": 3.3468918800354004, "learning_rate": 9.690506455253073e-05, "loss": 1.6207, "step": 4076 }, { "epoch": 0.2430563833591608, "grad_norm": 3.2186965942382812, "learning_rate": 9.69017882009651e-05, "loss": 1.574, "step": 4078 }, { "epoch": 0.24317558707831685, "grad_norm": 3.3477699756622314, "learning_rate": 9.689851017156204e-05, "loss": 1.5238, "step": 4080 }, { "epoch": 0.24329479079747288, "grad_norm": 3.2165799140930176, "learning_rate": 9.689523046443882e-05, "loss": 1.756, "step": 4082 }, { "epoch": 0.24341399451662893, "grad_norm": 3.2508368492126465, "learning_rate": 9.689194907971275e-05, "loss": 1.5758, "step": 4084 }, { "epoch": 0.24353319823578495, "grad_norm": 3.5001907348632812, "learning_rate": 9.688866601750121e-05, "loss": 1.558, "step": 4086 }, { "epoch": 0.243652401954941, "grad_norm": 3.1312685012817383, "learning_rate": 9.688538127792169e-05, "loss": 1.557, "step": 4088 }, { "epoch": 0.24377160567409703, "grad_norm": 3.4422197341918945, "learning_rate": 9.688209486109165e-05, "loss": 1.5722, "step": 4090 }, { "epoch": 0.24389080939325308, "grad_norm": 2.8750174045562744, "learning_rate": 9.687880676712866e-05, "loss": 1.5542, "step": 4092 }, { "epoch": 0.2440100131124091, "grad_norm": 3.3107242584228516, "learning_rate": 9.687551699615037e-05, "loss": 1.6804, "step": 4094 }, { "epoch": 0.24412921683156513, "grad_norm": 3.997831106185913, "learning_rate": 9.687222554827444e-05, "loss": 1.7124, "step": 4096 }, { "epoch": 0.24424842055072118, "grad_norm": 3.5312600135803223, "learning_rate": 9.686893242361866e-05, "loss": 1.6284, "step": 4098 }, { "epoch": 0.2443676242698772, "grad_norm": 3.2536425590515137, "learning_rate": 9.686563762230077e-05, "loss": 1.5516, "step": 4100 }, { "epoch": 0.24448682798903326, "grad_norm": 3.8804359436035156, "learning_rate": 9.68623411444387e-05, "loss": 1.5675, "step": 4102 }, { "epoch": 0.2446060317081893, "grad_norm": 3.272829532623291, "learning_rate": 9.685904299015034e-05, "loss": 1.7263, "step": 4104 }, { "epoch": 0.24472523542734534, "grad_norm": 3.1410531997680664, "learning_rate": 9.685574315955368e-05, "loss": 1.5099, "step": 4106 }, { "epoch": 0.24484443914650136, "grad_norm": 3.299726963043213, "learning_rate": 9.68524416527668e-05, "loss": 1.5683, "step": 4108 }, { "epoch": 0.24496364286565742, "grad_norm": 3.2961790561676025, "learning_rate": 9.684913846990773e-05, "loss": 1.5542, "step": 4110 }, { "epoch": 0.24508284658481344, "grad_norm": 3.01055645942688, "learning_rate": 9.684583361109472e-05, "loss": 1.4811, "step": 4112 }, { "epoch": 0.2452020503039695, "grad_norm": 3.5075955390930176, "learning_rate": 9.684252707644596e-05, "loss": 1.5639, "step": 4114 }, { "epoch": 0.24532125402312552, "grad_norm": 3.63812518119812, "learning_rate": 9.683921886607972e-05, "loss": 1.752, "step": 4116 }, { "epoch": 0.24544045774228157, "grad_norm": 3.205719470977783, "learning_rate": 9.683590898011438e-05, "loss": 1.7145, "step": 4118 }, { "epoch": 0.2455596614614376, "grad_norm": 3.6857292652130127, "learning_rate": 9.683259741866831e-05, "loss": 1.6121, "step": 4120 }, { "epoch": 0.24567886518059365, "grad_norm": 3.1192233562469482, "learning_rate": 9.682928418186001e-05, "loss": 1.4634, "step": 4122 }, { "epoch": 0.24579806889974967, "grad_norm": 3.213615655899048, "learning_rate": 9.682596926980798e-05, "loss": 1.698, "step": 4124 }, { "epoch": 0.2459172726189057, "grad_norm": 3.3776464462280273, "learning_rate": 9.682265268263082e-05, "loss": 1.6188, "step": 4126 }, { "epoch": 0.24603647633806175, "grad_norm": 3.1582181453704834, "learning_rate": 9.681933442044717e-05, "loss": 1.4601, "step": 4128 }, { "epoch": 0.24615568005721777, "grad_norm": 5.304425239562988, "learning_rate": 9.681601448337574e-05, "loss": 1.5012, "step": 4130 }, { "epoch": 0.24627488377637383, "grad_norm": 3.251554489135742, "learning_rate": 9.681269287153529e-05, "loss": 1.5023, "step": 4132 }, { "epoch": 0.24639408749552985, "grad_norm": 3.1292190551757812, "learning_rate": 9.680936958504466e-05, "loss": 1.5213, "step": 4134 }, { "epoch": 0.2465132912146859, "grad_norm": 3.063262462615967, "learning_rate": 9.680604462402269e-05, "loss": 1.5781, "step": 4136 }, { "epoch": 0.24663249493384193, "grad_norm": 3.3107244968414307, "learning_rate": 9.68027179885884e-05, "loss": 1.5821, "step": 4138 }, { "epoch": 0.24675169865299798, "grad_norm": 3.3962295055389404, "learning_rate": 9.679938967886073e-05, "loss": 1.5769, "step": 4140 }, { "epoch": 0.246870902372154, "grad_norm": 3.2303760051727295, "learning_rate": 9.679605969495877e-05, "loss": 1.5081, "step": 4142 }, { "epoch": 0.24699010609131006, "grad_norm": 3.1063039302825928, "learning_rate": 9.679272803700165e-05, "loss": 1.4554, "step": 4144 }, { "epoch": 0.24710930981046608, "grad_norm": 3.146441698074341, "learning_rate": 9.678939470510855e-05, "loss": 1.6269, "step": 4146 }, { "epoch": 0.24722851352962213, "grad_norm": 3.491431713104248, "learning_rate": 9.67860596993987e-05, "loss": 1.6894, "step": 4148 }, { "epoch": 0.24734771724877816, "grad_norm": 2.9927854537963867, "learning_rate": 9.678272301999142e-05, "loss": 1.4894, "step": 4150 }, { "epoch": 0.2474669209679342, "grad_norm": 3.291508913040161, "learning_rate": 9.67793846670061e-05, "loss": 1.5597, "step": 4152 }, { "epoch": 0.24758612468709024, "grad_norm": 3.0064475536346436, "learning_rate": 9.67760446405621e-05, "loss": 1.6994, "step": 4154 }, { "epoch": 0.24770532840624626, "grad_norm": 3.0235543251037598, "learning_rate": 9.677270294077896e-05, "loss": 1.4979, "step": 4156 }, { "epoch": 0.2478245321254023, "grad_norm": 2.954878091812134, "learning_rate": 9.67693595677762e-05, "loss": 1.5485, "step": 4158 }, { "epoch": 0.24794373584455834, "grad_norm": 3.3392231464385986, "learning_rate": 9.676601452167342e-05, "loss": 1.5476, "step": 4160 }, { "epoch": 0.2480629395637144, "grad_norm": 2.918098211288452, "learning_rate": 9.676266780259032e-05, "loss": 1.5017, "step": 4162 }, { "epoch": 0.24818214328287042, "grad_norm": 3.1941163539886475, "learning_rate": 9.675931941064656e-05, "loss": 1.6183, "step": 4164 }, { "epoch": 0.24830134700202647, "grad_norm": 3.576869010925293, "learning_rate": 9.675596934596198e-05, "loss": 1.8375, "step": 4166 }, { "epoch": 0.2484205507211825, "grad_norm": 3.205925226211548, "learning_rate": 9.675261760865641e-05, "loss": 1.6307, "step": 4168 }, { "epoch": 0.24853975444033855, "grad_norm": 3.461775541305542, "learning_rate": 9.674926419884973e-05, "loss": 1.4231, "step": 4170 }, { "epoch": 0.24865895815949457, "grad_norm": 3.154392957687378, "learning_rate": 9.674590911666193e-05, "loss": 1.6237, "step": 4172 }, { "epoch": 0.24877816187865062, "grad_norm": 3.1528921127319336, "learning_rate": 9.6742552362213e-05, "loss": 1.4176, "step": 4174 }, { "epoch": 0.24889736559780665, "grad_norm": 3.012495994567871, "learning_rate": 9.673919393562308e-05, "loss": 1.4779, "step": 4176 }, { "epoch": 0.2490165693169627, "grad_norm": 2.708242177963257, "learning_rate": 9.673583383701224e-05, "loss": 1.4254, "step": 4178 }, { "epoch": 0.24913577303611872, "grad_norm": 3.0847039222717285, "learning_rate": 9.673247206650074e-05, "loss": 1.5595, "step": 4180 }, { "epoch": 0.24925497675527478, "grad_norm": 3.4199302196502686, "learning_rate": 9.67291086242088e-05, "loss": 1.6438, "step": 4182 }, { "epoch": 0.2493741804744308, "grad_norm": 3.0162227153778076, "learning_rate": 9.67257435102568e-05, "loss": 1.507, "step": 4184 }, { "epoch": 0.24949338419358685, "grad_norm": 3.320214033126831, "learning_rate": 9.672237672476506e-05, "loss": 1.6291, "step": 4186 }, { "epoch": 0.24961258791274288, "grad_norm": 2.995864152908325, "learning_rate": 9.671900826785405e-05, "loss": 1.4817, "step": 4188 }, { "epoch": 0.2497317916318989, "grad_norm": 3.175133466720581, "learning_rate": 9.671563813964426e-05, "loss": 1.6792, "step": 4190 }, { "epoch": 0.24985099535105496, "grad_norm": 3.143202304840088, "learning_rate": 9.671226634025626e-05, "loss": 1.6962, "step": 4192 }, { "epoch": 0.24997019907021098, "grad_norm": 3.3874573707580566, "learning_rate": 9.670889286981068e-05, "loss": 1.6421, "step": 4194 }, { "epoch": 0.25008940278936703, "grad_norm": 3.4050209522247314, "learning_rate": 9.670551772842818e-05, "loss": 1.6089, "step": 4196 }, { "epoch": 0.25020860650852306, "grad_norm": 3.088042974472046, "learning_rate": 9.670214091622951e-05, "loss": 1.385, "step": 4198 }, { "epoch": 0.2503278102276791, "grad_norm": 3.4521281719207764, "learning_rate": 9.669876243333546e-05, "loss": 1.5904, "step": 4200 }, { "epoch": 0.25044701394683516, "grad_norm": 3.398024320602417, "learning_rate": 9.669538227986692e-05, "loss": 1.5663, "step": 4202 }, { "epoch": 0.2505662176659912, "grad_norm": 3.060065746307373, "learning_rate": 9.669200045594477e-05, "loss": 1.415, "step": 4204 }, { "epoch": 0.2506854213851472, "grad_norm": 3.237143039703369, "learning_rate": 9.668861696169002e-05, "loss": 1.5945, "step": 4206 }, { "epoch": 0.25080462510430324, "grad_norm": 3.0865108966827393, "learning_rate": 9.668523179722371e-05, "loss": 1.7711, "step": 4208 }, { "epoch": 0.2509238288234593, "grad_norm": 3.2183163166046143, "learning_rate": 9.66818449626669e-05, "loss": 1.5812, "step": 4210 }, { "epoch": 0.25104303254261534, "grad_norm": 3.3460443019866943, "learning_rate": 9.66784564581408e-05, "loss": 1.5733, "step": 4212 }, { "epoch": 0.25116223626177137, "grad_norm": 3.234931707382202, "learning_rate": 9.66750662837666e-05, "loss": 1.5503, "step": 4214 }, { "epoch": 0.2512814399809274, "grad_norm": 3.1726608276367188, "learning_rate": 9.667167443966556e-05, "loss": 1.5841, "step": 4216 }, { "epoch": 0.2514006437000834, "grad_norm": 3.0824592113494873, "learning_rate": 9.666828092595905e-05, "loss": 1.5329, "step": 4218 }, { "epoch": 0.2515198474192395, "grad_norm": 3.3624486923217773, "learning_rate": 9.666488574276846e-05, "loss": 1.5433, "step": 4220 }, { "epoch": 0.2516390511383955, "grad_norm": 2.999370574951172, "learning_rate": 9.666148889021525e-05, "loss": 1.627, "step": 4222 }, { "epoch": 0.25175825485755154, "grad_norm": 3.2008514404296875, "learning_rate": 9.665809036842093e-05, "loss": 1.6313, "step": 4224 }, { "epoch": 0.25187745857670757, "grad_norm": 3.1551575660705566, "learning_rate": 9.665469017750708e-05, "loss": 1.6706, "step": 4226 }, { "epoch": 0.25199666229586365, "grad_norm": 3.297968626022339, "learning_rate": 9.665128831759534e-05, "loss": 1.6004, "step": 4228 }, { "epoch": 0.2521158660150197, "grad_norm": 3.566168785095215, "learning_rate": 9.664788478880738e-05, "loss": 1.6684, "step": 4230 }, { "epoch": 0.2522350697341757, "grad_norm": 2.9814682006835938, "learning_rate": 9.6644479591265e-05, "loss": 1.555, "step": 4232 }, { "epoch": 0.2523542734533317, "grad_norm": 3.1192474365234375, "learning_rate": 9.664107272508997e-05, "loss": 1.6667, "step": 4234 }, { "epoch": 0.2524734771724878, "grad_norm": 3.3345391750335693, "learning_rate": 9.66376641904042e-05, "loss": 1.7265, "step": 4236 }, { "epoch": 0.25259268089164383, "grad_norm": 3.4400696754455566, "learning_rate": 9.663425398732962e-05, "loss": 1.493, "step": 4238 }, { "epoch": 0.25271188461079985, "grad_norm": 3.137972116470337, "learning_rate": 9.66308421159882e-05, "loss": 1.6012, "step": 4240 }, { "epoch": 0.2528310883299559, "grad_norm": 3.9216015338897705, "learning_rate": 9.662742857650203e-05, "loss": 1.6023, "step": 4242 }, { "epoch": 0.25295029204911196, "grad_norm": 3.3427317142486572, "learning_rate": 9.662401336899321e-05, "loss": 1.5568, "step": 4244 }, { "epoch": 0.253069495768268, "grad_norm": 2.8768606185913086, "learning_rate": 9.662059649358388e-05, "loss": 1.5012, "step": 4246 }, { "epoch": 0.253188699487424, "grad_norm": 3.189110279083252, "learning_rate": 9.661717795039633e-05, "loss": 1.5151, "step": 4248 }, { "epoch": 0.25330790320658003, "grad_norm": 3.24090576171875, "learning_rate": 9.661375773955282e-05, "loss": 1.6822, "step": 4250 }, { "epoch": 0.25342710692573606, "grad_norm": 3.2825803756713867, "learning_rate": 9.661033586117569e-05, "loss": 1.4628, "step": 4252 }, { "epoch": 0.25354631064489214, "grad_norm": 3.5513787269592285, "learning_rate": 9.66069123153874e-05, "loss": 1.7657, "step": 4254 }, { "epoch": 0.25366551436404816, "grad_norm": 3.1512675285339355, "learning_rate": 9.660348710231037e-05, "loss": 1.5298, "step": 4256 }, { "epoch": 0.2537847180832042, "grad_norm": 3.1994214057922363, "learning_rate": 9.660006022206716e-05, "loss": 1.6703, "step": 4258 }, { "epoch": 0.2539039218023602, "grad_norm": 3.268706798553467, "learning_rate": 9.659663167478038e-05, "loss": 1.5342, "step": 4260 }, { "epoch": 0.2540231255215163, "grad_norm": 3.1116943359375, "learning_rate": 9.659320146057262e-05, "loss": 1.5246, "step": 4262 }, { "epoch": 0.2541423292406723, "grad_norm": 3.268547296524048, "learning_rate": 9.658976957956665e-05, "loss": 1.5376, "step": 4264 }, { "epoch": 0.25426153295982834, "grad_norm": 3.4582245349884033, "learning_rate": 9.65863360318852e-05, "loss": 1.6884, "step": 4266 }, { "epoch": 0.25438073667898436, "grad_norm": 3.1438701152801514, "learning_rate": 9.658290081765112e-05, "loss": 1.6503, "step": 4268 }, { "epoch": 0.25449994039814045, "grad_norm": 2.982571840286255, "learning_rate": 9.657946393698731e-05, "loss": 1.4732, "step": 4270 }, { "epoch": 0.25461914411729647, "grad_norm": 3.1367926597595215, "learning_rate": 9.657602539001669e-05, "loss": 1.5817, "step": 4272 }, { "epoch": 0.2547383478364525, "grad_norm": 3.0519797801971436, "learning_rate": 9.65725851768623e-05, "loss": 1.5021, "step": 4274 }, { "epoch": 0.2548575515556085, "grad_norm": 3.412484884262085, "learning_rate": 9.656914329764718e-05, "loss": 1.7014, "step": 4276 }, { "epoch": 0.2549767552747646, "grad_norm": 3.258786201477051, "learning_rate": 9.656569975249447e-05, "loss": 1.6635, "step": 4278 }, { "epoch": 0.2550959589939206, "grad_norm": 3.2214388847351074, "learning_rate": 9.656225454152736e-05, "loss": 1.5883, "step": 4280 }, { "epoch": 0.25521516271307665, "grad_norm": 3.246769905090332, "learning_rate": 9.655880766486909e-05, "loss": 1.5659, "step": 4282 }, { "epoch": 0.2553343664322327, "grad_norm": 3.1772098541259766, "learning_rate": 9.655535912264297e-05, "loss": 1.6115, "step": 4284 }, { "epoch": 0.2554535701513887, "grad_norm": 3.1234309673309326, "learning_rate": 9.655190891497237e-05, "loss": 1.39, "step": 4286 }, { "epoch": 0.2555727738705448, "grad_norm": 3.488744020462036, "learning_rate": 9.654845704198072e-05, "loss": 1.5908, "step": 4288 }, { "epoch": 0.2556919775897008, "grad_norm": 3.270484685897827, "learning_rate": 9.654500350379147e-05, "loss": 1.5054, "step": 4290 }, { "epoch": 0.2558111813088568, "grad_norm": 3.3670976161956787, "learning_rate": 9.654154830052821e-05, "loss": 1.611, "step": 4292 }, { "epoch": 0.25593038502801285, "grad_norm": 3.676069736480713, "learning_rate": 9.653809143231454e-05, "loss": 1.5231, "step": 4294 }, { "epoch": 0.25604958874716893, "grad_norm": 3.294712543487549, "learning_rate": 9.653463289927411e-05, "loss": 1.5216, "step": 4296 }, { "epoch": 0.25616879246632496, "grad_norm": 3.299283742904663, "learning_rate": 9.653117270153062e-05, "loss": 1.5651, "step": 4298 }, { "epoch": 0.256287996185481, "grad_norm": 3.524458169937134, "learning_rate": 9.65277108392079e-05, "loss": 1.5114, "step": 4300 }, { "epoch": 0.256407199904637, "grad_norm": 3.080388069152832, "learning_rate": 9.652424731242976e-05, "loss": 1.7133, "step": 4302 }, { "epoch": 0.2565264036237931, "grad_norm": 3.2750232219696045, "learning_rate": 9.652078212132012e-05, "loss": 1.6322, "step": 4304 }, { "epoch": 0.2566456073429491, "grad_norm": 3.0588605403900146, "learning_rate": 9.651731526600293e-05, "loss": 1.4449, "step": 4306 }, { "epoch": 0.25676481106210514, "grad_norm": 3.4657905101776123, "learning_rate": 9.651384674660223e-05, "loss": 1.6091, "step": 4308 }, { "epoch": 0.25688401478126116, "grad_norm": 3.1780831813812256, "learning_rate": 9.651037656324205e-05, "loss": 1.4736, "step": 4310 }, { "epoch": 0.2570032185004172, "grad_norm": 3.5108449459075928, "learning_rate": 9.65069047160466e-05, "loss": 1.6848, "step": 4312 }, { "epoch": 0.25712242221957327, "grad_norm": 3.3111422061920166, "learning_rate": 9.650343120514003e-05, "loss": 1.5661, "step": 4314 }, { "epoch": 0.2572416259387293, "grad_norm": 3.122866630554199, "learning_rate": 9.649995603064663e-05, "loss": 1.5371, "step": 4316 }, { "epoch": 0.2573608296578853, "grad_norm": 3.330577850341797, "learning_rate": 9.649647919269069e-05, "loss": 1.574, "step": 4318 }, { "epoch": 0.25748003337704134, "grad_norm": 3.227316379547119, "learning_rate": 9.649300069139662e-05, "loss": 1.6699, "step": 4320 }, { "epoch": 0.2575992370961974, "grad_norm": 3.0997886657714844, "learning_rate": 9.648952052688882e-05, "loss": 1.428, "step": 4322 }, { "epoch": 0.25771844081535344, "grad_norm": 3.494342565536499, "learning_rate": 9.648603869929183e-05, "loss": 1.6492, "step": 4324 }, { "epoch": 0.25783764453450947, "grad_norm": 3.3319196701049805, "learning_rate": 9.648255520873018e-05, "loss": 1.5152, "step": 4326 }, { "epoch": 0.2579568482536655, "grad_norm": 3.016892433166504, "learning_rate": 9.647907005532848e-05, "loss": 1.5066, "step": 4328 }, { "epoch": 0.2580760519728216, "grad_norm": 3.5012876987457275, "learning_rate": 9.647558323921142e-05, "loss": 1.494, "step": 4330 }, { "epoch": 0.2581952556919776, "grad_norm": 3.0961408615112305, "learning_rate": 9.647209476050375e-05, "loss": 1.4699, "step": 4332 }, { "epoch": 0.2583144594111336, "grad_norm": 3.2576491832733154, "learning_rate": 9.646860461933023e-05, "loss": 1.5261, "step": 4334 }, { "epoch": 0.25843366313028965, "grad_norm": 3.422697067260742, "learning_rate": 9.646511281581574e-05, "loss": 1.5384, "step": 4336 }, { "epoch": 0.25855286684944573, "grad_norm": 2.8333091735839844, "learning_rate": 9.646161935008517e-05, "loss": 1.6456, "step": 4338 }, { "epoch": 0.25867207056860175, "grad_norm": 3.5783166885375977, "learning_rate": 9.645812422226353e-05, "loss": 1.622, "step": 4340 }, { "epoch": 0.2587912742877578, "grad_norm": 3.476062774658203, "learning_rate": 9.645462743247582e-05, "loss": 1.6021, "step": 4342 }, { "epoch": 0.2589104780069138, "grad_norm": 3.0284249782562256, "learning_rate": 9.645112898084715e-05, "loss": 1.4617, "step": 4344 }, { "epoch": 0.2590296817260698, "grad_norm": 3.138605833053589, "learning_rate": 9.644762886750268e-05, "loss": 1.5336, "step": 4346 }, { "epoch": 0.2591488854452259, "grad_norm": 2.874337673187256, "learning_rate": 9.644412709256758e-05, "loss": 1.5836, "step": 4348 }, { "epoch": 0.25926808916438193, "grad_norm": 3.0628926753997803, "learning_rate": 9.644062365616715e-05, "loss": 1.5407, "step": 4350 }, { "epoch": 0.25938729288353796, "grad_norm": 3.1047918796539307, "learning_rate": 9.643711855842673e-05, "loss": 1.3903, "step": 4352 }, { "epoch": 0.259506496602694, "grad_norm": 3.2677509784698486, "learning_rate": 9.64336117994717e-05, "loss": 1.6637, "step": 4354 }, { "epoch": 0.25962570032185006, "grad_norm": 3.0273470878601074, "learning_rate": 9.643010337942748e-05, "loss": 1.4367, "step": 4356 }, { "epoch": 0.2597449040410061, "grad_norm": 2.9734432697296143, "learning_rate": 9.642659329841963e-05, "loss": 1.545, "step": 4358 }, { "epoch": 0.2598641077601621, "grad_norm": 3.1097989082336426, "learning_rate": 9.642308155657368e-05, "loss": 1.5089, "step": 4360 }, { "epoch": 0.25998331147931814, "grad_norm": 3.5497183799743652, "learning_rate": 9.641956815401528e-05, "loss": 1.5307, "step": 4362 }, { "epoch": 0.2601025151984742, "grad_norm": 3.5529425144195557, "learning_rate": 9.641605309087007e-05, "loss": 1.5656, "step": 4364 }, { "epoch": 0.26022171891763024, "grad_norm": 3.1180593967437744, "learning_rate": 9.641253636726386e-05, "loss": 1.7187, "step": 4366 }, { "epoch": 0.26034092263678626, "grad_norm": 3.528881072998047, "learning_rate": 9.640901798332242e-05, "loss": 1.4681, "step": 4368 }, { "epoch": 0.2604601263559423, "grad_norm": 3.303501605987549, "learning_rate": 9.640549793917162e-05, "loss": 1.6732, "step": 4370 }, { "epoch": 0.26057933007509837, "grad_norm": 3.3253438472747803, "learning_rate": 9.640197623493737e-05, "loss": 1.4881, "step": 4372 }, { "epoch": 0.2606985337942544, "grad_norm": 3.3704793453216553, "learning_rate": 9.639845287074569e-05, "loss": 1.6504, "step": 4374 }, { "epoch": 0.2608177375134104, "grad_norm": 3.349289894104004, "learning_rate": 9.639492784672259e-05, "loss": 1.5519, "step": 4376 }, { "epoch": 0.26093694123256644, "grad_norm": 2.8504178524017334, "learning_rate": 9.639140116299417e-05, "loss": 1.6023, "step": 4378 }, { "epoch": 0.26105614495172247, "grad_norm": 3.2015457153320312, "learning_rate": 9.638787281968661e-05, "loss": 1.5089, "step": 4380 }, { "epoch": 0.26117534867087855, "grad_norm": 3.418616533279419, "learning_rate": 9.638434281692614e-05, "loss": 1.5654, "step": 4382 }, { "epoch": 0.2612945523900346, "grad_norm": 3.2380459308624268, "learning_rate": 9.638081115483901e-05, "loss": 1.4886, "step": 4384 }, { "epoch": 0.2614137561091906, "grad_norm": 3.52251935005188, "learning_rate": 9.637727783355159e-05, "loss": 1.6222, "step": 4386 }, { "epoch": 0.2615329598283466, "grad_norm": 2.938265085220337, "learning_rate": 9.637374285319025e-05, "loss": 1.5264, "step": 4388 }, { "epoch": 0.2616521635475027, "grad_norm": 3.1952168941497803, "learning_rate": 9.637020621388146e-05, "loss": 1.5427, "step": 4390 }, { "epoch": 0.2617713672666587, "grad_norm": 3.0514307022094727, "learning_rate": 9.636666791575175e-05, "loss": 1.7268, "step": 4392 }, { "epoch": 0.26189057098581475, "grad_norm": 3.1901957988739014, "learning_rate": 9.636312795892769e-05, "loss": 1.5307, "step": 4394 }, { "epoch": 0.2620097747049708, "grad_norm": 2.7030718326568604, "learning_rate": 9.63595863435359e-05, "loss": 1.5239, "step": 4396 }, { "epoch": 0.26212897842412686, "grad_norm": 3.224717378616333, "learning_rate": 9.635604306970309e-05, "loss": 1.601, "step": 4398 }, { "epoch": 0.2622481821432829, "grad_norm": 3.4013311862945557, "learning_rate": 9.635249813755603e-05, "loss": 1.6556, "step": 4400 }, { "epoch": 0.2623673858624389, "grad_norm": 3.0836684703826904, "learning_rate": 9.63489515472215e-05, "loss": 1.5876, "step": 4402 }, { "epoch": 0.26248658958159493, "grad_norm": 3.1574034690856934, "learning_rate": 9.63454032988264e-05, "loss": 1.6136, "step": 4404 }, { "epoch": 0.26260579330075096, "grad_norm": 3.2809982299804688, "learning_rate": 9.634185339249766e-05, "loss": 1.4941, "step": 4406 }, { "epoch": 0.26272499701990704, "grad_norm": 3.229454517364502, "learning_rate": 9.633830182836226e-05, "loss": 1.4616, "step": 4408 }, { "epoch": 0.26284420073906306, "grad_norm": 3.1565167903900146, "learning_rate": 9.633474860654725e-05, "loss": 1.6133, "step": 4410 }, { "epoch": 0.2629634044582191, "grad_norm": 3.178096055984497, "learning_rate": 9.633119372717978e-05, "loss": 1.5405, "step": 4412 }, { "epoch": 0.2630826081773751, "grad_norm": 3.0820815563201904, "learning_rate": 9.632763719038695e-05, "loss": 1.4482, "step": 4414 }, { "epoch": 0.2632018118965312, "grad_norm": 3.004037380218506, "learning_rate": 9.632407899629606e-05, "loss": 1.557, "step": 4416 }, { "epoch": 0.2633210156156872, "grad_norm": 3.1411259174346924, "learning_rate": 9.632051914503435e-05, "loss": 1.3969, "step": 4418 }, { "epoch": 0.26344021933484324, "grad_norm": 3.416767120361328, "learning_rate": 9.63169576367292e-05, "loss": 1.5367, "step": 4420 }, { "epoch": 0.26355942305399926, "grad_norm": 3.1022286415100098, "learning_rate": 9.6313394471508e-05, "loss": 1.4991, "step": 4422 }, { "epoch": 0.26367862677315534, "grad_norm": 3.0193464756011963, "learning_rate": 9.630982964949822e-05, "loss": 1.5244, "step": 4424 }, { "epoch": 0.26379783049231137, "grad_norm": 3.0886547565460205, "learning_rate": 9.630626317082737e-05, "loss": 1.5498, "step": 4426 }, { "epoch": 0.2639170342114674, "grad_norm": 2.90261173248291, "learning_rate": 9.630269503562306e-05, "loss": 1.5832, "step": 4428 }, { "epoch": 0.2640362379306234, "grad_norm": 3.5088446140289307, "learning_rate": 9.629912524401295e-05, "loss": 1.4665, "step": 4430 }, { "epoch": 0.2641554416497795, "grad_norm": 2.9917824268341064, "learning_rate": 9.629555379612469e-05, "loss": 1.4433, "step": 4432 }, { "epoch": 0.2642746453689355, "grad_norm": 2.78833270072937, "learning_rate": 9.629198069208608e-05, "loss": 1.55, "step": 4434 }, { "epoch": 0.26439384908809155, "grad_norm": 3.418910264968872, "learning_rate": 9.628840593202493e-05, "loss": 1.6494, "step": 4436 }, { "epoch": 0.2645130528072476, "grad_norm": 3.1594748497009277, "learning_rate": 9.628482951606914e-05, "loss": 1.5399, "step": 4438 }, { "epoch": 0.2646322565264036, "grad_norm": 2.9460792541503906, "learning_rate": 9.628125144434662e-05, "loss": 1.455, "step": 4440 }, { "epoch": 0.2647514602455597, "grad_norm": 3.3608508110046387, "learning_rate": 9.627767171698539e-05, "loss": 1.5996, "step": 4442 }, { "epoch": 0.2648706639647157, "grad_norm": 3.6514317989349365, "learning_rate": 9.627409033411351e-05, "loss": 1.4382, "step": 4444 }, { "epoch": 0.2649898676838717, "grad_norm": 3.0929510593414307, "learning_rate": 9.62705072958591e-05, "loss": 1.518, "step": 4446 }, { "epoch": 0.26510907140302775, "grad_norm": 3.8368775844573975, "learning_rate": 9.626692260235033e-05, "loss": 1.7029, "step": 4448 }, { "epoch": 0.26522827512218383, "grad_norm": 3.2854459285736084, "learning_rate": 9.626333625371542e-05, "loss": 1.539, "step": 4450 }, { "epoch": 0.26534747884133986, "grad_norm": 3.4936578273773193, "learning_rate": 9.62597482500827e-05, "loss": 1.535, "step": 4452 }, { "epoch": 0.2654666825604959, "grad_norm": 3.2652339935302734, "learning_rate": 9.625615859158051e-05, "loss": 1.5781, "step": 4454 }, { "epoch": 0.2655858862796519, "grad_norm": 3.3912060260772705, "learning_rate": 9.625256727833726e-05, "loss": 1.4498, "step": 4456 }, { "epoch": 0.265705089998808, "grad_norm": 3.299360990524292, "learning_rate": 9.624897431048142e-05, "loss": 1.5464, "step": 4458 }, { "epoch": 0.265824293717964, "grad_norm": 3.1788134574890137, "learning_rate": 9.624537968814153e-05, "loss": 1.4887, "step": 4460 }, { "epoch": 0.26594349743712004, "grad_norm": 3.2211740016937256, "learning_rate": 9.624178341144619e-05, "loss": 1.5925, "step": 4462 }, { "epoch": 0.26606270115627606, "grad_norm": 3.1186630725860596, "learning_rate": 9.623818548052404e-05, "loss": 1.5007, "step": 4464 }, { "epoch": 0.26618190487543214, "grad_norm": 3.0157721042633057, "learning_rate": 9.62345858955038e-05, "loss": 1.5846, "step": 4466 }, { "epoch": 0.26630110859458817, "grad_norm": 2.991914987564087, "learning_rate": 9.623098465651422e-05, "loss": 1.491, "step": 4468 }, { "epoch": 0.2664203123137442, "grad_norm": 3.150686740875244, "learning_rate": 9.622738176368415e-05, "loss": 1.5374, "step": 4470 }, { "epoch": 0.2665395160329002, "grad_norm": 3.199544668197632, "learning_rate": 9.622377721714247e-05, "loss": 1.7142, "step": 4472 }, { "epoch": 0.26665871975205624, "grad_norm": 2.840750217437744, "learning_rate": 9.622017101701813e-05, "loss": 1.4479, "step": 4474 }, { "epoch": 0.2667779234712123, "grad_norm": 3.407024621963501, "learning_rate": 9.62165631634401e-05, "loss": 1.4582, "step": 4476 }, { "epoch": 0.26689712719036834, "grad_norm": 3.132146120071411, "learning_rate": 9.621295365653751e-05, "loss": 1.5632, "step": 4478 }, { "epoch": 0.26701633090952437, "grad_norm": 3.5297935009002686, "learning_rate": 9.620934249643945e-05, "loss": 1.6006, "step": 4480 }, { "epoch": 0.2671355346286804, "grad_norm": 2.7996675968170166, "learning_rate": 9.62057296832751e-05, "loss": 1.4783, "step": 4482 }, { "epoch": 0.2672547383478365, "grad_norm": 3.375276803970337, "learning_rate": 9.62021152171737e-05, "loss": 1.6999, "step": 4484 }, { "epoch": 0.2673739420669925, "grad_norm": 2.8803393840789795, "learning_rate": 9.619849909826458e-05, "loss": 1.5053, "step": 4486 }, { "epoch": 0.2674931457861485, "grad_norm": 3.1492154598236084, "learning_rate": 9.619488132667705e-05, "loss": 1.5804, "step": 4488 }, { "epoch": 0.26761234950530455, "grad_norm": 3.3518474102020264, "learning_rate": 9.619126190254058e-05, "loss": 1.6829, "step": 4490 }, { "epoch": 0.26773155322446063, "grad_norm": 3.0046632289886475, "learning_rate": 9.618764082598462e-05, "loss": 1.5204, "step": 4492 }, { "epoch": 0.26785075694361665, "grad_norm": 3.1114346981048584, "learning_rate": 9.618401809713872e-05, "loss": 1.5177, "step": 4494 }, { "epoch": 0.2679699606627727, "grad_norm": 3.3696653842926025, "learning_rate": 9.61803937161325e-05, "loss": 1.4522, "step": 4496 }, { "epoch": 0.2680891643819287, "grad_norm": 3.3776352405548096, "learning_rate": 9.617676768309557e-05, "loss": 1.5356, "step": 4498 }, { "epoch": 0.2682083681010847, "grad_norm": 3.4389290809631348, "learning_rate": 9.617313999815767e-05, "loss": 1.6263, "step": 4500 }, { "epoch": 0.2683275718202408, "grad_norm": 3.151472806930542, "learning_rate": 9.616951066144857e-05, "loss": 1.5976, "step": 4502 }, { "epoch": 0.26844677553939683, "grad_norm": 3.386439800262451, "learning_rate": 9.616587967309812e-05, "loss": 1.7749, "step": 4504 }, { "epoch": 0.26856597925855286, "grad_norm": 3.1024441719055176, "learning_rate": 9.61622470332362e-05, "loss": 1.5567, "step": 4506 }, { "epoch": 0.2686851829777089, "grad_norm": 3.217954158782959, "learning_rate": 9.615861274199273e-05, "loss": 1.4653, "step": 4508 }, { "epoch": 0.26880438669686496, "grad_norm": 3.257110834121704, "learning_rate": 9.615497679949779e-05, "loss": 1.6667, "step": 4510 }, { "epoch": 0.268923590416021, "grad_norm": 2.993154525756836, "learning_rate": 9.61513392058814e-05, "loss": 1.52, "step": 4512 }, { "epoch": 0.269042794135177, "grad_norm": 3.2244911193847656, "learning_rate": 9.614769996127369e-05, "loss": 1.52, "step": 4514 }, { "epoch": 0.26916199785433303, "grad_norm": 3.0713703632354736, "learning_rate": 9.614405906580485e-05, "loss": 1.5535, "step": 4516 }, { "epoch": 0.2692812015734891, "grad_norm": 3.6366405487060547, "learning_rate": 9.614041651960517e-05, "loss": 1.5099, "step": 4518 }, { "epoch": 0.26940040529264514, "grad_norm": 3.3176026344299316, "learning_rate": 9.613677232280491e-05, "loss": 1.5438, "step": 4520 }, { "epoch": 0.26951960901180116, "grad_norm": 2.923842430114746, "learning_rate": 9.613312647553444e-05, "loss": 1.5097, "step": 4522 }, { "epoch": 0.2696388127309572, "grad_norm": 3.1996102333068848, "learning_rate": 9.61294789779242e-05, "loss": 1.6669, "step": 4524 }, { "epoch": 0.26975801645011327, "grad_norm": 3.079253673553467, "learning_rate": 9.612582983010468e-05, "loss": 1.591, "step": 4526 }, { "epoch": 0.2698772201692693, "grad_norm": 3.4678103923797607, "learning_rate": 9.612217903220638e-05, "loss": 1.5682, "step": 4528 }, { "epoch": 0.2699964238884253, "grad_norm": 3.3160481452941895, "learning_rate": 9.611852658435996e-05, "loss": 1.5472, "step": 4530 }, { "epoch": 0.27011562760758134, "grad_norm": 3.227982759475708, "learning_rate": 9.611487248669603e-05, "loss": 1.6505, "step": 4532 }, { "epoch": 0.27023483132673737, "grad_norm": 3.529270887374878, "learning_rate": 9.611121673934533e-05, "loss": 1.5694, "step": 4534 }, { "epoch": 0.27035403504589345, "grad_norm": 3.357116460800171, "learning_rate": 9.610755934243865e-05, "loss": 1.4753, "step": 4536 }, { "epoch": 0.2704732387650495, "grad_norm": 3.047661304473877, "learning_rate": 9.610390029610681e-05, "loss": 1.4146, "step": 4538 }, { "epoch": 0.2705924424842055, "grad_norm": 3.249624729156494, "learning_rate": 9.610023960048072e-05, "loss": 1.5443, "step": 4540 }, { "epoch": 0.2707116462033615, "grad_norm": 3.286393880844116, "learning_rate": 9.609657725569134e-05, "loss": 1.5969, "step": 4542 }, { "epoch": 0.2708308499225176, "grad_norm": 3.2686893939971924, "learning_rate": 9.609291326186965e-05, "loss": 1.483, "step": 4544 }, { "epoch": 0.2709500536416736, "grad_norm": 3.4183690547943115, "learning_rate": 9.608924761914677e-05, "loss": 1.7975, "step": 4546 }, { "epoch": 0.27106925736082965, "grad_norm": 3.1939451694488525, "learning_rate": 9.60855803276538e-05, "loss": 1.4533, "step": 4548 }, { "epoch": 0.2711884610799857, "grad_norm": 3.2259509563446045, "learning_rate": 9.608191138752193e-05, "loss": 1.6173, "step": 4550 }, { "epoch": 0.27130766479914176, "grad_norm": 3.369791030883789, "learning_rate": 9.607824079888243e-05, "loss": 1.4625, "step": 4552 }, { "epoch": 0.2714268685182978, "grad_norm": 3.190115213394165, "learning_rate": 9.60745685618666e-05, "loss": 1.4621, "step": 4554 }, { "epoch": 0.2715460722374538, "grad_norm": 3.122750997543335, "learning_rate": 9.607089467660581e-05, "loss": 1.4866, "step": 4556 }, { "epoch": 0.27166527595660983, "grad_norm": 3.0322563648223877, "learning_rate": 9.60672191432315e-05, "loss": 1.5828, "step": 4558 }, { "epoch": 0.2717844796757659, "grad_norm": 3.354121685028076, "learning_rate": 9.606354196187514e-05, "loss": 1.6733, "step": 4560 }, { "epoch": 0.27190368339492194, "grad_norm": 3.2673449516296387, "learning_rate": 9.605986313266827e-05, "loss": 1.7407, "step": 4562 }, { "epoch": 0.27202288711407796, "grad_norm": 2.852285861968994, "learning_rate": 9.60561826557425e-05, "loss": 1.472, "step": 4564 }, { "epoch": 0.272142090833234, "grad_norm": 2.9541757106781006, "learning_rate": 9.605250053122952e-05, "loss": 1.5687, "step": 4566 }, { "epoch": 0.27226129455239, "grad_norm": 3.326993465423584, "learning_rate": 9.604881675926101e-05, "loss": 1.6948, "step": 4568 }, { "epoch": 0.2723804982715461, "grad_norm": 3.247677803039551, "learning_rate": 9.604513133996877e-05, "loss": 1.5737, "step": 4570 }, { "epoch": 0.2724997019907021, "grad_norm": 3.1539249420166016, "learning_rate": 9.604144427348464e-05, "loss": 1.5894, "step": 4572 }, { "epoch": 0.27261890570985814, "grad_norm": 3.3344483375549316, "learning_rate": 9.603775555994053e-05, "loss": 1.5815, "step": 4574 }, { "epoch": 0.27273810942901416, "grad_norm": 3.2013304233551025, "learning_rate": 9.603406519946838e-05, "loss": 1.3693, "step": 4576 }, { "epoch": 0.27285731314817024, "grad_norm": 3.2674360275268555, "learning_rate": 9.603037319220022e-05, "loss": 1.6108, "step": 4578 }, { "epoch": 0.27297651686732627, "grad_norm": 3.100829839706421, "learning_rate": 9.602667953826812e-05, "loss": 1.4313, "step": 4580 }, { "epoch": 0.2730957205864823, "grad_norm": 3.6243247985839844, "learning_rate": 9.60229842378042e-05, "loss": 1.4879, "step": 4582 }, { "epoch": 0.2732149243056383, "grad_norm": 3.0596771240234375, "learning_rate": 9.601928729094067e-05, "loss": 1.6528, "step": 4584 }, { "epoch": 0.2733341280247944, "grad_norm": 3.1925573348999023, "learning_rate": 9.60155886978098e-05, "loss": 1.5336, "step": 4586 }, { "epoch": 0.2734533317439504, "grad_norm": 3.1729929447174072, "learning_rate": 9.601188845854388e-05, "loss": 1.7256, "step": 4588 }, { "epoch": 0.27357253546310645, "grad_norm": 3.187028408050537, "learning_rate": 9.600818657327525e-05, "loss": 1.6339, "step": 4590 }, { "epoch": 0.2736917391822625, "grad_norm": 3.329464912414551, "learning_rate": 9.60044830421364e-05, "loss": 1.6108, "step": 4592 }, { "epoch": 0.2738109429014185, "grad_norm": 3.440882921218872, "learning_rate": 9.600077786525978e-05, "loss": 1.5077, "step": 4594 }, { "epoch": 0.2739301466205746, "grad_norm": 2.9928081035614014, "learning_rate": 9.599707104277795e-05, "loss": 1.4344, "step": 4596 }, { "epoch": 0.2740493503397306, "grad_norm": 3.0643136501312256, "learning_rate": 9.599336257482349e-05, "loss": 1.4821, "step": 4598 }, { "epoch": 0.2741685540588866, "grad_norm": 3.125964403152466, "learning_rate": 9.59896524615291e-05, "loss": 1.5151, "step": 4600 }, { "epoch": 0.27428775777804265, "grad_norm": 3.3408043384552, "learning_rate": 9.598594070302752e-05, "loss": 1.6209, "step": 4602 }, { "epoch": 0.27440696149719873, "grad_norm": 2.973214626312256, "learning_rate": 9.598222729945147e-05, "loss": 1.4659, "step": 4604 }, { "epoch": 0.27452616521635476, "grad_norm": 3.0089550018310547, "learning_rate": 9.597851225093382e-05, "loss": 1.6093, "step": 4606 }, { "epoch": 0.2746453689355108, "grad_norm": 2.9606990814208984, "learning_rate": 9.597479555760749e-05, "loss": 1.4452, "step": 4608 }, { "epoch": 0.2747645726546668, "grad_norm": 3.2283546924591064, "learning_rate": 9.59710772196054e-05, "loss": 1.5665, "step": 4610 }, { "epoch": 0.2748837763738229, "grad_norm": 2.9721460342407227, "learning_rate": 9.596735723706061e-05, "loss": 1.43, "step": 4612 }, { "epoch": 0.2750029800929789, "grad_norm": 3.008014678955078, "learning_rate": 9.596363561010618e-05, "loss": 1.527, "step": 4614 }, { "epoch": 0.27512218381213494, "grad_norm": 2.9407272338867188, "learning_rate": 9.595991233887522e-05, "loss": 1.4693, "step": 4616 }, { "epoch": 0.27524138753129096, "grad_norm": 3.14914870262146, "learning_rate": 9.595618742350097e-05, "loss": 1.5391, "step": 4618 }, { "epoch": 0.27536059125044704, "grad_norm": 3.207643747329712, "learning_rate": 9.595246086411664e-05, "loss": 1.4983, "step": 4620 }, { "epoch": 0.27547979496960306, "grad_norm": 3.0730669498443604, "learning_rate": 9.594873266085555e-05, "loss": 1.5363, "step": 4622 }, { "epoch": 0.2755989986887591, "grad_norm": 2.8327717781066895, "learning_rate": 9.59450028138511e-05, "loss": 1.5454, "step": 4624 }, { "epoch": 0.2757182024079151, "grad_norm": 3.0936310291290283, "learning_rate": 9.594127132323669e-05, "loss": 1.3573, "step": 4626 }, { "epoch": 0.27583740612707114, "grad_norm": 3.780331611633301, "learning_rate": 9.593753818914584e-05, "loss": 1.7003, "step": 4628 }, { "epoch": 0.2759566098462272, "grad_norm": 3.136197805404663, "learning_rate": 9.593380341171205e-05, "loss": 1.5683, "step": 4630 }, { "epoch": 0.27607581356538324, "grad_norm": 3.3316562175750732, "learning_rate": 9.593006699106895e-05, "loss": 1.6914, "step": 4632 }, { "epoch": 0.27619501728453927, "grad_norm": 3.0235328674316406, "learning_rate": 9.592632892735023e-05, "loss": 1.5324, "step": 4634 }, { "epoch": 0.2763142210036953, "grad_norm": 3.1123735904693604, "learning_rate": 9.592258922068957e-05, "loss": 1.5529, "step": 4636 }, { "epoch": 0.2764334247228514, "grad_norm": 3.336514711380005, "learning_rate": 9.591884787122077e-05, "loss": 1.556, "step": 4638 }, { "epoch": 0.2765526284420074, "grad_norm": 3.042681932449341, "learning_rate": 9.591510487907769e-05, "loss": 1.4611, "step": 4640 }, { "epoch": 0.2766718321611634, "grad_norm": 3.3512721061706543, "learning_rate": 9.59113602443942e-05, "loss": 1.7318, "step": 4642 }, { "epoch": 0.27679103588031945, "grad_norm": 3.2014498710632324, "learning_rate": 9.590761396730426e-05, "loss": 1.5978, "step": 4644 }, { "epoch": 0.2769102395994755, "grad_norm": 3.3327832221984863, "learning_rate": 9.59038660479419e-05, "loss": 1.6284, "step": 4646 }, { "epoch": 0.27702944331863155, "grad_norm": 3.0799665451049805, "learning_rate": 9.590011648644122e-05, "loss": 1.4865, "step": 4648 }, { "epoch": 0.2771486470377876, "grad_norm": 2.981672525405884, "learning_rate": 9.58963652829363e-05, "loss": 1.5322, "step": 4650 }, { "epoch": 0.2772678507569436, "grad_norm": 3.0774049758911133, "learning_rate": 9.589261243756135e-05, "loss": 1.4491, "step": 4652 }, { "epoch": 0.2773870544760997, "grad_norm": 3.849201202392578, "learning_rate": 9.588885795045066e-05, "loss": 1.6079, "step": 4654 }, { "epoch": 0.2775062581952557, "grad_norm": 3.2858545780181885, "learning_rate": 9.588510182173851e-05, "loss": 1.5677, "step": 4656 }, { "epoch": 0.27762546191441173, "grad_norm": 3.2162866592407227, "learning_rate": 9.588134405155927e-05, "loss": 1.6253, "step": 4658 }, { "epoch": 0.27774466563356776, "grad_norm": 3.5238993167877197, "learning_rate": 9.587758464004737e-05, "loss": 1.6604, "step": 4660 }, { "epoch": 0.2778638693527238, "grad_norm": 3.2981066703796387, "learning_rate": 9.587382358733731e-05, "loss": 1.6005, "step": 4662 }, { "epoch": 0.27798307307187986, "grad_norm": 3.081233501434326, "learning_rate": 9.58700608935636e-05, "loss": 1.6611, "step": 4664 }, { "epoch": 0.2781022767910359, "grad_norm": 3.2940587997436523, "learning_rate": 9.58662965588609e-05, "loss": 1.4934, "step": 4666 }, { "epoch": 0.2782214805101919, "grad_norm": 3.6456758975982666, "learning_rate": 9.586253058336382e-05, "loss": 1.6806, "step": 4668 }, { "epoch": 0.27834068422934793, "grad_norm": 3.335192918777466, "learning_rate": 9.585876296720712e-05, "loss": 1.5412, "step": 4670 }, { "epoch": 0.278459887948504, "grad_norm": 3.055626392364502, "learning_rate": 9.585499371052555e-05, "loss": 1.5752, "step": 4672 }, { "epoch": 0.27857909166766004, "grad_norm": 3.230292797088623, "learning_rate": 9.585122281345397e-05, "loss": 1.6065, "step": 4674 }, { "epoch": 0.27869829538681606, "grad_norm": 2.7884669303894043, "learning_rate": 9.584745027612728e-05, "loss": 1.4302, "step": 4676 }, { "epoch": 0.2788174991059721, "grad_norm": 3.5480639934539795, "learning_rate": 9.584367609868042e-05, "loss": 1.6504, "step": 4678 }, { "epoch": 0.27893670282512817, "grad_norm": 2.8328440189361572, "learning_rate": 9.583990028124841e-05, "loss": 1.449, "step": 4680 }, { "epoch": 0.2790559065442842, "grad_norm": 3.1341488361358643, "learning_rate": 9.583612282396632e-05, "loss": 1.5857, "step": 4682 }, { "epoch": 0.2791751102634402, "grad_norm": 2.9867515563964844, "learning_rate": 9.583234372696929e-05, "loss": 1.5382, "step": 4684 }, { "epoch": 0.27929431398259624, "grad_norm": 3.4026899337768555, "learning_rate": 9.582856299039254e-05, "loss": 1.692, "step": 4686 }, { "epoch": 0.27941351770175227, "grad_norm": 3.200453281402588, "learning_rate": 9.582478061437126e-05, "loss": 1.5312, "step": 4688 }, { "epoch": 0.27953272142090835, "grad_norm": 3.2750353813171387, "learning_rate": 9.58209965990408e-05, "loss": 1.4759, "step": 4690 }, { "epoch": 0.2796519251400644, "grad_norm": 3.308692693710327, "learning_rate": 9.581721094453653e-05, "loss": 1.4529, "step": 4692 }, { "epoch": 0.2797711288592204, "grad_norm": 3.108527898788452, "learning_rate": 9.581342365099384e-05, "loss": 1.4796, "step": 4694 }, { "epoch": 0.2798903325783764, "grad_norm": 3.282447576522827, "learning_rate": 9.580963471854825e-05, "loss": 1.655, "step": 4696 }, { "epoch": 0.2800095362975325, "grad_norm": 3.5798869132995605, "learning_rate": 9.580584414733529e-05, "loss": 1.4028, "step": 4698 }, { "epoch": 0.2801287400166885, "grad_norm": 2.9474570751190186, "learning_rate": 9.580205193749057e-05, "loss": 1.448, "step": 4700 }, { "epoch": 0.28024794373584455, "grad_norm": 3.5590083599090576, "learning_rate": 9.579825808914972e-05, "loss": 1.5851, "step": 4702 }, { "epoch": 0.2803671474550006, "grad_norm": 2.9159655570983887, "learning_rate": 9.579446260244849e-05, "loss": 1.4957, "step": 4704 }, { "epoch": 0.28048635117415666, "grad_norm": 2.9566636085510254, "learning_rate": 9.579066547752267e-05, "loss": 1.4841, "step": 4706 }, { "epoch": 0.2806055548933127, "grad_norm": 3.1736669540405273, "learning_rate": 9.578686671450805e-05, "loss": 1.6921, "step": 4708 }, { "epoch": 0.2807247586124687, "grad_norm": 3.342266321182251, "learning_rate": 9.578306631354054e-05, "loss": 1.6027, "step": 4710 }, { "epoch": 0.28084396233162473, "grad_norm": 3.367511510848999, "learning_rate": 9.577926427475614e-05, "loss": 1.476, "step": 4712 }, { "epoch": 0.2809631660507808, "grad_norm": 3.238633394241333, "learning_rate": 9.57754605982908e-05, "loss": 1.5121, "step": 4714 }, { "epoch": 0.28108236976993684, "grad_norm": 3.0081379413604736, "learning_rate": 9.577165528428063e-05, "loss": 1.5792, "step": 4716 }, { "epoch": 0.28120157348909286, "grad_norm": 3.168341875076294, "learning_rate": 9.576784833286171e-05, "loss": 1.5657, "step": 4718 }, { "epoch": 0.2813207772082489, "grad_norm": 3.2343573570251465, "learning_rate": 9.57640397441703e-05, "loss": 1.5136, "step": 4720 }, { "epoch": 0.2814399809274049, "grad_norm": 3.1354308128356934, "learning_rate": 9.57602295183426e-05, "loss": 1.591, "step": 4722 }, { "epoch": 0.281559184646561, "grad_norm": 3.089090585708618, "learning_rate": 9.57564176555149e-05, "loss": 1.5619, "step": 4724 }, { "epoch": 0.281678388365717, "grad_norm": 3.2372629642486572, "learning_rate": 9.575260415582361e-05, "loss": 1.5894, "step": 4726 }, { "epoch": 0.28179759208487304, "grad_norm": 3.135193109512329, "learning_rate": 9.574878901940513e-05, "loss": 1.4426, "step": 4728 }, { "epoch": 0.28191679580402906, "grad_norm": 3.1261298656463623, "learning_rate": 9.574497224639594e-05, "loss": 1.691, "step": 4730 }, { "epoch": 0.28203599952318514, "grad_norm": 3.183467149734497, "learning_rate": 9.574115383693256e-05, "loss": 1.6671, "step": 4732 }, { "epoch": 0.28215520324234117, "grad_norm": 2.765094757080078, "learning_rate": 9.573733379115161e-05, "loss": 1.5011, "step": 4734 }, { "epoch": 0.2822744069614972, "grad_norm": 3.3196771144866943, "learning_rate": 9.573351210918974e-05, "loss": 1.542, "step": 4736 }, { "epoch": 0.2823936106806532, "grad_norm": 3.5278563499450684, "learning_rate": 9.57296887911837e-05, "loss": 1.5434, "step": 4738 }, { "epoch": 0.2825128143998093, "grad_norm": 3.3224740028381348, "learning_rate": 9.572586383727018e-05, "loss": 1.6916, "step": 4740 }, { "epoch": 0.2826320181189653, "grad_norm": 3.217360734939575, "learning_rate": 9.57220372475861e-05, "loss": 1.4542, "step": 4742 }, { "epoch": 0.28275122183812135, "grad_norm": 3.4192681312561035, "learning_rate": 9.571820902226829e-05, "loss": 1.7291, "step": 4744 }, { "epoch": 0.28287042555727737, "grad_norm": 3.446218967437744, "learning_rate": 9.571437916145372e-05, "loss": 1.7072, "step": 4746 }, { "epoch": 0.28298962927643345, "grad_norm": 3.088148593902588, "learning_rate": 9.571054766527941e-05, "loss": 1.6057, "step": 4748 }, { "epoch": 0.2831088329955895, "grad_norm": 3.104292154312134, "learning_rate": 9.57067145338824e-05, "loss": 1.4518, "step": 4750 }, { "epoch": 0.2832280367147455, "grad_norm": 3.192610025405884, "learning_rate": 9.570287976739984e-05, "loss": 1.7191, "step": 4752 }, { "epoch": 0.2833472404339015, "grad_norm": 2.8586435317993164, "learning_rate": 9.569904336596889e-05, "loss": 1.3852, "step": 4754 }, { "epoch": 0.28346644415305755, "grad_norm": 3.0883665084838867, "learning_rate": 9.56952053297268e-05, "loss": 1.5495, "step": 4756 }, { "epoch": 0.28358564787221363, "grad_norm": 3.274345874786377, "learning_rate": 9.569136565881088e-05, "loss": 1.7014, "step": 4758 }, { "epoch": 0.28370485159136966, "grad_norm": 3.0109262466430664, "learning_rate": 9.568752435335848e-05, "loss": 1.5881, "step": 4760 }, { "epoch": 0.2838240553105257, "grad_norm": 3.222832441329956, "learning_rate": 9.568368141350701e-05, "loss": 1.5838, "step": 4762 }, { "epoch": 0.2839432590296817, "grad_norm": 2.938915729522705, "learning_rate": 9.567983683939394e-05, "loss": 1.4972, "step": 4764 }, { "epoch": 0.2840624627488378, "grad_norm": 3.006519079208374, "learning_rate": 9.567599063115683e-05, "loss": 1.5323, "step": 4766 }, { "epoch": 0.2841816664679938, "grad_norm": 3.2693023681640625, "learning_rate": 9.567214278893327e-05, "loss": 1.5376, "step": 4768 }, { "epoch": 0.28430087018714983, "grad_norm": 2.948835611343384, "learning_rate": 9.566829331286086e-05, "loss": 1.6017, "step": 4770 }, { "epoch": 0.28442007390630586, "grad_norm": 3.003934383392334, "learning_rate": 9.566444220307736e-05, "loss": 1.4861, "step": 4772 }, { "epoch": 0.28453927762546194, "grad_norm": 3.2166152000427246, "learning_rate": 9.566058945972054e-05, "loss": 1.5423, "step": 4774 }, { "epoch": 0.28465848134461796, "grad_norm": 3.022969961166382, "learning_rate": 9.565673508292818e-05, "loss": 1.3927, "step": 4776 }, { "epoch": 0.284777685063774, "grad_norm": 3.1492769718170166, "learning_rate": 9.565287907283822e-05, "loss": 1.3272, "step": 4778 }, { "epoch": 0.28489688878293, "grad_norm": 2.896333694458008, "learning_rate": 9.564902142958856e-05, "loss": 1.3897, "step": 4780 }, { "epoch": 0.28501609250208604, "grad_norm": 3.0082521438598633, "learning_rate": 9.564516215331722e-05, "loss": 1.6097, "step": 4782 }, { "epoch": 0.2851352962212421, "grad_norm": 3.3636839389801025, "learning_rate": 9.564130124416227e-05, "loss": 1.6395, "step": 4784 }, { "epoch": 0.28525449994039814, "grad_norm": 3.469761848449707, "learning_rate": 9.563743870226181e-05, "loss": 1.4861, "step": 4786 }, { "epoch": 0.28537370365955417, "grad_norm": 3.084181070327759, "learning_rate": 9.563357452775401e-05, "loss": 1.4696, "step": 4788 }, { "epoch": 0.2854929073787102, "grad_norm": 2.9556548595428467, "learning_rate": 9.562970872077712e-05, "loss": 1.5887, "step": 4790 }, { "epoch": 0.2856121110978663, "grad_norm": 2.9942729473114014, "learning_rate": 9.562584128146944e-05, "loss": 1.7437, "step": 4792 }, { "epoch": 0.2857313148170223, "grad_norm": 3.099820137023926, "learning_rate": 9.56219722099693e-05, "loss": 1.529, "step": 4794 }, { "epoch": 0.2858505185361783, "grad_norm": 3.1905300617218018, "learning_rate": 9.561810150641511e-05, "loss": 1.5474, "step": 4796 }, { "epoch": 0.28596972225533435, "grad_norm": 3.5255470275878906, "learning_rate": 9.561422917094536e-05, "loss": 1.6308, "step": 4798 }, { "epoch": 0.2860889259744904, "grad_norm": 3.107342004776001, "learning_rate": 9.561035520369855e-05, "loss": 1.4517, "step": 4800 }, { "epoch": 0.28620812969364645, "grad_norm": 3.3203344345092773, "learning_rate": 9.560647960481331e-05, "loss": 1.4295, "step": 4802 }, { "epoch": 0.2863273334128025, "grad_norm": 2.955579996109009, "learning_rate": 9.560260237442822e-05, "loss": 1.4887, "step": 4804 }, { "epoch": 0.2864465371319585, "grad_norm": 3.2293601036071777, "learning_rate": 9.559872351268205e-05, "loss": 1.5802, "step": 4806 }, { "epoch": 0.2865657408511146, "grad_norm": 3.063300609588623, "learning_rate": 9.559484301971349e-05, "loss": 1.5539, "step": 4808 }, { "epoch": 0.2866849445702706, "grad_norm": 3.232402801513672, "learning_rate": 9.559096089566142e-05, "loss": 1.4678, "step": 4810 }, { "epoch": 0.28680414828942663, "grad_norm": 3.3017148971557617, "learning_rate": 9.558707714066468e-05, "loss": 1.5585, "step": 4812 }, { "epoch": 0.28692335200858265, "grad_norm": 3.131727457046509, "learning_rate": 9.558319175486222e-05, "loss": 1.5544, "step": 4814 }, { "epoch": 0.2870425557277387, "grad_norm": 3.182154417037964, "learning_rate": 9.557930473839302e-05, "loss": 1.6974, "step": 4816 }, { "epoch": 0.28716175944689476, "grad_norm": 3.5844931602478027, "learning_rate": 9.557541609139615e-05, "loss": 1.6613, "step": 4818 }, { "epoch": 0.2872809631660508, "grad_norm": 3.158721685409546, "learning_rate": 9.557152581401072e-05, "loss": 1.4455, "step": 4820 }, { "epoch": 0.2874001668852068, "grad_norm": 3.097898244857788, "learning_rate": 9.556763390637589e-05, "loss": 1.5138, "step": 4822 }, { "epoch": 0.28751937060436283, "grad_norm": 4.445497512817383, "learning_rate": 9.556374036863087e-05, "loss": 1.7446, "step": 4824 }, { "epoch": 0.2876385743235189, "grad_norm": 3.029057264328003, "learning_rate": 9.555984520091496e-05, "loss": 1.5747, "step": 4826 }, { "epoch": 0.28775777804267494, "grad_norm": 2.848007917404175, "learning_rate": 9.555594840336752e-05, "loss": 1.4553, "step": 4828 }, { "epoch": 0.28787698176183096, "grad_norm": 3.1143157482147217, "learning_rate": 9.555204997612794e-05, "loss": 1.6323, "step": 4830 }, { "epoch": 0.287996185480987, "grad_norm": 3.2813374996185303, "learning_rate": 9.554814991933567e-05, "loss": 1.5164, "step": 4832 }, { "epoch": 0.28811538920014307, "grad_norm": 3.4002344608306885, "learning_rate": 9.554424823313024e-05, "loss": 1.6583, "step": 4834 }, { "epoch": 0.2882345929192991, "grad_norm": 3.2786920070648193, "learning_rate": 9.554034491765123e-05, "loss": 1.5381, "step": 4836 }, { "epoch": 0.2883537966384551, "grad_norm": 3.457812786102295, "learning_rate": 9.553643997303826e-05, "loss": 1.559, "step": 4838 }, { "epoch": 0.28847300035761114, "grad_norm": 3.1538844108581543, "learning_rate": 9.553253339943102e-05, "loss": 1.5612, "step": 4840 }, { "epoch": 0.2885922040767672, "grad_norm": 3.060164451599121, "learning_rate": 9.552862519696929e-05, "loss": 1.5577, "step": 4842 }, { "epoch": 0.28871140779592325, "grad_norm": 3.2772634029388428, "learning_rate": 9.552471536579286e-05, "loss": 1.4827, "step": 4844 }, { "epoch": 0.28883061151507927, "grad_norm": 3.196524143218994, "learning_rate": 9.55208039060416e-05, "loss": 1.5763, "step": 4846 }, { "epoch": 0.2889498152342353, "grad_norm": 4.108537197113037, "learning_rate": 9.551689081785542e-05, "loss": 1.7436, "step": 4848 }, { "epoch": 0.2890690189533913, "grad_norm": 2.8822944164276123, "learning_rate": 9.551297610137433e-05, "loss": 1.454, "step": 4850 }, { "epoch": 0.2891882226725474, "grad_norm": 2.749915361404419, "learning_rate": 9.550905975673837e-05, "loss": 1.3346, "step": 4852 }, { "epoch": 0.2893074263917034, "grad_norm": 3.54911732673645, "learning_rate": 9.550514178408763e-05, "loss": 1.6391, "step": 4854 }, { "epoch": 0.28942663011085945, "grad_norm": 3.0033156871795654, "learning_rate": 9.550122218356227e-05, "loss": 1.6204, "step": 4856 }, { "epoch": 0.2895458338300155, "grad_norm": 3.363264560699463, "learning_rate": 9.549730095530253e-05, "loss": 1.5137, "step": 4858 }, { "epoch": 0.28966503754917156, "grad_norm": 2.9546473026275635, "learning_rate": 9.549337809944864e-05, "loss": 1.5447, "step": 4860 }, { "epoch": 0.2897842412683276, "grad_norm": 2.7842578887939453, "learning_rate": 9.5489453616141e-05, "loss": 1.3988, "step": 4862 }, { "epoch": 0.2899034449874836, "grad_norm": 2.9882259368896484, "learning_rate": 9.548552750551994e-05, "loss": 1.4201, "step": 4864 }, { "epoch": 0.29002264870663963, "grad_norm": 3.1442840099334717, "learning_rate": 9.548159976772592e-05, "loss": 1.7337, "step": 4866 }, { "epoch": 0.2901418524257957, "grad_norm": 3.1810383796691895, "learning_rate": 9.547767040289948e-05, "loss": 1.4327, "step": 4868 }, { "epoch": 0.29026105614495173, "grad_norm": 3.0057027339935303, "learning_rate": 9.547373941118117e-05, "loss": 1.5199, "step": 4870 }, { "epoch": 0.29038025986410776, "grad_norm": 3.6813361644744873, "learning_rate": 9.546980679271161e-05, "loss": 1.5654, "step": 4872 }, { "epoch": 0.2904994635832638, "grad_norm": 3.097698926925659, "learning_rate": 9.546587254763149e-05, "loss": 1.4504, "step": 4874 }, { "epoch": 0.2906186673024198, "grad_norm": 3.0256330966949463, "learning_rate": 9.546193667608155e-05, "loss": 1.5043, "step": 4876 }, { "epoch": 0.2907378710215759, "grad_norm": 3.266190528869629, "learning_rate": 9.545799917820257e-05, "loss": 1.568, "step": 4878 }, { "epoch": 0.2908570747407319, "grad_norm": 3.1168808937072754, "learning_rate": 9.545406005413545e-05, "loss": 1.6115, "step": 4880 }, { "epoch": 0.29097627845988794, "grad_norm": 3.227479934692383, "learning_rate": 9.545011930402108e-05, "loss": 1.5368, "step": 4882 }, { "epoch": 0.29109548217904396, "grad_norm": 3.09354305267334, "learning_rate": 9.544617692800043e-05, "loss": 1.5336, "step": 4884 }, { "epoch": 0.29121468589820004, "grad_norm": 3.170145034790039, "learning_rate": 9.544223292621454e-05, "loss": 1.516, "step": 4886 }, { "epoch": 0.29133388961735607, "grad_norm": 2.9074676036834717, "learning_rate": 9.54382872988045e-05, "loss": 1.5698, "step": 4888 }, { "epoch": 0.2914530933365121, "grad_norm": 3.1306676864624023, "learning_rate": 9.543434004591147e-05, "loss": 1.6733, "step": 4890 }, { "epoch": 0.2915722970556681, "grad_norm": 3.052995443344116, "learning_rate": 9.543039116767662e-05, "loss": 1.4726, "step": 4892 }, { "epoch": 0.2916915007748242, "grad_norm": 3.3397865295410156, "learning_rate": 9.542644066424125e-05, "loss": 1.5112, "step": 4894 }, { "epoch": 0.2918107044939802, "grad_norm": 3.171666383743286, "learning_rate": 9.54224885357467e-05, "loss": 1.5333, "step": 4896 }, { "epoch": 0.29192990821313625, "grad_norm": 3.749962091445923, "learning_rate": 9.541853478233429e-05, "loss": 1.5547, "step": 4898 }, { "epoch": 0.29204911193229227, "grad_norm": 3.433119773864746, "learning_rate": 9.541457940414549e-05, "loss": 1.5663, "step": 4900 }, { "epoch": 0.29216831565144835, "grad_norm": 3.2945165634155273, "learning_rate": 9.541062240132182e-05, "loss": 1.5074, "step": 4902 }, { "epoch": 0.2922875193706044, "grad_norm": 3.0600979328155518, "learning_rate": 9.54066637740048e-05, "loss": 1.5237, "step": 4904 }, { "epoch": 0.2924067230897604, "grad_norm": 3.803267478942871, "learning_rate": 9.540270352233607e-05, "loss": 1.6079, "step": 4906 }, { "epoch": 0.2925259268089164, "grad_norm": 3.56306791305542, "learning_rate": 9.539874164645728e-05, "loss": 1.6171, "step": 4908 }, { "epoch": 0.29264513052807245, "grad_norm": 3.2139692306518555, "learning_rate": 9.539477814651019e-05, "loss": 1.5314, "step": 4910 }, { "epoch": 0.29276433424722853, "grad_norm": 2.8954272270202637, "learning_rate": 9.539081302263657e-05, "loss": 1.5308, "step": 4912 }, { "epoch": 0.29288353796638456, "grad_norm": 3.2797694206237793, "learning_rate": 9.538684627497825e-05, "loss": 1.5122, "step": 4914 }, { "epoch": 0.2930027416855406, "grad_norm": 3.152761220932007, "learning_rate": 9.538287790367715e-05, "loss": 1.5024, "step": 4916 }, { "epoch": 0.2931219454046966, "grad_norm": 2.9202942848205566, "learning_rate": 9.537890790887523e-05, "loss": 1.5784, "step": 4918 }, { "epoch": 0.2932411491238527, "grad_norm": 3.259695291519165, "learning_rate": 9.537493629071453e-05, "loss": 1.4772, "step": 4920 }, { "epoch": 0.2933603528430087, "grad_norm": 3.3326151371002197, "learning_rate": 9.537096304933708e-05, "loss": 1.5046, "step": 4922 }, { "epoch": 0.29347955656216473, "grad_norm": 3.2646584510803223, "learning_rate": 9.536698818488507e-05, "loss": 1.4769, "step": 4924 }, { "epoch": 0.29359876028132076, "grad_norm": 3.1233184337615967, "learning_rate": 9.536301169750068e-05, "loss": 1.594, "step": 4926 }, { "epoch": 0.29371796400047684, "grad_norm": 3.00107479095459, "learning_rate": 9.535903358732612e-05, "loss": 1.4226, "step": 4928 }, { "epoch": 0.29383716771963286, "grad_norm": 3.278097629547119, "learning_rate": 9.535505385450377e-05, "loss": 1.5517, "step": 4930 }, { "epoch": 0.2939563714387889, "grad_norm": 3.3336942195892334, "learning_rate": 9.535107249917594e-05, "loss": 1.5041, "step": 4932 }, { "epoch": 0.2940755751579449, "grad_norm": 3.2215700149536133, "learning_rate": 9.534708952148508e-05, "loss": 1.5098, "step": 4934 }, { "epoch": 0.294194778877101, "grad_norm": 3.2780215740203857, "learning_rate": 9.534310492157368e-05, "loss": 1.6502, "step": 4936 }, { "epoch": 0.294313982596257, "grad_norm": 3.298902988433838, "learning_rate": 9.533911869958428e-05, "loss": 1.5308, "step": 4938 }, { "epoch": 0.29443318631541304, "grad_norm": 2.952861785888672, "learning_rate": 9.533513085565947e-05, "loss": 1.475, "step": 4940 }, { "epoch": 0.29455239003456907, "grad_norm": 3.1351943016052246, "learning_rate": 9.533114138994192e-05, "loss": 1.638, "step": 4942 }, { "epoch": 0.2946715937537251, "grad_norm": 3.0867154598236084, "learning_rate": 9.532715030257436e-05, "loss": 1.4988, "step": 4944 }, { "epoch": 0.29479079747288117, "grad_norm": 3.471689462661743, "learning_rate": 9.532315759369952e-05, "loss": 1.4366, "step": 4946 }, { "epoch": 0.2949100011920372, "grad_norm": 3.184947967529297, "learning_rate": 9.531916326346029e-05, "loss": 1.4954, "step": 4948 }, { "epoch": 0.2950292049111932, "grad_norm": 3.043001651763916, "learning_rate": 9.53151673119995e-05, "loss": 1.2998, "step": 4950 }, { "epoch": 0.29514840863034925, "grad_norm": 3.2204723358154297, "learning_rate": 9.531116973946015e-05, "loss": 1.5973, "step": 4952 }, { "epoch": 0.2952676123495053, "grad_norm": 3.2554712295532227, "learning_rate": 9.530717054598524e-05, "loss": 1.561, "step": 4954 }, { "epoch": 0.29538681606866135, "grad_norm": 2.908526659011841, "learning_rate": 9.53031697317178e-05, "loss": 1.5704, "step": 4956 }, { "epoch": 0.2955060197878174, "grad_norm": 3.3345484733581543, "learning_rate": 9.529916729680099e-05, "loss": 1.4323, "step": 4958 }, { "epoch": 0.2956252235069734, "grad_norm": 2.9748880863189697, "learning_rate": 9.529516324137798e-05, "loss": 1.5626, "step": 4960 }, { "epoch": 0.2957444272261295, "grad_norm": 3.243945837020874, "learning_rate": 9.5291157565592e-05, "loss": 1.5302, "step": 4962 }, { "epoch": 0.2958636309452855, "grad_norm": 3.3225226402282715, "learning_rate": 9.528715026958635e-05, "loss": 1.6178, "step": 4964 }, { "epoch": 0.29598283466444153, "grad_norm": 3.2833657264709473, "learning_rate": 9.528314135350438e-05, "loss": 1.5085, "step": 4966 }, { "epoch": 0.29610203838359755, "grad_norm": 3.304749011993408, "learning_rate": 9.527913081748951e-05, "loss": 1.3147, "step": 4968 }, { "epoch": 0.2962212421027536, "grad_norm": 3.0167603492736816, "learning_rate": 9.527511866168521e-05, "loss": 1.3433, "step": 4970 }, { "epoch": 0.29634044582190966, "grad_norm": 3.0178334712982178, "learning_rate": 9.527110488623502e-05, "loss": 1.4731, "step": 4972 }, { "epoch": 0.2964596495410657, "grad_norm": 3.0865328311920166, "learning_rate": 9.52670894912825e-05, "loss": 1.3743, "step": 4974 }, { "epoch": 0.2965788532602217, "grad_norm": 3.032958984375, "learning_rate": 9.526307247697132e-05, "loss": 1.6385, "step": 4976 }, { "epoch": 0.29669805697937773, "grad_norm": 3.0614140033721924, "learning_rate": 9.525905384344517e-05, "loss": 1.5911, "step": 4978 }, { "epoch": 0.2968172606985338, "grad_norm": 3.182431936264038, "learning_rate": 9.52550335908478e-05, "loss": 1.4609, "step": 4980 }, { "epoch": 0.29693646441768984, "grad_norm": 3.1324820518493652, "learning_rate": 9.525101171932306e-05, "loss": 1.617, "step": 4982 }, { "epoch": 0.29705566813684586, "grad_norm": 3.6137139797210693, "learning_rate": 9.524698822901479e-05, "loss": 1.6689, "step": 4984 }, { "epoch": 0.2971748718560019, "grad_norm": 3.2928435802459717, "learning_rate": 9.524296312006696e-05, "loss": 1.5496, "step": 4986 }, { "epoch": 0.29729407557515797, "grad_norm": 4.206320762634277, "learning_rate": 9.523893639262351e-05, "loss": 1.6373, "step": 4988 }, { "epoch": 0.297413279294314, "grad_norm": 3.1976563930511475, "learning_rate": 9.523490804682856e-05, "loss": 1.4314, "step": 4990 }, { "epoch": 0.29753248301347, "grad_norm": 3.027874231338501, "learning_rate": 9.523087808282614e-05, "loss": 1.6627, "step": 4992 }, { "epoch": 0.29765168673262604, "grad_norm": 3.2893669605255127, "learning_rate": 9.522684650076048e-05, "loss": 1.4748, "step": 4994 }, { "epoch": 0.2977708904517821, "grad_norm": 3.421936273574829, "learning_rate": 9.522281330077578e-05, "loss": 1.5686, "step": 4996 }, { "epoch": 0.29789009417093815, "grad_norm": 3.2464945316314697, "learning_rate": 9.52187784830163e-05, "loss": 1.5959, "step": 4998 }, { "epoch": 0.29800929789009417, "grad_norm": 3.051805257797241, "learning_rate": 9.521474204762642e-05, "loss": 1.5072, "step": 5000 }, { "epoch": 0.2981285016092502, "grad_norm": 3.239159107208252, "learning_rate": 9.521070399475052e-05, "loss": 1.509, "step": 5002 }, { "epoch": 0.2982477053284062, "grad_norm": 3.121206760406494, "learning_rate": 9.520666432453303e-05, "loss": 1.5429, "step": 5004 }, { "epoch": 0.2983669090475623, "grad_norm": 3.0837299823760986, "learning_rate": 9.52026230371185e-05, "loss": 1.5184, "step": 5006 }, { "epoch": 0.2984861127667183, "grad_norm": 3.1342270374298096, "learning_rate": 9.519858013265148e-05, "loss": 1.4404, "step": 5008 }, { "epoch": 0.29860531648587435, "grad_norm": 3.3634679317474365, "learning_rate": 9.51945356112766e-05, "loss": 1.648, "step": 5010 }, { "epoch": 0.2987245202050304, "grad_norm": 3.4866340160369873, "learning_rate": 9.519048947313855e-05, "loss": 1.6454, "step": 5012 }, { "epoch": 0.29884372392418646, "grad_norm": 3.2859718799591064, "learning_rate": 9.518644171838208e-05, "loss": 1.6243, "step": 5014 }, { "epoch": 0.2989629276433425, "grad_norm": 2.8871994018554688, "learning_rate": 9.518239234715198e-05, "loss": 1.4861, "step": 5016 }, { "epoch": 0.2990821313624985, "grad_norm": 3.043259620666504, "learning_rate": 9.517834135959311e-05, "loss": 1.5821, "step": 5018 }, { "epoch": 0.29920133508165453, "grad_norm": 3.2219080924987793, "learning_rate": 9.517428875585041e-05, "loss": 1.5222, "step": 5020 }, { "epoch": 0.2993205388008106, "grad_norm": 3.4097518920898438, "learning_rate": 9.517023453606884e-05, "loss": 1.4663, "step": 5022 }, { "epoch": 0.29943974251996663, "grad_norm": 3.588890552520752, "learning_rate": 9.516617870039342e-05, "loss": 1.6018, "step": 5024 }, { "epoch": 0.29955894623912266, "grad_norm": 3.6274755001068115, "learning_rate": 9.516212124896926e-05, "loss": 1.5523, "step": 5026 }, { "epoch": 0.2996781499582787, "grad_norm": 3.275925636291504, "learning_rate": 9.51580621819415e-05, "loss": 1.5447, "step": 5028 }, { "epoch": 0.29979735367743476, "grad_norm": 2.954026460647583, "learning_rate": 9.515400149945535e-05, "loss": 1.3703, "step": 5030 }, { "epoch": 0.2999165573965908, "grad_norm": 3.124220371246338, "learning_rate": 9.514993920165607e-05, "loss": 1.6303, "step": 5032 }, { "epoch": 0.3000357611157468, "grad_norm": 3.0530855655670166, "learning_rate": 9.514587528868898e-05, "loss": 1.623, "step": 5034 }, { "epoch": 0.30015496483490284, "grad_norm": 3.1445066928863525, "learning_rate": 9.514180976069947e-05, "loss": 1.5715, "step": 5036 }, { "epoch": 0.30027416855405886, "grad_norm": 3.076817750930786, "learning_rate": 9.513774261783298e-05, "loss": 1.5837, "step": 5038 }, { "epoch": 0.30039337227321494, "grad_norm": 3.5814363956451416, "learning_rate": 9.5133673860235e-05, "loss": 1.6379, "step": 5040 }, { "epoch": 0.30051257599237097, "grad_norm": 3.564347505569458, "learning_rate": 9.512960348805108e-05, "loss": 1.5719, "step": 5042 }, { "epoch": 0.300631779711527, "grad_norm": 2.8188560009002686, "learning_rate": 9.512553150142684e-05, "loss": 1.3807, "step": 5044 }, { "epoch": 0.300750983430683, "grad_norm": 3.2689013481140137, "learning_rate": 9.512145790050793e-05, "loss": 1.5272, "step": 5046 }, { "epoch": 0.3008701871498391, "grad_norm": 3.2064361572265625, "learning_rate": 9.51173826854401e-05, "loss": 1.471, "step": 5048 }, { "epoch": 0.3009893908689951, "grad_norm": 3.4726345539093018, "learning_rate": 9.511330585636912e-05, "loss": 1.8402, "step": 5050 }, { "epoch": 0.30110859458815115, "grad_norm": 3.6736764907836914, "learning_rate": 9.510922741344084e-05, "loss": 1.6469, "step": 5052 }, { "epoch": 0.30122779830730717, "grad_norm": 3.1719729900360107, "learning_rate": 9.510514735680114e-05, "loss": 1.4247, "step": 5054 }, { "epoch": 0.30134700202646325, "grad_norm": 3.2086846828460693, "learning_rate": 9.5101065686596e-05, "loss": 1.5042, "step": 5056 }, { "epoch": 0.3014662057456193, "grad_norm": 2.915534019470215, "learning_rate": 9.509698240297144e-05, "loss": 1.5159, "step": 5058 }, { "epoch": 0.3015854094647753, "grad_norm": 3.4971060752868652, "learning_rate": 9.509289750607352e-05, "loss": 1.5996, "step": 5060 }, { "epoch": 0.3017046131839313, "grad_norm": 3.1543991565704346, "learning_rate": 9.508881099604837e-05, "loss": 1.7167, "step": 5062 }, { "epoch": 0.30182381690308735, "grad_norm": 2.9662530422210693, "learning_rate": 9.508472287304217e-05, "loss": 1.601, "step": 5064 }, { "epoch": 0.30194302062224343, "grad_norm": 3.4160094261169434, "learning_rate": 9.508063313720119e-05, "loss": 1.5173, "step": 5066 }, { "epoch": 0.30206222434139945, "grad_norm": 3.2825353145599365, "learning_rate": 9.507654178867173e-05, "loss": 1.7079, "step": 5068 }, { "epoch": 0.3021814280605555, "grad_norm": 3.0762572288513184, "learning_rate": 9.507244882760013e-05, "loss": 1.362, "step": 5070 }, { "epoch": 0.3023006317797115, "grad_norm": 3.3378655910491943, "learning_rate": 9.50683542541328e-05, "loss": 1.4396, "step": 5072 }, { "epoch": 0.3024198354988676, "grad_norm": 3.051704168319702, "learning_rate": 9.506425806841628e-05, "loss": 1.4727, "step": 5074 }, { "epoch": 0.3025390392180236, "grad_norm": 3.1690964698791504, "learning_rate": 9.506016027059703e-05, "loss": 1.5329, "step": 5076 }, { "epoch": 0.30265824293717963, "grad_norm": 3.1909055709838867, "learning_rate": 9.50560608608217e-05, "loss": 1.4964, "step": 5078 }, { "epoch": 0.30277744665633566, "grad_norm": 4.274167060852051, "learning_rate": 9.505195983923691e-05, "loss": 1.6003, "step": 5080 }, { "epoch": 0.30289665037549174, "grad_norm": 3.2434916496276855, "learning_rate": 9.504785720598936e-05, "loss": 1.4776, "step": 5082 }, { "epoch": 0.30301585409464776, "grad_norm": 2.9129488468170166, "learning_rate": 9.504375296122583e-05, "loss": 1.5361, "step": 5084 }, { "epoch": 0.3031350578138038, "grad_norm": 2.9780423641204834, "learning_rate": 9.503964710509314e-05, "loss": 1.4081, "step": 5086 }, { "epoch": 0.3032542615329598, "grad_norm": 3.3958323001861572, "learning_rate": 9.50355396377382e-05, "loss": 1.4789, "step": 5088 }, { "epoch": 0.3033734652521159, "grad_norm": 3.144693374633789, "learning_rate": 9.503143055930788e-05, "loss": 1.6775, "step": 5090 }, { "epoch": 0.3034926689712719, "grad_norm": 3.4055919647216797, "learning_rate": 9.502731986994924e-05, "loss": 1.4737, "step": 5092 }, { "epoch": 0.30361187269042794, "grad_norm": 3.031198024749756, "learning_rate": 9.50232075698093e-05, "loss": 1.4829, "step": 5094 }, { "epoch": 0.30373107640958397, "grad_norm": 3.023434638977051, "learning_rate": 9.501909365903519e-05, "loss": 1.5345, "step": 5096 }, { "epoch": 0.30385028012874, "grad_norm": 3.2235002517700195, "learning_rate": 9.501497813777405e-05, "loss": 1.5957, "step": 5098 }, { "epoch": 0.30396948384789607, "grad_norm": 3.461583137512207, "learning_rate": 9.501086100617314e-05, "loss": 1.6729, "step": 5100 }, { "epoch": 0.3040886875670521, "grad_norm": 3.081343412399292, "learning_rate": 9.500674226437973e-05, "loss": 1.4844, "step": 5102 }, { "epoch": 0.3042078912862081, "grad_norm": 3.230583429336548, "learning_rate": 9.500262191254114e-05, "loss": 1.4826, "step": 5104 }, { "epoch": 0.30432709500536415, "grad_norm": 2.9400415420532227, "learning_rate": 9.499849995080481e-05, "loss": 1.7159, "step": 5106 }, { "epoch": 0.3044462987245202, "grad_norm": 3.2333247661590576, "learning_rate": 9.499437637931817e-05, "loss": 1.4536, "step": 5108 }, { "epoch": 0.30456550244367625, "grad_norm": 3.037260055541992, "learning_rate": 9.499025119822875e-05, "loss": 1.3166, "step": 5110 }, { "epoch": 0.3046847061628323, "grad_norm": 2.8004300594329834, "learning_rate": 9.49861244076841e-05, "loss": 1.642, "step": 5112 }, { "epoch": 0.3048039098819883, "grad_norm": 3.044008255004883, "learning_rate": 9.498199600783187e-05, "loss": 1.676, "step": 5114 }, { "epoch": 0.3049231136011444, "grad_norm": 3.193570137023926, "learning_rate": 9.497786599881973e-05, "loss": 1.4324, "step": 5116 }, { "epoch": 0.3050423173203004, "grad_norm": 3.340731382369995, "learning_rate": 9.497373438079543e-05, "loss": 1.6186, "step": 5118 }, { "epoch": 0.30516152103945643, "grad_norm": 3.2029612064361572, "learning_rate": 9.496960115390678e-05, "loss": 1.7033, "step": 5120 }, { "epoch": 0.30528072475861245, "grad_norm": 2.9689292907714844, "learning_rate": 9.496546631830164e-05, "loss": 1.4339, "step": 5122 }, { "epoch": 0.30539992847776853, "grad_norm": 3.1982297897338867, "learning_rate": 9.496132987412792e-05, "loss": 1.5569, "step": 5124 }, { "epoch": 0.30551913219692456, "grad_norm": 3.342332124710083, "learning_rate": 9.49571918215336e-05, "loss": 1.4967, "step": 5126 }, { "epoch": 0.3056383359160806, "grad_norm": 3.1145169734954834, "learning_rate": 9.49530521606667e-05, "loss": 1.6775, "step": 5128 }, { "epoch": 0.3057575396352366, "grad_norm": 2.7782983779907227, "learning_rate": 9.494891089167532e-05, "loss": 1.5136, "step": 5130 }, { "epoch": 0.30587674335439263, "grad_norm": 2.9866745471954346, "learning_rate": 9.494476801470761e-05, "loss": 1.4614, "step": 5132 }, { "epoch": 0.3059959470735487, "grad_norm": 3.476433515548706, "learning_rate": 9.494062352991177e-05, "loss": 1.5856, "step": 5134 }, { "epoch": 0.30611515079270474, "grad_norm": 3.2575302124023438, "learning_rate": 9.493647743743605e-05, "loss": 1.6094, "step": 5136 }, { "epoch": 0.30623435451186076, "grad_norm": 2.860130786895752, "learning_rate": 9.49323297374288e-05, "loss": 1.535, "step": 5138 }, { "epoch": 0.3063535582310168, "grad_norm": 3.731839895248413, "learning_rate": 9.492818043003839e-05, "loss": 1.543, "step": 5140 }, { "epoch": 0.30647276195017287, "grad_norm": 3.184924364089966, "learning_rate": 9.492402951541324e-05, "loss": 1.4595, "step": 5142 }, { "epoch": 0.3065919656693289, "grad_norm": 3.0816709995269775, "learning_rate": 9.491987699370184e-05, "loss": 1.5861, "step": 5144 }, { "epoch": 0.3067111693884849, "grad_norm": 3.3111941814422607, "learning_rate": 9.491572286505275e-05, "loss": 1.3552, "step": 5146 }, { "epoch": 0.30683037310764094, "grad_norm": 3.1438848972320557, "learning_rate": 9.491156712961458e-05, "loss": 1.572, "step": 5148 }, { "epoch": 0.306949576826797, "grad_norm": 3.0444014072418213, "learning_rate": 9.490740978753598e-05, "loss": 1.44, "step": 5150 }, { "epoch": 0.30706878054595305, "grad_norm": 3.393939733505249, "learning_rate": 9.49032508389657e-05, "loss": 1.7676, "step": 5152 }, { "epoch": 0.30718798426510907, "grad_norm": 3.337449550628662, "learning_rate": 9.489909028405248e-05, "loss": 1.57, "step": 5154 }, { "epoch": 0.3073071879842651, "grad_norm": 3.2386667728424072, "learning_rate": 9.48949281229452e-05, "loss": 1.5461, "step": 5156 }, { "epoch": 0.3074263917034211, "grad_norm": 3.2412734031677246, "learning_rate": 9.489076435579272e-05, "loss": 1.6496, "step": 5158 }, { "epoch": 0.3075455954225772, "grad_norm": 3.241642951965332, "learning_rate": 9.488659898274402e-05, "loss": 1.6411, "step": 5160 }, { "epoch": 0.3076647991417332, "grad_norm": 3.240355968475342, "learning_rate": 9.48824320039481e-05, "loss": 1.4545, "step": 5162 }, { "epoch": 0.30778400286088925, "grad_norm": 3.072890520095825, "learning_rate": 9.487826341955401e-05, "loss": 1.3803, "step": 5164 }, { "epoch": 0.3079032065800453, "grad_norm": 3.1125917434692383, "learning_rate": 9.48740932297109e-05, "loss": 1.4796, "step": 5166 }, { "epoch": 0.30802241029920135, "grad_norm": 3.261962890625, "learning_rate": 9.486992143456792e-05, "loss": 1.4693, "step": 5168 }, { "epoch": 0.3081416140183574, "grad_norm": 3.5255556106567383, "learning_rate": 9.486574803427435e-05, "loss": 1.4575, "step": 5170 }, { "epoch": 0.3082608177375134, "grad_norm": 3.278642177581787, "learning_rate": 9.486157302897945e-05, "loss": 1.4283, "step": 5172 }, { "epoch": 0.30838002145666943, "grad_norm": 3.3033392429351807, "learning_rate": 9.48573964188326e-05, "loss": 1.4794, "step": 5174 }, { "epoch": 0.3084992251758255, "grad_norm": 3.0110249519348145, "learning_rate": 9.485321820398321e-05, "loss": 1.5528, "step": 5176 }, { "epoch": 0.30861842889498153, "grad_norm": 3.4598441123962402, "learning_rate": 9.484903838458072e-05, "loss": 1.5735, "step": 5178 }, { "epoch": 0.30873763261413756, "grad_norm": 3.076693534851074, "learning_rate": 9.48448569607747e-05, "loss": 1.6036, "step": 5180 }, { "epoch": 0.3088568363332936, "grad_norm": 3.0669326782226562, "learning_rate": 9.48406739327147e-05, "loss": 1.5423, "step": 5182 }, { "epoch": 0.30897604005244966, "grad_norm": 2.998628854751587, "learning_rate": 9.483648930055038e-05, "loss": 1.473, "step": 5184 }, { "epoch": 0.3090952437716057, "grad_norm": 3.295362949371338, "learning_rate": 9.483230306443142e-05, "loss": 1.7846, "step": 5186 }, { "epoch": 0.3092144474907617, "grad_norm": 3.239880323410034, "learning_rate": 9.48281152245076e-05, "loss": 1.4886, "step": 5188 }, { "epoch": 0.30933365120991774, "grad_norm": 2.920661687850952, "learning_rate": 9.482392578092871e-05, "loss": 1.3797, "step": 5190 }, { "epoch": 0.30945285492907376, "grad_norm": 2.9148366451263428, "learning_rate": 9.481973473384464e-05, "loss": 1.3421, "step": 5192 }, { "epoch": 0.30957205864822984, "grad_norm": 3.0754013061523438, "learning_rate": 9.481554208340531e-05, "loss": 1.5017, "step": 5194 }, { "epoch": 0.30969126236738587, "grad_norm": 3.1309783458709717, "learning_rate": 9.481134782976069e-05, "loss": 1.4698, "step": 5196 }, { "epoch": 0.3098104660865419, "grad_norm": 3.1094624996185303, "learning_rate": 9.480715197306085e-05, "loss": 1.5317, "step": 5198 }, { "epoch": 0.3099296698056979, "grad_norm": 3.2720072269439697, "learning_rate": 9.480295451345587e-05, "loss": 1.5042, "step": 5200 }, { "epoch": 0.310048873524854, "grad_norm": 3.3551828861236572, "learning_rate": 9.479875545109592e-05, "loss": 1.4165, "step": 5202 }, { "epoch": 0.31016807724401, "grad_norm": 3.275683879852295, "learning_rate": 9.479455478613122e-05, "loss": 1.4478, "step": 5204 }, { "epoch": 0.31028728096316605, "grad_norm": 2.873471260070801, "learning_rate": 9.479035251871202e-05, "loss": 1.6234, "step": 5206 }, { "epoch": 0.31040648468232207, "grad_norm": 3.2551534175872803, "learning_rate": 9.478614864898868e-05, "loss": 1.5645, "step": 5208 }, { "epoch": 0.31052568840147815, "grad_norm": 3.0537922382354736, "learning_rate": 9.478194317711153e-05, "loss": 1.5429, "step": 5210 }, { "epoch": 0.3106448921206342, "grad_norm": 3.334773302078247, "learning_rate": 9.477773610323108e-05, "loss": 1.5648, "step": 5212 }, { "epoch": 0.3107640958397902, "grad_norm": 3.015465259552002, "learning_rate": 9.477352742749782e-05, "loss": 1.4811, "step": 5214 }, { "epoch": 0.3108832995589462, "grad_norm": 3.435375452041626, "learning_rate": 9.476931715006225e-05, "loss": 1.5047, "step": 5216 }, { "epoch": 0.31100250327810225, "grad_norm": 3.1061275005340576, "learning_rate": 9.476510527107507e-05, "loss": 1.4736, "step": 5218 }, { "epoch": 0.31112170699725833, "grad_norm": 2.981306314468384, "learning_rate": 9.476089179068689e-05, "loss": 1.5235, "step": 5220 }, { "epoch": 0.31124091071641435, "grad_norm": 3.30625319480896, "learning_rate": 9.475667670904847e-05, "loss": 1.5587, "step": 5222 }, { "epoch": 0.3113601144355704, "grad_norm": 3.096696376800537, "learning_rate": 9.475246002631059e-05, "loss": 1.5417, "step": 5224 }, { "epoch": 0.3114793181547264, "grad_norm": 3.118129014968872, "learning_rate": 9.474824174262408e-05, "loss": 1.5377, "step": 5226 }, { "epoch": 0.3115985218738825, "grad_norm": 3.250837564468384, "learning_rate": 9.474402185813989e-05, "loss": 1.4987, "step": 5228 }, { "epoch": 0.3117177255930385, "grad_norm": 3.391319751739502, "learning_rate": 9.473980037300892e-05, "loss": 1.5603, "step": 5230 }, { "epoch": 0.31183692931219453, "grad_norm": 3.064580202102661, "learning_rate": 9.473557728738222e-05, "loss": 1.5112, "step": 5232 }, { "epoch": 0.31195613303135056, "grad_norm": 3.224332571029663, "learning_rate": 9.473135260141089e-05, "loss": 1.607, "step": 5234 }, { "epoch": 0.31207533675050664, "grad_norm": 3.312861204147339, "learning_rate": 9.472712631524599e-05, "loss": 1.7242, "step": 5236 }, { "epoch": 0.31219454046966266, "grad_norm": 3.034890651702881, "learning_rate": 9.472289842903876e-05, "loss": 1.4282, "step": 5238 }, { "epoch": 0.3123137441888187, "grad_norm": 3.466587781906128, "learning_rate": 9.471866894294045e-05, "loss": 1.5056, "step": 5240 }, { "epoch": 0.3124329479079747, "grad_norm": 3.1213886737823486, "learning_rate": 9.471443785710234e-05, "loss": 1.5765, "step": 5242 }, { "epoch": 0.3125521516271308, "grad_norm": 3.31585955619812, "learning_rate": 9.47102051716758e-05, "loss": 1.4007, "step": 5244 }, { "epoch": 0.3126713553462868, "grad_norm": 3.3130738735198975, "learning_rate": 9.470597088681224e-05, "loss": 1.5295, "step": 5246 }, { "epoch": 0.31279055906544284, "grad_norm": 3.074357271194458, "learning_rate": 9.470173500266314e-05, "loss": 1.601, "step": 5248 }, { "epoch": 0.31290976278459887, "grad_norm": 3.1606972217559814, "learning_rate": 9.469749751938006e-05, "loss": 1.5575, "step": 5250 }, { "epoch": 0.3130289665037549, "grad_norm": 3.2692058086395264, "learning_rate": 9.469325843711455e-05, "loss": 1.5503, "step": 5252 }, { "epoch": 0.31314817022291097, "grad_norm": 3.338106632232666, "learning_rate": 9.468901775601825e-05, "loss": 1.6387, "step": 5254 }, { "epoch": 0.313267373942067, "grad_norm": 3.254753828048706, "learning_rate": 9.468477547624289e-05, "loss": 1.5328, "step": 5256 }, { "epoch": 0.313386577661223, "grad_norm": 3.2630817890167236, "learning_rate": 9.468053159794024e-05, "loss": 1.325, "step": 5258 }, { "epoch": 0.31350578138037904, "grad_norm": 3.3477590084075928, "learning_rate": 9.467628612126209e-05, "loss": 1.509, "step": 5260 }, { "epoch": 0.3136249850995351, "grad_norm": 2.905951976776123, "learning_rate": 9.467203904636033e-05, "loss": 1.3993, "step": 5262 }, { "epoch": 0.31374418881869115, "grad_norm": 3.1579818725585938, "learning_rate": 9.466779037338689e-05, "loss": 1.6218, "step": 5264 }, { "epoch": 0.3138633925378472, "grad_norm": 3.1596851348876953, "learning_rate": 9.466354010249375e-05, "loss": 1.5164, "step": 5266 }, { "epoch": 0.3139825962570032, "grad_norm": 3.3887672424316406, "learning_rate": 9.465928823383298e-05, "loss": 1.5242, "step": 5268 }, { "epoch": 0.3141017999761593, "grad_norm": 3.1852993965148926, "learning_rate": 9.465503476755667e-05, "loss": 1.4631, "step": 5270 }, { "epoch": 0.3142210036953153, "grad_norm": 3.555940866470337, "learning_rate": 9.465077970381697e-05, "loss": 1.546, "step": 5272 }, { "epoch": 0.31434020741447133, "grad_norm": 3.0646932125091553, "learning_rate": 9.464652304276611e-05, "loss": 1.4102, "step": 5274 }, { "epoch": 0.31445941113362735, "grad_norm": 3.194326639175415, "learning_rate": 9.464226478455636e-05, "loss": 1.583, "step": 5276 }, { "epoch": 0.31457861485278343, "grad_norm": 3.2426087856292725, "learning_rate": 9.463800492934007e-05, "loss": 1.5317, "step": 5278 }, { "epoch": 0.31469781857193946, "grad_norm": 3.08128023147583, "learning_rate": 9.46337434772696e-05, "loss": 1.418, "step": 5280 }, { "epoch": 0.3148170222910955, "grad_norm": 3.3504045009613037, "learning_rate": 9.462948042849745e-05, "loss": 1.4571, "step": 5282 }, { "epoch": 0.3149362260102515, "grad_norm": 3.410836935043335, "learning_rate": 9.462521578317605e-05, "loss": 1.6344, "step": 5284 }, { "epoch": 0.31505542972940753, "grad_norm": 3.0470077991485596, "learning_rate": 9.4620949541458e-05, "loss": 1.6175, "step": 5286 }, { "epoch": 0.3151746334485636, "grad_norm": 3.9891278743743896, "learning_rate": 9.461668170349593e-05, "loss": 1.4868, "step": 5288 }, { "epoch": 0.31529383716771964, "grad_norm": 3.1833577156066895, "learning_rate": 9.46124122694425e-05, "loss": 1.4884, "step": 5290 }, { "epoch": 0.31541304088687566, "grad_norm": 3.044454336166382, "learning_rate": 9.460814123945043e-05, "loss": 1.5145, "step": 5292 }, { "epoch": 0.3155322446060317, "grad_norm": 2.958728551864624, "learning_rate": 9.460386861367254e-05, "loss": 1.4821, "step": 5294 }, { "epoch": 0.31565144832518777, "grad_norm": 3.1206088066101074, "learning_rate": 9.459959439226165e-05, "loss": 1.4749, "step": 5296 }, { "epoch": 0.3157706520443438, "grad_norm": 2.980412483215332, "learning_rate": 9.459531857537067e-05, "loss": 1.7817, "step": 5298 }, { "epoch": 0.3158898557634998, "grad_norm": 2.996438980102539, "learning_rate": 9.459104116315257e-05, "loss": 1.3006, "step": 5300 }, { "epoch": 0.31600905948265584, "grad_norm": 3.422783136367798, "learning_rate": 9.458676215576037e-05, "loss": 1.5411, "step": 5302 }, { "epoch": 0.3161282632018119, "grad_norm": 2.9994401931762695, "learning_rate": 9.458248155334711e-05, "loss": 1.4795, "step": 5304 }, { "epoch": 0.31624746692096795, "grad_norm": 2.974851608276367, "learning_rate": 9.457819935606597e-05, "loss": 1.5679, "step": 5306 }, { "epoch": 0.31636667064012397, "grad_norm": 3.2047829627990723, "learning_rate": 9.45739155640701e-05, "loss": 1.4131, "step": 5308 }, { "epoch": 0.31648587435928, "grad_norm": 3.606198787689209, "learning_rate": 9.456963017751277e-05, "loss": 1.4251, "step": 5310 }, { "epoch": 0.316605078078436, "grad_norm": 3.13852596282959, "learning_rate": 9.456534319654729e-05, "loss": 1.4952, "step": 5312 }, { "epoch": 0.3167242817975921, "grad_norm": 3.1790690422058105, "learning_rate": 9.456105462132698e-05, "loss": 1.5149, "step": 5314 }, { "epoch": 0.3168434855167481, "grad_norm": 3.1193008422851562, "learning_rate": 9.45567644520053e-05, "loss": 1.4727, "step": 5316 }, { "epoch": 0.31696268923590415, "grad_norm": 3.1925172805786133, "learning_rate": 9.455247268873569e-05, "loss": 1.4807, "step": 5318 }, { "epoch": 0.3170818929550602, "grad_norm": 2.8565146923065186, "learning_rate": 9.454817933167171e-05, "loss": 1.6542, "step": 5320 }, { "epoch": 0.31720109667421625, "grad_norm": 3.520625352859497, "learning_rate": 9.454388438096693e-05, "loss": 1.7116, "step": 5322 }, { "epoch": 0.3173203003933723, "grad_norm": 2.764164447784424, "learning_rate": 9.4539587836775e-05, "loss": 1.6357, "step": 5324 }, { "epoch": 0.3174395041125283, "grad_norm": 2.8812289237976074, "learning_rate": 9.453528969924963e-05, "loss": 1.339, "step": 5326 }, { "epoch": 0.31755870783168433, "grad_norm": 3.536576271057129, "learning_rate": 9.453098996854456e-05, "loss": 1.6576, "step": 5328 }, { "epoch": 0.3176779115508404, "grad_norm": 3.3525562286376953, "learning_rate": 9.452668864481363e-05, "loss": 1.4661, "step": 5330 }, { "epoch": 0.31779711526999643, "grad_norm": 3.198549747467041, "learning_rate": 9.452238572821068e-05, "loss": 1.4175, "step": 5332 }, { "epoch": 0.31791631898915246, "grad_norm": 3.279179096221924, "learning_rate": 9.451808121888969e-05, "loss": 1.5467, "step": 5334 }, { "epoch": 0.3180355227083085, "grad_norm": 3.1892569065093994, "learning_rate": 9.45137751170046e-05, "loss": 1.3565, "step": 5336 }, { "epoch": 0.31815472642746456, "grad_norm": 3.3972697257995605, "learning_rate": 9.450946742270948e-05, "loss": 1.574, "step": 5338 }, { "epoch": 0.3182739301466206, "grad_norm": 3.135770320892334, "learning_rate": 9.450515813615841e-05, "loss": 1.7242, "step": 5340 }, { "epoch": 0.3183931338657766, "grad_norm": 3.115259885787964, "learning_rate": 9.450084725750556e-05, "loss": 1.5718, "step": 5342 }, { "epoch": 0.31851233758493264, "grad_norm": 3.114417791366577, "learning_rate": 9.449653478690516e-05, "loss": 1.4389, "step": 5344 }, { "epoch": 0.31863154130408866, "grad_norm": 3.032707691192627, "learning_rate": 9.449222072451147e-05, "loss": 1.6165, "step": 5346 }, { "epoch": 0.31875074502324474, "grad_norm": 3.136658191680908, "learning_rate": 9.44879050704788e-05, "loss": 1.5398, "step": 5348 }, { "epoch": 0.31886994874240077, "grad_norm": 3.060586452484131, "learning_rate": 9.448358782496157e-05, "loss": 1.6844, "step": 5350 }, { "epoch": 0.3189891524615568, "grad_norm": 3.373425245285034, "learning_rate": 9.447926898811419e-05, "loss": 1.7413, "step": 5352 }, { "epoch": 0.3191083561807128, "grad_norm": 2.919524669647217, "learning_rate": 9.447494856009118e-05, "loss": 1.4199, "step": 5354 }, { "epoch": 0.3192275598998689, "grad_norm": 2.949401378631592, "learning_rate": 9.447062654104708e-05, "loss": 1.4763, "step": 5356 }, { "epoch": 0.3193467636190249, "grad_norm": 3.515465497970581, "learning_rate": 9.446630293113651e-05, "loss": 1.5227, "step": 5358 }, { "epoch": 0.31946596733818095, "grad_norm": 3.4183716773986816, "learning_rate": 9.446197773051418e-05, "loss": 1.5871, "step": 5360 }, { "epoch": 0.31958517105733697, "grad_norm": 3.37990403175354, "learning_rate": 9.445765093933475e-05, "loss": 1.6384, "step": 5362 }, { "epoch": 0.31970437477649305, "grad_norm": 3.2364790439605713, "learning_rate": 9.445332255775305e-05, "loss": 1.5001, "step": 5364 }, { "epoch": 0.3198235784956491, "grad_norm": 3.1732561588287354, "learning_rate": 9.44489925859239e-05, "loss": 1.5827, "step": 5366 }, { "epoch": 0.3199427822148051, "grad_norm": 3.0753748416900635, "learning_rate": 9.44446610240022e-05, "loss": 1.5612, "step": 5368 }, { "epoch": 0.3200619859339611, "grad_norm": 3.105729579925537, "learning_rate": 9.444032787214293e-05, "loss": 1.3962, "step": 5370 }, { "epoch": 0.3201811896531172, "grad_norm": 3.175410747528076, "learning_rate": 9.443599313050105e-05, "loss": 1.4811, "step": 5372 }, { "epoch": 0.32030039337227323, "grad_norm": 3.3437540531158447, "learning_rate": 9.443165679923168e-05, "loss": 1.4717, "step": 5374 }, { "epoch": 0.32041959709142925, "grad_norm": 2.9960649013519287, "learning_rate": 9.442731887848992e-05, "loss": 1.5331, "step": 5376 }, { "epoch": 0.3205388008105853, "grad_norm": 3.1699979305267334, "learning_rate": 9.442297936843097e-05, "loss": 1.5428, "step": 5378 }, { "epoch": 0.3206580045297413, "grad_norm": 3.2008984088897705, "learning_rate": 9.441863826921003e-05, "loss": 1.6108, "step": 5380 }, { "epoch": 0.3207772082488974, "grad_norm": 2.744929790496826, "learning_rate": 9.441429558098245e-05, "loss": 1.5703, "step": 5382 }, { "epoch": 0.3208964119680534, "grad_norm": 3.014240264892578, "learning_rate": 9.440995130390353e-05, "loss": 1.5439, "step": 5384 }, { "epoch": 0.32101561568720943, "grad_norm": 3.136612892150879, "learning_rate": 9.440560543812872e-05, "loss": 1.5301, "step": 5386 }, { "epoch": 0.32113481940636546, "grad_norm": 2.6495492458343506, "learning_rate": 9.440125798381348e-05, "loss": 1.4079, "step": 5388 }, { "epoch": 0.32125402312552154, "grad_norm": 3.1101293563842773, "learning_rate": 9.43969089411133e-05, "loss": 1.3515, "step": 5390 }, { "epoch": 0.32137322684467756, "grad_norm": 3.0192153453826904, "learning_rate": 9.43925583101838e-05, "loss": 1.509, "step": 5392 }, { "epoch": 0.3214924305638336, "grad_norm": 3.0872304439544678, "learning_rate": 9.43882060911806e-05, "loss": 1.4639, "step": 5394 }, { "epoch": 0.3216116342829896, "grad_norm": 3.345735788345337, "learning_rate": 9.438385228425938e-05, "loss": 1.6851, "step": 5396 }, { "epoch": 0.3217308380021457, "grad_norm": 3.1631217002868652, "learning_rate": 9.437949688957594e-05, "loss": 1.3737, "step": 5398 }, { "epoch": 0.3218500417213017, "grad_norm": 3.6982462406158447, "learning_rate": 9.437513990728604e-05, "loss": 1.6186, "step": 5400 }, { "epoch": 0.32196924544045774, "grad_norm": 3.1487514972686768, "learning_rate": 9.437078133754553e-05, "loss": 1.5124, "step": 5402 }, { "epoch": 0.32208844915961377, "grad_norm": 3.1659128665924072, "learning_rate": 9.436642118051039e-05, "loss": 1.5385, "step": 5404 }, { "epoch": 0.3222076528787698, "grad_norm": 3.1411967277526855, "learning_rate": 9.436205943633656e-05, "loss": 1.4788, "step": 5406 }, { "epoch": 0.32232685659792587, "grad_norm": 3.412505865097046, "learning_rate": 9.435769610518008e-05, "loss": 1.4052, "step": 5408 }, { "epoch": 0.3224460603170819, "grad_norm": 2.6842501163482666, "learning_rate": 9.435333118719703e-05, "loss": 1.2874, "step": 5410 }, { "epoch": 0.3225652640362379, "grad_norm": 3.0977745056152344, "learning_rate": 9.434896468254359e-05, "loss": 1.5639, "step": 5412 }, { "epoch": 0.32268446775539394, "grad_norm": 2.840681552886963, "learning_rate": 9.434459659137593e-05, "loss": 1.3786, "step": 5414 }, { "epoch": 0.32280367147455, "grad_norm": 3.226121425628662, "learning_rate": 9.434022691385033e-05, "loss": 1.6247, "step": 5416 }, { "epoch": 0.32292287519370605, "grad_norm": 3.077732801437378, "learning_rate": 9.433585565012311e-05, "loss": 1.4465, "step": 5418 }, { "epoch": 0.3230420789128621, "grad_norm": 3.3843815326690674, "learning_rate": 9.433148280035063e-05, "loss": 1.4989, "step": 5420 }, { "epoch": 0.3231612826320181, "grad_norm": 2.9853625297546387, "learning_rate": 9.432710836468934e-05, "loss": 1.5705, "step": 5422 }, { "epoch": 0.3232804863511742, "grad_norm": 3.290572166442871, "learning_rate": 9.432273234329572e-05, "loss": 1.5225, "step": 5424 }, { "epoch": 0.3233996900703302, "grad_norm": 3.224750518798828, "learning_rate": 9.43183547363263e-05, "loss": 1.394, "step": 5426 }, { "epoch": 0.32351889378948623, "grad_norm": 3.274851083755493, "learning_rate": 9.431397554393772e-05, "loss": 1.5915, "step": 5428 }, { "epoch": 0.32363809750864225, "grad_norm": 2.6562790870666504, "learning_rate": 9.430959476628662e-05, "loss": 1.4655, "step": 5430 }, { "epoch": 0.32375730122779833, "grad_norm": 2.8568203449249268, "learning_rate": 9.43052124035297e-05, "loss": 1.5173, "step": 5432 }, { "epoch": 0.32387650494695436, "grad_norm": 2.875783681869507, "learning_rate": 9.430082845582373e-05, "loss": 1.4458, "step": 5434 }, { "epoch": 0.3239957086661104, "grad_norm": 2.8465757369995117, "learning_rate": 9.429644292332557e-05, "loss": 1.4578, "step": 5436 }, { "epoch": 0.3241149123852664, "grad_norm": 3.165846824645996, "learning_rate": 9.429205580619208e-05, "loss": 1.4418, "step": 5438 }, { "epoch": 0.32423411610442243, "grad_norm": 2.9025473594665527, "learning_rate": 9.428766710458021e-05, "loss": 1.5717, "step": 5440 }, { "epoch": 0.3243533198235785, "grad_norm": 3.4069621562957764, "learning_rate": 9.428327681864698e-05, "loss": 1.6474, "step": 5442 }, { "epoch": 0.32447252354273454, "grad_norm": 3.198854684829712, "learning_rate": 9.42788849485494e-05, "loss": 1.4776, "step": 5444 }, { "epoch": 0.32459172726189056, "grad_norm": 3.0663795471191406, "learning_rate": 9.42744914944446e-05, "loss": 1.5136, "step": 5446 }, { "epoch": 0.3247109309810466, "grad_norm": 3.3482210636138916, "learning_rate": 9.427009645648977e-05, "loss": 1.5856, "step": 5448 }, { "epoch": 0.32483013470020267, "grad_norm": 3.441655397415161, "learning_rate": 9.426569983484213e-05, "loss": 1.5319, "step": 5450 }, { "epoch": 0.3249493384193587, "grad_norm": 3.1847269535064697, "learning_rate": 9.426130162965894e-05, "loss": 1.6839, "step": 5452 }, { "epoch": 0.3250685421385147, "grad_norm": 3.131527900695801, "learning_rate": 9.425690184109756e-05, "loss": 1.3583, "step": 5454 }, { "epoch": 0.32518774585767074, "grad_norm": 3.3733253479003906, "learning_rate": 9.425250046931537e-05, "loss": 1.6146, "step": 5456 }, { "epoch": 0.3253069495768268, "grad_norm": 2.8163363933563232, "learning_rate": 9.424809751446983e-05, "loss": 1.456, "step": 5458 }, { "epoch": 0.32542615329598285, "grad_norm": 3.3794074058532715, "learning_rate": 9.424369297671847e-05, "loss": 1.5499, "step": 5460 }, { "epoch": 0.32554535701513887, "grad_norm": 3.2149453163146973, "learning_rate": 9.423928685621881e-05, "loss": 1.4924, "step": 5462 }, { "epoch": 0.3256645607342949, "grad_norm": 3.529895782470703, "learning_rate": 9.423487915312849e-05, "loss": 1.4869, "step": 5464 }, { "epoch": 0.325783764453451, "grad_norm": 3.2763988971710205, "learning_rate": 9.423046986760522e-05, "loss": 1.5376, "step": 5466 }, { "epoch": 0.325902968172607, "grad_norm": 2.797898292541504, "learning_rate": 9.42260589998067e-05, "loss": 1.4035, "step": 5468 }, { "epoch": 0.326022171891763, "grad_norm": 3.1715359687805176, "learning_rate": 9.422164654989072e-05, "loss": 1.6418, "step": 5470 }, { "epoch": 0.32614137561091905, "grad_norm": 3.3821675777435303, "learning_rate": 9.421723251801515e-05, "loss": 1.6226, "step": 5472 }, { "epoch": 0.3262605793300751, "grad_norm": 3.0405545234680176, "learning_rate": 9.421281690433789e-05, "loss": 1.5439, "step": 5474 }, { "epoch": 0.32637978304923115, "grad_norm": 3.288055896759033, "learning_rate": 9.420839970901689e-05, "loss": 1.5043, "step": 5476 }, { "epoch": 0.3264989867683872, "grad_norm": 3.225848436355591, "learning_rate": 9.420398093221017e-05, "loss": 1.597, "step": 5478 }, { "epoch": 0.3266181904875432, "grad_norm": 2.84224796295166, "learning_rate": 9.419956057407582e-05, "loss": 1.4314, "step": 5480 }, { "epoch": 0.3267373942066992, "grad_norm": 3.0750844478607178, "learning_rate": 9.419513863477194e-05, "loss": 1.4587, "step": 5482 }, { "epoch": 0.3268565979258553, "grad_norm": 3.2350621223449707, "learning_rate": 9.419071511445678e-05, "loss": 1.5737, "step": 5484 }, { "epoch": 0.32697580164501133, "grad_norm": 3.3960154056549072, "learning_rate": 9.418629001328851e-05, "loss": 1.5135, "step": 5486 }, { "epoch": 0.32709500536416736, "grad_norm": 3.219897508621216, "learning_rate": 9.418186333142547e-05, "loss": 1.4957, "step": 5488 }, { "epoch": 0.3272142090833234, "grad_norm": 3.3213117122650146, "learning_rate": 9.417743506902602e-05, "loss": 1.5632, "step": 5490 }, { "epoch": 0.32733341280247946, "grad_norm": 3.400284767150879, "learning_rate": 9.417300522624857e-05, "loss": 1.6877, "step": 5492 }, { "epoch": 0.3274526165216355, "grad_norm": 3.4603493213653564, "learning_rate": 9.416857380325157e-05, "loss": 1.6403, "step": 5494 }, { "epoch": 0.3275718202407915, "grad_norm": 3.362291097640991, "learning_rate": 9.416414080019358e-05, "loss": 1.3958, "step": 5496 }, { "epoch": 0.32769102395994754, "grad_norm": 3.235795497894287, "learning_rate": 9.415970621723316e-05, "loss": 1.4386, "step": 5498 }, { "epoch": 0.32781022767910356, "grad_norm": 2.9571402072906494, "learning_rate": 9.415527005452896e-05, "loss": 1.485, "step": 5500 }, { "epoch": 0.32792943139825964, "grad_norm": 2.724745273590088, "learning_rate": 9.415083231223966e-05, "loss": 1.3294, "step": 5502 }, { "epoch": 0.32804863511741567, "grad_norm": 2.8712522983551025, "learning_rate": 9.414639299052405e-05, "loss": 1.4856, "step": 5504 }, { "epoch": 0.3281678388365717, "grad_norm": 3.2548975944519043, "learning_rate": 9.41419520895409e-05, "loss": 1.6389, "step": 5506 }, { "epoch": 0.3282870425557277, "grad_norm": 3.377567768096924, "learning_rate": 9.41375096094491e-05, "loss": 1.5356, "step": 5508 }, { "epoch": 0.3284062462748838, "grad_norm": 3.511382818222046, "learning_rate": 9.413306555040758e-05, "loss": 1.4453, "step": 5510 }, { "epoch": 0.3285254499940398, "grad_norm": 3.382082462310791, "learning_rate": 9.412861991257528e-05, "loss": 1.5072, "step": 5512 }, { "epoch": 0.32864465371319584, "grad_norm": 3.429266929626465, "learning_rate": 9.412417269611129e-05, "loss": 1.3683, "step": 5514 }, { "epoch": 0.32876385743235187, "grad_norm": 3.370415687561035, "learning_rate": 9.411972390117465e-05, "loss": 1.555, "step": 5516 }, { "epoch": 0.32888306115150795, "grad_norm": 2.9564478397369385, "learning_rate": 9.411527352792454e-05, "loss": 1.4679, "step": 5518 }, { "epoch": 0.329002264870664, "grad_norm": 3.16694974899292, "learning_rate": 9.411082157652016e-05, "loss": 1.521, "step": 5520 }, { "epoch": 0.32912146858982, "grad_norm": 2.982666015625, "learning_rate": 9.410636804712076e-05, "loss": 1.4547, "step": 5522 }, { "epoch": 0.329240672308976, "grad_norm": 3.192324638366699, "learning_rate": 9.410191293988569e-05, "loss": 1.3995, "step": 5524 }, { "epoch": 0.3293598760281321, "grad_norm": 2.875544548034668, "learning_rate": 9.409745625497428e-05, "loss": 1.3084, "step": 5526 }, { "epoch": 0.32947907974728813, "grad_norm": 3.2865371704101562, "learning_rate": 9.409299799254598e-05, "loss": 1.4438, "step": 5528 }, { "epoch": 0.32959828346644415, "grad_norm": 3.2521860599517822, "learning_rate": 9.408853815276028e-05, "loss": 1.3767, "step": 5530 }, { "epoch": 0.3297174871856002, "grad_norm": 3.0352232456207275, "learning_rate": 9.408407673577673e-05, "loss": 1.8005, "step": 5532 }, { "epoch": 0.3298366909047562, "grad_norm": 3.152726173400879, "learning_rate": 9.407961374175492e-05, "loss": 1.5396, "step": 5534 }, { "epoch": 0.3299558946239123, "grad_norm": 3.0342864990234375, "learning_rate": 9.407514917085451e-05, "loss": 1.339, "step": 5536 }, { "epoch": 0.3300750983430683, "grad_norm": 2.9380977153778076, "learning_rate": 9.40706830232352e-05, "loss": 1.3903, "step": 5538 }, { "epoch": 0.33019430206222433, "grad_norm": 3.0803229808807373, "learning_rate": 9.40662152990568e-05, "loss": 1.5099, "step": 5540 }, { "epoch": 0.33031350578138036, "grad_norm": 2.897786855697632, "learning_rate": 9.40617459984791e-05, "loss": 1.5496, "step": 5542 }, { "epoch": 0.33043270950053644, "grad_norm": 3.099252700805664, "learning_rate": 9.405727512166198e-05, "loss": 1.4835, "step": 5544 }, { "epoch": 0.33055191321969246, "grad_norm": 3.115448474884033, "learning_rate": 9.40528026687654e-05, "loss": 1.4264, "step": 5546 }, { "epoch": 0.3306711169388485, "grad_norm": 3.253068208694458, "learning_rate": 9.404832863994932e-05, "loss": 1.5725, "step": 5548 }, { "epoch": 0.3307903206580045, "grad_norm": 3.2846202850341797, "learning_rate": 9.404385303537384e-05, "loss": 1.5079, "step": 5550 }, { "epoch": 0.3309095243771606, "grad_norm": 3.271657705307007, "learning_rate": 9.403937585519901e-05, "loss": 1.5772, "step": 5552 }, { "epoch": 0.3310287280963166, "grad_norm": 2.998309850692749, "learning_rate": 9.403489709958506e-05, "loss": 1.4229, "step": 5554 }, { "epoch": 0.33114793181547264, "grad_norm": 2.8048863410949707, "learning_rate": 9.403041676869218e-05, "loss": 1.425, "step": 5556 }, { "epoch": 0.33126713553462867, "grad_norm": 3.3690171241760254, "learning_rate": 9.402593486268062e-05, "loss": 1.3653, "step": 5558 }, { "epoch": 0.33138633925378475, "grad_norm": 2.9970993995666504, "learning_rate": 9.402145138171075e-05, "loss": 1.4796, "step": 5560 }, { "epoch": 0.33150554297294077, "grad_norm": 3.1165852546691895, "learning_rate": 9.401696632594295e-05, "loss": 1.4819, "step": 5562 }, { "epoch": 0.3316247466920968, "grad_norm": 3.5961203575134277, "learning_rate": 9.401247969553765e-05, "loss": 1.4781, "step": 5564 }, { "epoch": 0.3317439504112528, "grad_norm": 3.2881789207458496, "learning_rate": 9.400799149065536e-05, "loss": 1.5613, "step": 5566 }, { "epoch": 0.33186315413040884, "grad_norm": 3.2445571422576904, "learning_rate": 9.400350171145667e-05, "loss": 1.5307, "step": 5568 }, { "epoch": 0.3319823578495649, "grad_norm": 3.0884029865264893, "learning_rate": 9.399901035810213e-05, "loss": 1.5995, "step": 5570 }, { "epoch": 0.33210156156872095, "grad_norm": 2.9127135276794434, "learning_rate": 9.399451743075247e-05, "loss": 1.4954, "step": 5572 }, { "epoch": 0.332220765287877, "grad_norm": 2.900336503982544, "learning_rate": 9.39900229295684e-05, "loss": 1.4336, "step": 5574 }, { "epoch": 0.332339969007033, "grad_norm": 3.2338435649871826, "learning_rate": 9.39855268547107e-05, "loss": 1.564, "step": 5576 }, { "epoch": 0.3324591727261891, "grad_norm": 3.272751808166504, "learning_rate": 9.39810292063402e-05, "loss": 1.4463, "step": 5578 }, { "epoch": 0.3325783764453451, "grad_norm": 4.192210674285889, "learning_rate": 9.39765299846178e-05, "loss": 1.4957, "step": 5580 }, { "epoch": 0.3326975801645011, "grad_norm": 2.9892280101776123, "learning_rate": 9.397202918970447e-05, "loss": 1.6211, "step": 5582 }, { "epoch": 0.33281678388365715, "grad_norm": 3.1626505851745605, "learning_rate": 9.39675268217612e-05, "loss": 1.6284, "step": 5584 }, { "epoch": 0.33293598760281323, "grad_norm": 3.0964889526367188, "learning_rate": 9.396302288094907e-05, "loss": 1.6406, "step": 5586 }, { "epoch": 0.33305519132196926, "grad_norm": 3.7029929161071777, "learning_rate": 9.39585173674292e-05, "loss": 1.6536, "step": 5588 }, { "epoch": 0.3331743950411253, "grad_norm": 3.1729846000671387, "learning_rate": 9.395401028136275e-05, "loss": 1.4869, "step": 5590 }, { "epoch": 0.3332935987602813, "grad_norm": 3.2696361541748047, "learning_rate": 9.394950162291097e-05, "loss": 1.5277, "step": 5592 }, { "epoch": 0.33341280247943733, "grad_norm": 3.3698062896728516, "learning_rate": 9.394499139223513e-05, "loss": 1.5563, "step": 5594 }, { "epoch": 0.3335320061985934, "grad_norm": 2.8826723098754883, "learning_rate": 9.394047958949661e-05, "loss": 1.5071, "step": 5596 }, { "epoch": 0.33365120991774944, "grad_norm": 3.4995219707489014, "learning_rate": 9.393596621485678e-05, "loss": 1.5174, "step": 5598 }, { "epoch": 0.33377041363690546, "grad_norm": 3.6092617511749268, "learning_rate": 9.393145126847713e-05, "loss": 1.6362, "step": 5600 }, { "epoch": 0.3338896173560615, "grad_norm": 3.2620387077331543, "learning_rate": 9.392693475051914e-05, "loss": 1.6332, "step": 5602 }, { "epoch": 0.33400882107521757, "grad_norm": 3.583016872406006, "learning_rate": 9.392241666114442e-05, "loss": 1.5727, "step": 5604 }, { "epoch": 0.3341280247943736, "grad_norm": 2.9379048347473145, "learning_rate": 9.391789700051457e-05, "loss": 1.5793, "step": 5606 }, { "epoch": 0.3342472285135296, "grad_norm": 2.904406785964966, "learning_rate": 9.391337576879127e-05, "loss": 1.4644, "step": 5608 }, { "epoch": 0.33436643223268564, "grad_norm": 3.540248155593872, "learning_rate": 9.390885296613628e-05, "loss": 1.6872, "step": 5610 }, { "epoch": 0.3344856359518417, "grad_norm": 3.031590461730957, "learning_rate": 9.390432859271139e-05, "loss": 1.6065, "step": 5612 }, { "epoch": 0.33460483967099774, "grad_norm": 3.4267730712890625, "learning_rate": 9.389980264867844e-05, "loss": 1.6337, "step": 5614 }, { "epoch": 0.33472404339015377, "grad_norm": 3.1751229763031006, "learning_rate": 9.389527513419934e-05, "loss": 1.597, "step": 5616 }, { "epoch": 0.3348432471093098, "grad_norm": 3.2876675128936768, "learning_rate": 9.389074604943608e-05, "loss": 1.6362, "step": 5618 }, { "epoch": 0.3349624508284659, "grad_norm": 3.346050977706909, "learning_rate": 9.388621539455065e-05, "loss": 1.4736, "step": 5620 }, { "epoch": 0.3350816545476219, "grad_norm": 3.3363969326019287, "learning_rate": 9.388168316970516e-05, "loss": 1.6104, "step": 5622 }, { "epoch": 0.3352008582667779, "grad_norm": 3.4528775215148926, "learning_rate": 9.387714937506171e-05, "loss": 1.4984, "step": 5624 }, { "epoch": 0.33532006198593395, "grad_norm": 3.3667571544647217, "learning_rate": 9.38726140107825e-05, "loss": 1.3719, "step": 5626 }, { "epoch": 0.33543926570509, "grad_norm": 2.7880256175994873, "learning_rate": 9.386807707702978e-05, "loss": 1.4234, "step": 5628 }, { "epoch": 0.33555846942424605, "grad_norm": 3.1239871978759766, "learning_rate": 9.386353857396585e-05, "loss": 1.6129, "step": 5630 }, { "epoch": 0.3356776731434021, "grad_norm": 3.0659101009368896, "learning_rate": 9.385899850175308e-05, "loss": 1.5452, "step": 5632 }, { "epoch": 0.3357968768625581, "grad_norm": 2.968599319458008, "learning_rate": 9.385445686055385e-05, "loss": 1.5201, "step": 5634 }, { "epoch": 0.3359160805817141, "grad_norm": 3.2863686084747314, "learning_rate": 9.384991365053067e-05, "loss": 1.5171, "step": 5636 }, { "epoch": 0.3360352843008702, "grad_norm": 3.459214210510254, "learning_rate": 9.384536887184604e-05, "loss": 1.4989, "step": 5638 }, { "epoch": 0.33615448802002623, "grad_norm": 3.341761350631714, "learning_rate": 9.384082252466255e-05, "loss": 1.7595, "step": 5640 }, { "epoch": 0.33627369173918226, "grad_norm": 2.909048080444336, "learning_rate": 9.383627460914284e-05, "loss": 1.4204, "step": 5642 }, { "epoch": 0.3363928954583383, "grad_norm": 3.0914182662963867, "learning_rate": 9.38317251254496e-05, "loss": 1.6297, "step": 5644 }, { "epoch": 0.33651209917749436, "grad_norm": 3.3147335052490234, "learning_rate": 9.382717407374559e-05, "loss": 1.4724, "step": 5646 }, { "epoch": 0.3366313028966504, "grad_norm": 3.1640405654907227, "learning_rate": 9.38226214541936e-05, "loss": 1.5496, "step": 5648 }, { "epoch": 0.3367505066158064, "grad_norm": 3.1098427772521973, "learning_rate": 9.381806726695652e-05, "loss": 1.5328, "step": 5650 }, { "epoch": 0.33686971033496244, "grad_norm": 3.1184186935424805, "learning_rate": 9.381351151219724e-05, "loss": 1.4634, "step": 5652 }, { "epoch": 0.3369889140541185, "grad_norm": 3.042060613632202, "learning_rate": 9.380895419007875e-05, "loss": 1.4875, "step": 5654 }, { "epoch": 0.33710811777327454, "grad_norm": 3.2522993087768555, "learning_rate": 9.380439530076409e-05, "loss": 1.5822, "step": 5656 }, { "epoch": 0.33722732149243057, "grad_norm": 3.128405809402466, "learning_rate": 9.37998348444163e-05, "loss": 1.4759, "step": 5658 }, { "epoch": 0.3373465252115866, "grad_norm": 3.061269760131836, "learning_rate": 9.379527282119858e-05, "loss": 1.6108, "step": 5660 }, { "epoch": 0.3374657289307426, "grad_norm": 3.112351894378662, "learning_rate": 9.379070923127411e-05, "loss": 1.5931, "step": 5662 }, { "epoch": 0.3375849326498987, "grad_norm": 3.0353541374206543, "learning_rate": 9.378614407480615e-05, "loss": 1.4627, "step": 5664 }, { "epoch": 0.3377041363690547, "grad_norm": 3.286867618560791, "learning_rate": 9.378157735195798e-05, "loss": 1.5637, "step": 5666 }, { "epoch": 0.33782334008821074, "grad_norm": 3.4573192596435547, "learning_rate": 9.377700906289302e-05, "loss": 1.5599, "step": 5668 }, { "epoch": 0.33794254380736677, "grad_norm": 3.138794183731079, "learning_rate": 9.377243920777464e-05, "loss": 1.3442, "step": 5670 }, { "epoch": 0.33806174752652285, "grad_norm": 3.1079821586608887, "learning_rate": 9.376786778676635e-05, "loss": 1.5218, "step": 5672 }, { "epoch": 0.3381809512456789, "grad_norm": 3.0475666522979736, "learning_rate": 9.376329480003168e-05, "loss": 1.4856, "step": 5674 }, { "epoch": 0.3383001549648349, "grad_norm": 3.3187241554260254, "learning_rate": 9.375872024773422e-05, "loss": 1.6241, "step": 5676 }, { "epoch": 0.3384193586839909, "grad_norm": 3.3537817001342773, "learning_rate": 9.375414413003763e-05, "loss": 1.5226, "step": 5678 }, { "epoch": 0.338538562403147, "grad_norm": 3.115356683731079, "learning_rate": 9.374956644710558e-05, "loss": 1.4746, "step": 5680 }, { "epoch": 0.33865776612230303, "grad_norm": 2.789433479309082, "learning_rate": 9.374498719910186e-05, "loss": 1.3747, "step": 5682 }, { "epoch": 0.33877696984145905, "grad_norm": 3.1724958419799805, "learning_rate": 9.374040638619028e-05, "loss": 1.3801, "step": 5684 }, { "epoch": 0.3388961735606151, "grad_norm": 2.8897452354431152, "learning_rate": 9.373582400853471e-05, "loss": 1.5152, "step": 5686 }, { "epoch": 0.3390153772797711, "grad_norm": 3.2717785835266113, "learning_rate": 9.373124006629908e-05, "loss": 1.5455, "step": 5688 }, { "epoch": 0.3391345809989272, "grad_norm": 3.66338849067688, "learning_rate": 9.372665455964736e-05, "loss": 1.5553, "step": 5690 }, { "epoch": 0.3392537847180832, "grad_norm": 2.832216739654541, "learning_rate": 9.372206748874358e-05, "loss": 1.4964, "step": 5692 }, { "epoch": 0.33937298843723923, "grad_norm": 3.055699586868286, "learning_rate": 9.371747885375187e-05, "loss": 1.4665, "step": 5694 }, { "epoch": 0.33949219215639526, "grad_norm": 2.9494082927703857, "learning_rate": 9.371288865483636e-05, "loss": 1.424, "step": 5696 }, { "epoch": 0.33961139587555134, "grad_norm": 2.9000465869903564, "learning_rate": 9.370829689216126e-05, "loss": 1.5386, "step": 5698 }, { "epoch": 0.33973059959470736, "grad_norm": 3.0261919498443604, "learning_rate": 9.370370356589084e-05, "loss": 1.6013, "step": 5700 }, { "epoch": 0.3398498033138634, "grad_norm": 3.264124870300293, "learning_rate": 9.369910867618941e-05, "loss": 1.4416, "step": 5702 }, { "epoch": 0.3399690070330194, "grad_norm": 3.3714168071746826, "learning_rate": 9.369451222322135e-05, "loss": 1.6025, "step": 5704 }, { "epoch": 0.3400882107521755, "grad_norm": 3.3367083072662354, "learning_rate": 9.368991420715108e-05, "loss": 1.7204, "step": 5706 }, { "epoch": 0.3402074144713315, "grad_norm": 3.0502917766571045, "learning_rate": 9.36853146281431e-05, "loss": 1.4414, "step": 5708 }, { "epoch": 0.34032661819048754, "grad_norm": 3.3812079429626465, "learning_rate": 9.368071348636195e-05, "loss": 1.4925, "step": 5710 }, { "epoch": 0.34044582190964356, "grad_norm": 2.967468738555908, "learning_rate": 9.367611078197222e-05, "loss": 1.4532, "step": 5712 }, { "epoch": 0.34056502562879964, "grad_norm": 3.2168562412261963, "learning_rate": 9.367150651513858e-05, "loss": 1.5377, "step": 5714 }, { "epoch": 0.34068422934795567, "grad_norm": 3.257300853729248, "learning_rate": 9.366690068602573e-05, "loss": 1.4336, "step": 5716 }, { "epoch": 0.3408034330671117, "grad_norm": 3.22257137298584, "learning_rate": 9.366229329479843e-05, "loss": 1.554, "step": 5718 }, { "epoch": 0.3409226367862677, "grad_norm": 3.204373359680176, "learning_rate": 9.365768434162153e-05, "loss": 1.5956, "step": 5720 }, { "epoch": 0.34104184050542374, "grad_norm": 3.382711887359619, "learning_rate": 9.365307382665987e-05, "loss": 1.5761, "step": 5722 }, { "epoch": 0.3411610442245798, "grad_norm": 3.0327000617980957, "learning_rate": 9.36484617500784e-05, "loss": 1.5266, "step": 5724 }, { "epoch": 0.34128024794373585, "grad_norm": 3.09185528755188, "learning_rate": 9.364384811204211e-05, "loss": 1.4876, "step": 5726 }, { "epoch": 0.3413994516628919, "grad_norm": 2.9343440532684326, "learning_rate": 9.363923291271605e-05, "loss": 1.4305, "step": 5728 }, { "epoch": 0.3415186553820479, "grad_norm": 3.0556325912475586, "learning_rate": 9.363461615226534e-05, "loss": 1.4146, "step": 5730 }, { "epoch": 0.341637859101204, "grad_norm": 3.059718370437622, "learning_rate": 9.362999783085509e-05, "loss": 1.5709, "step": 5732 }, { "epoch": 0.34175706282036, "grad_norm": 3.3725924491882324, "learning_rate": 9.362537794865054e-05, "loss": 1.5942, "step": 5734 }, { "epoch": 0.341876266539516, "grad_norm": 3.122262954711914, "learning_rate": 9.362075650581698e-05, "loss": 1.4499, "step": 5736 }, { "epoch": 0.34199547025867205, "grad_norm": 2.7787322998046875, "learning_rate": 9.36161335025197e-05, "loss": 1.4228, "step": 5738 }, { "epoch": 0.34211467397782813, "grad_norm": 2.8872199058532715, "learning_rate": 9.361150893892408e-05, "loss": 1.4699, "step": 5740 }, { "epoch": 0.34223387769698416, "grad_norm": 3.0835559368133545, "learning_rate": 9.360688281519558e-05, "loss": 1.6388, "step": 5742 }, { "epoch": 0.3423530814161402, "grad_norm": 3.269010305404663, "learning_rate": 9.36022551314997e-05, "loss": 1.4529, "step": 5744 }, { "epoch": 0.3424722851352962, "grad_norm": 3.1942946910858154, "learning_rate": 9.359762588800195e-05, "loss": 1.5062, "step": 5746 }, { "epoch": 0.3425914888544523, "grad_norm": 3.1036176681518555, "learning_rate": 9.359299508486796e-05, "loss": 1.5361, "step": 5748 }, { "epoch": 0.3427106925736083, "grad_norm": 3.387681722640991, "learning_rate": 9.358836272226338e-05, "loss": 1.474, "step": 5750 }, { "epoch": 0.34282989629276434, "grad_norm": 3.1211624145507812, "learning_rate": 9.358372880035393e-05, "loss": 1.4573, "step": 5752 }, { "epoch": 0.34294910001192036, "grad_norm": 3.043271541595459, "learning_rate": 9.357909331930539e-05, "loss": 1.4841, "step": 5754 }, { "epoch": 0.3430683037310764, "grad_norm": 3.071627140045166, "learning_rate": 9.357445627928356e-05, "loss": 1.4867, "step": 5756 }, { "epoch": 0.34318750745023247, "grad_norm": 3.095184564590454, "learning_rate": 9.356981768045436e-05, "loss": 1.4881, "step": 5758 }, { "epoch": 0.3433067111693885, "grad_norm": 2.9864301681518555, "learning_rate": 9.35651775229837e-05, "loss": 1.3404, "step": 5760 }, { "epoch": 0.3434259148885445, "grad_norm": 3.392692804336548, "learning_rate": 9.356053580703759e-05, "loss": 1.4866, "step": 5762 }, { "epoch": 0.34354511860770054, "grad_norm": 2.956660270690918, "learning_rate": 9.355589253278208e-05, "loss": 1.5002, "step": 5764 }, { "epoch": 0.3436643223268566, "grad_norm": 3.1117773056030273, "learning_rate": 9.355124770038323e-05, "loss": 1.5225, "step": 5766 }, { "epoch": 0.34378352604601264, "grad_norm": 3.0754358768463135, "learning_rate": 9.354660131000728e-05, "loss": 1.4988, "step": 5768 }, { "epoch": 0.34390272976516867, "grad_norm": 3.335470199584961, "learning_rate": 9.35419533618204e-05, "loss": 1.5875, "step": 5770 }, { "epoch": 0.3440219334843247, "grad_norm": 3.3377504348754883, "learning_rate": 9.353730385598887e-05, "loss": 1.5314, "step": 5772 }, { "epoch": 0.3441411372034808, "grad_norm": 3.2971200942993164, "learning_rate": 9.353265279267901e-05, "loss": 1.4433, "step": 5774 }, { "epoch": 0.3442603409226368, "grad_norm": 3.044184446334839, "learning_rate": 9.352800017205722e-05, "loss": 1.5221, "step": 5776 }, { "epoch": 0.3443795446417928, "grad_norm": 3.076190233230591, "learning_rate": 9.352334599428993e-05, "loss": 1.4421, "step": 5778 }, { "epoch": 0.34449874836094885, "grad_norm": 3.1743509769439697, "learning_rate": 9.351869025954365e-05, "loss": 1.4885, "step": 5780 }, { "epoch": 0.3446179520801049, "grad_norm": 3.229741096496582, "learning_rate": 9.351403296798492e-05, "loss": 1.5426, "step": 5782 }, { "epoch": 0.34473715579926095, "grad_norm": 3.236217975616455, "learning_rate": 9.350937411978035e-05, "loss": 1.4485, "step": 5784 }, { "epoch": 0.344856359518417, "grad_norm": 3.4891185760498047, "learning_rate": 9.350471371509659e-05, "loss": 1.4725, "step": 5786 }, { "epoch": 0.344975563237573, "grad_norm": 3.07450008392334, "learning_rate": 9.350005175410038e-05, "loss": 1.5731, "step": 5788 }, { "epoch": 0.345094766956729, "grad_norm": 2.9783895015716553, "learning_rate": 9.349538823695849e-05, "loss": 1.5847, "step": 5790 }, { "epoch": 0.3452139706758851, "grad_norm": 2.789235830307007, "learning_rate": 9.349072316383773e-05, "loss": 1.4261, "step": 5792 }, { "epoch": 0.34533317439504113, "grad_norm": 3.0190088748931885, "learning_rate": 9.348605653490502e-05, "loss": 1.4183, "step": 5794 }, { "epoch": 0.34545237811419716, "grad_norm": 3.2180559635162354, "learning_rate": 9.348138835032727e-05, "loss": 1.487, "step": 5796 }, { "epoch": 0.3455715818333532, "grad_norm": 3.1317453384399414, "learning_rate": 9.347671861027149e-05, "loss": 1.5495, "step": 5798 }, { "epoch": 0.34569078555250926, "grad_norm": 3.434493064880371, "learning_rate": 9.347204731490474e-05, "loss": 1.6965, "step": 5800 }, { "epoch": 0.3458099892716653, "grad_norm": 3.469968318939209, "learning_rate": 9.346737446439411e-05, "loss": 1.6668, "step": 5802 }, { "epoch": 0.3459291929908213, "grad_norm": 3.0140037536621094, "learning_rate": 9.346270005890677e-05, "loss": 1.5802, "step": 5804 }, { "epoch": 0.34604839670997734, "grad_norm": 3.2569212913513184, "learning_rate": 9.345802409860995e-05, "loss": 1.6442, "step": 5806 }, { "epoch": 0.3461676004291334, "grad_norm": 2.765805959701538, "learning_rate": 9.34533465836709e-05, "loss": 1.3804, "step": 5808 }, { "epoch": 0.34628680414828944, "grad_norm": 3.1060736179351807, "learning_rate": 9.3448667514257e-05, "loss": 1.5874, "step": 5810 }, { "epoch": 0.34640600786744546, "grad_norm": 3.3088977336883545, "learning_rate": 9.344398689053558e-05, "loss": 1.499, "step": 5812 }, { "epoch": 0.3465252115866015, "grad_norm": 3.3358423709869385, "learning_rate": 9.343930471267413e-05, "loss": 1.4184, "step": 5814 }, { "epoch": 0.3466444153057575, "grad_norm": 3.0133934020996094, "learning_rate": 9.34346209808401e-05, "loss": 1.4289, "step": 5816 }, { "epoch": 0.3467636190249136, "grad_norm": 3.3441648483276367, "learning_rate": 9.342993569520108e-05, "loss": 1.5567, "step": 5818 }, { "epoch": 0.3468828227440696, "grad_norm": 3.519843578338623, "learning_rate": 9.342524885592466e-05, "loss": 1.4984, "step": 5820 }, { "epoch": 0.34700202646322564, "grad_norm": 2.9533536434173584, "learning_rate": 9.342056046317851e-05, "loss": 1.5079, "step": 5822 }, { "epoch": 0.34712123018238167, "grad_norm": 3.3343992233276367, "learning_rate": 9.341587051713037e-05, "loss": 1.5694, "step": 5824 }, { "epoch": 0.34724043390153775, "grad_norm": 4.267447471618652, "learning_rate": 9.341117901794797e-05, "loss": 1.6392, "step": 5826 }, { "epoch": 0.3473596376206938, "grad_norm": 3.5092267990112305, "learning_rate": 9.340648596579917e-05, "loss": 1.5944, "step": 5828 }, { "epoch": 0.3474788413398498, "grad_norm": 3.021256446838379, "learning_rate": 9.340179136085187e-05, "loss": 1.3995, "step": 5830 }, { "epoch": 0.3475980450590058, "grad_norm": 3.197542428970337, "learning_rate": 9.339709520327398e-05, "loss": 1.4405, "step": 5832 }, { "epoch": 0.3477172487781619, "grad_norm": 3.081662893295288, "learning_rate": 9.339239749323352e-05, "loss": 1.3637, "step": 5834 }, { "epoch": 0.3478364524973179, "grad_norm": 3.2321200370788574, "learning_rate": 9.338769823089853e-05, "loss": 1.5103, "step": 5836 }, { "epoch": 0.34795565621647395, "grad_norm": 3.1399927139282227, "learning_rate": 9.338299741643714e-05, "loss": 1.4549, "step": 5838 }, { "epoch": 0.34807485993563, "grad_norm": 3.0812127590179443, "learning_rate": 9.33782950500175e-05, "loss": 1.5117, "step": 5840 }, { "epoch": 0.34819406365478606, "grad_norm": 2.8014841079711914, "learning_rate": 9.33735911318078e-05, "loss": 1.4124, "step": 5842 }, { "epoch": 0.3483132673739421, "grad_norm": 2.9531683921813965, "learning_rate": 9.336888566197637e-05, "loss": 1.35, "step": 5844 }, { "epoch": 0.3484324710930981, "grad_norm": 3.3335120677948, "learning_rate": 9.336417864069151e-05, "loss": 1.662, "step": 5846 }, { "epoch": 0.34855167481225413, "grad_norm": 3.211176633834839, "learning_rate": 9.33594700681216e-05, "loss": 1.486, "step": 5848 }, { "epoch": 0.34867087853141016, "grad_norm": 3.1936004161834717, "learning_rate": 9.33547599444351e-05, "loss": 1.3844, "step": 5850 }, { "epoch": 0.34879008225056624, "grad_norm": 3.686082363128662, "learning_rate": 9.33500482698005e-05, "loss": 1.6299, "step": 5852 }, { "epoch": 0.34890928596972226, "grad_norm": 2.9380667209625244, "learning_rate": 9.334533504438637e-05, "loss": 1.5909, "step": 5854 }, { "epoch": 0.3490284896888783, "grad_norm": 3.1083338260650635, "learning_rate": 9.334062026836127e-05, "loss": 1.5455, "step": 5856 }, { "epoch": 0.3491476934080343, "grad_norm": 3.0904393196105957, "learning_rate": 9.333590394189391e-05, "loss": 1.4724, "step": 5858 }, { "epoch": 0.3492668971271904, "grad_norm": 2.9789164066314697, "learning_rate": 9.3331186065153e-05, "loss": 1.3886, "step": 5860 }, { "epoch": 0.3493861008463464, "grad_norm": 2.937526226043701, "learning_rate": 9.332646663830731e-05, "loss": 1.4374, "step": 5862 }, { "epoch": 0.34950530456550244, "grad_norm": 3.2525525093078613, "learning_rate": 9.332174566152564e-05, "loss": 1.4511, "step": 5864 }, { "epoch": 0.34962450828465846, "grad_norm": 3.4215824604034424, "learning_rate": 9.331702313497693e-05, "loss": 1.5425, "step": 5866 }, { "epoch": 0.34974371200381454, "grad_norm": 3.1398847103118896, "learning_rate": 9.33122990588301e-05, "loss": 1.3982, "step": 5868 }, { "epoch": 0.34986291572297057, "grad_norm": 3.3147385120391846, "learning_rate": 9.330757343325412e-05, "loss": 1.3737, "step": 5870 }, { "epoch": 0.3499821194421266, "grad_norm": 3.136226177215576, "learning_rate": 9.330284625841807e-05, "loss": 1.3548, "step": 5872 }, { "epoch": 0.3501013231612826, "grad_norm": 2.731173038482666, "learning_rate": 9.329811753449106e-05, "loss": 1.5622, "step": 5874 }, { "epoch": 0.35022052688043864, "grad_norm": 3.3395633697509766, "learning_rate": 9.329338726164224e-05, "loss": 1.6752, "step": 5876 }, { "epoch": 0.3503397305995947, "grad_norm": 3.235158681869507, "learning_rate": 9.328865544004082e-05, "loss": 1.5043, "step": 5878 }, { "epoch": 0.35045893431875075, "grad_norm": 3.2159056663513184, "learning_rate": 9.32839220698561e-05, "loss": 1.547, "step": 5880 }, { "epoch": 0.3505781380379068, "grad_norm": 3.4171359539031982, "learning_rate": 9.327918715125738e-05, "loss": 1.5651, "step": 5882 }, { "epoch": 0.3506973417570628, "grad_norm": 3.324777364730835, "learning_rate": 9.327445068441407e-05, "loss": 1.532, "step": 5884 }, { "epoch": 0.3508165454762189, "grad_norm": 2.8953545093536377, "learning_rate": 9.326971266949558e-05, "loss": 1.4324, "step": 5886 }, { "epoch": 0.3509357491953749, "grad_norm": 2.830899238586426, "learning_rate": 9.326497310667143e-05, "loss": 1.4161, "step": 5888 }, { "epoch": 0.3510549529145309, "grad_norm": 3.1755356788635254, "learning_rate": 9.326023199611116e-05, "loss": 1.5219, "step": 5890 }, { "epoch": 0.35117415663368695, "grad_norm": 3.0728988647460938, "learning_rate": 9.325548933798439e-05, "loss": 1.414, "step": 5892 }, { "epoch": 0.35129336035284303, "grad_norm": 2.923626184463501, "learning_rate": 9.325074513246076e-05, "loss": 1.4115, "step": 5894 }, { "epoch": 0.35141256407199906, "grad_norm": 2.9444334506988525, "learning_rate": 9.324599937970999e-05, "loss": 1.3905, "step": 5896 }, { "epoch": 0.3515317677911551, "grad_norm": 3.1170175075531006, "learning_rate": 9.324125207990187e-05, "loss": 1.4957, "step": 5898 }, { "epoch": 0.3516509715103111, "grad_norm": 2.8409476280212402, "learning_rate": 9.32365032332062e-05, "loss": 1.4776, "step": 5900 }, { "epoch": 0.3517701752294672, "grad_norm": 3.145580768585205, "learning_rate": 9.323175283979291e-05, "loss": 1.6003, "step": 5902 }, { "epoch": 0.3518893789486232, "grad_norm": 2.905287981033325, "learning_rate": 9.322700089983188e-05, "loss": 1.4573, "step": 5904 }, { "epoch": 0.35200858266777924, "grad_norm": 3.5856943130493164, "learning_rate": 9.322224741349312e-05, "loss": 1.4662, "step": 5906 }, { "epoch": 0.35212778638693526, "grad_norm": 2.8971915245056152, "learning_rate": 9.32174923809467e-05, "loss": 1.4692, "step": 5908 }, { "epoch": 0.3522469901060913, "grad_norm": 3.2027604579925537, "learning_rate": 9.321273580236271e-05, "loss": 1.4633, "step": 5910 }, { "epoch": 0.35236619382524736, "grad_norm": 3.1008589267730713, "learning_rate": 9.320797767791129e-05, "loss": 1.407, "step": 5912 }, { "epoch": 0.3524853975444034, "grad_norm": 2.9496819972991943, "learning_rate": 9.32032180077627e-05, "loss": 1.3946, "step": 5914 }, { "epoch": 0.3526046012635594, "grad_norm": 3.10660719871521, "learning_rate": 9.319845679208717e-05, "loss": 1.4117, "step": 5916 }, { "epoch": 0.35272380498271544, "grad_norm": 3.186102867126465, "learning_rate": 9.319369403105504e-05, "loss": 1.5516, "step": 5918 }, { "epoch": 0.3528430087018715, "grad_norm": 3.175119400024414, "learning_rate": 9.31889297248367e-05, "loss": 1.4819, "step": 5920 }, { "epoch": 0.35296221242102754, "grad_norm": 3.5470752716064453, "learning_rate": 9.318416387360258e-05, "loss": 1.6774, "step": 5922 }, { "epoch": 0.35308141614018357, "grad_norm": 2.8929169178009033, "learning_rate": 9.317939647752315e-05, "loss": 1.4432, "step": 5924 }, { "epoch": 0.3532006198593396, "grad_norm": 3.1610827445983887, "learning_rate": 9.317462753676895e-05, "loss": 1.5756, "step": 5926 }, { "epoch": 0.3533198235784957, "grad_norm": 4.311499118804932, "learning_rate": 9.316985705151064e-05, "loss": 1.5883, "step": 5928 }, { "epoch": 0.3534390272976517, "grad_norm": 2.9280762672424316, "learning_rate": 9.316508502191883e-05, "loss": 1.3959, "step": 5930 }, { "epoch": 0.3535582310168077, "grad_norm": 2.7286524772644043, "learning_rate": 9.316031144816422e-05, "loss": 1.3661, "step": 5932 }, { "epoch": 0.35367743473596375, "grad_norm": 3.3812708854675293, "learning_rate": 9.315553633041763e-05, "loss": 1.7509, "step": 5934 }, { "epoch": 0.3537966384551198, "grad_norm": 3.0119099617004395, "learning_rate": 9.315075966884983e-05, "loss": 1.3736, "step": 5936 }, { "epoch": 0.35391584217427585, "grad_norm": 3.1911239624023438, "learning_rate": 9.314598146363171e-05, "loss": 1.3944, "step": 5938 }, { "epoch": 0.3540350458934319, "grad_norm": 3.475054979324341, "learning_rate": 9.314120171493423e-05, "loss": 1.3968, "step": 5940 }, { "epoch": 0.3541542496125879, "grad_norm": 3.348637342453003, "learning_rate": 9.313642042292836e-05, "loss": 1.5044, "step": 5942 }, { "epoch": 0.3542734533317439, "grad_norm": 3.169386863708496, "learning_rate": 9.313163758778513e-05, "loss": 1.4989, "step": 5944 }, { "epoch": 0.3543926570509, "grad_norm": 3.109949827194214, "learning_rate": 9.312685320967564e-05, "loss": 1.6518, "step": 5946 }, { "epoch": 0.35451186077005603, "grad_norm": 3.047818660736084, "learning_rate": 9.312206728877108e-05, "loss": 1.6401, "step": 5948 }, { "epoch": 0.35463106448921206, "grad_norm": 3.2772419452667236, "learning_rate": 9.31172798252426e-05, "loss": 1.5586, "step": 5950 }, { "epoch": 0.3547502682083681, "grad_norm": 2.988046884536743, "learning_rate": 9.311249081926153e-05, "loss": 1.5151, "step": 5952 }, { "epoch": 0.35486947192752416, "grad_norm": 2.9522924423217773, "learning_rate": 9.310770027099915e-05, "loss": 1.402, "step": 5954 }, { "epoch": 0.3549886756466802, "grad_norm": 3.1190741062164307, "learning_rate": 9.310290818062683e-05, "loss": 1.4951, "step": 5956 }, { "epoch": 0.3551078793658362, "grad_norm": 3.247727155685425, "learning_rate": 9.3098114548316e-05, "loss": 1.505, "step": 5958 }, { "epoch": 0.35522708308499223, "grad_norm": 3.0795226097106934, "learning_rate": 9.309331937423818e-05, "loss": 1.372, "step": 5960 }, { "epoch": 0.3553462868041483, "grad_norm": 3.2633609771728516, "learning_rate": 9.308852265856485e-05, "loss": 1.7517, "step": 5962 }, { "epoch": 0.35546549052330434, "grad_norm": 3.097119092941284, "learning_rate": 9.308372440146766e-05, "loss": 1.5776, "step": 5964 }, { "epoch": 0.35558469424246036, "grad_norm": 3.3188211917877197, "learning_rate": 9.307892460311825e-05, "loss": 1.5122, "step": 5966 }, { "epoch": 0.3557038979616164, "grad_norm": 3.107797145843506, "learning_rate": 9.307412326368829e-05, "loss": 1.5936, "step": 5968 }, { "epoch": 0.3558231016807724, "grad_norm": 3.520655632019043, "learning_rate": 9.306932038334957e-05, "loss": 1.4495, "step": 5970 }, { "epoch": 0.3559423053999285, "grad_norm": 3.2746310234069824, "learning_rate": 9.30645159622739e-05, "loss": 1.5317, "step": 5972 }, { "epoch": 0.3560615091190845, "grad_norm": 3.2789645195007324, "learning_rate": 9.305971000063316e-05, "loss": 1.5308, "step": 5974 }, { "epoch": 0.35618071283824054, "grad_norm": 3.1964426040649414, "learning_rate": 9.305490249859926e-05, "loss": 1.3816, "step": 5976 }, { "epoch": 0.35629991655739657, "grad_norm": 3.1577367782592773, "learning_rate": 9.305009345634418e-05, "loss": 1.5245, "step": 5978 }, { "epoch": 0.35641912027655265, "grad_norm": 3.2653112411499023, "learning_rate": 9.304528287403999e-05, "loss": 1.6122, "step": 5980 }, { "epoch": 0.3565383239957087, "grad_norm": 3.542057752609253, "learning_rate": 9.304047075185874e-05, "loss": 1.5788, "step": 5982 }, { "epoch": 0.3566575277148647, "grad_norm": 3.234515905380249, "learning_rate": 9.303565708997257e-05, "loss": 1.5578, "step": 5984 }, { "epoch": 0.3567767314340207, "grad_norm": 3.165949583053589, "learning_rate": 9.303084188855371e-05, "loss": 1.4436, "step": 5986 }, { "epoch": 0.3568959351531768, "grad_norm": 3.0290377140045166, "learning_rate": 9.30260251477744e-05, "loss": 1.5294, "step": 5988 }, { "epoch": 0.3570151388723328, "grad_norm": 3.0273730754852295, "learning_rate": 9.302120686780699e-05, "loss": 1.5005, "step": 5990 }, { "epoch": 0.35713434259148885, "grad_norm": 3.257049798965454, "learning_rate": 9.30163870488238e-05, "loss": 1.5035, "step": 5992 }, { "epoch": 0.3572535463106449, "grad_norm": 3.041304111480713, "learning_rate": 9.301156569099725e-05, "loss": 1.3924, "step": 5994 }, { "epoch": 0.35737275002980096, "grad_norm": 3.3584389686584473, "learning_rate": 9.300674279449986e-05, "loss": 1.535, "step": 5996 }, { "epoch": 0.357491953748957, "grad_norm": 3.6249637603759766, "learning_rate": 9.300191835950411e-05, "loss": 1.625, "step": 5998 }, { "epoch": 0.357611157468113, "grad_norm": 3.1015610694885254, "learning_rate": 9.299709238618261e-05, "loss": 1.587, "step": 6000 }, { "epoch": 0.35773036118726903, "grad_norm": 3.294438362121582, "learning_rate": 9.299226487470803e-05, "loss": 1.439, "step": 6002 }, { "epoch": 0.35784956490642505, "grad_norm": 3.0272626876831055, "learning_rate": 9.298743582525301e-05, "loss": 1.503, "step": 6004 }, { "epoch": 0.35796876862558114, "grad_norm": 3.1909289360046387, "learning_rate": 9.298260523799034e-05, "loss": 1.4111, "step": 6006 }, { "epoch": 0.35808797234473716, "grad_norm": 3.0531978607177734, "learning_rate": 9.297777311309282e-05, "loss": 1.523, "step": 6008 }, { "epoch": 0.3582071760638932, "grad_norm": 2.925508975982666, "learning_rate": 9.297293945073331e-05, "loss": 1.5064, "step": 6010 }, { "epoch": 0.3583263797830492, "grad_norm": 3.014747381210327, "learning_rate": 9.296810425108472e-05, "loss": 1.5409, "step": 6012 }, { "epoch": 0.3584455835022053, "grad_norm": 3.2104523181915283, "learning_rate": 9.296326751432003e-05, "loss": 1.4051, "step": 6014 }, { "epoch": 0.3585647872213613, "grad_norm": 3.146009683609009, "learning_rate": 9.295842924061227e-05, "loss": 1.5369, "step": 6016 }, { "epoch": 0.35868399094051734, "grad_norm": 3.000568389892578, "learning_rate": 9.29535894301345e-05, "loss": 1.5081, "step": 6018 }, { "epoch": 0.35880319465967336, "grad_norm": 2.991842269897461, "learning_rate": 9.294874808305989e-05, "loss": 1.3445, "step": 6020 }, { "epoch": 0.35892239837882944, "grad_norm": 3.371014356613159, "learning_rate": 9.29439051995616e-05, "loss": 1.5609, "step": 6022 }, { "epoch": 0.35904160209798547, "grad_norm": 2.6795706748962402, "learning_rate": 9.293906077981289e-05, "loss": 1.3182, "step": 6024 }, { "epoch": 0.3591608058171415, "grad_norm": 3.2078773975372314, "learning_rate": 9.293421482398708e-05, "loss": 1.439, "step": 6026 }, { "epoch": 0.3592800095362975, "grad_norm": 3.312256336212158, "learning_rate": 9.292936733225748e-05, "loss": 1.5129, "step": 6028 }, { "epoch": 0.3593992132554536, "grad_norm": 3.3440616130828857, "learning_rate": 9.292451830479755e-05, "loss": 1.4669, "step": 6030 }, { "epoch": 0.3595184169746096, "grad_norm": 3.129924774169922, "learning_rate": 9.291966774178073e-05, "loss": 1.307, "step": 6032 }, { "epoch": 0.35963762069376565, "grad_norm": 3.0695765018463135, "learning_rate": 9.291481564338054e-05, "loss": 1.6398, "step": 6034 }, { "epoch": 0.35975682441292167, "grad_norm": 3.0499987602233887, "learning_rate": 9.290996200977058e-05, "loss": 1.427, "step": 6036 }, { "epoch": 0.3598760281320777, "grad_norm": 3.2691543102264404, "learning_rate": 9.290510684112444e-05, "loss": 1.4607, "step": 6038 }, { "epoch": 0.3599952318512338, "grad_norm": 2.928147792816162, "learning_rate": 9.290025013761586e-05, "loss": 1.4392, "step": 6040 }, { "epoch": 0.3601144355703898, "grad_norm": 3.366096019744873, "learning_rate": 9.289539189941851e-05, "loss": 1.4798, "step": 6042 }, { "epoch": 0.3602336392895458, "grad_norm": 3.2610082626342773, "learning_rate": 9.289053212670626e-05, "loss": 1.5155, "step": 6044 }, { "epoch": 0.36035284300870185, "grad_norm": 2.9495015144348145, "learning_rate": 9.288567081965292e-05, "loss": 1.5629, "step": 6046 }, { "epoch": 0.36047204672785793, "grad_norm": 3.0830230712890625, "learning_rate": 9.28808079784324e-05, "loss": 1.5016, "step": 6048 }, { "epoch": 0.36059125044701396, "grad_norm": 3.0691845417022705, "learning_rate": 9.287594360321867e-05, "loss": 1.5048, "step": 6050 }, { "epoch": 0.36071045416617, "grad_norm": 3.1242787837982178, "learning_rate": 9.287107769418573e-05, "loss": 1.4903, "step": 6052 }, { "epoch": 0.360829657885326, "grad_norm": 3.5716187953948975, "learning_rate": 9.286621025150765e-05, "loss": 1.5274, "step": 6054 }, { "epoch": 0.3609488616044821, "grad_norm": 3.17706561088562, "learning_rate": 9.286134127535859e-05, "loss": 1.5802, "step": 6056 }, { "epoch": 0.3610680653236381, "grad_norm": 3.2291481494903564, "learning_rate": 9.285647076591268e-05, "loss": 1.5316, "step": 6058 }, { "epoch": 0.36118726904279413, "grad_norm": 2.9111225605010986, "learning_rate": 9.28515987233442e-05, "loss": 1.3282, "step": 6060 }, { "epoch": 0.36130647276195016, "grad_norm": 3.022707462310791, "learning_rate": 9.28467251478274e-05, "loss": 1.4113, "step": 6062 }, { "epoch": 0.3614256764811062, "grad_norm": 3.232487916946411, "learning_rate": 9.284185003953666e-05, "loss": 1.542, "step": 6064 }, { "epoch": 0.36154488020026226, "grad_norm": 3.132124900817871, "learning_rate": 9.283697339864636e-05, "loss": 1.5844, "step": 6066 }, { "epoch": 0.3616640839194183, "grad_norm": 2.9355273246765137, "learning_rate": 9.283209522533094e-05, "loss": 1.3565, "step": 6068 }, { "epoch": 0.3617832876385743, "grad_norm": 3.043612003326416, "learning_rate": 9.282721551976494e-05, "loss": 1.4422, "step": 6070 }, { "epoch": 0.36190249135773034, "grad_norm": 3.1862106323242188, "learning_rate": 9.28223342821229e-05, "loss": 1.4647, "step": 6072 }, { "epoch": 0.3620216950768864, "grad_norm": 2.7617745399475098, "learning_rate": 9.281745151257946e-05, "loss": 1.5012, "step": 6074 }, { "epoch": 0.36214089879604244, "grad_norm": 3.0721359252929688, "learning_rate": 9.281256721130927e-05, "loss": 1.2938, "step": 6076 }, { "epoch": 0.36226010251519847, "grad_norm": 3.1452391147613525, "learning_rate": 9.280768137848707e-05, "loss": 1.4905, "step": 6078 }, { "epoch": 0.3623793062343545, "grad_norm": 3.4379539489746094, "learning_rate": 9.280279401428764e-05, "loss": 1.6458, "step": 6080 }, { "epoch": 0.3624985099535106, "grad_norm": 3.3948302268981934, "learning_rate": 9.279790511888582e-05, "loss": 1.6469, "step": 6082 }, { "epoch": 0.3626177136726666, "grad_norm": 2.9604904651641846, "learning_rate": 9.279301469245651e-05, "loss": 1.3778, "step": 6084 }, { "epoch": 0.3627369173918226, "grad_norm": 3.0723989009857178, "learning_rate": 9.278812273517465e-05, "loss": 1.5692, "step": 6086 }, { "epoch": 0.36285612111097865, "grad_norm": 3.0733115673065186, "learning_rate": 9.278322924721523e-05, "loss": 1.4354, "step": 6088 }, { "epoch": 0.3629753248301347, "grad_norm": 2.909907579421997, "learning_rate": 9.277833422875334e-05, "loss": 1.5049, "step": 6090 }, { "epoch": 0.36309452854929075, "grad_norm": 3.412144899368286, "learning_rate": 9.277343767996405e-05, "loss": 1.4558, "step": 6092 }, { "epoch": 0.3632137322684468, "grad_norm": 3.3650200366973877, "learning_rate": 9.276853960102256e-05, "loss": 1.4937, "step": 6094 }, { "epoch": 0.3633329359876028, "grad_norm": 3.113588809967041, "learning_rate": 9.276363999210406e-05, "loss": 1.5318, "step": 6096 }, { "epoch": 0.3634521397067588, "grad_norm": 3.2146413326263428, "learning_rate": 9.275873885338387e-05, "loss": 1.5597, "step": 6098 }, { "epoch": 0.3635713434259149, "grad_norm": 2.9477367401123047, "learning_rate": 9.275383618503727e-05, "loss": 1.5022, "step": 6100 }, { "epoch": 0.36369054714507093, "grad_norm": 3.073209285736084, "learning_rate": 9.274893198723969e-05, "loss": 1.4307, "step": 6102 }, { "epoch": 0.36380975086422696, "grad_norm": 3.27542781829834, "learning_rate": 9.274402626016652e-05, "loss": 1.3864, "step": 6104 }, { "epoch": 0.363928954583383, "grad_norm": 3.0889830589294434, "learning_rate": 9.27391190039933e-05, "loss": 1.4555, "step": 6106 }, { "epoch": 0.36404815830253906, "grad_norm": 2.742691993713379, "learning_rate": 9.273421021889556e-05, "loss": 1.4617, "step": 6108 }, { "epoch": 0.3641673620216951, "grad_norm": 3.2338812351226807, "learning_rate": 9.27292999050489e-05, "loss": 1.6399, "step": 6110 }, { "epoch": 0.3642865657408511, "grad_norm": 3.6022751331329346, "learning_rate": 9.2724388062629e-05, "loss": 1.4702, "step": 6112 }, { "epoch": 0.36440576946000713, "grad_norm": 3.2386465072631836, "learning_rate": 9.271947469181157e-05, "loss": 1.6524, "step": 6114 }, { "epoch": 0.3645249731791632, "grad_norm": 3.0240085124969482, "learning_rate": 9.271455979277235e-05, "loss": 1.4048, "step": 6116 }, { "epoch": 0.36464417689831924, "grad_norm": 3.4117791652679443, "learning_rate": 9.270964336568716e-05, "loss": 1.5208, "step": 6118 }, { "epoch": 0.36476338061747526, "grad_norm": 2.942746639251709, "learning_rate": 9.270472541073194e-05, "loss": 1.5016, "step": 6120 }, { "epoch": 0.3648825843366313, "grad_norm": 3.090555191040039, "learning_rate": 9.269980592808255e-05, "loss": 1.648, "step": 6122 }, { "epoch": 0.3650017880557873, "grad_norm": 3.1056230068206787, "learning_rate": 9.269488491791502e-05, "loss": 1.512, "step": 6124 }, { "epoch": 0.3651209917749434, "grad_norm": 2.837160348892212, "learning_rate": 9.268996238040537e-05, "loss": 1.3897, "step": 6126 }, { "epoch": 0.3652401954940994, "grad_norm": 2.9225003719329834, "learning_rate": 9.26850383157297e-05, "loss": 1.5047, "step": 6128 }, { "epoch": 0.36535939921325544, "grad_norm": 3.437091112136841, "learning_rate": 9.268011272406417e-05, "loss": 1.4564, "step": 6130 }, { "epoch": 0.36547860293241147, "grad_norm": 3.465796709060669, "learning_rate": 9.267518560558498e-05, "loss": 1.4945, "step": 6132 }, { "epoch": 0.36559780665156755, "grad_norm": 3.080397844314575, "learning_rate": 9.267025696046838e-05, "loss": 1.4479, "step": 6134 }, { "epoch": 0.36571701037072357, "grad_norm": 2.9689786434173584, "learning_rate": 9.26653267888907e-05, "loss": 1.4225, "step": 6136 }, { "epoch": 0.3658362140898796, "grad_norm": 2.8770289421081543, "learning_rate": 9.26603950910283e-05, "loss": 1.405, "step": 6138 }, { "epoch": 0.3659554178090356, "grad_norm": 2.9634766578674316, "learning_rate": 9.265546186705762e-05, "loss": 1.4466, "step": 6140 }, { "epoch": 0.3660746215281917, "grad_norm": 3.3533358573913574, "learning_rate": 9.265052711715512e-05, "loss": 1.5158, "step": 6142 }, { "epoch": 0.3661938252473477, "grad_norm": 2.9860024452209473, "learning_rate": 9.264559084149733e-05, "loss": 1.5174, "step": 6144 }, { "epoch": 0.36631302896650375, "grad_norm": 3.1114389896392822, "learning_rate": 9.264065304026086e-05, "loss": 1.4259, "step": 6146 }, { "epoch": 0.3664322326856598, "grad_norm": 2.780012369155884, "learning_rate": 9.263571371362232e-05, "loss": 1.5062, "step": 6148 }, { "epoch": 0.36655143640481586, "grad_norm": 3.054941415786743, "learning_rate": 9.263077286175844e-05, "loss": 1.4224, "step": 6150 }, { "epoch": 0.3666706401239719, "grad_norm": 3.0565807819366455, "learning_rate": 9.262583048484595e-05, "loss": 1.4881, "step": 6152 }, { "epoch": 0.3667898438431279, "grad_norm": 2.7421517372131348, "learning_rate": 9.262088658306165e-05, "loss": 1.3053, "step": 6154 }, { "epoch": 0.36690904756228393, "grad_norm": 3.3723037242889404, "learning_rate": 9.26159411565824e-05, "loss": 1.4805, "step": 6156 }, { "epoch": 0.36702825128143995, "grad_norm": 3.6252634525299072, "learning_rate": 9.261099420558515e-05, "loss": 1.5704, "step": 6158 }, { "epoch": 0.36714745500059603, "grad_norm": 3.1330339908599854, "learning_rate": 9.260604573024685e-05, "loss": 1.6013, "step": 6160 }, { "epoch": 0.36726665871975206, "grad_norm": 2.784301996231079, "learning_rate": 9.26010957307445e-05, "loss": 1.4699, "step": 6162 }, { "epoch": 0.3673858624389081, "grad_norm": 2.9286181926727295, "learning_rate": 9.25961442072552e-05, "loss": 1.3825, "step": 6164 }, { "epoch": 0.3675050661580641, "grad_norm": 3.386676549911499, "learning_rate": 9.259119115995609e-05, "loss": 1.5757, "step": 6166 }, { "epoch": 0.3676242698772202, "grad_norm": 2.860389471054077, "learning_rate": 9.258623658902434e-05, "loss": 1.3357, "step": 6168 }, { "epoch": 0.3677434735963762, "grad_norm": 2.9507806301116943, "learning_rate": 9.258128049463721e-05, "loss": 1.6047, "step": 6170 }, { "epoch": 0.36786267731553224, "grad_norm": 3.1106667518615723, "learning_rate": 9.257632287697197e-05, "loss": 1.4619, "step": 6172 }, { "epoch": 0.36798188103468826, "grad_norm": 3.101996421813965, "learning_rate": 9.257136373620601e-05, "loss": 1.4297, "step": 6174 }, { "epoch": 0.36810108475384434, "grad_norm": 3.2386417388916016, "learning_rate": 9.256640307251671e-05, "loss": 1.5738, "step": 6176 }, { "epoch": 0.36822028847300037, "grad_norm": 3.1920413970947266, "learning_rate": 9.256144088608152e-05, "loss": 1.5315, "step": 6178 }, { "epoch": 0.3683394921921564, "grad_norm": 3.132539987564087, "learning_rate": 9.255647717707796e-05, "loss": 1.6184, "step": 6180 }, { "epoch": 0.3684586959113124, "grad_norm": 2.679598569869995, "learning_rate": 9.255151194568363e-05, "loss": 1.407, "step": 6182 }, { "epoch": 0.3685778996304685, "grad_norm": 3.1814048290252686, "learning_rate": 9.25465451920761e-05, "loss": 1.5864, "step": 6184 }, { "epoch": 0.3686971033496245, "grad_norm": 2.975311040878296, "learning_rate": 9.25415769164331e-05, "loss": 1.5176, "step": 6186 }, { "epoch": 0.36881630706878055, "grad_norm": 3.2800252437591553, "learning_rate": 9.253660711893233e-05, "loss": 1.6346, "step": 6188 }, { "epoch": 0.36893551078793657, "grad_norm": 3.227907419204712, "learning_rate": 9.253163579975159e-05, "loss": 1.3833, "step": 6190 }, { "epoch": 0.3690547145070926, "grad_norm": 3.212534189224243, "learning_rate": 9.252666295906872e-05, "loss": 1.5171, "step": 6192 }, { "epoch": 0.3691739182262487, "grad_norm": 2.7388908863067627, "learning_rate": 9.25216885970616e-05, "loss": 1.4713, "step": 6194 }, { "epoch": 0.3692931219454047, "grad_norm": 2.995266914367676, "learning_rate": 9.25167127139082e-05, "loss": 1.5534, "step": 6196 }, { "epoch": 0.3694123256645607, "grad_norm": 2.568828582763672, "learning_rate": 9.251173530978651e-05, "loss": 1.6121, "step": 6198 }, { "epoch": 0.36953152938371675, "grad_norm": 3.345082998275757, "learning_rate": 9.250675638487461e-05, "loss": 1.4974, "step": 6200 }, { "epoch": 0.36965073310287283, "grad_norm": 3.0315628051757812, "learning_rate": 9.250177593935059e-05, "loss": 1.2953, "step": 6202 }, { "epoch": 0.36976993682202886, "grad_norm": 2.8051490783691406, "learning_rate": 9.249679397339264e-05, "loss": 1.4291, "step": 6204 }, { "epoch": 0.3698891405411849, "grad_norm": 3.692209482192993, "learning_rate": 9.249181048717895e-05, "loss": 1.4517, "step": 6206 }, { "epoch": 0.3700083442603409, "grad_norm": 3.3065154552459717, "learning_rate": 9.248682548088782e-05, "loss": 1.5463, "step": 6208 }, { "epoch": 0.370127547979497, "grad_norm": 2.9311773777008057, "learning_rate": 9.248183895469758e-05, "loss": 1.4765, "step": 6210 }, { "epoch": 0.370246751698653, "grad_norm": 3.0818891525268555, "learning_rate": 9.24768509087866e-05, "loss": 1.5258, "step": 6212 }, { "epoch": 0.37036595541780903, "grad_norm": 2.948866128921509, "learning_rate": 9.247186134333334e-05, "loss": 1.5326, "step": 6214 }, { "epoch": 0.37048515913696506, "grad_norm": 3.1422019004821777, "learning_rate": 9.246687025851628e-05, "loss": 1.5411, "step": 6216 }, { "epoch": 0.3706043628561211, "grad_norm": 3.228635311126709, "learning_rate": 9.246187765451397e-05, "loss": 1.4527, "step": 6218 }, { "epoch": 0.37072356657527716, "grad_norm": 2.8665809631347656, "learning_rate": 9.245688353150502e-05, "loss": 1.3088, "step": 6220 }, { "epoch": 0.3708427702944332, "grad_norm": 2.746981382369995, "learning_rate": 9.245188788966808e-05, "loss": 1.3508, "step": 6222 }, { "epoch": 0.3709619740135892, "grad_norm": 3.1950604915618896, "learning_rate": 9.244689072918186e-05, "loss": 1.4778, "step": 6224 }, { "epoch": 0.37108117773274524, "grad_norm": 3.0930747985839844, "learning_rate": 9.244189205022513e-05, "loss": 1.5677, "step": 6226 }, { "epoch": 0.3712003814519013, "grad_norm": 3.3744096755981445, "learning_rate": 9.24368918529767e-05, "loss": 1.491, "step": 6228 }, { "epoch": 0.37131958517105734, "grad_norm": 3.440490484237671, "learning_rate": 9.243189013761547e-05, "loss": 1.5813, "step": 6230 }, { "epoch": 0.37143878889021337, "grad_norm": 3.1518969535827637, "learning_rate": 9.242688690432034e-05, "loss": 1.2901, "step": 6232 }, { "epoch": 0.3715579926093694, "grad_norm": 2.9591002464294434, "learning_rate": 9.24218821532703e-05, "loss": 1.4276, "step": 6234 }, { "epoch": 0.37167719632852547, "grad_norm": 3.025712251663208, "learning_rate": 9.24168758846444e-05, "loss": 1.3822, "step": 6236 }, { "epoch": 0.3717964000476815, "grad_norm": 3.300996780395508, "learning_rate": 9.241186809862171e-05, "loss": 1.6513, "step": 6238 }, { "epoch": 0.3719156037668375, "grad_norm": 2.8518264293670654, "learning_rate": 9.24068587953814e-05, "loss": 1.4192, "step": 6240 }, { "epoch": 0.37203480748599355, "grad_norm": 2.7644286155700684, "learning_rate": 9.240184797510265e-05, "loss": 1.5027, "step": 6242 }, { "epoch": 0.3721540112051496, "grad_norm": 3.1240155696868896, "learning_rate": 9.239683563796472e-05, "loss": 1.5289, "step": 6244 }, { "epoch": 0.37227321492430565, "grad_norm": 3.3047425746917725, "learning_rate": 9.239182178414692e-05, "loss": 1.5443, "step": 6246 }, { "epoch": 0.3723924186434617, "grad_norm": 3.3741161823272705, "learning_rate": 9.238680641382862e-05, "loss": 1.6011, "step": 6248 }, { "epoch": 0.3725116223626177, "grad_norm": 2.811885118484497, "learning_rate": 9.238178952718923e-05, "loss": 1.3487, "step": 6250 }, { "epoch": 0.3726308260817737, "grad_norm": 3.0632736682891846, "learning_rate": 9.237677112440821e-05, "loss": 1.5495, "step": 6252 }, { "epoch": 0.3727500298009298, "grad_norm": 3.2519421577453613, "learning_rate": 9.23717512056651e-05, "loss": 1.5308, "step": 6254 }, { "epoch": 0.37286923352008583, "grad_norm": 3.3187716007232666, "learning_rate": 9.236672977113948e-05, "loss": 1.4682, "step": 6256 }, { "epoch": 0.37298843723924185, "grad_norm": 3.199998617172241, "learning_rate": 9.236170682101097e-05, "loss": 1.5488, "step": 6258 }, { "epoch": 0.3731076409583979, "grad_norm": 3.4027369022369385, "learning_rate": 9.23566823554593e-05, "loss": 1.7149, "step": 6260 }, { "epoch": 0.37322684467755396, "grad_norm": 3.061403512954712, "learning_rate": 9.235165637466414e-05, "loss": 1.5565, "step": 6262 }, { "epoch": 0.37334604839671, "grad_norm": 3.009673595428467, "learning_rate": 9.234662887880534e-05, "loss": 1.4854, "step": 6264 }, { "epoch": 0.373465252115866, "grad_norm": 3.115523099899292, "learning_rate": 9.234159986806275e-05, "loss": 1.5242, "step": 6266 }, { "epoch": 0.37358445583502203, "grad_norm": 3.1952552795410156, "learning_rate": 9.233656934261626e-05, "loss": 1.5409, "step": 6268 }, { "epoch": 0.3737036595541781, "grad_norm": 3.251732110977173, "learning_rate": 9.233153730264581e-05, "loss": 1.4695, "step": 6270 }, { "epoch": 0.37382286327333414, "grad_norm": 3.1389875411987305, "learning_rate": 9.232650374833146e-05, "loss": 1.7476, "step": 6272 }, { "epoch": 0.37394206699249016, "grad_norm": 3.098037004470825, "learning_rate": 9.232146867985323e-05, "loss": 1.5182, "step": 6274 }, { "epoch": 0.3740612707116462, "grad_norm": 2.919199228286743, "learning_rate": 9.231643209739128e-05, "loss": 1.5033, "step": 6276 }, { "epoch": 0.37418047443080227, "grad_norm": 3.0533957481384277, "learning_rate": 9.231139400112576e-05, "loss": 1.4949, "step": 6278 }, { "epoch": 0.3742996781499583, "grad_norm": 3.0644586086273193, "learning_rate": 9.230635439123692e-05, "loss": 1.4426, "step": 6280 }, { "epoch": 0.3744188818691143, "grad_norm": 2.991454601287842, "learning_rate": 9.230131326790503e-05, "loss": 1.3973, "step": 6282 }, { "epoch": 0.37453808558827034, "grad_norm": 2.7206151485443115, "learning_rate": 9.229627063131044e-05, "loss": 1.463, "step": 6284 }, { "epoch": 0.37465728930742637, "grad_norm": 3.1498875617980957, "learning_rate": 9.229122648163351e-05, "loss": 1.5823, "step": 6286 }, { "epoch": 0.37477649302658245, "grad_norm": 3.1586005687713623, "learning_rate": 9.228618081905473e-05, "loss": 1.569, "step": 6288 }, { "epoch": 0.37489569674573847, "grad_norm": 3.362332582473755, "learning_rate": 9.228113364375458e-05, "loss": 1.548, "step": 6290 }, { "epoch": 0.3750149004648945, "grad_norm": 3.196453094482422, "learning_rate": 9.227608495591361e-05, "loss": 1.4932, "step": 6292 }, { "epoch": 0.3751341041840505, "grad_norm": 3.2448363304138184, "learning_rate": 9.227103475571244e-05, "loss": 1.348, "step": 6294 }, { "epoch": 0.3752533079032066, "grad_norm": 2.6840925216674805, "learning_rate": 9.226598304333174e-05, "loss": 1.4316, "step": 6296 }, { "epoch": 0.3753725116223626, "grad_norm": 3.345693826675415, "learning_rate": 9.22609298189522e-05, "loss": 1.4974, "step": 6298 }, { "epoch": 0.37549171534151865, "grad_norm": 3.0339083671569824, "learning_rate": 9.22558750827546e-05, "loss": 1.4603, "step": 6300 }, { "epoch": 0.3756109190606747, "grad_norm": 3.2486374378204346, "learning_rate": 9.225081883491977e-05, "loss": 1.5992, "step": 6302 }, { "epoch": 0.37573012277983076, "grad_norm": 2.965641736984253, "learning_rate": 9.224576107562861e-05, "loss": 1.4367, "step": 6304 }, { "epoch": 0.3758493264989868, "grad_norm": 3.392976999282837, "learning_rate": 9.224070180506202e-05, "loss": 1.7119, "step": 6306 }, { "epoch": 0.3759685302181428, "grad_norm": 3.084547519683838, "learning_rate": 9.223564102340101e-05, "loss": 1.4265, "step": 6308 }, { "epoch": 0.37608773393729883, "grad_norm": 3.0885682106018066, "learning_rate": 9.223057873082661e-05, "loss": 1.4213, "step": 6310 }, { "epoch": 0.37620693765645485, "grad_norm": 3.002877950668335, "learning_rate": 9.222551492751991e-05, "loss": 1.449, "step": 6312 }, { "epoch": 0.37632614137561093, "grad_norm": 3.109955310821533, "learning_rate": 9.222044961366208e-05, "loss": 1.3411, "step": 6314 }, { "epoch": 0.37644534509476696, "grad_norm": 3.1500368118286133, "learning_rate": 9.221538278943432e-05, "loss": 1.4034, "step": 6316 }, { "epoch": 0.376564548813923, "grad_norm": 3.0718233585357666, "learning_rate": 9.221031445501785e-05, "loss": 1.3598, "step": 6318 }, { "epoch": 0.376683752533079, "grad_norm": 3.4397647380828857, "learning_rate": 9.220524461059403e-05, "loss": 1.5363, "step": 6320 }, { "epoch": 0.3768029562522351, "grad_norm": 3.2371723651885986, "learning_rate": 9.22001732563442e-05, "loss": 1.6362, "step": 6322 }, { "epoch": 0.3769221599713911, "grad_norm": 2.9681406021118164, "learning_rate": 9.21951003924498e-05, "loss": 1.5972, "step": 6324 }, { "epoch": 0.37704136369054714, "grad_norm": 3.1830899715423584, "learning_rate": 9.219002601909227e-05, "loss": 1.7399, "step": 6326 }, { "epoch": 0.37716056740970316, "grad_norm": 2.930710792541504, "learning_rate": 9.218495013645317e-05, "loss": 1.5364, "step": 6328 }, { "epoch": 0.37727977112885924, "grad_norm": 3.051072359085083, "learning_rate": 9.217987274471406e-05, "loss": 1.3714, "step": 6330 }, { "epoch": 0.37739897484801527, "grad_norm": 3.6787736415863037, "learning_rate": 9.21747938440566e-05, "loss": 1.5788, "step": 6332 }, { "epoch": 0.3775181785671713, "grad_norm": 3.382267713546753, "learning_rate": 9.216971343466245e-05, "loss": 1.4981, "step": 6334 }, { "epoch": 0.3776373822863273, "grad_norm": 2.932062864303589, "learning_rate": 9.216463151671337e-05, "loss": 1.5791, "step": 6336 }, { "epoch": 0.3777565860054834, "grad_norm": 2.983574151992798, "learning_rate": 9.215954809039118e-05, "loss": 1.493, "step": 6338 }, { "epoch": 0.3778757897246394, "grad_norm": 2.8859851360321045, "learning_rate": 9.215446315587768e-05, "loss": 1.3612, "step": 6340 }, { "epoch": 0.37799499344379545, "grad_norm": 3.114478349685669, "learning_rate": 9.214937671335482e-05, "loss": 1.3973, "step": 6342 }, { "epoch": 0.37811419716295147, "grad_norm": 3.0081825256347656, "learning_rate": 9.214428876300453e-05, "loss": 1.5833, "step": 6344 }, { "epoch": 0.3782334008821075, "grad_norm": 3.44064998626709, "learning_rate": 9.213919930500882e-05, "loss": 1.5584, "step": 6346 }, { "epoch": 0.3783526046012636, "grad_norm": 2.981821060180664, "learning_rate": 9.213410833954981e-05, "loss": 1.5351, "step": 6348 }, { "epoch": 0.3784718083204196, "grad_norm": 2.8425729274749756, "learning_rate": 9.212901586680956e-05, "loss": 1.4435, "step": 6350 }, { "epoch": 0.3785910120395756, "grad_norm": 2.9961955547332764, "learning_rate": 9.212392188697027e-05, "loss": 1.5342, "step": 6352 }, { "epoch": 0.37871021575873165, "grad_norm": 3.1610031127929688, "learning_rate": 9.211882640021418e-05, "loss": 1.4277, "step": 6354 }, { "epoch": 0.37882941947788773, "grad_norm": 3.2417209148406982, "learning_rate": 9.211372940672356e-05, "loss": 1.4787, "step": 6356 }, { "epoch": 0.37894862319704375, "grad_norm": 3.116273880004883, "learning_rate": 9.210863090668076e-05, "loss": 1.3415, "step": 6358 }, { "epoch": 0.3790678269161998, "grad_norm": 3.092738628387451, "learning_rate": 9.210353090026813e-05, "loss": 1.4335, "step": 6360 }, { "epoch": 0.3791870306353558, "grad_norm": 3.179461717605591, "learning_rate": 9.209842938766816e-05, "loss": 1.5038, "step": 6362 }, { "epoch": 0.3793062343545119, "grad_norm": 2.8163981437683105, "learning_rate": 9.209332636906334e-05, "loss": 1.3477, "step": 6364 }, { "epoch": 0.3794254380736679, "grad_norm": 3.3586559295654297, "learning_rate": 9.20882218446362e-05, "loss": 1.525, "step": 6366 }, { "epoch": 0.37954464179282393, "grad_norm": 3.1753652095794678, "learning_rate": 9.208311581456937e-05, "loss": 1.4878, "step": 6368 }, { "epoch": 0.37966384551197996, "grad_norm": 3.2698724269866943, "learning_rate": 9.20780082790455e-05, "loss": 1.5738, "step": 6370 }, { "epoch": 0.37978304923113604, "grad_norm": 2.985924243927002, "learning_rate": 9.20728992382473e-05, "loss": 1.6161, "step": 6372 }, { "epoch": 0.37990225295029206, "grad_norm": 3.248769998550415, "learning_rate": 9.206778869235754e-05, "loss": 1.4914, "step": 6374 }, { "epoch": 0.3800214566694481, "grad_norm": 3.0873124599456787, "learning_rate": 9.206267664155907e-05, "loss": 1.4849, "step": 6376 }, { "epoch": 0.3801406603886041, "grad_norm": 2.8476240634918213, "learning_rate": 9.205756308603471e-05, "loss": 1.4976, "step": 6378 }, { "epoch": 0.38025986410776014, "grad_norm": 3.014453649520874, "learning_rate": 9.205244802596745e-05, "loss": 1.4316, "step": 6380 }, { "epoch": 0.3803790678269162, "grad_norm": 3.539113759994507, "learning_rate": 9.204733146154022e-05, "loss": 1.4368, "step": 6382 }, { "epoch": 0.38049827154607224, "grad_norm": 3.159184455871582, "learning_rate": 9.204221339293608e-05, "loss": 1.5123, "step": 6384 }, { "epoch": 0.38061747526522827, "grad_norm": 3.24882435798645, "learning_rate": 9.203709382033814e-05, "loss": 1.5046, "step": 6386 }, { "epoch": 0.3807366789843843, "grad_norm": 3.130180597305298, "learning_rate": 9.20319727439295e-05, "loss": 1.535, "step": 6388 }, { "epoch": 0.38085588270354037, "grad_norm": 3.3446059226989746, "learning_rate": 9.20268501638934e-05, "loss": 1.6419, "step": 6390 }, { "epoch": 0.3809750864226964, "grad_norm": 3.505857229232788, "learning_rate": 9.202172608041308e-05, "loss": 1.4575, "step": 6392 }, { "epoch": 0.3810942901418524, "grad_norm": 3.1726632118225098, "learning_rate": 9.201660049367183e-05, "loss": 1.5294, "step": 6394 }, { "epoch": 0.38121349386100845, "grad_norm": 2.9357635974884033, "learning_rate": 9.201147340385304e-05, "loss": 1.5545, "step": 6396 }, { "epoch": 0.3813326975801645, "grad_norm": 3.197927236557007, "learning_rate": 9.200634481114009e-05, "loss": 1.5171, "step": 6398 }, { "epoch": 0.38145190129932055, "grad_norm": 2.9427988529205322, "learning_rate": 9.200121471571647e-05, "loss": 1.5187, "step": 6400 }, { "epoch": 0.3815711050184766, "grad_norm": 3.190507650375366, "learning_rate": 9.199608311776569e-05, "loss": 1.559, "step": 6402 }, { "epoch": 0.3816903087376326, "grad_norm": 3.088956117630005, "learning_rate": 9.199095001747134e-05, "loss": 1.502, "step": 6404 }, { "epoch": 0.3818095124567886, "grad_norm": 3.3263049125671387, "learning_rate": 9.198581541501702e-05, "loss": 1.4017, "step": 6406 }, { "epoch": 0.3819287161759447, "grad_norm": 3.1313865184783936, "learning_rate": 9.198067931058643e-05, "loss": 1.5432, "step": 6408 }, { "epoch": 0.38204791989510073, "grad_norm": 3.1091601848602295, "learning_rate": 9.19755417043633e-05, "loss": 1.416, "step": 6410 }, { "epoch": 0.38216712361425675, "grad_norm": 3.0417308807373047, "learning_rate": 9.197040259653146e-05, "loss": 1.4936, "step": 6412 }, { "epoch": 0.3822863273334128, "grad_norm": 2.859609365463257, "learning_rate": 9.19652619872747e-05, "loss": 1.3737, "step": 6414 }, { "epoch": 0.38240553105256886, "grad_norm": 3.271678924560547, "learning_rate": 9.196011987677693e-05, "loss": 1.4728, "step": 6416 }, { "epoch": 0.3825247347717249, "grad_norm": 2.997920036315918, "learning_rate": 9.195497626522211e-05, "loss": 1.4099, "step": 6418 }, { "epoch": 0.3826439384908809, "grad_norm": 3.3794848918914795, "learning_rate": 9.194983115279422e-05, "loss": 1.495, "step": 6420 }, { "epoch": 0.38276314221003693, "grad_norm": 3.0681850910186768, "learning_rate": 9.194468453967737e-05, "loss": 1.487, "step": 6422 }, { "epoch": 0.382882345929193, "grad_norm": 3.0911829471588135, "learning_rate": 9.193953642605564e-05, "loss": 1.4064, "step": 6424 }, { "epoch": 0.38300154964834904, "grad_norm": 2.742919683456421, "learning_rate": 9.193438681211319e-05, "loss": 1.4486, "step": 6426 }, { "epoch": 0.38312075336750506, "grad_norm": 3.1367242336273193, "learning_rate": 9.192923569803426e-05, "loss": 1.5451, "step": 6428 }, { "epoch": 0.3832399570866611, "grad_norm": 2.8709654808044434, "learning_rate": 9.192408308400309e-05, "loss": 1.3618, "step": 6430 }, { "epoch": 0.38335916080581717, "grad_norm": 3.0753543376922607, "learning_rate": 9.191892897020403e-05, "loss": 1.3808, "step": 6432 }, { "epoch": 0.3834783645249732, "grad_norm": 3.0147478580474854, "learning_rate": 9.191377335682147e-05, "loss": 1.5246, "step": 6434 }, { "epoch": 0.3835975682441292, "grad_norm": 2.9684484004974365, "learning_rate": 9.190861624403981e-05, "loss": 1.5505, "step": 6436 }, { "epoch": 0.38371677196328524, "grad_norm": 3.04282283782959, "learning_rate": 9.190345763204358e-05, "loss": 1.4807, "step": 6438 }, { "epoch": 0.38383597568244127, "grad_norm": 3.072206735610962, "learning_rate": 9.189829752101728e-05, "loss": 1.5467, "step": 6440 }, { "epoch": 0.38395517940159735, "grad_norm": 3.216926097869873, "learning_rate": 9.189313591114555e-05, "loss": 1.4753, "step": 6442 }, { "epoch": 0.38407438312075337, "grad_norm": 3.415825128555298, "learning_rate": 9.188797280261298e-05, "loss": 1.4764, "step": 6444 }, { "epoch": 0.3841935868399094, "grad_norm": 2.9404289722442627, "learning_rate": 9.188280819560432e-05, "loss": 1.472, "step": 6446 }, { "epoch": 0.3843127905590654, "grad_norm": 3.3742191791534424, "learning_rate": 9.187764209030432e-05, "loss": 1.4612, "step": 6448 }, { "epoch": 0.3844319942782215, "grad_norm": 3.2562875747680664, "learning_rate": 9.187247448689776e-05, "loss": 1.5403, "step": 6450 }, { "epoch": 0.3845511979973775, "grad_norm": 2.9288241863250732, "learning_rate": 9.186730538556954e-05, "loss": 1.5768, "step": 6452 }, { "epoch": 0.38467040171653355, "grad_norm": 2.9799411296844482, "learning_rate": 9.186213478650454e-05, "loss": 1.5557, "step": 6454 }, { "epoch": 0.3847896054356896, "grad_norm": 3.0609447956085205, "learning_rate": 9.185696268988778e-05, "loss": 1.4894, "step": 6456 }, { "epoch": 0.38490880915484565, "grad_norm": 3.0658323764801025, "learning_rate": 9.185178909590422e-05, "loss": 1.3746, "step": 6458 }, { "epoch": 0.3850280128740017, "grad_norm": 2.900233030319214, "learning_rate": 9.184661400473898e-05, "loss": 1.3106, "step": 6460 }, { "epoch": 0.3851472165931577, "grad_norm": 3.3602139949798584, "learning_rate": 9.184143741657719e-05, "loss": 1.5553, "step": 6462 }, { "epoch": 0.38526642031231373, "grad_norm": 3.411714553833008, "learning_rate": 9.183625933160401e-05, "loss": 1.299, "step": 6464 }, { "epoch": 0.3853856240314698, "grad_norm": 3.271225929260254, "learning_rate": 9.183107975000472e-05, "loss": 1.5155, "step": 6466 }, { "epoch": 0.38550482775062583, "grad_norm": 2.7829461097717285, "learning_rate": 9.182589867196454e-05, "loss": 1.4641, "step": 6468 }, { "epoch": 0.38562403146978186, "grad_norm": 3.073212146759033, "learning_rate": 9.182071609766889e-05, "loss": 1.4955, "step": 6470 }, { "epoch": 0.3857432351889379, "grad_norm": 3.0802998542785645, "learning_rate": 9.181553202730313e-05, "loss": 1.5057, "step": 6472 }, { "epoch": 0.3858624389080939, "grad_norm": 2.8543221950531006, "learning_rate": 9.181034646105272e-05, "loss": 1.3369, "step": 6474 }, { "epoch": 0.38598164262725, "grad_norm": 2.824394702911377, "learning_rate": 9.180515939910316e-05, "loss": 1.3866, "step": 6476 }, { "epoch": 0.386100846346406, "grad_norm": 3.0901541709899902, "learning_rate": 9.179997084164002e-05, "loss": 1.3275, "step": 6478 }, { "epoch": 0.38622005006556204, "grad_norm": 3.5651121139526367, "learning_rate": 9.179478078884891e-05, "loss": 1.4259, "step": 6480 }, { "epoch": 0.38633925378471806, "grad_norm": 3.212860107421875, "learning_rate": 9.178958924091547e-05, "loss": 1.4452, "step": 6482 }, { "epoch": 0.38645845750387414, "grad_norm": 3.127648115158081, "learning_rate": 9.178439619802547e-05, "loss": 1.4863, "step": 6484 }, { "epoch": 0.38657766122303017, "grad_norm": 3.200079917907715, "learning_rate": 9.177920166036464e-05, "loss": 1.4667, "step": 6486 }, { "epoch": 0.3866968649421862, "grad_norm": 3.163461685180664, "learning_rate": 9.177400562811883e-05, "loss": 1.6824, "step": 6488 }, { "epoch": 0.3868160686613422, "grad_norm": 3.1507294178009033, "learning_rate": 9.176880810147389e-05, "loss": 1.3839, "step": 6490 }, { "epoch": 0.3869352723804983, "grad_norm": 2.716488838195801, "learning_rate": 9.176360908061579e-05, "loss": 1.4572, "step": 6492 }, { "epoch": 0.3870544760996543, "grad_norm": 3.285050868988037, "learning_rate": 9.17584085657305e-05, "loss": 1.776, "step": 6494 }, { "epoch": 0.38717367981881035, "grad_norm": 3.0787203311920166, "learning_rate": 9.175320655700406e-05, "loss": 1.5059, "step": 6496 }, { "epoch": 0.38729288353796637, "grad_norm": 3.408456325531006, "learning_rate": 9.174800305462254e-05, "loss": 1.5154, "step": 6498 }, { "epoch": 0.3874120872571224, "grad_norm": 3.039179801940918, "learning_rate": 9.174279805877215e-05, "loss": 1.5462, "step": 6500 }, { "epoch": 0.3875312909762785, "grad_norm": 2.9405415058135986, "learning_rate": 9.173759156963902e-05, "loss": 1.3901, "step": 6502 }, { "epoch": 0.3876504946954345, "grad_norm": 3.0320558547973633, "learning_rate": 9.173238358740946e-05, "loss": 1.4403, "step": 6504 }, { "epoch": 0.3877696984145905, "grad_norm": 3.2046868801116943, "learning_rate": 9.172717411226974e-05, "loss": 1.3906, "step": 6506 }, { "epoch": 0.38788890213374655, "grad_norm": 3.176084041595459, "learning_rate": 9.172196314440624e-05, "loss": 1.4441, "step": 6508 }, { "epoch": 0.38800810585290263, "grad_norm": 3.225867748260498, "learning_rate": 9.171675068400536e-05, "loss": 1.5563, "step": 6510 }, { "epoch": 0.38812730957205865, "grad_norm": 2.850637197494507, "learning_rate": 9.171153673125359e-05, "loss": 1.5166, "step": 6512 }, { "epoch": 0.3882465132912147, "grad_norm": 2.9932150840759277, "learning_rate": 9.170632128633743e-05, "loss": 1.4926, "step": 6514 }, { "epoch": 0.3883657170103707, "grad_norm": 3.1401655673980713, "learning_rate": 9.170110434944346e-05, "loss": 1.3427, "step": 6516 }, { "epoch": 0.3884849207295268, "grad_norm": 3.2247672080993652, "learning_rate": 9.169588592075831e-05, "loss": 1.4992, "step": 6518 }, { "epoch": 0.3886041244486828, "grad_norm": 3.0506176948547363, "learning_rate": 9.169066600046865e-05, "loss": 1.4337, "step": 6520 }, { "epoch": 0.38872332816783883, "grad_norm": 2.849949598312378, "learning_rate": 9.168544458876123e-05, "loss": 1.3821, "step": 6522 }, { "epoch": 0.38884253188699486, "grad_norm": 3.4376959800720215, "learning_rate": 9.168022168582285e-05, "loss": 1.716, "step": 6524 }, { "epoch": 0.38896173560615094, "grad_norm": 3.124131441116333, "learning_rate": 9.16749972918403e-05, "loss": 1.4129, "step": 6526 }, { "epoch": 0.38908093932530696, "grad_norm": 3.0915112495422363, "learning_rate": 9.166977140700053e-05, "loss": 1.4427, "step": 6528 }, { "epoch": 0.389200143044463, "grad_norm": 3.1312382221221924, "learning_rate": 9.166454403149045e-05, "loss": 1.5089, "step": 6530 }, { "epoch": 0.389319346763619, "grad_norm": 3.221221685409546, "learning_rate": 9.165931516549708e-05, "loss": 1.4024, "step": 6532 }, { "epoch": 0.38943855048277504, "grad_norm": 3.0893354415893555, "learning_rate": 9.165408480920748e-05, "loss": 1.5323, "step": 6534 }, { "epoch": 0.3895577542019311, "grad_norm": 3.223233938217163, "learning_rate": 9.164885296280876e-05, "loss": 1.3462, "step": 6536 }, { "epoch": 0.38967695792108714, "grad_norm": 2.835059642791748, "learning_rate": 9.164361962648805e-05, "loss": 1.3628, "step": 6538 }, { "epoch": 0.38979616164024317, "grad_norm": 3.0359959602355957, "learning_rate": 9.163838480043259e-05, "loss": 1.4186, "step": 6540 }, { "epoch": 0.3899153653593992, "grad_norm": 3.104283571243286, "learning_rate": 9.163314848482962e-05, "loss": 1.4701, "step": 6542 }, { "epoch": 0.39003456907855527, "grad_norm": 3.3949778079986572, "learning_rate": 9.162791067986651e-05, "loss": 1.536, "step": 6544 }, { "epoch": 0.3901537727977113, "grad_norm": 2.9642159938812256, "learning_rate": 9.162267138573059e-05, "loss": 1.4517, "step": 6546 }, { "epoch": 0.3902729765168673, "grad_norm": 2.987941265106201, "learning_rate": 9.161743060260933e-05, "loss": 1.4964, "step": 6548 }, { "epoch": 0.39039218023602335, "grad_norm": 5.045437812805176, "learning_rate": 9.161218833069016e-05, "loss": 1.7325, "step": 6550 }, { "epoch": 0.3905113839551794, "grad_norm": 2.7246620655059814, "learning_rate": 9.160694457016064e-05, "loss": 1.3698, "step": 6552 }, { "epoch": 0.39063058767433545, "grad_norm": 2.8276801109313965, "learning_rate": 9.160169932120837e-05, "loss": 1.3312, "step": 6554 }, { "epoch": 0.3907497913934915, "grad_norm": 3.0454015731811523, "learning_rate": 9.159645258402097e-05, "loss": 1.4784, "step": 6556 }, { "epoch": 0.3908689951126475, "grad_norm": 2.9287056922912598, "learning_rate": 9.159120435878614e-05, "loss": 1.3674, "step": 6558 }, { "epoch": 0.3909881988318036, "grad_norm": 3.0939104557037354, "learning_rate": 9.158595464569162e-05, "loss": 1.4824, "step": 6560 }, { "epoch": 0.3911074025509596, "grad_norm": 3.0971951484680176, "learning_rate": 9.158070344492524e-05, "loss": 1.322, "step": 6562 }, { "epoch": 0.39122660627011563, "grad_norm": 3.151232957839966, "learning_rate": 9.15754507566748e-05, "loss": 1.5344, "step": 6564 }, { "epoch": 0.39134580998927165, "grad_norm": 3.202833652496338, "learning_rate": 9.157019658112825e-05, "loss": 1.4867, "step": 6566 }, { "epoch": 0.3914650137084277, "grad_norm": 3.2629613876342773, "learning_rate": 9.156494091847354e-05, "loss": 1.4764, "step": 6568 }, { "epoch": 0.39158421742758376, "grad_norm": 3.043257474899292, "learning_rate": 9.155968376889869e-05, "loss": 1.5885, "step": 6570 }, { "epoch": 0.3917034211467398, "grad_norm": 3.3022611141204834, "learning_rate": 9.155442513259174e-05, "loss": 1.4909, "step": 6572 }, { "epoch": 0.3918226248658958, "grad_norm": 2.711249828338623, "learning_rate": 9.154916500974083e-05, "loss": 1.4418, "step": 6574 }, { "epoch": 0.39194182858505183, "grad_norm": 3.0936617851257324, "learning_rate": 9.154390340053414e-05, "loss": 1.3815, "step": 6576 }, { "epoch": 0.3920610323042079, "grad_norm": 2.769357919692993, "learning_rate": 9.153864030515988e-05, "loss": 1.4576, "step": 6578 }, { "epoch": 0.39218023602336394, "grad_norm": 3.2175638675689697, "learning_rate": 9.153337572380631e-05, "loss": 1.5898, "step": 6580 }, { "epoch": 0.39229943974251996, "grad_norm": 3.2500593662261963, "learning_rate": 9.152810965666181e-05, "loss": 1.4674, "step": 6582 }, { "epoch": 0.392418643461676, "grad_norm": 3.324204444885254, "learning_rate": 9.152284210391474e-05, "loss": 1.363, "step": 6584 }, { "epoch": 0.39253784718083207, "grad_norm": 2.9378063678741455, "learning_rate": 9.151757306575354e-05, "loss": 1.4409, "step": 6586 }, { "epoch": 0.3926570508999881, "grad_norm": 3.139061450958252, "learning_rate": 9.151230254236669e-05, "loss": 1.5074, "step": 6588 }, { "epoch": 0.3927762546191441, "grad_norm": 3.2549712657928467, "learning_rate": 9.150703053394275e-05, "loss": 1.4832, "step": 6590 }, { "epoch": 0.39289545833830014, "grad_norm": 3.094877243041992, "learning_rate": 9.150175704067032e-05, "loss": 1.4884, "step": 6592 }, { "epoch": 0.39301466205745617, "grad_norm": 3.252997636795044, "learning_rate": 9.149648206273803e-05, "loss": 1.4219, "step": 6594 }, { "epoch": 0.39313386577661225, "grad_norm": 3.1870908737182617, "learning_rate": 9.14912056003346e-05, "loss": 1.6012, "step": 6596 }, { "epoch": 0.39325306949576827, "grad_norm": 2.9790878295898438, "learning_rate": 9.148592765364881e-05, "loss": 1.3747, "step": 6598 }, { "epoch": 0.3933722732149243, "grad_norm": 3.3059935569763184, "learning_rate": 9.148064822286941e-05, "loss": 1.64, "step": 6600 }, { "epoch": 0.3934914769340803, "grad_norm": 3.3263227939605713, "learning_rate": 9.14753673081853e-05, "loss": 1.4756, "step": 6602 }, { "epoch": 0.3936106806532364, "grad_norm": 3.2112648487091064, "learning_rate": 9.147008490978542e-05, "loss": 1.5226, "step": 6604 }, { "epoch": 0.3937298843723924, "grad_norm": 3.098065137863159, "learning_rate": 9.14648010278587e-05, "loss": 1.609, "step": 6606 }, { "epoch": 0.39384908809154845, "grad_norm": 3.5691370964050293, "learning_rate": 9.145951566259419e-05, "loss": 1.4709, "step": 6608 }, { "epoch": 0.3939682918107045, "grad_norm": 2.999488353729248, "learning_rate": 9.145422881418094e-05, "loss": 1.4657, "step": 6610 }, { "epoch": 0.39408749552986055, "grad_norm": 2.9414403438568115, "learning_rate": 9.14489404828081e-05, "loss": 1.3799, "step": 6612 }, { "epoch": 0.3942066992490166, "grad_norm": 3.148829221725464, "learning_rate": 9.144365066866483e-05, "loss": 1.563, "step": 6614 }, { "epoch": 0.3943259029681726, "grad_norm": 3.0570883750915527, "learning_rate": 9.143835937194038e-05, "loss": 1.3948, "step": 6616 }, { "epoch": 0.39444510668732863, "grad_norm": 2.9361352920532227, "learning_rate": 9.143306659282405e-05, "loss": 1.3966, "step": 6618 }, { "epoch": 0.3945643104064847, "grad_norm": 2.9241912364959717, "learning_rate": 9.142777233150515e-05, "loss": 1.2102, "step": 6620 }, { "epoch": 0.39468351412564073, "grad_norm": 2.990499496459961, "learning_rate": 9.142247658817311e-05, "loss": 1.5508, "step": 6622 }, { "epoch": 0.39480271784479676, "grad_norm": 3.1699795722961426, "learning_rate": 9.141717936301734e-05, "loss": 1.4977, "step": 6624 }, { "epoch": 0.3949219215639528, "grad_norm": 3.0068395137786865, "learning_rate": 9.141188065622735e-05, "loss": 1.4408, "step": 6626 }, { "epoch": 0.3950411252831088, "grad_norm": 2.923381805419922, "learning_rate": 9.140658046799272e-05, "loss": 1.5001, "step": 6628 }, { "epoch": 0.3951603290022649, "grad_norm": 3.12786602973938, "learning_rate": 9.140127879850302e-05, "loss": 1.5128, "step": 6630 }, { "epoch": 0.3952795327214209, "grad_norm": 2.7978105545043945, "learning_rate": 9.139597564794793e-05, "loss": 1.4138, "step": 6632 }, { "epoch": 0.39539873644057694, "grad_norm": 3.1400110721588135, "learning_rate": 9.139067101651716e-05, "loss": 1.5233, "step": 6634 }, { "epoch": 0.39551794015973296, "grad_norm": 2.8988404273986816, "learning_rate": 9.138536490440046e-05, "loss": 1.351, "step": 6636 }, { "epoch": 0.39563714387888904, "grad_norm": 3.0605967044830322, "learning_rate": 9.138005731178768e-05, "loss": 1.4285, "step": 6638 }, { "epoch": 0.39575634759804507, "grad_norm": 3.24806809425354, "learning_rate": 9.137474823886865e-05, "loss": 1.5649, "step": 6640 }, { "epoch": 0.3958755513172011, "grad_norm": 2.8490543365478516, "learning_rate": 9.136943768583332e-05, "loss": 1.5697, "step": 6642 }, { "epoch": 0.3959947550363571, "grad_norm": 3.187243700027466, "learning_rate": 9.136412565287165e-05, "loss": 1.6271, "step": 6644 }, { "epoch": 0.3961139587555132, "grad_norm": 2.8736519813537598, "learning_rate": 9.13588121401737e-05, "loss": 1.4635, "step": 6646 }, { "epoch": 0.3962331624746692, "grad_norm": 2.8424606323242188, "learning_rate": 9.135349714792952e-05, "loss": 1.4194, "step": 6648 }, { "epoch": 0.39635236619382525, "grad_norm": 3.391202926635742, "learning_rate": 9.134818067632927e-05, "loss": 1.5667, "step": 6650 }, { "epoch": 0.39647156991298127, "grad_norm": 3.2076523303985596, "learning_rate": 9.134286272556309e-05, "loss": 1.6849, "step": 6652 }, { "epoch": 0.39659077363213735, "grad_norm": 3.175849437713623, "learning_rate": 9.133754329582129e-05, "loss": 1.3583, "step": 6654 }, { "epoch": 0.3967099773512934, "grad_norm": 3.0068836212158203, "learning_rate": 9.133222238729413e-05, "loss": 1.3068, "step": 6656 }, { "epoch": 0.3968291810704494, "grad_norm": 3.39634370803833, "learning_rate": 9.132690000017195e-05, "loss": 1.5125, "step": 6658 }, { "epoch": 0.3969483847896054, "grad_norm": 2.7725284099578857, "learning_rate": 9.132157613464516e-05, "loss": 1.4761, "step": 6660 }, { "epoch": 0.39706758850876145, "grad_norm": 3.164355754852295, "learning_rate": 9.13162507909042e-05, "loss": 1.6638, "step": 6662 }, { "epoch": 0.39718679222791753, "grad_norm": 2.971327543258667, "learning_rate": 9.131092396913961e-05, "loss": 1.6315, "step": 6664 }, { "epoch": 0.39730599594707355, "grad_norm": 3.171140193939209, "learning_rate": 9.130559566954191e-05, "loss": 1.6982, "step": 6666 }, { "epoch": 0.3974251996662296, "grad_norm": 2.9976842403411865, "learning_rate": 9.130026589230173e-05, "loss": 1.416, "step": 6668 }, { "epoch": 0.3975444033853856, "grad_norm": 3.108184814453125, "learning_rate": 9.129493463760973e-05, "loss": 1.4847, "step": 6670 }, { "epoch": 0.3976636071045417, "grad_norm": 3.039062976837158, "learning_rate": 9.128960190565664e-05, "loss": 1.3465, "step": 6672 }, { "epoch": 0.3977828108236977, "grad_norm": 3.060480833053589, "learning_rate": 9.128426769663321e-05, "loss": 1.5475, "step": 6674 }, { "epoch": 0.39790201454285373, "grad_norm": 3.753566265106201, "learning_rate": 9.127893201073027e-05, "loss": 1.4876, "step": 6676 }, { "epoch": 0.39802121826200976, "grad_norm": 3.2065908908843994, "learning_rate": 9.12735948481387e-05, "loss": 1.4841, "step": 6678 }, { "epoch": 0.39814042198116584, "grad_norm": 3.249243974685669, "learning_rate": 9.126825620904943e-05, "loss": 1.68, "step": 6680 }, { "epoch": 0.39825962570032186, "grad_norm": 3.2916929721832275, "learning_rate": 9.126291609365343e-05, "loss": 1.4544, "step": 6682 }, { "epoch": 0.3983788294194779, "grad_norm": 3.0752007961273193, "learning_rate": 9.125757450214175e-05, "loss": 1.683, "step": 6684 }, { "epoch": 0.3984980331386339, "grad_norm": 2.9651618003845215, "learning_rate": 9.125223143470547e-05, "loss": 1.4802, "step": 6686 }, { "epoch": 0.39861723685778994, "grad_norm": 2.6893062591552734, "learning_rate": 9.124688689153572e-05, "loss": 1.3773, "step": 6688 }, { "epoch": 0.398736440576946, "grad_norm": 3.218385934829712, "learning_rate": 9.12415408728237e-05, "loss": 1.4526, "step": 6690 }, { "epoch": 0.39885564429610204, "grad_norm": 3.1860597133636475, "learning_rate": 9.123619337876066e-05, "loss": 1.3805, "step": 6692 }, { "epoch": 0.39897484801525807, "grad_norm": 3.4799113273620605, "learning_rate": 9.12308444095379e-05, "loss": 1.4148, "step": 6694 }, { "epoch": 0.3990940517344141, "grad_norm": 3.5261013507843018, "learning_rate": 9.122549396534677e-05, "loss": 1.5155, "step": 6696 }, { "epoch": 0.39921325545357017, "grad_norm": 3.2200708389282227, "learning_rate": 9.122014204637866e-05, "loss": 1.4523, "step": 6698 }, { "epoch": 0.3993324591727262, "grad_norm": 3.2982304096221924, "learning_rate": 9.121478865282502e-05, "loss": 1.5964, "step": 6700 }, { "epoch": 0.3994516628918822, "grad_norm": 3.116912841796875, "learning_rate": 9.12094337848774e-05, "loss": 1.6971, "step": 6702 }, { "epoch": 0.39957086661103824, "grad_norm": 2.904448986053467, "learning_rate": 9.120407744272734e-05, "loss": 1.6132, "step": 6704 }, { "epoch": 0.3996900703301943, "grad_norm": 2.903820037841797, "learning_rate": 9.119871962656643e-05, "loss": 1.4551, "step": 6706 }, { "epoch": 0.39980927404935035, "grad_norm": 2.8989455699920654, "learning_rate": 9.119336033658638e-05, "loss": 1.302, "step": 6708 }, { "epoch": 0.3999284777685064, "grad_norm": 3.1904373168945312, "learning_rate": 9.118799957297888e-05, "loss": 1.5351, "step": 6710 }, { "epoch": 0.4000476814876624, "grad_norm": 3.0440287590026855, "learning_rate": 9.11826373359357e-05, "loss": 1.4779, "step": 6712 }, { "epoch": 0.4001668852068185, "grad_norm": 2.96848201751709, "learning_rate": 9.11772736256487e-05, "loss": 1.4661, "step": 6714 }, { "epoch": 0.4002860889259745, "grad_norm": 3.3694496154785156, "learning_rate": 9.117190844230971e-05, "loss": 1.6758, "step": 6716 }, { "epoch": 0.40040529264513053, "grad_norm": 2.7853431701660156, "learning_rate": 9.11665417861107e-05, "loss": 1.4137, "step": 6718 }, { "epoch": 0.40052449636428655, "grad_norm": 2.8250014781951904, "learning_rate": 9.116117365724364e-05, "loss": 1.4202, "step": 6720 }, { "epoch": 0.4006437000834426, "grad_norm": 3.207322597503662, "learning_rate": 9.115580405590059e-05, "loss": 1.5473, "step": 6722 }, { "epoch": 0.40076290380259866, "grad_norm": 2.8126680850982666, "learning_rate": 9.115043298227358e-05, "loss": 1.5441, "step": 6724 }, { "epoch": 0.4008821075217547, "grad_norm": 3.0540945529937744, "learning_rate": 9.11450604365548e-05, "loss": 1.6283, "step": 6726 }, { "epoch": 0.4010013112409107, "grad_norm": 3.2534236907958984, "learning_rate": 9.113968641893644e-05, "loss": 1.3843, "step": 6728 }, { "epoch": 0.40112051496006673, "grad_norm": 3.121769905090332, "learning_rate": 9.113431092961074e-05, "loss": 1.3413, "step": 6730 }, { "epoch": 0.4012397186792228, "grad_norm": 3.0577359199523926, "learning_rate": 9.112893396876998e-05, "loss": 1.527, "step": 6732 }, { "epoch": 0.40135892239837884, "grad_norm": 2.96030330657959, "learning_rate": 9.112355553660655e-05, "loss": 1.4175, "step": 6734 }, { "epoch": 0.40147812611753486, "grad_norm": 3.164825677871704, "learning_rate": 9.111817563331282e-05, "loss": 1.5856, "step": 6736 }, { "epoch": 0.4015973298366909, "grad_norm": 2.8998117446899414, "learning_rate": 9.111279425908128e-05, "loss": 1.4169, "step": 6738 }, { "epoch": 0.40171653355584697, "grad_norm": 2.88403058052063, "learning_rate": 9.110741141410441e-05, "loss": 1.4671, "step": 6740 }, { "epoch": 0.401835737275003, "grad_norm": 3.282465696334839, "learning_rate": 9.110202709857478e-05, "loss": 1.4132, "step": 6742 }, { "epoch": 0.401954940994159, "grad_norm": 3.0310678482055664, "learning_rate": 9.109664131268503e-05, "loss": 1.5782, "step": 6744 }, { "epoch": 0.40207414471331504, "grad_norm": 2.959393262863159, "learning_rate": 9.10912540566278e-05, "loss": 1.4856, "step": 6746 }, { "epoch": 0.4021933484324711, "grad_norm": 3.245466947555542, "learning_rate": 9.108586533059582e-05, "loss": 1.5337, "step": 6748 }, { "epoch": 0.40231255215162715, "grad_norm": 3.024240732192993, "learning_rate": 9.108047513478188e-05, "loss": 1.3889, "step": 6750 }, { "epoch": 0.40243175587078317, "grad_norm": 3.0557162761688232, "learning_rate": 9.107508346937877e-05, "loss": 1.381, "step": 6752 }, { "epoch": 0.4025509595899392, "grad_norm": 2.7399253845214844, "learning_rate": 9.10696903345794e-05, "loss": 1.5146, "step": 6754 }, { "epoch": 0.4026701633090952, "grad_norm": 3.148747444152832, "learning_rate": 9.106429573057666e-05, "loss": 1.4879, "step": 6756 }, { "epoch": 0.4027893670282513, "grad_norm": 2.912980794906616, "learning_rate": 9.105889965756358e-05, "loss": 1.4604, "step": 6758 }, { "epoch": 0.4029085707474073, "grad_norm": 2.9966225624084473, "learning_rate": 9.105350211573318e-05, "loss": 1.4033, "step": 6760 }, { "epoch": 0.40302777446656335, "grad_norm": 3.3531246185302734, "learning_rate": 9.104810310527856e-05, "loss": 1.4459, "step": 6762 }, { "epoch": 0.4031469781857194, "grad_norm": 3.0033679008483887, "learning_rate": 9.104270262639283e-05, "loss": 1.4402, "step": 6764 }, { "epoch": 0.40326618190487545, "grad_norm": 3.1618804931640625, "learning_rate": 9.10373006792692e-05, "loss": 1.459, "step": 6766 }, { "epoch": 0.4033853856240315, "grad_norm": 3.0821311473846436, "learning_rate": 9.103189726410093e-05, "loss": 1.4565, "step": 6768 }, { "epoch": 0.4035045893431875, "grad_norm": 3.078052282333374, "learning_rate": 9.10264923810813e-05, "loss": 1.4362, "step": 6770 }, { "epoch": 0.4036237930623435, "grad_norm": 3.2153239250183105, "learning_rate": 9.102108603040366e-05, "loss": 1.5781, "step": 6772 }, { "epoch": 0.4037429967814996, "grad_norm": 3.2244386672973633, "learning_rate": 9.101567821226143e-05, "loss": 1.592, "step": 6774 }, { "epoch": 0.40386220050065563, "grad_norm": 3.01310658454895, "learning_rate": 9.101026892684805e-05, "loss": 1.3588, "step": 6776 }, { "epoch": 0.40398140421981166, "grad_norm": 2.998304843902588, "learning_rate": 9.100485817435703e-05, "loss": 1.6058, "step": 6778 }, { "epoch": 0.4041006079389677, "grad_norm": 3.1902949810028076, "learning_rate": 9.099944595498194e-05, "loss": 1.544, "step": 6780 }, { "epoch": 0.4042198116581237, "grad_norm": 3.217256546020508, "learning_rate": 9.09940322689164e-05, "loss": 1.3667, "step": 6782 }, { "epoch": 0.4043390153772798, "grad_norm": 3.311164140701294, "learning_rate": 9.098861711635405e-05, "loss": 1.5606, "step": 6784 }, { "epoch": 0.4044582190964358, "grad_norm": 3.0726726055145264, "learning_rate": 9.098320049748864e-05, "loss": 1.4073, "step": 6786 }, { "epoch": 0.40457742281559184, "grad_norm": 3.153935432434082, "learning_rate": 9.097778241251391e-05, "loss": 1.398, "step": 6788 }, { "epoch": 0.40469662653474786, "grad_norm": 3.4978859424591064, "learning_rate": 9.097236286162372e-05, "loss": 1.5709, "step": 6790 }, { "epoch": 0.40481583025390394, "grad_norm": 3.196486711502075, "learning_rate": 9.096694184501192e-05, "loss": 1.4169, "step": 6792 }, { "epoch": 0.40493503397305997, "grad_norm": 3.0427486896514893, "learning_rate": 9.096151936287242e-05, "loss": 1.4216, "step": 6794 }, { "epoch": 0.405054237692216, "grad_norm": 3.059060573577881, "learning_rate": 9.095609541539924e-05, "loss": 1.6348, "step": 6796 }, { "epoch": 0.405173441411372, "grad_norm": 3.187624216079712, "learning_rate": 9.095067000278641e-05, "loss": 1.5149, "step": 6798 }, { "epoch": 0.4052926451305281, "grad_norm": 3.3670406341552734, "learning_rate": 9.094524312522799e-05, "loss": 1.434, "step": 6800 }, { "epoch": 0.4054118488496841, "grad_norm": 3.2379493713378906, "learning_rate": 9.093981478291813e-05, "loss": 1.5029, "step": 6802 }, { "epoch": 0.40553105256884014, "grad_norm": 3.35917592048645, "learning_rate": 9.093438497605102e-05, "loss": 1.428, "step": 6804 }, { "epoch": 0.40565025628799617, "grad_norm": 3.3441946506500244, "learning_rate": 9.092895370482091e-05, "loss": 1.6333, "step": 6806 }, { "epoch": 0.40576946000715225, "grad_norm": 2.9599499702453613, "learning_rate": 9.09235209694221e-05, "loss": 1.4384, "step": 6808 }, { "epoch": 0.4058886637263083, "grad_norm": 2.834998369216919, "learning_rate": 9.091808677004891e-05, "loss": 1.4288, "step": 6810 }, { "epoch": 0.4060078674454643, "grad_norm": 2.5975306034088135, "learning_rate": 9.091265110689575e-05, "loss": 1.316, "step": 6812 }, { "epoch": 0.4061270711646203, "grad_norm": 3.0678598880767822, "learning_rate": 9.09072139801571e-05, "loss": 1.345, "step": 6814 }, { "epoch": 0.40624627488377635, "grad_norm": 2.9362308979034424, "learning_rate": 9.090177539002742e-05, "loss": 1.4057, "step": 6816 }, { "epoch": 0.40636547860293243, "grad_norm": 2.9320244789123535, "learning_rate": 9.08963353367013e-05, "loss": 1.5127, "step": 6818 }, { "epoch": 0.40648468232208845, "grad_norm": 3.5756852626800537, "learning_rate": 9.089089382037335e-05, "loss": 1.561, "step": 6820 }, { "epoch": 0.4066038860412445, "grad_norm": 3.113816022872925, "learning_rate": 9.088545084123821e-05, "loss": 1.5453, "step": 6822 }, { "epoch": 0.4067230897604005, "grad_norm": 2.6783502101898193, "learning_rate": 9.088000639949061e-05, "loss": 1.3754, "step": 6824 }, { "epoch": 0.4068422934795566, "grad_norm": 3.206747531890869, "learning_rate": 9.08745604953253e-05, "loss": 1.4283, "step": 6826 }, { "epoch": 0.4069614971987126, "grad_norm": 3.048313856124878, "learning_rate": 9.086911312893714e-05, "loss": 1.6667, "step": 6828 }, { "epoch": 0.40708070091786863, "grad_norm": 3.067180871963501, "learning_rate": 9.086366430052094e-05, "loss": 1.4799, "step": 6830 }, { "epoch": 0.40719990463702466, "grad_norm": 3.499464988708496, "learning_rate": 9.085821401027165e-05, "loss": 1.4523, "step": 6832 }, { "epoch": 0.40731910835618074, "grad_norm": 3.341095447540283, "learning_rate": 9.085276225838428e-05, "loss": 1.5801, "step": 6834 }, { "epoch": 0.40743831207533676, "grad_norm": 3.039884328842163, "learning_rate": 9.084730904505381e-05, "loss": 1.5796, "step": 6836 }, { "epoch": 0.4075575157944928, "grad_norm": 2.9649877548217773, "learning_rate": 9.084185437047535e-05, "loss": 1.5206, "step": 6838 }, { "epoch": 0.4076767195136488, "grad_norm": 3.280958414077759, "learning_rate": 9.0836398234844e-05, "loss": 1.5334, "step": 6840 }, { "epoch": 0.4077959232328049, "grad_norm": 3.2738192081451416, "learning_rate": 9.083094063835498e-05, "loss": 1.5316, "step": 6842 }, { "epoch": 0.4079151269519609, "grad_norm": 3.146629810333252, "learning_rate": 9.08254815812035e-05, "loss": 1.4372, "step": 6844 }, { "epoch": 0.40803433067111694, "grad_norm": 3.4218661785125732, "learning_rate": 9.082002106358489e-05, "loss": 1.586, "step": 6846 }, { "epoch": 0.40815353439027297, "grad_norm": 2.898968458175659, "learning_rate": 9.081455908569442e-05, "loss": 1.3542, "step": 6848 }, { "epoch": 0.408272738109429, "grad_norm": 3.055513381958008, "learning_rate": 9.080909564772756e-05, "loss": 1.413, "step": 6850 }, { "epoch": 0.40839194182858507, "grad_norm": 3.0217361450195312, "learning_rate": 9.080363074987971e-05, "loss": 1.3964, "step": 6852 }, { "epoch": 0.4085111455477411, "grad_norm": 3.0327203273773193, "learning_rate": 9.079816439234638e-05, "loss": 1.4566, "step": 6854 }, { "epoch": 0.4086303492668971, "grad_norm": 3.1297378540039062, "learning_rate": 9.079269657532312e-05, "loss": 1.403, "step": 6856 }, { "epoch": 0.40874955298605314, "grad_norm": 3.0139129161834717, "learning_rate": 9.078722729900553e-05, "loss": 1.2354, "step": 6858 }, { "epoch": 0.4088687567052092, "grad_norm": 3.440227746963501, "learning_rate": 9.078175656358927e-05, "loss": 1.4592, "step": 6860 }, { "epoch": 0.40898796042436525, "grad_norm": 3.167045831680298, "learning_rate": 9.077628436927006e-05, "loss": 1.4684, "step": 6862 }, { "epoch": 0.4091071641435213, "grad_norm": 3.155129909515381, "learning_rate": 9.077081071624361e-05, "loss": 1.5046, "step": 6864 }, { "epoch": 0.4092263678626773, "grad_norm": 3.389488458633423, "learning_rate": 9.07653356047058e-05, "loss": 1.396, "step": 6866 }, { "epoch": 0.4093455715818334, "grad_norm": 2.982114315032959, "learning_rate": 9.075985903485244e-05, "loss": 1.4785, "step": 6868 }, { "epoch": 0.4094647753009894, "grad_norm": 3.12968373298645, "learning_rate": 9.075438100687945e-05, "loss": 1.5359, "step": 6870 }, { "epoch": 0.40958397902014543, "grad_norm": 3.454163074493408, "learning_rate": 9.074890152098283e-05, "loss": 1.5115, "step": 6872 }, { "epoch": 0.40970318273930145, "grad_norm": 2.874182939529419, "learning_rate": 9.074342057735858e-05, "loss": 1.3059, "step": 6874 }, { "epoch": 0.4098223864584575, "grad_norm": 2.962953805923462, "learning_rate": 9.073793817620278e-05, "loss": 1.4268, "step": 6876 }, { "epoch": 0.40994159017761356, "grad_norm": 3.4989781379699707, "learning_rate": 9.073245431771154e-05, "loss": 1.479, "step": 6878 }, { "epoch": 0.4100607938967696, "grad_norm": 3.079761028289795, "learning_rate": 9.072696900208105e-05, "loss": 1.4899, "step": 6880 }, { "epoch": 0.4101799976159256, "grad_norm": 3.278963804244995, "learning_rate": 9.072148222950752e-05, "loss": 1.4349, "step": 6882 }, { "epoch": 0.41029920133508163, "grad_norm": 3.3621394634246826, "learning_rate": 9.071599400018726e-05, "loss": 1.8967, "step": 6884 }, { "epoch": 0.4104184050542377, "grad_norm": 2.86906099319458, "learning_rate": 9.071050431431658e-05, "loss": 1.3475, "step": 6886 }, { "epoch": 0.41053760877339374, "grad_norm": 3.005958318710327, "learning_rate": 9.070501317209186e-05, "loss": 1.5633, "step": 6888 }, { "epoch": 0.41065681249254976, "grad_norm": 3.4778997898101807, "learning_rate": 9.069952057370957e-05, "loss": 1.5125, "step": 6890 }, { "epoch": 0.4107760162117058, "grad_norm": 3.3980534076690674, "learning_rate": 9.069402651936615e-05, "loss": 1.397, "step": 6892 }, { "epoch": 0.41089521993086187, "grad_norm": 3.1048758029937744, "learning_rate": 9.068853100925818e-05, "loss": 1.5078, "step": 6894 }, { "epoch": 0.4110144236500179, "grad_norm": 2.763911247253418, "learning_rate": 9.068303404358224e-05, "loss": 1.4374, "step": 6896 }, { "epoch": 0.4111336273691739, "grad_norm": 3.050563335418701, "learning_rate": 9.0677535622535e-05, "loss": 1.4223, "step": 6898 }, { "epoch": 0.41125283108832994, "grad_norm": 2.9593818187713623, "learning_rate": 9.067203574631311e-05, "loss": 1.3856, "step": 6900 }, { "epoch": 0.411372034807486, "grad_norm": 2.768618583679199, "learning_rate": 9.066653441511335e-05, "loss": 1.2359, "step": 6902 }, { "epoch": 0.41149123852664204, "grad_norm": 3.042203187942505, "learning_rate": 9.06610316291325e-05, "loss": 1.585, "step": 6904 }, { "epoch": 0.41161044224579807, "grad_norm": 3.252241373062134, "learning_rate": 9.065552738856745e-05, "loss": 1.4996, "step": 6906 }, { "epoch": 0.4117296459649541, "grad_norm": 3.272703170776367, "learning_rate": 9.065002169361508e-05, "loss": 1.4532, "step": 6908 }, { "epoch": 0.4118488496841101, "grad_norm": 3.3305413722991943, "learning_rate": 9.064451454447235e-05, "loss": 1.29, "step": 6910 }, { "epoch": 0.4119680534032662, "grad_norm": 3.333184242248535, "learning_rate": 9.063900594133626e-05, "loss": 1.6413, "step": 6912 }, { "epoch": 0.4120872571224222, "grad_norm": 2.8648409843444824, "learning_rate": 9.063349588440389e-05, "loss": 1.4601, "step": 6914 }, { "epoch": 0.41220646084157825, "grad_norm": 2.7740695476531982, "learning_rate": 9.062798437387236e-05, "loss": 1.4253, "step": 6916 }, { "epoch": 0.4123256645607343, "grad_norm": 2.9332540035247803, "learning_rate": 9.062247140993881e-05, "loss": 1.2914, "step": 6918 }, { "epoch": 0.41244486827989035, "grad_norm": 2.9439003467559814, "learning_rate": 9.061695699280046e-05, "loss": 1.3823, "step": 6920 }, { "epoch": 0.4125640719990464, "grad_norm": 3.54530930519104, "learning_rate": 9.06114411226546e-05, "loss": 1.5097, "step": 6922 }, { "epoch": 0.4126832757182024, "grad_norm": 3.053757667541504, "learning_rate": 9.060592379969854e-05, "loss": 1.5305, "step": 6924 }, { "epoch": 0.4128024794373584, "grad_norm": 3.346435546875, "learning_rate": 9.060040502412965e-05, "loss": 1.4292, "step": 6926 }, { "epoch": 0.4129216831565145, "grad_norm": 3.1192071437835693, "learning_rate": 9.059488479614535e-05, "loss": 1.4454, "step": 6928 }, { "epoch": 0.41304088687567053, "grad_norm": 3.3194820880889893, "learning_rate": 9.058936311594315e-05, "loss": 1.4339, "step": 6930 }, { "epoch": 0.41316009059482656, "grad_norm": 2.9898416996002197, "learning_rate": 9.058383998372054e-05, "loss": 1.4096, "step": 6932 }, { "epoch": 0.4132792943139826, "grad_norm": 3.0326809883117676, "learning_rate": 9.057831539967511e-05, "loss": 1.4706, "step": 6934 }, { "epoch": 0.41339849803313866, "grad_norm": 3.061814308166504, "learning_rate": 9.057278936400453e-05, "loss": 1.3879, "step": 6936 }, { "epoch": 0.4135177017522947, "grad_norm": 3.3190042972564697, "learning_rate": 9.056726187690643e-05, "loss": 1.6634, "step": 6938 }, { "epoch": 0.4136369054714507, "grad_norm": 2.918520927429199, "learning_rate": 9.056173293857859e-05, "loss": 1.4731, "step": 6940 }, { "epoch": 0.41375610919060674, "grad_norm": 2.9576776027679443, "learning_rate": 9.055620254921878e-05, "loss": 1.3217, "step": 6942 }, { "epoch": 0.41387531290976276, "grad_norm": 2.8022305965423584, "learning_rate": 9.055067070902484e-05, "loss": 1.3688, "step": 6944 }, { "epoch": 0.41399451662891884, "grad_norm": 3.2805306911468506, "learning_rate": 9.054513741819466e-05, "loss": 1.4967, "step": 6946 }, { "epoch": 0.41411372034807487, "grad_norm": 3.4063379764556885, "learning_rate": 9.05396026769262e-05, "loss": 1.6174, "step": 6948 }, { "epoch": 0.4142329240672309, "grad_norm": 2.9869537353515625, "learning_rate": 9.053406648541743e-05, "loss": 1.3712, "step": 6950 }, { "epoch": 0.4143521277863869, "grad_norm": 3.347355365753174, "learning_rate": 9.052852884386643e-05, "loss": 1.5569, "step": 6952 }, { "epoch": 0.414471331505543, "grad_norm": 3.006950616836548, "learning_rate": 9.052298975247129e-05, "loss": 1.4087, "step": 6954 }, { "epoch": 0.414590535224699, "grad_norm": 3.4438982009887695, "learning_rate": 9.051744921143015e-05, "loss": 1.4606, "step": 6956 }, { "epoch": 0.41470973894385504, "grad_norm": 3.1123569011688232, "learning_rate": 9.051190722094122e-05, "loss": 1.5352, "step": 6958 }, { "epoch": 0.41482894266301107, "grad_norm": 3.1305603981018066, "learning_rate": 9.050636378120277e-05, "loss": 1.454, "step": 6960 }, { "epoch": 0.41494814638216715, "grad_norm": 2.9163482189178467, "learning_rate": 9.05008188924131e-05, "loss": 1.5819, "step": 6962 }, { "epoch": 0.4150673501013232, "grad_norm": 3.1929116249084473, "learning_rate": 9.049527255477055e-05, "loss": 1.4902, "step": 6964 }, { "epoch": 0.4151865538204792, "grad_norm": 2.7676777839660645, "learning_rate": 9.048972476847356e-05, "loss": 1.4743, "step": 6966 }, { "epoch": 0.4153057575396352, "grad_norm": 3.0060625076293945, "learning_rate": 9.048417553372057e-05, "loss": 1.4066, "step": 6968 }, { "epoch": 0.41542496125879125, "grad_norm": 2.7975401878356934, "learning_rate": 9.047862485071012e-05, "loss": 1.3492, "step": 6970 }, { "epoch": 0.41554416497794733, "grad_norm": 2.9436728954315186, "learning_rate": 9.047307271964075e-05, "loss": 1.48, "step": 6972 }, { "epoch": 0.41566336869710335, "grad_norm": 3.0946128368377686, "learning_rate": 9.04675191407111e-05, "loss": 1.5146, "step": 6974 }, { "epoch": 0.4157825724162594, "grad_norm": 3.4264402389526367, "learning_rate": 9.046196411411982e-05, "loss": 1.3858, "step": 6976 }, { "epoch": 0.4159017761354154, "grad_norm": 3.0300652980804443, "learning_rate": 9.045640764006567e-05, "loss": 1.3849, "step": 6978 }, { "epoch": 0.4160209798545715, "grad_norm": 3.0855467319488525, "learning_rate": 9.045084971874738e-05, "loss": 1.3718, "step": 6980 }, { "epoch": 0.4161401835737275, "grad_norm": 3.1998379230499268, "learning_rate": 9.04452903503638e-05, "loss": 1.5218, "step": 6982 }, { "epoch": 0.41625938729288353, "grad_norm": 3.2321627140045166, "learning_rate": 9.043972953511379e-05, "loss": 1.3413, "step": 6984 }, { "epoch": 0.41637859101203956, "grad_norm": 3.2775163650512695, "learning_rate": 9.04341672731963e-05, "loss": 1.5759, "step": 6986 }, { "epoch": 0.41649779473119564, "grad_norm": 2.9519810676574707, "learning_rate": 9.042860356481031e-05, "loss": 1.2681, "step": 6988 }, { "epoch": 0.41661699845035166, "grad_norm": 3.2217869758605957, "learning_rate": 9.042303841015484e-05, "loss": 1.541, "step": 6990 }, { "epoch": 0.4167362021695077, "grad_norm": 3.2689664363861084, "learning_rate": 9.041747180942897e-05, "loss": 1.4772, "step": 6992 }, { "epoch": 0.4168554058886637, "grad_norm": 3.335045576095581, "learning_rate": 9.041190376283186e-05, "loss": 1.4648, "step": 6994 }, { "epoch": 0.4169746096078198, "grad_norm": 3.1101815700531006, "learning_rate": 9.040633427056267e-05, "loss": 1.4385, "step": 6996 }, { "epoch": 0.4170938133269758, "grad_norm": 2.8808467388153076, "learning_rate": 9.040076333282069e-05, "loss": 1.3995, "step": 6998 }, { "epoch": 0.41721301704613184, "grad_norm": 3.0448577404022217, "learning_rate": 9.039519094980513e-05, "loss": 1.5717, "step": 7000 }, { "epoch": 0.41733222076528786, "grad_norm": 3.5661869049072266, "learning_rate": 9.038961712171541e-05, "loss": 1.4527, "step": 7002 }, { "epoch": 0.4174514244844439, "grad_norm": 3.13183856010437, "learning_rate": 9.038404184875087e-05, "loss": 1.3838, "step": 7004 }, { "epoch": 0.41757062820359997, "grad_norm": 3.0915284156799316, "learning_rate": 9.037846513111099e-05, "loss": 1.5968, "step": 7006 }, { "epoch": 0.417689831922756, "grad_norm": 3.150644063949585, "learning_rate": 9.037288696899527e-05, "loss": 1.4896, "step": 7008 }, { "epoch": 0.417809035641912, "grad_norm": 3.6351075172424316, "learning_rate": 9.036730736260323e-05, "loss": 1.5116, "step": 7010 }, { "epoch": 0.41792823936106804, "grad_norm": 3.2571184635162354, "learning_rate": 9.036172631213451e-05, "loss": 1.4488, "step": 7012 }, { "epoch": 0.4180474430802241, "grad_norm": 2.76265811920166, "learning_rate": 9.035614381778875e-05, "loss": 1.3357, "step": 7014 }, { "epoch": 0.41816664679938015, "grad_norm": 2.9673120975494385, "learning_rate": 9.035055987976562e-05, "loss": 1.5238, "step": 7016 }, { "epoch": 0.4182858505185362, "grad_norm": 2.8774733543395996, "learning_rate": 9.034497449826492e-05, "loss": 1.4683, "step": 7018 }, { "epoch": 0.4184050542376922, "grad_norm": 3.154940366744995, "learning_rate": 9.033938767348644e-05, "loss": 1.4059, "step": 7020 }, { "epoch": 0.4185242579568483, "grad_norm": 3.140317916870117, "learning_rate": 9.033379940563005e-05, "loss": 1.5009, "step": 7022 }, { "epoch": 0.4186434616760043, "grad_norm": 3.130795478820801, "learning_rate": 9.032820969489565e-05, "loss": 1.3637, "step": 7024 }, { "epoch": 0.4187626653951603, "grad_norm": 2.941112756729126, "learning_rate": 9.03226185414832e-05, "loss": 1.4709, "step": 7026 }, { "epoch": 0.41888186911431635, "grad_norm": 3.0058188438415527, "learning_rate": 9.031702594559274e-05, "loss": 1.4259, "step": 7028 }, { "epoch": 0.4190010728334724, "grad_norm": 3.272557258605957, "learning_rate": 9.03114319074243e-05, "loss": 1.3319, "step": 7030 }, { "epoch": 0.41912027655262846, "grad_norm": 3.8803882598876953, "learning_rate": 9.030583642717803e-05, "loss": 1.4036, "step": 7032 }, { "epoch": 0.4192394802717845, "grad_norm": 3.1880435943603516, "learning_rate": 9.030023950505408e-05, "loss": 1.5836, "step": 7034 }, { "epoch": 0.4193586839909405, "grad_norm": 2.9208316802978516, "learning_rate": 9.029464114125268e-05, "loss": 1.3764, "step": 7036 }, { "epoch": 0.41947788771009653, "grad_norm": 3.2436861991882324, "learning_rate": 9.02890413359741e-05, "loss": 1.5325, "step": 7038 }, { "epoch": 0.4195970914292526, "grad_norm": 3.5344021320343018, "learning_rate": 9.028344008941867e-05, "loss": 1.5276, "step": 7040 }, { "epoch": 0.41971629514840864, "grad_norm": 3.2099812030792236, "learning_rate": 9.027783740178675e-05, "loss": 1.559, "step": 7042 }, { "epoch": 0.41983549886756466, "grad_norm": 2.747262477874756, "learning_rate": 9.027223327327878e-05, "loss": 1.3165, "step": 7044 }, { "epoch": 0.4199547025867207, "grad_norm": 3.275524616241455, "learning_rate": 9.026662770409522e-05, "loss": 1.5726, "step": 7046 }, { "epoch": 0.42007390630587677, "grad_norm": 2.956195592880249, "learning_rate": 9.026102069443664e-05, "loss": 1.3799, "step": 7048 }, { "epoch": 0.4201931100250328, "grad_norm": 2.998727321624756, "learning_rate": 9.02554122445036e-05, "loss": 1.4213, "step": 7050 }, { "epoch": 0.4203123137441888, "grad_norm": 3.0470681190490723, "learning_rate": 9.024980235449671e-05, "loss": 1.6782, "step": 7052 }, { "epoch": 0.42043151746334484, "grad_norm": 3.0133492946624756, "learning_rate": 9.024419102461668e-05, "loss": 1.3955, "step": 7054 }, { "epoch": 0.4205507211825009, "grad_norm": 3.2978711128234863, "learning_rate": 9.023857825506426e-05, "loss": 1.4132, "step": 7056 }, { "epoch": 0.42066992490165694, "grad_norm": 2.8162546157836914, "learning_rate": 9.023296404604021e-05, "loss": 1.328, "step": 7058 }, { "epoch": 0.42078912862081297, "grad_norm": 3.6041383743286133, "learning_rate": 9.022734839774537e-05, "loss": 1.5018, "step": 7060 }, { "epoch": 0.420908332339969, "grad_norm": 3.19767165184021, "learning_rate": 9.022173131038068e-05, "loss": 1.4596, "step": 7062 }, { "epoch": 0.421027536059125, "grad_norm": 3.1853134632110596, "learning_rate": 9.021611278414702e-05, "loss": 1.3682, "step": 7064 }, { "epoch": 0.4211467397782811, "grad_norm": 3.844505786895752, "learning_rate": 9.021049281924539e-05, "loss": 1.5219, "step": 7066 }, { "epoch": 0.4212659434974371, "grad_norm": 3.13592791557312, "learning_rate": 9.020487141587687e-05, "loss": 1.624, "step": 7068 }, { "epoch": 0.42138514721659315, "grad_norm": 2.9981460571289062, "learning_rate": 9.019924857424254e-05, "loss": 1.3634, "step": 7070 }, { "epoch": 0.4215043509357492, "grad_norm": 3.0049850940704346, "learning_rate": 9.019362429454355e-05, "loss": 1.4045, "step": 7072 }, { "epoch": 0.42162355465490525, "grad_norm": 3.4514734745025635, "learning_rate": 9.018799857698108e-05, "loss": 1.3891, "step": 7074 }, { "epoch": 0.4217427583740613, "grad_norm": 3.054260730743408, "learning_rate": 9.018237142175643e-05, "loss": 1.4334, "step": 7076 }, { "epoch": 0.4218619620932173, "grad_norm": 2.9937219619750977, "learning_rate": 9.017674282907085e-05, "loss": 1.423, "step": 7078 }, { "epoch": 0.4219811658123733, "grad_norm": 3.0896637439727783, "learning_rate": 9.017111279912571e-05, "loss": 1.3699, "step": 7080 }, { "epoch": 0.4221003695315294, "grad_norm": 3.5064499378204346, "learning_rate": 9.016548133212244e-05, "loss": 1.5507, "step": 7082 }, { "epoch": 0.42221957325068543, "grad_norm": 3.048896312713623, "learning_rate": 9.015984842826248e-05, "loss": 1.3704, "step": 7084 }, { "epoch": 0.42233877696984146, "grad_norm": 2.984010934829712, "learning_rate": 9.015421408774732e-05, "loss": 1.4259, "step": 7086 }, { "epoch": 0.4224579806889975, "grad_norm": 3.2718422412872314, "learning_rate": 9.014857831077854e-05, "loss": 1.3762, "step": 7088 }, { "epoch": 0.42257718440815356, "grad_norm": 3.0204782485961914, "learning_rate": 9.014294109755774e-05, "loss": 1.5198, "step": 7090 }, { "epoch": 0.4226963881273096, "grad_norm": 3.303910493850708, "learning_rate": 9.013730244828661e-05, "loss": 1.4219, "step": 7092 }, { "epoch": 0.4228155918464656, "grad_norm": 3.4653828144073486, "learning_rate": 9.013166236316683e-05, "loss": 1.4998, "step": 7094 }, { "epoch": 0.42293479556562164, "grad_norm": 2.9636576175689697, "learning_rate": 9.012602084240018e-05, "loss": 1.3542, "step": 7096 }, { "epoch": 0.42305399928477766, "grad_norm": 2.9584579467773438, "learning_rate": 9.012037788618848e-05, "loss": 1.2535, "step": 7098 }, { "epoch": 0.42317320300393374, "grad_norm": 3.4135162830352783, "learning_rate": 9.011473349473358e-05, "loss": 1.515, "step": 7100 }, { "epoch": 0.42329240672308976, "grad_norm": 2.996347427368164, "learning_rate": 9.010908766823743e-05, "loss": 1.415, "step": 7102 }, { "epoch": 0.4234116104422458, "grad_norm": 3.305921792984009, "learning_rate": 9.010344040690197e-05, "loss": 1.4061, "step": 7104 }, { "epoch": 0.4235308141614018, "grad_norm": 3.1416800022125244, "learning_rate": 9.009779171092923e-05, "loss": 1.4041, "step": 7106 }, { "epoch": 0.4236500178805579, "grad_norm": 3.197587013244629, "learning_rate": 9.009214158052129e-05, "loss": 1.4218, "step": 7108 }, { "epoch": 0.4237692215997139, "grad_norm": 3.2975480556488037, "learning_rate": 9.008649001588028e-05, "loss": 1.5914, "step": 7110 }, { "epoch": 0.42388842531886994, "grad_norm": 3.109226942062378, "learning_rate": 9.008083701720837e-05, "loss": 1.3362, "step": 7112 }, { "epoch": 0.42400762903802597, "grad_norm": 3.065117835998535, "learning_rate": 9.007518258470778e-05, "loss": 1.3726, "step": 7114 }, { "epoch": 0.42412683275718205, "grad_norm": 3.2798426151275635, "learning_rate": 9.006952671858077e-05, "loss": 1.2989, "step": 7116 }, { "epoch": 0.4242460364763381, "grad_norm": 2.856405258178711, "learning_rate": 9.006386941902973e-05, "loss": 1.4869, "step": 7118 }, { "epoch": 0.4243652401954941, "grad_norm": 2.8484046459198, "learning_rate": 9.005821068625699e-05, "loss": 1.3894, "step": 7120 }, { "epoch": 0.4244844439146501, "grad_norm": 3.068563461303711, "learning_rate": 9.005255052046499e-05, "loss": 1.4471, "step": 7122 }, { "epoch": 0.42460364763380615, "grad_norm": 3.0216057300567627, "learning_rate": 9.004688892185622e-05, "loss": 1.3759, "step": 7124 }, { "epoch": 0.4247228513529622, "grad_norm": 3.2997779846191406, "learning_rate": 9.004122589063321e-05, "loss": 1.4827, "step": 7126 }, { "epoch": 0.42484205507211825, "grad_norm": 3.381392240524292, "learning_rate": 9.003556142699856e-05, "loss": 1.4563, "step": 7128 }, { "epoch": 0.4249612587912743, "grad_norm": 2.87530779838562, "learning_rate": 9.00298955311549e-05, "loss": 1.4436, "step": 7130 }, { "epoch": 0.4250804625104303, "grad_norm": 3.2389912605285645, "learning_rate": 9.002422820330492e-05, "loss": 1.298, "step": 7132 }, { "epoch": 0.4251996662295864, "grad_norm": 3.239495038986206, "learning_rate": 9.001855944365133e-05, "loss": 1.5968, "step": 7134 }, { "epoch": 0.4253188699487424, "grad_norm": 3.0246522426605225, "learning_rate": 9.001288925239697e-05, "loss": 1.3637, "step": 7136 }, { "epoch": 0.42543807366789843, "grad_norm": 3.2050087451934814, "learning_rate": 9.000721762974465e-05, "loss": 1.3247, "step": 7138 }, { "epoch": 0.42555727738705446, "grad_norm": 2.9038615226745605, "learning_rate": 9.000154457589728e-05, "loss": 1.3374, "step": 7140 }, { "epoch": 0.42567648110621054, "grad_norm": 3.1374142169952393, "learning_rate": 8.999587009105779e-05, "loss": 1.5147, "step": 7142 }, { "epoch": 0.42579568482536656, "grad_norm": 2.9442992210388184, "learning_rate": 8.999019417542918e-05, "loss": 1.5394, "step": 7144 }, { "epoch": 0.4259148885445226, "grad_norm": 3.146991014480591, "learning_rate": 8.99845168292145e-05, "loss": 1.5541, "step": 7146 }, { "epoch": 0.4260340922636786, "grad_norm": 2.9688315391540527, "learning_rate": 8.997883805261687e-05, "loss": 1.4495, "step": 7148 }, { "epoch": 0.4261532959828347, "grad_norm": 2.873795747756958, "learning_rate": 8.997315784583939e-05, "loss": 1.468, "step": 7150 }, { "epoch": 0.4262724997019907, "grad_norm": 3.0335707664489746, "learning_rate": 8.996747620908528e-05, "loss": 1.52, "step": 7152 }, { "epoch": 0.42639170342114674, "grad_norm": 3.2062935829162598, "learning_rate": 8.996179314255783e-05, "loss": 1.3348, "step": 7154 }, { "epoch": 0.42651090714030276, "grad_norm": 3.238222122192383, "learning_rate": 8.99561086464603e-05, "loss": 1.3796, "step": 7156 }, { "epoch": 0.4266301108594588, "grad_norm": 3.2971081733703613, "learning_rate": 8.995042272099604e-05, "loss": 1.354, "step": 7158 }, { "epoch": 0.42674931457861487, "grad_norm": 3.1056668758392334, "learning_rate": 8.994473536636847e-05, "loss": 1.2732, "step": 7160 }, { "epoch": 0.4268685182977709, "grad_norm": 3.4170312881469727, "learning_rate": 8.993904658278107e-05, "loss": 1.4066, "step": 7162 }, { "epoch": 0.4269877220169269, "grad_norm": 3.2104732990264893, "learning_rate": 8.993335637043732e-05, "loss": 1.4981, "step": 7164 }, { "epoch": 0.42710692573608294, "grad_norm": 3.268996477127075, "learning_rate": 8.992766472954076e-05, "loss": 1.3898, "step": 7166 }, { "epoch": 0.427226129455239, "grad_norm": 3.277458667755127, "learning_rate": 8.992197166029504e-05, "loss": 1.5774, "step": 7168 }, { "epoch": 0.42734533317439505, "grad_norm": 3.3674144744873047, "learning_rate": 8.991627716290379e-05, "loss": 1.4237, "step": 7170 }, { "epoch": 0.4274645368935511, "grad_norm": 3.308283805847168, "learning_rate": 8.991058123757074e-05, "loss": 1.4794, "step": 7172 }, { "epoch": 0.4275837406127071, "grad_norm": 3.075176477432251, "learning_rate": 8.990488388449965e-05, "loss": 1.4376, "step": 7174 }, { "epoch": 0.4277029443318632, "grad_norm": 3.402547836303711, "learning_rate": 8.989918510389432e-05, "loss": 1.5798, "step": 7176 }, { "epoch": 0.4278221480510192, "grad_norm": 2.7395641803741455, "learning_rate": 8.989348489595863e-05, "loss": 1.2913, "step": 7178 }, { "epoch": 0.4279413517701752, "grad_norm": 2.978372812271118, "learning_rate": 8.98877832608965e-05, "loss": 1.3324, "step": 7180 }, { "epoch": 0.42806055548933125, "grad_norm": 2.9271881580352783, "learning_rate": 8.988208019891189e-05, "loss": 1.4345, "step": 7182 }, { "epoch": 0.42817975920848733, "grad_norm": 2.811805486679077, "learning_rate": 8.98763757102088e-05, "loss": 1.4701, "step": 7184 }, { "epoch": 0.42829896292764336, "grad_norm": 3.0851869583129883, "learning_rate": 8.987066979499133e-05, "loss": 1.4352, "step": 7186 }, { "epoch": 0.4284181666467994, "grad_norm": 3.058563232421875, "learning_rate": 8.986496245346357e-05, "loss": 1.3913, "step": 7188 }, { "epoch": 0.4285373703659554, "grad_norm": 4.735252380371094, "learning_rate": 8.985925368582973e-05, "loss": 1.4909, "step": 7190 }, { "epoch": 0.42865657408511143, "grad_norm": 3.2270724773406982, "learning_rate": 8.985354349229398e-05, "loss": 1.3673, "step": 7192 }, { "epoch": 0.4287757778042675, "grad_norm": 2.821242094039917, "learning_rate": 8.984783187306064e-05, "loss": 1.3565, "step": 7194 }, { "epoch": 0.42889498152342354, "grad_norm": 3.1619420051574707, "learning_rate": 8.984211882833402e-05, "loss": 1.3244, "step": 7196 }, { "epoch": 0.42901418524257956, "grad_norm": 3.1572606563568115, "learning_rate": 8.983640435831849e-05, "loss": 1.4139, "step": 7198 }, { "epoch": 0.4291333889617356, "grad_norm": 3.389122724533081, "learning_rate": 8.983068846321845e-05, "loss": 1.3805, "step": 7200 }, { "epoch": 0.42925259268089166, "grad_norm": 2.876739025115967, "learning_rate": 8.982497114323843e-05, "loss": 1.3576, "step": 7202 }, { "epoch": 0.4293717964000477, "grad_norm": 3.33445143699646, "learning_rate": 8.981925239858292e-05, "loss": 1.5215, "step": 7204 }, { "epoch": 0.4294910001192037, "grad_norm": 2.9857006072998047, "learning_rate": 8.981353222945653e-05, "loss": 1.5896, "step": 7206 }, { "epoch": 0.42961020383835974, "grad_norm": 5.068776607513428, "learning_rate": 8.980781063606387e-05, "loss": 1.3068, "step": 7208 }, { "epoch": 0.4297294075575158, "grad_norm": 3.4145116806030273, "learning_rate": 8.98020876186096e-05, "loss": 1.5368, "step": 7210 }, { "epoch": 0.42984861127667184, "grad_norm": 3.210507392883301, "learning_rate": 8.979636317729849e-05, "loss": 1.5065, "step": 7212 }, { "epoch": 0.42996781499582787, "grad_norm": 2.9468610286712646, "learning_rate": 8.97906373123353e-05, "loss": 1.4266, "step": 7214 }, { "epoch": 0.4300870187149839, "grad_norm": 3.2167224884033203, "learning_rate": 8.978491002392489e-05, "loss": 1.699, "step": 7216 }, { "epoch": 0.4302062224341399, "grad_norm": 3.4463188648223877, "learning_rate": 8.97791813122721e-05, "loss": 1.439, "step": 7218 }, { "epoch": 0.430325426153296, "grad_norm": 3.3179514408111572, "learning_rate": 8.977345117758192e-05, "loss": 1.4312, "step": 7220 }, { "epoch": 0.430444629872452, "grad_norm": 3.3692386150360107, "learning_rate": 8.976771962005927e-05, "loss": 1.4893, "step": 7222 }, { "epoch": 0.43056383359160805, "grad_norm": 3.4449048042297363, "learning_rate": 8.976198663990926e-05, "loss": 1.5294, "step": 7224 }, { "epoch": 0.43068303731076407, "grad_norm": 2.827653646469116, "learning_rate": 8.975625223733692e-05, "loss": 1.48, "step": 7226 }, { "epoch": 0.43080224102992015, "grad_norm": 3.0720653533935547, "learning_rate": 8.975051641254744e-05, "loss": 1.501, "step": 7228 }, { "epoch": 0.4309214447490762, "grad_norm": 3.268798589706421, "learning_rate": 8.974477916574595e-05, "loss": 1.3933, "step": 7230 }, { "epoch": 0.4310406484682322, "grad_norm": 2.984283208847046, "learning_rate": 8.973904049713775e-05, "loss": 1.5539, "step": 7232 }, { "epoch": 0.4311598521873882, "grad_norm": 2.9992337226867676, "learning_rate": 8.973330040692808e-05, "loss": 1.5329, "step": 7234 }, { "epoch": 0.4312790559065443, "grad_norm": 3.0919883251190186, "learning_rate": 8.972755889532233e-05, "loss": 1.5108, "step": 7236 }, { "epoch": 0.43139825962570033, "grad_norm": 3.1875789165496826, "learning_rate": 8.972181596252588e-05, "loss": 1.3956, "step": 7238 }, { "epoch": 0.43151746334485636, "grad_norm": 2.9451868534088135, "learning_rate": 8.971607160874414e-05, "loss": 1.5475, "step": 7240 }, { "epoch": 0.4316366670640124, "grad_norm": 3.2550296783447266, "learning_rate": 8.971032583418265e-05, "loss": 1.3988, "step": 7242 }, { "epoch": 0.43175587078316846, "grad_norm": 3.0588362216949463, "learning_rate": 8.970457863904694e-05, "loss": 1.4968, "step": 7244 }, { "epoch": 0.4318750745023245, "grad_norm": 3.2743330001831055, "learning_rate": 8.96988300235426e-05, "loss": 1.3762, "step": 7246 }, { "epoch": 0.4319942782214805, "grad_norm": 3.259152412414551, "learning_rate": 8.969307998787527e-05, "loss": 1.4535, "step": 7248 }, { "epoch": 0.43211348194063653, "grad_norm": 3.0208945274353027, "learning_rate": 8.968732853225066e-05, "loss": 1.3726, "step": 7250 }, { "epoch": 0.43223268565979256, "grad_norm": 3.034311294555664, "learning_rate": 8.968157565687454e-05, "loss": 1.5169, "step": 7252 }, { "epoch": 0.43235188937894864, "grad_norm": 2.883915662765503, "learning_rate": 8.967582136195267e-05, "loss": 1.3816, "step": 7254 }, { "epoch": 0.43247109309810466, "grad_norm": 3.106220006942749, "learning_rate": 8.967006564769094e-05, "loss": 1.6138, "step": 7256 }, { "epoch": 0.4325902968172607, "grad_norm": 3.1039962768554688, "learning_rate": 8.966430851429522e-05, "loss": 1.3978, "step": 7258 }, { "epoch": 0.4327095005364167, "grad_norm": 3.3883020877838135, "learning_rate": 8.965854996197147e-05, "loss": 1.748, "step": 7260 }, { "epoch": 0.4328287042555728, "grad_norm": 3.281669855117798, "learning_rate": 8.96527899909257e-05, "loss": 1.4491, "step": 7262 }, { "epoch": 0.4329479079747288, "grad_norm": 3.382519483566284, "learning_rate": 8.964702860136396e-05, "loss": 1.6933, "step": 7264 }, { "epoch": 0.43306711169388484, "grad_norm": 2.824885129928589, "learning_rate": 8.964126579349236e-05, "loss": 1.4604, "step": 7266 }, { "epoch": 0.43318631541304087, "grad_norm": 3.2812774181365967, "learning_rate": 8.963550156751706e-05, "loss": 1.4149, "step": 7268 }, { "epoch": 0.43330551913219695, "grad_norm": 3.08626127243042, "learning_rate": 8.962973592364425e-05, "loss": 1.5854, "step": 7270 }, { "epoch": 0.433424722851353, "grad_norm": 2.9925460815429688, "learning_rate": 8.962396886208018e-05, "loss": 1.4588, "step": 7272 }, { "epoch": 0.433543926570509, "grad_norm": 2.9452881813049316, "learning_rate": 8.96182003830312e-05, "loss": 1.3495, "step": 7274 }, { "epoch": 0.433663130289665, "grad_norm": 3.293349504470825, "learning_rate": 8.961243048670363e-05, "loss": 1.3345, "step": 7276 }, { "epoch": 0.4337823340088211, "grad_norm": 2.963836193084717, "learning_rate": 8.960665917330388e-05, "loss": 1.3945, "step": 7278 }, { "epoch": 0.4339015377279771, "grad_norm": 3.0834898948669434, "learning_rate": 8.960088644303841e-05, "loss": 1.4011, "step": 7280 }, { "epoch": 0.43402074144713315, "grad_norm": 3.260159969329834, "learning_rate": 8.959511229611376e-05, "loss": 1.5208, "step": 7282 }, { "epoch": 0.4341399451662892, "grad_norm": 3.0987772941589355, "learning_rate": 8.958933673273646e-05, "loss": 1.3901, "step": 7284 }, { "epoch": 0.4342591488854452, "grad_norm": 3.129429578781128, "learning_rate": 8.958355975311315e-05, "loss": 1.5568, "step": 7286 }, { "epoch": 0.4343783526046013, "grad_norm": 2.952737331390381, "learning_rate": 8.957778135745045e-05, "loss": 1.3615, "step": 7288 }, { "epoch": 0.4344975563237573, "grad_norm": 3.492403745651245, "learning_rate": 8.95720015459551e-05, "loss": 1.6495, "step": 7290 }, { "epoch": 0.43461676004291333, "grad_norm": 3.1942927837371826, "learning_rate": 8.956622031883386e-05, "loss": 1.5043, "step": 7292 }, { "epoch": 0.43473596376206936, "grad_norm": 3.1959452629089355, "learning_rate": 8.956043767629355e-05, "loss": 1.3775, "step": 7294 }, { "epoch": 0.43485516748122544, "grad_norm": 3.0784432888031006, "learning_rate": 8.955465361854104e-05, "loss": 1.3839, "step": 7296 }, { "epoch": 0.43497437120038146, "grad_norm": 3.0925121307373047, "learning_rate": 8.954886814578321e-05, "loss": 1.4694, "step": 7298 }, { "epoch": 0.4350935749195375, "grad_norm": 3.173872232437134, "learning_rate": 8.954308125822708e-05, "loss": 1.4517, "step": 7300 }, { "epoch": 0.4352127786386935, "grad_norm": 3.3502485752105713, "learning_rate": 8.953729295607962e-05, "loss": 1.5687, "step": 7302 }, { "epoch": 0.4353319823578496, "grad_norm": 2.921919822692871, "learning_rate": 8.953150323954791e-05, "loss": 1.4171, "step": 7304 }, { "epoch": 0.4354511860770056, "grad_norm": 3.031723976135254, "learning_rate": 8.95257121088391e-05, "loss": 1.4566, "step": 7306 }, { "epoch": 0.43557038979616164, "grad_norm": 2.8176989555358887, "learning_rate": 8.95199195641603e-05, "loss": 1.4013, "step": 7308 }, { "epoch": 0.43568959351531766, "grad_norm": 3.323702335357666, "learning_rate": 8.951412560571879e-05, "loss": 1.4514, "step": 7310 }, { "epoch": 0.4358087972344737, "grad_norm": 3.168705701828003, "learning_rate": 8.95083302337218e-05, "loss": 1.4757, "step": 7312 }, { "epoch": 0.43592800095362977, "grad_norm": 3.4622068405151367, "learning_rate": 8.950253344837664e-05, "loss": 1.5597, "step": 7314 }, { "epoch": 0.4360472046727858, "grad_norm": 3.1548752784729004, "learning_rate": 8.949673524989073e-05, "loss": 1.3901, "step": 7316 }, { "epoch": 0.4361664083919418, "grad_norm": 3.018893241882324, "learning_rate": 8.949093563847146e-05, "loss": 1.5573, "step": 7318 }, { "epoch": 0.43628561211109784, "grad_norm": 2.9531447887420654, "learning_rate": 8.94851346143263e-05, "loss": 1.5001, "step": 7320 }, { "epoch": 0.4364048158302539, "grad_norm": 3.0229249000549316, "learning_rate": 8.947933217766278e-05, "loss": 1.3564, "step": 7322 }, { "epoch": 0.43652401954940995, "grad_norm": 3.0603320598602295, "learning_rate": 8.947352832868847e-05, "loss": 1.4025, "step": 7324 }, { "epoch": 0.43664322326856597, "grad_norm": 3.0653159618377686, "learning_rate": 8.946772306761098e-05, "loss": 1.5118, "step": 7326 }, { "epoch": 0.436762426987722, "grad_norm": 3.426454782485962, "learning_rate": 8.946191639463802e-05, "loss": 1.5035, "step": 7328 }, { "epoch": 0.4368816307068781, "grad_norm": 2.880615472793579, "learning_rate": 8.945610830997728e-05, "loss": 1.3809, "step": 7330 }, { "epoch": 0.4370008344260341, "grad_norm": 3.1256394386291504, "learning_rate": 8.945029881383655e-05, "loss": 1.531, "step": 7332 }, { "epoch": 0.4371200381451901, "grad_norm": 3.2308616638183594, "learning_rate": 8.944448790642366e-05, "loss": 1.4516, "step": 7334 }, { "epoch": 0.43723924186434615, "grad_norm": 2.9230966567993164, "learning_rate": 8.943867558794647e-05, "loss": 1.3874, "step": 7336 }, { "epoch": 0.43735844558350223, "grad_norm": 2.9269449710845947, "learning_rate": 8.943286185861293e-05, "loss": 1.3077, "step": 7338 }, { "epoch": 0.43747764930265826, "grad_norm": 3.0495200157165527, "learning_rate": 8.942704671863101e-05, "loss": 1.3537, "step": 7340 }, { "epoch": 0.4375968530218143, "grad_norm": 3.0718441009521484, "learning_rate": 8.942123016820871e-05, "loss": 1.4363, "step": 7342 }, { "epoch": 0.4377160567409703, "grad_norm": 3.0470516681671143, "learning_rate": 8.941541220755414e-05, "loss": 1.3292, "step": 7344 }, { "epoch": 0.43783526046012633, "grad_norm": 3.5237233638763428, "learning_rate": 8.94095928368754e-05, "loss": 1.4251, "step": 7346 }, { "epoch": 0.4379544641792824, "grad_norm": 3.0891242027282715, "learning_rate": 8.940377205638071e-05, "loss": 1.4971, "step": 7348 }, { "epoch": 0.43807366789843843, "grad_norm": 3.4209585189819336, "learning_rate": 8.939794986627825e-05, "loss": 1.4561, "step": 7350 }, { "epoch": 0.43819287161759446, "grad_norm": 3.1268575191497803, "learning_rate": 8.939212626677635e-05, "loss": 1.337, "step": 7352 }, { "epoch": 0.4383120753367505, "grad_norm": 3.323270082473755, "learning_rate": 8.93863012580833e-05, "loss": 1.5032, "step": 7354 }, { "epoch": 0.43843127905590656, "grad_norm": 3.0879790782928467, "learning_rate": 8.938047484040749e-05, "loss": 1.4752, "step": 7356 }, { "epoch": 0.4385504827750626, "grad_norm": 2.9502265453338623, "learning_rate": 8.937464701395737e-05, "loss": 1.4607, "step": 7358 }, { "epoch": 0.4386696864942186, "grad_norm": 3.067044258117676, "learning_rate": 8.93688177789414e-05, "loss": 1.5384, "step": 7360 }, { "epoch": 0.43878889021337464, "grad_norm": 3.2505178451538086, "learning_rate": 8.936298713556813e-05, "loss": 1.3871, "step": 7362 }, { "epoch": 0.4389080939325307, "grad_norm": 3.1511805057525635, "learning_rate": 8.93571550840461e-05, "loss": 1.3568, "step": 7364 }, { "epoch": 0.43902729765168674, "grad_norm": 3.079254388809204, "learning_rate": 8.9351321624584e-05, "loss": 1.5082, "step": 7366 }, { "epoch": 0.43914650137084277, "grad_norm": 3.3116960525512695, "learning_rate": 8.934548675739049e-05, "loss": 1.3214, "step": 7368 }, { "epoch": 0.4392657050899988, "grad_norm": 3.166126251220703, "learning_rate": 8.933965048267428e-05, "loss": 1.5069, "step": 7370 }, { "epoch": 0.4393849088091549, "grad_norm": 3.093480348587036, "learning_rate": 8.933381280064419e-05, "loss": 1.3828, "step": 7372 }, { "epoch": 0.4395041125283109, "grad_norm": 3.073812246322632, "learning_rate": 8.932797371150903e-05, "loss": 1.431, "step": 7374 }, { "epoch": 0.4396233162474669, "grad_norm": 3.26104736328125, "learning_rate": 8.93221332154777e-05, "loss": 1.3491, "step": 7376 }, { "epoch": 0.43974251996662295, "grad_norm": 2.9339962005615234, "learning_rate": 8.931629131275911e-05, "loss": 1.5041, "step": 7378 }, { "epoch": 0.43986172368577897, "grad_norm": 2.999802827835083, "learning_rate": 8.931044800356228e-05, "loss": 1.3267, "step": 7380 }, { "epoch": 0.43998092740493505, "grad_norm": 3.1849875450134277, "learning_rate": 8.93046032880962e-05, "loss": 1.3447, "step": 7382 }, { "epoch": 0.4401001311240911, "grad_norm": 3.0116310119628906, "learning_rate": 8.929875716656999e-05, "loss": 1.5642, "step": 7384 }, { "epoch": 0.4402193348432471, "grad_norm": 3.2074954509735107, "learning_rate": 8.929290963919278e-05, "loss": 1.4543, "step": 7386 }, { "epoch": 0.4403385385624031, "grad_norm": 3.3599190711975098, "learning_rate": 8.928706070617376e-05, "loss": 1.501, "step": 7388 }, { "epoch": 0.4404577422815592, "grad_norm": 2.828106164932251, "learning_rate": 8.928121036772216e-05, "loss": 1.2798, "step": 7390 }, { "epoch": 0.44057694600071523, "grad_norm": 3.1389079093933105, "learning_rate": 8.927535862404726e-05, "loss": 1.5233, "step": 7392 }, { "epoch": 0.44069614971987126, "grad_norm": 3.2191550731658936, "learning_rate": 8.926950547535839e-05, "loss": 1.5264, "step": 7394 }, { "epoch": 0.4408153534390273, "grad_norm": 3.1663641929626465, "learning_rate": 8.926365092186497e-05, "loss": 1.4612, "step": 7396 }, { "epoch": 0.44093455715818336, "grad_norm": 2.745512008666992, "learning_rate": 8.925779496377642e-05, "loss": 1.489, "step": 7398 }, { "epoch": 0.4410537608773394, "grad_norm": 3.2794649600982666, "learning_rate": 8.925193760130221e-05, "loss": 1.6126, "step": 7400 }, { "epoch": 0.4411729645964954, "grad_norm": 3.2773191928863525, "learning_rate": 8.92460788346519e-05, "loss": 1.3967, "step": 7402 }, { "epoch": 0.44129216831565143, "grad_norm": 3.0351316928863525, "learning_rate": 8.924021866403507e-05, "loss": 1.3482, "step": 7404 }, { "epoch": 0.44141137203480746, "grad_norm": 2.9452857971191406, "learning_rate": 8.923435708966135e-05, "loss": 1.33, "step": 7406 }, { "epoch": 0.44153057575396354, "grad_norm": 3.1553187370300293, "learning_rate": 8.922849411174046e-05, "loss": 1.5699, "step": 7408 }, { "epoch": 0.44164977947311956, "grad_norm": 3.1524791717529297, "learning_rate": 8.92226297304821e-05, "loss": 1.5183, "step": 7410 }, { "epoch": 0.4417689831922756, "grad_norm": 3.1176226139068604, "learning_rate": 8.921676394609609e-05, "loss": 1.443, "step": 7412 }, { "epoch": 0.4418881869114316, "grad_norm": 3.0813980102539062, "learning_rate": 8.921089675879223e-05, "loss": 1.4831, "step": 7414 }, { "epoch": 0.4420073906305877, "grad_norm": 3.348034143447876, "learning_rate": 8.920502816878044e-05, "loss": 1.4649, "step": 7416 }, { "epoch": 0.4421265943497437, "grad_norm": 3.2634241580963135, "learning_rate": 8.919915817627066e-05, "loss": 1.5501, "step": 7418 }, { "epoch": 0.44224579806889974, "grad_norm": 3.0761828422546387, "learning_rate": 8.919328678147289e-05, "loss": 1.4543, "step": 7420 }, { "epoch": 0.44236500178805577, "grad_norm": 3.1042816638946533, "learning_rate": 8.918741398459713e-05, "loss": 1.4413, "step": 7422 }, { "epoch": 0.44248420550721185, "grad_norm": 3.34818959236145, "learning_rate": 8.91815397858535e-05, "loss": 1.6166, "step": 7424 }, { "epoch": 0.44260340922636787, "grad_norm": 3.143359422683716, "learning_rate": 8.917566418545215e-05, "loss": 1.4579, "step": 7426 }, { "epoch": 0.4427226129455239, "grad_norm": 3.1868793964385986, "learning_rate": 8.916978718360324e-05, "loss": 1.2677, "step": 7428 }, { "epoch": 0.4428418166646799, "grad_norm": 2.9242844581604004, "learning_rate": 8.916390878051701e-05, "loss": 1.5543, "step": 7430 }, { "epoch": 0.442961020383836, "grad_norm": 2.935554027557373, "learning_rate": 8.91580289764038e-05, "loss": 1.6235, "step": 7432 }, { "epoch": 0.443080224102992, "grad_norm": 3.2648375034332275, "learning_rate": 8.91521477714739e-05, "loss": 1.4704, "step": 7434 }, { "epoch": 0.44319942782214805, "grad_norm": 3.2209489345550537, "learning_rate": 8.914626516593769e-05, "loss": 1.4175, "step": 7436 }, { "epoch": 0.4433186315413041, "grad_norm": 3.0153095722198486, "learning_rate": 8.914038116000567e-05, "loss": 1.3491, "step": 7438 }, { "epoch": 0.4434378352604601, "grad_norm": 3.2178759574890137, "learning_rate": 8.913449575388828e-05, "loss": 1.6282, "step": 7440 }, { "epoch": 0.4435570389796162, "grad_norm": 3.4232678413391113, "learning_rate": 8.912860894779608e-05, "loss": 1.6841, "step": 7442 }, { "epoch": 0.4436762426987722, "grad_norm": 3.2227768898010254, "learning_rate": 8.912272074193969e-05, "loss": 1.5019, "step": 7444 }, { "epoch": 0.44379544641792823, "grad_norm": 3.356458902359009, "learning_rate": 8.911683113652968e-05, "loss": 1.5289, "step": 7446 }, { "epoch": 0.44391465013708425, "grad_norm": 3.03576397895813, "learning_rate": 8.911094013177682e-05, "loss": 1.4162, "step": 7448 }, { "epoch": 0.44403385385624033, "grad_norm": 3.542097330093384, "learning_rate": 8.910504772789178e-05, "loss": 1.4657, "step": 7450 }, { "epoch": 0.44415305757539636, "grad_norm": 3.0224671363830566, "learning_rate": 8.909915392508543e-05, "loss": 1.5657, "step": 7452 }, { "epoch": 0.4442722612945524, "grad_norm": 2.951507568359375, "learning_rate": 8.909325872356853e-05, "loss": 1.5023, "step": 7454 }, { "epoch": 0.4443914650137084, "grad_norm": 2.8853557109832764, "learning_rate": 8.908736212355202e-05, "loss": 1.5025, "step": 7456 }, { "epoch": 0.4445106687328645, "grad_norm": 3.320495367050171, "learning_rate": 8.908146412524684e-05, "loss": 1.5659, "step": 7458 }, { "epoch": 0.4446298724520205, "grad_norm": 2.9225409030914307, "learning_rate": 8.907556472886397e-05, "loss": 1.3727, "step": 7460 }, { "epoch": 0.44474907617117654, "grad_norm": 3.100834369659424, "learning_rate": 8.906966393461445e-05, "loss": 1.4916, "step": 7462 }, { "epoch": 0.44486827989033256, "grad_norm": 3.40146803855896, "learning_rate": 8.906376174270937e-05, "loss": 1.4572, "step": 7464 }, { "epoch": 0.44498748360948864, "grad_norm": 2.9830210208892822, "learning_rate": 8.905785815335988e-05, "loss": 1.486, "step": 7466 }, { "epoch": 0.44510668732864467, "grad_norm": 3.2366831302642822, "learning_rate": 8.905195316677718e-05, "loss": 1.4017, "step": 7468 }, { "epoch": 0.4452258910478007, "grad_norm": 2.9985947608947754, "learning_rate": 8.904604678317248e-05, "loss": 1.5035, "step": 7470 }, { "epoch": 0.4453450947669567, "grad_norm": 3.139249801635742, "learning_rate": 8.904013900275711e-05, "loss": 1.392, "step": 7472 }, { "epoch": 0.44546429848611274, "grad_norm": 3.2226903438568115, "learning_rate": 8.903422982574238e-05, "loss": 1.4712, "step": 7474 }, { "epoch": 0.4455835022052688, "grad_norm": 3.349834680557251, "learning_rate": 8.902831925233972e-05, "loss": 1.4488, "step": 7476 }, { "epoch": 0.44570270592442485, "grad_norm": 3.1168923377990723, "learning_rate": 8.902240728276053e-05, "loss": 1.4893, "step": 7478 }, { "epoch": 0.44582190964358087, "grad_norm": 2.705209255218506, "learning_rate": 8.901649391721632e-05, "loss": 1.215, "step": 7480 }, { "epoch": 0.4459411133627369, "grad_norm": 3.5173473358154297, "learning_rate": 8.901057915591862e-05, "loss": 1.4801, "step": 7482 }, { "epoch": 0.446060317081893, "grad_norm": 3.1243972778320312, "learning_rate": 8.900466299907904e-05, "loss": 1.3389, "step": 7484 }, { "epoch": 0.446179520801049, "grad_norm": 3.254666566848755, "learning_rate": 8.89987454469092e-05, "loss": 1.4728, "step": 7486 }, { "epoch": 0.446298724520205, "grad_norm": 2.9383296966552734, "learning_rate": 8.899282649962081e-05, "loss": 1.458, "step": 7488 }, { "epoch": 0.44641792823936105, "grad_norm": 3.2827391624450684, "learning_rate": 8.89869061574256e-05, "loss": 1.4588, "step": 7490 }, { "epoch": 0.44653713195851713, "grad_norm": 2.845355987548828, "learning_rate": 8.898098442053537e-05, "loss": 1.3224, "step": 7492 }, { "epoch": 0.44665633567767316, "grad_norm": 3.105189561843872, "learning_rate": 8.897506128916196e-05, "loss": 1.5495, "step": 7494 }, { "epoch": 0.4467755393968292, "grad_norm": 3.062343120574951, "learning_rate": 8.896913676351724e-05, "loss": 1.5494, "step": 7496 }, { "epoch": 0.4468947431159852, "grad_norm": 2.9470221996307373, "learning_rate": 8.896321084381319e-05, "loss": 1.3919, "step": 7498 }, { "epoch": 0.44701394683514123, "grad_norm": 2.7918059825897217, "learning_rate": 8.895728353026176e-05, "loss": 1.4608, "step": 7500 }, { "epoch": 0.4471331505542973, "grad_norm": 3.3572194576263428, "learning_rate": 8.8951354823075e-05, "loss": 1.5819, "step": 7502 }, { "epoch": 0.44725235427345333, "grad_norm": 3.0070748329162598, "learning_rate": 8.894542472246503e-05, "loss": 1.4402, "step": 7504 }, { "epoch": 0.44737155799260936, "grad_norm": 2.9908061027526855, "learning_rate": 8.893949322864395e-05, "loss": 1.287, "step": 7506 }, { "epoch": 0.4474907617117654, "grad_norm": 3.3552119731903076, "learning_rate": 8.893356034182396e-05, "loss": 1.4422, "step": 7508 }, { "epoch": 0.44760996543092146, "grad_norm": 3.1318624019622803, "learning_rate": 8.892762606221731e-05, "loss": 1.3894, "step": 7510 }, { "epoch": 0.4477291691500775, "grad_norm": 3.171379327774048, "learning_rate": 8.89216903900363e-05, "loss": 1.4066, "step": 7512 }, { "epoch": 0.4478483728692335, "grad_norm": 2.9335975646972656, "learning_rate": 8.891575332549325e-05, "loss": 1.5297, "step": 7514 }, { "epoch": 0.44796757658838954, "grad_norm": 3.2495789527893066, "learning_rate": 8.890981486880057e-05, "loss": 1.3532, "step": 7516 }, { "epoch": 0.4480867803075456, "grad_norm": 3.2988815307617188, "learning_rate": 8.890387502017067e-05, "loss": 1.4194, "step": 7518 }, { "epoch": 0.44820598402670164, "grad_norm": 3.1635944843292236, "learning_rate": 8.889793377981604e-05, "loss": 1.4432, "step": 7520 }, { "epoch": 0.44832518774585767, "grad_norm": 3.119572401046753, "learning_rate": 8.889199114794925e-05, "loss": 1.465, "step": 7522 }, { "epoch": 0.4484443914650137, "grad_norm": 3.160310983657837, "learning_rate": 8.888604712478285e-05, "loss": 1.4623, "step": 7524 }, { "epoch": 0.44856359518416977, "grad_norm": 3.2679038047790527, "learning_rate": 8.888010171052951e-05, "loss": 1.4818, "step": 7526 }, { "epoch": 0.4486827989033258, "grad_norm": 3.3835501670837402, "learning_rate": 8.887415490540192e-05, "loss": 1.4596, "step": 7528 }, { "epoch": 0.4488020026224818, "grad_norm": 3.1670432090759277, "learning_rate": 8.886820670961278e-05, "loss": 1.518, "step": 7530 }, { "epoch": 0.44892120634163785, "grad_norm": 2.9058895111083984, "learning_rate": 8.886225712337491e-05, "loss": 1.6323, "step": 7532 }, { "epoch": 0.44904041006079387, "grad_norm": 3.565793514251709, "learning_rate": 8.885630614690113e-05, "loss": 1.4296, "step": 7534 }, { "epoch": 0.44915961377994995, "grad_norm": 2.9724912643432617, "learning_rate": 8.885035378040434e-05, "loss": 1.4339, "step": 7536 }, { "epoch": 0.449278817499106, "grad_norm": 3.095614194869995, "learning_rate": 8.884440002409746e-05, "loss": 1.538, "step": 7538 }, { "epoch": 0.449398021218262, "grad_norm": 3.186286211013794, "learning_rate": 8.88384448781935e-05, "loss": 1.5624, "step": 7540 }, { "epoch": 0.449517224937418, "grad_norm": 3.0854458808898926, "learning_rate": 8.883248834290547e-05, "loss": 1.4711, "step": 7542 }, { "epoch": 0.4496364286565741, "grad_norm": 3.1033222675323486, "learning_rate": 8.882653041844648e-05, "loss": 1.6195, "step": 7544 }, { "epoch": 0.44975563237573013, "grad_norm": 3.8729732036590576, "learning_rate": 8.882057110502963e-05, "loss": 1.5027, "step": 7546 }, { "epoch": 0.44987483609488615, "grad_norm": 3.0708160400390625, "learning_rate": 8.881461040286816e-05, "loss": 1.3874, "step": 7548 }, { "epoch": 0.4499940398140422, "grad_norm": 2.924185276031494, "learning_rate": 8.880864831217526e-05, "loss": 1.4533, "step": 7550 }, { "epoch": 0.45011324353319826, "grad_norm": 3.0904693603515625, "learning_rate": 8.880268483316421e-05, "loss": 1.5621, "step": 7552 }, { "epoch": 0.4502324472523543, "grad_norm": 3.1530144214630127, "learning_rate": 8.879671996604839e-05, "loss": 1.3764, "step": 7554 }, { "epoch": 0.4503516509715103, "grad_norm": 2.9353344440460205, "learning_rate": 8.879075371104114e-05, "loss": 1.4006, "step": 7556 }, { "epoch": 0.45047085469066633, "grad_norm": 2.817115068435669, "learning_rate": 8.878478606835591e-05, "loss": 1.5706, "step": 7558 }, { "epoch": 0.4505900584098224, "grad_norm": 3.1449434757232666, "learning_rate": 8.877881703820618e-05, "loss": 1.4387, "step": 7560 }, { "epoch": 0.45070926212897844, "grad_norm": 3.248560905456543, "learning_rate": 8.87728466208055e-05, "loss": 1.5398, "step": 7562 }, { "epoch": 0.45082846584813446, "grad_norm": 3.519455671310425, "learning_rate": 8.876687481636741e-05, "loss": 1.466, "step": 7564 }, { "epoch": 0.4509476695672905, "grad_norm": 3.3675262928009033, "learning_rate": 8.876090162510559e-05, "loss": 1.6552, "step": 7566 }, { "epoch": 0.4510668732864465, "grad_norm": 2.9877495765686035, "learning_rate": 8.87549270472337e-05, "loss": 1.4562, "step": 7568 }, { "epoch": 0.4511860770056026, "grad_norm": 2.9614756107330322, "learning_rate": 8.874895108296546e-05, "loss": 1.3831, "step": 7570 }, { "epoch": 0.4513052807247586, "grad_norm": 3.2876157760620117, "learning_rate": 8.874297373251466e-05, "loss": 1.5624, "step": 7572 }, { "epoch": 0.45142448444391464, "grad_norm": 3.682063341140747, "learning_rate": 8.873699499609513e-05, "loss": 1.5319, "step": 7574 }, { "epoch": 0.45154368816307067, "grad_norm": 3.45906138420105, "learning_rate": 8.873101487392077e-05, "loss": 1.4735, "step": 7576 }, { "epoch": 0.45166289188222675, "grad_norm": 3.0181081295013428, "learning_rate": 8.872503336620548e-05, "loss": 1.5286, "step": 7578 }, { "epoch": 0.45178209560138277, "grad_norm": 3.021660089492798, "learning_rate": 8.871905047316325e-05, "loss": 1.4638, "step": 7580 }, { "epoch": 0.4519012993205388, "grad_norm": 3.052706003189087, "learning_rate": 8.87130661950081e-05, "loss": 1.3706, "step": 7582 }, { "epoch": 0.4520205030396948, "grad_norm": 3.2238922119140625, "learning_rate": 8.870708053195413e-05, "loss": 1.2926, "step": 7584 }, { "epoch": 0.4521397067588509, "grad_norm": 3.579439640045166, "learning_rate": 8.870109348421545e-05, "loss": 1.5433, "step": 7586 }, { "epoch": 0.4522589104780069, "grad_norm": 2.9346072673797607, "learning_rate": 8.869510505200624e-05, "loss": 1.4405, "step": 7588 }, { "epoch": 0.45237811419716295, "grad_norm": 3.052978515625, "learning_rate": 8.868911523554073e-05, "loss": 1.4397, "step": 7590 }, { "epoch": 0.452497317916319, "grad_norm": 3.2375288009643555, "learning_rate": 8.86831240350332e-05, "loss": 1.4307, "step": 7592 }, { "epoch": 0.452616521635475, "grad_norm": 3.1600799560546875, "learning_rate": 8.867713145069796e-05, "loss": 1.5011, "step": 7594 }, { "epoch": 0.4527357253546311, "grad_norm": 3.087547540664673, "learning_rate": 8.86711374827494e-05, "loss": 1.4765, "step": 7596 }, { "epoch": 0.4528549290737871, "grad_norm": 3.051379919052124, "learning_rate": 8.866514213140194e-05, "loss": 1.3626, "step": 7598 }, { "epoch": 0.45297413279294313, "grad_norm": 3.1761367321014404, "learning_rate": 8.865914539687006e-05, "loss": 1.4799, "step": 7600 }, { "epoch": 0.45309333651209915, "grad_norm": 3.0379552841186523, "learning_rate": 8.865314727936826e-05, "loss": 1.3914, "step": 7602 }, { "epoch": 0.45321254023125523, "grad_norm": 3.3580052852630615, "learning_rate": 8.864714777911117e-05, "loss": 1.5778, "step": 7604 }, { "epoch": 0.45333174395041126, "grad_norm": 3.388157367706299, "learning_rate": 8.864114689631333e-05, "loss": 1.6211, "step": 7606 }, { "epoch": 0.4534509476695673, "grad_norm": 3.2112057209014893, "learning_rate": 8.863514463118948e-05, "loss": 1.4316, "step": 7608 }, { "epoch": 0.4535701513887233, "grad_norm": 2.8582305908203125, "learning_rate": 8.862914098395432e-05, "loss": 1.4489, "step": 7610 }, { "epoch": 0.4536893551078794, "grad_norm": 3.2510805130004883, "learning_rate": 8.862313595482262e-05, "loss": 1.6807, "step": 7612 }, { "epoch": 0.4538085588270354, "grad_norm": 2.9318337440490723, "learning_rate": 8.861712954400918e-05, "loss": 1.5386, "step": 7614 }, { "epoch": 0.45392776254619144, "grad_norm": 3.0665383338928223, "learning_rate": 8.861112175172889e-05, "loss": 1.3603, "step": 7616 }, { "epoch": 0.45404696626534746, "grad_norm": 3.0336477756500244, "learning_rate": 8.860511257819669e-05, "loss": 1.6012, "step": 7618 }, { "epoch": 0.45416616998450354, "grad_norm": 2.8331832885742188, "learning_rate": 8.859910202362751e-05, "loss": 1.3875, "step": 7620 }, { "epoch": 0.45428537370365957, "grad_norm": 3.0945637226104736, "learning_rate": 8.85930900882364e-05, "loss": 1.4522, "step": 7622 }, { "epoch": 0.4544045774228156, "grad_norm": 3.2141997814178467, "learning_rate": 8.858707677223841e-05, "loss": 1.5887, "step": 7624 }, { "epoch": 0.4545237811419716, "grad_norm": 3.313265800476074, "learning_rate": 8.858106207584864e-05, "loss": 1.3349, "step": 7626 }, { "epoch": 0.45464298486112764, "grad_norm": 3.1888511180877686, "learning_rate": 8.85750459992823e-05, "loss": 1.3601, "step": 7628 }, { "epoch": 0.4547621885802837, "grad_norm": 3.2264115810394287, "learning_rate": 8.856902854275457e-05, "loss": 1.5586, "step": 7630 }, { "epoch": 0.45488139229943975, "grad_norm": 3.1537203788757324, "learning_rate": 8.856300970648072e-05, "loss": 1.3841, "step": 7632 }, { "epoch": 0.45500059601859577, "grad_norm": 2.9914371967315674, "learning_rate": 8.855698949067609e-05, "loss": 1.5082, "step": 7634 }, { "epoch": 0.4551197997377518, "grad_norm": 3.009610652923584, "learning_rate": 8.855096789555602e-05, "loss": 1.4962, "step": 7636 }, { "epoch": 0.4552390034569079, "grad_norm": 2.748013734817505, "learning_rate": 8.854494492133592e-05, "loss": 1.3688, "step": 7638 }, { "epoch": 0.4553582071760639, "grad_norm": 3.462043046951294, "learning_rate": 8.853892056823125e-05, "loss": 1.4692, "step": 7640 }, { "epoch": 0.4554774108952199, "grad_norm": 3.096703290939331, "learning_rate": 8.853289483645755e-05, "loss": 1.4449, "step": 7642 }, { "epoch": 0.45559661461437595, "grad_norm": 3.486086368560791, "learning_rate": 8.852686772623036e-05, "loss": 1.5472, "step": 7644 }, { "epoch": 0.45571581833353203, "grad_norm": 3.1145946979522705, "learning_rate": 8.852083923776529e-05, "loss": 1.5519, "step": 7646 }, { "epoch": 0.45583502205268805, "grad_norm": 2.6257879734039307, "learning_rate": 8.851480937127798e-05, "loss": 1.2566, "step": 7648 }, { "epoch": 0.4559542257718441, "grad_norm": 2.930192708969116, "learning_rate": 8.850877812698419e-05, "loss": 1.3862, "step": 7650 }, { "epoch": 0.4560734294910001, "grad_norm": 3.06391978263855, "learning_rate": 8.850274550509964e-05, "loss": 1.3447, "step": 7652 }, { "epoch": 0.4561926332101562, "grad_norm": 3.010261058807373, "learning_rate": 8.849671150584015e-05, "loss": 1.4029, "step": 7654 }, { "epoch": 0.4563118369293122, "grad_norm": 2.9751203060150146, "learning_rate": 8.849067612942159e-05, "loss": 1.3049, "step": 7656 }, { "epoch": 0.45643104064846823, "grad_norm": 3.146895408630371, "learning_rate": 8.848463937605983e-05, "loss": 1.3651, "step": 7658 }, { "epoch": 0.45655024436762426, "grad_norm": 3.254558563232422, "learning_rate": 8.847860124597085e-05, "loss": 1.3873, "step": 7660 }, { "epoch": 0.4566694480867803, "grad_norm": 2.993385076522827, "learning_rate": 8.847256173937067e-05, "loss": 1.4696, "step": 7662 }, { "epoch": 0.45678865180593636, "grad_norm": 4.016400337219238, "learning_rate": 8.846652085647529e-05, "loss": 1.6032, "step": 7664 }, { "epoch": 0.4569078555250924, "grad_norm": 3.4503040313720703, "learning_rate": 8.846047859750087e-05, "loss": 1.4458, "step": 7666 }, { "epoch": 0.4570270592442484, "grad_norm": 3.199296474456787, "learning_rate": 8.845443496266352e-05, "loss": 1.4322, "step": 7668 }, { "epoch": 0.45714626296340444, "grad_norm": 3.0712153911590576, "learning_rate": 8.844838995217948e-05, "loss": 1.3567, "step": 7670 }, { "epoch": 0.4572654666825605, "grad_norm": 3.1063222885131836, "learning_rate": 8.844234356626497e-05, "loss": 1.3397, "step": 7672 }, { "epoch": 0.45738467040171654, "grad_norm": 2.7624495029449463, "learning_rate": 8.843629580513632e-05, "loss": 1.3425, "step": 7674 }, { "epoch": 0.45750387412087257, "grad_norm": 3.0703866481781006, "learning_rate": 8.843024666900984e-05, "loss": 1.4103, "step": 7676 }, { "epoch": 0.4576230778400286, "grad_norm": 3.046726703643799, "learning_rate": 8.842419615810196e-05, "loss": 1.4653, "step": 7678 }, { "epoch": 0.45774228155918467, "grad_norm": 3.1547670364379883, "learning_rate": 8.841814427262911e-05, "loss": 1.4852, "step": 7680 }, { "epoch": 0.4578614852783407, "grad_norm": 3.341562271118164, "learning_rate": 8.84120910128078e-05, "loss": 1.5472, "step": 7682 }, { "epoch": 0.4579806889974967, "grad_norm": 3.5090041160583496, "learning_rate": 8.840603637885457e-05, "loss": 1.4274, "step": 7684 }, { "epoch": 0.45809989271665275, "grad_norm": 3.0123608112335205, "learning_rate": 8.8399980370986e-05, "loss": 1.3078, "step": 7686 }, { "epoch": 0.45821909643580877, "grad_norm": 2.8307812213897705, "learning_rate": 8.839392298941876e-05, "loss": 1.4056, "step": 7688 }, { "epoch": 0.45833830015496485, "grad_norm": 3.2673017978668213, "learning_rate": 8.838786423436953e-05, "loss": 1.378, "step": 7690 }, { "epoch": 0.4584575038741209, "grad_norm": 2.546107053756714, "learning_rate": 8.838180410605505e-05, "loss": 1.3606, "step": 7692 }, { "epoch": 0.4585767075932769, "grad_norm": 3.1849658489227295, "learning_rate": 8.837574260469211e-05, "loss": 1.463, "step": 7694 }, { "epoch": 0.4586959113124329, "grad_norm": 2.865980625152588, "learning_rate": 8.836967973049756e-05, "loss": 1.3552, "step": 7696 }, { "epoch": 0.458815115031589, "grad_norm": 3.1406612396240234, "learning_rate": 8.83636154836883e-05, "loss": 1.4194, "step": 7698 }, { "epoch": 0.45893431875074503, "grad_norm": 3.287393093109131, "learning_rate": 8.835754986448124e-05, "loss": 1.5764, "step": 7700 }, { "epoch": 0.45905352246990105, "grad_norm": 2.998067617416382, "learning_rate": 8.835148287309338e-05, "loss": 1.3689, "step": 7702 }, { "epoch": 0.4591727261890571, "grad_norm": 3.0131640434265137, "learning_rate": 8.834541450974177e-05, "loss": 1.3064, "step": 7704 }, { "epoch": 0.45929192990821316, "grad_norm": 3.404811143875122, "learning_rate": 8.833934477464347e-05, "loss": 1.538, "step": 7706 }, { "epoch": 0.4594111336273692, "grad_norm": 3.0915017127990723, "learning_rate": 8.833327366801565e-05, "loss": 1.4236, "step": 7708 }, { "epoch": 0.4595303373465252, "grad_norm": 3.336355686187744, "learning_rate": 8.832720119007547e-05, "loss": 1.4722, "step": 7710 }, { "epoch": 0.45964954106568123, "grad_norm": 3.289304256439209, "learning_rate": 8.832112734104016e-05, "loss": 1.4212, "step": 7712 }, { "epoch": 0.4597687447848373, "grad_norm": 3.367161750793457, "learning_rate": 8.831505212112702e-05, "loss": 1.4762, "step": 7714 }, { "epoch": 0.45988794850399334, "grad_norm": 3.157327651977539, "learning_rate": 8.830897553055336e-05, "loss": 1.4354, "step": 7716 }, { "epoch": 0.46000715222314936, "grad_norm": 2.8260669708251953, "learning_rate": 8.830289756953658e-05, "loss": 1.3457, "step": 7718 }, { "epoch": 0.4601263559423054, "grad_norm": 5.968504428863525, "learning_rate": 8.829681823829409e-05, "loss": 1.3656, "step": 7720 }, { "epoch": 0.4602455596614614, "grad_norm": 3.0416066646575928, "learning_rate": 8.82907375370434e-05, "loss": 1.5295, "step": 7722 }, { "epoch": 0.4603647633806175, "grad_norm": 3.1435422897338867, "learning_rate": 8.828465546600202e-05, "loss": 1.4891, "step": 7724 }, { "epoch": 0.4604839670997735, "grad_norm": 3.147273063659668, "learning_rate": 8.82785720253875e-05, "loss": 1.4676, "step": 7726 }, { "epoch": 0.46060317081892954, "grad_norm": 3.0127553939819336, "learning_rate": 8.827248721541752e-05, "loss": 1.509, "step": 7728 }, { "epoch": 0.46072237453808557, "grad_norm": 2.831386089324951, "learning_rate": 8.82664010363097e-05, "loss": 1.322, "step": 7730 }, { "epoch": 0.46084157825724165, "grad_norm": 3.0834429264068604, "learning_rate": 8.82603134882818e-05, "loss": 1.4397, "step": 7732 }, { "epoch": 0.46096078197639767, "grad_norm": 3.0513439178466797, "learning_rate": 8.825422457155158e-05, "loss": 1.3872, "step": 7734 }, { "epoch": 0.4610799856955537, "grad_norm": 2.7594568729400635, "learning_rate": 8.824813428633686e-05, "loss": 1.4599, "step": 7736 }, { "epoch": 0.4611991894147097, "grad_norm": 3.3261361122131348, "learning_rate": 8.82420426328555e-05, "loss": 1.3484, "step": 7738 }, { "epoch": 0.4613183931338658, "grad_norm": 3.3996996879577637, "learning_rate": 8.823594961132544e-05, "loss": 1.5413, "step": 7740 }, { "epoch": 0.4614375968530218, "grad_norm": 3.358017683029175, "learning_rate": 8.822985522196466e-05, "loss": 1.4691, "step": 7742 }, { "epoch": 0.46155680057217785, "grad_norm": 3.0829579830169678, "learning_rate": 8.822375946499113e-05, "loss": 1.5126, "step": 7744 }, { "epoch": 0.4616760042913339, "grad_norm": 2.9862732887268066, "learning_rate": 8.821766234062294e-05, "loss": 1.301, "step": 7746 }, { "epoch": 0.46179520801048995, "grad_norm": 3.008646011352539, "learning_rate": 8.821156384907823e-05, "loss": 1.35, "step": 7748 }, { "epoch": 0.461914411729646, "grad_norm": 2.945218324661255, "learning_rate": 8.820546399057511e-05, "loss": 1.4031, "step": 7750 }, { "epoch": 0.462033615448802, "grad_norm": 3.45114803314209, "learning_rate": 8.819936276533187e-05, "loss": 1.5332, "step": 7752 }, { "epoch": 0.46215281916795803, "grad_norm": 2.824042558670044, "learning_rate": 8.819326017356669e-05, "loss": 1.3305, "step": 7754 }, { "epoch": 0.46227202288711405, "grad_norm": 2.8654913902282715, "learning_rate": 8.818715621549794e-05, "loss": 1.3499, "step": 7756 }, { "epoch": 0.46239122660627013, "grad_norm": 3.0332798957824707, "learning_rate": 8.818105089134394e-05, "loss": 1.5516, "step": 7758 }, { "epoch": 0.46251043032542616, "grad_norm": 2.9211721420288086, "learning_rate": 8.817494420132311e-05, "loss": 1.4646, "step": 7760 }, { "epoch": 0.4626296340445822, "grad_norm": 3.4166905879974365, "learning_rate": 8.816883614565392e-05, "loss": 1.4821, "step": 7762 }, { "epoch": 0.4627488377637382, "grad_norm": 3.0576012134552, "learning_rate": 8.816272672455488e-05, "loss": 1.3695, "step": 7764 }, { "epoch": 0.4628680414828943, "grad_norm": 3.1528544425964355, "learning_rate": 8.815661593824451e-05, "loss": 1.4834, "step": 7766 }, { "epoch": 0.4629872452020503, "grad_norm": 3.206361770629883, "learning_rate": 8.815050378694145e-05, "loss": 1.4829, "step": 7768 }, { "epoch": 0.46310644892120634, "grad_norm": 2.977804660797119, "learning_rate": 8.814439027086435e-05, "loss": 1.5105, "step": 7770 }, { "epoch": 0.46322565264036236, "grad_norm": 3.1331822872161865, "learning_rate": 8.813827539023187e-05, "loss": 1.5561, "step": 7772 }, { "epoch": 0.46334485635951844, "grad_norm": 2.8508424758911133, "learning_rate": 8.813215914526281e-05, "loss": 1.4193, "step": 7774 }, { "epoch": 0.46346406007867447, "grad_norm": 3.0094430446624756, "learning_rate": 8.812604153617594e-05, "loss": 1.511, "step": 7776 }, { "epoch": 0.4635832637978305, "grad_norm": 2.980224609375, "learning_rate": 8.811992256319012e-05, "loss": 1.4968, "step": 7778 }, { "epoch": 0.4637024675169865, "grad_norm": 3.1679205894470215, "learning_rate": 8.811380222652425e-05, "loss": 1.4497, "step": 7780 }, { "epoch": 0.46382167123614254, "grad_norm": 3.287057876586914, "learning_rate": 8.810768052639728e-05, "loss": 1.4917, "step": 7782 }, { "epoch": 0.4639408749552986, "grad_norm": 3.3252158164978027, "learning_rate": 8.810155746302817e-05, "loss": 1.5133, "step": 7784 }, { "epoch": 0.46406007867445465, "grad_norm": 3.2147183418273926, "learning_rate": 8.809543303663601e-05, "loss": 1.4773, "step": 7786 }, { "epoch": 0.46417928239361067, "grad_norm": 3.048180341720581, "learning_rate": 8.808930724743984e-05, "loss": 1.4334, "step": 7788 }, { "epoch": 0.4642984861127667, "grad_norm": 3.138029098510742, "learning_rate": 8.808318009565886e-05, "loss": 1.4231, "step": 7790 }, { "epoch": 0.4644176898319228, "grad_norm": 3.1922945976257324, "learning_rate": 8.80770515815122e-05, "loss": 1.4922, "step": 7792 }, { "epoch": 0.4645368935510788, "grad_norm": 3.1525065898895264, "learning_rate": 8.807092170521915e-05, "loss": 1.5088, "step": 7794 }, { "epoch": 0.4646560972702348, "grad_norm": 3.2863495349884033, "learning_rate": 8.806479046699896e-05, "loss": 1.5737, "step": 7796 }, { "epoch": 0.46477530098939085, "grad_norm": 2.598518133163452, "learning_rate": 8.805865786707098e-05, "loss": 1.3622, "step": 7798 }, { "epoch": 0.46489450470854693, "grad_norm": 2.794779062271118, "learning_rate": 8.805252390565458e-05, "loss": 1.405, "step": 7800 }, { "epoch": 0.46501370842770295, "grad_norm": 3.298962116241455, "learning_rate": 8.80463885829692e-05, "loss": 1.295, "step": 7802 }, { "epoch": 0.465132912146859, "grad_norm": 3.1342954635620117, "learning_rate": 8.804025189923435e-05, "loss": 1.2704, "step": 7804 }, { "epoch": 0.465252115866015, "grad_norm": 3.1002073287963867, "learning_rate": 8.803411385466953e-05, "loss": 1.3357, "step": 7806 }, { "epoch": 0.4653713195851711, "grad_norm": 3.191850423812866, "learning_rate": 8.802797444949432e-05, "loss": 1.4158, "step": 7808 }, { "epoch": 0.4654905233043271, "grad_norm": 3.1769447326660156, "learning_rate": 8.802183368392836e-05, "loss": 1.4939, "step": 7810 }, { "epoch": 0.46560972702348313, "grad_norm": 3.1104183197021484, "learning_rate": 8.801569155819132e-05, "loss": 1.4666, "step": 7812 }, { "epoch": 0.46572893074263916, "grad_norm": 3.2041594982147217, "learning_rate": 8.80095480725029e-05, "loss": 1.6602, "step": 7814 }, { "epoch": 0.4658481344617952, "grad_norm": 3.18573260307312, "learning_rate": 8.800340322708291e-05, "loss": 1.608, "step": 7816 }, { "epoch": 0.46596733818095126, "grad_norm": 3.0524845123291016, "learning_rate": 8.799725702215118e-05, "loss": 1.3789, "step": 7818 }, { "epoch": 0.4660865419001073, "grad_norm": 3.0288190841674805, "learning_rate": 8.799110945792754e-05, "loss": 1.4357, "step": 7820 }, { "epoch": 0.4662057456192633, "grad_norm": 3.0282399654388428, "learning_rate": 8.798496053463194e-05, "loss": 1.4441, "step": 7822 }, { "epoch": 0.46632494933841934, "grad_norm": 3.0379960536956787, "learning_rate": 8.79788102524843e-05, "loss": 1.355, "step": 7824 }, { "epoch": 0.4664441530575754, "grad_norm": 3.230591058731079, "learning_rate": 8.797265861170471e-05, "loss": 1.3906, "step": 7826 }, { "epoch": 0.46656335677673144, "grad_norm": 3.021085739135742, "learning_rate": 8.79665056125132e-05, "loss": 1.4194, "step": 7828 }, { "epoch": 0.46668256049588747, "grad_norm": 2.9675915241241455, "learning_rate": 8.796035125512989e-05, "loss": 1.3482, "step": 7830 }, { "epoch": 0.4668017642150435, "grad_norm": 2.771779775619507, "learning_rate": 8.795419553977492e-05, "loss": 1.3492, "step": 7832 }, { "epoch": 0.46692096793419957, "grad_norm": 2.9437968730926514, "learning_rate": 8.794803846666852e-05, "loss": 1.4612, "step": 7834 }, { "epoch": 0.4670401716533556, "grad_norm": 2.9938199520111084, "learning_rate": 8.794188003603095e-05, "loss": 1.3782, "step": 7836 }, { "epoch": 0.4671593753725116, "grad_norm": 2.9078316688537598, "learning_rate": 8.793572024808249e-05, "loss": 1.3563, "step": 7838 }, { "epoch": 0.46727857909166765, "grad_norm": 3.098175525665283, "learning_rate": 8.792955910304356e-05, "loss": 1.528, "step": 7840 }, { "epoch": 0.4673977828108237, "grad_norm": 3.081486463546753, "learning_rate": 8.79233966011345e-05, "loss": 1.5271, "step": 7842 }, { "epoch": 0.46751698652997975, "grad_norm": 3.2325806617736816, "learning_rate": 8.79172327425758e-05, "loss": 1.5171, "step": 7844 }, { "epoch": 0.4676361902491358, "grad_norm": 3.1434741020202637, "learning_rate": 8.791106752758795e-05, "loss": 1.5023, "step": 7846 }, { "epoch": 0.4677553939682918, "grad_norm": 3.1062264442443848, "learning_rate": 8.79049009563915e-05, "loss": 1.4269, "step": 7848 }, { "epoch": 0.4678745976874478, "grad_norm": 3.280381202697754, "learning_rate": 8.789873302920705e-05, "loss": 1.5462, "step": 7850 }, { "epoch": 0.4679938014066039, "grad_norm": 3.088214874267578, "learning_rate": 8.789256374625526e-05, "loss": 1.559, "step": 7852 }, { "epoch": 0.46811300512575993, "grad_norm": 3.211787462234497, "learning_rate": 8.788639310775681e-05, "loss": 1.4732, "step": 7854 }, { "epoch": 0.46823220884491595, "grad_norm": 3.5049684047698975, "learning_rate": 8.788022111393245e-05, "loss": 1.674, "step": 7856 }, { "epoch": 0.468351412564072, "grad_norm": 2.85036039352417, "learning_rate": 8.787404776500299e-05, "loss": 1.4344, "step": 7858 }, { "epoch": 0.46847061628322806, "grad_norm": 3.1003222465515137, "learning_rate": 8.786787306118924e-05, "loss": 1.3406, "step": 7860 }, { "epoch": 0.4685898200023841, "grad_norm": 2.9883968830108643, "learning_rate": 8.786169700271212e-05, "loss": 1.5476, "step": 7862 }, { "epoch": 0.4687090237215401, "grad_norm": 3.3602325916290283, "learning_rate": 8.785551958979254e-05, "loss": 1.3612, "step": 7864 }, { "epoch": 0.46882822744069613, "grad_norm": 3.3433852195739746, "learning_rate": 8.784934082265153e-05, "loss": 1.4568, "step": 7866 }, { "epoch": 0.4689474311598522, "grad_norm": 3.266587257385254, "learning_rate": 8.784316070151006e-05, "loss": 1.4517, "step": 7868 }, { "epoch": 0.46906663487900824, "grad_norm": 2.6704771518707275, "learning_rate": 8.783697922658929e-05, "loss": 1.3903, "step": 7870 }, { "epoch": 0.46918583859816426, "grad_norm": 2.66750431060791, "learning_rate": 8.783079639811029e-05, "loss": 1.4703, "step": 7872 }, { "epoch": 0.4693050423173203, "grad_norm": 3.1452269554138184, "learning_rate": 8.782461221629428e-05, "loss": 1.4018, "step": 7874 }, { "epoch": 0.4694242460364763, "grad_norm": 2.773679733276367, "learning_rate": 8.781842668136246e-05, "loss": 1.403, "step": 7876 }, { "epoch": 0.4695434497556324, "grad_norm": 3.0565342903137207, "learning_rate": 8.781223979353615e-05, "loss": 1.4611, "step": 7878 }, { "epoch": 0.4696626534747884, "grad_norm": 3.0325584411621094, "learning_rate": 8.780605155303663e-05, "loss": 1.3478, "step": 7880 }, { "epoch": 0.46978185719394444, "grad_norm": 3.210305690765381, "learning_rate": 8.779986196008529e-05, "loss": 1.5634, "step": 7882 }, { "epoch": 0.46990106091310047, "grad_norm": 3.3790478706359863, "learning_rate": 8.779367101490357e-05, "loss": 1.5278, "step": 7884 }, { "epoch": 0.47002026463225655, "grad_norm": 3.278090238571167, "learning_rate": 8.778747871771292e-05, "loss": 1.3865, "step": 7886 }, { "epoch": 0.47013946835141257, "grad_norm": 3.061246871948242, "learning_rate": 8.778128506873486e-05, "loss": 1.322, "step": 7888 }, { "epoch": 0.4702586720705686, "grad_norm": 3.0127885341644287, "learning_rate": 8.777509006819098e-05, "loss": 1.3247, "step": 7890 }, { "epoch": 0.4703778757897246, "grad_norm": 3.393669366836548, "learning_rate": 8.776889371630289e-05, "loss": 1.48, "step": 7892 }, { "epoch": 0.4704970795088807, "grad_norm": 3.112020254135132, "learning_rate": 8.776269601329224e-05, "loss": 1.4794, "step": 7894 }, { "epoch": 0.4706162832280367, "grad_norm": 3.0594518184661865, "learning_rate": 8.775649695938073e-05, "loss": 1.4155, "step": 7896 }, { "epoch": 0.47073548694719275, "grad_norm": 2.813969612121582, "learning_rate": 8.775029655479016e-05, "loss": 1.4823, "step": 7898 }, { "epoch": 0.4708546906663488, "grad_norm": 3.0814931392669678, "learning_rate": 8.774409479974232e-05, "loss": 1.3405, "step": 7900 }, { "epoch": 0.47097389438550485, "grad_norm": 2.9308669567108154, "learning_rate": 8.773789169445907e-05, "loss": 1.301, "step": 7902 }, { "epoch": 0.4710930981046609, "grad_norm": 2.794832706451416, "learning_rate": 8.77316872391623e-05, "loss": 1.4353, "step": 7904 }, { "epoch": 0.4712123018238169, "grad_norm": 3.155242443084717, "learning_rate": 8.7725481434074e-05, "loss": 1.4905, "step": 7906 }, { "epoch": 0.47133150554297293, "grad_norm": 3.126840591430664, "learning_rate": 8.771927427941613e-05, "loss": 1.452, "step": 7908 }, { "epoch": 0.47145070926212895, "grad_norm": 2.935077667236328, "learning_rate": 8.771306577541079e-05, "loss": 1.3922, "step": 7910 }, { "epoch": 0.47156991298128503, "grad_norm": 3.070906400680542, "learning_rate": 8.770685592228002e-05, "loss": 1.4076, "step": 7912 }, { "epoch": 0.47168911670044106, "grad_norm": 2.880742073059082, "learning_rate": 8.770064472024604e-05, "loss": 1.27, "step": 7914 }, { "epoch": 0.4718083204195971, "grad_norm": 3.1286959648132324, "learning_rate": 8.769443216953099e-05, "loss": 1.4176, "step": 7916 }, { "epoch": 0.4719275241387531, "grad_norm": 3.194363594055176, "learning_rate": 8.768821827035713e-05, "loss": 1.4903, "step": 7918 }, { "epoch": 0.4720467278579092, "grad_norm": 3.8103814125061035, "learning_rate": 8.768200302294675e-05, "loss": 1.5227, "step": 7920 }, { "epoch": 0.4721659315770652, "grad_norm": 3.2522590160369873, "learning_rate": 8.76757864275222e-05, "loss": 1.5215, "step": 7922 }, { "epoch": 0.47228513529622124, "grad_norm": 3.0100958347320557, "learning_rate": 8.766956848430586e-05, "loss": 1.3769, "step": 7924 }, { "epoch": 0.47240433901537726, "grad_norm": 3.190032482147217, "learning_rate": 8.766334919352017e-05, "loss": 1.3552, "step": 7926 }, { "epoch": 0.47252354273453334, "grad_norm": 3.2102890014648438, "learning_rate": 8.765712855538761e-05, "loss": 1.4247, "step": 7928 }, { "epoch": 0.47264274645368937, "grad_norm": 3.180896759033203, "learning_rate": 8.765090657013074e-05, "loss": 1.4235, "step": 7930 }, { "epoch": 0.4727619501728454, "grad_norm": 3.235442638397217, "learning_rate": 8.76446832379721e-05, "loss": 1.4096, "step": 7932 }, { "epoch": 0.4728811538920014, "grad_norm": 3.138176441192627, "learning_rate": 8.763845855913435e-05, "loss": 1.4664, "step": 7934 }, { "epoch": 0.47300035761115744, "grad_norm": 2.7238926887512207, "learning_rate": 8.763223253384016e-05, "loss": 1.3632, "step": 7936 }, { "epoch": 0.4731195613303135, "grad_norm": 3.2899231910705566, "learning_rate": 8.762600516231225e-05, "loss": 1.6143, "step": 7938 }, { "epoch": 0.47323876504946955, "grad_norm": 3.443861722946167, "learning_rate": 8.761977644477339e-05, "loss": 1.6248, "step": 7940 }, { "epoch": 0.47335796876862557, "grad_norm": 3.161987543106079, "learning_rate": 8.761354638144644e-05, "loss": 1.4894, "step": 7942 }, { "epoch": 0.4734771724877816, "grad_norm": 2.8557379245758057, "learning_rate": 8.760731497255421e-05, "loss": 1.4341, "step": 7944 }, { "epoch": 0.4735963762069377, "grad_norm": 3.1456940174102783, "learning_rate": 8.760108221831968e-05, "loss": 1.4107, "step": 7946 }, { "epoch": 0.4737155799260937, "grad_norm": 2.8580353260040283, "learning_rate": 8.759484811896577e-05, "loss": 1.3607, "step": 7948 }, { "epoch": 0.4738347836452497, "grad_norm": 2.8429348468780518, "learning_rate": 8.758861267471552e-05, "loss": 1.395, "step": 7950 }, { "epoch": 0.47395398736440575, "grad_norm": 3.271148204803467, "learning_rate": 8.758237588579198e-05, "loss": 1.5056, "step": 7952 }, { "epoch": 0.47407319108356183, "grad_norm": 3.383861541748047, "learning_rate": 8.757613775241828e-05, "loss": 1.4535, "step": 7954 }, { "epoch": 0.47419239480271785, "grad_norm": 3.2428996562957764, "learning_rate": 8.756989827481755e-05, "loss": 1.4453, "step": 7956 }, { "epoch": 0.4743115985218739, "grad_norm": 3.3222904205322266, "learning_rate": 8.756365745321302e-05, "loss": 1.5834, "step": 7958 }, { "epoch": 0.4744308022410299, "grad_norm": 3.084245204925537, "learning_rate": 8.755741528782795e-05, "loss": 1.4979, "step": 7960 }, { "epoch": 0.474550005960186, "grad_norm": 3.003242254257202, "learning_rate": 8.755117177888563e-05, "loss": 1.4135, "step": 7962 }, { "epoch": 0.474669209679342, "grad_norm": 2.86360239982605, "learning_rate": 8.754492692660942e-05, "loss": 1.385, "step": 7964 }, { "epoch": 0.47478841339849803, "grad_norm": 2.864387273788452, "learning_rate": 8.753868073122271e-05, "loss": 1.4571, "step": 7966 }, { "epoch": 0.47490761711765406, "grad_norm": 3.0829062461853027, "learning_rate": 8.753243319294895e-05, "loss": 1.4723, "step": 7968 }, { "epoch": 0.4750268208368101, "grad_norm": 3.2360920906066895, "learning_rate": 8.752618431201163e-05, "loss": 1.2949, "step": 7970 }, { "epoch": 0.47514602455596616, "grad_norm": 2.994230270385742, "learning_rate": 8.751993408863432e-05, "loss": 1.5402, "step": 7972 }, { "epoch": 0.4752652282751222, "grad_norm": 3.208486318588257, "learning_rate": 8.75136825230406e-05, "loss": 1.3963, "step": 7974 }, { "epoch": 0.4753844319942782, "grad_norm": 2.778198480606079, "learning_rate": 8.750742961545408e-05, "loss": 1.3166, "step": 7976 }, { "epoch": 0.47550363571343424, "grad_norm": 3.228677272796631, "learning_rate": 8.75011753660985e-05, "loss": 1.4727, "step": 7978 }, { "epoch": 0.4756228394325903, "grad_norm": 2.791888952255249, "learning_rate": 8.749491977519755e-05, "loss": 1.4445, "step": 7980 }, { "epoch": 0.47574204315174634, "grad_norm": 3.0896167755126953, "learning_rate": 8.748866284297503e-05, "loss": 1.4607, "step": 7982 }, { "epoch": 0.47586124687090237, "grad_norm": 3.2961535453796387, "learning_rate": 8.748240456965477e-05, "loss": 1.4135, "step": 7984 }, { "epoch": 0.4759804505900584, "grad_norm": 3.299046277999878, "learning_rate": 8.747614495546068e-05, "loss": 1.3639, "step": 7986 }, { "epoch": 0.47609965430921447, "grad_norm": 3.0537610054016113, "learning_rate": 8.746988400061664e-05, "loss": 1.4748, "step": 7988 }, { "epoch": 0.4762188580283705, "grad_norm": 3.375601291656494, "learning_rate": 8.746362170534665e-05, "loss": 1.377, "step": 7990 }, { "epoch": 0.4763380617475265, "grad_norm": 3.196659803390503, "learning_rate": 8.745735806987474e-05, "loss": 1.4616, "step": 7992 }, { "epoch": 0.47645726546668254, "grad_norm": 3.3959906101226807, "learning_rate": 8.745109309442496e-05, "loss": 1.3417, "step": 7994 }, { "epoch": 0.4765764691858386, "grad_norm": 2.8455328941345215, "learning_rate": 8.744482677922146e-05, "loss": 1.4686, "step": 7996 }, { "epoch": 0.47669567290499465, "grad_norm": 2.8644826412200928, "learning_rate": 8.743855912448838e-05, "loss": 1.3819, "step": 7998 }, { "epoch": 0.4768148766241507, "grad_norm": 3.051968574523926, "learning_rate": 8.743229013044994e-05, "loss": 1.4221, "step": 8000 }, { "epoch": 0.4769340803433067, "grad_norm": 3.1788344383239746, "learning_rate": 8.74260197973304e-05, "loss": 1.5774, "step": 8002 }, { "epoch": 0.4770532840624627, "grad_norm": 3.5791242122650146, "learning_rate": 8.74197481253541e-05, "loss": 1.5804, "step": 8004 }, { "epoch": 0.4771724877816188, "grad_norm": 2.9162795543670654, "learning_rate": 8.741347511474538e-05, "loss": 1.3817, "step": 8006 }, { "epoch": 0.47729169150077483, "grad_norm": 2.844057321548462, "learning_rate": 8.740720076572864e-05, "loss": 1.3668, "step": 8008 }, { "epoch": 0.47741089521993085, "grad_norm": 2.8799190521240234, "learning_rate": 8.740092507852834e-05, "loss": 1.4164, "step": 8010 }, { "epoch": 0.4775300989390869, "grad_norm": 2.775514602661133, "learning_rate": 8.7394648053369e-05, "loss": 1.3566, "step": 8012 }, { "epoch": 0.47764930265824296, "grad_norm": 3.02913498878479, "learning_rate": 8.738836969047513e-05, "loss": 1.4136, "step": 8014 }, { "epoch": 0.477768506377399, "grad_norm": 3.0749785900115967, "learning_rate": 8.738208999007136e-05, "loss": 1.4818, "step": 8016 }, { "epoch": 0.477887710096555, "grad_norm": 3.0583958625793457, "learning_rate": 8.737580895238233e-05, "loss": 1.39, "step": 8018 }, { "epoch": 0.47800691381571103, "grad_norm": 3.344742774963379, "learning_rate": 8.736952657763273e-05, "loss": 1.2848, "step": 8020 }, { "epoch": 0.4781261175348671, "grad_norm": 2.833775043487549, "learning_rate": 8.736324286604731e-05, "loss": 1.3169, "step": 8022 }, { "epoch": 0.47824532125402314, "grad_norm": 3.0255753993988037, "learning_rate": 8.735695781785087e-05, "loss": 1.3989, "step": 8024 }, { "epoch": 0.47836452497317916, "grad_norm": 3.366741418838501, "learning_rate": 8.735067143326822e-05, "loss": 1.6743, "step": 8026 }, { "epoch": 0.4784837286923352, "grad_norm": 2.672192096710205, "learning_rate": 8.734438371252426e-05, "loss": 1.4271, "step": 8028 }, { "epoch": 0.4786029324114912, "grad_norm": 2.986206293106079, "learning_rate": 8.733809465584391e-05, "loss": 1.3621, "step": 8030 }, { "epoch": 0.4787221361306473, "grad_norm": 3.109955310821533, "learning_rate": 8.733180426345218e-05, "loss": 1.447, "step": 8032 }, { "epoch": 0.4788413398498033, "grad_norm": 3.2638304233551025, "learning_rate": 8.732551253557409e-05, "loss": 1.5582, "step": 8034 }, { "epoch": 0.47896054356895934, "grad_norm": 2.64650821685791, "learning_rate": 8.731921947243469e-05, "loss": 1.3557, "step": 8036 }, { "epoch": 0.47907974728811537, "grad_norm": 2.8512251377105713, "learning_rate": 8.731292507425912e-05, "loss": 1.3963, "step": 8038 }, { "epoch": 0.47919895100727145, "grad_norm": 3.012929916381836, "learning_rate": 8.730662934127257e-05, "loss": 1.4581, "step": 8040 }, { "epoch": 0.47931815472642747, "grad_norm": 3.2794976234436035, "learning_rate": 8.730033227370025e-05, "loss": 1.4368, "step": 8042 }, { "epoch": 0.4794373584455835, "grad_norm": 2.832514524459839, "learning_rate": 8.72940338717674e-05, "loss": 1.3848, "step": 8044 }, { "epoch": 0.4795565621647395, "grad_norm": 3.603679656982422, "learning_rate": 8.728773413569937e-05, "loss": 1.5629, "step": 8046 }, { "epoch": 0.4796757658838956, "grad_norm": 2.7639663219451904, "learning_rate": 8.728143306572151e-05, "loss": 1.4145, "step": 8048 }, { "epoch": 0.4797949696030516, "grad_norm": 2.8145639896392822, "learning_rate": 8.727513066205925e-05, "loss": 1.384, "step": 8050 }, { "epoch": 0.47991417332220765, "grad_norm": 2.961031913757324, "learning_rate": 8.726882692493801e-05, "loss": 1.485, "step": 8052 }, { "epoch": 0.4800333770413637, "grad_norm": 3.0985751152038574, "learning_rate": 8.726252185458333e-05, "loss": 1.3204, "step": 8054 }, { "epoch": 0.48015258076051975, "grad_norm": 3.0231335163116455, "learning_rate": 8.725621545122074e-05, "loss": 1.5073, "step": 8056 }, { "epoch": 0.4802717844796758, "grad_norm": 3.039700984954834, "learning_rate": 8.724990771507585e-05, "loss": 1.3738, "step": 8058 }, { "epoch": 0.4803909881988318, "grad_norm": 3.0230607986450195, "learning_rate": 8.724359864637433e-05, "loss": 1.3581, "step": 8060 }, { "epoch": 0.48051019191798783, "grad_norm": 3.085632801055908, "learning_rate": 8.723728824534183e-05, "loss": 1.552, "step": 8062 }, { "epoch": 0.48062939563714385, "grad_norm": 3.064265489578247, "learning_rate": 8.723097651220414e-05, "loss": 1.6067, "step": 8064 }, { "epoch": 0.48074859935629993, "grad_norm": 3.181561231613159, "learning_rate": 8.722466344718704e-05, "loss": 1.4444, "step": 8066 }, { "epoch": 0.48086780307545596, "grad_norm": 3.0404725074768066, "learning_rate": 8.721834905051636e-05, "loss": 1.3754, "step": 8068 }, { "epoch": 0.480987006794612, "grad_norm": 3.1711583137512207, "learning_rate": 8.721203332241799e-05, "loss": 1.4878, "step": 8070 }, { "epoch": 0.481106210513768, "grad_norm": 3.2320854663848877, "learning_rate": 8.720571626311787e-05, "loss": 1.5972, "step": 8072 }, { "epoch": 0.4812254142329241, "grad_norm": 3.5046143531799316, "learning_rate": 8.719939787284199e-05, "loss": 1.4272, "step": 8074 }, { "epoch": 0.4813446179520801, "grad_norm": 3.2314488887786865, "learning_rate": 8.719307815181636e-05, "loss": 1.4647, "step": 8076 }, { "epoch": 0.48146382167123614, "grad_norm": 3.1684107780456543, "learning_rate": 8.71867571002671e-05, "loss": 1.4206, "step": 8078 }, { "epoch": 0.48158302539039216, "grad_norm": 2.7973673343658447, "learning_rate": 8.718043471842025e-05, "loss": 1.4132, "step": 8080 }, { "epoch": 0.48170222910954824, "grad_norm": 3.19448184967041, "learning_rate": 8.717411100650208e-05, "loss": 1.2611, "step": 8082 }, { "epoch": 0.48182143282870427, "grad_norm": 3.018686294555664, "learning_rate": 8.716778596473875e-05, "loss": 1.4216, "step": 8084 }, { "epoch": 0.4819406365478603, "grad_norm": 3.622342109680176, "learning_rate": 8.716145959335658e-05, "loss": 1.3772, "step": 8086 }, { "epoch": 0.4820598402670163, "grad_norm": 3.2222745418548584, "learning_rate": 8.715513189258182e-05, "loss": 1.4255, "step": 8088 }, { "epoch": 0.4821790439861724, "grad_norm": 2.958153009414673, "learning_rate": 8.714880286264088e-05, "loss": 1.2684, "step": 8090 }, { "epoch": 0.4822982477053284, "grad_norm": 2.8570220470428467, "learning_rate": 8.714247250376016e-05, "loss": 1.3078, "step": 8092 }, { "epoch": 0.48241745142448444, "grad_norm": 3.0733518600463867, "learning_rate": 8.713614081616611e-05, "loss": 1.463, "step": 8094 }, { "epoch": 0.48253665514364047, "grad_norm": 3.0753395557403564, "learning_rate": 8.712980780008524e-05, "loss": 1.4981, "step": 8096 }, { "epoch": 0.4826558588627965, "grad_norm": 2.8337342739105225, "learning_rate": 8.712347345574412e-05, "loss": 1.4409, "step": 8098 }, { "epoch": 0.4827750625819526, "grad_norm": 3.1329619884490967, "learning_rate": 8.711713778336934e-05, "loss": 1.38, "step": 8100 }, { "epoch": 0.4828942663011086, "grad_norm": 3.0520215034484863, "learning_rate": 8.711080078318753e-05, "loss": 1.3949, "step": 8102 }, { "epoch": 0.4830134700202646, "grad_norm": 3.511885404586792, "learning_rate": 8.71044624554254e-05, "loss": 1.472, "step": 8104 }, { "epoch": 0.48313267373942065, "grad_norm": 3.042048692703247, "learning_rate": 8.709812280030971e-05, "loss": 1.2497, "step": 8106 }, { "epoch": 0.48325187745857673, "grad_norm": 3.2581303119659424, "learning_rate": 8.709178181806723e-05, "loss": 1.4391, "step": 8108 }, { "epoch": 0.48337108117773275, "grad_norm": 3.1480233669281006, "learning_rate": 8.708543950892483e-05, "loss": 1.4154, "step": 8110 }, { "epoch": 0.4834902848968888, "grad_norm": 3.1348249912261963, "learning_rate": 8.707909587310934e-05, "loss": 1.4491, "step": 8112 }, { "epoch": 0.4836094886160448, "grad_norm": 3.096039056777954, "learning_rate": 8.707275091084774e-05, "loss": 1.5271, "step": 8114 }, { "epoch": 0.4837286923352009, "grad_norm": 3.0915303230285645, "learning_rate": 8.7066404622367e-05, "loss": 1.2591, "step": 8116 }, { "epoch": 0.4838478960543569, "grad_norm": 2.951967477798462, "learning_rate": 8.706005700789414e-05, "loss": 1.4152, "step": 8118 }, { "epoch": 0.48396709977351293, "grad_norm": 3.0656397342681885, "learning_rate": 8.705370806765625e-05, "loss": 1.5022, "step": 8120 }, { "epoch": 0.48408630349266896, "grad_norm": 3.0593502521514893, "learning_rate": 8.704735780188043e-05, "loss": 1.4229, "step": 8122 }, { "epoch": 0.484205507211825, "grad_norm": 3.1620945930480957, "learning_rate": 8.704100621079388e-05, "loss": 1.407, "step": 8124 }, { "epoch": 0.48432471093098106, "grad_norm": 3.1214864253997803, "learning_rate": 8.70346532946238e-05, "loss": 1.4418, "step": 8126 }, { "epoch": 0.4844439146501371, "grad_norm": 3.2583045959472656, "learning_rate": 8.702829905359745e-05, "loss": 1.5022, "step": 8128 }, { "epoch": 0.4845631183692931, "grad_norm": 3.101247549057007, "learning_rate": 8.702194348794214e-05, "loss": 1.5196, "step": 8130 }, { "epoch": 0.48468232208844914, "grad_norm": 3.166334390640259, "learning_rate": 8.701558659788526e-05, "loss": 1.4076, "step": 8132 }, { "epoch": 0.4848015258076052, "grad_norm": 2.716531991958618, "learning_rate": 8.70092283836542e-05, "loss": 1.3501, "step": 8134 }, { "epoch": 0.48492072952676124, "grad_norm": 3.2532594203948975, "learning_rate": 8.700286884547642e-05, "loss": 1.4579, "step": 8136 }, { "epoch": 0.48503993324591727, "grad_norm": 3.3713042736053467, "learning_rate": 8.699650798357941e-05, "loss": 1.6027, "step": 8138 }, { "epoch": 0.4851591369650733, "grad_norm": 2.97774338722229, "learning_rate": 8.699014579819072e-05, "loss": 1.3834, "step": 8140 }, { "epoch": 0.48527834068422937, "grad_norm": 3.070751667022705, "learning_rate": 8.698378228953796e-05, "loss": 1.448, "step": 8142 }, { "epoch": 0.4853975444033854, "grad_norm": 3.3093767166137695, "learning_rate": 8.697741745784877e-05, "loss": 1.4129, "step": 8144 }, { "epoch": 0.4855167481225414, "grad_norm": 2.8752074241638184, "learning_rate": 8.697105130335085e-05, "loss": 1.3791, "step": 8146 }, { "epoch": 0.48563595184169744, "grad_norm": 3.4649760723114014, "learning_rate": 8.696468382627192e-05, "loss": 1.5955, "step": 8148 }, { "epoch": 0.4857551555608535, "grad_norm": 3.3995072841644287, "learning_rate": 8.695831502683978e-05, "loss": 1.4296, "step": 8150 }, { "epoch": 0.48587435928000955, "grad_norm": 3.4643871784210205, "learning_rate": 8.695194490528226e-05, "loss": 1.5007, "step": 8152 }, { "epoch": 0.4859935629991656, "grad_norm": 2.9809794425964355, "learning_rate": 8.694557346182723e-05, "loss": 1.3706, "step": 8154 }, { "epoch": 0.4861127667183216, "grad_norm": 3.2671058177948, "learning_rate": 8.693920069670265e-05, "loss": 1.387, "step": 8156 }, { "epoch": 0.4862319704374776, "grad_norm": 3.6504409313201904, "learning_rate": 8.693282661013646e-05, "loss": 1.6429, "step": 8158 }, { "epoch": 0.4863511741566337, "grad_norm": 3.0850377082824707, "learning_rate": 8.69264512023567e-05, "loss": 1.3867, "step": 8160 }, { "epoch": 0.48647037787578973, "grad_norm": 3.27168345451355, "learning_rate": 8.692007447359143e-05, "loss": 1.4082, "step": 8162 }, { "epoch": 0.48658958159494575, "grad_norm": 2.9512112140655518, "learning_rate": 8.691369642406879e-05, "loss": 1.2667, "step": 8164 }, { "epoch": 0.4867087853141018, "grad_norm": 2.9095208644866943, "learning_rate": 8.690731705401694e-05, "loss": 1.5262, "step": 8166 }, { "epoch": 0.48682798903325786, "grad_norm": 3.2492430210113525, "learning_rate": 8.690093636366405e-05, "loss": 1.463, "step": 8168 }, { "epoch": 0.4869471927524139, "grad_norm": 2.838507890701294, "learning_rate": 8.689455435323843e-05, "loss": 1.4587, "step": 8170 }, { "epoch": 0.4870663964715699, "grad_norm": 2.8335487842559814, "learning_rate": 8.688817102296837e-05, "loss": 1.262, "step": 8172 }, { "epoch": 0.48718560019072593, "grad_norm": 3.063420057296753, "learning_rate": 8.688178637308221e-05, "loss": 1.4402, "step": 8174 }, { "epoch": 0.487304803909882, "grad_norm": 3.1209194660186768, "learning_rate": 8.687540040380837e-05, "loss": 1.5118, "step": 8176 }, { "epoch": 0.48742400762903804, "grad_norm": 2.970097541809082, "learning_rate": 8.686901311537528e-05, "loss": 1.4186, "step": 8178 }, { "epoch": 0.48754321134819406, "grad_norm": 2.8375208377838135, "learning_rate": 8.686262450801146e-05, "loss": 1.4173, "step": 8180 }, { "epoch": 0.4876624150673501, "grad_norm": 2.9091384410858154, "learning_rate": 8.685623458194543e-05, "loss": 1.412, "step": 8182 }, { "epoch": 0.48778161878650617, "grad_norm": 2.860633611679077, "learning_rate": 8.684984333740578e-05, "loss": 1.4836, "step": 8184 }, { "epoch": 0.4879008225056622, "grad_norm": 3.065049886703491, "learning_rate": 8.684345077462117e-05, "loss": 1.3973, "step": 8186 }, { "epoch": 0.4880200262248182, "grad_norm": 3.1811962127685547, "learning_rate": 8.683705689382024e-05, "loss": 1.4929, "step": 8188 }, { "epoch": 0.48813922994397424, "grad_norm": 2.964381694793701, "learning_rate": 8.683066169523178e-05, "loss": 1.459, "step": 8190 }, { "epoch": 0.48825843366313026, "grad_norm": 3.009181499481201, "learning_rate": 8.682426517908452e-05, "loss": 1.3894, "step": 8192 }, { "epoch": 0.48837763738228634, "grad_norm": 3.253809928894043, "learning_rate": 8.68178673456073e-05, "loss": 1.4694, "step": 8194 }, { "epoch": 0.48849684110144237, "grad_norm": 2.961216449737549, "learning_rate": 8.6811468195029e-05, "loss": 1.3337, "step": 8196 }, { "epoch": 0.4886160448205984, "grad_norm": 3.362032413482666, "learning_rate": 8.680506772757853e-05, "loss": 1.4498, "step": 8198 }, { "epoch": 0.4887352485397544, "grad_norm": 3.0969934463500977, "learning_rate": 8.679866594348486e-05, "loss": 1.3239, "step": 8200 }, { "epoch": 0.4888544522589105, "grad_norm": 3.2395215034484863, "learning_rate": 8.679226284297702e-05, "loss": 1.3391, "step": 8202 }, { "epoch": 0.4889736559780665, "grad_norm": 3.316284656524658, "learning_rate": 8.678585842628403e-05, "loss": 1.3389, "step": 8204 }, { "epoch": 0.48909285969722255, "grad_norm": 3.184062957763672, "learning_rate": 8.677945269363504e-05, "loss": 1.4051, "step": 8206 }, { "epoch": 0.4892120634163786, "grad_norm": 3.3095197677612305, "learning_rate": 8.677304564525917e-05, "loss": 1.4314, "step": 8208 }, { "epoch": 0.48933126713553465, "grad_norm": 3.1755058765411377, "learning_rate": 8.676663728138565e-05, "loss": 1.387, "step": 8210 }, { "epoch": 0.4894504708546907, "grad_norm": 2.9396920204162598, "learning_rate": 8.676022760224371e-05, "loss": 1.4688, "step": 8212 }, { "epoch": 0.4895696745738467, "grad_norm": 3.230187177658081, "learning_rate": 8.675381660806268e-05, "loss": 1.303, "step": 8214 }, { "epoch": 0.4896888782930027, "grad_norm": 2.9677529335021973, "learning_rate": 8.674740429907186e-05, "loss": 1.4169, "step": 8216 }, { "epoch": 0.48980808201215875, "grad_norm": 3.230142116546631, "learning_rate": 8.674099067550066e-05, "loss": 1.4404, "step": 8218 }, { "epoch": 0.48992728573131483, "grad_norm": 3.1431639194488525, "learning_rate": 8.67345757375785e-05, "loss": 1.4658, "step": 8220 }, { "epoch": 0.49004648945047086, "grad_norm": 3.2218215465545654, "learning_rate": 8.67281594855349e-05, "loss": 1.5025, "step": 8222 }, { "epoch": 0.4901656931696269, "grad_norm": 2.914942979812622, "learning_rate": 8.672174191959936e-05, "loss": 1.3014, "step": 8224 }, { "epoch": 0.4902848968887829, "grad_norm": 3.0064470767974854, "learning_rate": 8.671532304000149e-05, "loss": 1.2893, "step": 8226 }, { "epoch": 0.490404100607939, "grad_norm": 2.737297296524048, "learning_rate": 8.670890284697087e-05, "loss": 1.2408, "step": 8228 }, { "epoch": 0.490523304327095, "grad_norm": 3.036562442779541, "learning_rate": 8.670248134073722e-05, "loss": 1.3859, "step": 8230 }, { "epoch": 0.49064250804625104, "grad_norm": 3.761437177658081, "learning_rate": 8.669605852153022e-05, "loss": 1.5571, "step": 8232 }, { "epoch": 0.49076171176540706, "grad_norm": 2.9972825050354004, "learning_rate": 8.668963438957966e-05, "loss": 1.2785, "step": 8234 }, { "epoch": 0.49088091548456314, "grad_norm": 3.0287771224975586, "learning_rate": 8.668320894511533e-05, "loss": 1.3161, "step": 8236 }, { "epoch": 0.49100011920371917, "grad_norm": 3.014914035797119, "learning_rate": 8.667678218836712e-05, "loss": 1.3817, "step": 8238 }, { "epoch": 0.4911193229228752, "grad_norm": 3.2257091999053955, "learning_rate": 8.667035411956492e-05, "loss": 1.3948, "step": 8240 }, { "epoch": 0.4912385266420312, "grad_norm": 2.7663581371307373, "learning_rate": 8.66639247389387e-05, "loss": 1.2135, "step": 8242 }, { "epoch": 0.4913577303611873, "grad_norm": 3.2061970233917236, "learning_rate": 8.665749404671842e-05, "loss": 1.4396, "step": 8244 }, { "epoch": 0.4914769340803433, "grad_norm": 3.2123987674713135, "learning_rate": 8.665106204313417e-05, "loss": 1.5648, "step": 8246 }, { "epoch": 0.49159613779949934, "grad_norm": 3.135786294937134, "learning_rate": 8.664462872841603e-05, "loss": 1.3411, "step": 8248 }, { "epoch": 0.49171534151865537, "grad_norm": 3.7354393005371094, "learning_rate": 8.663819410279416e-05, "loss": 1.3267, "step": 8250 }, { "epoch": 0.4918345452378114, "grad_norm": 3.270970582962036, "learning_rate": 8.663175816649872e-05, "loss": 1.4113, "step": 8252 }, { "epoch": 0.4919537489569675, "grad_norm": 3.1143646240234375, "learning_rate": 8.662532091975994e-05, "loss": 1.612, "step": 8254 }, { "epoch": 0.4920729526761235, "grad_norm": 3.4739913940429688, "learning_rate": 8.661888236280813e-05, "loss": 1.5866, "step": 8256 }, { "epoch": 0.4921921563952795, "grad_norm": 3.1259865760803223, "learning_rate": 8.66124424958736e-05, "loss": 1.3581, "step": 8258 }, { "epoch": 0.49231136011443555, "grad_norm": 3.6020636558532715, "learning_rate": 8.660600131918675e-05, "loss": 1.5027, "step": 8260 }, { "epoch": 0.49243056383359163, "grad_norm": 2.9896352291107178, "learning_rate": 8.659955883297797e-05, "loss": 1.4727, "step": 8262 }, { "epoch": 0.49254976755274765, "grad_norm": 2.7958977222442627, "learning_rate": 8.659311503747776e-05, "loss": 1.3344, "step": 8264 }, { "epoch": 0.4926689712719037, "grad_norm": 3.313432216644287, "learning_rate": 8.658666993291662e-05, "loss": 1.4355, "step": 8266 }, { "epoch": 0.4927881749910597, "grad_norm": 3.123487710952759, "learning_rate": 8.65802235195251e-05, "loss": 1.4334, "step": 8268 }, { "epoch": 0.4929073787102158, "grad_norm": 3.2999250888824463, "learning_rate": 8.657377579753385e-05, "loss": 1.5333, "step": 8270 }, { "epoch": 0.4930265824293718, "grad_norm": 3.0497217178344727, "learning_rate": 8.65673267671735e-05, "loss": 1.3492, "step": 8272 }, { "epoch": 0.49314578614852783, "grad_norm": 3.468294620513916, "learning_rate": 8.656087642867474e-05, "loss": 1.5244, "step": 8274 }, { "epoch": 0.49326498986768386, "grad_norm": 3.448096752166748, "learning_rate": 8.655442478226835e-05, "loss": 1.4697, "step": 8276 }, { "epoch": 0.49338419358683994, "grad_norm": 3.005570888519287, "learning_rate": 8.654797182818513e-05, "loss": 1.5478, "step": 8278 }, { "epoch": 0.49350339730599596, "grad_norm": 3.0367469787597656, "learning_rate": 8.654151756665588e-05, "loss": 1.3159, "step": 8280 }, { "epoch": 0.493622601025152, "grad_norm": 3.159432888031006, "learning_rate": 8.653506199791155e-05, "loss": 1.4288, "step": 8282 }, { "epoch": 0.493741804744308, "grad_norm": 2.9138598442077637, "learning_rate": 8.652860512218304e-05, "loss": 1.287, "step": 8284 }, { "epoch": 0.49386100846346404, "grad_norm": 3.0885307788848877, "learning_rate": 8.652214693970133e-05, "loss": 1.2302, "step": 8286 }, { "epoch": 0.4939802121826201, "grad_norm": 3.401862382888794, "learning_rate": 8.651568745069746e-05, "loss": 1.4152, "step": 8288 }, { "epoch": 0.49409941590177614, "grad_norm": 3.182626247406006, "learning_rate": 8.650922665540254e-05, "loss": 1.4819, "step": 8290 }, { "epoch": 0.49421861962093216, "grad_norm": 3.234534740447998, "learning_rate": 8.650276455404766e-05, "loss": 1.3743, "step": 8292 }, { "epoch": 0.4943378233400882, "grad_norm": 3.05423903465271, "learning_rate": 8.6496301146864e-05, "loss": 1.4455, "step": 8294 }, { "epoch": 0.49445702705924427, "grad_norm": 3.001718759536743, "learning_rate": 8.648983643408275e-05, "loss": 1.4608, "step": 8296 }, { "epoch": 0.4945762307784003, "grad_norm": 3.1950302124023438, "learning_rate": 8.648337041593523e-05, "loss": 1.4127, "step": 8298 }, { "epoch": 0.4946954344975563, "grad_norm": 2.72052264213562, "learning_rate": 8.647690309265273e-05, "loss": 1.4644, "step": 8300 }, { "epoch": 0.49481463821671234, "grad_norm": 3.496117115020752, "learning_rate": 8.647043446446659e-05, "loss": 1.397, "step": 8302 }, { "epoch": 0.4949338419358684, "grad_norm": 3.0680203437805176, "learning_rate": 8.646396453160822e-05, "loss": 1.4647, "step": 8304 }, { "epoch": 0.49505304565502445, "grad_norm": 3.0043118000030518, "learning_rate": 8.645749329430909e-05, "loss": 1.4403, "step": 8306 }, { "epoch": 0.4951722493741805, "grad_norm": 2.9148662090301514, "learning_rate": 8.645102075280067e-05, "loss": 1.4075, "step": 8308 }, { "epoch": 0.4952914530933365, "grad_norm": 2.6572251319885254, "learning_rate": 8.644454690731454e-05, "loss": 1.3749, "step": 8310 }, { "epoch": 0.4954106568124925, "grad_norm": 3.0030341148376465, "learning_rate": 8.643807175808228e-05, "loss": 1.4707, "step": 8312 }, { "epoch": 0.4955298605316486, "grad_norm": 3.196988105773926, "learning_rate": 8.643159530533549e-05, "loss": 1.3747, "step": 8314 }, { "epoch": 0.4956490642508046, "grad_norm": 3.19667387008667, "learning_rate": 8.642511754930591e-05, "loss": 1.3737, "step": 8316 }, { "epoch": 0.49576826796996065, "grad_norm": 2.8498992919921875, "learning_rate": 8.641863849022525e-05, "loss": 1.3994, "step": 8318 }, { "epoch": 0.4958874716891167, "grad_norm": 3.1572883129119873, "learning_rate": 8.641215812832528e-05, "loss": 1.429, "step": 8320 }, { "epoch": 0.49600667540827276, "grad_norm": 3.2466487884521484, "learning_rate": 8.640567646383782e-05, "loss": 1.4536, "step": 8322 }, { "epoch": 0.4961258791274288, "grad_norm": 3.0992233753204346, "learning_rate": 8.639919349699476e-05, "loss": 1.5213, "step": 8324 }, { "epoch": 0.4962450828465848, "grad_norm": 2.9870991706848145, "learning_rate": 8.639270922802803e-05, "loss": 1.3816, "step": 8326 }, { "epoch": 0.49636428656574083, "grad_norm": 3.277738571166992, "learning_rate": 8.638622365716955e-05, "loss": 1.4191, "step": 8328 }, { "epoch": 0.4964834902848969, "grad_norm": 2.8475310802459717, "learning_rate": 8.637973678465136e-05, "loss": 1.3842, "step": 8330 }, { "epoch": 0.49660269400405294, "grad_norm": 3.1819891929626465, "learning_rate": 8.637324861070553e-05, "loss": 1.5757, "step": 8332 }, { "epoch": 0.49672189772320896, "grad_norm": 3.0680744647979736, "learning_rate": 8.636675913556411e-05, "loss": 1.563, "step": 8334 }, { "epoch": 0.496841101442365, "grad_norm": 2.802664041519165, "learning_rate": 8.636026835945932e-05, "loss": 1.3702, "step": 8336 }, { "epoch": 0.49696030516152107, "grad_norm": 2.9375953674316406, "learning_rate": 8.635377628262332e-05, "loss": 1.4093, "step": 8338 }, { "epoch": 0.4970795088806771, "grad_norm": 2.7517216205596924, "learning_rate": 8.634728290528835e-05, "loss": 1.2535, "step": 8340 }, { "epoch": 0.4971987125998331, "grad_norm": 3.1537060737609863, "learning_rate": 8.634078822768672e-05, "loss": 1.5802, "step": 8342 }, { "epoch": 0.49731791631898914, "grad_norm": 2.9744231700897217, "learning_rate": 8.633429225005077e-05, "loss": 1.3759, "step": 8344 }, { "epoch": 0.49743712003814516, "grad_norm": 3.1374351978302, "learning_rate": 8.632779497261283e-05, "loss": 1.4521, "step": 8346 }, { "epoch": 0.49755632375730124, "grad_norm": 2.64045786857605, "learning_rate": 8.63212963956054e-05, "loss": 1.4039, "step": 8348 }, { "epoch": 0.49767552747645727, "grad_norm": 3.001844882965088, "learning_rate": 8.631479651926091e-05, "loss": 1.4632, "step": 8350 }, { "epoch": 0.4977947311956133, "grad_norm": 3.3769335746765137, "learning_rate": 8.630829534381191e-05, "loss": 1.5933, "step": 8352 }, { "epoch": 0.4979139349147693, "grad_norm": 3.2396724224090576, "learning_rate": 8.630179286949095e-05, "loss": 1.4073, "step": 8354 }, { "epoch": 0.4980331386339254, "grad_norm": 3.3443641662597656, "learning_rate": 8.629528909653066e-05, "loss": 1.5346, "step": 8356 }, { "epoch": 0.4981523423530814, "grad_norm": 3.202261447906494, "learning_rate": 8.62887840251637e-05, "loss": 1.4278, "step": 8358 }, { "epoch": 0.49827154607223745, "grad_norm": 3.1050169467926025, "learning_rate": 8.628227765562278e-05, "loss": 1.3087, "step": 8360 }, { "epoch": 0.4983907497913935, "grad_norm": 2.8868026733398438, "learning_rate": 8.627576998814063e-05, "loss": 1.5436, "step": 8362 }, { "epoch": 0.49850995351054955, "grad_norm": 3.0826101303100586, "learning_rate": 8.626926102295008e-05, "loss": 1.5232, "step": 8364 }, { "epoch": 0.4986291572297056, "grad_norm": 3.4276680946350098, "learning_rate": 8.626275076028397e-05, "loss": 1.48, "step": 8366 }, { "epoch": 0.4987483609488616, "grad_norm": 2.775514602661133, "learning_rate": 8.62562392003752e-05, "loss": 1.3097, "step": 8368 }, { "epoch": 0.4988675646680176, "grad_norm": 2.8777499198913574, "learning_rate": 8.624972634345669e-05, "loss": 1.3951, "step": 8370 }, { "epoch": 0.4989867683871737, "grad_norm": 2.7001593112945557, "learning_rate": 8.624321218976146e-05, "loss": 1.3725, "step": 8372 }, { "epoch": 0.49910597210632973, "grad_norm": 3.0231430530548096, "learning_rate": 8.62366967395225e-05, "loss": 1.3093, "step": 8374 }, { "epoch": 0.49922517582548576, "grad_norm": 3.1029651165008545, "learning_rate": 8.623017999297294e-05, "loss": 1.3492, "step": 8376 }, { "epoch": 0.4993443795446418, "grad_norm": 3.5048418045043945, "learning_rate": 8.622366195034587e-05, "loss": 1.5448, "step": 8378 }, { "epoch": 0.4994635832637978, "grad_norm": 3.0009641647338867, "learning_rate": 8.621714261187448e-05, "loss": 1.3469, "step": 8380 }, { "epoch": 0.4995827869829539, "grad_norm": 2.980485439300537, "learning_rate": 8.621062197779197e-05, "loss": 1.4007, "step": 8382 }, { "epoch": 0.4997019907021099, "grad_norm": 2.996835708618164, "learning_rate": 8.620410004833164e-05, "loss": 1.4935, "step": 8384 }, { "epoch": 0.49982119442126594, "grad_norm": 3.132197618484497, "learning_rate": 8.619757682372675e-05, "loss": 1.4591, "step": 8386 }, { "epoch": 0.49994039814042196, "grad_norm": 2.895205497741699, "learning_rate": 8.619105230421071e-05, "loss": 1.3589, "step": 8388 }, { "epoch": 0.500059601859578, "grad_norm": 3.3486220836639404, "learning_rate": 8.618452649001691e-05, "loss": 1.3205, "step": 8390 }, { "epoch": 0.5001788055787341, "grad_norm": 3.331188440322876, "learning_rate": 8.617799938137877e-05, "loss": 1.4635, "step": 8392 }, { "epoch": 0.5002980092978901, "grad_norm": 3.197627067565918, "learning_rate": 8.617147097852982e-05, "loss": 1.535, "step": 8394 }, { "epoch": 0.5004172130170461, "grad_norm": 2.770683765411377, "learning_rate": 8.61649412817036e-05, "loss": 1.3314, "step": 8396 }, { "epoch": 0.5005364167362022, "grad_norm": 2.815006732940674, "learning_rate": 8.61584102911337e-05, "loss": 1.3834, "step": 8398 }, { "epoch": 0.5006556204553582, "grad_norm": 3.095743417739868, "learning_rate": 8.615187800705374e-05, "loss": 1.4604, "step": 8400 }, { "epoch": 0.5007748241745142, "grad_norm": 3.246007204055786, "learning_rate": 8.614534442969739e-05, "loss": 1.4042, "step": 8402 }, { "epoch": 0.5008940278936703, "grad_norm": 3.0788052082061768, "learning_rate": 8.613880955929843e-05, "loss": 1.3807, "step": 8404 }, { "epoch": 0.5010132316128263, "grad_norm": 2.9141242504119873, "learning_rate": 8.613227339609059e-05, "loss": 1.3565, "step": 8406 }, { "epoch": 0.5011324353319824, "grad_norm": 3.112480640411377, "learning_rate": 8.612573594030771e-05, "loss": 1.4169, "step": 8408 }, { "epoch": 0.5012516390511383, "grad_norm": 3.3576269149780273, "learning_rate": 8.611919719218364e-05, "loss": 1.4528, "step": 8410 }, { "epoch": 0.5013708427702944, "grad_norm": 2.9171152114868164, "learning_rate": 8.611265715195234e-05, "loss": 1.3968, "step": 8412 }, { "epoch": 0.5014900464894505, "grad_norm": 3.3922646045684814, "learning_rate": 8.610611581984771e-05, "loss": 1.372, "step": 8414 }, { "epoch": 0.5016092502086065, "grad_norm": 3.2041051387786865, "learning_rate": 8.609957319610377e-05, "loss": 1.4554, "step": 8416 }, { "epoch": 0.5017284539277626, "grad_norm": 3.2148704528808594, "learning_rate": 8.60930292809546e-05, "loss": 1.4228, "step": 8418 }, { "epoch": 0.5018476576469186, "grad_norm": 3.128779649734497, "learning_rate": 8.608648407463429e-05, "loss": 1.4292, "step": 8420 }, { "epoch": 0.5019668613660746, "grad_norm": 3.0812339782714844, "learning_rate": 8.607993757737696e-05, "loss": 1.3841, "step": 8422 }, { "epoch": 0.5020860650852307, "grad_norm": 3.4110543727874756, "learning_rate": 8.607338978941682e-05, "loss": 1.4904, "step": 8424 }, { "epoch": 0.5022052688043867, "grad_norm": 3.211512565612793, "learning_rate": 8.60668407109881e-05, "loss": 1.281, "step": 8426 }, { "epoch": 0.5023244725235427, "grad_norm": 3.0771868228912354, "learning_rate": 8.60602903423251e-05, "loss": 1.3296, "step": 8428 }, { "epoch": 0.5024436762426988, "grad_norm": 3.5332560539245605, "learning_rate": 8.605373868366213e-05, "loss": 1.7426, "step": 8430 }, { "epoch": 0.5025628799618548, "grad_norm": 3.205498218536377, "learning_rate": 8.604718573523357e-05, "loss": 1.5075, "step": 8432 }, { "epoch": 0.5026820836810109, "grad_norm": 3.157634735107422, "learning_rate": 8.604063149727383e-05, "loss": 1.4584, "step": 8434 }, { "epoch": 0.5028012874001668, "grad_norm": 2.8145029544830322, "learning_rate": 8.60340759700174e-05, "loss": 1.2564, "step": 8436 }, { "epoch": 0.5029204911193229, "grad_norm": 3.1757636070251465, "learning_rate": 8.60275191536988e-05, "loss": 1.4215, "step": 8438 }, { "epoch": 0.503039694838479, "grad_norm": 2.721609354019165, "learning_rate": 8.602096104855255e-05, "loss": 1.4424, "step": 8440 }, { "epoch": 0.503158898557635, "grad_norm": 3.191052198410034, "learning_rate": 8.60144016548133e-05, "loss": 1.4165, "step": 8442 }, { "epoch": 0.503278102276791, "grad_norm": 3.214698314666748, "learning_rate": 8.600784097271566e-05, "loss": 1.601, "step": 8444 }, { "epoch": 0.5033973059959471, "grad_norm": 2.634542465209961, "learning_rate": 8.600127900249434e-05, "loss": 1.4136, "step": 8446 }, { "epoch": 0.5035165097151031, "grad_norm": 3.2234017848968506, "learning_rate": 8.599471574438411e-05, "loss": 1.4323, "step": 8448 }, { "epoch": 0.5036357134342592, "grad_norm": 2.790938377380371, "learning_rate": 8.598815119861976e-05, "loss": 1.3871, "step": 8450 }, { "epoch": 0.5037549171534151, "grad_norm": 3.1796319484710693, "learning_rate": 8.598158536543608e-05, "loss": 1.4165, "step": 8452 }, { "epoch": 0.5038741208725712, "grad_norm": 2.6380646228790283, "learning_rate": 8.597501824506799e-05, "loss": 1.3465, "step": 8454 }, { "epoch": 0.5039933245917273, "grad_norm": 3.2624733448028564, "learning_rate": 8.596844983775041e-05, "loss": 1.5109, "step": 8456 }, { "epoch": 0.5041125283108833, "grad_norm": 3.024704694747925, "learning_rate": 8.596188014371834e-05, "loss": 1.4229, "step": 8458 }, { "epoch": 0.5042317320300393, "grad_norm": 2.7904539108276367, "learning_rate": 8.595530916320673e-05, "loss": 1.3464, "step": 8460 }, { "epoch": 0.5043509357491954, "grad_norm": 3.021886110305786, "learning_rate": 8.594873689645072e-05, "loss": 1.495, "step": 8462 }, { "epoch": 0.5044701394683514, "grad_norm": 2.8100481033325195, "learning_rate": 8.594216334368539e-05, "loss": 1.4089, "step": 8464 }, { "epoch": 0.5045893431875075, "grad_norm": 3.323441982269287, "learning_rate": 8.59355885051459e-05, "loss": 1.4639, "step": 8466 }, { "epoch": 0.5047085469066634, "grad_norm": 3.182137966156006, "learning_rate": 8.592901238106745e-05, "loss": 1.4605, "step": 8468 }, { "epoch": 0.5048277506258195, "grad_norm": 2.8031527996063232, "learning_rate": 8.59224349716853e-05, "loss": 1.3967, "step": 8470 }, { "epoch": 0.5049469543449756, "grad_norm": 3.481037139892578, "learning_rate": 8.591585627723477e-05, "loss": 1.417, "step": 8472 }, { "epoch": 0.5050661580641316, "grad_norm": 3.020775318145752, "learning_rate": 8.590927629795115e-05, "loss": 1.4087, "step": 8474 }, { "epoch": 0.5051853617832877, "grad_norm": 2.8574085235595703, "learning_rate": 8.590269503406985e-05, "loss": 1.3995, "step": 8476 }, { "epoch": 0.5053045655024436, "grad_norm": 3.012777805328369, "learning_rate": 8.589611248582632e-05, "loss": 1.488, "step": 8478 }, { "epoch": 0.5054237692215997, "grad_norm": 3.020080804824829, "learning_rate": 8.588952865345603e-05, "loss": 1.2994, "step": 8480 }, { "epoch": 0.5055429729407558, "grad_norm": 3.2458839416503906, "learning_rate": 8.58829435371945e-05, "loss": 1.5866, "step": 8482 }, { "epoch": 0.5056621766599118, "grad_norm": 3.104445457458496, "learning_rate": 8.587635713727731e-05, "loss": 1.3515, "step": 8484 }, { "epoch": 0.5057813803790678, "grad_norm": 3.1800060272216797, "learning_rate": 8.586976945394007e-05, "loss": 1.3557, "step": 8486 }, { "epoch": 0.5059005840982239, "grad_norm": 3.503749132156372, "learning_rate": 8.586318048741846e-05, "loss": 1.6158, "step": 8488 }, { "epoch": 0.5060197878173799, "grad_norm": 2.8667008876800537, "learning_rate": 8.585659023794818e-05, "loss": 1.3036, "step": 8490 }, { "epoch": 0.506138991536536, "grad_norm": 3.2708027362823486, "learning_rate": 8.584999870576498e-05, "loss": 1.3878, "step": 8492 }, { "epoch": 0.5062581952556919, "grad_norm": 3.1597580909729004, "learning_rate": 8.584340589110466e-05, "loss": 1.3716, "step": 8494 }, { "epoch": 0.506377398974848, "grad_norm": 3.2546699047088623, "learning_rate": 8.583681179420309e-05, "loss": 1.4565, "step": 8496 }, { "epoch": 0.5064966026940041, "grad_norm": 3.2521016597747803, "learning_rate": 8.583021641529613e-05, "loss": 1.5056, "step": 8498 }, { "epoch": 0.5066158064131601, "grad_norm": 3.3213517665863037, "learning_rate": 8.582361975461974e-05, "loss": 1.3866, "step": 8500 }, { "epoch": 0.5067350101323161, "grad_norm": 3.2733192443847656, "learning_rate": 8.581702181240992e-05, "loss": 1.3612, "step": 8502 }, { "epoch": 0.5068542138514721, "grad_norm": 3.1004064083099365, "learning_rate": 8.581042258890267e-05, "loss": 1.5246, "step": 8504 }, { "epoch": 0.5069734175706282, "grad_norm": 2.8979432582855225, "learning_rate": 8.580382208433408e-05, "loss": 1.3903, "step": 8506 }, { "epoch": 0.5070926212897843, "grad_norm": 3.0670716762542725, "learning_rate": 8.579722029894028e-05, "loss": 1.497, "step": 8508 }, { "epoch": 0.5072118250089402, "grad_norm": 3.1819608211517334, "learning_rate": 8.579061723295743e-05, "loss": 1.5408, "step": 8510 }, { "epoch": 0.5073310287280963, "grad_norm": 2.965322971343994, "learning_rate": 8.578401288662172e-05, "loss": 1.4576, "step": 8512 }, { "epoch": 0.5074502324472524, "grad_norm": 2.7276153564453125, "learning_rate": 8.577740726016948e-05, "loss": 1.2519, "step": 8514 }, { "epoch": 0.5075694361664084, "grad_norm": 3.1991286277770996, "learning_rate": 8.577080035383693e-05, "loss": 1.5371, "step": 8516 }, { "epoch": 0.5076886398855645, "grad_norm": 3.243952989578247, "learning_rate": 8.57641921678605e-05, "loss": 1.6061, "step": 8518 }, { "epoch": 0.5078078436047204, "grad_norm": 2.8836863040924072, "learning_rate": 8.575758270247652e-05, "loss": 1.2517, "step": 8520 }, { "epoch": 0.5079270473238765, "grad_norm": 2.9106881618499756, "learning_rate": 8.575097195792147e-05, "loss": 1.2835, "step": 8522 }, { "epoch": 0.5080462510430326, "grad_norm": 3.013749361038208, "learning_rate": 8.574435993443184e-05, "loss": 1.3991, "step": 8524 }, { "epoch": 0.5081654547621886, "grad_norm": 3.0422186851501465, "learning_rate": 8.573774663224415e-05, "loss": 1.4111, "step": 8526 }, { "epoch": 0.5082846584813446, "grad_norm": 3.281527042388916, "learning_rate": 8.573113205159498e-05, "loss": 1.4991, "step": 8528 }, { "epoch": 0.5084038622005006, "grad_norm": 3.3033430576324463, "learning_rate": 8.572451619272098e-05, "loss": 1.4867, "step": 8530 }, { "epoch": 0.5085230659196567, "grad_norm": 3.0208611488342285, "learning_rate": 8.57178990558588e-05, "loss": 1.5252, "step": 8532 }, { "epoch": 0.5086422696388128, "grad_norm": 2.835714817047119, "learning_rate": 8.571128064124517e-05, "loss": 1.3924, "step": 8534 }, { "epoch": 0.5087614733579687, "grad_norm": 3.01381516456604, "learning_rate": 8.570466094911683e-05, "loss": 1.4924, "step": 8536 }, { "epoch": 0.5088806770771248, "grad_norm": 3.0589332580566406, "learning_rate": 8.569803997971061e-05, "loss": 1.4199, "step": 8538 }, { "epoch": 0.5089998807962809, "grad_norm": 3.007573366165161, "learning_rate": 8.569141773326338e-05, "loss": 1.2935, "step": 8540 }, { "epoch": 0.5091190845154369, "grad_norm": 2.8149945735931396, "learning_rate": 8.568479421001201e-05, "loss": 1.2357, "step": 8542 }, { "epoch": 0.5092382882345929, "grad_norm": 3.593604326248169, "learning_rate": 8.567816941019345e-05, "loss": 1.4072, "step": 8544 }, { "epoch": 0.5093574919537489, "grad_norm": 3.2713606357574463, "learning_rate": 8.567154333404471e-05, "loss": 1.4862, "step": 8546 }, { "epoch": 0.509476695672905, "grad_norm": 3.082206964492798, "learning_rate": 8.56649159818028e-05, "loss": 1.3521, "step": 8548 }, { "epoch": 0.5095958993920611, "grad_norm": 2.849949598312378, "learning_rate": 8.565828735370482e-05, "loss": 1.2475, "step": 8550 }, { "epoch": 0.509715103111217, "grad_norm": 3.222942352294922, "learning_rate": 8.565165744998791e-05, "loss": 1.4351, "step": 8552 }, { "epoch": 0.5098343068303731, "grad_norm": 3.215590715408325, "learning_rate": 8.564502627088922e-05, "loss": 1.4739, "step": 8554 }, { "epoch": 0.5099535105495292, "grad_norm": 3.216829538345337, "learning_rate": 8.5638393816646e-05, "loss": 1.4189, "step": 8556 }, { "epoch": 0.5100727142686852, "grad_norm": 3.1735174655914307, "learning_rate": 8.563176008749549e-05, "loss": 1.4265, "step": 8558 }, { "epoch": 0.5101919179878412, "grad_norm": 2.8150126934051514, "learning_rate": 8.562512508367499e-05, "loss": 1.3171, "step": 8560 }, { "epoch": 0.5103111217069972, "grad_norm": 3.3842201232910156, "learning_rate": 8.56184888054219e-05, "loss": 1.669, "step": 8562 }, { "epoch": 0.5104303254261533, "grad_norm": 3.383673667907715, "learning_rate": 8.561185125297359e-05, "loss": 1.3585, "step": 8564 }, { "epoch": 0.5105495291453094, "grad_norm": 2.9770991802215576, "learning_rate": 8.560521242656751e-05, "loss": 1.4117, "step": 8566 }, { "epoch": 0.5106687328644653, "grad_norm": 3.3526711463928223, "learning_rate": 8.559857232644118e-05, "loss": 1.3052, "step": 8568 }, { "epoch": 0.5107879365836214, "grad_norm": 3.1954314708709717, "learning_rate": 8.55919309528321e-05, "loss": 1.4133, "step": 8570 }, { "epoch": 0.5109071403027774, "grad_norm": 3.1023988723754883, "learning_rate": 8.558528830597787e-05, "loss": 1.3973, "step": 8572 }, { "epoch": 0.5110263440219335, "grad_norm": 3.059824228286743, "learning_rate": 8.557864438611613e-05, "loss": 1.4304, "step": 8574 }, { "epoch": 0.5111455477410896, "grad_norm": 3.183777332305908, "learning_rate": 8.557199919348456e-05, "loss": 1.5481, "step": 8576 }, { "epoch": 0.5112647514602455, "grad_norm": 3.0278160572052, "learning_rate": 8.556535272832087e-05, "loss": 1.5083, "step": 8578 }, { "epoch": 0.5113839551794016, "grad_norm": 3.1632659435272217, "learning_rate": 8.55587049908628e-05, "loss": 1.588, "step": 8580 }, { "epoch": 0.5115031588985577, "grad_norm": 2.9706180095672607, "learning_rate": 8.555205598134822e-05, "loss": 1.3574, "step": 8582 }, { "epoch": 0.5116223626177137, "grad_norm": 3.1229021549224854, "learning_rate": 8.554540570001495e-05, "loss": 1.4853, "step": 8584 }, { "epoch": 0.5117415663368697, "grad_norm": 3.1187357902526855, "learning_rate": 8.553875414710088e-05, "loss": 1.388, "step": 8586 }, { "epoch": 0.5118607700560257, "grad_norm": 3.353238821029663, "learning_rate": 8.5532101322844e-05, "loss": 1.497, "step": 8588 }, { "epoch": 0.5119799737751818, "grad_norm": 3.1801750659942627, "learning_rate": 8.552544722748228e-05, "loss": 1.4377, "step": 8590 }, { "epoch": 0.5120991774943379, "grad_norm": 3.097027063369751, "learning_rate": 8.551879186125376e-05, "loss": 1.4792, "step": 8592 }, { "epoch": 0.5122183812134938, "grad_norm": 3.476848602294922, "learning_rate": 8.551213522439654e-05, "loss": 1.4227, "step": 8594 }, { "epoch": 0.5123375849326499, "grad_norm": 2.838575839996338, "learning_rate": 8.550547731714873e-05, "loss": 1.4631, "step": 8596 }, { "epoch": 0.5124567886518059, "grad_norm": 2.963779926300049, "learning_rate": 8.549881813974852e-05, "loss": 1.4692, "step": 8598 }, { "epoch": 0.512575992370962, "grad_norm": 3.1993749141693115, "learning_rate": 8.549215769243413e-05, "loss": 1.4485, "step": 8600 }, { "epoch": 0.512695196090118, "grad_norm": 3.004566192626953, "learning_rate": 8.548549597544382e-05, "loss": 1.5075, "step": 8602 }, { "epoch": 0.512814399809274, "grad_norm": 3.0187833309173584, "learning_rate": 8.54788329890159e-05, "loss": 1.3675, "step": 8604 }, { "epoch": 0.5129336035284301, "grad_norm": 3.231863260269165, "learning_rate": 8.547216873338875e-05, "loss": 1.5207, "step": 8606 }, { "epoch": 0.5130528072475862, "grad_norm": 2.806851625442505, "learning_rate": 8.546550320880073e-05, "loss": 1.4205, "step": 8608 }, { "epoch": 0.5131720109667421, "grad_norm": 3.651108741760254, "learning_rate": 8.545883641549036e-05, "loss": 1.4905, "step": 8610 }, { "epoch": 0.5132912146858982, "grad_norm": 3.042221784591675, "learning_rate": 8.545216835369605e-05, "loss": 1.2453, "step": 8612 }, { "epoch": 0.5134104184050542, "grad_norm": 2.9878058433532715, "learning_rate": 8.54454990236564e-05, "loss": 1.2513, "step": 8614 }, { "epoch": 0.5135296221242103, "grad_norm": 2.760657548904419, "learning_rate": 8.543882842560996e-05, "loss": 1.3879, "step": 8616 }, { "epoch": 0.5136488258433664, "grad_norm": 2.890014410018921, "learning_rate": 8.543215655979538e-05, "loss": 1.4008, "step": 8618 }, { "epoch": 0.5137680295625223, "grad_norm": 3.39342999458313, "learning_rate": 8.542548342645134e-05, "loss": 1.4091, "step": 8620 }, { "epoch": 0.5138872332816784, "grad_norm": 3.0557005405426025, "learning_rate": 8.541880902581654e-05, "loss": 1.6251, "step": 8622 }, { "epoch": 0.5140064370008344, "grad_norm": 3.0706887245178223, "learning_rate": 8.541213335812977e-05, "loss": 1.4558, "step": 8624 }, { "epoch": 0.5141256407199905, "grad_norm": 3.1408379077911377, "learning_rate": 8.540545642362982e-05, "loss": 1.562, "step": 8626 }, { "epoch": 0.5142448444391465, "grad_norm": 3.1805171966552734, "learning_rate": 8.539877822255555e-05, "loss": 1.485, "step": 8628 }, { "epoch": 0.5143640481583025, "grad_norm": 3.056419610977173, "learning_rate": 8.539209875514586e-05, "loss": 1.557, "step": 8630 }, { "epoch": 0.5144832518774586, "grad_norm": 3.1333110332489014, "learning_rate": 8.538541802163973e-05, "loss": 1.4552, "step": 8632 }, { "epoch": 0.5146024555966147, "grad_norm": 3.2163238525390625, "learning_rate": 8.537873602227611e-05, "loss": 1.3415, "step": 8634 }, { "epoch": 0.5147216593157706, "grad_norm": 3.088810443878174, "learning_rate": 8.537205275729407e-05, "loss": 1.3655, "step": 8636 }, { "epoch": 0.5148408630349267, "grad_norm": 4.057421684265137, "learning_rate": 8.536536822693266e-05, "loss": 1.5748, "step": 8638 }, { "epoch": 0.5149600667540827, "grad_norm": 3.4033634662628174, "learning_rate": 8.535868243143103e-05, "loss": 1.2134, "step": 8640 }, { "epoch": 0.5150792704732388, "grad_norm": 3.076779842376709, "learning_rate": 8.535199537102837e-05, "loss": 1.3037, "step": 8642 }, { "epoch": 0.5151984741923948, "grad_norm": 3.1912100315093994, "learning_rate": 8.534530704596387e-05, "loss": 1.3755, "step": 8644 }, { "epoch": 0.5153176779115508, "grad_norm": 3.3152811527252197, "learning_rate": 8.53386174564768e-05, "loss": 1.3983, "step": 8646 }, { "epoch": 0.5154368816307069, "grad_norm": 2.84836745262146, "learning_rate": 8.533192660280646e-05, "loss": 1.3271, "step": 8648 }, { "epoch": 0.515556085349863, "grad_norm": 2.9876229763031006, "learning_rate": 8.532523448519224e-05, "loss": 1.3676, "step": 8650 }, { "epoch": 0.5156752890690189, "grad_norm": 2.9331791400909424, "learning_rate": 8.531854110387351e-05, "loss": 1.3841, "step": 8652 }, { "epoch": 0.515794492788175, "grad_norm": 3.0070080757141113, "learning_rate": 8.531184645908973e-05, "loss": 1.3258, "step": 8654 }, { "epoch": 0.515913696507331, "grad_norm": 3.628619909286499, "learning_rate": 8.530515055108038e-05, "loss": 1.444, "step": 8656 }, { "epoch": 0.5160329002264871, "grad_norm": 3.0911691188812256, "learning_rate": 8.5298453380085e-05, "loss": 1.5374, "step": 8658 }, { "epoch": 0.5161521039456431, "grad_norm": 2.745194673538208, "learning_rate": 8.529175494634316e-05, "loss": 1.3587, "step": 8660 }, { "epoch": 0.5162713076647991, "grad_norm": 2.9695000648498535, "learning_rate": 8.528505525009452e-05, "loss": 1.4816, "step": 8662 }, { "epoch": 0.5163905113839552, "grad_norm": 3.0053141117095947, "learning_rate": 8.52783542915787e-05, "loss": 1.4437, "step": 8664 }, { "epoch": 0.5165097151031112, "grad_norm": 3.3196682929992676, "learning_rate": 8.527165207103546e-05, "loss": 1.4567, "step": 8666 }, { "epoch": 0.5166289188222672, "grad_norm": 2.806640625, "learning_rate": 8.526494858870454e-05, "loss": 1.275, "step": 8668 }, { "epoch": 0.5167481225414233, "grad_norm": 2.8601887226104736, "learning_rate": 8.525824384482575e-05, "loss": 1.3691, "step": 8670 }, { "epoch": 0.5168673262605793, "grad_norm": 2.9338550567626953, "learning_rate": 8.525153783963896e-05, "loss": 1.3622, "step": 8672 }, { "epoch": 0.5169865299797354, "grad_norm": 3.4285480976104736, "learning_rate": 8.524483057338402e-05, "loss": 1.512, "step": 8674 }, { "epoch": 0.5171057336988915, "grad_norm": 3.1706812381744385, "learning_rate": 8.523812204630092e-05, "loss": 1.3642, "step": 8676 }, { "epoch": 0.5172249374180474, "grad_norm": 2.773695945739746, "learning_rate": 8.523141225862963e-05, "loss": 1.3664, "step": 8678 }, { "epoch": 0.5173441411372035, "grad_norm": 3.2382614612579346, "learning_rate": 8.522470121061018e-05, "loss": 1.4583, "step": 8680 }, { "epoch": 0.5174633448563595, "grad_norm": 3.218846082687378, "learning_rate": 8.521798890248263e-05, "loss": 1.5464, "step": 8682 }, { "epoch": 0.5175825485755156, "grad_norm": 3.14020037651062, "learning_rate": 8.521127533448714e-05, "loss": 1.4562, "step": 8684 }, { "epoch": 0.5177017522946716, "grad_norm": 3.4335949420928955, "learning_rate": 8.520456050686384e-05, "loss": 1.3928, "step": 8686 }, { "epoch": 0.5178209560138276, "grad_norm": 3.205843687057495, "learning_rate": 8.519784441985297e-05, "loss": 1.3615, "step": 8688 }, { "epoch": 0.5179401597329837, "grad_norm": 2.989337205886841, "learning_rate": 8.519112707369479e-05, "loss": 1.4043, "step": 8690 }, { "epoch": 0.5180593634521397, "grad_norm": 2.911783456802368, "learning_rate": 8.518440846862956e-05, "loss": 1.3235, "step": 8692 }, { "epoch": 0.5181785671712957, "grad_norm": 3.1609861850738525, "learning_rate": 8.517768860489767e-05, "loss": 1.3886, "step": 8694 }, { "epoch": 0.5182977708904518, "grad_norm": 3.1901395320892334, "learning_rate": 8.517096748273951e-05, "loss": 1.3617, "step": 8696 }, { "epoch": 0.5184169746096078, "grad_norm": 3.187762975692749, "learning_rate": 8.51642451023955e-05, "loss": 1.5667, "step": 8698 }, { "epoch": 0.5185361783287639, "grad_norm": 3.0767648220062256, "learning_rate": 8.515752146410613e-05, "loss": 1.5363, "step": 8700 }, { "epoch": 0.5186553820479199, "grad_norm": 3.1180317401885986, "learning_rate": 8.515079656811194e-05, "loss": 1.2995, "step": 8702 }, { "epoch": 0.5187745857670759, "grad_norm": 3.2574000358581543, "learning_rate": 8.514407041465346e-05, "loss": 1.4542, "step": 8704 }, { "epoch": 0.518893789486232, "grad_norm": 3.7885520458221436, "learning_rate": 8.513734300397136e-05, "loss": 1.5156, "step": 8706 }, { "epoch": 0.519012993205388, "grad_norm": 3.2803795337677, "learning_rate": 8.513061433630627e-05, "loss": 1.6027, "step": 8708 }, { "epoch": 0.519132196924544, "grad_norm": 3.3272550106048584, "learning_rate": 8.51238844118989e-05, "loss": 1.5603, "step": 8710 }, { "epoch": 0.5192514006437001, "grad_norm": 3.1927144527435303, "learning_rate": 8.511715323099002e-05, "loss": 1.3971, "step": 8712 }, { "epoch": 0.5193706043628561, "grad_norm": 3.0642616748809814, "learning_rate": 8.51104207938204e-05, "loss": 1.387, "step": 8714 }, { "epoch": 0.5194898080820122, "grad_norm": 3.096622943878174, "learning_rate": 8.510368710063094e-05, "loss": 1.4686, "step": 8716 }, { "epoch": 0.5196090118011681, "grad_norm": 3.2344133853912354, "learning_rate": 8.509695215166247e-05, "loss": 1.4772, "step": 8718 }, { "epoch": 0.5197282155203242, "grad_norm": 3.325354814529419, "learning_rate": 8.509021594715591e-05, "loss": 1.5042, "step": 8720 }, { "epoch": 0.5198474192394803, "grad_norm": 2.8131256103515625, "learning_rate": 8.508347848735229e-05, "loss": 1.4685, "step": 8722 }, { "epoch": 0.5199666229586363, "grad_norm": 3.2376890182495117, "learning_rate": 8.50767397724926e-05, "loss": 1.3122, "step": 8724 }, { "epoch": 0.5200858266777924, "grad_norm": 3.1907641887664795, "learning_rate": 8.506999980281791e-05, "loss": 1.2843, "step": 8726 }, { "epoch": 0.5202050303969484, "grad_norm": 2.7703335285186768, "learning_rate": 8.506325857856936e-05, "loss": 1.1854, "step": 8728 }, { "epoch": 0.5203242341161044, "grad_norm": 3.282747983932495, "learning_rate": 8.505651609998806e-05, "loss": 1.372, "step": 8730 }, { "epoch": 0.5204434378352605, "grad_norm": 3.4668285846710205, "learning_rate": 8.504977236731524e-05, "loss": 1.542, "step": 8732 }, { "epoch": 0.5205626415544164, "grad_norm": 3.399085760116577, "learning_rate": 8.504302738079215e-05, "loss": 1.5327, "step": 8734 }, { "epoch": 0.5206818452735725, "grad_norm": 2.871108293533325, "learning_rate": 8.503628114066007e-05, "loss": 1.4394, "step": 8736 }, { "epoch": 0.5208010489927286, "grad_norm": 2.931436777114868, "learning_rate": 8.502953364716033e-05, "loss": 1.4482, "step": 8738 }, { "epoch": 0.5209202527118846, "grad_norm": 3.024168014526367, "learning_rate": 8.502278490053432e-05, "loss": 1.2833, "step": 8740 }, { "epoch": 0.5210394564310407, "grad_norm": 3.620006561279297, "learning_rate": 8.501603490102345e-05, "loss": 1.557, "step": 8742 }, { "epoch": 0.5211586601501967, "grad_norm": 2.8095016479492188, "learning_rate": 8.500928364886924e-05, "loss": 1.4039, "step": 8744 }, { "epoch": 0.5212778638693527, "grad_norm": 3.1459579467773438, "learning_rate": 8.500253114431315e-05, "loss": 1.4457, "step": 8746 }, { "epoch": 0.5213970675885088, "grad_norm": 3.259331464767456, "learning_rate": 8.499577738759677e-05, "loss": 1.3919, "step": 8748 }, { "epoch": 0.5215162713076648, "grad_norm": 2.667318105697632, "learning_rate": 8.498902237896169e-05, "loss": 1.2912, "step": 8750 }, { "epoch": 0.5216354750268208, "grad_norm": 2.4981038570404053, "learning_rate": 8.498226611864957e-05, "loss": 1.3186, "step": 8752 }, { "epoch": 0.5217546787459769, "grad_norm": 3.2225215435028076, "learning_rate": 8.497550860690208e-05, "loss": 1.3766, "step": 8754 }, { "epoch": 0.5218738824651329, "grad_norm": 2.7675647735595703, "learning_rate": 8.4968749843961e-05, "loss": 1.3001, "step": 8756 }, { "epoch": 0.521993086184289, "grad_norm": 3.344348907470703, "learning_rate": 8.496198983006809e-05, "loss": 1.565, "step": 8758 }, { "epoch": 0.5221122899034449, "grad_norm": 3.300565481185913, "learning_rate": 8.495522856546518e-05, "loss": 1.3473, "step": 8760 }, { "epoch": 0.522231493622601, "grad_norm": 3.112846612930298, "learning_rate": 8.494846605039417e-05, "loss": 1.3863, "step": 8762 }, { "epoch": 0.5223506973417571, "grad_norm": 2.9412639141082764, "learning_rate": 8.494170228509692e-05, "loss": 1.4092, "step": 8764 }, { "epoch": 0.5224699010609131, "grad_norm": 3.0482544898986816, "learning_rate": 8.493493726981545e-05, "loss": 1.4453, "step": 8766 }, { "epoch": 0.5225891047800691, "grad_norm": 2.831005334854126, "learning_rate": 8.492817100479173e-05, "loss": 1.3198, "step": 8768 }, { "epoch": 0.5227083084992252, "grad_norm": 3.199409008026123, "learning_rate": 8.492140349026783e-05, "loss": 1.383, "step": 8770 }, { "epoch": 0.5228275122183812, "grad_norm": 3.2448549270629883, "learning_rate": 8.491463472648586e-05, "loss": 1.4457, "step": 8772 }, { "epoch": 0.5229467159375373, "grad_norm": 2.8998477458953857, "learning_rate": 8.490786471368792e-05, "loss": 1.4615, "step": 8774 }, { "epoch": 0.5230659196566932, "grad_norm": 2.8137826919555664, "learning_rate": 8.490109345211625e-05, "loss": 1.2326, "step": 8776 }, { "epoch": 0.5231851233758493, "grad_norm": 3.2687699794769287, "learning_rate": 8.489432094201304e-05, "loss": 1.4203, "step": 8778 }, { "epoch": 0.5233043270950054, "grad_norm": 3.0831127166748047, "learning_rate": 8.488754718362059e-05, "loss": 1.3811, "step": 8780 }, { "epoch": 0.5234235308141614, "grad_norm": 3.4362926483154297, "learning_rate": 8.48807721771812e-05, "loss": 1.4236, "step": 8782 }, { "epoch": 0.5235427345333175, "grad_norm": 3.394629955291748, "learning_rate": 8.487399592293726e-05, "loss": 1.5096, "step": 8784 }, { "epoch": 0.5236619382524734, "grad_norm": 3.1563942432403564, "learning_rate": 8.486721842113114e-05, "loss": 1.3846, "step": 8786 }, { "epoch": 0.5237811419716295, "grad_norm": 3.500610113143921, "learning_rate": 8.486043967200534e-05, "loss": 1.5562, "step": 8788 }, { "epoch": 0.5239003456907856, "grad_norm": 2.8865742683410645, "learning_rate": 8.485365967580233e-05, "loss": 1.5017, "step": 8790 }, { "epoch": 0.5240195494099416, "grad_norm": 2.9454638957977295, "learning_rate": 8.484687843276469e-05, "loss": 1.5817, "step": 8792 }, { "epoch": 0.5241387531290976, "grad_norm": 3.1213624477386475, "learning_rate": 8.484009594313495e-05, "loss": 1.5453, "step": 8794 }, { "epoch": 0.5242579568482537, "grad_norm": 3.72279953956604, "learning_rate": 8.483331220715578e-05, "loss": 1.3809, "step": 8796 }, { "epoch": 0.5243771605674097, "grad_norm": 3.325183629989624, "learning_rate": 8.482652722506985e-05, "loss": 1.4895, "step": 8798 }, { "epoch": 0.5244963642865658, "grad_norm": 8.86125373840332, "learning_rate": 8.48197409971199e-05, "loss": 1.286, "step": 8800 }, { "epoch": 0.5246155680057217, "grad_norm": 2.990590810775757, "learning_rate": 8.481295352354867e-05, "loss": 1.4198, "step": 8802 }, { "epoch": 0.5247347717248778, "grad_norm": 3.0119917392730713, "learning_rate": 8.480616480459898e-05, "loss": 1.4899, "step": 8804 }, { "epoch": 0.5248539754440339, "grad_norm": 2.3490772247314453, "learning_rate": 8.479937484051368e-05, "loss": 1.3455, "step": 8806 }, { "epoch": 0.5249731791631899, "grad_norm": 3.2158758640289307, "learning_rate": 8.479258363153569e-05, "loss": 1.5396, "step": 8808 }, { "epoch": 0.5250923828823459, "grad_norm": 2.92813777923584, "learning_rate": 8.478579117790792e-05, "loss": 1.3624, "step": 8810 }, { "epoch": 0.5252115866015019, "grad_norm": 2.9448256492614746, "learning_rate": 8.477899747987342e-05, "loss": 1.3892, "step": 8812 }, { "epoch": 0.525330790320658, "grad_norm": 3.26636004447937, "learning_rate": 8.477220253767514e-05, "loss": 1.4369, "step": 8814 }, { "epoch": 0.5254499940398141, "grad_norm": 3.3621015548706055, "learning_rate": 8.476540635155624e-05, "loss": 1.5508, "step": 8816 }, { "epoch": 0.52556919775897, "grad_norm": 3.173238754272461, "learning_rate": 8.475860892175978e-05, "loss": 1.4579, "step": 8818 }, { "epoch": 0.5256884014781261, "grad_norm": 3.0221784114837646, "learning_rate": 8.475181024852896e-05, "loss": 1.3874, "step": 8820 }, { "epoch": 0.5258076051972822, "grad_norm": 3.221273899078369, "learning_rate": 8.4745010332107e-05, "loss": 1.4546, "step": 8822 }, { "epoch": 0.5259268089164382, "grad_norm": 2.6812193393707275, "learning_rate": 8.473820917273714e-05, "loss": 1.1145, "step": 8824 }, { "epoch": 0.5260460126355943, "grad_norm": 2.5915915966033936, "learning_rate": 8.473140677066267e-05, "loss": 1.2727, "step": 8826 }, { "epoch": 0.5261652163547502, "grad_norm": 3.487161159515381, "learning_rate": 8.472460312612696e-05, "loss": 1.2536, "step": 8828 }, { "epoch": 0.5262844200739063, "grad_norm": 3.159944772720337, "learning_rate": 8.471779823937339e-05, "loss": 1.3881, "step": 8830 }, { "epoch": 0.5264036237930624, "grad_norm": 3.62729549407959, "learning_rate": 8.471099211064535e-05, "loss": 1.4574, "step": 8832 }, { "epoch": 0.5265228275122183, "grad_norm": 3.0053598880767822, "learning_rate": 8.47041847401864e-05, "loss": 1.4577, "step": 8834 }, { "epoch": 0.5266420312313744, "grad_norm": 3.1661789417266846, "learning_rate": 8.469737612824001e-05, "loss": 1.5066, "step": 8836 }, { "epoch": 0.5267612349505305, "grad_norm": 2.977907657623291, "learning_rate": 8.469056627504976e-05, "loss": 1.4176, "step": 8838 }, { "epoch": 0.5268804386696865, "grad_norm": 3.0117764472961426, "learning_rate": 8.468375518085928e-05, "loss": 1.5048, "step": 8840 }, { "epoch": 0.5269996423888426, "grad_norm": 2.827136754989624, "learning_rate": 8.467694284591218e-05, "loss": 1.3084, "step": 8842 }, { "epoch": 0.5271188461079985, "grad_norm": 3.4251139163970947, "learning_rate": 8.467012927045221e-05, "loss": 1.5283, "step": 8844 }, { "epoch": 0.5272380498271546, "grad_norm": 3.2383832931518555, "learning_rate": 8.466331445472308e-05, "loss": 1.3805, "step": 8846 }, { "epoch": 0.5273572535463107, "grad_norm": 2.8706517219543457, "learning_rate": 8.46564983989686e-05, "loss": 1.101, "step": 8848 }, { "epoch": 0.5274764572654667, "grad_norm": 2.93820858001709, "learning_rate": 8.46496811034326e-05, "loss": 1.3104, "step": 8850 }, { "epoch": 0.5275956609846227, "grad_norm": 2.6444578170776367, "learning_rate": 8.464286256835896e-05, "loss": 1.3596, "step": 8852 }, { "epoch": 0.5277148647037787, "grad_norm": 3.3999412059783936, "learning_rate": 8.463604279399157e-05, "loss": 1.4616, "step": 8854 }, { "epoch": 0.5278340684229348, "grad_norm": 3.1632983684539795, "learning_rate": 8.462922178057445e-05, "loss": 1.4931, "step": 8856 }, { "epoch": 0.5279532721420909, "grad_norm": 3.2722771167755127, "learning_rate": 8.462239952835155e-05, "loss": 1.4956, "step": 8858 }, { "epoch": 0.5280724758612468, "grad_norm": 3.2961831092834473, "learning_rate": 8.4615576037567e-05, "loss": 1.4168, "step": 8860 }, { "epoch": 0.5281916795804029, "grad_norm": 3.05623197555542, "learning_rate": 8.460875130846484e-05, "loss": 1.3989, "step": 8862 }, { "epoch": 0.528310883299559, "grad_norm": 3.0065536499023438, "learning_rate": 8.460192534128923e-05, "loss": 1.5087, "step": 8864 }, { "epoch": 0.528430087018715, "grad_norm": 3.084975004196167, "learning_rate": 8.459509813628436e-05, "loss": 1.3236, "step": 8866 }, { "epoch": 0.528549290737871, "grad_norm": 3.051413059234619, "learning_rate": 8.458826969369448e-05, "loss": 1.2898, "step": 8868 }, { "epoch": 0.528668494457027, "grad_norm": 2.8723065853118896, "learning_rate": 8.458144001376385e-05, "loss": 1.2945, "step": 8870 }, { "epoch": 0.5287876981761831, "grad_norm": 2.968973159790039, "learning_rate": 8.457460909673677e-05, "loss": 1.3702, "step": 8872 }, { "epoch": 0.5289069018953392, "grad_norm": 3.418015718460083, "learning_rate": 8.456777694285764e-05, "loss": 1.4306, "step": 8874 }, { "epoch": 0.5290261056144951, "grad_norm": 3.477078914642334, "learning_rate": 8.456094355237086e-05, "loss": 1.4156, "step": 8876 }, { "epoch": 0.5291453093336512, "grad_norm": 3.224818706512451, "learning_rate": 8.455410892552087e-05, "loss": 1.4955, "step": 8878 }, { "epoch": 0.5292645130528072, "grad_norm": 3.3631560802459717, "learning_rate": 8.454727306255219e-05, "loss": 1.5484, "step": 8880 }, { "epoch": 0.5293837167719633, "grad_norm": 3.2126293182373047, "learning_rate": 8.454043596370935e-05, "loss": 1.4945, "step": 8882 }, { "epoch": 0.5295029204911194, "grad_norm": 3.059732437133789, "learning_rate": 8.453359762923692e-05, "loss": 1.3405, "step": 8884 }, { "epoch": 0.5296221242102753, "grad_norm": 3.320833444595337, "learning_rate": 8.452675805937956e-05, "loss": 1.4137, "step": 8886 }, { "epoch": 0.5297413279294314, "grad_norm": 2.797140598297119, "learning_rate": 8.451991725438193e-05, "loss": 1.2849, "step": 8888 }, { "epoch": 0.5298605316485875, "grad_norm": 2.9973926544189453, "learning_rate": 8.451307521448874e-05, "loss": 1.3719, "step": 8890 }, { "epoch": 0.5299797353677435, "grad_norm": 3.0331268310546875, "learning_rate": 8.450623193994478e-05, "loss": 1.4049, "step": 8892 }, { "epoch": 0.5300989390868995, "grad_norm": 2.9044699668884277, "learning_rate": 8.449938743099484e-05, "loss": 1.5587, "step": 8894 }, { "epoch": 0.5302181428060555, "grad_norm": 3.0361380577087402, "learning_rate": 8.449254168788377e-05, "loss": 1.3254, "step": 8896 }, { "epoch": 0.5303373465252116, "grad_norm": 3.382354497909546, "learning_rate": 8.448569471085648e-05, "loss": 1.5439, "step": 8898 }, { "epoch": 0.5304565502443677, "grad_norm": 2.9402270317077637, "learning_rate": 8.447884650015789e-05, "loss": 1.2609, "step": 8900 }, { "epoch": 0.5305757539635236, "grad_norm": 2.8341012001037598, "learning_rate": 8.447199705603299e-05, "loss": 1.2837, "step": 8902 }, { "epoch": 0.5306949576826797, "grad_norm": 3.0556693077087402, "learning_rate": 8.44651463787268e-05, "loss": 1.5688, "step": 8904 }, { "epoch": 0.5308141614018357, "grad_norm": 2.9304325580596924, "learning_rate": 8.445829446848442e-05, "loss": 1.3553, "step": 8906 }, { "epoch": 0.5309333651209918, "grad_norm": 3.134270668029785, "learning_rate": 8.445144132555094e-05, "loss": 1.4077, "step": 8908 }, { "epoch": 0.5310525688401478, "grad_norm": 3.255971908569336, "learning_rate": 8.444458695017155e-05, "loss": 1.3343, "step": 8910 }, { "epoch": 0.5311717725593038, "grad_norm": 3.2114992141723633, "learning_rate": 8.44377313425914e-05, "loss": 1.3623, "step": 8912 }, { "epoch": 0.5312909762784599, "grad_norm": 2.8409509658813477, "learning_rate": 8.44308745030558e-05, "loss": 1.3752, "step": 8914 }, { "epoch": 0.531410179997616, "grad_norm": 3.2286360263824463, "learning_rate": 8.442401643181e-05, "loss": 1.2603, "step": 8916 }, { "epoch": 0.5315293837167719, "grad_norm": 3.3794021606445312, "learning_rate": 8.441715712909935e-05, "loss": 1.4677, "step": 8918 }, { "epoch": 0.531648587435928, "grad_norm": 3.2774436473846436, "learning_rate": 8.441029659516924e-05, "loss": 1.3572, "step": 8920 }, { "epoch": 0.531767791155084, "grad_norm": 3.0302562713623047, "learning_rate": 8.44034348302651e-05, "loss": 1.3062, "step": 8922 }, { "epoch": 0.5318869948742401, "grad_norm": 3.1730949878692627, "learning_rate": 8.439657183463236e-05, "loss": 1.463, "step": 8924 }, { "epoch": 0.5320061985933962, "grad_norm": 3.301208257675171, "learning_rate": 8.438970760851656e-05, "loss": 1.3018, "step": 8926 }, { "epoch": 0.5321254023125521, "grad_norm": 2.925199270248413, "learning_rate": 8.438284215216328e-05, "loss": 1.4673, "step": 8928 }, { "epoch": 0.5322446060317082, "grad_norm": 2.971926689147949, "learning_rate": 8.437597546581808e-05, "loss": 1.535, "step": 8930 }, { "epoch": 0.5323638097508643, "grad_norm": 2.804457187652588, "learning_rate": 8.436910754972662e-05, "loss": 1.3941, "step": 8932 }, { "epoch": 0.5324830134700202, "grad_norm": 3.134193181991577, "learning_rate": 8.43622384041346e-05, "loss": 1.384, "step": 8934 }, { "epoch": 0.5326022171891763, "grad_norm": 2.761681079864502, "learning_rate": 8.435536802928774e-05, "loss": 1.3122, "step": 8936 }, { "epoch": 0.5327214209083323, "grad_norm": 3.1966989040374756, "learning_rate": 8.434849642543181e-05, "loss": 1.387, "step": 8938 }, { "epoch": 0.5328406246274884, "grad_norm": 3.2972137928009033, "learning_rate": 8.434162359281265e-05, "loss": 1.3193, "step": 8940 }, { "epoch": 0.5329598283466445, "grad_norm": 2.995405912399292, "learning_rate": 8.433474953167611e-05, "loss": 1.441, "step": 8942 }, { "epoch": 0.5330790320658004, "grad_norm": 3.113494396209717, "learning_rate": 8.432787424226812e-05, "loss": 1.4722, "step": 8944 }, { "epoch": 0.5331982357849565, "grad_norm": 2.8570516109466553, "learning_rate": 8.43209977248346e-05, "loss": 1.3734, "step": 8946 }, { "epoch": 0.5333174395041125, "grad_norm": 2.899491786956787, "learning_rate": 8.431411997962158e-05, "loss": 1.5953, "step": 8948 }, { "epoch": 0.5334366432232686, "grad_norm": 3.274477481842041, "learning_rate": 8.430724100687507e-05, "loss": 1.3564, "step": 8950 }, { "epoch": 0.5335558469424246, "grad_norm": 3.1355319023132324, "learning_rate": 8.430036080684117e-05, "loss": 1.4026, "step": 8952 }, { "epoch": 0.5336750506615806, "grad_norm": 3.0628905296325684, "learning_rate": 8.4293479379766e-05, "loss": 1.3727, "step": 8954 }, { "epoch": 0.5337942543807367, "grad_norm": 2.8969216346740723, "learning_rate": 8.428659672589573e-05, "loss": 1.4363, "step": 8956 }, { "epoch": 0.5339134580998928, "grad_norm": 3.268800973892212, "learning_rate": 8.427971284547663e-05, "loss": 1.4588, "step": 8958 }, { "epoch": 0.5340326618190487, "grad_norm": 2.8108394145965576, "learning_rate": 8.427282773875487e-05, "loss": 1.391, "step": 8960 }, { "epoch": 0.5341518655382048, "grad_norm": 3.081178903579712, "learning_rate": 8.426594140597681e-05, "loss": 1.4786, "step": 8962 }, { "epoch": 0.5342710692573608, "grad_norm": 3.32028865814209, "learning_rate": 8.42590538473888e-05, "loss": 1.4486, "step": 8964 }, { "epoch": 0.5343902729765169, "grad_norm": 2.848459243774414, "learning_rate": 8.425216506323721e-05, "loss": 1.3429, "step": 8966 }, { "epoch": 0.534509476695673, "grad_norm": 2.731245994567871, "learning_rate": 8.424527505376848e-05, "loss": 1.4053, "step": 8968 }, { "epoch": 0.5346286804148289, "grad_norm": 3.068756341934204, "learning_rate": 8.423838381922909e-05, "loss": 1.5542, "step": 8970 }, { "epoch": 0.534747884133985, "grad_norm": 3.2714664936065674, "learning_rate": 8.423149135986557e-05, "loss": 1.3957, "step": 8972 }, { "epoch": 0.534867087853141, "grad_norm": 2.9049220085144043, "learning_rate": 8.422459767592447e-05, "loss": 1.4735, "step": 8974 }, { "epoch": 0.534986291572297, "grad_norm": 2.8451898097991943, "learning_rate": 8.421770276765245e-05, "loss": 1.3587, "step": 8976 }, { "epoch": 0.5351054952914531, "grad_norm": 3.1118290424346924, "learning_rate": 8.421080663529608e-05, "loss": 1.4498, "step": 8978 }, { "epoch": 0.5352246990106091, "grad_norm": 3.0487060546875, "learning_rate": 8.420390927910213e-05, "loss": 1.3841, "step": 8980 }, { "epoch": 0.5353439027297652, "grad_norm": 2.9094343185424805, "learning_rate": 8.419701069931731e-05, "loss": 1.3051, "step": 8982 }, { "epoch": 0.5354631064489213, "grad_norm": 3.4131815433502197, "learning_rate": 8.419011089618842e-05, "loss": 1.65, "step": 8984 }, { "epoch": 0.5355823101680772, "grad_norm": 3.0092954635620117, "learning_rate": 8.418320986996229e-05, "loss": 1.4983, "step": 8986 }, { "epoch": 0.5357015138872333, "grad_norm": 3.3127710819244385, "learning_rate": 8.417630762088577e-05, "loss": 1.4031, "step": 8988 }, { "epoch": 0.5358207176063893, "grad_norm": 3.0275766849517822, "learning_rate": 8.41694041492058e-05, "loss": 1.3528, "step": 8990 }, { "epoch": 0.5359399213255454, "grad_norm": 2.8243134021759033, "learning_rate": 8.416249945516935e-05, "loss": 1.2619, "step": 8992 }, { "epoch": 0.5360591250447014, "grad_norm": 3.1822903156280518, "learning_rate": 8.415559353902339e-05, "loss": 1.4011, "step": 8994 }, { "epoch": 0.5361783287638574, "grad_norm": 3.12730073928833, "learning_rate": 8.414868640101499e-05, "loss": 1.3555, "step": 8996 }, { "epoch": 0.5362975324830135, "grad_norm": 3.0105860233306885, "learning_rate": 8.414177804139122e-05, "loss": 1.4202, "step": 8998 }, { "epoch": 0.5364167362021695, "grad_norm": 3.2191970348358154, "learning_rate": 8.413486846039926e-05, "loss": 1.2437, "step": 9000 }, { "epoch": 0.5365359399213255, "grad_norm": 3.4711904525756836, "learning_rate": 8.412795765828627e-05, "loss": 1.45, "step": 9002 }, { "epoch": 0.5366551436404816, "grad_norm": 3.0877695083618164, "learning_rate": 8.412104563529946e-05, "loss": 1.284, "step": 9004 }, { "epoch": 0.5367743473596376, "grad_norm": 3.1792705059051514, "learning_rate": 8.411413239168609e-05, "loss": 1.3688, "step": 9006 }, { "epoch": 0.5368935510787937, "grad_norm": 3.2621116638183594, "learning_rate": 8.41072179276935e-05, "loss": 1.5621, "step": 9008 }, { "epoch": 0.5370127547979497, "grad_norm": 2.8454911708831787, "learning_rate": 8.410030224356902e-05, "loss": 1.3145, "step": 9010 }, { "epoch": 0.5371319585171057, "grad_norm": 3.081512212753296, "learning_rate": 8.409338533956005e-05, "loss": 1.6354, "step": 9012 }, { "epoch": 0.5372511622362618, "grad_norm": 2.9488518238067627, "learning_rate": 8.408646721591405e-05, "loss": 1.28, "step": 9014 }, { "epoch": 0.5373703659554178, "grad_norm": 2.9905412197113037, "learning_rate": 8.407954787287849e-05, "loss": 1.507, "step": 9016 }, { "epoch": 0.5374895696745738, "grad_norm": 3.321016788482666, "learning_rate": 8.407262731070089e-05, "loss": 1.3936, "step": 9018 }, { "epoch": 0.5376087733937299, "grad_norm": 3.169191837310791, "learning_rate": 8.406570552962885e-05, "loss": 1.3721, "step": 9020 }, { "epoch": 0.5377279771128859, "grad_norm": 2.7360117435455322, "learning_rate": 8.405878252990996e-05, "loss": 1.3527, "step": 9022 }, { "epoch": 0.537847180832042, "grad_norm": 3.3947689533233643, "learning_rate": 8.405185831179188e-05, "loss": 1.6975, "step": 9024 }, { "epoch": 0.537966384551198, "grad_norm": 3.2267704010009766, "learning_rate": 8.404493287552232e-05, "loss": 1.2935, "step": 9026 }, { "epoch": 0.538085588270354, "grad_norm": 2.8881850242614746, "learning_rate": 8.403800622134904e-05, "loss": 1.3748, "step": 9028 }, { "epoch": 0.5382047919895101, "grad_norm": 2.96982741355896, "learning_rate": 8.40310783495198e-05, "loss": 1.2207, "step": 9030 }, { "epoch": 0.5383239957086661, "grad_norm": 2.6553163528442383, "learning_rate": 8.402414926028249e-05, "loss": 1.2803, "step": 9032 }, { "epoch": 0.5384431994278222, "grad_norm": 3.1701486110687256, "learning_rate": 8.401721895388491e-05, "loss": 1.4457, "step": 9034 }, { "epoch": 0.5385624031469782, "grad_norm": 2.90456485748291, "learning_rate": 8.401028743057503e-05, "loss": 1.5531, "step": 9036 }, { "epoch": 0.5386816068661342, "grad_norm": 3.1665358543395996, "learning_rate": 8.400335469060081e-05, "loss": 1.3554, "step": 9038 }, { "epoch": 0.5388008105852903, "grad_norm": 3.0864884853363037, "learning_rate": 8.399642073421025e-05, "loss": 1.4527, "step": 9040 }, { "epoch": 0.5389200143044462, "grad_norm": 3.0853545665740967, "learning_rate": 8.398948556165139e-05, "loss": 1.3841, "step": 9042 }, { "epoch": 0.5390392180236023, "grad_norm": 3.159863233566284, "learning_rate": 8.398254917317235e-05, "loss": 1.2992, "step": 9044 }, { "epoch": 0.5391584217427584, "grad_norm": 3.298377513885498, "learning_rate": 8.397561156902125e-05, "loss": 1.3555, "step": 9046 }, { "epoch": 0.5392776254619144, "grad_norm": 3.15712308883667, "learning_rate": 8.396867274944629e-05, "loss": 1.408, "step": 9048 }, { "epoch": 0.5393968291810705, "grad_norm": 2.933161735534668, "learning_rate": 8.396173271469567e-05, "loss": 1.435, "step": 9050 }, { "epoch": 0.5395160329002265, "grad_norm": 3.1018691062927246, "learning_rate": 8.395479146501767e-05, "loss": 1.4613, "step": 9052 }, { "epoch": 0.5396352366193825, "grad_norm": 2.9559326171875, "learning_rate": 8.394784900066061e-05, "loss": 1.3539, "step": 9054 }, { "epoch": 0.5397544403385386, "grad_norm": 2.8019814491271973, "learning_rate": 8.394090532187285e-05, "loss": 1.273, "step": 9056 }, { "epoch": 0.5398736440576946, "grad_norm": 3.0567469596862793, "learning_rate": 8.393396042890277e-05, "loss": 1.4175, "step": 9058 }, { "epoch": 0.5399928477768506, "grad_norm": 3.1939754486083984, "learning_rate": 8.392701432199883e-05, "loss": 1.5, "step": 9060 }, { "epoch": 0.5401120514960067, "grad_norm": 3.1172289848327637, "learning_rate": 8.392006700140949e-05, "loss": 1.291, "step": 9062 }, { "epoch": 0.5402312552151627, "grad_norm": 3.249420642852783, "learning_rate": 8.391311846738332e-05, "loss": 1.4841, "step": 9064 }, { "epoch": 0.5403504589343188, "grad_norm": 3.039212226867676, "learning_rate": 8.390616872016885e-05, "loss": 1.3282, "step": 9066 }, { "epoch": 0.5404696626534747, "grad_norm": 3.4569315910339355, "learning_rate": 8.389921776001472e-05, "loss": 1.5279, "step": 9068 }, { "epoch": 0.5405888663726308, "grad_norm": 2.930856704711914, "learning_rate": 8.389226558716958e-05, "loss": 1.4167, "step": 9070 }, { "epoch": 0.5407080700917869, "grad_norm": 2.7225308418273926, "learning_rate": 8.388531220188216e-05, "loss": 1.3125, "step": 9072 }, { "epoch": 0.5408272738109429, "grad_norm": 3.150175094604492, "learning_rate": 8.387835760440117e-05, "loss": 1.5021, "step": 9074 }, { "epoch": 0.540946477530099, "grad_norm": 3.1198389530181885, "learning_rate": 8.387140179497541e-05, "loss": 1.4905, "step": 9076 }, { "epoch": 0.541065681249255, "grad_norm": 2.9781336784362793, "learning_rate": 8.386444477385373e-05, "loss": 1.4378, "step": 9078 }, { "epoch": 0.541184884968411, "grad_norm": 3.0779285430908203, "learning_rate": 8.3857486541285e-05, "loss": 1.3598, "step": 9080 }, { "epoch": 0.5413040886875671, "grad_norm": 3.0328831672668457, "learning_rate": 8.385052709751812e-05, "loss": 1.4109, "step": 9082 }, { "epoch": 0.541423292406723, "grad_norm": 3.1709702014923096, "learning_rate": 8.384356644280206e-05, "loss": 1.5105, "step": 9084 }, { "epoch": 0.5415424961258791, "grad_norm": 3.0608386993408203, "learning_rate": 8.383660457738585e-05, "loss": 1.3774, "step": 9086 }, { "epoch": 0.5416616998450352, "grad_norm": 3.026323080062866, "learning_rate": 8.382964150151852e-05, "loss": 1.4409, "step": 9088 }, { "epoch": 0.5417809035641912, "grad_norm": 3.308593511581421, "learning_rate": 8.382267721544916e-05, "loss": 1.4003, "step": 9090 }, { "epoch": 0.5419001072833473, "grad_norm": 3.60892653465271, "learning_rate": 8.381571171942692e-05, "loss": 1.5683, "step": 9092 }, { "epoch": 0.5420193110025032, "grad_norm": 2.9505276679992676, "learning_rate": 8.380874501370097e-05, "loss": 1.3476, "step": 9094 }, { "epoch": 0.5421385147216593, "grad_norm": 3.0227415561676025, "learning_rate": 8.380177709852055e-05, "loss": 1.261, "step": 9096 }, { "epoch": 0.5422577184408154, "grad_norm": 3.2831246852874756, "learning_rate": 8.37948079741349e-05, "loss": 1.588, "step": 9098 }, { "epoch": 0.5423769221599714, "grad_norm": 3.1869072914123535, "learning_rate": 8.378783764079333e-05, "loss": 1.3091, "step": 9100 }, { "epoch": 0.5424961258791274, "grad_norm": 3.125098466873169, "learning_rate": 8.378086609874521e-05, "loss": 1.4121, "step": 9102 }, { "epoch": 0.5426153295982835, "grad_norm": 3.162031888961792, "learning_rate": 8.377389334823993e-05, "loss": 1.4718, "step": 9104 }, { "epoch": 0.5427345333174395, "grad_norm": 3.111966371536255, "learning_rate": 8.376691938952694e-05, "loss": 1.3375, "step": 9106 }, { "epoch": 0.5428537370365956, "grad_norm": 3.0201587677001953, "learning_rate": 8.37599442228557e-05, "loss": 1.4304, "step": 9108 }, { "epoch": 0.5429729407557515, "grad_norm": 3.1464171409606934, "learning_rate": 8.375296784847576e-05, "loss": 1.452, "step": 9110 }, { "epoch": 0.5430921444749076, "grad_norm": 3.300222158432007, "learning_rate": 8.374599026663665e-05, "loss": 1.5558, "step": 9112 }, { "epoch": 0.5432113481940637, "grad_norm": 3.1377172470092773, "learning_rate": 8.373901147758802e-05, "loss": 1.4369, "step": 9114 }, { "epoch": 0.5433305519132197, "grad_norm": 3.1540467739105225, "learning_rate": 8.373203148157953e-05, "loss": 1.404, "step": 9116 }, { "epoch": 0.5434497556323757, "grad_norm": 3.292686700820923, "learning_rate": 8.372505027886084e-05, "loss": 1.3728, "step": 9118 }, { "epoch": 0.5435689593515318, "grad_norm": 2.7852859497070312, "learning_rate": 8.371806786968172e-05, "loss": 1.4563, "step": 9120 }, { "epoch": 0.5436881630706878, "grad_norm": 2.8441860675811768, "learning_rate": 8.371108425429194e-05, "loss": 1.3958, "step": 9122 }, { "epoch": 0.5438073667898439, "grad_norm": 2.7629597187042236, "learning_rate": 8.370409943294136e-05, "loss": 1.3648, "step": 9124 }, { "epoch": 0.5439265705089998, "grad_norm": 3.1157889366149902, "learning_rate": 8.36971134058798e-05, "loss": 1.4308, "step": 9126 }, { "epoch": 0.5440457742281559, "grad_norm": 2.779745101928711, "learning_rate": 8.36901261733572e-05, "loss": 1.3479, "step": 9128 }, { "epoch": 0.544164977947312, "grad_norm": 3.3449997901916504, "learning_rate": 8.368313773562353e-05, "loss": 1.458, "step": 9130 }, { "epoch": 0.544284181666468, "grad_norm": 3.3515422344207764, "learning_rate": 8.367614809292877e-05, "loss": 1.2127, "step": 9132 }, { "epoch": 0.544403385385624, "grad_norm": 2.9929697513580322, "learning_rate": 8.366915724552297e-05, "loss": 1.3917, "step": 9134 }, { "epoch": 0.54452258910478, "grad_norm": 2.9837796688079834, "learning_rate": 8.366216519365621e-05, "loss": 1.5068, "step": 9136 }, { "epoch": 0.5446417928239361, "grad_norm": 2.7105915546417236, "learning_rate": 8.365517193757865e-05, "loss": 1.2914, "step": 9138 }, { "epoch": 0.5447609965430922, "grad_norm": 2.873209238052368, "learning_rate": 8.364817747754042e-05, "loss": 1.4148, "step": 9140 }, { "epoch": 0.5448802002622481, "grad_norm": 3.2198116779327393, "learning_rate": 8.364118181379177e-05, "loss": 1.3663, "step": 9142 }, { "epoch": 0.5449994039814042, "grad_norm": 3.1277153491973877, "learning_rate": 8.363418494658293e-05, "loss": 1.5288, "step": 9144 }, { "epoch": 0.5451186077005603, "grad_norm": 2.856245756149292, "learning_rate": 8.362718687616422e-05, "loss": 1.3897, "step": 9146 }, { "epoch": 0.5452378114197163, "grad_norm": 3.006182909011841, "learning_rate": 8.3620187602786e-05, "loss": 1.407, "step": 9148 }, { "epoch": 0.5453570151388724, "grad_norm": 3.313291072845459, "learning_rate": 8.361318712669862e-05, "loss": 1.4255, "step": 9150 }, { "epoch": 0.5454762188580283, "grad_norm": 3.2493159770965576, "learning_rate": 8.360618544815253e-05, "loss": 1.4729, "step": 9152 }, { "epoch": 0.5455954225771844, "grad_norm": 3.133206605911255, "learning_rate": 8.359918256739821e-05, "loss": 1.2855, "step": 9154 }, { "epoch": 0.5457146262963405, "grad_norm": 2.8218917846679688, "learning_rate": 8.359217848468617e-05, "loss": 1.3012, "step": 9156 }, { "epoch": 0.5458338300154965, "grad_norm": 2.918001174926758, "learning_rate": 8.358517320026697e-05, "loss": 1.4847, "step": 9158 }, { "epoch": 0.5459530337346525, "grad_norm": 3.0330214500427246, "learning_rate": 8.357816671439121e-05, "loss": 1.4661, "step": 9160 }, { "epoch": 0.5460722374538085, "grad_norm": 3.004833221435547, "learning_rate": 8.357115902730955e-05, "loss": 1.5399, "step": 9162 }, { "epoch": 0.5461914411729646, "grad_norm": 3.250286340713501, "learning_rate": 8.356415013927265e-05, "loss": 1.4295, "step": 9164 }, { "epoch": 0.5463106448921207, "grad_norm": 3.1345255374908447, "learning_rate": 8.355714005053128e-05, "loss": 1.4069, "step": 9166 }, { "epoch": 0.5464298486112766, "grad_norm": 3.097844362258911, "learning_rate": 8.35501287613362e-05, "loss": 1.4048, "step": 9168 }, { "epoch": 0.5465490523304327, "grad_norm": 3.547527551651001, "learning_rate": 8.354311627193823e-05, "loss": 1.3684, "step": 9170 }, { "epoch": 0.5466682560495888, "grad_norm": 3.1737451553344727, "learning_rate": 8.35361025825882e-05, "loss": 1.372, "step": 9172 }, { "epoch": 0.5467874597687448, "grad_norm": 2.6747994422912598, "learning_rate": 8.352908769353705e-05, "loss": 1.3503, "step": 9174 }, { "epoch": 0.5469066634879008, "grad_norm": 2.9199576377868652, "learning_rate": 8.352207160503571e-05, "loss": 1.4427, "step": 9176 }, { "epoch": 0.5470258672070568, "grad_norm": 3.0851693153381348, "learning_rate": 8.351505431733518e-05, "loss": 1.4475, "step": 9178 }, { "epoch": 0.5471450709262129, "grad_norm": 3.3697688579559326, "learning_rate": 8.350803583068647e-05, "loss": 1.5353, "step": 9180 }, { "epoch": 0.547264274645369, "grad_norm": 2.7964024543762207, "learning_rate": 8.350101614534069e-05, "loss": 1.3985, "step": 9182 }, { "epoch": 0.547383478364525, "grad_norm": 3.279945135116577, "learning_rate": 8.349399526154894e-05, "loss": 1.5931, "step": 9184 }, { "epoch": 0.547502682083681, "grad_norm": 4.059196472167969, "learning_rate": 8.348697317956237e-05, "loss": 1.5648, "step": 9186 }, { "epoch": 0.547621885802837, "grad_norm": 3.638826847076416, "learning_rate": 8.34799498996322e-05, "loss": 1.3896, "step": 9188 }, { "epoch": 0.5477410895219931, "grad_norm": 3.1101908683776855, "learning_rate": 8.347292542200967e-05, "loss": 1.3747, "step": 9190 }, { "epoch": 0.5478602932411492, "grad_norm": 2.8422439098358154, "learning_rate": 8.346589974694607e-05, "loss": 1.4038, "step": 9192 }, { "epoch": 0.5479794969603051, "grad_norm": 3.1032400131225586, "learning_rate": 8.345887287469275e-05, "loss": 1.4192, "step": 9194 }, { "epoch": 0.5480987006794612, "grad_norm": 2.841683864593506, "learning_rate": 8.345184480550105e-05, "loss": 1.3126, "step": 9196 }, { "epoch": 0.5482179043986173, "grad_norm": 3.278046131134033, "learning_rate": 8.344481553962242e-05, "loss": 1.3971, "step": 9198 }, { "epoch": 0.5483371081177733, "grad_norm": 3.4172136783599854, "learning_rate": 8.34377850773083e-05, "loss": 1.4907, "step": 9200 }, { "epoch": 0.5484563118369293, "grad_norm": 2.984093189239502, "learning_rate": 8.343075341881019e-05, "loss": 1.412, "step": 9202 }, { "epoch": 0.5485755155560853, "grad_norm": 3.1046907901763916, "learning_rate": 8.342372056437967e-05, "loss": 1.3821, "step": 9204 }, { "epoch": 0.5486947192752414, "grad_norm": 3.706146478652954, "learning_rate": 8.34166865142683e-05, "loss": 1.385, "step": 9206 }, { "epoch": 0.5488139229943975, "grad_norm": 3.3030710220336914, "learning_rate": 8.340965126872771e-05, "loss": 1.374, "step": 9208 }, { "epoch": 0.5489331267135534, "grad_norm": 2.8742423057556152, "learning_rate": 8.34026148280096e-05, "loss": 1.2396, "step": 9210 }, { "epoch": 0.5490523304327095, "grad_norm": 3.107327699661255, "learning_rate": 8.339557719236566e-05, "loss": 1.4563, "step": 9212 }, { "epoch": 0.5491715341518656, "grad_norm": 3.0600857734680176, "learning_rate": 8.338853836204767e-05, "loss": 1.3979, "step": 9214 }, { "epoch": 0.5492907378710216, "grad_norm": 2.8841934204101562, "learning_rate": 8.338149833730742e-05, "loss": 1.3518, "step": 9216 }, { "epoch": 0.5494099415901776, "grad_norm": 3.193524122238159, "learning_rate": 8.337445711839676e-05, "loss": 1.3939, "step": 9218 }, { "epoch": 0.5495291453093336, "grad_norm": 2.9930148124694824, "learning_rate": 8.336741470556758e-05, "loss": 1.3832, "step": 9220 }, { "epoch": 0.5496483490284897, "grad_norm": 3.3656697273254395, "learning_rate": 8.336037109907182e-05, "loss": 1.4847, "step": 9222 }, { "epoch": 0.5497675527476458, "grad_norm": 3.329319477081299, "learning_rate": 8.335332629916142e-05, "loss": 1.2634, "step": 9224 }, { "epoch": 0.5498867564668017, "grad_norm": 2.944450855255127, "learning_rate": 8.334628030608844e-05, "loss": 1.4575, "step": 9226 }, { "epoch": 0.5500059601859578, "grad_norm": 3.2843832969665527, "learning_rate": 8.333923312010492e-05, "loss": 1.4676, "step": 9228 }, { "epoch": 0.5501251639051138, "grad_norm": 3.080631971359253, "learning_rate": 8.333218474146297e-05, "loss": 1.4516, "step": 9230 }, { "epoch": 0.5502443676242699, "grad_norm": 2.961078643798828, "learning_rate": 8.332513517041472e-05, "loss": 1.3028, "step": 9232 }, { "epoch": 0.550363571343426, "grad_norm": 3.216996192932129, "learning_rate": 8.331808440721236e-05, "loss": 1.401, "step": 9234 }, { "epoch": 0.5504827750625819, "grad_norm": 2.986347198486328, "learning_rate": 8.331103245210811e-05, "loss": 1.1994, "step": 9236 }, { "epoch": 0.550601978781738, "grad_norm": 3.1459720134735107, "learning_rate": 8.330397930535427e-05, "loss": 1.4981, "step": 9238 }, { "epoch": 0.5507211825008941, "grad_norm": 2.865727186203003, "learning_rate": 8.329692496720316e-05, "loss": 1.5939, "step": 9240 }, { "epoch": 0.55084038622005, "grad_norm": 2.7497010231018066, "learning_rate": 8.32898694379071e-05, "loss": 1.352, "step": 9242 }, { "epoch": 0.5509595899392061, "grad_norm": 2.932358741760254, "learning_rate": 8.328281271771852e-05, "loss": 1.3594, "step": 9244 }, { "epoch": 0.5510787936583621, "grad_norm": 3.0200228691101074, "learning_rate": 8.327575480688985e-05, "loss": 1.3068, "step": 9246 }, { "epoch": 0.5511979973775182, "grad_norm": 3.3579397201538086, "learning_rate": 8.326869570567357e-05, "loss": 1.3265, "step": 9248 }, { "epoch": 0.5513172010966743, "grad_norm": 2.8973865509033203, "learning_rate": 8.326163541432222e-05, "loss": 1.3113, "step": 9250 }, { "epoch": 0.5514364048158302, "grad_norm": 3.3253397941589355, "learning_rate": 8.325457393308838e-05, "loss": 1.4566, "step": 9252 }, { "epoch": 0.5515556085349863, "grad_norm": 3.7445316314697266, "learning_rate": 8.324751126222462e-05, "loss": 1.4641, "step": 9254 }, { "epoch": 0.5516748122541423, "grad_norm": 3.0305440425872803, "learning_rate": 8.324044740198366e-05, "loss": 1.3292, "step": 9256 }, { "epoch": 0.5517940159732984, "grad_norm": 2.7162694931030273, "learning_rate": 8.323338235261815e-05, "loss": 1.2429, "step": 9258 }, { "epoch": 0.5519132196924544, "grad_norm": 3.075139284133911, "learning_rate": 8.322631611438085e-05, "loss": 1.4144, "step": 9260 }, { "epoch": 0.5520324234116104, "grad_norm": 3.2647624015808105, "learning_rate": 8.321924868752456e-05, "loss": 1.5307, "step": 9262 }, { "epoch": 0.5521516271307665, "grad_norm": 2.5697784423828125, "learning_rate": 8.321218007230204e-05, "loss": 1.2711, "step": 9264 }, { "epoch": 0.5522708308499226, "grad_norm": 2.9217634201049805, "learning_rate": 8.320511026896624e-05, "loss": 1.3376, "step": 9266 }, { "epoch": 0.5523900345690785, "grad_norm": 3.167191982269287, "learning_rate": 8.319803927777004e-05, "loss": 1.4163, "step": 9268 }, { "epoch": 0.5525092382882346, "grad_norm": 3.4097957611083984, "learning_rate": 8.319096709896636e-05, "loss": 1.4251, "step": 9270 }, { "epoch": 0.5526284420073906, "grad_norm": 3.25508713722229, "learning_rate": 8.318389373280824e-05, "loss": 1.3965, "step": 9272 }, { "epoch": 0.5527476457265467, "grad_norm": 3.20245623588562, "learning_rate": 8.317681917954872e-05, "loss": 1.3956, "step": 9274 }, { "epoch": 0.5528668494457027, "grad_norm": 3.4352126121520996, "learning_rate": 8.316974343944084e-05, "loss": 1.3044, "step": 9276 }, { "epoch": 0.5529860531648587, "grad_norm": 3.158440589904785, "learning_rate": 8.316266651273776e-05, "loss": 1.5418, "step": 9278 }, { "epoch": 0.5531052568840148, "grad_norm": 2.819467544555664, "learning_rate": 8.315558839969264e-05, "loss": 1.3406, "step": 9280 }, { "epoch": 0.5532244606031708, "grad_norm": 2.7980451583862305, "learning_rate": 8.314850910055866e-05, "loss": 1.4448, "step": 9282 }, { "epoch": 0.5533436643223268, "grad_norm": 3.47700834274292, "learning_rate": 8.314142861558912e-05, "loss": 1.4134, "step": 9284 }, { "epoch": 0.5534628680414829, "grad_norm": 3.192470073699951, "learning_rate": 8.313434694503727e-05, "loss": 1.6583, "step": 9286 }, { "epoch": 0.5535820717606389, "grad_norm": 3.0726993083953857, "learning_rate": 8.312726408915646e-05, "loss": 1.3147, "step": 9288 }, { "epoch": 0.553701275479795, "grad_norm": 2.974092483520508, "learning_rate": 8.312018004820008e-05, "loss": 1.3577, "step": 9290 }, { "epoch": 0.553820479198951, "grad_norm": 2.7609100341796875, "learning_rate": 8.311309482242154e-05, "loss": 1.3413, "step": 9292 }, { "epoch": 0.553939682918107, "grad_norm": 3.15144681930542, "learning_rate": 8.31060084120743e-05, "loss": 1.5142, "step": 9294 }, { "epoch": 0.5540588866372631, "grad_norm": 3.329650402069092, "learning_rate": 8.309892081741186e-05, "loss": 1.5941, "step": 9296 }, { "epoch": 0.5541780903564191, "grad_norm": 2.8904526233673096, "learning_rate": 8.309183203868778e-05, "loss": 1.2571, "step": 9298 }, { "epoch": 0.5542972940755752, "grad_norm": 3.404552698135376, "learning_rate": 8.308474207615564e-05, "loss": 1.5305, "step": 9300 }, { "epoch": 0.5544164977947312, "grad_norm": 2.913668394088745, "learning_rate": 8.30776509300691e-05, "loss": 1.287, "step": 9302 }, { "epoch": 0.5545357015138872, "grad_norm": 3.2836856842041016, "learning_rate": 8.30705586006818e-05, "loss": 1.4509, "step": 9304 }, { "epoch": 0.5546549052330433, "grad_norm": 2.8385040760040283, "learning_rate": 8.306346508824746e-05, "loss": 1.3269, "step": 9306 }, { "epoch": 0.5547741089521994, "grad_norm": 3.3464620113372803, "learning_rate": 8.305637039301986e-05, "loss": 1.5467, "step": 9308 }, { "epoch": 0.5548933126713553, "grad_norm": 3.1301801204681396, "learning_rate": 8.304927451525277e-05, "loss": 1.5204, "step": 9310 }, { "epoch": 0.5550125163905114, "grad_norm": 3.181194305419922, "learning_rate": 8.304217745520008e-05, "loss": 1.3976, "step": 9312 }, { "epoch": 0.5551317201096674, "grad_norm": 2.9244725704193115, "learning_rate": 8.303507921311563e-05, "loss": 1.3797, "step": 9314 }, { "epoch": 0.5552509238288235, "grad_norm": 2.977588415145874, "learning_rate": 8.302797978925338e-05, "loss": 1.4037, "step": 9316 }, { "epoch": 0.5553701275479795, "grad_norm": 3.2960476875305176, "learning_rate": 8.302087918386728e-05, "loss": 1.3594, "step": 9318 }, { "epoch": 0.5554893312671355, "grad_norm": 2.8158276081085205, "learning_rate": 8.301377739721137e-05, "loss": 1.3798, "step": 9320 }, { "epoch": 0.5556085349862916, "grad_norm": 3.2496511936187744, "learning_rate": 8.300667442953967e-05, "loss": 1.421, "step": 9322 }, { "epoch": 0.5557277387054476, "grad_norm": 2.925330877304077, "learning_rate": 8.29995702811063e-05, "loss": 1.447, "step": 9324 }, { "epoch": 0.5558469424246036, "grad_norm": 3.0630390644073486, "learning_rate": 8.29924649521654e-05, "loss": 1.4271, "step": 9326 }, { "epoch": 0.5559661461437597, "grad_norm": 3.2246196269989014, "learning_rate": 8.298535844297112e-05, "loss": 1.3806, "step": 9328 }, { "epoch": 0.5560853498629157, "grad_norm": 3.4236440658569336, "learning_rate": 8.297825075377774e-05, "loss": 1.4104, "step": 9330 }, { "epoch": 0.5562045535820718, "grad_norm": 3.104865312576294, "learning_rate": 8.297114188483949e-05, "loss": 1.311, "step": 9332 }, { "epoch": 0.5563237573012279, "grad_norm": 2.7538938522338867, "learning_rate": 8.296403183641068e-05, "loss": 1.2917, "step": 9334 }, { "epoch": 0.5564429610203838, "grad_norm": 3.0760910511016846, "learning_rate": 8.295692060874567e-05, "loss": 1.5277, "step": 9336 }, { "epoch": 0.5565621647395399, "grad_norm": 3.2573423385620117, "learning_rate": 8.294980820209884e-05, "loss": 1.5225, "step": 9338 }, { "epoch": 0.5566813684586959, "grad_norm": 3.0434188842773438, "learning_rate": 8.294269461672465e-05, "loss": 1.3168, "step": 9340 }, { "epoch": 0.556800572177852, "grad_norm": 3.138303518295288, "learning_rate": 8.293557985287754e-05, "loss": 1.458, "step": 9342 }, { "epoch": 0.556919775897008, "grad_norm": 3.0319578647613525, "learning_rate": 8.292846391081207e-05, "loss": 1.3429, "step": 9344 }, { "epoch": 0.557038979616164, "grad_norm": 2.814462661743164, "learning_rate": 8.292134679078278e-05, "loss": 1.3677, "step": 9346 }, { "epoch": 0.5571581833353201, "grad_norm": 2.964353084564209, "learning_rate": 8.291422849304427e-05, "loss": 1.4517, "step": 9348 }, { "epoch": 0.557277387054476, "grad_norm": 2.9790420532226562, "learning_rate": 8.29071090178512e-05, "loss": 1.3671, "step": 9350 }, { "epoch": 0.5573965907736321, "grad_norm": 2.8542771339416504, "learning_rate": 8.289998836545824e-05, "loss": 1.3161, "step": 9352 }, { "epoch": 0.5575157944927882, "grad_norm": 3.0611093044281006, "learning_rate": 8.289286653612011e-05, "loss": 1.4298, "step": 9354 }, { "epoch": 0.5576349982119442, "grad_norm": 3.0424818992614746, "learning_rate": 8.288574353009164e-05, "loss": 1.3933, "step": 9356 }, { "epoch": 0.5577542019311003, "grad_norm": 3.062162399291992, "learning_rate": 8.28786193476276e-05, "loss": 1.4687, "step": 9358 }, { "epoch": 0.5578734056502563, "grad_norm": 3.210543632507324, "learning_rate": 8.287149398898284e-05, "loss": 1.3612, "step": 9360 }, { "epoch": 0.5579926093694123, "grad_norm": 3.195469856262207, "learning_rate": 8.286436745441227e-05, "loss": 1.3175, "step": 9362 }, { "epoch": 0.5581118130885684, "grad_norm": 3.0763373374938965, "learning_rate": 8.285723974417084e-05, "loss": 1.436, "step": 9364 }, { "epoch": 0.5582310168077244, "grad_norm": 2.961385488510132, "learning_rate": 8.285011085851353e-05, "loss": 1.4987, "step": 9366 }, { "epoch": 0.5583502205268804, "grad_norm": 2.9778828620910645, "learning_rate": 8.284298079769534e-05, "loss": 1.4164, "step": 9368 }, { "epoch": 0.5584694242460365, "grad_norm": 2.986323118209839, "learning_rate": 8.283584956197137e-05, "loss": 1.3787, "step": 9370 }, { "epoch": 0.5585886279651925, "grad_norm": 3.189087152481079, "learning_rate": 8.282871715159672e-05, "loss": 1.4657, "step": 9372 }, { "epoch": 0.5587078316843486, "grad_norm": 3.11084246635437, "learning_rate": 8.282158356682652e-05, "loss": 1.3472, "step": 9374 }, { "epoch": 0.5588270354035045, "grad_norm": 3.097607374191284, "learning_rate": 8.281444880791599e-05, "loss": 1.6149, "step": 9376 }, { "epoch": 0.5589462391226606, "grad_norm": 3.2332558631896973, "learning_rate": 8.280731287512035e-05, "loss": 1.5895, "step": 9378 }, { "epoch": 0.5590654428418167, "grad_norm": 3.030198574066162, "learning_rate": 8.280017576869488e-05, "loss": 1.3142, "step": 9380 }, { "epoch": 0.5591846465609727, "grad_norm": 3.3612730503082275, "learning_rate": 8.27930374888949e-05, "loss": 1.3801, "step": 9382 }, { "epoch": 0.5593038502801287, "grad_norm": 3.2333571910858154, "learning_rate": 8.278589803597576e-05, "loss": 1.4263, "step": 9384 }, { "epoch": 0.5594230539992848, "grad_norm": 3.156280279159546, "learning_rate": 8.277875741019287e-05, "loss": 1.3817, "step": 9386 }, { "epoch": 0.5595422577184408, "grad_norm": 2.950395345687866, "learning_rate": 8.277161561180169e-05, "loss": 1.3498, "step": 9388 }, { "epoch": 0.5596614614375969, "grad_norm": 3.0694475173950195, "learning_rate": 8.276447264105768e-05, "loss": 1.3355, "step": 9390 }, { "epoch": 0.5597806651567528, "grad_norm": 3.0540261268615723, "learning_rate": 8.275732849821637e-05, "loss": 1.2961, "step": 9392 }, { "epoch": 0.5598998688759089, "grad_norm": 2.918633222579956, "learning_rate": 8.275018318353338e-05, "loss": 1.3533, "step": 9394 }, { "epoch": 0.560019072595065, "grad_norm": 3.197822332382202, "learning_rate": 8.274303669726426e-05, "loss": 1.2667, "step": 9396 }, { "epoch": 0.560138276314221, "grad_norm": 2.8894872665405273, "learning_rate": 8.27358890396647e-05, "loss": 1.4321, "step": 9398 }, { "epoch": 0.560257480033377, "grad_norm": 2.9784622192382812, "learning_rate": 8.272874021099036e-05, "loss": 1.4519, "step": 9400 }, { "epoch": 0.5603766837525331, "grad_norm": 3.3344786167144775, "learning_rate": 8.272159021149701e-05, "loss": 1.566, "step": 9402 }, { "epoch": 0.5604958874716891, "grad_norm": 2.992908000946045, "learning_rate": 8.271443904144045e-05, "loss": 1.4079, "step": 9404 }, { "epoch": 0.5606150911908452, "grad_norm": 2.8044393062591553, "learning_rate": 8.270728670107643e-05, "loss": 1.3281, "step": 9406 }, { "epoch": 0.5607342949100012, "grad_norm": 3.0260393619537354, "learning_rate": 8.27001331906609e-05, "loss": 1.3755, "step": 9408 }, { "epoch": 0.5608534986291572, "grad_norm": 3.4052846431732178, "learning_rate": 8.26929785104497e-05, "loss": 1.3653, "step": 9410 }, { "epoch": 0.5609727023483133, "grad_norm": 3.4522881507873535, "learning_rate": 8.268582266069881e-05, "loss": 1.563, "step": 9412 }, { "epoch": 0.5610919060674693, "grad_norm": 2.870565414428711, "learning_rate": 8.26786656416642e-05, "loss": 1.3492, "step": 9414 }, { "epoch": 0.5612111097866254, "grad_norm": 3.170363426208496, "learning_rate": 8.267150745360192e-05, "loss": 1.3675, "step": 9416 }, { "epoch": 0.5613303135057813, "grad_norm": 3.0429797172546387, "learning_rate": 8.266434809676802e-05, "loss": 1.4554, "step": 9418 }, { "epoch": 0.5614495172249374, "grad_norm": 3.087540626525879, "learning_rate": 8.265718757141865e-05, "loss": 1.5172, "step": 9420 }, { "epoch": 0.5615687209440935, "grad_norm": 3.3540561199188232, "learning_rate": 8.265002587780993e-05, "loss": 1.4723, "step": 9422 }, { "epoch": 0.5616879246632495, "grad_norm": 3.0236377716064453, "learning_rate": 8.264286301619808e-05, "loss": 1.3499, "step": 9424 }, { "epoch": 0.5618071283824055, "grad_norm": 3.025256633758545, "learning_rate": 8.263569898683934e-05, "loss": 1.4966, "step": 9426 }, { "epoch": 0.5619263321015616, "grad_norm": 3.5381171703338623, "learning_rate": 8.262853378998999e-05, "loss": 1.3714, "step": 9428 }, { "epoch": 0.5620455358207176, "grad_norm": 2.8093080520629883, "learning_rate": 8.262136742590632e-05, "loss": 1.4311, "step": 9430 }, { "epoch": 0.5621647395398737, "grad_norm": 3.3325631618499756, "learning_rate": 8.261419989484474e-05, "loss": 1.4435, "step": 9432 }, { "epoch": 0.5622839432590296, "grad_norm": 3.3689985275268555, "learning_rate": 8.260703119706165e-05, "loss": 1.411, "step": 9434 }, { "epoch": 0.5624031469781857, "grad_norm": 3.099550247192383, "learning_rate": 8.259986133281348e-05, "loss": 1.4448, "step": 9436 }, { "epoch": 0.5625223506973418, "grad_norm": 2.9891469478607178, "learning_rate": 8.259269030235672e-05, "loss": 1.1923, "step": 9438 }, { "epoch": 0.5626415544164978, "grad_norm": 3.114739418029785, "learning_rate": 8.258551810594795e-05, "loss": 1.5021, "step": 9440 }, { "epoch": 0.5627607581356538, "grad_norm": 3.1488893032073975, "learning_rate": 8.257834474384367e-05, "loss": 1.326, "step": 9442 }, { "epoch": 0.5628799618548098, "grad_norm": 3.268359899520874, "learning_rate": 8.257117021630055e-05, "loss": 1.4356, "step": 9444 }, { "epoch": 0.5629991655739659, "grad_norm": 3.323416233062744, "learning_rate": 8.256399452357523e-05, "loss": 1.3609, "step": 9446 }, { "epoch": 0.563118369293122, "grad_norm": 3.1035706996917725, "learning_rate": 8.25568176659244e-05, "loss": 1.5258, "step": 9448 }, { "epoch": 0.563237573012278, "grad_norm": 3.0998551845550537, "learning_rate": 8.254963964360483e-05, "loss": 1.6082, "step": 9450 }, { "epoch": 0.563356776731434, "grad_norm": 2.690319299697876, "learning_rate": 8.254246045687326e-05, "loss": 1.2387, "step": 9452 }, { "epoch": 0.5634759804505901, "grad_norm": 3.2586374282836914, "learning_rate": 8.253528010598655e-05, "loss": 1.5114, "step": 9454 }, { "epoch": 0.5635951841697461, "grad_norm": 3.487576484680176, "learning_rate": 8.252809859120155e-05, "loss": 1.4947, "step": 9456 }, { "epoch": 0.5637143878889022, "grad_norm": 2.7470998764038086, "learning_rate": 8.252091591277515e-05, "loss": 1.3884, "step": 9458 }, { "epoch": 0.5638335916080581, "grad_norm": 2.9609909057617188, "learning_rate": 8.251373207096434e-05, "loss": 1.3039, "step": 9460 }, { "epoch": 0.5639527953272142, "grad_norm": 3.307243585586548, "learning_rate": 8.250654706602609e-05, "loss": 1.3651, "step": 9462 }, { "epoch": 0.5640719990463703, "grad_norm": 3.066185235977173, "learning_rate": 8.249936089821742e-05, "loss": 1.438, "step": 9464 }, { "epoch": 0.5641912027655263, "grad_norm": 3.1443774700164795, "learning_rate": 8.249217356779544e-05, "loss": 1.4139, "step": 9466 }, { "epoch": 0.5643104064846823, "grad_norm": 3.386497974395752, "learning_rate": 8.248498507501721e-05, "loss": 1.5996, "step": 9468 }, { "epoch": 0.5644296102038383, "grad_norm": 2.4486660957336426, "learning_rate": 8.247779542013994e-05, "loss": 1.3193, "step": 9470 }, { "epoch": 0.5645488139229944, "grad_norm": 3.0494191646575928, "learning_rate": 8.24706046034208e-05, "loss": 1.3737, "step": 9472 }, { "epoch": 0.5646680176421505, "grad_norm": 2.9972195625305176, "learning_rate": 8.246341262511703e-05, "loss": 1.3165, "step": 9474 }, { "epoch": 0.5647872213613064, "grad_norm": 2.8888988494873047, "learning_rate": 8.245621948548592e-05, "loss": 1.4697, "step": 9476 }, { "epoch": 0.5649064250804625, "grad_norm": 3.119257926940918, "learning_rate": 8.24490251847848e-05, "loss": 1.407, "step": 9478 }, { "epoch": 0.5650256287996186, "grad_norm": 3.368350028991699, "learning_rate": 8.244182972327101e-05, "loss": 1.5717, "step": 9480 }, { "epoch": 0.5651448325187746, "grad_norm": 3.045966625213623, "learning_rate": 8.243463310120199e-05, "loss": 1.369, "step": 9482 }, { "epoch": 0.5652640362379306, "grad_norm": 2.7769601345062256, "learning_rate": 8.242743531883514e-05, "loss": 1.4084, "step": 9484 }, { "epoch": 0.5653832399570866, "grad_norm": 2.9329757690429688, "learning_rate": 8.242023637642802e-05, "loss": 1.4285, "step": 9486 }, { "epoch": 0.5655024436762427, "grad_norm": 3.2599027156829834, "learning_rate": 8.24130362742381e-05, "loss": 1.3574, "step": 9488 }, { "epoch": 0.5656216473953988, "grad_norm": 3.0210955142974854, "learning_rate": 8.240583501252297e-05, "loss": 1.3286, "step": 9490 }, { "epoch": 0.5657408511145547, "grad_norm": 2.9712417125701904, "learning_rate": 8.239863259154025e-05, "loss": 1.4145, "step": 9492 }, { "epoch": 0.5658600548337108, "grad_norm": 3.3387868404388428, "learning_rate": 8.239142901154759e-05, "loss": 1.4461, "step": 9494 }, { "epoch": 0.5659792585528669, "grad_norm": 3.0744879245758057, "learning_rate": 8.238422427280268e-05, "loss": 1.3679, "step": 9496 }, { "epoch": 0.5660984622720229, "grad_norm": 3.247741460800171, "learning_rate": 8.237701837556329e-05, "loss": 1.5729, "step": 9498 }, { "epoch": 0.566217665991179, "grad_norm": 3.1395018100738525, "learning_rate": 8.236981132008716e-05, "loss": 1.3957, "step": 9500 }, { "epoch": 0.5663368697103349, "grad_norm": 2.8859307765960693, "learning_rate": 8.236260310663214e-05, "loss": 1.3416, "step": 9502 }, { "epoch": 0.566456073429491, "grad_norm": 2.853565216064453, "learning_rate": 8.235539373545606e-05, "loss": 1.4167, "step": 9504 }, { "epoch": 0.5665752771486471, "grad_norm": 3.04508376121521, "learning_rate": 8.234818320681684e-05, "loss": 1.3928, "step": 9506 }, { "epoch": 0.566694480867803, "grad_norm": 3.1971211433410645, "learning_rate": 8.234097152097245e-05, "loss": 1.4715, "step": 9508 }, { "epoch": 0.5668136845869591, "grad_norm": 3.101461887359619, "learning_rate": 8.233375867818084e-05, "loss": 1.528, "step": 9510 }, { "epoch": 0.5669328883061151, "grad_norm": 2.964548110961914, "learning_rate": 8.232654467870005e-05, "loss": 1.4016, "step": 9512 }, { "epoch": 0.5670520920252712, "grad_norm": 2.8691248893737793, "learning_rate": 8.231932952278818e-05, "loss": 1.4702, "step": 9514 }, { "epoch": 0.5671712957444273, "grad_norm": 2.8234589099884033, "learning_rate": 8.231211321070328e-05, "loss": 1.3503, "step": 9516 }, { "epoch": 0.5672904994635832, "grad_norm": 2.985288619995117, "learning_rate": 8.230489574270355e-05, "loss": 1.4143, "step": 9518 }, { "epoch": 0.5674097031827393, "grad_norm": 3.0312862396240234, "learning_rate": 8.229767711904717e-05, "loss": 1.5829, "step": 9520 }, { "epoch": 0.5675289069018954, "grad_norm": 2.8522894382476807, "learning_rate": 8.229045733999237e-05, "loss": 1.3308, "step": 9522 }, { "epoch": 0.5676481106210514, "grad_norm": 2.9237051010131836, "learning_rate": 8.228323640579744e-05, "loss": 1.331, "step": 9524 }, { "epoch": 0.5677673143402074, "grad_norm": 2.9956443309783936, "learning_rate": 8.227601431672068e-05, "loss": 1.2441, "step": 9526 }, { "epoch": 0.5678865180593634, "grad_norm": 3.044170618057251, "learning_rate": 8.226879107302046e-05, "loss": 1.4573, "step": 9528 }, { "epoch": 0.5680057217785195, "grad_norm": 2.9754557609558105, "learning_rate": 8.226156667495519e-05, "loss": 1.2612, "step": 9530 }, { "epoch": 0.5681249254976756, "grad_norm": 2.8628315925598145, "learning_rate": 8.225434112278328e-05, "loss": 1.3376, "step": 9532 }, { "epoch": 0.5682441292168315, "grad_norm": 3.274616241455078, "learning_rate": 8.224711441676323e-05, "loss": 1.3858, "step": 9534 }, { "epoch": 0.5683633329359876, "grad_norm": 3.1497790813446045, "learning_rate": 8.223988655715356e-05, "loss": 1.4794, "step": 9536 }, { "epoch": 0.5684825366551436, "grad_norm": 3.133776903152466, "learning_rate": 8.223265754421285e-05, "loss": 1.3873, "step": 9538 }, { "epoch": 0.5686017403742997, "grad_norm": 3.1129212379455566, "learning_rate": 8.222542737819969e-05, "loss": 1.371, "step": 9540 }, { "epoch": 0.5687209440934557, "grad_norm": 3.1717841625213623, "learning_rate": 8.221819605937275e-05, "loss": 1.4358, "step": 9542 }, { "epoch": 0.5688401478126117, "grad_norm": 3.546149253845215, "learning_rate": 8.22109635879907e-05, "loss": 1.5077, "step": 9544 }, { "epoch": 0.5689593515317678, "grad_norm": 3.2929935455322266, "learning_rate": 8.220372996431228e-05, "loss": 1.4701, "step": 9546 }, { "epoch": 0.5690785552509239, "grad_norm": 3.170454263687134, "learning_rate": 8.219649518859623e-05, "loss": 1.5395, "step": 9548 }, { "epoch": 0.5691977589700798, "grad_norm": 3.4059972763061523, "learning_rate": 8.21892592611014e-05, "loss": 1.3215, "step": 9550 }, { "epoch": 0.5693169626892359, "grad_norm": 3.3255038261413574, "learning_rate": 8.218202218208663e-05, "loss": 1.4265, "step": 9552 }, { "epoch": 0.5694361664083919, "grad_norm": 3.188709259033203, "learning_rate": 8.217478395181083e-05, "loss": 1.3212, "step": 9554 }, { "epoch": 0.569555370127548, "grad_norm": 3.107375144958496, "learning_rate": 8.21675445705329e-05, "loss": 1.4125, "step": 9556 }, { "epoch": 0.5696745738467041, "grad_norm": 3.1853318214416504, "learning_rate": 8.216030403851187e-05, "loss": 1.3419, "step": 9558 }, { "epoch": 0.56979377756586, "grad_norm": 2.991475820541382, "learning_rate": 8.215306235600672e-05, "loss": 1.3432, "step": 9560 }, { "epoch": 0.5699129812850161, "grad_norm": 3.8766214847564697, "learning_rate": 8.214581952327652e-05, "loss": 1.6041, "step": 9562 }, { "epoch": 0.5700321850041721, "grad_norm": 2.9881350994110107, "learning_rate": 8.213857554058036e-05, "loss": 1.4476, "step": 9564 }, { "epoch": 0.5701513887233282, "grad_norm": 3.0649282932281494, "learning_rate": 8.213133040817737e-05, "loss": 1.4872, "step": 9566 }, { "epoch": 0.5702705924424842, "grad_norm": 3.0523743629455566, "learning_rate": 8.21240841263268e-05, "loss": 1.2989, "step": 9568 }, { "epoch": 0.5703897961616402, "grad_norm": 3.1126699447631836, "learning_rate": 8.21168366952878e-05, "loss": 1.3771, "step": 9570 }, { "epoch": 0.5705089998807963, "grad_norm": 2.8651702404022217, "learning_rate": 8.210958811531968e-05, "loss": 1.3526, "step": 9572 }, { "epoch": 0.5706282035999524, "grad_norm": 2.9104137420654297, "learning_rate": 8.210233838668172e-05, "loss": 1.2264, "step": 9574 }, { "epoch": 0.5707474073191083, "grad_norm": 3.034221887588501, "learning_rate": 8.209508750963328e-05, "loss": 1.3282, "step": 9576 }, { "epoch": 0.5708666110382644, "grad_norm": 2.901484727859497, "learning_rate": 8.208783548443377e-05, "loss": 1.3171, "step": 9578 }, { "epoch": 0.5709858147574204, "grad_norm": 3.1419169902801514, "learning_rate": 8.208058231134256e-05, "loss": 1.4503, "step": 9580 }, { "epoch": 0.5711050184765765, "grad_norm": 3.214456081390381, "learning_rate": 8.207332799061919e-05, "loss": 1.3233, "step": 9582 }, { "epoch": 0.5712242221957325, "grad_norm": 3.0890390872955322, "learning_rate": 8.206607252252311e-05, "loss": 1.2883, "step": 9584 }, { "epoch": 0.5713434259148885, "grad_norm": 3.2437336444854736, "learning_rate": 8.205881590731394e-05, "loss": 1.4741, "step": 9586 }, { "epoch": 0.5714626296340446, "grad_norm": 3.0525944232940674, "learning_rate": 8.20515581452512e-05, "loss": 1.3444, "step": 9588 }, { "epoch": 0.5715818333532007, "grad_norm": 3.0988552570343018, "learning_rate": 8.204429923659458e-05, "loss": 1.2547, "step": 9590 }, { "epoch": 0.5717010370723566, "grad_norm": 2.975290298461914, "learning_rate": 8.203703918160375e-05, "loss": 1.3501, "step": 9592 }, { "epoch": 0.5718202407915127, "grad_norm": 3.1281228065490723, "learning_rate": 8.20297779805384e-05, "loss": 1.3581, "step": 9594 }, { "epoch": 0.5719394445106687, "grad_norm": 2.888643503189087, "learning_rate": 8.20225156336583e-05, "loss": 1.5396, "step": 9596 }, { "epoch": 0.5720586482298248, "grad_norm": 3.0676939487457275, "learning_rate": 8.201525214122326e-05, "loss": 1.4145, "step": 9598 }, { "epoch": 0.5721778519489809, "grad_norm": 3.2389559745788574, "learning_rate": 8.200798750349312e-05, "loss": 1.4823, "step": 9600 }, { "epoch": 0.5722970556681368, "grad_norm": 3.166698455810547, "learning_rate": 8.200072172072774e-05, "loss": 1.3225, "step": 9602 }, { "epoch": 0.5724162593872929, "grad_norm": 3.0016391277313232, "learning_rate": 8.199345479318705e-05, "loss": 1.4404, "step": 9604 }, { "epoch": 0.5725354631064489, "grad_norm": 3.483384370803833, "learning_rate": 8.198618672113103e-05, "loss": 1.4829, "step": 9606 }, { "epoch": 0.572654666825605, "grad_norm": 2.8026387691497803, "learning_rate": 8.197891750481966e-05, "loss": 1.4153, "step": 9608 }, { "epoch": 0.572773870544761, "grad_norm": 2.875307321548462, "learning_rate": 8.197164714451299e-05, "loss": 1.4473, "step": 9610 }, { "epoch": 0.572893074263917, "grad_norm": 2.6718997955322266, "learning_rate": 8.196437564047115e-05, "loss": 1.366, "step": 9612 }, { "epoch": 0.5730122779830731, "grad_norm": 3.1387386322021484, "learning_rate": 8.195710299295419e-05, "loss": 1.6968, "step": 9614 }, { "epoch": 0.5731314817022292, "grad_norm": 2.7365493774414062, "learning_rate": 8.194982920222233e-05, "loss": 1.3478, "step": 9616 }, { "epoch": 0.5732506854213851, "grad_norm": 3.348233699798584, "learning_rate": 8.194255426853577e-05, "loss": 1.4591, "step": 9618 }, { "epoch": 0.5733698891405412, "grad_norm": 3.02673077583313, "learning_rate": 8.193527819215474e-05, "loss": 1.4371, "step": 9620 }, { "epoch": 0.5734890928596972, "grad_norm": 3.312790870666504, "learning_rate": 8.192800097333955e-05, "loss": 1.5011, "step": 9622 }, { "epoch": 0.5736082965788533, "grad_norm": 2.8581230640411377, "learning_rate": 8.192072261235053e-05, "loss": 1.3881, "step": 9624 }, { "epoch": 0.5737275002980093, "grad_norm": 2.9699788093566895, "learning_rate": 8.191344310944803e-05, "loss": 1.4039, "step": 9626 }, { "epoch": 0.5738467040171653, "grad_norm": 2.915688991546631, "learning_rate": 8.190616246489249e-05, "loss": 1.241, "step": 9628 }, { "epoch": 0.5739659077363214, "grad_norm": 3.693899393081665, "learning_rate": 8.189888067894436e-05, "loss": 1.3847, "step": 9630 }, { "epoch": 0.5740851114554774, "grad_norm": 2.8462162017822266, "learning_rate": 8.189159775186413e-05, "loss": 1.4605, "step": 9632 }, { "epoch": 0.5742043151746334, "grad_norm": 3.027454376220703, "learning_rate": 8.18843136839123e-05, "loss": 1.3289, "step": 9634 }, { "epoch": 0.5743235188937895, "grad_norm": 3.062293291091919, "learning_rate": 8.187702847534952e-05, "loss": 1.4782, "step": 9636 }, { "epoch": 0.5744427226129455, "grad_norm": 3.214975357055664, "learning_rate": 8.186974212643634e-05, "loss": 1.6262, "step": 9638 }, { "epoch": 0.5745619263321016, "grad_norm": 3.20194673538208, "learning_rate": 8.186245463743345e-05, "loss": 1.3687, "step": 9640 }, { "epoch": 0.5746811300512576, "grad_norm": 3.0940916538238525, "learning_rate": 8.185516600860154e-05, "loss": 1.5162, "step": 9642 }, { "epoch": 0.5748003337704136, "grad_norm": 3.136789321899414, "learning_rate": 8.184787624020135e-05, "loss": 1.4258, "step": 9644 }, { "epoch": 0.5749195374895697, "grad_norm": 3.127112865447998, "learning_rate": 8.184058533249366e-05, "loss": 1.2822, "step": 9646 }, { "epoch": 0.5750387412087257, "grad_norm": 3.217841863632202, "learning_rate": 8.18332932857393e-05, "loss": 1.4931, "step": 9648 }, { "epoch": 0.5751579449278817, "grad_norm": 3.379267692565918, "learning_rate": 8.182600010019912e-05, "loss": 1.4933, "step": 9650 }, { "epoch": 0.5752771486470378, "grad_norm": 2.804032802581787, "learning_rate": 8.181870577613402e-05, "loss": 1.4098, "step": 9652 }, { "epoch": 0.5753963523661938, "grad_norm": 3.143519878387451, "learning_rate": 8.181141031380495e-05, "loss": 1.3686, "step": 9654 }, { "epoch": 0.5755155560853499, "grad_norm": 2.966606855392456, "learning_rate": 8.180411371347289e-05, "loss": 1.4165, "step": 9656 }, { "epoch": 0.5756347598045058, "grad_norm": 3.0441651344299316, "learning_rate": 8.179681597539885e-05, "loss": 1.3317, "step": 9658 }, { "epoch": 0.5757539635236619, "grad_norm": 2.75431227684021, "learning_rate": 8.178951709984394e-05, "loss": 1.3812, "step": 9660 }, { "epoch": 0.575873167242818, "grad_norm": 3.171396255493164, "learning_rate": 8.178221708706921e-05, "loss": 1.4062, "step": 9662 }, { "epoch": 0.575992370961974, "grad_norm": 2.9140830039978027, "learning_rate": 8.177491593733586e-05, "loss": 1.4046, "step": 9664 }, { "epoch": 0.5761115746811301, "grad_norm": 2.898369789123535, "learning_rate": 8.176761365090504e-05, "loss": 1.2823, "step": 9666 }, { "epoch": 0.5762307784002861, "grad_norm": 3.321345567703247, "learning_rate": 8.176031022803798e-05, "loss": 1.557, "step": 9668 }, { "epoch": 0.5763499821194421, "grad_norm": 2.9449384212493896, "learning_rate": 8.175300566899597e-05, "loss": 1.3169, "step": 9670 }, { "epoch": 0.5764691858385982, "grad_norm": 3.1126863956451416, "learning_rate": 8.174569997404029e-05, "loss": 1.4764, "step": 9672 }, { "epoch": 0.5765883895577542, "grad_norm": 3.1149847507476807, "learning_rate": 8.173839314343231e-05, "loss": 1.3671, "step": 9674 }, { "epoch": 0.5767075932769102, "grad_norm": 2.9732604026794434, "learning_rate": 8.173108517743342e-05, "loss": 1.3629, "step": 9676 }, { "epoch": 0.5768267969960663, "grad_norm": 3.1414263248443604, "learning_rate": 8.172377607630503e-05, "loss": 1.4024, "step": 9678 }, { "epoch": 0.5769460007152223, "grad_norm": 3.081202507019043, "learning_rate": 8.171646584030866e-05, "loss": 1.3734, "step": 9680 }, { "epoch": 0.5770652044343784, "grad_norm": 3.3547351360321045, "learning_rate": 8.170915446970577e-05, "loss": 1.4571, "step": 9682 }, { "epoch": 0.5771844081535344, "grad_norm": 3.097313165664673, "learning_rate": 8.170184196475795e-05, "loss": 1.4231, "step": 9684 }, { "epoch": 0.5773036118726904, "grad_norm": 2.7145681381225586, "learning_rate": 8.169452832572675e-05, "loss": 1.1631, "step": 9686 }, { "epoch": 0.5774228155918465, "grad_norm": 3.501237154006958, "learning_rate": 8.168721355287384e-05, "loss": 1.4099, "step": 9688 }, { "epoch": 0.5775420193110025, "grad_norm": 3.063915252685547, "learning_rate": 8.16798976464609e-05, "loss": 1.3328, "step": 9690 }, { "epoch": 0.5776612230301585, "grad_norm": 3.4351940155029297, "learning_rate": 8.167258060674961e-05, "loss": 1.6047, "step": 9692 }, { "epoch": 0.5777804267493146, "grad_norm": 2.9934139251708984, "learning_rate": 8.166526243400175e-05, "loss": 1.3487, "step": 9694 }, { "epoch": 0.5778996304684706, "grad_norm": 3.709120988845825, "learning_rate": 8.16579431284791e-05, "loss": 1.6158, "step": 9696 }, { "epoch": 0.5780188341876267, "grad_norm": 2.8675873279571533, "learning_rate": 8.165062269044353e-05, "loss": 1.5977, "step": 9698 }, { "epoch": 0.5781380379067826, "grad_norm": 3.2862775325775146, "learning_rate": 8.164330112015688e-05, "loss": 1.372, "step": 9700 }, { "epoch": 0.5782572416259387, "grad_norm": 3.095885753631592, "learning_rate": 8.163597841788108e-05, "loss": 1.4335, "step": 9702 }, { "epoch": 0.5783764453450948, "grad_norm": 3.30173921585083, "learning_rate": 8.162865458387811e-05, "loss": 1.3986, "step": 9704 }, { "epoch": 0.5784956490642508, "grad_norm": 3.126744270324707, "learning_rate": 8.162132961840994e-05, "loss": 1.275, "step": 9706 }, { "epoch": 0.5786148527834069, "grad_norm": 2.9025485515594482, "learning_rate": 8.16140035217386e-05, "loss": 1.2224, "step": 9708 }, { "epoch": 0.5787340565025629, "grad_norm": 3.126354932785034, "learning_rate": 8.160667629412621e-05, "loss": 1.4083, "step": 9710 }, { "epoch": 0.5788532602217189, "grad_norm": 3.2206971645355225, "learning_rate": 8.159934793583486e-05, "loss": 1.3196, "step": 9712 }, { "epoch": 0.578972463940875, "grad_norm": 2.952558755874634, "learning_rate": 8.159201844712673e-05, "loss": 1.348, "step": 9714 }, { "epoch": 0.579091667660031, "grad_norm": 3.345828056335449, "learning_rate": 8.1584687828264e-05, "loss": 1.7182, "step": 9716 }, { "epoch": 0.579210871379187, "grad_norm": 3.018310308456421, "learning_rate": 8.157735607950892e-05, "loss": 1.1805, "step": 9718 }, { "epoch": 0.5793300750983431, "grad_norm": 3.07275390625, "learning_rate": 8.157002320112377e-05, "loss": 1.389, "step": 9720 }, { "epoch": 0.5794492788174991, "grad_norm": 3.3546841144561768, "learning_rate": 8.156268919337087e-05, "loss": 1.4312, "step": 9722 }, { "epoch": 0.5795684825366552, "grad_norm": 3.3113858699798584, "learning_rate": 8.15553540565126e-05, "loss": 1.4883, "step": 9724 }, { "epoch": 0.5796876862558111, "grad_norm": 3.0776751041412354, "learning_rate": 8.154801779081133e-05, "loss": 1.5427, "step": 9726 }, { "epoch": 0.5798068899749672, "grad_norm": 3.0126121044158936, "learning_rate": 8.154068039652955e-05, "loss": 1.3864, "step": 9728 }, { "epoch": 0.5799260936941233, "grad_norm": 2.927361488342285, "learning_rate": 8.15333418739297e-05, "loss": 1.3361, "step": 9730 }, { "epoch": 0.5800452974132793, "grad_norm": 3.143937349319458, "learning_rate": 8.152600222327432e-05, "loss": 1.3607, "step": 9732 }, { "epoch": 0.5801645011324353, "grad_norm": 3.0715794563293457, "learning_rate": 8.151866144482598e-05, "loss": 1.4337, "step": 9734 }, { "epoch": 0.5802837048515914, "grad_norm": 3.3432302474975586, "learning_rate": 8.151131953884727e-05, "loss": 1.2968, "step": 9736 }, { "epoch": 0.5804029085707474, "grad_norm": 3.23561692237854, "learning_rate": 8.150397650560086e-05, "loss": 1.4981, "step": 9738 }, { "epoch": 0.5805221122899035, "grad_norm": 3.433316469192505, "learning_rate": 8.149663234534942e-05, "loss": 1.5207, "step": 9740 }, { "epoch": 0.5806413160090594, "grad_norm": 2.9130501747131348, "learning_rate": 8.148928705835567e-05, "loss": 1.5133, "step": 9742 }, { "epoch": 0.5807605197282155, "grad_norm": 2.91573166847229, "learning_rate": 8.14819406448824e-05, "loss": 1.3792, "step": 9744 }, { "epoch": 0.5808797234473716, "grad_norm": 3.168294668197632, "learning_rate": 8.147459310519237e-05, "loss": 1.4743, "step": 9746 }, { "epoch": 0.5809989271665276, "grad_norm": 3.4513063430786133, "learning_rate": 8.146724443954847e-05, "loss": 1.3955, "step": 9748 }, { "epoch": 0.5811181308856836, "grad_norm": 3.156442165374756, "learning_rate": 8.145989464821358e-05, "loss": 1.4138, "step": 9750 }, { "epoch": 0.5812373346048396, "grad_norm": 3.0769176483154297, "learning_rate": 8.145254373145062e-05, "loss": 1.3596, "step": 9752 }, { "epoch": 0.5813565383239957, "grad_norm": 2.8438165187835693, "learning_rate": 8.144519168952254e-05, "loss": 1.2742, "step": 9754 }, { "epoch": 0.5814757420431518, "grad_norm": 3.1950435638427734, "learning_rate": 8.143783852269238e-05, "loss": 1.4102, "step": 9756 }, { "epoch": 0.5815949457623077, "grad_norm": 3.432908296585083, "learning_rate": 8.143048423122316e-05, "loss": 1.4669, "step": 9758 }, { "epoch": 0.5817141494814638, "grad_norm": 2.759481906890869, "learning_rate": 8.142312881537798e-05, "loss": 1.3307, "step": 9760 }, { "epoch": 0.5818333532006199, "grad_norm": 2.881803035736084, "learning_rate": 8.141577227541998e-05, "loss": 1.2294, "step": 9762 }, { "epoch": 0.5819525569197759, "grad_norm": 2.699592351913452, "learning_rate": 8.14084146116123e-05, "loss": 1.2394, "step": 9764 }, { "epoch": 0.582071760638932, "grad_norm": 2.836481809616089, "learning_rate": 8.140105582421819e-05, "loss": 1.3606, "step": 9766 }, { "epoch": 0.5821909643580879, "grad_norm": 3.03759503364563, "learning_rate": 8.139369591350086e-05, "loss": 1.4357, "step": 9768 }, { "epoch": 0.582310168077244, "grad_norm": 2.9850778579711914, "learning_rate": 8.138633487972362e-05, "loss": 1.2637, "step": 9770 }, { "epoch": 0.5824293717964001, "grad_norm": 3.3585965633392334, "learning_rate": 8.137897272314981e-05, "loss": 1.4197, "step": 9772 }, { "epoch": 0.582548575515556, "grad_norm": 3.0899651050567627, "learning_rate": 8.137160944404276e-05, "loss": 1.3617, "step": 9774 }, { "epoch": 0.5826677792347121, "grad_norm": 3.466048002243042, "learning_rate": 8.136424504266592e-05, "loss": 1.3887, "step": 9776 }, { "epoch": 0.5827869829538682, "grad_norm": 3.114032506942749, "learning_rate": 8.13568795192827e-05, "loss": 1.3934, "step": 9778 }, { "epoch": 0.5829061866730242, "grad_norm": 2.9655745029449463, "learning_rate": 8.134951287415663e-05, "loss": 1.3173, "step": 9780 }, { "epoch": 0.5830253903921803, "grad_norm": 3.121840238571167, "learning_rate": 8.134214510755123e-05, "loss": 1.3736, "step": 9782 }, { "epoch": 0.5831445941113362, "grad_norm": 3.331176996231079, "learning_rate": 8.133477621973004e-05, "loss": 1.4333, "step": 9784 }, { "epoch": 0.5832637978304923, "grad_norm": 3.2261135578155518, "learning_rate": 8.132740621095672e-05, "loss": 1.3242, "step": 9786 }, { "epoch": 0.5833830015496484, "grad_norm": 3.0747668743133545, "learning_rate": 8.132003508149488e-05, "loss": 1.5171, "step": 9788 }, { "epoch": 0.5835022052688044, "grad_norm": 2.968538522720337, "learning_rate": 8.131266283160823e-05, "loss": 1.2687, "step": 9790 }, { "epoch": 0.5836214089879604, "grad_norm": 3.3904240131378174, "learning_rate": 8.130528946156048e-05, "loss": 1.4101, "step": 9792 }, { "epoch": 0.5837406127071164, "grad_norm": 3.149963855743408, "learning_rate": 8.129791497161545e-05, "loss": 1.2836, "step": 9794 }, { "epoch": 0.5838598164262725, "grad_norm": 2.6928653717041016, "learning_rate": 8.129053936203687e-05, "loss": 1.1965, "step": 9796 }, { "epoch": 0.5839790201454286, "grad_norm": 3.1431798934936523, "learning_rate": 8.128316263308868e-05, "loss": 1.4281, "step": 9798 }, { "epoch": 0.5840982238645845, "grad_norm": 3.3815503120422363, "learning_rate": 8.12757847850347e-05, "loss": 1.4483, "step": 9800 }, { "epoch": 0.5842174275837406, "grad_norm": 2.781919479370117, "learning_rate": 8.126840581813892e-05, "loss": 1.3787, "step": 9802 }, { "epoch": 0.5843366313028967, "grad_norm": 2.870802402496338, "learning_rate": 8.126102573266525e-05, "loss": 1.243, "step": 9804 }, { "epoch": 0.5844558350220527, "grad_norm": 3.003131151199341, "learning_rate": 8.125364452887775e-05, "loss": 1.2407, "step": 9806 }, { "epoch": 0.5845750387412088, "grad_norm": 2.942373037338257, "learning_rate": 8.124626220704044e-05, "loss": 1.3985, "step": 9808 }, { "epoch": 0.5846942424603647, "grad_norm": 3.2189505100250244, "learning_rate": 8.123887876741742e-05, "loss": 1.5693, "step": 9810 }, { "epoch": 0.5848134461795208, "grad_norm": 2.9379382133483887, "learning_rate": 8.123149421027284e-05, "loss": 1.2986, "step": 9812 }, { "epoch": 0.5849326498986769, "grad_norm": 3.013920307159424, "learning_rate": 8.122410853587085e-05, "loss": 1.5604, "step": 9814 }, { "epoch": 0.5850518536178329, "grad_norm": 2.795665740966797, "learning_rate": 8.121672174447567e-05, "loss": 1.3138, "step": 9816 }, { "epoch": 0.5851710573369889, "grad_norm": 2.9688446521759033, "learning_rate": 8.120933383635154e-05, "loss": 1.427, "step": 9818 }, { "epoch": 0.5852902610561449, "grad_norm": 2.9829139709472656, "learning_rate": 8.120194481176275e-05, "loss": 1.386, "step": 9820 }, { "epoch": 0.585409464775301, "grad_norm": 3.065791606903076, "learning_rate": 8.119455467097363e-05, "loss": 1.5594, "step": 9822 }, { "epoch": 0.5855286684944571, "grad_norm": 2.899001121520996, "learning_rate": 8.118716341424857e-05, "loss": 1.2647, "step": 9824 }, { "epoch": 0.585647872213613, "grad_norm": 3.089390754699707, "learning_rate": 8.117977104185196e-05, "loss": 1.4829, "step": 9826 }, { "epoch": 0.5857670759327691, "grad_norm": 2.9518260955810547, "learning_rate": 8.117237755404829e-05, "loss": 1.4273, "step": 9828 }, { "epoch": 0.5858862796519252, "grad_norm": 3.203035354614258, "learning_rate": 8.1164982951102e-05, "loss": 1.3651, "step": 9830 }, { "epoch": 0.5860054833710812, "grad_norm": 3.376255989074707, "learning_rate": 8.115758723327764e-05, "loss": 1.495, "step": 9832 }, { "epoch": 0.5861246870902372, "grad_norm": 2.9667463302612305, "learning_rate": 8.115019040083978e-05, "loss": 1.2954, "step": 9834 }, { "epoch": 0.5862438908093932, "grad_norm": 3.131635904312134, "learning_rate": 8.114279245405302e-05, "loss": 1.2363, "step": 9836 }, { "epoch": 0.5863630945285493, "grad_norm": 3.1825578212738037, "learning_rate": 8.113539339318203e-05, "loss": 1.5846, "step": 9838 }, { "epoch": 0.5864822982477054, "grad_norm": 3.5719430446624756, "learning_rate": 8.11279932184915e-05, "loss": 1.2925, "step": 9840 }, { "epoch": 0.5866015019668613, "grad_norm": 3.1276204586029053, "learning_rate": 8.112059193024612e-05, "loss": 1.4007, "step": 9842 }, { "epoch": 0.5867207056860174, "grad_norm": 3.043257474899292, "learning_rate": 8.111318952871073e-05, "loss": 1.3902, "step": 9844 }, { "epoch": 0.5868399094051734, "grad_norm": 2.7383227348327637, "learning_rate": 8.110578601415007e-05, "loss": 1.3909, "step": 9846 }, { "epoch": 0.5869591131243295, "grad_norm": 3.163299798965454, "learning_rate": 8.109838138682902e-05, "loss": 1.3864, "step": 9848 }, { "epoch": 0.5870783168434855, "grad_norm": 3.4646406173706055, "learning_rate": 8.109097564701246e-05, "loss": 1.5515, "step": 9850 }, { "epoch": 0.5871975205626415, "grad_norm": 3.182358741760254, "learning_rate": 8.108356879496532e-05, "loss": 1.3584, "step": 9852 }, { "epoch": 0.5873167242817976, "grad_norm": 3.021385669708252, "learning_rate": 8.10761608309526e-05, "loss": 1.383, "step": 9854 }, { "epoch": 0.5874359280009537, "grad_norm": 2.995407819747925, "learning_rate": 8.106875175523927e-05, "loss": 1.3766, "step": 9856 }, { "epoch": 0.5875551317201096, "grad_norm": 3.219984769821167, "learning_rate": 8.106134156809038e-05, "loss": 1.4998, "step": 9858 }, { "epoch": 0.5876743354392657, "grad_norm": 3.285560369491577, "learning_rate": 8.105393026977103e-05, "loss": 1.4684, "step": 9860 }, { "epoch": 0.5877935391584217, "grad_norm": 3.1779046058654785, "learning_rate": 8.104651786054635e-05, "loss": 1.3995, "step": 9862 }, { "epoch": 0.5879127428775778, "grad_norm": 3.193256139755249, "learning_rate": 8.10391043406815e-05, "loss": 1.5006, "step": 9864 }, { "epoch": 0.5880319465967339, "grad_norm": 3.0670173168182373, "learning_rate": 8.103168971044168e-05, "loss": 1.4896, "step": 9866 }, { "epoch": 0.5881511503158898, "grad_norm": 3.3811755180358887, "learning_rate": 8.102427397009215e-05, "loss": 1.3725, "step": 9868 }, { "epoch": 0.5882703540350459, "grad_norm": 2.8342223167419434, "learning_rate": 8.10168571198982e-05, "loss": 1.3316, "step": 9870 }, { "epoch": 0.588389557754202, "grad_norm": 2.8004391193389893, "learning_rate": 8.100943916012513e-05, "loss": 1.3501, "step": 9872 }, { "epoch": 0.588508761473358, "grad_norm": 3.1454360485076904, "learning_rate": 8.100202009103834e-05, "loss": 1.3592, "step": 9874 }, { "epoch": 0.588627965192514, "grad_norm": 2.838714361190796, "learning_rate": 8.099459991290324e-05, "loss": 1.3458, "step": 9876 }, { "epoch": 0.58874716891167, "grad_norm": 3.223748207092285, "learning_rate": 8.098717862598522e-05, "loss": 1.2535, "step": 9878 }, { "epoch": 0.5888663726308261, "grad_norm": 2.7612826824188232, "learning_rate": 8.09797562305498e-05, "loss": 1.3318, "step": 9880 }, { "epoch": 0.5889855763499822, "grad_norm": 2.958530902862549, "learning_rate": 8.097233272686251e-05, "loss": 1.3745, "step": 9882 }, { "epoch": 0.5891047800691381, "grad_norm": 3.0820600986480713, "learning_rate": 8.096490811518891e-05, "loss": 1.3579, "step": 9884 }, { "epoch": 0.5892239837882942, "grad_norm": 3.1205718517303467, "learning_rate": 8.095748239579459e-05, "loss": 1.5097, "step": 9886 }, { "epoch": 0.5893431875074502, "grad_norm": 3.515958070755005, "learning_rate": 8.095005556894521e-05, "loss": 1.3878, "step": 9888 }, { "epoch": 0.5894623912266063, "grad_norm": 3.2395269870758057, "learning_rate": 8.094262763490646e-05, "loss": 1.4735, "step": 9890 }, { "epoch": 0.5895815949457623, "grad_norm": 3.078584671020508, "learning_rate": 8.093519859394405e-05, "loss": 1.3648, "step": 9892 }, { "epoch": 0.5897007986649183, "grad_norm": 3.049971103668213, "learning_rate": 8.092776844632374e-05, "loss": 1.3218, "step": 9894 }, { "epoch": 0.5898200023840744, "grad_norm": 2.903243064880371, "learning_rate": 8.092033719231134e-05, "loss": 1.4407, "step": 9896 }, { "epoch": 0.5899392061032305, "grad_norm": 2.96940016746521, "learning_rate": 8.091290483217266e-05, "loss": 1.4145, "step": 9898 }, { "epoch": 0.5900584098223864, "grad_norm": 3.11919903755188, "learning_rate": 8.090547136617362e-05, "loss": 1.2705, "step": 9900 }, { "epoch": 0.5901776135415425, "grad_norm": 3.0353903770446777, "learning_rate": 8.089803679458013e-05, "loss": 1.3274, "step": 9902 }, { "epoch": 0.5902968172606985, "grad_norm": 3.2437121868133545, "learning_rate": 8.089060111765815e-05, "loss": 1.419, "step": 9904 }, { "epoch": 0.5904160209798546, "grad_norm": 2.7104170322418213, "learning_rate": 8.08831643356737e-05, "loss": 1.3561, "step": 9906 }, { "epoch": 0.5905352246990107, "grad_norm": 3.0788681507110596, "learning_rate": 8.087572644889275e-05, "loss": 1.4025, "step": 9908 }, { "epoch": 0.5906544284181666, "grad_norm": 3.17547869682312, "learning_rate": 8.086828745758145e-05, "loss": 1.3266, "step": 9910 }, { "epoch": 0.5907736321373227, "grad_norm": 3.118429183959961, "learning_rate": 8.086084736200589e-05, "loss": 1.3731, "step": 9912 }, { "epoch": 0.5908928358564787, "grad_norm": 3.109140634536743, "learning_rate": 8.085340616243223e-05, "loss": 1.3277, "step": 9914 }, { "epoch": 0.5910120395756348, "grad_norm": 3.0186307430267334, "learning_rate": 8.084596385912666e-05, "loss": 1.4475, "step": 9916 }, { "epoch": 0.5911312432947908, "grad_norm": 3.017834424972534, "learning_rate": 8.083852045235542e-05, "loss": 1.3564, "step": 9918 }, { "epoch": 0.5912504470139468, "grad_norm": 3.301501512527466, "learning_rate": 8.083107594238481e-05, "loss": 1.4633, "step": 9920 }, { "epoch": 0.5913696507331029, "grad_norm": 3.2306368350982666, "learning_rate": 8.082363032948112e-05, "loss": 1.5964, "step": 9922 }, { "epoch": 0.591488854452259, "grad_norm": 2.799403667449951, "learning_rate": 8.081618361391072e-05, "loss": 1.4208, "step": 9924 }, { "epoch": 0.5916080581714149, "grad_norm": 3.00980806350708, "learning_rate": 8.080873579593997e-05, "loss": 1.3255, "step": 9926 }, { "epoch": 0.591727261890571, "grad_norm": 3.2875173091888428, "learning_rate": 8.080128687583534e-05, "loss": 1.4121, "step": 9928 }, { "epoch": 0.591846465609727, "grad_norm": 3.053114652633667, "learning_rate": 8.07938368538633e-05, "loss": 1.4026, "step": 9930 }, { "epoch": 0.5919656693288831, "grad_norm": 2.9627299308776855, "learning_rate": 8.078638573029036e-05, "loss": 1.3743, "step": 9932 }, { "epoch": 0.5920848730480391, "grad_norm": 3.084763765335083, "learning_rate": 8.077893350538305e-05, "loss": 1.3624, "step": 9934 }, { "epoch": 0.5922040767671951, "grad_norm": 3.0380451679229736, "learning_rate": 8.0771480179408e-05, "loss": 1.4069, "step": 9936 }, { "epoch": 0.5923232804863512, "grad_norm": 3.121980905532837, "learning_rate": 8.076402575263183e-05, "loss": 1.5672, "step": 9938 }, { "epoch": 0.5924424842055072, "grad_norm": 2.858715772628784, "learning_rate": 8.075657022532117e-05, "loss": 1.3974, "step": 9940 }, { "epoch": 0.5925616879246632, "grad_norm": 2.658998727798462, "learning_rate": 8.074911359774278e-05, "loss": 1.4125, "step": 9942 }, { "epoch": 0.5926808916438193, "grad_norm": 3.2062318325042725, "learning_rate": 8.074165587016337e-05, "loss": 1.4107, "step": 9944 }, { "epoch": 0.5928000953629753, "grad_norm": 3.1688921451568604, "learning_rate": 8.073419704284976e-05, "loss": 1.2033, "step": 9946 }, { "epoch": 0.5929192990821314, "grad_norm": 3.049543857574463, "learning_rate": 8.072673711606876e-05, "loss": 1.4213, "step": 9948 }, { "epoch": 0.5930385028012874, "grad_norm": 3.019369602203369, "learning_rate": 8.071927609008725e-05, "loss": 1.2901, "step": 9950 }, { "epoch": 0.5931577065204434, "grad_norm": 2.7807130813598633, "learning_rate": 8.071181396517214e-05, "loss": 1.4519, "step": 9952 }, { "epoch": 0.5932769102395995, "grad_norm": 2.7559781074523926, "learning_rate": 8.070435074159034e-05, "loss": 1.2763, "step": 9954 }, { "epoch": 0.5933961139587555, "grad_norm": 2.8434462547302246, "learning_rate": 8.069688641960889e-05, "loss": 1.289, "step": 9956 }, { "epoch": 0.5935153176779115, "grad_norm": 3.3560590744018555, "learning_rate": 8.068942099949476e-05, "loss": 1.501, "step": 9958 }, { "epoch": 0.5936345213970676, "grad_norm": 3.025745153427124, "learning_rate": 8.068195448151505e-05, "loss": 1.4861, "step": 9960 }, { "epoch": 0.5937537251162236, "grad_norm": 2.8473689556121826, "learning_rate": 8.067448686593684e-05, "loss": 1.2864, "step": 9962 }, { "epoch": 0.5938729288353797, "grad_norm": 2.9627695083618164, "learning_rate": 8.066701815302729e-05, "loss": 1.3172, "step": 9964 }, { "epoch": 0.5939921325545358, "grad_norm": 2.6816227436065674, "learning_rate": 8.065954834305358e-05, "loss": 1.3625, "step": 9966 }, { "epoch": 0.5941113362736917, "grad_norm": 3.3582611083984375, "learning_rate": 8.065207743628293e-05, "loss": 1.3261, "step": 9968 }, { "epoch": 0.5942305399928478, "grad_norm": 3.2441930770874023, "learning_rate": 8.064460543298258e-05, "loss": 1.3706, "step": 9970 }, { "epoch": 0.5943497437120038, "grad_norm": 3.164095401763916, "learning_rate": 8.063713233341985e-05, "loss": 1.2757, "step": 9972 }, { "epoch": 0.5944689474311599, "grad_norm": 3.1500015258789062, "learning_rate": 8.062965813786207e-05, "loss": 1.3024, "step": 9974 }, { "epoch": 0.5945881511503159, "grad_norm": 5.025313854217529, "learning_rate": 8.062218284657663e-05, "loss": 1.4943, "step": 9976 }, { "epoch": 0.5947073548694719, "grad_norm": 3.148073434829712, "learning_rate": 8.061470645983091e-05, "loss": 1.4546, "step": 9978 }, { "epoch": 0.594826558588628, "grad_norm": 3.0653109550476074, "learning_rate": 8.060722897789243e-05, "loss": 1.4017, "step": 9980 }, { "epoch": 0.594945762307784, "grad_norm": 2.9356918334960938, "learning_rate": 8.059975040102863e-05, "loss": 1.3336, "step": 9982 }, { "epoch": 0.59506496602694, "grad_norm": 2.8995468616485596, "learning_rate": 8.059227072950705e-05, "loss": 1.3926, "step": 9984 }, { "epoch": 0.5951841697460961, "grad_norm": 2.8044161796569824, "learning_rate": 8.05847899635953e-05, "loss": 1.2851, "step": 9986 }, { "epoch": 0.5953033734652521, "grad_norm": 2.856590986251831, "learning_rate": 8.057730810356096e-05, "loss": 1.3117, "step": 9988 }, { "epoch": 0.5954225771844082, "grad_norm": 3.3072004318237305, "learning_rate": 8.056982514967169e-05, "loss": 1.5231, "step": 9990 }, { "epoch": 0.5955417809035642, "grad_norm": 2.915194511413574, "learning_rate": 8.056234110219516e-05, "loss": 1.4079, "step": 9992 }, { "epoch": 0.5956609846227202, "grad_norm": 3.31290340423584, "learning_rate": 8.055485596139915e-05, "loss": 1.3986, "step": 9994 }, { "epoch": 0.5957801883418763, "grad_norm": 3.006833553314209, "learning_rate": 8.054736972755138e-05, "loss": 1.3906, "step": 9996 }, { "epoch": 0.5958993920610323, "grad_norm": 2.814448118209839, "learning_rate": 8.05398824009197e-05, "loss": 1.2669, "step": 9998 }, { "epoch": 0.5960185957801883, "grad_norm": 3.0470495223999023, "learning_rate": 8.053239398177191e-05, "loss": 1.5531, "step": 10000 }, { "epoch": 0.5961377994993444, "grad_norm": 2.9911201000213623, "learning_rate": 8.052490447037593e-05, "loss": 1.2994, "step": 10002 }, { "epoch": 0.5962570032185004, "grad_norm": 2.8965272903442383, "learning_rate": 8.051741386699969e-05, "loss": 1.4114, "step": 10004 }, { "epoch": 0.5963762069376565, "grad_norm": 3.2441537380218506, "learning_rate": 8.050992217191114e-05, "loss": 1.4412, "step": 10006 }, { "epoch": 0.5964954106568124, "grad_norm": 2.998692512512207, "learning_rate": 8.050242938537827e-05, "loss": 1.433, "step": 10008 }, { "epoch": 0.5966146143759685, "grad_norm": 3.1514041423797607, "learning_rate": 8.049493550766914e-05, "loss": 1.4084, "step": 10010 }, { "epoch": 0.5967338180951246, "grad_norm": 3.062669515609741, "learning_rate": 8.048744053905184e-05, "loss": 1.4382, "step": 10012 }, { "epoch": 0.5968530218142806, "grad_norm": 2.8678486347198486, "learning_rate": 8.047994447979448e-05, "loss": 1.2796, "step": 10014 }, { "epoch": 0.5969722255334367, "grad_norm": 2.803821563720703, "learning_rate": 8.047244733016522e-05, "loss": 1.3365, "step": 10016 }, { "epoch": 0.5970914292525927, "grad_norm": 3.114438533782959, "learning_rate": 8.046494909043226e-05, "loss": 1.2974, "step": 10018 }, { "epoch": 0.5972106329717487, "grad_norm": 3.299194574356079, "learning_rate": 8.045744976086383e-05, "loss": 1.3989, "step": 10020 }, { "epoch": 0.5973298366909048, "grad_norm": 2.8655519485473633, "learning_rate": 8.044994934172821e-05, "loss": 1.2119, "step": 10022 }, { "epoch": 0.5974490404100607, "grad_norm": 3.289729356765747, "learning_rate": 8.044244783329371e-05, "loss": 1.2971, "step": 10024 }, { "epoch": 0.5975682441292168, "grad_norm": 3.198345422744751, "learning_rate": 8.04349452358287e-05, "loss": 1.4649, "step": 10026 }, { "epoch": 0.5976874478483729, "grad_norm": 2.9153354167938232, "learning_rate": 8.042744154960158e-05, "loss": 1.3069, "step": 10028 }, { "epoch": 0.5978066515675289, "grad_norm": 2.7820937633514404, "learning_rate": 8.041993677488076e-05, "loss": 1.2735, "step": 10030 }, { "epoch": 0.597925855286685, "grad_norm": 3.275254964828491, "learning_rate": 8.041243091193473e-05, "loss": 1.3236, "step": 10032 }, { "epoch": 0.5980450590058409, "grad_norm": 2.91268253326416, "learning_rate": 8.040492396103198e-05, "loss": 1.3418, "step": 10034 }, { "epoch": 0.598164262724997, "grad_norm": 3.1335935592651367, "learning_rate": 8.039741592244108e-05, "loss": 1.4127, "step": 10036 }, { "epoch": 0.5982834664441531, "grad_norm": 2.995988368988037, "learning_rate": 8.038990679643059e-05, "loss": 1.3273, "step": 10038 }, { "epoch": 0.5984026701633091, "grad_norm": 3.3048312664031982, "learning_rate": 8.038239658326919e-05, "loss": 1.4002, "step": 10040 }, { "epoch": 0.5985218738824651, "grad_norm": 3.005141019821167, "learning_rate": 8.037488528322547e-05, "loss": 1.2549, "step": 10042 }, { "epoch": 0.5986410776016212, "grad_norm": 3.087905168533325, "learning_rate": 8.036737289656822e-05, "loss": 1.3404, "step": 10044 }, { "epoch": 0.5987602813207772, "grad_norm": 3.1933937072753906, "learning_rate": 8.035985942356611e-05, "loss": 1.3333, "step": 10046 }, { "epoch": 0.5988794850399333, "grad_norm": 3.226149559020996, "learning_rate": 8.035234486448797e-05, "loss": 1.4905, "step": 10048 }, { "epoch": 0.5989986887590892, "grad_norm": 2.825535535812378, "learning_rate": 8.034482921960258e-05, "loss": 1.4564, "step": 10050 }, { "epoch": 0.5991178924782453, "grad_norm": 3.200728178024292, "learning_rate": 8.033731248917883e-05, "loss": 1.4996, "step": 10052 }, { "epoch": 0.5992370961974014, "grad_norm": 2.9472594261169434, "learning_rate": 8.032979467348564e-05, "loss": 1.2516, "step": 10054 }, { "epoch": 0.5993562999165574, "grad_norm": 3.394944667816162, "learning_rate": 8.032227577279191e-05, "loss": 1.4107, "step": 10056 }, { "epoch": 0.5994755036357134, "grad_norm": 2.5951061248779297, "learning_rate": 8.031475578736664e-05, "loss": 1.246, "step": 10058 }, { "epoch": 0.5995947073548695, "grad_norm": 3.255499839782715, "learning_rate": 8.030723471747881e-05, "loss": 1.4276, "step": 10060 }, { "epoch": 0.5997139110740255, "grad_norm": 3.272372245788574, "learning_rate": 8.029971256339753e-05, "loss": 1.3234, "step": 10062 }, { "epoch": 0.5998331147931816, "grad_norm": 3.25140643119812, "learning_rate": 8.029218932539183e-05, "loss": 1.368, "step": 10064 }, { "epoch": 0.5999523185123375, "grad_norm": 3.176316022872925, "learning_rate": 8.028466500373088e-05, "loss": 1.3347, "step": 10066 }, { "epoch": 0.6000715222314936, "grad_norm": 3.1288633346557617, "learning_rate": 8.027713959868385e-05, "loss": 1.4089, "step": 10068 }, { "epoch": 0.6001907259506497, "grad_norm": 3.173572301864624, "learning_rate": 8.026961311051994e-05, "loss": 1.3729, "step": 10070 }, { "epoch": 0.6003099296698057, "grad_norm": 3.2281205654144287, "learning_rate": 8.026208553950843e-05, "loss": 1.4338, "step": 10072 }, { "epoch": 0.6004291333889618, "grad_norm": 3.218362331390381, "learning_rate": 8.025455688591856e-05, "loss": 1.3052, "step": 10074 }, { "epoch": 0.6005483371081177, "grad_norm": 3.071852207183838, "learning_rate": 8.024702715001967e-05, "loss": 1.3688, "step": 10076 }, { "epoch": 0.6006675408272738, "grad_norm": 2.8740041255950928, "learning_rate": 8.023949633208115e-05, "loss": 1.2193, "step": 10078 }, { "epoch": 0.6007867445464299, "grad_norm": 3.1378865242004395, "learning_rate": 8.023196443237235e-05, "loss": 1.3667, "step": 10080 }, { "epoch": 0.6009059482655859, "grad_norm": 3.2106902599334717, "learning_rate": 8.022443145116276e-05, "loss": 1.6287, "step": 10082 }, { "epoch": 0.6010251519847419, "grad_norm": 3.2189738750457764, "learning_rate": 8.021689738872185e-05, "loss": 1.5133, "step": 10084 }, { "epoch": 0.601144355703898, "grad_norm": 2.862269878387451, "learning_rate": 8.020936224531911e-05, "loss": 1.2865, "step": 10086 }, { "epoch": 0.601263559423054, "grad_norm": 3.121257781982422, "learning_rate": 8.020182602122417e-05, "loss": 1.5117, "step": 10088 }, { "epoch": 0.6013827631422101, "grad_norm": 3.1170434951782227, "learning_rate": 8.019428871670656e-05, "loss": 1.4321, "step": 10090 }, { "epoch": 0.601501966861366, "grad_norm": 2.866708278656006, "learning_rate": 8.01867503320359e-05, "loss": 1.3876, "step": 10092 }, { "epoch": 0.6016211705805221, "grad_norm": 2.816432237625122, "learning_rate": 8.017921086748194e-05, "loss": 1.3516, "step": 10094 }, { "epoch": 0.6017403742996782, "grad_norm": 3.1286559104919434, "learning_rate": 8.017167032331434e-05, "loss": 1.4043, "step": 10096 }, { "epoch": 0.6018595780188342, "grad_norm": 3.1975152492523193, "learning_rate": 8.016412869980285e-05, "loss": 1.3886, "step": 10098 }, { "epoch": 0.6019787817379902, "grad_norm": 2.9634666442871094, "learning_rate": 8.015658599721727e-05, "loss": 1.2944, "step": 10100 }, { "epoch": 0.6020979854571462, "grad_norm": 3.06158185005188, "learning_rate": 8.014904221582744e-05, "loss": 1.4636, "step": 10102 }, { "epoch": 0.6022171891763023, "grad_norm": 3.4345171451568604, "learning_rate": 8.014149735590321e-05, "loss": 1.3998, "step": 10104 }, { "epoch": 0.6023363928954584, "grad_norm": 3.0877645015716553, "learning_rate": 8.013395141771451e-05, "loss": 1.2841, "step": 10106 }, { "epoch": 0.6024555966146143, "grad_norm": 3.5091400146484375, "learning_rate": 8.012640440153124e-05, "loss": 1.5655, "step": 10108 }, { "epoch": 0.6025748003337704, "grad_norm": 3.277761936187744, "learning_rate": 8.011885630762341e-05, "loss": 1.3057, "step": 10110 }, { "epoch": 0.6026940040529265, "grad_norm": 3.350212812423706, "learning_rate": 8.011130713626105e-05, "loss": 1.434, "step": 10112 }, { "epoch": 0.6028132077720825, "grad_norm": 3.152190685272217, "learning_rate": 8.010375688771421e-05, "loss": 1.4455, "step": 10114 }, { "epoch": 0.6029324114912386, "grad_norm": 3.0002641677856445, "learning_rate": 8.009620556225298e-05, "loss": 1.4837, "step": 10116 }, { "epoch": 0.6030516152103945, "grad_norm": 3.1524977684020996, "learning_rate": 8.00886531601475e-05, "loss": 1.2298, "step": 10118 }, { "epoch": 0.6031708189295506, "grad_norm": 3.2043581008911133, "learning_rate": 8.008109968166797e-05, "loss": 1.3318, "step": 10120 }, { "epoch": 0.6032900226487067, "grad_norm": 3.101088047027588, "learning_rate": 8.007354512708458e-05, "loss": 1.4473, "step": 10122 }, { "epoch": 0.6034092263678626, "grad_norm": 3.0159571170806885, "learning_rate": 8.006598949666756e-05, "loss": 1.4758, "step": 10124 }, { "epoch": 0.6035284300870187, "grad_norm": 3.1913955211639404, "learning_rate": 8.005843279068724e-05, "loss": 1.3895, "step": 10126 }, { "epoch": 0.6036476338061747, "grad_norm": 2.9064886569976807, "learning_rate": 8.005087500941395e-05, "loss": 1.3392, "step": 10128 }, { "epoch": 0.6037668375253308, "grad_norm": 3.045100450515747, "learning_rate": 8.004331615311801e-05, "loss": 1.4455, "step": 10130 }, { "epoch": 0.6038860412444869, "grad_norm": 3.0111372470855713, "learning_rate": 8.00357562220699e-05, "loss": 1.3726, "step": 10132 }, { "epoch": 0.6040052449636428, "grad_norm": 3.055333137512207, "learning_rate": 8.002819521654e-05, "loss": 1.3808, "step": 10134 }, { "epoch": 0.6041244486827989, "grad_norm": 3.2806997299194336, "learning_rate": 8.00206331367988e-05, "loss": 1.3287, "step": 10136 }, { "epoch": 0.604243652401955, "grad_norm": 3.1930665969848633, "learning_rate": 8.001306998311686e-05, "loss": 1.4032, "step": 10138 }, { "epoch": 0.604362856121111, "grad_norm": 2.774416208267212, "learning_rate": 8.00055057557647e-05, "loss": 1.2657, "step": 10140 }, { "epoch": 0.604482059840267, "grad_norm": 3.138158082962036, "learning_rate": 7.999794045501296e-05, "loss": 1.3527, "step": 10142 }, { "epoch": 0.604601263559423, "grad_norm": 3.150834083557129, "learning_rate": 7.999037408113225e-05, "loss": 1.3904, "step": 10144 }, { "epoch": 0.6047204672785791, "grad_norm": 3.184615135192871, "learning_rate": 7.998280663439324e-05, "loss": 1.3646, "step": 10146 }, { "epoch": 0.6048396709977352, "grad_norm": 3.2387149333953857, "learning_rate": 7.997523811506666e-05, "loss": 1.3966, "step": 10148 }, { "epoch": 0.6049588747168911, "grad_norm": 2.613283157348633, "learning_rate": 7.996766852342325e-05, "loss": 1.3079, "step": 10150 }, { "epoch": 0.6050780784360472, "grad_norm": 3.5935580730438232, "learning_rate": 7.99600978597338e-05, "loss": 1.2767, "step": 10152 }, { "epoch": 0.6051972821552033, "grad_norm": 2.715019702911377, "learning_rate": 7.995252612426915e-05, "loss": 1.38, "step": 10154 }, { "epoch": 0.6053164858743593, "grad_norm": 3.271435022354126, "learning_rate": 7.994495331730015e-05, "loss": 1.4202, "step": 10156 }, { "epoch": 0.6054356895935153, "grad_norm": 3.048464298248291, "learning_rate": 7.993737943909771e-05, "loss": 1.2429, "step": 10158 }, { "epoch": 0.6055548933126713, "grad_norm": 3.1399691104888916, "learning_rate": 7.992980448993277e-05, "loss": 1.2156, "step": 10160 }, { "epoch": 0.6056740970318274, "grad_norm": 2.717289924621582, "learning_rate": 7.992222847007635e-05, "loss": 1.2318, "step": 10162 }, { "epoch": 0.6057933007509835, "grad_norm": 2.901240348815918, "learning_rate": 7.991465137979943e-05, "loss": 1.266, "step": 10164 }, { "epoch": 0.6059125044701394, "grad_norm": 3.094372510910034, "learning_rate": 7.990707321937308e-05, "loss": 1.3351, "step": 10166 }, { "epoch": 0.6060317081892955, "grad_norm": 3.1896400451660156, "learning_rate": 7.989949398906838e-05, "loss": 1.4235, "step": 10168 }, { "epoch": 0.6061509119084515, "grad_norm": 3.1948323249816895, "learning_rate": 7.989191368915648e-05, "loss": 1.4748, "step": 10170 }, { "epoch": 0.6062701156276076, "grad_norm": 3.040766477584839, "learning_rate": 7.988433231990857e-05, "loss": 1.358, "step": 10172 }, { "epoch": 0.6063893193467637, "grad_norm": 3.154625654220581, "learning_rate": 7.987674988159583e-05, "loss": 1.3469, "step": 10174 }, { "epoch": 0.6065085230659196, "grad_norm": 3.3262598514556885, "learning_rate": 7.986916637448952e-05, "loss": 1.4849, "step": 10176 }, { "epoch": 0.6066277267850757, "grad_norm": 3.3176703453063965, "learning_rate": 7.986158179886095e-05, "loss": 1.4291, "step": 10178 }, { "epoch": 0.6067469305042318, "grad_norm": 3.166140556335449, "learning_rate": 7.985399615498143e-05, "loss": 1.4463, "step": 10180 }, { "epoch": 0.6068661342233878, "grad_norm": 2.882863759994507, "learning_rate": 7.98464094431223e-05, "loss": 1.33, "step": 10182 }, { "epoch": 0.6069853379425438, "grad_norm": 2.9582712650299072, "learning_rate": 7.9838821663555e-05, "loss": 1.3745, "step": 10184 }, { "epoch": 0.6071045416616998, "grad_norm": 3.320333242416382, "learning_rate": 7.983123281655097e-05, "loss": 1.3197, "step": 10186 }, { "epoch": 0.6072237453808559, "grad_norm": 2.9879071712493896, "learning_rate": 7.982364290238165e-05, "loss": 1.2589, "step": 10188 }, { "epoch": 0.607342949100012, "grad_norm": 3.246664047241211, "learning_rate": 7.98160519213186e-05, "loss": 1.5147, "step": 10190 }, { "epoch": 0.6074621528191679, "grad_norm": 3.22621750831604, "learning_rate": 7.980845987363335e-05, "loss": 1.3802, "step": 10192 }, { "epoch": 0.607581356538324, "grad_norm": 2.9884068965911865, "learning_rate": 7.98008667595975e-05, "loss": 1.321, "step": 10194 }, { "epoch": 0.60770056025748, "grad_norm": 3.2178401947021484, "learning_rate": 7.979327257948271e-05, "loss": 1.6148, "step": 10196 }, { "epoch": 0.6078197639766361, "grad_norm": 2.903284788131714, "learning_rate": 7.97856773335606e-05, "loss": 1.3752, "step": 10198 }, { "epoch": 0.6079389676957921, "grad_norm": 3.193800926208496, "learning_rate": 7.97780810221029e-05, "loss": 1.3313, "step": 10200 }, { "epoch": 0.6080581714149481, "grad_norm": 3.153440475463867, "learning_rate": 7.977048364538136e-05, "loss": 1.5876, "step": 10202 }, { "epoch": 0.6081773751341042, "grad_norm": 2.691352128982544, "learning_rate": 7.976288520366774e-05, "loss": 1.2977, "step": 10204 }, { "epoch": 0.6082965788532603, "grad_norm": 2.8689682483673096, "learning_rate": 7.975528569723392e-05, "loss": 1.5016, "step": 10206 }, { "epoch": 0.6084157825724162, "grad_norm": 3.3539879322052, "learning_rate": 7.974768512635171e-05, "loss": 1.4894, "step": 10208 }, { "epoch": 0.6085349862915723, "grad_norm": 2.944754123687744, "learning_rate": 7.974008349129303e-05, "loss": 1.3333, "step": 10210 }, { "epoch": 0.6086541900107283, "grad_norm": 3.2144923210144043, "learning_rate": 7.97324807923298e-05, "loss": 1.2342, "step": 10212 }, { "epoch": 0.6087733937298844, "grad_norm": 3.1018567085266113, "learning_rate": 7.972487702973399e-05, "loss": 1.3318, "step": 10214 }, { "epoch": 0.6088925974490405, "grad_norm": 3.06213116645813, "learning_rate": 7.971727220377765e-05, "loss": 1.4924, "step": 10216 }, { "epoch": 0.6090118011681964, "grad_norm": 3.4277164936065674, "learning_rate": 7.97096663147328e-05, "loss": 1.5091, "step": 10218 }, { "epoch": 0.6091310048873525, "grad_norm": 2.7790603637695312, "learning_rate": 7.970205936287151e-05, "loss": 1.3624, "step": 10220 }, { "epoch": 0.6092502086065085, "grad_norm": 2.969111919403076, "learning_rate": 7.969445134846597e-05, "loss": 1.2832, "step": 10222 }, { "epoch": 0.6093694123256646, "grad_norm": 3.3259637355804443, "learning_rate": 7.96868422717883e-05, "loss": 1.4108, "step": 10224 }, { "epoch": 0.6094886160448206, "grad_norm": 3.042038679122925, "learning_rate": 7.967923213311069e-05, "loss": 1.4911, "step": 10226 }, { "epoch": 0.6096078197639766, "grad_norm": 2.7651045322418213, "learning_rate": 7.967162093270542e-05, "loss": 1.451, "step": 10228 }, { "epoch": 0.6097270234831327, "grad_norm": 3.268165349960327, "learning_rate": 7.966400867084474e-05, "loss": 1.3879, "step": 10230 }, { "epoch": 0.6098462272022888, "grad_norm": 3.6113123893737793, "learning_rate": 7.965639534780096e-05, "loss": 1.485, "step": 10232 }, { "epoch": 0.6099654309214447, "grad_norm": 2.790236473083496, "learning_rate": 7.964878096384647e-05, "loss": 1.3292, "step": 10234 }, { "epoch": 0.6100846346406008, "grad_norm": 3.0286898612976074, "learning_rate": 7.964116551925365e-05, "loss": 1.2884, "step": 10236 }, { "epoch": 0.6102038383597568, "grad_norm": 3.177074909210205, "learning_rate": 7.963354901429491e-05, "loss": 1.235, "step": 10238 }, { "epoch": 0.6103230420789129, "grad_norm": 3.150453567504883, "learning_rate": 7.962593144924273e-05, "loss": 1.4939, "step": 10240 }, { "epoch": 0.6104422457980689, "grad_norm": 3.610698938369751, "learning_rate": 7.961831282436962e-05, "loss": 1.438, "step": 10242 }, { "epoch": 0.6105614495172249, "grad_norm": 3.191969394683838, "learning_rate": 7.961069313994814e-05, "loss": 1.3534, "step": 10244 }, { "epoch": 0.610680653236381, "grad_norm": 2.910949945449829, "learning_rate": 7.960307239625082e-05, "loss": 1.412, "step": 10246 }, { "epoch": 0.6107998569555371, "grad_norm": 3.299877166748047, "learning_rate": 7.959545059355033e-05, "loss": 1.3866, "step": 10248 }, { "epoch": 0.610919060674693, "grad_norm": 3.221773147583008, "learning_rate": 7.958782773211932e-05, "loss": 1.6243, "step": 10250 }, { "epoch": 0.6110382643938491, "grad_norm": 2.5875308513641357, "learning_rate": 7.958020381223047e-05, "loss": 1.3449, "step": 10252 }, { "epoch": 0.6111574681130051, "grad_norm": 3.2585160732269287, "learning_rate": 7.957257883415653e-05, "loss": 1.4235, "step": 10254 }, { "epoch": 0.6112766718321612, "grad_norm": 2.828378915786743, "learning_rate": 7.956495279817026e-05, "loss": 1.2666, "step": 10256 }, { "epoch": 0.6113958755513172, "grad_norm": 3.2228994369506836, "learning_rate": 7.955732570454447e-05, "loss": 1.4501, "step": 10258 }, { "epoch": 0.6115150792704732, "grad_norm": 3.1065523624420166, "learning_rate": 7.954969755355201e-05, "loss": 1.2767, "step": 10260 }, { "epoch": 0.6116342829896293, "grad_norm": 3.0114195346832275, "learning_rate": 7.954206834546577e-05, "loss": 1.4512, "step": 10262 }, { "epoch": 0.6117534867087853, "grad_norm": 3.3219351768493652, "learning_rate": 7.953443808055866e-05, "loss": 1.4799, "step": 10264 }, { "epoch": 0.6118726904279413, "grad_norm": 3.157235860824585, "learning_rate": 7.952680675910365e-05, "loss": 1.359, "step": 10266 }, { "epoch": 0.6119918941470974, "grad_norm": 3.2445220947265625, "learning_rate": 7.951917438137376e-05, "loss": 1.3454, "step": 10268 }, { "epoch": 0.6121110978662534, "grad_norm": 2.9992682933807373, "learning_rate": 7.951154094764198e-05, "loss": 1.3158, "step": 10270 }, { "epoch": 0.6122303015854095, "grad_norm": 3.142300844192505, "learning_rate": 7.950390645818143e-05, "loss": 1.3647, "step": 10272 }, { "epoch": 0.6123495053045656, "grad_norm": 2.8211662769317627, "learning_rate": 7.949627091326517e-05, "loss": 1.4106, "step": 10274 }, { "epoch": 0.6124687090237215, "grad_norm": 3.3969061374664307, "learning_rate": 7.948863431316639e-05, "loss": 1.6194, "step": 10276 }, { "epoch": 0.6125879127428776, "grad_norm": 2.4709575176239014, "learning_rate": 7.948099665815827e-05, "loss": 1.3598, "step": 10278 }, { "epoch": 0.6127071164620336, "grad_norm": 3.3662562370300293, "learning_rate": 7.947335794851403e-05, "loss": 1.5704, "step": 10280 }, { "epoch": 0.6128263201811897, "grad_norm": 3.3012101650238037, "learning_rate": 7.946571818450693e-05, "loss": 1.4202, "step": 10282 }, { "epoch": 0.6129455239003457, "grad_norm": 3.141461133956909, "learning_rate": 7.945807736641027e-05, "loss": 1.2798, "step": 10284 }, { "epoch": 0.6130647276195017, "grad_norm": 3.434190273284912, "learning_rate": 7.94504354944974e-05, "loss": 1.4459, "step": 10286 }, { "epoch": 0.6131839313386578, "grad_norm": 2.6452016830444336, "learning_rate": 7.94427925690417e-05, "loss": 1.276, "step": 10288 }, { "epoch": 0.6133031350578138, "grad_norm": 2.7890465259552, "learning_rate": 7.943514859031655e-05, "loss": 1.384, "step": 10290 }, { "epoch": 0.6134223387769698, "grad_norm": 2.8560285568237305, "learning_rate": 7.942750355859545e-05, "loss": 1.328, "step": 10292 }, { "epoch": 0.6135415424961259, "grad_norm": 2.7466840744018555, "learning_rate": 7.941985747415184e-05, "loss": 1.48, "step": 10294 }, { "epoch": 0.6136607462152819, "grad_norm": 3.230708360671997, "learning_rate": 7.941221033725928e-05, "loss": 1.3199, "step": 10296 }, { "epoch": 0.613779949934438, "grad_norm": 3.0988550186157227, "learning_rate": 7.940456214819132e-05, "loss": 1.3575, "step": 10298 }, { "epoch": 0.613899153653594, "grad_norm": 2.9452431201934814, "learning_rate": 7.939691290722158e-05, "loss": 1.3744, "step": 10300 }, { "epoch": 0.61401835737275, "grad_norm": 2.9550304412841797, "learning_rate": 7.938926261462366e-05, "loss": 1.3107, "step": 10302 }, { "epoch": 0.6141375610919061, "grad_norm": 2.6341588497161865, "learning_rate": 7.938161127067128e-05, "loss": 1.3231, "step": 10304 }, { "epoch": 0.6142567648110621, "grad_norm": 3.3287439346313477, "learning_rate": 7.937395887563813e-05, "loss": 1.5049, "step": 10306 }, { "epoch": 0.6143759685302181, "grad_norm": 3.141537666320801, "learning_rate": 7.936630542979797e-05, "loss": 1.3807, "step": 10308 }, { "epoch": 0.6144951722493742, "grad_norm": 3.024587869644165, "learning_rate": 7.935865093342459e-05, "loss": 1.4339, "step": 10310 }, { "epoch": 0.6146143759685302, "grad_norm": 3.6708292961120605, "learning_rate": 7.935099538679181e-05, "loss": 1.4679, "step": 10312 }, { "epoch": 0.6147335796876863, "grad_norm": 2.8555188179016113, "learning_rate": 7.93433387901735e-05, "loss": 1.3333, "step": 10314 }, { "epoch": 0.6148527834068422, "grad_norm": 3.5215399265289307, "learning_rate": 7.933568114384357e-05, "loss": 1.4324, "step": 10316 }, { "epoch": 0.6149719871259983, "grad_norm": 3.2243168354034424, "learning_rate": 7.932802244807596e-05, "loss": 1.4879, "step": 10318 }, { "epoch": 0.6150911908451544, "grad_norm": 3.0148510932922363, "learning_rate": 7.932036270314465e-05, "loss": 1.5786, "step": 10320 }, { "epoch": 0.6152103945643104, "grad_norm": 3.009927749633789, "learning_rate": 7.931270190932362e-05, "loss": 1.4426, "step": 10322 }, { "epoch": 0.6153295982834665, "grad_norm": 2.966144561767578, "learning_rate": 7.930504006688697e-05, "loss": 1.4884, "step": 10324 }, { "epoch": 0.6154488020026225, "grad_norm": 3.195944309234619, "learning_rate": 7.929737717610877e-05, "loss": 1.3999, "step": 10326 }, { "epoch": 0.6155680057217785, "grad_norm": 3.1256539821624756, "learning_rate": 7.928971323726315e-05, "loss": 1.4793, "step": 10328 }, { "epoch": 0.6156872094409346, "grad_norm": 3.7225265502929688, "learning_rate": 7.928204825062426e-05, "loss": 1.2845, "step": 10330 }, { "epoch": 0.6158064131600905, "grad_norm": 3.120680809020996, "learning_rate": 7.927438221646632e-05, "loss": 1.4656, "step": 10332 }, { "epoch": 0.6159256168792466, "grad_norm": 3.053776502609253, "learning_rate": 7.926671513506358e-05, "loss": 1.3547, "step": 10334 }, { "epoch": 0.6160448205984027, "grad_norm": 3.0954437255859375, "learning_rate": 7.92590470066903e-05, "loss": 1.4016, "step": 10336 }, { "epoch": 0.6161640243175587, "grad_norm": 2.8141021728515625, "learning_rate": 7.925137783162078e-05, "loss": 1.3059, "step": 10338 }, { "epoch": 0.6162832280367148, "grad_norm": 3.0158703327178955, "learning_rate": 7.924370761012943e-05, "loss": 1.3156, "step": 10340 }, { "epoch": 0.6164024317558707, "grad_norm": 2.9699671268463135, "learning_rate": 7.923603634249057e-05, "loss": 1.2827, "step": 10342 }, { "epoch": 0.6165216354750268, "grad_norm": 3.2444076538085938, "learning_rate": 7.922836402897868e-05, "loss": 1.2435, "step": 10344 }, { "epoch": 0.6166408391941829, "grad_norm": 2.9285058975219727, "learning_rate": 7.92206906698682e-05, "loss": 1.3355, "step": 10346 }, { "epoch": 0.6167600429133389, "grad_norm": 2.802932024002075, "learning_rate": 7.921301626543362e-05, "loss": 1.3669, "step": 10348 }, { "epoch": 0.6168792466324949, "grad_norm": 3.3474671840667725, "learning_rate": 7.920534081594952e-05, "loss": 1.358, "step": 10350 }, { "epoch": 0.616998450351651, "grad_norm": 3.2892544269561768, "learning_rate": 7.919766432169045e-05, "loss": 1.4081, "step": 10352 }, { "epoch": 0.617117654070807, "grad_norm": 3.2523441314697266, "learning_rate": 7.918998678293102e-05, "loss": 1.3621, "step": 10354 }, { "epoch": 0.6172368577899631, "grad_norm": 3.0477492809295654, "learning_rate": 7.918230819994589e-05, "loss": 1.3675, "step": 10356 }, { "epoch": 0.617356061509119, "grad_norm": 3.311965227127075, "learning_rate": 7.917462857300976e-05, "loss": 1.4173, "step": 10358 }, { "epoch": 0.6174752652282751, "grad_norm": 2.8687360286712646, "learning_rate": 7.916694790239734e-05, "loss": 1.4114, "step": 10360 }, { "epoch": 0.6175944689474312, "grad_norm": 2.757845163345337, "learning_rate": 7.915926618838338e-05, "loss": 1.4134, "step": 10362 }, { "epoch": 0.6177136726665872, "grad_norm": 2.943242073059082, "learning_rate": 7.915158343124273e-05, "loss": 1.2632, "step": 10364 }, { "epoch": 0.6178328763857432, "grad_norm": 3.1504673957824707, "learning_rate": 7.914389963125019e-05, "loss": 1.4157, "step": 10366 }, { "epoch": 0.6179520801048993, "grad_norm": 2.9947092533111572, "learning_rate": 7.913621478868064e-05, "loss": 1.2721, "step": 10368 }, { "epoch": 0.6180712838240553, "grad_norm": 3.2420754432678223, "learning_rate": 7.912852890380899e-05, "loss": 1.3914, "step": 10370 }, { "epoch": 0.6181904875432114, "grad_norm": 3.358328342437744, "learning_rate": 7.912084197691023e-05, "loss": 1.3796, "step": 10372 }, { "epoch": 0.6183096912623673, "grad_norm": 3.145965814590454, "learning_rate": 7.91131540082593e-05, "loss": 1.5259, "step": 10374 }, { "epoch": 0.6184288949815234, "grad_norm": 3.0187628269195557, "learning_rate": 7.910546499813124e-05, "loss": 1.234, "step": 10376 }, { "epoch": 0.6185480987006795, "grad_norm": 3.344261884689331, "learning_rate": 7.909777494680111e-05, "loss": 1.463, "step": 10378 }, { "epoch": 0.6186673024198355, "grad_norm": 3.2599129676818848, "learning_rate": 7.909008385454401e-05, "loss": 1.3032, "step": 10380 }, { "epoch": 0.6187865061389916, "grad_norm": 3.0185067653656006, "learning_rate": 7.90823917216351e-05, "loss": 1.4195, "step": 10382 }, { "epoch": 0.6189057098581475, "grad_norm": 2.9460599422454834, "learning_rate": 7.907469854834952e-05, "loss": 1.4613, "step": 10384 }, { "epoch": 0.6190249135773036, "grad_norm": 2.889348030090332, "learning_rate": 7.906700433496249e-05, "loss": 1.2721, "step": 10386 }, { "epoch": 0.6191441172964597, "grad_norm": 3.0630714893341064, "learning_rate": 7.905930908174928e-05, "loss": 1.3047, "step": 10388 }, { "epoch": 0.6192633210156157, "grad_norm": 3.0775089263916016, "learning_rate": 7.905161278898514e-05, "loss": 1.3587, "step": 10390 }, { "epoch": 0.6193825247347717, "grad_norm": 2.7180371284484863, "learning_rate": 7.904391545694543e-05, "loss": 1.3788, "step": 10392 }, { "epoch": 0.6195017284539278, "grad_norm": 3.1437506675720215, "learning_rate": 7.903621708590548e-05, "loss": 1.4121, "step": 10394 }, { "epoch": 0.6196209321730838, "grad_norm": 2.9821691513061523, "learning_rate": 7.902851767614069e-05, "loss": 1.3455, "step": 10396 }, { "epoch": 0.6197401358922399, "grad_norm": 3.0585219860076904, "learning_rate": 7.902081722792651e-05, "loss": 1.4518, "step": 10398 }, { "epoch": 0.6198593396113958, "grad_norm": 3.424267292022705, "learning_rate": 7.901311574153841e-05, "loss": 1.3096, "step": 10400 }, { "epoch": 0.6199785433305519, "grad_norm": 3.0986850261688232, "learning_rate": 7.900541321725187e-05, "loss": 1.3743, "step": 10402 }, { "epoch": 0.620097747049708, "grad_norm": 3.1641905307769775, "learning_rate": 7.89977096553425e-05, "loss": 1.3544, "step": 10404 }, { "epoch": 0.620216950768864, "grad_norm": 3.219461679458618, "learning_rate": 7.899000505608583e-05, "loss": 1.4218, "step": 10406 }, { "epoch": 0.62033615448802, "grad_norm": 3.000714063644409, "learning_rate": 7.898229941975747e-05, "loss": 1.2082, "step": 10408 }, { "epoch": 0.620455358207176, "grad_norm": 3.04015851020813, "learning_rate": 7.89745927466331e-05, "loss": 1.3642, "step": 10410 }, { "epoch": 0.6205745619263321, "grad_norm": 3.3974082469940186, "learning_rate": 7.896688503698841e-05, "loss": 1.2641, "step": 10412 }, { "epoch": 0.6206937656454882, "grad_norm": 2.9095797538757324, "learning_rate": 7.895917629109915e-05, "loss": 1.377, "step": 10414 }, { "epoch": 0.6208129693646441, "grad_norm": 3.1768815517425537, "learning_rate": 7.895146650924105e-05, "loss": 1.2879, "step": 10416 }, { "epoch": 0.6209321730838002, "grad_norm": 2.7980239391326904, "learning_rate": 7.894375569168996e-05, "loss": 1.363, "step": 10418 }, { "epoch": 0.6210513768029563, "grad_norm": 3.1265323162078857, "learning_rate": 7.893604383872169e-05, "loss": 1.4624, "step": 10420 }, { "epoch": 0.6211705805221123, "grad_norm": 3.389273166656494, "learning_rate": 7.892833095061214e-05, "loss": 1.4491, "step": 10422 }, { "epoch": 0.6212897842412684, "grad_norm": 2.6013307571411133, "learning_rate": 7.89206170276372e-05, "loss": 1.3785, "step": 10424 }, { "epoch": 0.6214089879604243, "grad_norm": 2.8677635192871094, "learning_rate": 7.891290207007283e-05, "loss": 1.3853, "step": 10426 }, { "epoch": 0.6215281916795804, "grad_norm": 2.853498935699463, "learning_rate": 7.890518607819504e-05, "loss": 1.3446, "step": 10428 }, { "epoch": 0.6216473953987365, "grad_norm": 2.9534642696380615, "learning_rate": 7.889746905227983e-05, "loss": 1.4198, "step": 10430 }, { "epoch": 0.6217665991178924, "grad_norm": 3.0092968940734863, "learning_rate": 7.888975099260332e-05, "loss": 1.3575, "step": 10432 }, { "epoch": 0.6218858028370485, "grad_norm": 3.24371337890625, "learning_rate": 7.888203189944154e-05, "loss": 1.3927, "step": 10434 }, { "epoch": 0.6220050065562045, "grad_norm": 2.862438201904297, "learning_rate": 7.887431177307068e-05, "loss": 1.2936, "step": 10436 }, { "epoch": 0.6221242102753606, "grad_norm": 3.150186777114868, "learning_rate": 7.886659061376686e-05, "loss": 1.4681, "step": 10438 }, { "epoch": 0.6222434139945167, "grad_norm": 2.9257895946502686, "learning_rate": 7.885886842180636e-05, "loss": 1.3039, "step": 10440 }, { "epoch": 0.6223626177136726, "grad_norm": 3.167534589767456, "learning_rate": 7.885114519746539e-05, "loss": 1.4727, "step": 10442 }, { "epoch": 0.6224818214328287, "grad_norm": 3.3950843811035156, "learning_rate": 7.884342094102024e-05, "loss": 1.3671, "step": 10444 }, { "epoch": 0.6226010251519848, "grad_norm": 3.063199281692505, "learning_rate": 7.883569565274723e-05, "loss": 1.3374, "step": 10446 }, { "epoch": 0.6227202288711408, "grad_norm": 3.318871259689331, "learning_rate": 7.882796933292274e-05, "loss": 1.3204, "step": 10448 }, { "epoch": 0.6228394325902968, "grad_norm": 2.985567331314087, "learning_rate": 7.882024198182314e-05, "loss": 1.4648, "step": 10450 }, { "epoch": 0.6229586363094528, "grad_norm": 3.155559539794922, "learning_rate": 7.881251359972487e-05, "loss": 1.3251, "step": 10452 }, { "epoch": 0.6230778400286089, "grad_norm": 2.7275946140289307, "learning_rate": 7.880478418690443e-05, "loss": 1.2312, "step": 10454 }, { "epoch": 0.623197043747765, "grad_norm": 3.006558418273926, "learning_rate": 7.879705374363831e-05, "loss": 1.2734, "step": 10456 }, { "epoch": 0.6233162474669209, "grad_norm": 2.7995359897613525, "learning_rate": 7.878932227020303e-05, "loss": 1.3627, "step": 10458 }, { "epoch": 0.623435451186077, "grad_norm": 3.0198705196380615, "learning_rate": 7.878158976687519e-05, "loss": 1.302, "step": 10460 }, { "epoch": 0.6235546549052331, "grad_norm": 2.6362316608428955, "learning_rate": 7.877385623393141e-05, "loss": 1.2308, "step": 10462 }, { "epoch": 0.6236738586243891, "grad_norm": 3.0962038040161133, "learning_rate": 7.876612167164835e-05, "loss": 1.522, "step": 10464 }, { "epoch": 0.6237930623435451, "grad_norm": 2.7445178031921387, "learning_rate": 7.87583860803027e-05, "loss": 1.2467, "step": 10466 }, { "epoch": 0.6239122660627011, "grad_norm": 3.010044813156128, "learning_rate": 7.875064946017117e-05, "loss": 1.3898, "step": 10468 }, { "epoch": 0.6240314697818572, "grad_norm": 3.163115978240967, "learning_rate": 7.874291181153055e-05, "loss": 1.372, "step": 10470 }, { "epoch": 0.6241506735010133, "grad_norm": 3.2118096351623535, "learning_rate": 7.873517313465764e-05, "loss": 1.4927, "step": 10472 }, { "epoch": 0.6242698772201692, "grad_norm": 3.093336343765259, "learning_rate": 7.872743342982924e-05, "loss": 1.3131, "step": 10474 }, { "epoch": 0.6243890809393253, "grad_norm": 3.0341413021087646, "learning_rate": 7.871969269732229e-05, "loss": 1.3378, "step": 10476 }, { "epoch": 0.6245082846584813, "grad_norm": 3.1220157146453857, "learning_rate": 7.871195093741365e-05, "loss": 1.4024, "step": 10478 }, { "epoch": 0.6246274883776374, "grad_norm": 2.869910955429077, "learning_rate": 7.87042081503803e-05, "loss": 1.3651, "step": 10480 }, { "epoch": 0.6247466920967935, "grad_norm": 2.929577589035034, "learning_rate": 7.86964643364992e-05, "loss": 1.2764, "step": 10482 }, { "epoch": 0.6248658958159494, "grad_norm": 3.087352991104126, "learning_rate": 7.868871949604739e-05, "loss": 1.4636, "step": 10484 }, { "epoch": 0.6249850995351055, "grad_norm": 3.1168243885040283, "learning_rate": 7.868097362930194e-05, "loss": 1.4162, "step": 10486 }, { "epoch": 0.6251043032542616, "grad_norm": 3.0617494583129883, "learning_rate": 7.867322673653991e-05, "loss": 1.4099, "step": 10488 }, { "epoch": 0.6252235069734176, "grad_norm": 3.023580312728882, "learning_rate": 7.866547881803847e-05, "loss": 1.4982, "step": 10490 }, { "epoch": 0.6253427106925736, "grad_norm": 2.977571964263916, "learning_rate": 7.865772987407478e-05, "loss": 1.4702, "step": 10492 }, { "epoch": 0.6254619144117296, "grad_norm": 3.262263059616089, "learning_rate": 7.864997990492604e-05, "loss": 1.2773, "step": 10494 }, { "epoch": 0.6255811181308857, "grad_norm": 3.147216796875, "learning_rate": 7.864222891086948e-05, "loss": 1.3614, "step": 10496 }, { "epoch": 0.6257003218500418, "grad_norm": 3.069394826889038, "learning_rate": 7.863447689218241e-05, "loss": 1.3775, "step": 10498 }, { "epoch": 0.6258195255691977, "grad_norm": 3.1725025177001953, "learning_rate": 7.862672384914211e-05, "loss": 1.305, "step": 10500 }, { "epoch": 0.6259387292883538, "grad_norm": 3.0853214263916016, "learning_rate": 7.861896978202596e-05, "loss": 1.4118, "step": 10502 }, { "epoch": 0.6260579330075098, "grad_norm": 2.995471954345703, "learning_rate": 7.861121469111135e-05, "loss": 1.3355, "step": 10504 }, { "epoch": 0.6261771367266659, "grad_norm": 2.919322967529297, "learning_rate": 7.86034585766757e-05, "loss": 1.3147, "step": 10506 }, { "epoch": 0.6262963404458219, "grad_norm": 3.0831496715545654, "learning_rate": 7.859570143899646e-05, "loss": 1.5018, "step": 10508 }, { "epoch": 0.6264155441649779, "grad_norm": 2.802781105041504, "learning_rate": 7.858794327835115e-05, "loss": 1.3142, "step": 10510 }, { "epoch": 0.626534747884134, "grad_norm": 3.067509889602661, "learning_rate": 7.858018409501729e-05, "loss": 1.286, "step": 10512 }, { "epoch": 0.6266539516032901, "grad_norm": 2.814260244369507, "learning_rate": 7.857242388927246e-05, "loss": 1.2096, "step": 10514 }, { "epoch": 0.626773155322446, "grad_norm": 3.1987783908843994, "learning_rate": 7.856466266139427e-05, "loss": 1.3955, "step": 10516 }, { "epoch": 0.6268923590416021, "grad_norm": 3.4163384437561035, "learning_rate": 7.855690041166036e-05, "loss": 1.3246, "step": 10518 }, { "epoch": 0.6270115627607581, "grad_norm": 3.262545347213745, "learning_rate": 7.854913714034842e-05, "loss": 1.7192, "step": 10520 }, { "epoch": 0.6271307664799142, "grad_norm": 3.3753654956817627, "learning_rate": 7.854137284773616e-05, "loss": 1.4318, "step": 10522 }, { "epoch": 0.6272499701990703, "grad_norm": 3.3667731285095215, "learning_rate": 7.853360753410134e-05, "loss": 1.3972, "step": 10524 }, { "epoch": 0.6273691739182262, "grad_norm": 3.1456451416015625, "learning_rate": 7.852584119972178e-05, "loss": 1.3849, "step": 10526 }, { "epoch": 0.6274883776373823, "grad_norm": 2.804746150970459, "learning_rate": 7.851807384487524e-05, "loss": 1.246, "step": 10528 }, { "epoch": 0.6276075813565383, "grad_norm": 3.0537102222442627, "learning_rate": 7.851030546983964e-05, "loss": 1.3456, "step": 10530 }, { "epoch": 0.6277267850756943, "grad_norm": 2.9155383110046387, "learning_rate": 7.850253607489287e-05, "loss": 1.2917, "step": 10532 }, { "epoch": 0.6278459887948504, "grad_norm": 3.1201276779174805, "learning_rate": 7.849476566031286e-05, "loss": 1.3346, "step": 10534 }, { "epoch": 0.6279651925140064, "grad_norm": 3.0306944847106934, "learning_rate": 7.848699422637757e-05, "loss": 1.4677, "step": 10536 }, { "epoch": 0.6280843962331625, "grad_norm": 2.902397871017456, "learning_rate": 7.847922177336506e-05, "loss": 1.3447, "step": 10538 }, { "epoch": 0.6282035999523186, "grad_norm": 3.1312849521636963, "learning_rate": 7.847144830155334e-05, "loss": 1.3684, "step": 10540 }, { "epoch": 0.6283228036714745, "grad_norm": 3.142610549926758, "learning_rate": 7.84636738112205e-05, "loss": 1.4496, "step": 10542 }, { "epoch": 0.6284420073906306, "grad_norm": 2.968919038772583, "learning_rate": 7.845589830264465e-05, "loss": 1.372, "step": 10544 }, { "epoch": 0.6285612111097866, "grad_norm": 2.990802526473999, "learning_rate": 7.844812177610396e-05, "loss": 1.3392, "step": 10546 }, { "epoch": 0.6286804148289427, "grad_norm": 2.9551992416381836, "learning_rate": 7.844034423187663e-05, "loss": 1.4361, "step": 10548 }, { "epoch": 0.6287996185480987, "grad_norm": 2.9495906829833984, "learning_rate": 7.843256567024087e-05, "loss": 1.423, "step": 10550 }, { "epoch": 0.6289188222672547, "grad_norm": 2.8732519149780273, "learning_rate": 7.842478609147495e-05, "loss": 1.4285, "step": 10552 }, { "epoch": 0.6290380259864108, "grad_norm": 2.853835344314575, "learning_rate": 7.841700549585719e-05, "loss": 1.3108, "step": 10554 }, { "epoch": 0.6291572297055669, "grad_norm": 2.94722580909729, "learning_rate": 7.840922388366591e-05, "loss": 1.5037, "step": 10556 }, { "epoch": 0.6292764334247228, "grad_norm": 3.2262017726898193, "learning_rate": 7.840144125517949e-05, "loss": 1.4464, "step": 10558 }, { "epoch": 0.6293956371438789, "grad_norm": 2.806163787841797, "learning_rate": 7.839365761067634e-05, "loss": 1.3196, "step": 10560 }, { "epoch": 0.6295148408630349, "grad_norm": 3.062066078186035, "learning_rate": 7.838587295043491e-05, "loss": 1.4366, "step": 10562 }, { "epoch": 0.629634044582191, "grad_norm": 2.9189865589141846, "learning_rate": 7.837808727473368e-05, "loss": 1.5463, "step": 10564 }, { "epoch": 0.629753248301347, "grad_norm": 3.1702725887298584, "learning_rate": 7.837030058385118e-05, "loss": 1.3989, "step": 10566 }, { "epoch": 0.629872452020503, "grad_norm": 3.363492250442505, "learning_rate": 7.836251287806595e-05, "loss": 1.4039, "step": 10568 }, { "epoch": 0.6299916557396591, "grad_norm": 2.946619749069214, "learning_rate": 7.835472415765661e-05, "loss": 1.3097, "step": 10570 }, { "epoch": 0.6301108594588151, "grad_norm": 3.174436092376709, "learning_rate": 7.834693442290178e-05, "loss": 1.3081, "step": 10572 }, { "epoch": 0.6302300631779711, "grad_norm": 3.2021749019622803, "learning_rate": 7.833914367408009e-05, "loss": 1.3491, "step": 10574 }, { "epoch": 0.6303492668971272, "grad_norm": 3.3198330402374268, "learning_rate": 7.833135191147028e-05, "loss": 1.4733, "step": 10576 }, { "epoch": 0.6304684706162832, "grad_norm": 3.0955193042755127, "learning_rate": 7.832355913535107e-05, "loss": 1.3805, "step": 10578 }, { "epoch": 0.6305876743354393, "grad_norm": 2.831897258758545, "learning_rate": 7.831576534600124e-05, "loss": 1.2827, "step": 10580 }, { "epoch": 0.6307068780545954, "grad_norm": 3.199599266052246, "learning_rate": 7.830797054369963e-05, "loss": 1.3773, "step": 10582 }, { "epoch": 0.6308260817737513, "grad_norm": 3.0557267665863037, "learning_rate": 7.830017472872504e-05, "loss": 1.5126, "step": 10584 }, { "epoch": 0.6309452854929074, "grad_norm": 2.6628010272979736, "learning_rate": 7.829237790135637e-05, "loss": 1.4029, "step": 10586 }, { "epoch": 0.6310644892120634, "grad_norm": 2.8902716636657715, "learning_rate": 7.828458006187254e-05, "loss": 1.2891, "step": 10588 }, { "epoch": 0.6311836929312195, "grad_norm": 3.28741717338562, "learning_rate": 7.827678121055251e-05, "loss": 1.3434, "step": 10590 }, { "epoch": 0.6313028966503755, "grad_norm": 3.1102943420410156, "learning_rate": 7.826898134767527e-05, "loss": 1.4329, "step": 10592 }, { "epoch": 0.6314221003695315, "grad_norm": 2.9140403270721436, "learning_rate": 7.826118047351985e-05, "loss": 1.2608, "step": 10594 }, { "epoch": 0.6315413040886876, "grad_norm": 2.9905588626861572, "learning_rate": 7.82533785883653e-05, "loss": 1.3186, "step": 10596 }, { "epoch": 0.6316605078078436, "grad_norm": 3.0775482654571533, "learning_rate": 7.824557569249073e-05, "loss": 1.4366, "step": 10598 }, { "epoch": 0.6317797115269996, "grad_norm": 2.8186261653900146, "learning_rate": 7.823777178617528e-05, "loss": 1.4717, "step": 10600 }, { "epoch": 0.6318989152461557, "grad_norm": 3.369844913482666, "learning_rate": 7.822996686969813e-05, "loss": 1.3546, "step": 10602 }, { "epoch": 0.6320181189653117, "grad_norm": 3.1029813289642334, "learning_rate": 7.822216094333847e-05, "loss": 1.1911, "step": 10604 }, { "epoch": 0.6321373226844678, "grad_norm": 3.0316054821014404, "learning_rate": 7.821435400737554e-05, "loss": 1.4117, "step": 10606 }, { "epoch": 0.6322565264036238, "grad_norm": 2.6658170223236084, "learning_rate": 7.820654606208864e-05, "loss": 1.2249, "step": 10608 }, { "epoch": 0.6323757301227798, "grad_norm": 2.884178876876831, "learning_rate": 7.819873710775709e-05, "loss": 1.3085, "step": 10610 }, { "epoch": 0.6324949338419359, "grad_norm": 2.8612613677978516, "learning_rate": 7.819092714466021e-05, "loss": 1.257, "step": 10612 }, { "epoch": 0.6326141375610919, "grad_norm": 3.0260753631591797, "learning_rate": 7.818311617307743e-05, "loss": 1.2374, "step": 10614 }, { "epoch": 0.6327333412802479, "grad_norm": 2.645063638687134, "learning_rate": 7.817530419328816e-05, "loss": 1.2579, "step": 10616 }, { "epoch": 0.632852544999404, "grad_norm": 3.031090259552002, "learning_rate": 7.816749120557185e-05, "loss": 1.3071, "step": 10618 }, { "epoch": 0.63297174871856, "grad_norm": 2.9337103366851807, "learning_rate": 7.8159677210208e-05, "loss": 1.3017, "step": 10620 }, { "epoch": 0.6330909524377161, "grad_norm": 2.9738805294036865, "learning_rate": 7.815186220747617e-05, "loss": 1.2796, "step": 10622 }, { "epoch": 0.633210156156872, "grad_norm": 3.25179123878479, "learning_rate": 7.814404619765588e-05, "loss": 1.3892, "step": 10624 }, { "epoch": 0.6333293598760281, "grad_norm": 3.081598997116089, "learning_rate": 7.813622918102679e-05, "loss": 1.4467, "step": 10626 }, { "epoch": 0.6334485635951842, "grad_norm": 3.2068073749542236, "learning_rate": 7.812841115786849e-05, "loss": 1.5233, "step": 10628 }, { "epoch": 0.6335677673143402, "grad_norm": 3.2374351024627686, "learning_rate": 7.81205921284607e-05, "loss": 1.2877, "step": 10630 }, { "epoch": 0.6336869710334962, "grad_norm": 3.3403818607330322, "learning_rate": 7.811277209308313e-05, "loss": 1.4117, "step": 10632 }, { "epoch": 0.6338061747526523, "grad_norm": 3.250915765762329, "learning_rate": 7.810495105201547e-05, "loss": 1.3672, "step": 10634 }, { "epoch": 0.6339253784718083, "grad_norm": 3.1146883964538574, "learning_rate": 7.809712900553758e-05, "loss": 1.4356, "step": 10636 }, { "epoch": 0.6340445821909644, "grad_norm": 3.3531692028045654, "learning_rate": 7.808930595392924e-05, "loss": 1.4283, "step": 10638 }, { "epoch": 0.6341637859101203, "grad_norm": 3.1073782444000244, "learning_rate": 7.808148189747036e-05, "loss": 1.3583, "step": 10640 }, { "epoch": 0.6342829896292764, "grad_norm": 3.1907331943511963, "learning_rate": 7.807365683644074e-05, "loss": 1.406, "step": 10642 }, { "epoch": 0.6344021933484325, "grad_norm": 3.5130929946899414, "learning_rate": 7.80658307711204e-05, "loss": 1.3909, "step": 10644 }, { "epoch": 0.6345213970675885, "grad_norm": 2.990959405899048, "learning_rate": 7.805800370178925e-05, "loss": 1.3631, "step": 10646 }, { "epoch": 0.6346406007867446, "grad_norm": 3.009589910507202, "learning_rate": 7.80501756287273e-05, "loss": 1.3612, "step": 10648 }, { "epoch": 0.6347598045059006, "grad_norm": 3.3463728427886963, "learning_rate": 7.80423465522146e-05, "loss": 1.4247, "step": 10650 }, { "epoch": 0.6348790082250566, "grad_norm": 3.1499338150024414, "learning_rate": 7.803451647253122e-05, "loss": 1.4266, "step": 10652 }, { "epoch": 0.6349982119442127, "grad_norm": 2.9483706951141357, "learning_rate": 7.802668538995727e-05, "loss": 1.4147, "step": 10654 }, { "epoch": 0.6351174156633687, "grad_norm": 3.0838072299957275, "learning_rate": 7.80188533047729e-05, "loss": 1.3703, "step": 10656 }, { "epoch": 0.6352366193825247, "grad_norm": 2.850449562072754, "learning_rate": 7.801102021725827e-05, "loss": 1.3715, "step": 10658 }, { "epoch": 0.6353558231016808, "grad_norm": 3.011746406555176, "learning_rate": 7.800318612769361e-05, "loss": 1.4191, "step": 10660 }, { "epoch": 0.6354750268208368, "grad_norm": 3.0663809776306152, "learning_rate": 7.799535103635918e-05, "loss": 1.407, "step": 10662 }, { "epoch": 0.6355942305399929, "grad_norm": 3.1554720401763916, "learning_rate": 7.798751494353524e-05, "loss": 1.4043, "step": 10664 }, { "epoch": 0.6357134342591488, "grad_norm": 3.384171724319458, "learning_rate": 7.797967784950215e-05, "loss": 1.378, "step": 10666 }, { "epoch": 0.6358326379783049, "grad_norm": 3.167113780975342, "learning_rate": 7.797183975454023e-05, "loss": 1.2109, "step": 10668 }, { "epoch": 0.635951841697461, "grad_norm": 2.87438702583313, "learning_rate": 7.79640006589299e-05, "loss": 1.2426, "step": 10670 }, { "epoch": 0.636071045416617, "grad_norm": 3.1111197471618652, "learning_rate": 7.79561605629516e-05, "loss": 1.367, "step": 10672 }, { "epoch": 0.636190249135773, "grad_norm": 3.317965030670166, "learning_rate": 7.794831946688581e-05, "loss": 1.3958, "step": 10674 }, { "epoch": 0.6363094528549291, "grad_norm": 2.937570333480835, "learning_rate": 7.794047737101297e-05, "loss": 1.324, "step": 10676 }, { "epoch": 0.6364286565740851, "grad_norm": 3.0162007808685303, "learning_rate": 7.793263427561367e-05, "loss": 1.3244, "step": 10678 }, { "epoch": 0.6365478602932412, "grad_norm": 3.313217878341675, "learning_rate": 7.792479018096848e-05, "loss": 1.3555, "step": 10680 }, { "epoch": 0.6366670640123971, "grad_norm": 3.0587942600250244, "learning_rate": 7.791694508735799e-05, "loss": 1.3514, "step": 10682 }, { "epoch": 0.6367862677315532, "grad_norm": 3.294116497039795, "learning_rate": 7.790909899506285e-05, "loss": 1.3595, "step": 10684 }, { "epoch": 0.6369054714507093, "grad_norm": 2.801009178161621, "learning_rate": 7.790125190436377e-05, "loss": 1.3718, "step": 10686 }, { "epoch": 0.6370246751698653, "grad_norm": 3.261120080947876, "learning_rate": 7.789340381554144e-05, "loss": 1.3899, "step": 10688 }, { "epoch": 0.6371438788890214, "grad_norm": 2.8921661376953125, "learning_rate": 7.788555472887662e-05, "loss": 1.2027, "step": 10690 }, { "epoch": 0.6372630826081773, "grad_norm": 3.2371151447296143, "learning_rate": 7.787770464465008e-05, "loss": 1.5201, "step": 10692 }, { "epoch": 0.6373822863273334, "grad_norm": 3.2371268272399902, "learning_rate": 7.786985356314267e-05, "loss": 1.3788, "step": 10694 }, { "epoch": 0.6375014900464895, "grad_norm": 3.399108648300171, "learning_rate": 7.786200148463524e-05, "loss": 1.4287, "step": 10696 }, { "epoch": 0.6376206937656455, "grad_norm": 3.0058701038360596, "learning_rate": 7.785414840940871e-05, "loss": 1.4468, "step": 10698 }, { "epoch": 0.6377398974848015, "grad_norm": 3.051701784133911, "learning_rate": 7.784629433774397e-05, "loss": 1.4395, "step": 10700 }, { "epoch": 0.6378591012039576, "grad_norm": 3.0339853763580322, "learning_rate": 7.783843926992201e-05, "loss": 1.341, "step": 10702 }, { "epoch": 0.6379783049231136, "grad_norm": 2.9352779388427734, "learning_rate": 7.783058320622384e-05, "loss": 1.4689, "step": 10704 }, { "epoch": 0.6380975086422697, "grad_norm": 3.0101053714752197, "learning_rate": 7.78227261469305e-05, "loss": 1.373, "step": 10706 }, { "epoch": 0.6382167123614256, "grad_norm": 3.558260917663574, "learning_rate": 7.781486809232301e-05, "loss": 1.327, "step": 10708 }, { "epoch": 0.6383359160805817, "grad_norm": 2.945641040802002, "learning_rate": 7.780700904268255e-05, "loss": 1.225, "step": 10710 }, { "epoch": 0.6384551197997378, "grad_norm": 3.0865650177001953, "learning_rate": 7.779914899829022e-05, "loss": 1.4553, "step": 10712 }, { "epoch": 0.6385743235188938, "grad_norm": 2.9399070739746094, "learning_rate": 7.779128795942722e-05, "loss": 1.4178, "step": 10714 }, { "epoch": 0.6386935272380498, "grad_norm": 3.3241779804229736, "learning_rate": 7.778342592637476e-05, "loss": 1.3959, "step": 10716 }, { "epoch": 0.6388127309572058, "grad_norm": 3.0156800746917725, "learning_rate": 7.77755628994141e-05, "loss": 1.2711, "step": 10718 }, { "epoch": 0.6389319346763619, "grad_norm": 3.485638380050659, "learning_rate": 7.776769887882653e-05, "loss": 1.3832, "step": 10720 }, { "epoch": 0.639051138395518, "grad_norm": 2.8963255882263184, "learning_rate": 7.775983386489337e-05, "loss": 1.271, "step": 10722 }, { "epoch": 0.6391703421146739, "grad_norm": 3.211223840713501, "learning_rate": 7.775196785789594e-05, "loss": 1.3766, "step": 10724 }, { "epoch": 0.63928954583383, "grad_norm": 2.778764009475708, "learning_rate": 7.77441008581157e-05, "loss": 1.3329, "step": 10726 }, { "epoch": 0.6394087495529861, "grad_norm": 3.0494284629821777, "learning_rate": 7.773623286583403e-05, "loss": 1.2197, "step": 10728 }, { "epoch": 0.6395279532721421, "grad_norm": 2.9423067569732666, "learning_rate": 7.772836388133243e-05, "loss": 1.4003, "step": 10730 }, { "epoch": 0.6396471569912981, "grad_norm": 3.0666208267211914, "learning_rate": 7.772049390489236e-05, "loss": 1.4103, "step": 10732 }, { "epoch": 0.6397663607104541, "grad_norm": 3.1973965167999268, "learning_rate": 7.771262293679537e-05, "loss": 1.3958, "step": 10734 }, { "epoch": 0.6398855644296102, "grad_norm": 2.891338586807251, "learning_rate": 7.770475097732308e-05, "loss": 1.3046, "step": 10736 }, { "epoch": 0.6400047681487663, "grad_norm": 3.0849108695983887, "learning_rate": 7.769687802675702e-05, "loss": 1.2952, "step": 10738 }, { "epoch": 0.6401239718679222, "grad_norm": 2.954972267150879, "learning_rate": 7.76890040853789e-05, "loss": 1.2659, "step": 10740 }, { "epoch": 0.6402431755870783, "grad_norm": 3.223794460296631, "learning_rate": 7.768112915347034e-05, "loss": 1.3702, "step": 10742 }, { "epoch": 0.6403623793062344, "grad_norm": 3.0747923851013184, "learning_rate": 7.767325323131309e-05, "loss": 1.3311, "step": 10744 }, { "epoch": 0.6404815830253904, "grad_norm": 3.2722249031066895, "learning_rate": 7.766537631918889e-05, "loss": 1.333, "step": 10746 }, { "epoch": 0.6406007867445465, "grad_norm": 3.6786887645721436, "learning_rate": 7.765749841737953e-05, "loss": 1.4332, "step": 10748 }, { "epoch": 0.6407199904637024, "grad_norm": 3.1447863578796387, "learning_rate": 7.76496195261668e-05, "loss": 1.3235, "step": 10750 }, { "epoch": 0.6408391941828585, "grad_norm": 3.157985210418701, "learning_rate": 7.76417396458326e-05, "loss": 1.3296, "step": 10752 }, { "epoch": 0.6409583979020146, "grad_norm": 2.6994543075561523, "learning_rate": 7.76338587766588e-05, "loss": 1.361, "step": 10754 }, { "epoch": 0.6410776016211706, "grad_norm": 2.9417226314544678, "learning_rate": 7.762597691892731e-05, "loss": 1.4059, "step": 10756 }, { "epoch": 0.6411968053403266, "grad_norm": 3.030717134475708, "learning_rate": 7.761809407292011e-05, "loss": 1.2939, "step": 10758 }, { "epoch": 0.6413160090594826, "grad_norm": 3.1590118408203125, "learning_rate": 7.761021023891918e-05, "loss": 1.515, "step": 10760 }, { "epoch": 0.6414352127786387, "grad_norm": 3.0391430854797363, "learning_rate": 7.760232541720656e-05, "loss": 1.2943, "step": 10762 }, { "epoch": 0.6415544164977948, "grad_norm": 3.3164806365966797, "learning_rate": 7.759443960806434e-05, "loss": 1.3595, "step": 10764 }, { "epoch": 0.6416736202169507, "grad_norm": 3.335202693939209, "learning_rate": 7.75865528117746e-05, "loss": 1.309, "step": 10766 }, { "epoch": 0.6417928239361068, "grad_norm": 2.9205074310302734, "learning_rate": 7.757866502861946e-05, "loss": 1.3924, "step": 10768 }, { "epoch": 0.6419120276552629, "grad_norm": 3.0388193130493164, "learning_rate": 7.757077625888114e-05, "loss": 1.3395, "step": 10770 }, { "epoch": 0.6420312313744189, "grad_norm": 3.2080068588256836, "learning_rate": 7.75628865028418e-05, "loss": 1.3853, "step": 10772 }, { "epoch": 0.642150435093575, "grad_norm": 3.229597806930542, "learning_rate": 7.755499576078371e-05, "loss": 1.3561, "step": 10774 }, { "epoch": 0.6422696388127309, "grad_norm": 2.93764328956604, "learning_rate": 7.754710403298914e-05, "loss": 1.344, "step": 10776 }, { "epoch": 0.642388842531887, "grad_norm": 2.989926815032959, "learning_rate": 7.75392113197404e-05, "loss": 1.315, "step": 10778 }, { "epoch": 0.6425080462510431, "grad_norm": 3.357855796813965, "learning_rate": 7.753131762131987e-05, "loss": 1.4818, "step": 10780 }, { "epoch": 0.642627249970199, "grad_norm": 3.8818984031677246, "learning_rate": 7.752342293800989e-05, "loss": 1.4786, "step": 10782 }, { "epoch": 0.6427464536893551, "grad_norm": 3.1692473888397217, "learning_rate": 7.751552727009291e-05, "loss": 1.3755, "step": 10784 }, { "epoch": 0.6428656574085111, "grad_norm": 3.0155434608459473, "learning_rate": 7.750763061785138e-05, "loss": 1.4122, "step": 10786 }, { "epoch": 0.6429848611276672, "grad_norm": 3.7854816913604736, "learning_rate": 7.749973298156779e-05, "loss": 1.3471, "step": 10788 }, { "epoch": 0.6431040648468233, "grad_norm": 2.8838679790496826, "learning_rate": 7.749183436152465e-05, "loss": 1.208, "step": 10790 }, { "epoch": 0.6432232685659792, "grad_norm": 3.244828939437866, "learning_rate": 7.748393475800452e-05, "loss": 1.4875, "step": 10792 }, { "epoch": 0.6433424722851353, "grad_norm": 3.2859323024749756, "learning_rate": 7.747603417129004e-05, "loss": 1.4729, "step": 10794 }, { "epoch": 0.6434616760042914, "grad_norm": 3.51990008354187, "learning_rate": 7.746813260166379e-05, "loss": 1.347, "step": 10796 }, { "epoch": 0.6435808797234474, "grad_norm": 2.9625024795532227, "learning_rate": 7.746023004940847e-05, "loss": 1.3465, "step": 10798 }, { "epoch": 0.6437000834426034, "grad_norm": 2.9446351528167725, "learning_rate": 7.745232651480675e-05, "loss": 1.3779, "step": 10800 }, { "epoch": 0.6438192871617594, "grad_norm": 3.3540124893188477, "learning_rate": 7.744442199814139e-05, "loss": 1.4678, "step": 10802 }, { "epoch": 0.6439384908809155, "grad_norm": 2.8351235389709473, "learning_rate": 7.743651649969515e-05, "loss": 1.374, "step": 10804 }, { "epoch": 0.6440576946000716, "grad_norm": 3.0768706798553467, "learning_rate": 7.742861001975086e-05, "loss": 1.4342, "step": 10806 }, { "epoch": 0.6441768983192275, "grad_norm": 3.097745180130005, "learning_rate": 7.742070255859131e-05, "loss": 1.4808, "step": 10808 }, { "epoch": 0.6442961020383836, "grad_norm": 3.2979042530059814, "learning_rate": 7.741279411649944e-05, "loss": 1.4292, "step": 10810 }, { "epoch": 0.6444153057575396, "grad_norm": 2.9690916538238525, "learning_rate": 7.740488469375813e-05, "loss": 1.3809, "step": 10812 }, { "epoch": 0.6445345094766957, "grad_norm": 3.6254189014434814, "learning_rate": 7.739697429065033e-05, "loss": 1.4794, "step": 10814 }, { "epoch": 0.6446537131958517, "grad_norm": 3.1211326122283936, "learning_rate": 7.738906290745901e-05, "loss": 1.4618, "step": 10816 }, { "epoch": 0.6447729169150077, "grad_norm": 3.260906457901001, "learning_rate": 7.73811505444672e-05, "loss": 1.3677, "step": 10818 }, { "epoch": 0.6448921206341638, "grad_norm": 2.897705078125, "learning_rate": 7.737323720195795e-05, "loss": 1.3861, "step": 10820 }, { "epoch": 0.6450113243533199, "grad_norm": 2.7660815715789795, "learning_rate": 7.736532288021436e-05, "loss": 1.3155, "step": 10822 }, { "epoch": 0.6451305280724758, "grad_norm": 3.324896812438965, "learning_rate": 7.735740757951953e-05, "loss": 1.3879, "step": 10824 }, { "epoch": 0.6452497317916319, "grad_norm": 2.8308088779449463, "learning_rate": 7.734949130015665e-05, "loss": 1.29, "step": 10826 }, { "epoch": 0.6453689355107879, "grad_norm": 2.728743553161621, "learning_rate": 7.734157404240887e-05, "loss": 1.2423, "step": 10828 }, { "epoch": 0.645488139229944, "grad_norm": 3.050098180770874, "learning_rate": 7.733365580655945e-05, "loss": 1.4092, "step": 10830 }, { "epoch": 0.6456073429491, "grad_norm": 3.0320043563842773, "learning_rate": 7.732573659289163e-05, "loss": 1.3977, "step": 10832 }, { "epoch": 0.645726546668256, "grad_norm": 2.887434482574463, "learning_rate": 7.731781640168872e-05, "loss": 1.2954, "step": 10834 }, { "epoch": 0.6458457503874121, "grad_norm": 3.095870018005371, "learning_rate": 7.730989523323405e-05, "loss": 1.5245, "step": 10836 }, { "epoch": 0.6459649541065682, "grad_norm": 2.9411864280700684, "learning_rate": 7.7301973087811e-05, "loss": 1.3748, "step": 10838 }, { "epoch": 0.6460841578257241, "grad_norm": 2.9690725803375244, "learning_rate": 7.729404996570296e-05, "loss": 1.343, "step": 10840 }, { "epoch": 0.6462033615448802, "grad_norm": 3.0745909214019775, "learning_rate": 7.728612586719335e-05, "loss": 1.3909, "step": 10842 }, { "epoch": 0.6463225652640362, "grad_norm": 2.877251148223877, "learning_rate": 7.727820079256565e-05, "loss": 1.3152, "step": 10844 }, { "epoch": 0.6464417689831923, "grad_norm": 3.184401750564575, "learning_rate": 7.72702747421034e-05, "loss": 1.3723, "step": 10846 }, { "epoch": 0.6465609727023484, "grad_norm": 3.202462673187256, "learning_rate": 7.726234771609011e-05, "loss": 1.4995, "step": 10848 }, { "epoch": 0.6466801764215043, "grad_norm": 3.2408154010772705, "learning_rate": 7.725441971480938e-05, "loss": 1.2752, "step": 10850 }, { "epoch": 0.6467993801406604, "grad_norm": 2.931123733520508, "learning_rate": 7.724649073854477e-05, "loss": 1.408, "step": 10852 }, { "epoch": 0.6469185838598164, "grad_norm": 2.9713165760040283, "learning_rate": 7.723856078758001e-05, "loss": 1.5181, "step": 10854 }, { "epoch": 0.6470377875789725, "grad_norm": 2.9185681343078613, "learning_rate": 7.723062986219871e-05, "loss": 1.3731, "step": 10856 }, { "epoch": 0.6471569912981285, "grad_norm": 3.1757826805114746, "learning_rate": 7.722269796268461e-05, "loss": 1.3911, "step": 10858 }, { "epoch": 0.6472761950172845, "grad_norm": 2.7725276947021484, "learning_rate": 7.721476508932146e-05, "loss": 1.2685, "step": 10860 }, { "epoch": 0.6473953987364406, "grad_norm": 3.689366102218628, "learning_rate": 7.720683124239305e-05, "loss": 1.365, "step": 10862 }, { "epoch": 0.6475146024555967, "grad_norm": 3.1999921798706055, "learning_rate": 7.71988964221832e-05, "loss": 1.5724, "step": 10864 }, { "epoch": 0.6476338061747526, "grad_norm": 3.15842604637146, "learning_rate": 7.719096062897578e-05, "loss": 1.3764, "step": 10866 }, { "epoch": 0.6477530098939087, "grad_norm": 3.1586618423461914, "learning_rate": 7.718302386305465e-05, "loss": 1.4008, "step": 10868 }, { "epoch": 0.6478722136130647, "grad_norm": 2.859060525894165, "learning_rate": 7.717508612470376e-05, "loss": 1.3934, "step": 10870 }, { "epoch": 0.6479914173322208, "grad_norm": 2.9460504055023193, "learning_rate": 7.716714741420705e-05, "loss": 1.3472, "step": 10872 }, { "epoch": 0.6481106210513768, "grad_norm": 2.748647928237915, "learning_rate": 7.715920773184853e-05, "loss": 1.3029, "step": 10874 }, { "epoch": 0.6482298247705328, "grad_norm": 3.061927080154419, "learning_rate": 7.715126707791223e-05, "loss": 1.235, "step": 10876 }, { "epoch": 0.6483490284896889, "grad_norm": 2.975923776626587, "learning_rate": 7.714332545268221e-05, "loss": 1.385, "step": 10878 }, { "epoch": 0.6484682322088449, "grad_norm": 3.173326015472412, "learning_rate": 7.713538285644255e-05, "loss": 1.3854, "step": 10880 }, { "epoch": 0.6485874359280009, "grad_norm": 2.854536771774292, "learning_rate": 7.712743928947742e-05, "loss": 1.3291, "step": 10882 }, { "epoch": 0.648706639647157, "grad_norm": 3.4117023944854736, "learning_rate": 7.711949475207097e-05, "loss": 1.4714, "step": 10884 }, { "epoch": 0.648825843366313, "grad_norm": 3.1811132431030273, "learning_rate": 7.71115492445074e-05, "loss": 1.4526, "step": 10886 }, { "epoch": 0.6489450470854691, "grad_norm": 2.8521578311920166, "learning_rate": 7.710360276707095e-05, "loss": 1.2641, "step": 10888 }, { "epoch": 0.6490642508046252, "grad_norm": 3.187901020050049, "learning_rate": 7.70956553200459e-05, "loss": 1.5102, "step": 10890 }, { "epoch": 0.6491834545237811, "grad_norm": 2.9097323417663574, "learning_rate": 7.708770690371656e-05, "loss": 1.3673, "step": 10892 }, { "epoch": 0.6493026582429372, "grad_norm": 3.2198240756988525, "learning_rate": 7.707975751836725e-05, "loss": 1.5295, "step": 10894 }, { "epoch": 0.6494218619620932, "grad_norm": 3.3908798694610596, "learning_rate": 7.707180716428237e-05, "loss": 1.4131, "step": 10896 }, { "epoch": 0.6495410656812493, "grad_norm": 3.155315637588501, "learning_rate": 7.706385584174631e-05, "loss": 1.3778, "step": 10898 }, { "epoch": 0.6496602694004053, "grad_norm": 2.71907901763916, "learning_rate": 7.705590355104356e-05, "loss": 1.277, "step": 10900 }, { "epoch": 0.6497794731195613, "grad_norm": 3.070570468902588, "learning_rate": 7.704795029245854e-05, "loss": 1.284, "step": 10902 }, { "epoch": 0.6498986768387174, "grad_norm": 3.097437858581543, "learning_rate": 7.70399960662758e-05, "loss": 1.3901, "step": 10904 }, { "epoch": 0.6500178805578734, "grad_norm": 3.0699164867401123, "learning_rate": 7.703204087277988e-05, "loss": 1.4482, "step": 10906 }, { "epoch": 0.6501370842770294, "grad_norm": 2.8945958614349365, "learning_rate": 7.702408471225537e-05, "loss": 1.3131, "step": 10908 }, { "epoch": 0.6502562879961855, "grad_norm": 3.324920177459717, "learning_rate": 7.701612758498691e-05, "loss": 1.6171, "step": 10910 }, { "epoch": 0.6503754917153415, "grad_norm": 3.0445971488952637, "learning_rate": 7.700816949125911e-05, "loss": 1.2964, "step": 10912 }, { "epoch": 0.6504946954344976, "grad_norm": 3.2853357791900635, "learning_rate": 7.700021043135668e-05, "loss": 1.5271, "step": 10914 }, { "epoch": 0.6506138991536536, "grad_norm": 3.173224687576294, "learning_rate": 7.699225040556435e-05, "loss": 1.3208, "step": 10916 }, { "epoch": 0.6507331028728096, "grad_norm": 3.0075600147247314, "learning_rate": 7.698428941416686e-05, "loss": 1.3666, "step": 10918 }, { "epoch": 0.6508523065919657, "grad_norm": 3.100665330886841, "learning_rate": 7.697632745744903e-05, "loss": 1.3136, "step": 10920 }, { "epoch": 0.6509715103111217, "grad_norm": 3.0874176025390625, "learning_rate": 7.696836453569564e-05, "loss": 1.4274, "step": 10922 }, { "epoch": 0.6510907140302777, "grad_norm": 3.1886990070343018, "learning_rate": 7.69604006491916e-05, "loss": 1.4963, "step": 10924 }, { "epoch": 0.6512099177494338, "grad_norm": 8.41060733795166, "learning_rate": 7.695243579822179e-05, "loss": 1.4265, "step": 10926 }, { "epoch": 0.6513291214685898, "grad_norm": 3.028960704803467, "learning_rate": 7.694446998307112e-05, "loss": 1.3871, "step": 10928 }, { "epoch": 0.6514483251877459, "grad_norm": 2.8458034992218018, "learning_rate": 7.693650320402459e-05, "loss": 1.2672, "step": 10930 }, { "epoch": 0.651567528906902, "grad_norm": 2.759514570236206, "learning_rate": 7.692853546136716e-05, "loss": 1.3484, "step": 10932 }, { "epoch": 0.6516867326260579, "grad_norm": 2.826188802719116, "learning_rate": 7.692056675538388e-05, "loss": 1.3052, "step": 10934 }, { "epoch": 0.651805936345214, "grad_norm": 3.1221606731414795, "learning_rate": 7.691259708635983e-05, "loss": 1.3392, "step": 10936 }, { "epoch": 0.65192514006437, "grad_norm": 3.389139413833618, "learning_rate": 7.69046264545801e-05, "loss": 1.3228, "step": 10938 }, { "epoch": 0.652044343783526, "grad_norm": 3.275712013244629, "learning_rate": 7.689665486032983e-05, "loss": 1.4037, "step": 10940 }, { "epoch": 0.6521635475026821, "grad_norm": 2.8453526496887207, "learning_rate": 7.688868230389417e-05, "loss": 1.3994, "step": 10942 }, { "epoch": 0.6522827512218381, "grad_norm": 2.9061124324798584, "learning_rate": 7.688070878555836e-05, "loss": 1.3198, "step": 10944 }, { "epoch": 0.6524019549409942, "grad_norm": 2.9354517459869385, "learning_rate": 7.687273430560763e-05, "loss": 1.2886, "step": 10946 }, { "epoch": 0.6525211586601501, "grad_norm": 2.8316714763641357, "learning_rate": 7.686475886432724e-05, "loss": 1.2514, "step": 10948 }, { "epoch": 0.6526403623793062, "grad_norm": 3.026207685470581, "learning_rate": 7.685678246200252e-05, "loss": 1.5998, "step": 10950 }, { "epoch": 0.6527595660984623, "grad_norm": 3.174060583114624, "learning_rate": 7.684880509891879e-05, "loss": 1.4963, "step": 10952 }, { "epoch": 0.6528787698176183, "grad_norm": 2.8705010414123535, "learning_rate": 7.684082677536145e-05, "loss": 1.2812, "step": 10954 }, { "epoch": 0.6529979735367744, "grad_norm": 3.0914113521575928, "learning_rate": 7.68328474916159e-05, "loss": 1.4345, "step": 10956 }, { "epoch": 0.6531171772559304, "grad_norm": 2.8183319568634033, "learning_rate": 7.682486724796758e-05, "loss": 1.3124, "step": 10958 }, { "epoch": 0.6532363809750864, "grad_norm": 3.10443115234375, "learning_rate": 7.681688604470198e-05, "loss": 1.2565, "step": 10960 }, { "epoch": 0.6533555846942425, "grad_norm": 3.0849673748016357, "learning_rate": 7.680890388210462e-05, "loss": 1.5193, "step": 10962 }, { "epoch": 0.6534747884133985, "grad_norm": 3.128784418106079, "learning_rate": 7.680092076046104e-05, "loss": 1.6423, "step": 10964 }, { "epoch": 0.6535939921325545, "grad_norm": 3.2281076908111572, "learning_rate": 7.679293668005682e-05, "loss": 1.4771, "step": 10966 }, { "epoch": 0.6537131958517106, "grad_norm": 2.817962646484375, "learning_rate": 7.678495164117759e-05, "loss": 1.3937, "step": 10968 }, { "epoch": 0.6538323995708666, "grad_norm": 2.8246970176696777, "learning_rate": 7.677696564410898e-05, "loss": 1.4424, "step": 10970 }, { "epoch": 0.6539516032900227, "grad_norm": 2.857818126678467, "learning_rate": 7.676897868913671e-05, "loss": 1.3765, "step": 10972 }, { "epoch": 0.6540708070091786, "grad_norm": 3.486639976501465, "learning_rate": 7.676099077654648e-05, "loss": 1.1242, "step": 10974 }, { "epoch": 0.6541900107283347, "grad_norm": 2.8107097148895264, "learning_rate": 7.675300190662406e-05, "loss": 1.3359, "step": 10976 }, { "epoch": 0.6543092144474908, "grad_norm": 3.0235443115234375, "learning_rate": 7.674501207965521e-05, "loss": 1.6471, "step": 10978 }, { "epoch": 0.6544284181666468, "grad_norm": 3.103106737136841, "learning_rate": 7.673702129592578e-05, "loss": 1.3029, "step": 10980 }, { "epoch": 0.6545476218858028, "grad_norm": 2.770845413208008, "learning_rate": 7.672902955572162e-05, "loss": 1.2626, "step": 10982 }, { "epoch": 0.6546668256049589, "grad_norm": 3.1773922443389893, "learning_rate": 7.67210368593286e-05, "loss": 1.4371, "step": 10984 }, { "epoch": 0.6547860293241149, "grad_norm": 2.939314365386963, "learning_rate": 7.671304320703269e-05, "loss": 1.3635, "step": 10986 }, { "epoch": 0.654905233043271, "grad_norm": 3.1016621589660645, "learning_rate": 7.670504859911982e-05, "loss": 1.4056, "step": 10988 }, { "epoch": 0.6550244367624269, "grad_norm": 2.986300230026245, "learning_rate": 7.669705303587597e-05, "loss": 1.3021, "step": 10990 }, { "epoch": 0.655143640481583, "grad_norm": 2.5584728717803955, "learning_rate": 7.668905651758724e-05, "loss": 1.1847, "step": 10992 }, { "epoch": 0.6552628442007391, "grad_norm": 2.821882963180542, "learning_rate": 7.668105904453961e-05, "loss": 1.293, "step": 10994 }, { "epoch": 0.6553820479198951, "grad_norm": 3.0939061641693115, "learning_rate": 7.66730606170192e-05, "loss": 1.3089, "step": 10996 }, { "epoch": 0.6555012516390512, "grad_norm": 2.838285446166992, "learning_rate": 7.666506123531216e-05, "loss": 1.3823, "step": 10998 }, { "epoch": 0.6556204553582071, "grad_norm": 3.448155641555786, "learning_rate": 7.665706089970465e-05, "loss": 1.5333, "step": 11000 }, { "epoch": 0.6557396590773632, "grad_norm": 3.0139245986938477, "learning_rate": 7.664905961048288e-05, "loss": 1.391, "step": 11002 }, { "epoch": 0.6558588627965193, "grad_norm": 2.97826886177063, "learning_rate": 7.664105736793304e-05, "loss": 1.3489, "step": 11004 }, { "epoch": 0.6559780665156753, "grad_norm": 2.959252119064331, "learning_rate": 7.663305417234145e-05, "loss": 1.2825, "step": 11006 }, { "epoch": 0.6560972702348313, "grad_norm": 3.330169200897217, "learning_rate": 7.662505002399437e-05, "loss": 1.4741, "step": 11008 }, { "epoch": 0.6562164739539874, "grad_norm": 2.9102842807769775, "learning_rate": 7.661704492317816e-05, "loss": 1.2763, "step": 11010 }, { "epoch": 0.6563356776731434, "grad_norm": 3.0319790840148926, "learning_rate": 7.660903887017918e-05, "loss": 1.3154, "step": 11012 }, { "epoch": 0.6564548813922995, "grad_norm": 3.2315874099731445, "learning_rate": 7.660103186528386e-05, "loss": 1.4025, "step": 11014 }, { "epoch": 0.6565740851114554, "grad_norm": 3.32920503616333, "learning_rate": 7.659302390877858e-05, "loss": 1.5125, "step": 11016 }, { "epoch": 0.6566932888306115, "grad_norm": 3.5187158584594727, "learning_rate": 7.658501500094988e-05, "loss": 1.5909, "step": 11018 }, { "epoch": 0.6568124925497676, "grad_norm": 3.3146214485168457, "learning_rate": 7.657700514208421e-05, "loss": 1.3648, "step": 11020 }, { "epoch": 0.6569316962689236, "grad_norm": 3.081254720687866, "learning_rate": 7.656899433246816e-05, "loss": 1.4813, "step": 11022 }, { "epoch": 0.6570508999880796, "grad_norm": 3.0173420906066895, "learning_rate": 7.656098257238827e-05, "loss": 1.3699, "step": 11024 }, { "epoch": 0.6571701037072357, "grad_norm": 3.1240110397338867, "learning_rate": 7.655296986213114e-05, "loss": 1.4639, "step": 11026 }, { "epoch": 0.6572893074263917, "grad_norm": 3.4741709232330322, "learning_rate": 7.654495620198343e-05, "loss": 1.366, "step": 11028 }, { "epoch": 0.6574085111455478, "grad_norm": 3.2796640396118164, "learning_rate": 7.653694159223181e-05, "loss": 1.3724, "step": 11030 }, { "epoch": 0.6575277148647037, "grad_norm": 2.7472035884857178, "learning_rate": 7.652892603316302e-05, "loss": 1.3834, "step": 11032 }, { "epoch": 0.6576469185838598, "grad_norm": 2.7414019107818604, "learning_rate": 7.652090952506376e-05, "loss": 1.446, "step": 11034 }, { "epoch": 0.6577661223030159, "grad_norm": 2.9279701709747314, "learning_rate": 7.651289206822084e-05, "loss": 1.3569, "step": 11036 }, { "epoch": 0.6578853260221719, "grad_norm": 3.1879029273986816, "learning_rate": 7.650487366292105e-05, "loss": 1.2403, "step": 11038 }, { "epoch": 0.658004529741328, "grad_norm": 2.978391170501709, "learning_rate": 7.649685430945123e-05, "loss": 1.3128, "step": 11040 }, { "epoch": 0.6581237334604839, "grad_norm": 3.44809627532959, "learning_rate": 7.648883400809828e-05, "loss": 1.3812, "step": 11042 }, { "epoch": 0.65824293717964, "grad_norm": 3.3217954635620117, "learning_rate": 7.648081275914911e-05, "loss": 1.4331, "step": 11044 }, { "epoch": 0.6583621408987961, "grad_norm": 3.039748430252075, "learning_rate": 7.647279056289067e-05, "loss": 1.4052, "step": 11046 }, { "epoch": 0.658481344617952, "grad_norm": 3.1451778411865234, "learning_rate": 7.646476741960993e-05, "loss": 1.4288, "step": 11048 }, { "epoch": 0.6586005483371081, "grad_norm": 4.322169303894043, "learning_rate": 7.645674332959391e-05, "loss": 1.3916, "step": 11050 }, { "epoch": 0.6587197520562642, "grad_norm": 2.7941815853118896, "learning_rate": 7.644871829312967e-05, "loss": 1.3193, "step": 11052 }, { "epoch": 0.6588389557754202, "grad_norm": 3.192779302597046, "learning_rate": 7.644069231050427e-05, "loss": 1.3337, "step": 11054 }, { "epoch": 0.6589581594945763, "grad_norm": 3.397752523422241, "learning_rate": 7.643266538200484e-05, "loss": 1.6202, "step": 11056 }, { "epoch": 0.6590773632137322, "grad_norm": 3.2287540435791016, "learning_rate": 7.642463750791855e-05, "loss": 1.3335, "step": 11058 }, { "epoch": 0.6591965669328883, "grad_norm": 2.701732873916626, "learning_rate": 7.641660868853253e-05, "loss": 1.4712, "step": 11060 }, { "epoch": 0.6593157706520444, "grad_norm": 3.2814745903015137, "learning_rate": 7.640857892413407e-05, "loss": 1.4888, "step": 11062 }, { "epoch": 0.6594349743712004, "grad_norm": 3.1929218769073486, "learning_rate": 7.640054821501038e-05, "loss": 1.3117, "step": 11064 }, { "epoch": 0.6595541780903564, "grad_norm": 3.114880084991455, "learning_rate": 7.639251656144874e-05, "loss": 1.456, "step": 11066 }, { "epoch": 0.6596733818095124, "grad_norm": 2.9443984031677246, "learning_rate": 7.638448396373648e-05, "loss": 1.2163, "step": 11068 }, { "epoch": 0.6597925855286685, "grad_norm": 2.9122705459594727, "learning_rate": 7.637645042216097e-05, "loss": 1.3188, "step": 11070 }, { "epoch": 0.6599117892478246, "grad_norm": 2.836732864379883, "learning_rate": 7.636841593700956e-05, "loss": 1.2865, "step": 11072 }, { "epoch": 0.6600309929669805, "grad_norm": 3.0154542922973633, "learning_rate": 7.63603805085697e-05, "loss": 1.3025, "step": 11074 }, { "epoch": 0.6601501966861366, "grad_norm": 3.093141555786133, "learning_rate": 7.635234413712886e-05, "loss": 1.3538, "step": 11076 }, { "epoch": 0.6602694004052927, "grad_norm": 2.968731164932251, "learning_rate": 7.634430682297448e-05, "loss": 1.3043, "step": 11078 }, { "epoch": 0.6603886041244487, "grad_norm": 3.1635019779205322, "learning_rate": 7.633626856639412e-05, "loss": 1.3089, "step": 11080 }, { "epoch": 0.6605078078436047, "grad_norm": 2.818878650665283, "learning_rate": 7.632822936767534e-05, "loss": 1.3593, "step": 11082 }, { "epoch": 0.6606270115627607, "grad_norm": 3.3760087490081787, "learning_rate": 7.632018922710569e-05, "loss": 1.4251, "step": 11084 }, { "epoch": 0.6607462152819168, "grad_norm": 3.1267478466033936, "learning_rate": 7.631214814497282e-05, "loss": 1.4628, "step": 11086 }, { "epoch": 0.6608654190010729, "grad_norm": 2.8903260231018066, "learning_rate": 7.63041061215644e-05, "loss": 1.3261, "step": 11088 }, { "epoch": 0.6609846227202288, "grad_norm": 3.014704942703247, "learning_rate": 7.62960631571681e-05, "loss": 1.3836, "step": 11090 }, { "epoch": 0.6611038264393849, "grad_norm": 3.0158612728118896, "learning_rate": 7.628801925207163e-05, "loss": 1.3424, "step": 11092 }, { "epoch": 0.6612230301585409, "grad_norm": 2.9126579761505127, "learning_rate": 7.62799744065628e-05, "loss": 1.4331, "step": 11094 }, { "epoch": 0.661342233877697, "grad_norm": 3.5265233516693115, "learning_rate": 7.627192862092935e-05, "loss": 1.3718, "step": 11096 }, { "epoch": 0.661461437596853, "grad_norm": 2.8882460594177246, "learning_rate": 7.626388189545914e-05, "loss": 1.3421, "step": 11098 }, { "epoch": 0.661580641316009, "grad_norm": 3.0413625240325928, "learning_rate": 7.625583423044e-05, "loss": 1.2923, "step": 11100 }, { "epoch": 0.6616998450351651, "grad_norm": 3.0028676986694336, "learning_rate": 7.624778562615983e-05, "loss": 1.2334, "step": 11102 }, { "epoch": 0.6618190487543212, "grad_norm": 3.3759167194366455, "learning_rate": 7.623973608290658e-05, "loss": 1.5405, "step": 11104 }, { "epoch": 0.6619382524734772, "grad_norm": 2.7198991775512695, "learning_rate": 7.623168560096818e-05, "loss": 1.2899, "step": 11106 }, { "epoch": 0.6620574561926332, "grad_norm": 3.810892343521118, "learning_rate": 7.622363418063263e-05, "loss": 1.3891, "step": 11108 }, { "epoch": 0.6621766599117892, "grad_norm": 2.889998197555542, "learning_rate": 7.621558182218798e-05, "loss": 1.2929, "step": 11110 }, { "epoch": 0.6622958636309453, "grad_norm": 3.228182077407837, "learning_rate": 7.620752852592227e-05, "loss": 1.3862, "step": 11112 }, { "epoch": 0.6624150673501014, "grad_norm": 3.449655055999756, "learning_rate": 7.619947429212358e-05, "loss": 1.5586, "step": 11114 }, { "epoch": 0.6625342710692573, "grad_norm": 3.5452282428741455, "learning_rate": 7.619141912108008e-05, "loss": 1.2981, "step": 11116 }, { "epoch": 0.6626534747884134, "grad_norm": 2.918973207473755, "learning_rate": 7.618336301307987e-05, "loss": 1.3127, "step": 11118 }, { "epoch": 0.6627726785075695, "grad_norm": 2.930914878845215, "learning_rate": 7.61753059684112e-05, "loss": 1.3444, "step": 11120 }, { "epoch": 0.6628918822267255, "grad_norm": 2.841024160385132, "learning_rate": 7.616724798736227e-05, "loss": 1.3476, "step": 11122 }, { "epoch": 0.6630110859458815, "grad_norm": 2.923488140106201, "learning_rate": 7.615918907022135e-05, "loss": 1.3966, "step": 11124 }, { "epoch": 0.6631302896650375, "grad_norm": 2.9956021308898926, "learning_rate": 7.615112921727677e-05, "loss": 1.5584, "step": 11126 }, { "epoch": 0.6632494933841936, "grad_norm": 2.9641265869140625, "learning_rate": 7.614306842881678e-05, "loss": 1.2065, "step": 11128 }, { "epoch": 0.6633686971033497, "grad_norm": 2.749598741531372, "learning_rate": 7.61350067051298e-05, "loss": 1.2396, "step": 11130 }, { "epoch": 0.6634879008225056, "grad_norm": 3.345089912414551, "learning_rate": 7.61269440465042e-05, "loss": 1.2959, "step": 11132 }, { "epoch": 0.6636071045416617, "grad_norm": 3.3748834133148193, "learning_rate": 7.611888045322844e-05, "loss": 1.4561, "step": 11134 }, { "epoch": 0.6637263082608177, "grad_norm": 3.0728869438171387, "learning_rate": 7.611081592559095e-05, "loss": 1.3702, "step": 11136 }, { "epoch": 0.6638455119799738, "grad_norm": 2.997236490249634, "learning_rate": 7.610275046388022e-05, "loss": 1.4297, "step": 11138 }, { "epoch": 0.6639647156991298, "grad_norm": 3.154984712600708, "learning_rate": 7.609468406838482e-05, "loss": 1.2857, "step": 11140 }, { "epoch": 0.6640839194182858, "grad_norm": 3.157273769378662, "learning_rate": 7.608661673939328e-05, "loss": 1.6522, "step": 11142 }, { "epoch": 0.6642031231374419, "grad_norm": 3.1894373893737793, "learning_rate": 7.60785484771942e-05, "loss": 1.3749, "step": 11144 }, { "epoch": 0.664322326856598, "grad_norm": 3.2312119007110596, "learning_rate": 7.607047928207622e-05, "loss": 1.5037, "step": 11146 }, { "epoch": 0.664441530575754, "grad_norm": 3.2735073566436768, "learning_rate": 7.6062409154328e-05, "loss": 1.4602, "step": 11148 }, { "epoch": 0.66456073429491, "grad_norm": 3.255892753601074, "learning_rate": 7.605433809423823e-05, "loss": 1.312, "step": 11150 }, { "epoch": 0.664679938014066, "grad_norm": 2.940722942352295, "learning_rate": 7.604626610209564e-05, "loss": 1.3616, "step": 11152 }, { "epoch": 0.6647991417332221, "grad_norm": 3.2408447265625, "learning_rate": 7.603819317818899e-05, "loss": 1.3237, "step": 11154 }, { "epoch": 0.6649183454523782, "grad_norm": 3.1295602321624756, "learning_rate": 7.60301193228071e-05, "loss": 1.4544, "step": 11156 }, { "epoch": 0.6650375491715341, "grad_norm": 3.3546223640441895, "learning_rate": 7.602204453623876e-05, "loss": 1.4442, "step": 11158 }, { "epoch": 0.6651567528906902, "grad_norm": 3.2034530639648438, "learning_rate": 7.601396881877288e-05, "loss": 1.3724, "step": 11160 }, { "epoch": 0.6652759566098462, "grad_norm": 3.1136913299560547, "learning_rate": 7.60058921706983e-05, "loss": 1.4177, "step": 11162 }, { "epoch": 0.6653951603290023, "grad_norm": 2.952732563018799, "learning_rate": 7.599781459230398e-05, "loss": 1.4644, "step": 11164 }, { "epoch": 0.6655143640481583, "grad_norm": 3.410658836364746, "learning_rate": 7.598973608387889e-05, "loss": 1.44, "step": 11166 }, { "epoch": 0.6656335677673143, "grad_norm": 2.8884737491607666, "learning_rate": 7.598165664571202e-05, "loss": 1.338, "step": 11168 }, { "epoch": 0.6657527714864704, "grad_norm": 2.9197909832000732, "learning_rate": 7.597357627809241e-05, "loss": 1.2803, "step": 11170 }, { "epoch": 0.6658719752056265, "grad_norm": 3.1297078132629395, "learning_rate": 7.59654949813091e-05, "loss": 1.3352, "step": 11172 }, { "epoch": 0.6659911789247824, "grad_norm": 3.145873546600342, "learning_rate": 7.595741275565118e-05, "loss": 1.4291, "step": 11174 }, { "epoch": 0.6661103826439385, "grad_norm": 3.0256407260894775, "learning_rate": 7.59493296014078e-05, "loss": 1.4117, "step": 11176 }, { "epoch": 0.6662295863630945, "grad_norm": 3.155207633972168, "learning_rate": 7.594124551886812e-05, "loss": 1.2338, "step": 11178 }, { "epoch": 0.6663487900822506, "grad_norm": 3.0702836513519287, "learning_rate": 7.593316050832133e-05, "loss": 1.4016, "step": 11180 }, { "epoch": 0.6664679938014066, "grad_norm": 3.078157901763916, "learning_rate": 7.592507457005665e-05, "loss": 1.2869, "step": 11182 }, { "epoch": 0.6665871975205626, "grad_norm": 3.241210460662842, "learning_rate": 7.591698770436338e-05, "loss": 1.5056, "step": 11184 }, { "epoch": 0.6667064012397187, "grad_norm": 3.14509916305542, "learning_rate": 7.590889991153076e-05, "loss": 1.4157, "step": 11186 }, { "epoch": 0.6668256049588747, "grad_norm": 3.0778961181640625, "learning_rate": 7.590081119184814e-05, "loss": 1.4123, "step": 11188 }, { "epoch": 0.6669448086780307, "grad_norm": 3.1660423278808594, "learning_rate": 7.589272154560489e-05, "loss": 1.3292, "step": 11190 }, { "epoch": 0.6670640123971868, "grad_norm": 3.218221426010132, "learning_rate": 7.588463097309042e-05, "loss": 1.4243, "step": 11192 }, { "epoch": 0.6671832161163428, "grad_norm": 3.0021307468414307, "learning_rate": 7.58765394745941e-05, "loss": 1.2421, "step": 11194 }, { "epoch": 0.6673024198354989, "grad_norm": 3.1202805042266846, "learning_rate": 7.586844705040546e-05, "loss": 1.341, "step": 11196 }, { "epoch": 0.667421623554655, "grad_norm": 3.2144856452941895, "learning_rate": 7.586035370081394e-05, "loss": 1.5726, "step": 11198 }, { "epoch": 0.6675408272738109, "grad_norm": 3.4244132041931152, "learning_rate": 7.58522594261091e-05, "loss": 1.4302, "step": 11200 }, { "epoch": 0.667660030992967, "grad_norm": 2.9644484519958496, "learning_rate": 7.58441642265805e-05, "loss": 1.3892, "step": 11202 }, { "epoch": 0.667779234712123, "grad_norm": 3.24259614944458, "learning_rate": 7.58360681025177e-05, "loss": 1.3502, "step": 11204 }, { "epoch": 0.667898438431279, "grad_norm": 3.128040313720703, "learning_rate": 7.582797105421034e-05, "loss": 1.2982, "step": 11206 }, { "epoch": 0.6680176421504351, "grad_norm": 3.0123648643493652, "learning_rate": 7.58198730819481e-05, "loss": 1.3682, "step": 11208 }, { "epoch": 0.6681368458695911, "grad_norm": 2.9304044246673584, "learning_rate": 7.581177418602065e-05, "loss": 1.3344, "step": 11210 }, { "epoch": 0.6682560495887472, "grad_norm": 3.1394593715667725, "learning_rate": 7.580367436671773e-05, "loss": 1.3093, "step": 11212 }, { "epoch": 0.6683752533079033, "grad_norm": 2.9933860301971436, "learning_rate": 7.57955736243291e-05, "loss": 1.2585, "step": 11214 }, { "epoch": 0.6684944570270592, "grad_norm": 3.1054341793060303, "learning_rate": 7.578747195914453e-05, "loss": 1.2597, "step": 11216 }, { "epoch": 0.6686136607462153, "grad_norm": 3.1520369052886963, "learning_rate": 7.577936937145387e-05, "loss": 1.4351, "step": 11218 }, { "epoch": 0.6687328644653713, "grad_norm": 3.0715928077697754, "learning_rate": 7.577126586154695e-05, "loss": 1.4469, "step": 11220 }, { "epoch": 0.6688520681845274, "grad_norm": 2.7172892093658447, "learning_rate": 7.576316142971367e-05, "loss": 1.2358, "step": 11222 }, { "epoch": 0.6689712719036834, "grad_norm": 3.1962497234344482, "learning_rate": 7.575505607624399e-05, "loss": 1.3671, "step": 11224 }, { "epoch": 0.6690904756228394, "grad_norm": 3.0940256118774414, "learning_rate": 7.574694980142779e-05, "loss": 1.2275, "step": 11226 }, { "epoch": 0.6692096793419955, "grad_norm": 3.087095022201538, "learning_rate": 7.573884260555513e-05, "loss": 1.2827, "step": 11228 }, { "epoch": 0.6693288830611515, "grad_norm": 3.2119767665863037, "learning_rate": 7.5730734488916e-05, "loss": 1.4083, "step": 11230 }, { "epoch": 0.6694480867803075, "grad_norm": 3.217632532119751, "learning_rate": 7.572262545180046e-05, "loss": 1.3813, "step": 11232 }, { "epoch": 0.6695672904994636, "grad_norm": 3.072539806365967, "learning_rate": 7.571451549449861e-05, "loss": 1.3698, "step": 11234 }, { "epoch": 0.6696864942186196, "grad_norm": 3.1514832973480225, "learning_rate": 7.570640461730054e-05, "loss": 1.3782, "step": 11236 }, { "epoch": 0.6698056979377757, "grad_norm": 14.400566101074219, "learning_rate": 7.569829282049644e-05, "loss": 1.3258, "step": 11238 }, { "epoch": 0.6699249016569317, "grad_norm": 3.1604442596435547, "learning_rate": 7.569018010437647e-05, "loss": 1.4328, "step": 11240 }, { "epoch": 0.6700441053760877, "grad_norm": 3.064544916152954, "learning_rate": 7.568206646923086e-05, "loss": 1.6671, "step": 11242 }, { "epoch": 0.6701633090952438, "grad_norm": 3.1075103282928467, "learning_rate": 7.567395191534987e-05, "loss": 1.4912, "step": 11244 }, { "epoch": 0.6702825128143998, "grad_norm": 3.069570779800415, "learning_rate": 7.566583644302376e-05, "loss": 1.3652, "step": 11246 }, { "epoch": 0.6704017165335558, "grad_norm": 3.430037260055542, "learning_rate": 7.56577200525429e-05, "loss": 1.4704, "step": 11248 }, { "epoch": 0.6705209202527119, "grad_norm": 3.1538188457489014, "learning_rate": 7.564960274419758e-05, "loss": 1.3662, "step": 11250 }, { "epoch": 0.6706401239718679, "grad_norm": 2.8717315196990967, "learning_rate": 7.56414845182782e-05, "loss": 1.2446, "step": 11252 }, { "epoch": 0.670759327691024, "grad_norm": 3.104811906814575, "learning_rate": 7.563336537507522e-05, "loss": 1.3212, "step": 11254 }, { "epoch": 0.67087853141018, "grad_norm": 2.5824553966522217, "learning_rate": 7.562524531487903e-05, "loss": 1.2064, "step": 11256 }, { "epoch": 0.670997735129336, "grad_norm": 3.410020589828491, "learning_rate": 7.561712433798014e-05, "loss": 1.3228, "step": 11258 }, { "epoch": 0.6711169388484921, "grad_norm": 3.057044267654419, "learning_rate": 7.560900244466907e-05, "loss": 1.4097, "step": 11260 }, { "epoch": 0.6712361425676481, "grad_norm": 3.2475781440734863, "learning_rate": 7.560087963523639e-05, "loss": 1.2934, "step": 11262 }, { "epoch": 0.6713553462868042, "grad_norm": 3.242792844772339, "learning_rate": 7.559275590997261e-05, "loss": 1.3398, "step": 11264 }, { "epoch": 0.6714745500059602, "grad_norm": 2.998591423034668, "learning_rate": 7.558463126916842e-05, "loss": 1.5442, "step": 11266 }, { "epoch": 0.6715937537251162, "grad_norm": 2.87682843208313, "learning_rate": 7.55765057131144e-05, "loss": 1.3398, "step": 11268 }, { "epoch": 0.6717129574442723, "grad_norm": 3.0373640060424805, "learning_rate": 7.55683792421013e-05, "loss": 1.3395, "step": 11270 }, { "epoch": 0.6718321611634283, "grad_norm": 3.174862861633301, "learning_rate": 7.556025185641975e-05, "loss": 1.333, "step": 11272 }, { "epoch": 0.6719513648825843, "grad_norm": 2.708556890487671, "learning_rate": 7.555212355636056e-05, "loss": 1.2952, "step": 11274 }, { "epoch": 0.6720705686017404, "grad_norm": 2.8587846755981445, "learning_rate": 7.554399434221449e-05, "loss": 1.2226, "step": 11276 }, { "epoch": 0.6721897723208964, "grad_norm": 3.2637600898742676, "learning_rate": 7.553586421427234e-05, "loss": 1.3262, "step": 11278 }, { "epoch": 0.6723089760400525, "grad_norm": 3.1673521995544434, "learning_rate": 7.552773317282495e-05, "loss": 1.3118, "step": 11280 }, { "epoch": 0.6724281797592084, "grad_norm": 3.1405515670776367, "learning_rate": 7.551960121816322e-05, "loss": 1.4525, "step": 11282 }, { "epoch": 0.6725473834783645, "grad_norm": 3.347151756286621, "learning_rate": 7.551146835057804e-05, "loss": 1.284, "step": 11284 }, { "epoch": 0.6726665871975206, "grad_norm": 3.253545045852661, "learning_rate": 7.550333457036032e-05, "loss": 1.4428, "step": 11286 }, { "epoch": 0.6727857909166766, "grad_norm": 2.9507482051849365, "learning_rate": 7.549519987780109e-05, "loss": 1.2913, "step": 11288 }, { "epoch": 0.6729049946358326, "grad_norm": 3.259566307067871, "learning_rate": 7.54870642731913e-05, "loss": 1.3847, "step": 11290 }, { "epoch": 0.6730241983549887, "grad_norm": 2.988016128540039, "learning_rate": 7.547892775682206e-05, "loss": 1.3716, "step": 11292 }, { "epoch": 0.6731434020741447, "grad_norm": 2.9273152351379395, "learning_rate": 7.547079032898438e-05, "loss": 1.363, "step": 11294 }, { "epoch": 0.6732626057933008, "grad_norm": 2.9325242042541504, "learning_rate": 7.546265198996938e-05, "loss": 1.2527, "step": 11296 }, { "epoch": 0.6733818095124567, "grad_norm": 3.136676073074341, "learning_rate": 7.54545127400682e-05, "loss": 1.3441, "step": 11298 }, { "epoch": 0.6735010132316128, "grad_norm": 3.490265369415283, "learning_rate": 7.544637257957201e-05, "loss": 1.5203, "step": 11300 }, { "epoch": 0.6736202169507689, "grad_norm": 3.1689178943634033, "learning_rate": 7.543823150877201e-05, "loss": 1.2813, "step": 11302 }, { "epoch": 0.6737394206699249, "grad_norm": 3.095982313156128, "learning_rate": 7.543008952795943e-05, "loss": 1.44, "step": 11304 }, { "epoch": 0.673858624389081, "grad_norm": 3.2911932468414307, "learning_rate": 7.542194663742553e-05, "loss": 1.2868, "step": 11306 }, { "epoch": 0.673977828108237, "grad_norm": 3.1426472663879395, "learning_rate": 7.541380283746164e-05, "loss": 1.3843, "step": 11308 }, { "epoch": 0.674097031827393, "grad_norm": 3.2744815349578857, "learning_rate": 7.540565812835905e-05, "loss": 1.3345, "step": 11310 }, { "epoch": 0.6742162355465491, "grad_norm": 2.689762592315674, "learning_rate": 7.539751251040916e-05, "loss": 1.2874, "step": 11312 }, { "epoch": 0.674335439265705, "grad_norm": 3.209831953048706, "learning_rate": 7.538936598390333e-05, "loss": 1.3197, "step": 11314 }, { "epoch": 0.6744546429848611, "grad_norm": 3.001068353652954, "learning_rate": 7.538121854913303e-05, "loss": 1.2011, "step": 11316 }, { "epoch": 0.6745738467040172, "grad_norm": 2.887836217880249, "learning_rate": 7.537307020638969e-05, "loss": 1.3314, "step": 11318 }, { "epoch": 0.6746930504231732, "grad_norm": 2.982957124710083, "learning_rate": 7.536492095596483e-05, "loss": 1.2396, "step": 11320 }, { "epoch": 0.6748122541423293, "grad_norm": 3.2100629806518555, "learning_rate": 7.535677079814996e-05, "loss": 1.3519, "step": 11322 }, { "epoch": 0.6749314578614852, "grad_norm": 2.966731071472168, "learning_rate": 7.534861973323662e-05, "loss": 1.4211, "step": 11324 }, { "epoch": 0.6750506615806413, "grad_norm": 2.9166550636291504, "learning_rate": 7.534046776151644e-05, "loss": 1.3784, "step": 11326 }, { "epoch": 0.6751698652997974, "grad_norm": 2.6436288356781006, "learning_rate": 7.533231488328102e-05, "loss": 1.2226, "step": 11328 }, { "epoch": 0.6752890690189534, "grad_norm": 3.429213762283325, "learning_rate": 7.532416109882204e-05, "loss": 1.5209, "step": 11330 }, { "epoch": 0.6754082727381094, "grad_norm": 2.9603421688079834, "learning_rate": 7.531600640843115e-05, "loss": 1.3173, "step": 11332 }, { "epoch": 0.6755274764572655, "grad_norm": 3.188124418258667, "learning_rate": 7.530785081240011e-05, "loss": 1.4694, "step": 11334 }, { "epoch": 0.6756466801764215, "grad_norm": 3.14048433303833, "learning_rate": 7.529969431102064e-05, "loss": 1.2519, "step": 11336 }, { "epoch": 0.6757658838955776, "grad_norm": 3.2532386779785156, "learning_rate": 7.529153690458455e-05, "loss": 1.4794, "step": 11338 }, { "epoch": 0.6758850876147335, "grad_norm": 3.0165343284606934, "learning_rate": 7.528337859338364e-05, "loss": 1.4752, "step": 11340 }, { "epoch": 0.6760042913338896, "grad_norm": 3.268618106842041, "learning_rate": 7.527521937770978e-05, "loss": 1.3612, "step": 11342 }, { "epoch": 0.6761234950530457, "grad_norm": 2.9181718826293945, "learning_rate": 7.526705925785487e-05, "loss": 1.3653, "step": 11344 }, { "epoch": 0.6762426987722017, "grad_norm": 3.3733909130096436, "learning_rate": 7.525889823411077e-05, "loss": 1.5127, "step": 11346 }, { "epoch": 0.6763619024913577, "grad_norm": 3.2201318740844727, "learning_rate": 7.525073630676945e-05, "loss": 1.463, "step": 11348 }, { "epoch": 0.6764811062105137, "grad_norm": 3.217255115509033, "learning_rate": 7.524257347612292e-05, "loss": 1.3433, "step": 11350 }, { "epoch": 0.6766003099296698, "grad_norm": 3.1484549045562744, "learning_rate": 7.523440974246318e-05, "loss": 1.3877, "step": 11352 }, { "epoch": 0.6767195136488259, "grad_norm": 3.3316314220428467, "learning_rate": 7.522624510608224e-05, "loss": 1.4962, "step": 11354 }, { "epoch": 0.6768387173679818, "grad_norm": 3.1448616981506348, "learning_rate": 7.52180795672722e-05, "loss": 1.3485, "step": 11356 }, { "epoch": 0.6769579210871379, "grad_norm": 3.476321220397949, "learning_rate": 7.52099131263252e-05, "loss": 1.4643, "step": 11358 }, { "epoch": 0.677077124806294, "grad_norm": 3.178527355194092, "learning_rate": 7.520174578353332e-05, "loss": 1.3679, "step": 11360 }, { "epoch": 0.67719632852545, "grad_norm": 3.2266573905944824, "learning_rate": 7.519357753918879e-05, "loss": 1.3992, "step": 11362 }, { "epoch": 0.6773155322446061, "grad_norm": 2.8596620559692383, "learning_rate": 7.518540839358377e-05, "loss": 1.3345, "step": 11364 }, { "epoch": 0.677434735963762, "grad_norm": 2.7497425079345703, "learning_rate": 7.517723834701053e-05, "loss": 1.2352, "step": 11366 }, { "epoch": 0.6775539396829181, "grad_norm": 3.118070125579834, "learning_rate": 7.516906739976135e-05, "loss": 1.4459, "step": 11368 }, { "epoch": 0.6776731434020742, "grad_norm": 3.1188766956329346, "learning_rate": 7.51608955521285e-05, "loss": 1.3671, "step": 11370 }, { "epoch": 0.6777923471212302, "grad_norm": 3.1618735790252686, "learning_rate": 7.515272280440432e-05, "loss": 1.406, "step": 11372 }, { "epoch": 0.6779115508403862, "grad_norm": 3.200594425201416, "learning_rate": 7.51445491568812e-05, "loss": 1.4921, "step": 11374 }, { "epoch": 0.6780307545595422, "grad_norm": 2.777345657348633, "learning_rate": 7.513637460985153e-05, "loss": 1.279, "step": 11376 }, { "epoch": 0.6781499582786983, "grad_norm": 2.979140043258667, "learning_rate": 7.512819916360771e-05, "loss": 1.194, "step": 11378 }, { "epoch": 0.6782691619978544, "grad_norm": 3.2916793823242188, "learning_rate": 7.512002281844224e-05, "loss": 1.3958, "step": 11380 }, { "epoch": 0.6783883657170103, "grad_norm": 3.2600579261779785, "learning_rate": 7.511184557464763e-05, "loss": 1.3963, "step": 11382 }, { "epoch": 0.6785075694361664, "grad_norm": 3.0087709426879883, "learning_rate": 7.510366743251638e-05, "loss": 1.2372, "step": 11384 }, { "epoch": 0.6786267731553225, "grad_norm": 3.5033767223358154, "learning_rate": 7.509548839234101e-05, "loss": 1.5005, "step": 11386 }, { "epoch": 0.6787459768744785, "grad_norm": 3.292447328567505, "learning_rate": 7.508730845441421e-05, "loss": 1.3287, "step": 11388 }, { "epoch": 0.6788651805936345, "grad_norm": 2.8654232025146484, "learning_rate": 7.507912761902854e-05, "loss": 1.3047, "step": 11390 }, { "epoch": 0.6789843843127905, "grad_norm": 2.976003646850586, "learning_rate": 7.507094588647665e-05, "loss": 1.3043, "step": 11392 }, { "epoch": 0.6791035880319466, "grad_norm": 3.2271242141723633, "learning_rate": 7.506276325705125e-05, "loss": 1.296, "step": 11394 }, { "epoch": 0.6792227917511027, "grad_norm": 3.5737156867980957, "learning_rate": 7.505457973104506e-05, "loss": 1.3446, "step": 11396 }, { "epoch": 0.6793419954702586, "grad_norm": 2.959538698196411, "learning_rate": 7.504639530875083e-05, "loss": 1.2877, "step": 11398 }, { "epoch": 0.6794611991894147, "grad_norm": 2.982525587081909, "learning_rate": 7.503820999046136e-05, "loss": 1.4215, "step": 11400 }, { "epoch": 0.6795804029085708, "grad_norm": 3.1099050045013428, "learning_rate": 7.503002377646943e-05, "loss": 1.3666, "step": 11402 }, { "epoch": 0.6796996066277268, "grad_norm": 3.0803115367889404, "learning_rate": 7.502183666706793e-05, "loss": 1.3462, "step": 11404 }, { "epoch": 0.6798188103468829, "grad_norm": 2.961575746536255, "learning_rate": 7.50136486625497e-05, "loss": 1.3209, "step": 11406 }, { "epoch": 0.6799380140660388, "grad_norm": 2.9912540912628174, "learning_rate": 7.500545976320768e-05, "loss": 1.4099, "step": 11408 }, { "epoch": 0.6800572177851949, "grad_norm": 3.046856164932251, "learning_rate": 7.499726996933481e-05, "loss": 1.3114, "step": 11410 }, { "epoch": 0.680176421504351, "grad_norm": 3.038175106048584, "learning_rate": 7.498907928122408e-05, "loss": 1.3922, "step": 11412 }, { "epoch": 0.680295625223507, "grad_norm": 3.098508596420288, "learning_rate": 7.498088769916846e-05, "loss": 1.4623, "step": 11414 }, { "epoch": 0.680414828942663, "grad_norm": 3.0042474269866943, "learning_rate": 7.497269522346105e-05, "loss": 1.2539, "step": 11416 }, { "epoch": 0.680534032661819, "grad_norm": 3.132552146911621, "learning_rate": 7.496450185439486e-05, "loss": 1.3753, "step": 11418 }, { "epoch": 0.6806532363809751, "grad_norm": 3.1045632362365723, "learning_rate": 7.495630759226303e-05, "loss": 1.4618, "step": 11420 }, { "epoch": 0.6807724401001312, "grad_norm": 2.929560661315918, "learning_rate": 7.494811243735869e-05, "loss": 1.2895, "step": 11422 }, { "epoch": 0.6808916438192871, "grad_norm": 3.224700450897217, "learning_rate": 7.4939916389975e-05, "loss": 1.3951, "step": 11424 }, { "epoch": 0.6810108475384432, "grad_norm": 3.6533799171447754, "learning_rate": 7.493171945040518e-05, "loss": 1.2945, "step": 11426 }, { "epoch": 0.6811300512575993, "grad_norm": 3.075840950012207, "learning_rate": 7.492352161894247e-05, "loss": 1.305, "step": 11428 }, { "epoch": 0.6812492549767553, "grad_norm": 3.029188871383667, "learning_rate": 7.491532289588009e-05, "loss": 1.2824, "step": 11430 }, { "epoch": 0.6813684586959113, "grad_norm": 2.89699649810791, "learning_rate": 7.490712328151138e-05, "loss": 1.4856, "step": 11432 }, { "epoch": 0.6814876624150673, "grad_norm": 2.8706159591674805, "learning_rate": 7.489892277612964e-05, "loss": 1.3722, "step": 11434 }, { "epoch": 0.6816068661342234, "grad_norm": 3.1563334465026855, "learning_rate": 7.489072138002825e-05, "loss": 1.4083, "step": 11436 }, { "epoch": 0.6817260698533795, "grad_norm": 3.2135863304138184, "learning_rate": 7.488251909350058e-05, "loss": 1.4336, "step": 11438 }, { "epoch": 0.6818452735725354, "grad_norm": 2.953787088394165, "learning_rate": 7.487431591684008e-05, "loss": 1.2835, "step": 11440 }, { "epoch": 0.6819644772916915, "grad_norm": 2.9531466960906982, "learning_rate": 7.48661118503402e-05, "loss": 1.4044, "step": 11442 }, { "epoch": 0.6820836810108475, "grad_norm": 3.144925832748413, "learning_rate": 7.485790689429441e-05, "loss": 1.3767, "step": 11444 }, { "epoch": 0.6822028847300036, "grad_norm": 2.6341028213500977, "learning_rate": 7.484970104899624e-05, "loss": 1.2723, "step": 11446 }, { "epoch": 0.6823220884491596, "grad_norm": 3.0663464069366455, "learning_rate": 7.484149431473923e-05, "loss": 1.4566, "step": 11448 }, { "epoch": 0.6824412921683156, "grad_norm": 5.245096683502197, "learning_rate": 7.483328669181698e-05, "loss": 1.3959, "step": 11450 }, { "epoch": 0.6825604958874717, "grad_norm": 2.92165470123291, "learning_rate": 7.482507818052311e-05, "loss": 1.2131, "step": 11452 }, { "epoch": 0.6826796996066278, "grad_norm": 2.9951415061950684, "learning_rate": 7.481686878115126e-05, "loss": 1.3673, "step": 11454 }, { "epoch": 0.6827989033257837, "grad_norm": 3.181806802749634, "learning_rate": 7.480865849399508e-05, "loss": 1.4292, "step": 11456 }, { "epoch": 0.6829181070449398, "grad_norm": 3.337843418121338, "learning_rate": 7.480044731934833e-05, "loss": 1.3233, "step": 11458 }, { "epoch": 0.6830373107640958, "grad_norm": 2.8996212482452393, "learning_rate": 7.479223525750472e-05, "loss": 1.3592, "step": 11460 }, { "epoch": 0.6831565144832519, "grad_norm": 3.1664845943450928, "learning_rate": 7.478402230875801e-05, "loss": 1.3021, "step": 11462 }, { "epoch": 0.683275718202408, "grad_norm": 2.9811742305755615, "learning_rate": 7.477580847340204e-05, "loss": 1.2783, "step": 11464 }, { "epoch": 0.6833949219215639, "grad_norm": 2.9614815711975098, "learning_rate": 7.476759375173063e-05, "loss": 1.2065, "step": 11466 }, { "epoch": 0.68351412564072, "grad_norm": 3.4236578941345215, "learning_rate": 7.475937814403764e-05, "loss": 1.4452, "step": 11468 }, { "epoch": 0.683633329359876, "grad_norm": 3.48992919921875, "learning_rate": 7.475116165061698e-05, "loss": 1.2859, "step": 11470 }, { "epoch": 0.683752533079032, "grad_norm": 3.020195960998535, "learning_rate": 7.47429442717626e-05, "loss": 1.3065, "step": 11472 }, { "epoch": 0.6838717367981881, "grad_norm": 3.150268316268921, "learning_rate": 7.473472600776844e-05, "loss": 1.3828, "step": 11474 }, { "epoch": 0.6839909405173441, "grad_norm": 3.2406692504882812, "learning_rate": 7.47265068589285e-05, "loss": 1.2146, "step": 11476 }, { "epoch": 0.6841101442365002, "grad_norm": 3.373657703399658, "learning_rate": 7.47182868255368e-05, "loss": 1.3076, "step": 11478 }, { "epoch": 0.6842293479556563, "grad_norm": 3.493339776992798, "learning_rate": 7.471006590788743e-05, "loss": 1.4371, "step": 11480 }, { "epoch": 0.6843485516748122, "grad_norm": 3.3110711574554443, "learning_rate": 7.470184410627444e-05, "loss": 1.5636, "step": 11482 }, { "epoch": 0.6844677553939683, "grad_norm": 3.2239551544189453, "learning_rate": 7.469362142099196e-05, "loss": 1.4615, "step": 11484 }, { "epoch": 0.6845869591131243, "grad_norm": 3.0743215084075928, "learning_rate": 7.468539785233417e-05, "loss": 1.4031, "step": 11486 }, { "epoch": 0.6847061628322804, "grad_norm": 2.9024384021759033, "learning_rate": 7.467717340059524e-05, "loss": 1.3851, "step": 11488 }, { "epoch": 0.6848253665514364, "grad_norm": 3.2466061115264893, "learning_rate": 7.466894806606939e-05, "loss": 1.2969, "step": 11490 }, { "epoch": 0.6849445702705924, "grad_norm": 2.8589701652526855, "learning_rate": 7.466072184905085e-05, "loss": 1.2741, "step": 11492 }, { "epoch": 0.6850637739897485, "grad_norm": 3.193812608718872, "learning_rate": 7.465249474983392e-05, "loss": 1.314, "step": 11494 }, { "epoch": 0.6851829777089046, "grad_norm": 3.28775954246521, "learning_rate": 7.46442667687129e-05, "loss": 1.6354, "step": 11496 }, { "epoch": 0.6853021814280605, "grad_norm": 3.912654161453247, "learning_rate": 7.463603790598214e-05, "loss": 1.5073, "step": 11498 }, { "epoch": 0.6854213851472166, "grad_norm": 3.112741231918335, "learning_rate": 7.462780816193602e-05, "loss": 1.2014, "step": 11500 }, { "epoch": 0.6855405888663726, "grad_norm": 2.9842536449432373, "learning_rate": 7.461957753686893e-05, "loss": 1.2782, "step": 11502 }, { "epoch": 0.6856597925855287, "grad_norm": 3.5600063800811768, "learning_rate": 7.461134603107533e-05, "loss": 1.3083, "step": 11504 }, { "epoch": 0.6857789963046848, "grad_norm": 2.8591928482055664, "learning_rate": 7.460311364484964e-05, "loss": 1.3709, "step": 11506 }, { "epoch": 0.6858982000238407, "grad_norm": 3.368011236190796, "learning_rate": 7.459488037848643e-05, "loss": 1.389, "step": 11508 }, { "epoch": 0.6860174037429968, "grad_norm": 3.152740955352783, "learning_rate": 7.45866462322802e-05, "loss": 1.3774, "step": 11510 }, { "epoch": 0.6861366074621528, "grad_norm": 2.8608243465423584, "learning_rate": 7.45784112065255e-05, "loss": 1.2522, "step": 11512 }, { "epoch": 0.6862558111813089, "grad_norm": 2.9974417686462402, "learning_rate": 7.457017530151693e-05, "loss": 1.3957, "step": 11514 }, { "epoch": 0.6863750149004649, "grad_norm": 2.972892999649048, "learning_rate": 7.456193851754914e-05, "loss": 1.2969, "step": 11516 }, { "epoch": 0.6864942186196209, "grad_norm": 3.0133607387542725, "learning_rate": 7.455370085491677e-05, "loss": 1.4406, "step": 11518 }, { "epoch": 0.686613422338777, "grad_norm": 2.8009605407714844, "learning_rate": 7.45454623139145e-05, "loss": 1.3108, "step": 11520 }, { "epoch": 0.6867326260579331, "grad_norm": 2.5685906410217285, "learning_rate": 7.453722289483706e-05, "loss": 1.2291, "step": 11522 }, { "epoch": 0.686851829777089, "grad_norm": 2.9566574096679688, "learning_rate": 7.452898259797922e-05, "loss": 1.3763, "step": 11524 }, { "epoch": 0.6869710334962451, "grad_norm": 3.0031638145446777, "learning_rate": 7.452074142363573e-05, "loss": 1.3491, "step": 11526 }, { "epoch": 0.6870902372154011, "grad_norm": 3.03749680519104, "learning_rate": 7.451249937210143e-05, "loss": 1.404, "step": 11528 }, { "epoch": 0.6872094409345572, "grad_norm": 3.1808674335479736, "learning_rate": 7.450425644367117e-05, "loss": 1.3391, "step": 11530 }, { "epoch": 0.6873286446537132, "grad_norm": 2.9089925289154053, "learning_rate": 7.44960126386398e-05, "loss": 1.3592, "step": 11532 }, { "epoch": 0.6874478483728692, "grad_norm": 2.788388967514038, "learning_rate": 7.448776795730226e-05, "loss": 1.4194, "step": 11534 }, { "epoch": 0.6875670520920253, "grad_norm": 3.0737266540527344, "learning_rate": 7.447952239995346e-05, "loss": 1.3261, "step": 11536 }, { "epoch": 0.6876862558111813, "grad_norm": 2.8071415424346924, "learning_rate": 7.447127596688839e-05, "loss": 1.3685, "step": 11538 }, { "epoch": 0.6878054595303373, "grad_norm": 3.2665367126464844, "learning_rate": 7.446302865840206e-05, "loss": 1.3579, "step": 11540 }, { "epoch": 0.6879246632494934, "grad_norm": 3.088024377822876, "learning_rate": 7.445478047478948e-05, "loss": 1.5123, "step": 11542 }, { "epoch": 0.6880438669686494, "grad_norm": 3.183359146118164, "learning_rate": 7.444653141634575e-05, "loss": 1.3773, "step": 11544 }, { "epoch": 0.6881630706878055, "grad_norm": 3.6457784175872803, "learning_rate": 7.443828148336593e-05, "loss": 1.2874, "step": 11546 }, { "epoch": 0.6882822744069615, "grad_norm": 2.9941046237945557, "learning_rate": 7.443003067614521e-05, "loss": 1.5554, "step": 11548 }, { "epoch": 0.6884014781261175, "grad_norm": 2.722566604614258, "learning_rate": 7.442177899497867e-05, "loss": 1.2836, "step": 11550 }, { "epoch": 0.6885206818452736, "grad_norm": 2.987135887145996, "learning_rate": 7.441352644016153e-05, "loss": 1.2247, "step": 11552 }, { "epoch": 0.6886398855644296, "grad_norm": 3.2400104999542236, "learning_rate": 7.440527301198902e-05, "loss": 1.3156, "step": 11554 }, { "epoch": 0.6887590892835856, "grad_norm": 3.2880210876464844, "learning_rate": 7.439701871075641e-05, "loss": 1.3347, "step": 11556 }, { "epoch": 0.6888782930027417, "grad_norm": 3.087319850921631, "learning_rate": 7.438876353675897e-05, "loss": 1.202, "step": 11558 }, { "epoch": 0.6889974967218977, "grad_norm": 2.7945611476898193, "learning_rate": 7.4380507490292e-05, "loss": 1.214, "step": 11560 }, { "epoch": 0.6891167004410538, "grad_norm": 3.1687252521514893, "learning_rate": 7.437225057165087e-05, "loss": 1.34, "step": 11562 }, { "epoch": 0.6892359041602097, "grad_norm": 3.061964988708496, "learning_rate": 7.436399278113094e-05, "loss": 1.4029, "step": 11564 }, { "epoch": 0.6893551078793658, "grad_norm": 3.1537117958068848, "learning_rate": 7.435573411902763e-05, "loss": 1.2892, "step": 11566 }, { "epoch": 0.6894743115985219, "grad_norm": 3.0488314628601074, "learning_rate": 7.434747458563637e-05, "loss": 1.3338, "step": 11568 }, { "epoch": 0.6895935153176779, "grad_norm": 2.8798294067382812, "learning_rate": 7.433921418125264e-05, "loss": 1.2527, "step": 11570 }, { "epoch": 0.689712719036834, "grad_norm": 3.060309410095215, "learning_rate": 7.433095290617195e-05, "loss": 1.3128, "step": 11572 }, { "epoch": 0.68983192275599, "grad_norm": 2.932927370071411, "learning_rate": 7.432269076068986e-05, "loss": 1.355, "step": 11574 }, { "epoch": 0.689951126475146, "grad_norm": 3.3284690380096436, "learning_rate": 7.431442774510185e-05, "loss": 1.2524, "step": 11576 }, { "epoch": 0.6900703301943021, "grad_norm": 2.7848763465881348, "learning_rate": 7.430616385970359e-05, "loss": 1.4205, "step": 11578 }, { "epoch": 0.690189533913458, "grad_norm": 2.728919744491577, "learning_rate": 7.42978991047907e-05, "loss": 1.2913, "step": 11580 }, { "epoch": 0.6903087376326141, "grad_norm": 3.113825798034668, "learning_rate": 7.42896334806588e-05, "loss": 1.3316, "step": 11582 }, { "epoch": 0.6904279413517702, "grad_norm": 3.2555315494537354, "learning_rate": 7.428136698760361e-05, "loss": 1.4311, "step": 11584 }, { "epoch": 0.6905471450709262, "grad_norm": 2.963958501815796, "learning_rate": 7.427309962592086e-05, "loss": 1.3243, "step": 11586 }, { "epoch": 0.6906663487900823, "grad_norm": 3.160691022872925, "learning_rate": 7.426483139590628e-05, "loss": 1.3465, "step": 11588 }, { "epoch": 0.6907855525092383, "grad_norm": 3.379725456237793, "learning_rate": 7.425656229785565e-05, "loss": 1.3726, "step": 11590 }, { "epoch": 0.6909047562283943, "grad_norm": 3.219780683517456, "learning_rate": 7.424829233206479e-05, "loss": 1.4155, "step": 11592 }, { "epoch": 0.6910239599475504, "grad_norm": 3.0872840881347656, "learning_rate": 7.424002149882958e-05, "loss": 1.4335, "step": 11594 }, { "epoch": 0.6911431636667064, "grad_norm": 2.7061076164245605, "learning_rate": 7.423174979844583e-05, "loss": 1.3122, "step": 11596 }, { "epoch": 0.6912623673858624, "grad_norm": 3.0806825160980225, "learning_rate": 7.42234772312095e-05, "loss": 1.2973, "step": 11598 }, { "epoch": 0.6913815711050185, "grad_norm": 3.081416606903076, "learning_rate": 7.42152037974165e-05, "loss": 1.3052, "step": 11600 }, { "epoch": 0.6915007748241745, "grad_norm": 3.0530223846435547, "learning_rate": 7.420692949736281e-05, "loss": 1.2474, "step": 11602 }, { "epoch": 0.6916199785433306, "grad_norm": 3.2039237022399902, "learning_rate": 7.419865433134443e-05, "loss": 1.2982, "step": 11604 }, { "epoch": 0.6917391822624865, "grad_norm": 3.0170822143554688, "learning_rate": 7.41903782996574e-05, "loss": 1.2713, "step": 11606 }, { "epoch": 0.6918583859816426, "grad_norm": 3.3353633880615234, "learning_rate": 7.418210140259776e-05, "loss": 1.3308, "step": 11608 }, { "epoch": 0.6919775897007987, "grad_norm": 2.9310696125030518, "learning_rate": 7.417382364046162e-05, "loss": 1.2729, "step": 11610 }, { "epoch": 0.6920967934199547, "grad_norm": 2.872549533843994, "learning_rate": 7.41655450135451e-05, "loss": 1.2351, "step": 11612 }, { "epoch": 0.6922159971391108, "grad_norm": 3.0427474975585938, "learning_rate": 7.415726552214435e-05, "loss": 1.2131, "step": 11614 }, { "epoch": 0.6923352008582668, "grad_norm": 3.2279765605926514, "learning_rate": 7.414898516655555e-05, "loss": 1.4555, "step": 11616 }, { "epoch": 0.6924544045774228, "grad_norm": 3.2396702766418457, "learning_rate": 7.414070394707494e-05, "loss": 1.342, "step": 11618 }, { "epoch": 0.6925736082965789, "grad_norm": 3.0463969707489014, "learning_rate": 7.413242186399875e-05, "loss": 1.3583, "step": 11620 }, { "epoch": 0.6926928120157348, "grad_norm": 3.1896073818206787, "learning_rate": 7.412413891762325e-05, "loss": 1.3214, "step": 11622 }, { "epoch": 0.6928120157348909, "grad_norm": 3.2530035972595215, "learning_rate": 7.41158551082448e-05, "loss": 1.4124, "step": 11624 }, { "epoch": 0.692931219454047, "grad_norm": 3.168999433517456, "learning_rate": 7.410757043615965e-05, "loss": 1.2779, "step": 11626 }, { "epoch": 0.693050423173203, "grad_norm": 2.764477252960205, "learning_rate": 7.409928490166424e-05, "loss": 1.2605, "step": 11628 }, { "epoch": 0.6931696268923591, "grad_norm": 2.8949530124664307, "learning_rate": 7.409099850505497e-05, "loss": 1.3745, "step": 11630 }, { "epoch": 0.693288830611515, "grad_norm": 3.117957830429077, "learning_rate": 7.408271124662824e-05, "loss": 1.221, "step": 11632 }, { "epoch": 0.6934080343306711, "grad_norm": 2.9836370944976807, "learning_rate": 7.407442312668054e-05, "loss": 1.3082, "step": 11634 }, { "epoch": 0.6935272380498272, "grad_norm": 3.359524965286255, "learning_rate": 7.406613414550835e-05, "loss": 1.3483, "step": 11636 }, { "epoch": 0.6936464417689832, "grad_norm": 3.3778107166290283, "learning_rate": 7.40578443034082e-05, "loss": 1.4251, "step": 11638 }, { "epoch": 0.6937656454881392, "grad_norm": 2.9634344577789307, "learning_rate": 7.404955360067665e-05, "loss": 1.2812, "step": 11640 }, { "epoch": 0.6938848492072953, "grad_norm": 3.0335702896118164, "learning_rate": 7.404126203761028e-05, "loss": 1.3205, "step": 11642 }, { "epoch": 0.6940040529264513, "grad_norm": 3.320711374282837, "learning_rate": 7.403296961450572e-05, "loss": 1.4878, "step": 11644 }, { "epoch": 0.6941232566456074, "grad_norm": 3.326862335205078, "learning_rate": 7.402467633165959e-05, "loss": 1.5122, "step": 11646 }, { "epoch": 0.6942424603647633, "grad_norm": 3.504277229309082, "learning_rate": 7.401638218936859e-05, "loss": 1.3153, "step": 11648 }, { "epoch": 0.6943616640839194, "grad_norm": 3.028203010559082, "learning_rate": 7.400808718792944e-05, "loss": 1.2923, "step": 11650 }, { "epoch": 0.6944808678030755, "grad_norm": 3.38312029838562, "learning_rate": 7.399979132763888e-05, "loss": 1.3378, "step": 11652 }, { "epoch": 0.6946000715222315, "grad_norm": 3.1538245677948, "learning_rate": 7.399149460879365e-05, "loss": 1.4967, "step": 11654 }, { "epoch": 0.6947192752413875, "grad_norm": 3.1964704990386963, "learning_rate": 7.398319703169058e-05, "loss": 1.4293, "step": 11656 }, { "epoch": 0.6948384789605435, "grad_norm": 2.96461820602417, "learning_rate": 7.397489859662649e-05, "loss": 1.3192, "step": 11658 }, { "epoch": 0.6949576826796996, "grad_norm": 3.034611940383911, "learning_rate": 7.396659930389825e-05, "loss": 1.286, "step": 11660 }, { "epoch": 0.6950768863988557, "grad_norm": 2.970865488052368, "learning_rate": 7.395829915380275e-05, "loss": 1.3978, "step": 11662 }, { "epoch": 0.6951960901180116, "grad_norm": 3.145573139190674, "learning_rate": 7.394999814663691e-05, "loss": 1.3638, "step": 11664 }, { "epoch": 0.6953152938371677, "grad_norm": 3.0894930362701416, "learning_rate": 7.394169628269772e-05, "loss": 1.3706, "step": 11666 }, { "epoch": 0.6954344975563238, "grad_norm": 3.0456717014312744, "learning_rate": 7.39333935622821e-05, "loss": 1.3218, "step": 11668 }, { "epoch": 0.6955537012754798, "grad_norm": 3.5652151107788086, "learning_rate": 7.392508998568714e-05, "loss": 1.5723, "step": 11670 }, { "epoch": 0.6956729049946359, "grad_norm": 2.7441587448120117, "learning_rate": 7.391678555320982e-05, "loss": 1.3299, "step": 11672 }, { "epoch": 0.6957921087137918, "grad_norm": 3.0470542907714844, "learning_rate": 7.390848026514727e-05, "loss": 1.275, "step": 11674 }, { "epoch": 0.6959113124329479, "grad_norm": 2.7697019577026367, "learning_rate": 7.390017412179658e-05, "loss": 1.3716, "step": 11676 }, { "epoch": 0.696030516152104, "grad_norm": 2.8959743976593018, "learning_rate": 7.389186712345488e-05, "loss": 1.2472, "step": 11678 }, { "epoch": 0.69614971987126, "grad_norm": 3.12138295173645, "learning_rate": 7.388355927041935e-05, "loss": 1.5091, "step": 11680 }, { "epoch": 0.696268923590416, "grad_norm": 2.8283941745758057, "learning_rate": 7.387525056298718e-05, "loss": 1.2287, "step": 11682 }, { "epoch": 0.6963881273095721, "grad_norm": 3.462535858154297, "learning_rate": 7.386694100145563e-05, "loss": 1.3019, "step": 11684 }, { "epoch": 0.6965073310287281, "grad_norm": 3.1232705116271973, "learning_rate": 7.385863058612193e-05, "loss": 1.2501, "step": 11686 }, { "epoch": 0.6966265347478842, "grad_norm": 2.929309606552124, "learning_rate": 7.385031931728338e-05, "loss": 1.2873, "step": 11688 }, { "epoch": 0.6967457384670401, "grad_norm": 3.3139004707336426, "learning_rate": 7.38420071952373e-05, "loss": 1.5183, "step": 11690 }, { "epoch": 0.6968649421861962, "grad_norm": 3.205115795135498, "learning_rate": 7.383369422028106e-05, "loss": 1.4175, "step": 11692 }, { "epoch": 0.6969841459053523, "grad_norm": 3.179750680923462, "learning_rate": 7.382538039271203e-05, "loss": 1.3953, "step": 11694 }, { "epoch": 0.6971033496245083, "grad_norm": 2.690648317337036, "learning_rate": 7.381706571282761e-05, "loss": 1.3908, "step": 11696 }, { "epoch": 0.6972225533436643, "grad_norm": 3.0765044689178467, "learning_rate": 7.380875018092528e-05, "loss": 1.4385, "step": 11698 }, { "epoch": 0.6973417570628203, "grad_norm": 3.096558094024658, "learning_rate": 7.380043379730249e-05, "loss": 1.275, "step": 11700 }, { "epoch": 0.6974609607819764, "grad_norm": 2.7595348358154297, "learning_rate": 7.379211656225676e-05, "loss": 1.3667, "step": 11702 }, { "epoch": 0.6975801645011325, "grad_norm": 2.975598096847534, "learning_rate": 7.378379847608559e-05, "loss": 1.2462, "step": 11704 }, { "epoch": 0.6976993682202884, "grad_norm": 2.962022304534912, "learning_rate": 7.377547953908659e-05, "loss": 1.3943, "step": 11706 }, { "epoch": 0.6978185719394445, "grad_norm": 3.037641763687134, "learning_rate": 7.376715975155735e-05, "loss": 1.5266, "step": 11708 }, { "epoch": 0.6979377756586006, "grad_norm": 2.8126587867736816, "learning_rate": 7.375883911379546e-05, "loss": 1.3231, "step": 11710 }, { "epoch": 0.6980569793777566, "grad_norm": 2.8385002613067627, "learning_rate": 7.375051762609862e-05, "loss": 1.3268, "step": 11712 }, { "epoch": 0.6981761830969127, "grad_norm": 3.4131367206573486, "learning_rate": 7.374219528876452e-05, "loss": 1.4059, "step": 11714 }, { "epoch": 0.6982953868160686, "grad_norm": 3.1312456130981445, "learning_rate": 7.373387210209084e-05, "loss": 1.3321, "step": 11716 }, { "epoch": 0.6984145905352247, "grad_norm": 3.1531643867492676, "learning_rate": 7.372554806637535e-05, "loss": 1.3108, "step": 11718 }, { "epoch": 0.6985337942543808, "grad_norm": 3.1408438682556152, "learning_rate": 7.371722318191583e-05, "loss": 1.2818, "step": 11720 }, { "epoch": 0.6986529979735367, "grad_norm": 3.007904291152954, "learning_rate": 7.37088974490101e-05, "loss": 1.4553, "step": 11722 }, { "epoch": 0.6987722016926928, "grad_norm": 3.0061309337615967, "learning_rate": 7.370057086795598e-05, "loss": 1.4872, "step": 11724 }, { "epoch": 0.6988914054118488, "grad_norm": 2.889618396759033, "learning_rate": 7.369224343905135e-05, "loss": 1.4042, "step": 11726 }, { "epoch": 0.6990106091310049, "grad_norm": 3.1722686290740967, "learning_rate": 7.368391516259412e-05, "loss": 1.2963, "step": 11728 }, { "epoch": 0.699129812850161, "grad_norm": 3.4389021396636963, "learning_rate": 7.36755860388822e-05, "loss": 1.3353, "step": 11730 }, { "epoch": 0.6992490165693169, "grad_norm": 3.1954469680786133, "learning_rate": 7.366725606821356e-05, "loss": 1.4189, "step": 11732 }, { "epoch": 0.699368220288473, "grad_norm": 3.184288740158081, "learning_rate": 7.365892525088621e-05, "loss": 1.5164, "step": 11734 }, { "epoch": 0.6994874240076291, "grad_norm": 2.720580816268921, "learning_rate": 7.365059358719814e-05, "loss": 1.3029, "step": 11736 }, { "epoch": 0.6996066277267851, "grad_norm": 3.036383867263794, "learning_rate": 7.364226107744742e-05, "loss": 1.275, "step": 11738 }, { "epoch": 0.6997258314459411, "grad_norm": 3.3084335327148438, "learning_rate": 7.363392772193214e-05, "loss": 1.2621, "step": 11740 }, { "epoch": 0.6998450351650971, "grad_norm": 2.7750954627990723, "learning_rate": 7.362559352095039e-05, "loss": 1.2394, "step": 11742 }, { "epoch": 0.6999642388842532, "grad_norm": 3.337132215499878, "learning_rate": 7.361725847480033e-05, "loss": 1.3467, "step": 11744 }, { "epoch": 0.7000834426034093, "grad_norm": 2.8113296031951904, "learning_rate": 7.360892258378014e-05, "loss": 1.3242, "step": 11746 }, { "epoch": 0.7002026463225652, "grad_norm": 3.3372817039489746, "learning_rate": 7.3600585848188e-05, "loss": 1.3616, "step": 11748 }, { "epoch": 0.7003218500417213, "grad_norm": 3.484139919281006, "learning_rate": 7.359224826832216e-05, "loss": 1.3934, "step": 11750 }, { "epoch": 0.7004410537608773, "grad_norm": 2.8935062885284424, "learning_rate": 7.358390984448087e-05, "loss": 1.4499, "step": 11752 }, { "epoch": 0.7005602574800334, "grad_norm": 2.9506916999816895, "learning_rate": 7.357557057696245e-05, "loss": 1.2284, "step": 11754 }, { "epoch": 0.7006794611991894, "grad_norm": 2.973179340362549, "learning_rate": 7.356723046606519e-05, "loss": 1.2695, "step": 11756 }, { "epoch": 0.7007986649183454, "grad_norm": 2.8179595470428467, "learning_rate": 7.355888951208747e-05, "loss": 1.3314, "step": 11758 }, { "epoch": 0.7009178686375015, "grad_norm": 3.4738106727600098, "learning_rate": 7.355054771532768e-05, "loss": 1.4119, "step": 11760 }, { "epoch": 0.7010370723566576, "grad_norm": 3.084742307662964, "learning_rate": 7.354220507608419e-05, "loss": 1.3978, "step": 11762 }, { "epoch": 0.7011562760758135, "grad_norm": 2.9900968074798584, "learning_rate": 7.35338615946555e-05, "loss": 1.3727, "step": 11764 }, { "epoch": 0.7012754797949696, "grad_norm": 3.0453410148620605, "learning_rate": 7.352551727134004e-05, "loss": 1.3414, "step": 11766 }, { "epoch": 0.7013946835141256, "grad_norm": 2.9792284965515137, "learning_rate": 7.351717210643636e-05, "loss": 1.2544, "step": 11768 }, { "epoch": 0.7015138872332817, "grad_norm": 2.854093313217163, "learning_rate": 7.350882610024296e-05, "loss": 1.2608, "step": 11770 }, { "epoch": 0.7016330909524378, "grad_norm": 2.901747226715088, "learning_rate": 7.35004792530584e-05, "loss": 1.3074, "step": 11772 }, { "epoch": 0.7017522946715937, "grad_norm": 3.1041903495788574, "learning_rate": 7.349213156518132e-05, "loss": 1.2459, "step": 11774 }, { "epoch": 0.7018714983907498, "grad_norm": 3.3884356021881104, "learning_rate": 7.34837830369103e-05, "loss": 1.3271, "step": 11776 }, { "epoch": 0.7019907021099059, "grad_norm": 3.075429677963257, "learning_rate": 7.347543366854402e-05, "loss": 1.3127, "step": 11778 }, { "epoch": 0.7021099058290619, "grad_norm": 3.2906196117401123, "learning_rate": 7.346708346038116e-05, "loss": 1.4713, "step": 11780 }, { "epoch": 0.7022291095482179, "grad_norm": 2.9950883388519287, "learning_rate": 7.345873241272041e-05, "loss": 1.3445, "step": 11782 }, { "epoch": 0.7023483132673739, "grad_norm": 2.944423198699951, "learning_rate": 7.345038052586055e-05, "loss": 1.3907, "step": 11784 }, { "epoch": 0.70246751698653, "grad_norm": 3.277862787246704, "learning_rate": 7.344202780010035e-05, "loss": 1.5313, "step": 11786 }, { "epoch": 0.7025867207056861, "grad_norm": 2.9315857887268066, "learning_rate": 7.34336742357386e-05, "loss": 1.3891, "step": 11788 }, { "epoch": 0.702705924424842, "grad_norm": 3.1934046745300293, "learning_rate": 7.342531983307416e-05, "loss": 1.2019, "step": 11790 }, { "epoch": 0.7028251281439981, "grad_norm": 3.2654545307159424, "learning_rate": 7.341696459240589e-05, "loss": 1.3237, "step": 11792 }, { "epoch": 0.7029443318631541, "grad_norm": 3.006150722503662, "learning_rate": 7.340860851403265e-05, "loss": 1.3702, "step": 11794 }, { "epoch": 0.7030635355823102, "grad_norm": 2.9698069095611572, "learning_rate": 7.34002515982534e-05, "loss": 1.3673, "step": 11796 }, { "epoch": 0.7031827393014662, "grad_norm": 3.0241329669952393, "learning_rate": 7.339189384536708e-05, "loss": 1.2889, "step": 11798 }, { "epoch": 0.7033019430206222, "grad_norm": 3.115283727645874, "learning_rate": 7.33835352556727e-05, "loss": 1.1836, "step": 11800 }, { "epoch": 0.7034211467397783, "grad_norm": 3.2226665019989014, "learning_rate": 7.337517582946925e-05, "loss": 1.2111, "step": 11802 }, { "epoch": 0.7035403504589344, "grad_norm": 3.009878158569336, "learning_rate": 7.336681556705579e-05, "loss": 1.5059, "step": 11804 }, { "epoch": 0.7036595541780903, "grad_norm": 2.66802978515625, "learning_rate": 7.335845446873136e-05, "loss": 1.1926, "step": 11806 }, { "epoch": 0.7037787578972464, "grad_norm": 2.95403790473938, "learning_rate": 7.335009253479513e-05, "loss": 1.2657, "step": 11808 }, { "epoch": 0.7038979616164024, "grad_norm": 2.920358896255493, "learning_rate": 7.334172976554617e-05, "loss": 1.2195, "step": 11810 }, { "epoch": 0.7040171653355585, "grad_norm": 3.0445754528045654, "learning_rate": 7.333336616128369e-05, "loss": 1.3874, "step": 11812 }, { "epoch": 0.7041363690547146, "grad_norm": 2.7534523010253906, "learning_rate": 7.332500172230686e-05, "loss": 1.2177, "step": 11814 }, { "epoch": 0.7042555727738705, "grad_norm": 3.2140755653381348, "learning_rate": 7.331663644891492e-05, "loss": 1.3285, "step": 11816 }, { "epoch": 0.7043747764930266, "grad_norm": 3.2122178077697754, "learning_rate": 7.330827034140712e-05, "loss": 1.3087, "step": 11818 }, { "epoch": 0.7044939802121826, "grad_norm": 2.7898106575012207, "learning_rate": 7.329990340008273e-05, "loss": 1.3693, "step": 11820 }, { "epoch": 0.7046131839313386, "grad_norm": 2.883539915084839, "learning_rate": 7.329153562524109e-05, "loss": 1.1233, "step": 11822 }, { "epoch": 0.7047323876504947, "grad_norm": 3.2210679054260254, "learning_rate": 7.328316701718152e-05, "loss": 1.3385, "step": 11824 }, { "epoch": 0.7048515913696507, "grad_norm": 3.1288702487945557, "learning_rate": 7.32747975762034e-05, "loss": 1.4195, "step": 11826 }, { "epoch": 0.7049707950888068, "grad_norm": 2.917332410812378, "learning_rate": 7.326642730260614e-05, "loss": 1.3366, "step": 11828 }, { "epoch": 0.7050899988079629, "grad_norm": 3.0429954528808594, "learning_rate": 7.325805619668917e-05, "loss": 1.2491, "step": 11830 }, { "epoch": 0.7052092025271188, "grad_norm": 3.0051827430725098, "learning_rate": 7.324968425875196e-05, "loss": 1.3572, "step": 11832 }, { "epoch": 0.7053284062462749, "grad_norm": 3.304762840270996, "learning_rate": 7.324131148909398e-05, "loss": 1.384, "step": 11834 }, { "epoch": 0.7054476099654309, "grad_norm": 2.9149320125579834, "learning_rate": 7.323293788801479e-05, "loss": 1.4043, "step": 11836 }, { "epoch": 0.705566813684587, "grad_norm": 2.994631052017212, "learning_rate": 7.32245634558139e-05, "loss": 1.3262, "step": 11838 }, { "epoch": 0.705686017403743, "grad_norm": 3.1834335327148438, "learning_rate": 7.321618819279093e-05, "loss": 1.2906, "step": 11840 }, { "epoch": 0.705805221122899, "grad_norm": 3.034491777420044, "learning_rate": 7.320781209924547e-05, "loss": 1.2904, "step": 11842 }, { "epoch": 0.7059244248420551, "grad_norm": 3.2182681560516357, "learning_rate": 7.319943517547716e-05, "loss": 1.4163, "step": 11844 }, { "epoch": 0.7060436285612111, "grad_norm": 3.081153392791748, "learning_rate": 7.319105742178568e-05, "loss": 1.385, "step": 11846 }, { "epoch": 0.7061628322803671, "grad_norm": 3.2821805477142334, "learning_rate": 7.318267883847072e-05, "loss": 1.5852, "step": 11848 }, { "epoch": 0.7062820359995232, "grad_norm": 3.3755042552948, "learning_rate": 7.317429942583204e-05, "loss": 1.3204, "step": 11850 }, { "epoch": 0.7064012397186792, "grad_norm": 2.9078996181488037, "learning_rate": 7.316591918416938e-05, "loss": 1.4621, "step": 11852 }, { "epoch": 0.7065204434378353, "grad_norm": 2.7921488285064697, "learning_rate": 7.315753811378251e-05, "loss": 1.3475, "step": 11854 }, { "epoch": 0.7066396471569913, "grad_norm": 2.9934301376342773, "learning_rate": 7.31491562149713e-05, "loss": 1.2746, "step": 11856 }, { "epoch": 0.7067588508761473, "grad_norm": 3.2446420192718506, "learning_rate": 7.314077348803552e-05, "loss": 1.3877, "step": 11858 }, { "epoch": 0.7068780545953034, "grad_norm": 3.4937479496002197, "learning_rate": 7.313238993327514e-05, "loss": 1.4825, "step": 11860 }, { "epoch": 0.7069972583144594, "grad_norm": 3.000790596008301, "learning_rate": 7.312400555099e-05, "loss": 1.4025, "step": 11862 }, { "epoch": 0.7071164620336154, "grad_norm": 3.0127665996551514, "learning_rate": 7.311562034148007e-05, "loss": 1.3389, "step": 11864 }, { "epoch": 0.7072356657527715, "grad_norm": 2.8830697536468506, "learning_rate": 7.310723430504533e-05, "loss": 1.2835, "step": 11866 }, { "epoch": 0.7073548694719275, "grad_norm": 2.9622764587402344, "learning_rate": 7.309884744198574e-05, "loss": 1.3767, "step": 11868 }, { "epoch": 0.7074740731910836, "grad_norm": 3.08685040473938, "learning_rate": 7.309045975260134e-05, "loss": 1.3744, "step": 11870 }, { "epoch": 0.7075932769102397, "grad_norm": 3.423147678375244, "learning_rate": 7.30820712371922e-05, "loss": 1.3776, "step": 11872 }, { "epoch": 0.7077124806293956, "grad_norm": 3.2208025455474854, "learning_rate": 7.30736818960584e-05, "loss": 1.4569, "step": 11874 }, { "epoch": 0.7078316843485517, "grad_norm": 3.242882251739502, "learning_rate": 7.306529172950005e-05, "loss": 1.4448, "step": 11876 }, { "epoch": 0.7079508880677077, "grad_norm": 3.0094711780548096, "learning_rate": 7.305690073781729e-05, "loss": 1.1807, "step": 11878 }, { "epoch": 0.7080700917868638, "grad_norm": 2.914775848388672, "learning_rate": 7.304850892131032e-05, "loss": 1.4066, "step": 11880 }, { "epoch": 0.7081892955060198, "grad_norm": 3.2467541694641113, "learning_rate": 7.304011628027932e-05, "loss": 1.2376, "step": 11882 }, { "epoch": 0.7083084992251758, "grad_norm": 2.7729265689849854, "learning_rate": 7.303172281502451e-05, "loss": 1.1882, "step": 11884 }, { "epoch": 0.7084277029443319, "grad_norm": 2.9003360271453857, "learning_rate": 7.302332852584618e-05, "loss": 1.2819, "step": 11886 }, { "epoch": 0.7085469066634879, "grad_norm": 3.0184855461120605, "learning_rate": 7.301493341304464e-05, "loss": 1.5431, "step": 11888 }, { "epoch": 0.7086661103826439, "grad_norm": 3.415332078933716, "learning_rate": 7.300653747692016e-05, "loss": 1.3923, "step": 11890 }, { "epoch": 0.7087853141018, "grad_norm": 3.519286632537842, "learning_rate": 7.299814071777314e-05, "loss": 1.3341, "step": 11892 }, { "epoch": 0.708904517820956, "grad_norm": 2.8363683223724365, "learning_rate": 7.298974313590393e-05, "loss": 1.2991, "step": 11894 }, { "epoch": 0.7090237215401121, "grad_norm": 3.9013376235961914, "learning_rate": 7.298134473161294e-05, "loss": 1.3594, "step": 11896 }, { "epoch": 0.7091429252592681, "grad_norm": 2.923588752746582, "learning_rate": 7.297294550520063e-05, "loss": 1.4151, "step": 11898 }, { "epoch": 0.7092621289784241, "grad_norm": 2.9584598541259766, "learning_rate": 7.296454545696745e-05, "loss": 1.3188, "step": 11900 }, { "epoch": 0.7093813326975802, "grad_norm": 3.265983819961548, "learning_rate": 7.29561445872139e-05, "loss": 1.4076, "step": 11902 }, { "epoch": 0.7095005364167362, "grad_norm": 2.9752397537231445, "learning_rate": 7.294774289624053e-05, "loss": 1.3211, "step": 11904 }, { "epoch": 0.7096197401358922, "grad_norm": 3.1521644592285156, "learning_rate": 7.293934038434789e-05, "loss": 1.4115, "step": 11906 }, { "epoch": 0.7097389438550483, "grad_norm": 3.056412696838379, "learning_rate": 7.293093705183654e-05, "loss": 1.158, "step": 11908 }, { "epoch": 0.7098581475742043, "grad_norm": 3.3301167488098145, "learning_rate": 7.292253289900713e-05, "loss": 1.3522, "step": 11910 }, { "epoch": 0.7099773512933604, "grad_norm": 3.328406810760498, "learning_rate": 7.291412792616028e-05, "loss": 1.3761, "step": 11912 }, { "epoch": 0.7100965550125163, "grad_norm": 2.8005943298339844, "learning_rate": 7.290572213359666e-05, "loss": 1.2789, "step": 11914 }, { "epoch": 0.7102157587316724, "grad_norm": 3.027325391769409, "learning_rate": 7.2897315521617e-05, "loss": 1.4216, "step": 11916 }, { "epoch": 0.7103349624508285, "grad_norm": 2.953579902648926, "learning_rate": 7.288890809052202e-05, "loss": 1.2962, "step": 11918 }, { "epoch": 0.7104541661699845, "grad_norm": 2.902616024017334, "learning_rate": 7.28804998406125e-05, "loss": 1.4099, "step": 11920 }, { "epoch": 0.7105733698891405, "grad_norm": 2.8769097328186035, "learning_rate": 7.287209077218919e-05, "loss": 1.2286, "step": 11922 }, { "epoch": 0.7106925736082966, "grad_norm": 2.789719820022583, "learning_rate": 7.286368088555296e-05, "loss": 1.2569, "step": 11924 }, { "epoch": 0.7108117773274526, "grad_norm": 3.259453058242798, "learning_rate": 7.285527018100464e-05, "loss": 1.3889, "step": 11926 }, { "epoch": 0.7109309810466087, "grad_norm": 3.10268497467041, "learning_rate": 7.284685865884509e-05, "loss": 1.3747, "step": 11928 }, { "epoch": 0.7110501847657646, "grad_norm": 2.954254627227783, "learning_rate": 7.283844631937524e-05, "loss": 1.3361, "step": 11930 }, { "epoch": 0.7111693884849207, "grad_norm": 2.7632381916046143, "learning_rate": 7.283003316289603e-05, "loss": 1.2897, "step": 11932 }, { "epoch": 0.7112885922040768, "grad_norm": 3.334984540939331, "learning_rate": 7.28216191897084e-05, "loss": 1.3596, "step": 11934 }, { "epoch": 0.7114077959232328, "grad_norm": 3.1120505332946777, "learning_rate": 7.28132044001134e-05, "loss": 1.3978, "step": 11936 }, { "epoch": 0.7115269996423889, "grad_norm": 3.2534565925598145, "learning_rate": 7.2804788794412e-05, "loss": 1.4247, "step": 11938 }, { "epoch": 0.7116462033615448, "grad_norm": 3.030853509902954, "learning_rate": 7.27963723729053e-05, "loss": 1.183, "step": 11940 }, { "epoch": 0.7117654070807009, "grad_norm": 3.1701390743255615, "learning_rate": 7.278795513589435e-05, "loss": 1.3597, "step": 11942 }, { "epoch": 0.711884610799857, "grad_norm": 2.6543970108032227, "learning_rate": 7.277953708368028e-05, "loss": 1.3101, "step": 11944 }, { "epoch": 0.712003814519013, "grad_norm": 2.939481496810913, "learning_rate": 7.277111821656422e-05, "loss": 1.3187, "step": 11946 }, { "epoch": 0.712123018238169, "grad_norm": 3.0540175437927246, "learning_rate": 7.276269853484736e-05, "loss": 1.4274, "step": 11948 }, { "epoch": 0.7122422219573251, "grad_norm": 3.077425003051758, "learning_rate": 7.275427803883089e-05, "loss": 1.2874, "step": 11950 }, { "epoch": 0.7123614256764811, "grad_norm": 2.7767841815948486, "learning_rate": 7.274585672881605e-05, "loss": 1.1867, "step": 11952 }, { "epoch": 0.7124806293956372, "grad_norm": 3.770348310470581, "learning_rate": 7.273743460510408e-05, "loss": 1.3441, "step": 11954 }, { "epoch": 0.7125998331147931, "grad_norm": 3.017948627471924, "learning_rate": 7.272901166799628e-05, "loss": 1.4458, "step": 11956 }, { "epoch": 0.7127190368339492, "grad_norm": 2.955522060394287, "learning_rate": 7.272058791779397e-05, "loss": 1.3466, "step": 11958 }, { "epoch": 0.7128382405531053, "grad_norm": 2.936656951904297, "learning_rate": 7.27121633547985e-05, "loss": 1.3255, "step": 11960 }, { "epoch": 0.7129574442722613, "grad_norm": 3.0200228691101074, "learning_rate": 7.270373797931122e-05, "loss": 1.4912, "step": 11962 }, { "epoch": 0.7130766479914173, "grad_norm": 2.6053426265716553, "learning_rate": 7.269531179163356e-05, "loss": 1.2233, "step": 11964 }, { "epoch": 0.7131958517105734, "grad_norm": 3.102640390396118, "learning_rate": 7.268688479206694e-05, "loss": 1.3905, "step": 11966 }, { "epoch": 0.7133150554297294, "grad_norm": 3.2269370555877686, "learning_rate": 7.267845698091284e-05, "loss": 1.3582, "step": 11968 }, { "epoch": 0.7134342591488855, "grad_norm": 2.9724931716918945, "learning_rate": 7.267002835847274e-05, "loss": 1.2678, "step": 11970 }, { "epoch": 0.7135534628680414, "grad_norm": 3.0054659843444824, "learning_rate": 7.266159892504815e-05, "loss": 1.3351, "step": 11972 }, { "epoch": 0.7136726665871975, "grad_norm": 2.8013720512390137, "learning_rate": 7.265316868094063e-05, "loss": 1.4065, "step": 11974 }, { "epoch": 0.7137918703063536, "grad_norm": 3.2915689945220947, "learning_rate": 7.264473762645177e-05, "loss": 1.4244, "step": 11976 }, { "epoch": 0.7139110740255096, "grad_norm": 3.1168465614318848, "learning_rate": 7.263630576188317e-05, "loss": 1.3538, "step": 11978 }, { "epoch": 0.7140302777446657, "grad_norm": 3.334642171859741, "learning_rate": 7.262787308753645e-05, "loss": 1.2805, "step": 11980 }, { "epoch": 0.7141494814638216, "grad_norm": 3.549269199371338, "learning_rate": 7.26194396037133e-05, "loss": 1.5527, "step": 11982 }, { "epoch": 0.7142686851829777, "grad_norm": 3.455843925476074, "learning_rate": 7.26110053107154e-05, "loss": 1.3812, "step": 11984 }, { "epoch": 0.7143878889021338, "grad_norm": 2.915893793106079, "learning_rate": 7.260257020884448e-05, "loss": 1.2342, "step": 11986 }, { "epoch": 0.7145070926212898, "grad_norm": 4.191336631774902, "learning_rate": 7.259413429840229e-05, "loss": 1.4365, "step": 11988 }, { "epoch": 0.7146262963404458, "grad_norm": 4.672886371612549, "learning_rate": 7.258569757969061e-05, "loss": 1.3201, "step": 11990 }, { "epoch": 0.7147455000596019, "grad_norm": 3.1974895000457764, "learning_rate": 7.257726005301124e-05, "loss": 1.4122, "step": 11992 }, { "epoch": 0.7148647037787579, "grad_norm": 3.320934295654297, "learning_rate": 7.256882171866605e-05, "loss": 1.3328, "step": 11994 }, { "epoch": 0.714983907497914, "grad_norm": 3.0582964420318604, "learning_rate": 7.256038257695687e-05, "loss": 1.2862, "step": 11996 }, { "epoch": 0.7151031112170699, "grad_norm": 3.15641713142395, "learning_rate": 7.255194262818563e-05, "loss": 1.3805, "step": 11998 }, { "epoch": 0.715222314936226, "grad_norm": 3.2540082931518555, "learning_rate": 7.254350187265423e-05, "loss": 1.4882, "step": 12000 }, { "epoch": 0.7153415186553821, "grad_norm": 3.2105631828308105, "learning_rate": 7.253506031066466e-05, "loss": 1.2908, "step": 12002 }, { "epoch": 0.7154607223745381, "grad_norm": 3.3087141513824463, "learning_rate": 7.252661794251885e-05, "loss": 1.2768, "step": 12004 }, { "epoch": 0.7155799260936941, "grad_norm": 3.2676589488983154, "learning_rate": 7.251817476851886e-05, "loss": 1.3893, "step": 12006 }, { "epoch": 0.7156991298128501, "grad_norm": 3.154170513153076, "learning_rate": 7.25097307889667e-05, "loss": 1.3755, "step": 12008 }, { "epoch": 0.7158183335320062, "grad_norm": 3.5421438217163086, "learning_rate": 7.250128600416447e-05, "loss": 1.4119, "step": 12010 }, { "epoch": 0.7159375372511623, "grad_norm": 3.141937732696533, "learning_rate": 7.249284041441423e-05, "loss": 1.3537, "step": 12012 }, { "epoch": 0.7160567409703182, "grad_norm": 2.6636829376220703, "learning_rate": 7.248439402001814e-05, "loss": 1.2233, "step": 12014 }, { "epoch": 0.7161759446894743, "grad_norm": 3.040802001953125, "learning_rate": 7.247594682127838e-05, "loss": 1.3255, "step": 12016 }, { "epoch": 0.7162951484086304, "grad_norm": 3.1716535091400146, "learning_rate": 7.246749881849706e-05, "loss": 1.431, "step": 12018 }, { "epoch": 0.7164143521277864, "grad_norm": 3.1649436950683594, "learning_rate": 7.245905001197643e-05, "loss": 1.3861, "step": 12020 }, { "epoch": 0.7165335558469424, "grad_norm": 2.7248551845550537, "learning_rate": 7.245060040201876e-05, "loss": 1.3796, "step": 12022 }, { "epoch": 0.7166527595660984, "grad_norm": 2.973905324935913, "learning_rate": 7.24421499889263e-05, "loss": 1.3023, "step": 12024 }, { "epoch": 0.7167719632852545, "grad_norm": 3.106527090072632, "learning_rate": 7.243369877300135e-05, "loss": 1.3455, "step": 12026 }, { "epoch": 0.7168911670044106, "grad_norm": 3.2412912845611572, "learning_rate": 7.242524675454622e-05, "loss": 1.3227, "step": 12028 }, { "epoch": 0.7170103707235665, "grad_norm": 3.0261013507843018, "learning_rate": 7.241679393386332e-05, "loss": 1.3373, "step": 12030 }, { "epoch": 0.7171295744427226, "grad_norm": 3.276204824447632, "learning_rate": 7.240834031125498e-05, "loss": 1.3498, "step": 12032 }, { "epoch": 0.7172487781618786, "grad_norm": 3.2382612228393555, "learning_rate": 7.239988588702365e-05, "loss": 1.4318, "step": 12034 }, { "epoch": 0.7173679818810347, "grad_norm": 3.14926815032959, "learning_rate": 7.239143066147174e-05, "loss": 1.4455, "step": 12036 }, { "epoch": 0.7174871856001908, "grad_norm": 2.9327549934387207, "learning_rate": 7.238297463490176e-05, "loss": 1.427, "step": 12038 }, { "epoch": 0.7176063893193467, "grad_norm": 3.2788679599761963, "learning_rate": 7.23745178076162e-05, "loss": 1.3036, "step": 12040 }, { "epoch": 0.7177255930385028, "grad_norm": 3.122925043106079, "learning_rate": 7.236606017991756e-05, "loss": 1.3304, "step": 12042 }, { "epoch": 0.7178447967576589, "grad_norm": 2.8656327724456787, "learning_rate": 7.235760175210844e-05, "loss": 1.1977, "step": 12044 }, { "epoch": 0.7179640004768149, "grad_norm": 2.7972352504730225, "learning_rate": 7.23491425244914e-05, "loss": 1.2894, "step": 12046 }, { "epoch": 0.7180832041959709, "grad_norm": 3.2404537200927734, "learning_rate": 7.23406824973691e-05, "loss": 1.3046, "step": 12048 }, { "epoch": 0.7182024079151269, "grad_norm": 2.8908472061157227, "learning_rate": 7.23322216710441e-05, "loss": 1.2856, "step": 12050 }, { "epoch": 0.718321611634283, "grad_norm": 3.348198413848877, "learning_rate": 7.232376004581915e-05, "loss": 1.2708, "step": 12052 }, { "epoch": 0.7184408153534391, "grad_norm": 3.6386196613311768, "learning_rate": 7.231529762199691e-05, "loss": 1.2859, "step": 12054 }, { "epoch": 0.718560019072595, "grad_norm": 3.3557474613189697, "learning_rate": 7.230683439988013e-05, "loss": 1.4518, "step": 12056 }, { "epoch": 0.7186792227917511, "grad_norm": 3.265782594680786, "learning_rate": 7.229837037977156e-05, "loss": 1.4845, "step": 12058 }, { "epoch": 0.7187984265109072, "grad_norm": 3.1751511096954346, "learning_rate": 7.2289905561974e-05, "loss": 1.4161, "step": 12060 }, { "epoch": 0.7189176302300632, "grad_norm": 3.2878377437591553, "learning_rate": 7.228143994679024e-05, "loss": 1.4197, "step": 12062 }, { "epoch": 0.7190368339492192, "grad_norm": 3.149134397506714, "learning_rate": 7.227297353452315e-05, "loss": 1.4405, "step": 12064 }, { "epoch": 0.7191560376683752, "grad_norm": 3.033576011657715, "learning_rate": 7.226450632547558e-05, "loss": 1.2809, "step": 12066 }, { "epoch": 0.7192752413875313, "grad_norm": 3.007988452911377, "learning_rate": 7.225603831995044e-05, "loss": 1.2773, "step": 12068 }, { "epoch": 0.7193944451066874, "grad_norm": 3.2729735374450684, "learning_rate": 7.224756951825066e-05, "loss": 1.5304, "step": 12070 }, { "epoch": 0.7195136488258433, "grad_norm": 3.0922815799713135, "learning_rate": 7.22390999206792e-05, "loss": 1.2401, "step": 12072 }, { "epoch": 0.7196328525449994, "grad_norm": 2.921261787414551, "learning_rate": 7.223062952753906e-05, "loss": 1.2387, "step": 12074 }, { "epoch": 0.7197520562641554, "grad_norm": 3.2314891815185547, "learning_rate": 7.222215833913324e-05, "loss": 1.3785, "step": 12076 }, { "epoch": 0.7198712599833115, "grad_norm": 3.2394096851348877, "learning_rate": 7.221368635576478e-05, "loss": 1.3281, "step": 12078 }, { "epoch": 0.7199904637024676, "grad_norm": 3.0041847229003906, "learning_rate": 7.220521357773674e-05, "loss": 1.2087, "step": 12080 }, { "epoch": 0.7201096674216235, "grad_norm": 3.059286594390869, "learning_rate": 7.219674000535224e-05, "loss": 1.3145, "step": 12082 }, { "epoch": 0.7202288711407796, "grad_norm": 3.669238805770874, "learning_rate": 7.218826563891441e-05, "loss": 1.3837, "step": 12084 }, { "epoch": 0.7203480748599357, "grad_norm": 2.9961695671081543, "learning_rate": 7.217979047872641e-05, "loss": 1.1667, "step": 12086 }, { "epoch": 0.7204672785790917, "grad_norm": 3.1377134323120117, "learning_rate": 7.217131452509141e-05, "loss": 1.3086, "step": 12088 }, { "epoch": 0.7205864822982477, "grad_norm": 2.9803805351257324, "learning_rate": 7.216283777831263e-05, "loss": 1.2619, "step": 12090 }, { "epoch": 0.7207056860174037, "grad_norm": 3.5293474197387695, "learning_rate": 7.215436023869331e-05, "loss": 1.312, "step": 12092 }, { "epoch": 0.7208248897365598, "grad_norm": 2.605902910232544, "learning_rate": 7.214588190653673e-05, "loss": 1.2218, "step": 12094 }, { "epoch": 0.7209440934557159, "grad_norm": 3.0267975330352783, "learning_rate": 7.213740278214618e-05, "loss": 1.5429, "step": 12096 }, { "epoch": 0.7210632971748718, "grad_norm": 3.041921854019165, "learning_rate": 7.2128922865825e-05, "loss": 1.1942, "step": 12098 }, { "epoch": 0.7211825008940279, "grad_norm": 3.1855084896087646, "learning_rate": 7.212044215787651e-05, "loss": 1.4705, "step": 12100 }, { "epoch": 0.7213017046131839, "grad_norm": 3.147270441055298, "learning_rate": 7.211196065860415e-05, "loss": 1.4461, "step": 12102 }, { "epoch": 0.72142090833234, "grad_norm": 3.343421220779419, "learning_rate": 7.210347836831128e-05, "loss": 1.3223, "step": 12104 }, { "epoch": 0.721540112051496, "grad_norm": 3.004775047302246, "learning_rate": 7.209499528730138e-05, "loss": 1.3134, "step": 12106 }, { "epoch": 0.721659315770652, "grad_norm": 3.1743640899658203, "learning_rate": 7.20865114158779e-05, "loss": 1.4793, "step": 12108 }, { "epoch": 0.7217785194898081, "grad_norm": 3.3354568481445312, "learning_rate": 7.207802675434431e-05, "loss": 1.3929, "step": 12110 }, { "epoch": 0.7218977232089642, "grad_norm": 2.661480665206909, "learning_rate": 7.20695413030042e-05, "loss": 1.2357, "step": 12112 }, { "epoch": 0.7220169269281201, "grad_norm": 2.6426994800567627, "learning_rate": 7.206105506216106e-05, "loss": 1.2568, "step": 12114 }, { "epoch": 0.7221361306472762, "grad_norm": 3.104700803756714, "learning_rate": 7.205256803211851e-05, "loss": 1.2006, "step": 12116 }, { "epoch": 0.7222553343664322, "grad_norm": 3.2898313999176025, "learning_rate": 7.204408021318016e-05, "loss": 1.4442, "step": 12118 }, { "epoch": 0.7223745380855883, "grad_norm": 3.148362636566162, "learning_rate": 7.203559160564964e-05, "loss": 1.2688, "step": 12120 }, { "epoch": 0.7224937418047443, "grad_norm": 3.0884604454040527, "learning_rate": 7.20271022098306e-05, "loss": 1.3006, "step": 12122 }, { "epoch": 0.7226129455239003, "grad_norm": 2.7839646339416504, "learning_rate": 7.201861202602676e-05, "loss": 1.4535, "step": 12124 }, { "epoch": 0.7227321492430564, "grad_norm": 3.036867618560791, "learning_rate": 7.201012105454181e-05, "loss": 1.3696, "step": 12126 }, { "epoch": 0.7228513529622124, "grad_norm": 3.255411386489868, "learning_rate": 7.200162929567954e-05, "loss": 1.3988, "step": 12128 }, { "epoch": 0.7229705566813684, "grad_norm": 2.9650211334228516, "learning_rate": 7.19931367497437e-05, "loss": 1.3607, "step": 12130 }, { "epoch": 0.7230897604005245, "grad_norm": 3.226682424545288, "learning_rate": 7.198464341703812e-05, "loss": 1.3678, "step": 12132 }, { "epoch": 0.7232089641196805, "grad_norm": 3.0554311275482178, "learning_rate": 7.197614929786662e-05, "loss": 1.1597, "step": 12134 }, { "epoch": 0.7233281678388366, "grad_norm": 3.149496078491211, "learning_rate": 7.19676543925331e-05, "loss": 1.3421, "step": 12136 }, { "epoch": 0.7234473715579927, "grad_norm": 2.9469611644744873, "learning_rate": 7.195915870134138e-05, "loss": 1.2516, "step": 12138 }, { "epoch": 0.7235665752771486, "grad_norm": 2.8710362911224365, "learning_rate": 7.195066222459543e-05, "loss": 1.3203, "step": 12140 }, { "epoch": 0.7236857789963047, "grad_norm": 3.0962233543395996, "learning_rate": 7.19421649625992e-05, "loss": 1.1894, "step": 12142 }, { "epoch": 0.7238049827154607, "grad_norm": 2.9328110218048096, "learning_rate": 7.193366691565666e-05, "loss": 1.3547, "step": 12144 }, { "epoch": 0.7239241864346168, "grad_norm": 3.1594905853271484, "learning_rate": 7.19251680840718e-05, "loss": 1.5077, "step": 12146 }, { "epoch": 0.7240433901537728, "grad_norm": 3.1597557067871094, "learning_rate": 7.191666846814865e-05, "loss": 1.3904, "step": 12148 }, { "epoch": 0.7241625938729288, "grad_norm": 3.230550765991211, "learning_rate": 7.19081680681913e-05, "loss": 1.293, "step": 12150 }, { "epoch": 0.7242817975920849, "grad_norm": 3.297147512435913, "learning_rate": 7.189966688450381e-05, "loss": 1.3623, "step": 12152 }, { "epoch": 0.7244010013112409, "grad_norm": 3.0315959453582764, "learning_rate": 7.189116491739031e-05, "loss": 1.3294, "step": 12154 }, { "epoch": 0.7245202050303969, "grad_norm": 2.999807596206665, "learning_rate": 7.188266216715493e-05, "loss": 1.4039, "step": 12156 }, { "epoch": 0.724639408749553, "grad_norm": 2.793668270111084, "learning_rate": 7.187415863410186e-05, "loss": 1.2968, "step": 12158 }, { "epoch": 0.724758612468709, "grad_norm": 2.9107673168182373, "learning_rate": 7.18656543185353e-05, "loss": 1.2906, "step": 12160 }, { "epoch": 0.7248778161878651, "grad_norm": 3.0270040035247803, "learning_rate": 7.185714922075947e-05, "loss": 1.3246, "step": 12162 }, { "epoch": 0.7249970199070211, "grad_norm": 3.091637134552002, "learning_rate": 7.184864334107862e-05, "loss": 1.2828, "step": 12164 }, { "epoch": 0.7251162236261771, "grad_norm": 3.4044792652130127, "learning_rate": 7.184013667979707e-05, "loss": 1.4077, "step": 12166 }, { "epoch": 0.7252354273453332, "grad_norm": 3.1713953018188477, "learning_rate": 7.183162923721909e-05, "loss": 1.3983, "step": 12168 }, { "epoch": 0.7253546310644892, "grad_norm": 3.141195774078369, "learning_rate": 7.182312101364903e-05, "loss": 1.367, "step": 12170 }, { "epoch": 0.7254738347836452, "grad_norm": 2.998202323913574, "learning_rate": 7.181461200939127e-05, "loss": 1.3601, "step": 12172 }, { "epoch": 0.7255930385028013, "grad_norm": 2.885854482650757, "learning_rate": 7.18061022247502e-05, "loss": 1.2908, "step": 12174 }, { "epoch": 0.7257122422219573, "grad_norm": 2.8887927532196045, "learning_rate": 7.179759166003024e-05, "loss": 1.3642, "step": 12176 }, { "epoch": 0.7258314459411134, "grad_norm": 3.0604991912841797, "learning_rate": 7.178908031553584e-05, "loss": 1.2643, "step": 12178 }, { "epoch": 0.7259506496602695, "grad_norm": 3.3197646141052246, "learning_rate": 7.17805681915715e-05, "loss": 1.4015, "step": 12180 }, { "epoch": 0.7260698533794254, "grad_norm": 3.006657600402832, "learning_rate": 7.177205528844173e-05, "loss": 1.3193, "step": 12182 }, { "epoch": 0.7261890570985815, "grad_norm": 3.219139575958252, "learning_rate": 7.176354160645102e-05, "loss": 1.3961, "step": 12184 }, { "epoch": 0.7263082608177375, "grad_norm": 3.068418025970459, "learning_rate": 7.175502714590398e-05, "loss": 1.3195, "step": 12186 }, { "epoch": 0.7264274645368936, "grad_norm": 3.061333656311035, "learning_rate": 7.174651190710518e-05, "loss": 1.4184, "step": 12188 }, { "epoch": 0.7265466682560496, "grad_norm": 2.9573237895965576, "learning_rate": 7.173799589035923e-05, "loss": 1.3312, "step": 12190 }, { "epoch": 0.7266658719752056, "grad_norm": 3.6278066635131836, "learning_rate": 7.172947909597081e-05, "loss": 1.3187, "step": 12192 }, { "epoch": 0.7267850756943617, "grad_norm": 3.621704339981079, "learning_rate": 7.172096152424457e-05, "loss": 1.3283, "step": 12194 }, { "epoch": 0.7269042794135177, "grad_norm": 2.705296039581299, "learning_rate": 7.171244317548522e-05, "loss": 1.3547, "step": 12196 }, { "epoch": 0.7270234831326737, "grad_norm": 2.90543532371521, "learning_rate": 7.170392404999749e-05, "loss": 1.3682, "step": 12198 }, { "epoch": 0.7271426868518298, "grad_norm": 2.925109624862671, "learning_rate": 7.169540414808614e-05, "loss": 1.3383, "step": 12200 }, { "epoch": 0.7272618905709858, "grad_norm": 3.0427308082580566, "learning_rate": 7.168688347005595e-05, "loss": 1.3633, "step": 12202 }, { "epoch": 0.7273810942901419, "grad_norm": 2.9516565799713135, "learning_rate": 7.167836201621174e-05, "loss": 1.2219, "step": 12204 }, { "epoch": 0.7275002980092979, "grad_norm": 2.7349376678466797, "learning_rate": 7.166983978685835e-05, "loss": 1.2455, "step": 12206 }, { "epoch": 0.7276195017284539, "grad_norm": 3.112119674682617, "learning_rate": 7.166131678230063e-05, "loss": 1.3248, "step": 12208 }, { "epoch": 0.72773870544761, "grad_norm": 2.6824951171875, "learning_rate": 7.165279300284352e-05, "loss": 1.229, "step": 12210 }, { "epoch": 0.727857909166766, "grad_norm": 3.429077386856079, "learning_rate": 7.164426844879192e-05, "loss": 1.4591, "step": 12212 }, { "epoch": 0.727977112885922, "grad_norm": 2.8217272758483887, "learning_rate": 7.163574312045077e-05, "loss": 1.2991, "step": 12214 }, { "epoch": 0.7280963166050781, "grad_norm": 3.312971830368042, "learning_rate": 7.162721701812505e-05, "loss": 1.4903, "step": 12216 }, { "epoch": 0.7282155203242341, "grad_norm": 2.9256763458251953, "learning_rate": 7.161869014211981e-05, "loss": 1.2448, "step": 12218 }, { "epoch": 0.7283347240433902, "grad_norm": 3.210364818572998, "learning_rate": 7.161016249274003e-05, "loss": 1.2123, "step": 12220 }, { "epoch": 0.7284539277625461, "grad_norm": 2.7087693214416504, "learning_rate": 7.160163407029081e-05, "loss": 1.2536, "step": 12222 }, { "epoch": 0.7285731314817022, "grad_norm": 3.333698034286499, "learning_rate": 7.159310487507724e-05, "loss": 1.2564, "step": 12224 }, { "epoch": 0.7286923352008583, "grad_norm": 3.1157212257385254, "learning_rate": 7.158457490740442e-05, "loss": 1.2421, "step": 12226 }, { "epoch": 0.7288115389200143, "grad_norm": 3.0814685821533203, "learning_rate": 7.157604416757751e-05, "loss": 1.3777, "step": 12228 }, { "epoch": 0.7289307426391703, "grad_norm": 3.2311573028564453, "learning_rate": 7.156751265590167e-05, "loss": 1.323, "step": 12230 }, { "epoch": 0.7290499463583264, "grad_norm": 3.037290334701538, "learning_rate": 7.155898037268212e-05, "loss": 1.4088, "step": 12232 }, { "epoch": 0.7291691500774824, "grad_norm": 3.077634572982788, "learning_rate": 7.155044731822406e-05, "loss": 1.3177, "step": 12234 }, { "epoch": 0.7292883537966385, "grad_norm": 3.006575107574463, "learning_rate": 7.154191349283278e-05, "loss": 1.2842, "step": 12236 }, { "epoch": 0.7294075575157944, "grad_norm": 3.2119123935699463, "learning_rate": 7.153337889681355e-05, "loss": 1.424, "step": 12238 }, { "epoch": 0.7295267612349505, "grad_norm": 2.866593599319458, "learning_rate": 7.152484353047168e-05, "loss": 1.3281, "step": 12240 }, { "epoch": 0.7296459649541066, "grad_norm": 3.0808303356170654, "learning_rate": 7.151630739411251e-05, "loss": 1.1933, "step": 12242 }, { "epoch": 0.7297651686732626, "grad_norm": 3.1132290363311768, "learning_rate": 7.15077704880414e-05, "loss": 1.3863, "step": 12244 }, { "epoch": 0.7298843723924187, "grad_norm": 3.037154197692871, "learning_rate": 7.149923281256377e-05, "loss": 1.3472, "step": 12246 }, { "epoch": 0.7300035761115746, "grad_norm": 3.0005011558532715, "learning_rate": 7.149069436798501e-05, "loss": 1.23, "step": 12248 }, { "epoch": 0.7301227798307307, "grad_norm": 3.4015541076660156, "learning_rate": 7.148215515461057e-05, "loss": 1.408, "step": 12250 }, { "epoch": 0.7302419835498868, "grad_norm": 2.960360288619995, "learning_rate": 7.147361517274595e-05, "loss": 1.3394, "step": 12252 }, { "epoch": 0.7303611872690428, "grad_norm": 2.9197816848754883, "learning_rate": 7.146507442269665e-05, "loss": 1.448, "step": 12254 }, { "epoch": 0.7304803909881988, "grad_norm": 3.2892351150512695, "learning_rate": 7.145653290476821e-05, "loss": 1.4045, "step": 12256 }, { "epoch": 0.7305995947073549, "grad_norm": 3.2589573860168457, "learning_rate": 7.144799061926616e-05, "loss": 1.4399, "step": 12258 }, { "epoch": 0.7307187984265109, "grad_norm": 3.2174410820007324, "learning_rate": 7.143944756649609e-05, "loss": 1.3616, "step": 12260 }, { "epoch": 0.730838002145667, "grad_norm": 2.73017954826355, "learning_rate": 7.143090374676365e-05, "loss": 1.4023, "step": 12262 }, { "epoch": 0.7309572058648229, "grad_norm": 2.884242296218872, "learning_rate": 7.142235916037444e-05, "loss": 1.4001, "step": 12264 }, { "epoch": 0.731076409583979, "grad_norm": 3.0310475826263428, "learning_rate": 7.141381380763414e-05, "loss": 1.3402, "step": 12266 }, { "epoch": 0.7311956133031351, "grad_norm": 2.963146924972534, "learning_rate": 7.140526768884846e-05, "loss": 1.2366, "step": 12268 }, { "epoch": 0.7313148170222911, "grad_norm": 2.9211032390594482, "learning_rate": 7.139672080432314e-05, "loss": 1.2251, "step": 12270 }, { "epoch": 0.7314340207414471, "grad_norm": 3.1452407836914062, "learning_rate": 7.138817315436388e-05, "loss": 1.2576, "step": 12272 }, { "epoch": 0.7315532244606032, "grad_norm": 3.084655284881592, "learning_rate": 7.13796247392765e-05, "loss": 1.2695, "step": 12274 }, { "epoch": 0.7316724281797592, "grad_norm": 2.8065266609191895, "learning_rate": 7.137107555936679e-05, "loss": 1.3472, "step": 12276 }, { "epoch": 0.7317916318989153, "grad_norm": 3.200814723968506, "learning_rate": 7.13625256149406e-05, "loss": 1.3888, "step": 12278 }, { "epoch": 0.7319108356180712, "grad_norm": 3.0158674716949463, "learning_rate": 7.135397490630375e-05, "loss": 1.3902, "step": 12280 }, { "epoch": 0.7320300393372273, "grad_norm": 2.733515501022339, "learning_rate": 7.134542343376218e-05, "loss": 1.2153, "step": 12282 }, { "epoch": 0.7321492430563834, "grad_norm": 3.015719175338745, "learning_rate": 7.133687119762178e-05, "loss": 1.2919, "step": 12284 }, { "epoch": 0.7322684467755394, "grad_norm": 3.1844232082366943, "learning_rate": 7.132831819818849e-05, "loss": 1.4185, "step": 12286 }, { "epoch": 0.7323876504946955, "grad_norm": 3.103226900100708, "learning_rate": 7.131976443576829e-05, "loss": 1.4117, "step": 12288 }, { "epoch": 0.7325068542138514, "grad_norm": 3.1515777111053467, "learning_rate": 7.131120991066716e-05, "loss": 1.3319, "step": 12290 }, { "epoch": 0.7326260579330075, "grad_norm": 3.2050158977508545, "learning_rate": 7.130265462319113e-05, "loss": 1.3841, "step": 12292 }, { "epoch": 0.7327452616521636, "grad_norm": 2.9631431102752686, "learning_rate": 7.129409857364627e-05, "loss": 1.325, "step": 12294 }, { "epoch": 0.7328644653713196, "grad_norm": 3.3402280807495117, "learning_rate": 7.128554176233865e-05, "loss": 1.3508, "step": 12296 }, { "epoch": 0.7329836690904756, "grad_norm": 3.076651096343994, "learning_rate": 7.127698418957436e-05, "loss": 1.3579, "step": 12298 }, { "epoch": 0.7331028728096317, "grad_norm": 3.0737574100494385, "learning_rate": 7.126842585565956e-05, "loss": 1.1835, "step": 12300 }, { "epoch": 0.7332220765287877, "grad_norm": 3.1274173259735107, "learning_rate": 7.12598667609004e-05, "loss": 1.2697, "step": 12302 }, { "epoch": 0.7333412802479438, "grad_norm": 3.0902998447418213, "learning_rate": 7.125130690560305e-05, "loss": 1.2695, "step": 12304 }, { "epoch": 0.7334604839670997, "grad_norm": 3.2581405639648438, "learning_rate": 7.124274629007374e-05, "loss": 1.4258, "step": 12306 }, { "epoch": 0.7335796876862558, "grad_norm": 3.080064058303833, "learning_rate": 7.123418491461874e-05, "loss": 1.34, "step": 12308 }, { "epoch": 0.7336988914054119, "grad_norm": 2.8753621578216553, "learning_rate": 7.122562277954428e-05, "loss": 1.2765, "step": 12310 }, { "epoch": 0.7338180951245679, "grad_norm": 2.968309164047241, "learning_rate": 7.121705988515665e-05, "loss": 1.3677, "step": 12312 }, { "epoch": 0.7339372988437239, "grad_norm": 2.79282283782959, "learning_rate": 7.120849623176222e-05, "loss": 1.0779, "step": 12314 }, { "epoch": 0.7340565025628799, "grad_norm": 3.01450514793396, "learning_rate": 7.11999318196673e-05, "loss": 1.3268, "step": 12316 }, { "epoch": 0.734175706282036, "grad_norm": 3.1824278831481934, "learning_rate": 7.11913666491783e-05, "loss": 1.3466, "step": 12318 }, { "epoch": 0.7342949100011921, "grad_norm": 3.1193366050720215, "learning_rate": 7.118280072060161e-05, "loss": 1.3785, "step": 12320 }, { "epoch": 0.734414113720348, "grad_norm": 2.9674627780914307, "learning_rate": 7.117423403424367e-05, "loss": 1.3019, "step": 12322 }, { "epoch": 0.7345333174395041, "grad_norm": 3.1269686222076416, "learning_rate": 7.11656665904109e-05, "loss": 1.2533, "step": 12324 }, { "epoch": 0.7346525211586602, "grad_norm": 3.163384199142456, "learning_rate": 7.115709838940983e-05, "loss": 1.298, "step": 12326 }, { "epoch": 0.7347717248778162, "grad_norm": 3.0705604553222656, "learning_rate": 7.114852943154697e-05, "loss": 1.3331, "step": 12328 }, { "epoch": 0.7348909285969722, "grad_norm": 3.1077728271484375, "learning_rate": 7.113995971712884e-05, "loss": 1.3904, "step": 12330 }, { "epoch": 0.7350101323161282, "grad_norm": 3.231426239013672, "learning_rate": 7.113138924646204e-05, "loss": 1.3192, "step": 12332 }, { "epoch": 0.7351293360352843, "grad_norm": 2.9043421745300293, "learning_rate": 7.112281801985315e-05, "loss": 1.1186, "step": 12334 }, { "epoch": 0.7352485397544404, "grad_norm": 3.2053282260894775, "learning_rate": 7.111424603760877e-05, "loss": 1.4781, "step": 12336 }, { "epoch": 0.7353677434735963, "grad_norm": 2.9683525562286377, "learning_rate": 7.110567330003558e-05, "loss": 1.3509, "step": 12338 }, { "epoch": 0.7354869471927524, "grad_norm": 3.2801036834716797, "learning_rate": 7.109709980744023e-05, "loss": 1.4818, "step": 12340 }, { "epoch": 0.7356061509119084, "grad_norm": 2.9895339012145996, "learning_rate": 7.108852556012945e-05, "loss": 1.2706, "step": 12342 }, { "epoch": 0.7357253546310645, "grad_norm": 3.281233549118042, "learning_rate": 7.107995055840994e-05, "loss": 1.3664, "step": 12344 }, { "epoch": 0.7358445583502206, "grad_norm": 3.0253076553344727, "learning_rate": 7.10713748025885e-05, "loss": 1.27, "step": 12346 }, { "epoch": 0.7359637620693765, "grad_norm": 3.2133424282073975, "learning_rate": 7.106279829297187e-05, "loss": 1.4089, "step": 12348 }, { "epoch": 0.7360829657885326, "grad_norm": 3.384925603866577, "learning_rate": 7.105422102986687e-05, "loss": 1.4428, "step": 12350 }, { "epoch": 0.7362021695076887, "grad_norm": 2.895930051803589, "learning_rate": 7.104564301358038e-05, "loss": 1.2784, "step": 12352 }, { "epoch": 0.7363213732268447, "grad_norm": 2.873356342315674, "learning_rate": 7.103706424441921e-05, "loss": 1.4294, "step": 12354 }, { "epoch": 0.7364405769460007, "grad_norm": 3.16046142578125, "learning_rate": 7.102848472269027e-05, "loss": 1.4668, "step": 12356 }, { "epoch": 0.7365597806651567, "grad_norm": 3.19270658493042, "learning_rate": 7.10199044487005e-05, "loss": 1.3241, "step": 12358 }, { "epoch": 0.7366789843843128, "grad_norm": 3.1918680667877197, "learning_rate": 7.101132342275681e-05, "loss": 1.5326, "step": 12360 }, { "epoch": 0.7367981881034689, "grad_norm": 2.681723117828369, "learning_rate": 7.10027416451662e-05, "loss": 1.191, "step": 12362 }, { "epoch": 0.7369173918226248, "grad_norm": 3.135457992553711, "learning_rate": 7.099415911623568e-05, "loss": 1.4395, "step": 12364 }, { "epoch": 0.7370365955417809, "grad_norm": 2.912388801574707, "learning_rate": 7.098557583627223e-05, "loss": 1.2664, "step": 12366 }, { "epoch": 0.737155799260937, "grad_norm": 3.0728464126586914, "learning_rate": 7.097699180558295e-05, "loss": 1.4653, "step": 12368 }, { "epoch": 0.737275002980093, "grad_norm": 3.0518927574157715, "learning_rate": 7.096840702447489e-05, "loss": 1.3382, "step": 12370 }, { "epoch": 0.737394206699249, "grad_norm": 3.401170015335083, "learning_rate": 7.095982149325517e-05, "loss": 1.4796, "step": 12372 }, { "epoch": 0.737513410418405, "grad_norm": 3.0396041870117188, "learning_rate": 7.095123521223092e-05, "loss": 1.3146, "step": 12374 }, { "epoch": 0.7376326141375611, "grad_norm": 2.9961304664611816, "learning_rate": 7.094264818170931e-05, "loss": 1.1644, "step": 12376 }, { "epoch": 0.7377518178567172, "grad_norm": 3.2434234619140625, "learning_rate": 7.093406040199753e-05, "loss": 1.2449, "step": 12378 }, { "epoch": 0.7378710215758731, "grad_norm": 3.404083013534546, "learning_rate": 7.092547187340277e-05, "loss": 1.3729, "step": 12380 }, { "epoch": 0.7379902252950292, "grad_norm": 3.1881814002990723, "learning_rate": 7.091688259623227e-05, "loss": 1.496, "step": 12382 }, { "epoch": 0.7381094290141852, "grad_norm": 3.099703550338745, "learning_rate": 7.090829257079334e-05, "loss": 1.3328, "step": 12384 }, { "epoch": 0.7382286327333413, "grad_norm": 3.0774924755096436, "learning_rate": 7.089970179739323e-05, "loss": 1.395, "step": 12386 }, { "epoch": 0.7383478364524974, "grad_norm": 3.338008403778076, "learning_rate": 7.089111027633929e-05, "loss": 1.4171, "step": 12388 }, { "epoch": 0.7384670401716533, "grad_norm": 2.9873604774475098, "learning_rate": 7.088251800793885e-05, "loss": 1.3647, "step": 12390 }, { "epoch": 0.7385862438908094, "grad_norm": 3.196049928665161, "learning_rate": 7.08739249924993e-05, "loss": 1.3342, "step": 12392 }, { "epoch": 0.7387054476099655, "grad_norm": 3.3546793460845947, "learning_rate": 7.086533123032803e-05, "loss": 1.317, "step": 12394 }, { "epoch": 0.7388246513291215, "grad_norm": 3.254709243774414, "learning_rate": 7.085673672173246e-05, "loss": 1.4894, "step": 12396 }, { "epoch": 0.7389438550482775, "grad_norm": 2.8812966346740723, "learning_rate": 7.084814146702007e-05, "loss": 1.3814, "step": 12398 }, { "epoch": 0.7390630587674335, "grad_norm": 2.7488927841186523, "learning_rate": 7.083954546649832e-05, "loss": 1.4634, "step": 12400 }, { "epoch": 0.7391822624865896, "grad_norm": 3.0594382286071777, "learning_rate": 7.083094872047474e-05, "loss": 1.2819, "step": 12402 }, { "epoch": 0.7393014662057457, "grad_norm": 2.6680335998535156, "learning_rate": 7.082235122925684e-05, "loss": 1.2521, "step": 12404 }, { "epoch": 0.7394206699249016, "grad_norm": 2.943150758743286, "learning_rate": 7.081375299315221e-05, "loss": 1.2999, "step": 12406 }, { "epoch": 0.7395398736440577, "grad_norm": 2.9605000019073486, "learning_rate": 7.080515401246841e-05, "loss": 1.2652, "step": 12408 }, { "epoch": 0.7396590773632137, "grad_norm": 3.1296794414520264, "learning_rate": 7.079655428751306e-05, "loss": 1.3722, "step": 12410 }, { "epoch": 0.7397782810823698, "grad_norm": 3.5162220001220703, "learning_rate": 7.078795381859382e-05, "loss": 1.3897, "step": 12412 }, { "epoch": 0.7398974848015258, "grad_norm": 2.8997108936309814, "learning_rate": 7.077935260601834e-05, "loss": 1.4113, "step": 12414 }, { "epoch": 0.7400166885206818, "grad_norm": 2.902101516723633, "learning_rate": 7.077075065009433e-05, "loss": 1.288, "step": 12416 }, { "epoch": 0.7401358922398379, "grad_norm": 2.8001298904418945, "learning_rate": 7.07621479511295e-05, "loss": 1.3128, "step": 12418 }, { "epoch": 0.740255095958994, "grad_norm": 3.065492868423462, "learning_rate": 7.07535445094316e-05, "loss": 1.4243, "step": 12420 }, { "epoch": 0.7403742996781499, "grad_norm": 3.2331838607788086, "learning_rate": 7.074494032530841e-05, "loss": 1.2742, "step": 12422 }, { "epoch": 0.740493503397306, "grad_norm": 3.5180001258850098, "learning_rate": 7.073633539906771e-05, "loss": 1.4976, "step": 12424 }, { "epoch": 0.740612707116462, "grad_norm": 2.887958288192749, "learning_rate": 7.072772973101734e-05, "loss": 1.3409, "step": 12426 }, { "epoch": 0.7407319108356181, "grad_norm": 3.050405979156494, "learning_rate": 7.071912332146518e-05, "loss": 1.2385, "step": 12428 }, { "epoch": 0.7408511145547741, "grad_norm": 2.9096195697784424, "learning_rate": 7.071051617071909e-05, "loss": 1.1526, "step": 12430 }, { "epoch": 0.7409703182739301, "grad_norm": 4.069388389587402, "learning_rate": 7.070190827908696e-05, "loss": 1.3843, "step": 12432 }, { "epoch": 0.7410895219930862, "grad_norm": 3.004216432571411, "learning_rate": 7.069329964687674e-05, "loss": 1.4521, "step": 12434 }, { "epoch": 0.7412087257122422, "grad_norm": 3.0402400493621826, "learning_rate": 7.068469027439642e-05, "loss": 1.3484, "step": 12436 }, { "epoch": 0.7413279294313982, "grad_norm": 2.9240641593933105, "learning_rate": 7.067608016195392e-05, "loss": 1.1994, "step": 12438 }, { "epoch": 0.7414471331505543, "grad_norm": 2.9677248001098633, "learning_rate": 7.06674693098573e-05, "loss": 1.4589, "step": 12440 }, { "epoch": 0.7415663368697103, "grad_norm": 3.310378313064575, "learning_rate": 7.06588577184146e-05, "loss": 1.2684, "step": 12442 }, { "epoch": 0.7416855405888664, "grad_norm": 3.088120937347412, "learning_rate": 7.065024538793387e-05, "loss": 1.3152, "step": 12444 }, { "epoch": 0.7418047443080225, "grad_norm": 2.918401002883911, "learning_rate": 7.064163231872321e-05, "loss": 1.366, "step": 12446 }, { "epoch": 0.7419239480271784, "grad_norm": 2.7770252227783203, "learning_rate": 7.063301851109074e-05, "loss": 1.2775, "step": 12448 }, { "epoch": 0.7420431517463345, "grad_norm": 2.980677604675293, "learning_rate": 7.06244039653446e-05, "loss": 1.3521, "step": 12450 }, { "epoch": 0.7421623554654905, "grad_norm": 2.8554461002349854, "learning_rate": 7.061578868179297e-05, "loss": 1.3605, "step": 12452 }, { "epoch": 0.7422815591846466, "grad_norm": 3.091740846633911, "learning_rate": 7.060717266074404e-05, "loss": 1.2808, "step": 12454 }, { "epoch": 0.7424007629038026, "grad_norm": 3.1484546661376953, "learning_rate": 7.059855590250604e-05, "loss": 1.2236, "step": 12456 }, { "epoch": 0.7425199666229586, "grad_norm": 3.1336047649383545, "learning_rate": 7.05899384073872e-05, "loss": 1.3644, "step": 12458 }, { "epoch": 0.7426391703421147, "grad_norm": 3.225910186767578, "learning_rate": 7.058132017569582e-05, "loss": 1.4793, "step": 12460 }, { "epoch": 0.7427583740612708, "grad_norm": 3.151784896850586, "learning_rate": 7.057270120774022e-05, "loss": 1.181, "step": 12462 }, { "epoch": 0.7428775777804267, "grad_norm": 2.9987714290618896, "learning_rate": 7.056408150382868e-05, "loss": 1.2664, "step": 12464 }, { "epoch": 0.7429967814995828, "grad_norm": 3.2037103176116943, "learning_rate": 7.055546106426961e-05, "loss": 1.3169, "step": 12466 }, { "epoch": 0.7431159852187388, "grad_norm": 2.956078052520752, "learning_rate": 7.054683988937136e-05, "loss": 1.3749, "step": 12468 }, { "epoch": 0.7432351889378949, "grad_norm": 2.816685914993286, "learning_rate": 7.053821797944234e-05, "loss": 1.3243, "step": 12470 }, { "epoch": 0.7433543926570509, "grad_norm": 3.856076240539551, "learning_rate": 7.0529595334791e-05, "loss": 1.3402, "step": 12472 }, { "epoch": 0.7434735963762069, "grad_norm": 3.146695375442505, "learning_rate": 7.05209719557258e-05, "loss": 1.2424, "step": 12474 }, { "epoch": 0.743592800095363, "grad_norm": 3.312662124633789, "learning_rate": 7.051234784255521e-05, "loss": 1.3445, "step": 12476 }, { "epoch": 0.743712003814519, "grad_norm": 2.77451229095459, "learning_rate": 7.050372299558775e-05, "loss": 1.3094, "step": 12478 }, { "epoch": 0.743831207533675, "grad_norm": 3.1625170707702637, "learning_rate": 7.049509741513197e-05, "loss": 1.2514, "step": 12480 }, { "epoch": 0.7439504112528311, "grad_norm": 3.133612632751465, "learning_rate": 7.048647110149644e-05, "loss": 1.2696, "step": 12482 }, { "epoch": 0.7440696149719871, "grad_norm": 2.9844415187835693, "learning_rate": 7.047784405498976e-05, "loss": 1.2807, "step": 12484 }, { "epoch": 0.7441888186911432, "grad_norm": 3.0200376510620117, "learning_rate": 7.04692162759205e-05, "loss": 1.373, "step": 12486 }, { "epoch": 0.7443080224102993, "grad_norm": 3.297978401184082, "learning_rate": 7.046058776459737e-05, "loss": 1.3802, "step": 12488 }, { "epoch": 0.7444272261294552, "grad_norm": 2.886587142944336, "learning_rate": 7.045195852132898e-05, "loss": 1.3358, "step": 12490 }, { "epoch": 0.7445464298486113, "grad_norm": 2.981783151626587, "learning_rate": 7.044332854642406e-05, "loss": 1.3582, "step": 12492 }, { "epoch": 0.7446656335677673, "grad_norm": 2.9167227745056152, "learning_rate": 7.043469784019136e-05, "loss": 1.3983, "step": 12494 }, { "epoch": 0.7447848372869234, "grad_norm": 2.8077685832977295, "learning_rate": 7.042606640293958e-05, "loss": 1.2747, "step": 12496 }, { "epoch": 0.7449040410060794, "grad_norm": 2.9885807037353516, "learning_rate": 7.041743423497752e-05, "loss": 1.2723, "step": 12498 }, { "epoch": 0.7450232447252354, "grad_norm": 3.094834089279175, "learning_rate": 7.0408801336614e-05, "loss": 1.4615, "step": 12500 }, { "epoch": 0.7451424484443915, "grad_norm": 3.4311695098876953, "learning_rate": 7.04001677081578e-05, "loss": 1.2687, "step": 12502 }, { "epoch": 0.7452616521635475, "grad_norm": 2.860678195953369, "learning_rate": 7.039153334991783e-05, "loss": 1.242, "step": 12504 }, { "epoch": 0.7453808558827035, "grad_norm": 3.1442055702209473, "learning_rate": 7.038289826220292e-05, "loss": 1.4236, "step": 12506 }, { "epoch": 0.7455000596018596, "grad_norm": 2.8961963653564453, "learning_rate": 7.037426244532201e-05, "loss": 1.3845, "step": 12508 }, { "epoch": 0.7456192633210156, "grad_norm": 3.019284963607788, "learning_rate": 7.036562589958401e-05, "loss": 1.2337, "step": 12510 }, { "epoch": 0.7457384670401717, "grad_norm": 3.170358896255493, "learning_rate": 7.035698862529792e-05, "loss": 1.4112, "step": 12512 }, { "epoch": 0.7458576707593277, "grad_norm": 2.835322856903076, "learning_rate": 7.034835062277268e-05, "loss": 1.2723, "step": 12514 }, { "epoch": 0.7459768744784837, "grad_norm": 3.278143882751465, "learning_rate": 7.03397118923173e-05, "loss": 1.346, "step": 12516 }, { "epoch": 0.7460960781976398, "grad_norm": 3.1347217559814453, "learning_rate": 7.033107243424086e-05, "loss": 1.3647, "step": 12518 }, { "epoch": 0.7462152819167958, "grad_norm": 3.261178731918335, "learning_rate": 7.032243224885239e-05, "loss": 1.4291, "step": 12520 }, { "epoch": 0.7463344856359518, "grad_norm": 3.159672975540161, "learning_rate": 7.031379133646096e-05, "loss": 1.2791, "step": 12522 }, { "epoch": 0.7464536893551079, "grad_norm": 3.1539807319641113, "learning_rate": 7.030514969737575e-05, "loss": 1.2097, "step": 12524 }, { "epoch": 0.7465728930742639, "grad_norm": 3.1942458152770996, "learning_rate": 7.029650733190585e-05, "loss": 1.2945, "step": 12526 }, { "epoch": 0.74669209679342, "grad_norm": 3.5658111572265625, "learning_rate": 7.028786424036043e-05, "loss": 1.4252, "step": 12528 }, { "epoch": 0.7468113005125759, "grad_norm": 3.3852570056915283, "learning_rate": 7.027922042304869e-05, "loss": 1.4201, "step": 12530 }, { "epoch": 0.746930504231732, "grad_norm": 2.8451714515686035, "learning_rate": 7.027057588027987e-05, "loss": 1.2355, "step": 12532 }, { "epoch": 0.7470497079508881, "grad_norm": 3.127800226211548, "learning_rate": 7.026193061236317e-05, "loss": 1.3675, "step": 12534 }, { "epoch": 0.7471689116700441, "grad_norm": 3.2559053897857666, "learning_rate": 7.02532846196079e-05, "loss": 1.4964, "step": 12536 }, { "epoch": 0.7472881153892001, "grad_norm": 3.185515880584717, "learning_rate": 7.024463790232334e-05, "loss": 1.3952, "step": 12538 }, { "epoch": 0.7474073191083562, "grad_norm": 2.8621199131011963, "learning_rate": 7.023599046081882e-05, "loss": 1.2918, "step": 12540 }, { "epoch": 0.7475265228275122, "grad_norm": 2.940917491912842, "learning_rate": 7.022734229540368e-05, "loss": 1.3727, "step": 12542 }, { "epoch": 0.7476457265466683, "grad_norm": 3.2611355781555176, "learning_rate": 7.021869340638732e-05, "loss": 1.3841, "step": 12544 }, { "epoch": 0.7477649302658242, "grad_norm": 3.1875219345092773, "learning_rate": 7.021004379407909e-05, "loss": 1.3295, "step": 12546 }, { "epoch": 0.7478841339849803, "grad_norm": 2.637531042098999, "learning_rate": 7.020139345878846e-05, "loss": 1.3423, "step": 12548 }, { "epoch": 0.7480033377041364, "grad_norm": 2.7655446529388428, "learning_rate": 7.019274240082487e-05, "loss": 1.4501, "step": 12550 }, { "epoch": 0.7481225414232924, "grad_norm": 3.2592270374298096, "learning_rate": 7.018409062049779e-05, "loss": 1.6097, "step": 12552 }, { "epoch": 0.7482417451424485, "grad_norm": 2.8731744289398193, "learning_rate": 7.017543811811672e-05, "loss": 1.2625, "step": 12554 }, { "epoch": 0.7483609488616045, "grad_norm": 3.173900842666626, "learning_rate": 7.016678489399121e-05, "loss": 1.3451, "step": 12556 }, { "epoch": 0.7484801525807605, "grad_norm": 3.0001471042633057, "learning_rate": 7.015813094843081e-05, "loss": 1.3593, "step": 12558 }, { "epoch": 0.7485993562999166, "grad_norm": 3.2769453525543213, "learning_rate": 7.014947628174509e-05, "loss": 1.4459, "step": 12560 }, { "epoch": 0.7487185600190726, "grad_norm": 2.980109930038452, "learning_rate": 7.014082089424366e-05, "loss": 1.1769, "step": 12562 }, { "epoch": 0.7488377637382286, "grad_norm": 3.287128448486328, "learning_rate": 7.013216478623616e-05, "loss": 1.2368, "step": 12564 }, { "epoch": 0.7489569674573847, "grad_norm": 3.3713812828063965, "learning_rate": 7.012350795803224e-05, "loss": 1.3293, "step": 12566 }, { "epoch": 0.7490761711765407, "grad_norm": 2.8184359073638916, "learning_rate": 7.011485040994158e-05, "loss": 1.3484, "step": 12568 }, { "epoch": 0.7491953748956968, "grad_norm": 3.314502000808716, "learning_rate": 7.010619214227392e-05, "loss": 1.3809, "step": 12570 }, { "epoch": 0.7493145786148527, "grad_norm": 3.286935567855835, "learning_rate": 7.009753315533896e-05, "loss": 1.5286, "step": 12572 }, { "epoch": 0.7494337823340088, "grad_norm": 3.0371205806732178, "learning_rate": 7.008887344944647e-05, "loss": 1.2807, "step": 12574 }, { "epoch": 0.7495529860531649, "grad_norm": 3.2348272800445557, "learning_rate": 7.008021302490626e-05, "loss": 1.3898, "step": 12576 }, { "epoch": 0.7496721897723209, "grad_norm": 3.2537930011749268, "learning_rate": 7.007155188202811e-05, "loss": 1.4455, "step": 12578 }, { "epoch": 0.7497913934914769, "grad_norm": 2.748136043548584, "learning_rate": 7.006289002112189e-05, "loss": 1.2587, "step": 12580 }, { "epoch": 0.749910597210633, "grad_norm": 3.0508368015289307, "learning_rate": 7.005422744249744e-05, "loss": 1.3358, "step": 12582 }, { "epoch": 0.750029800929789, "grad_norm": 2.6258411407470703, "learning_rate": 7.004556414646468e-05, "loss": 1.3015, "step": 12584 }, { "epoch": 0.7501490046489451, "grad_norm": 3.041471004486084, "learning_rate": 7.003690013333348e-05, "loss": 1.2997, "step": 12586 }, { "epoch": 0.750268208368101, "grad_norm": 3.325587749481201, "learning_rate": 7.002823540341382e-05, "loss": 1.2503, "step": 12588 }, { "epoch": 0.7503874120872571, "grad_norm": 3.4219934940338135, "learning_rate": 7.001956995701565e-05, "loss": 1.2653, "step": 12590 }, { "epoch": 0.7505066158064132, "grad_norm": 3.2194972038269043, "learning_rate": 7.001090379444895e-05, "loss": 1.3692, "step": 12592 }, { "epoch": 0.7506258195255692, "grad_norm": 2.8465659618377686, "learning_rate": 7.000223691602377e-05, "loss": 1.1907, "step": 12594 }, { "epoch": 0.7507450232447253, "grad_norm": 3.2943811416625977, "learning_rate": 6.999356932205015e-05, "loss": 1.278, "step": 12596 }, { "epoch": 0.7508642269638812, "grad_norm": 3.039527177810669, "learning_rate": 6.998490101283812e-05, "loss": 1.4771, "step": 12598 }, { "epoch": 0.7509834306830373, "grad_norm": 3.2009940147399902, "learning_rate": 6.997623198869783e-05, "loss": 1.216, "step": 12600 }, { "epoch": 0.7511026344021934, "grad_norm": 3.2026095390319824, "learning_rate": 6.996756224993937e-05, "loss": 1.2077, "step": 12602 }, { "epoch": 0.7512218381213494, "grad_norm": 3.0377893447875977, "learning_rate": 6.995889179687288e-05, "loss": 1.3594, "step": 12604 }, { "epoch": 0.7513410418405054, "grad_norm": 3.2737433910369873, "learning_rate": 6.995022062980854e-05, "loss": 1.2992, "step": 12606 }, { "epoch": 0.7514602455596615, "grad_norm": 3.0613839626312256, "learning_rate": 6.994154874905656e-05, "loss": 1.164, "step": 12608 }, { "epoch": 0.7515794492788175, "grad_norm": 2.8983168601989746, "learning_rate": 6.993287615492714e-05, "loss": 1.2857, "step": 12610 }, { "epoch": 0.7516986529979736, "grad_norm": 3.494520425796509, "learning_rate": 6.992420284773055e-05, "loss": 1.3101, "step": 12612 }, { "epoch": 0.7518178567171295, "grad_norm": 2.9777109622955322, "learning_rate": 6.991552882777704e-05, "loss": 1.2461, "step": 12614 }, { "epoch": 0.7519370604362856, "grad_norm": 2.9460253715515137, "learning_rate": 6.990685409537693e-05, "loss": 1.3233, "step": 12616 }, { "epoch": 0.7520562641554417, "grad_norm": 2.5605967044830322, "learning_rate": 6.989817865084057e-05, "loss": 1.379, "step": 12618 }, { "epoch": 0.7521754678745977, "grad_norm": 2.7469141483306885, "learning_rate": 6.988950249447825e-05, "loss": 1.2552, "step": 12620 }, { "epoch": 0.7522946715937537, "grad_norm": 3.2917327880859375, "learning_rate": 6.988082562660036e-05, "loss": 1.326, "step": 12622 }, { "epoch": 0.7524138753129097, "grad_norm": 2.919527053833008, "learning_rate": 6.987214804751734e-05, "loss": 1.3618, "step": 12624 }, { "epoch": 0.7525330790320658, "grad_norm": 3.148594379425049, "learning_rate": 6.986346975753958e-05, "loss": 1.3139, "step": 12626 }, { "epoch": 0.7526522827512219, "grad_norm": 2.9207303524017334, "learning_rate": 6.985479075697756e-05, "loss": 1.2975, "step": 12628 }, { "epoch": 0.7527714864703778, "grad_norm": 3.0762603282928467, "learning_rate": 6.984611104614172e-05, "loss": 1.2036, "step": 12630 }, { "epoch": 0.7528906901895339, "grad_norm": 3.039485216140747, "learning_rate": 6.98374306253426e-05, "loss": 1.3425, "step": 12632 }, { "epoch": 0.75300989390869, "grad_norm": 2.952059507369995, "learning_rate": 6.982874949489071e-05, "loss": 1.2887, "step": 12634 }, { "epoch": 0.753129097627846, "grad_norm": 3.044750213623047, "learning_rate": 6.98200676550966e-05, "loss": 1.2991, "step": 12636 }, { "epoch": 0.753248301347002, "grad_norm": 3.3369739055633545, "learning_rate": 6.981138510627086e-05, "loss": 1.2951, "step": 12638 }, { "epoch": 0.753367505066158, "grad_norm": 2.987985134124756, "learning_rate": 6.980270184872411e-05, "loss": 1.392, "step": 12640 }, { "epoch": 0.7534867087853141, "grad_norm": 3.0558810234069824, "learning_rate": 6.979401788276694e-05, "loss": 1.3525, "step": 12642 }, { "epoch": 0.7536059125044702, "grad_norm": 3.062080144882202, "learning_rate": 6.978533320871002e-05, "loss": 1.3085, "step": 12644 }, { "epoch": 0.7537251162236261, "grad_norm": 2.9798262119293213, "learning_rate": 6.977664782686406e-05, "loss": 1.4426, "step": 12646 }, { "epoch": 0.7538443199427822, "grad_norm": 3.333897113800049, "learning_rate": 6.976796173753972e-05, "loss": 1.4332, "step": 12648 }, { "epoch": 0.7539635236619383, "grad_norm": 3.0691497325897217, "learning_rate": 6.975927494104777e-05, "loss": 1.2845, "step": 12650 }, { "epoch": 0.7540827273810943, "grad_norm": 3.015866994857788, "learning_rate": 6.975058743769895e-05, "loss": 1.4788, "step": 12652 }, { "epoch": 0.7542019311002504, "grad_norm": 3.014549493789673, "learning_rate": 6.974189922780403e-05, "loss": 1.4349, "step": 12654 }, { "epoch": 0.7543211348194063, "grad_norm": 2.944153308868408, "learning_rate": 6.973321031167383e-05, "loss": 1.366, "step": 12656 }, { "epoch": 0.7544403385385624, "grad_norm": 2.747645378112793, "learning_rate": 6.972452068961917e-05, "loss": 1.3064, "step": 12658 }, { "epoch": 0.7545595422577185, "grad_norm": 2.948456048965454, "learning_rate": 6.971583036195094e-05, "loss": 1.2636, "step": 12660 }, { "epoch": 0.7546787459768745, "grad_norm": 3.0968501567840576, "learning_rate": 6.970713932898e-05, "loss": 1.4524, "step": 12662 }, { "epoch": 0.7547979496960305, "grad_norm": 3.2407004833221436, "learning_rate": 6.969844759101725e-05, "loss": 1.3899, "step": 12664 }, { "epoch": 0.7549171534151865, "grad_norm": 3.0194449424743652, "learning_rate": 6.968975514837363e-05, "loss": 1.3328, "step": 12666 }, { "epoch": 0.7550363571343426, "grad_norm": 3.067777156829834, "learning_rate": 6.96810620013601e-05, "loss": 1.3731, "step": 12668 }, { "epoch": 0.7551555608534987, "grad_norm": 3.966639757156372, "learning_rate": 6.967236815028765e-05, "loss": 1.468, "step": 12670 }, { "epoch": 0.7552747645726546, "grad_norm": 2.824930429458618, "learning_rate": 6.966367359546729e-05, "loss": 1.3233, "step": 12672 }, { "epoch": 0.7553939682918107, "grad_norm": 2.8392744064331055, "learning_rate": 6.965497833721003e-05, "loss": 1.181, "step": 12674 }, { "epoch": 0.7555131720109668, "grad_norm": 3.30167293548584, "learning_rate": 6.964628237582695e-05, "loss": 1.3478, "step": 12676 }, { "epoch": 0.7556323757301228, "grad_norm": 3.4269063472747803, "learning_rate": 6.963758571162915e-05, "loss": 1.3821, "step": 12678 }, { "epoch": 0.7557515794492788, "grad_norm": 2.868619680404663, "learning_rate": 6.96288883449277e-05, "loss": 1.139, "step": 12680 }, { "epoch": 0.7558707831684348, "grad_norm": 3.158884048461914, "learning_rate": 6.962019027603375e-05, "loss": 1.3364, "step": 12682 }, { "epoch": 0.7559899868875909, "grad_norm": 3.1638553142547607, "learning_rate": 6.961149150525847e-05, "loss": 1.2062, "step": 12684 }, { "epoch": 0.756109190606747, "grad_norm": 2.973658561706543, "learning_rate": 6.960279203291304e-05, "loss": 1.3003, "step": 12686 }, { "epoch": 0.7562283943259029, "grad_norm": 2.8460166454315186, "learning_rate": 6.959409185930868e-05, "loss": 1.4846, "step": 12688 }, { "epoch": 0.756347598045059, "grad_norm": 3.382935047149658, "learning_rate": 6.958539098475661e-05, "loss": 1.2888, "step": 12690 }, { "epoch": 0.756466801764215, "grad_norm": 3.749373435974121, "learning_rate": 6.957668940956809e-05, "loss": 1.3803, "step": 12692 }, { "epoch": 0.7565860054833711, "grad_norm": 3.1956307888031006, "learning_rate": 6.95679871340544e-05, "loss": 1.3109, "step": 12694 }, { "epoch": 0.7567052092025272, "grad_norm": 2.6669344902038574, "learning_rate": 6.955928415852686e-05, "loss": 1.2108, "step": 12696 }, { "epoch": 0.7568244129216831, "grad_norm": 3.052396297454834, "learning_rate": 6.955058048329682e-05, "loss": 1.3684, "step": 12698 }, { "epoch": 0.7569436166408392, "grad_norm": 2.937720537185669, "learning_rate": 6.95418761086756e-05, "loss": 1.4608, "step": 12700 }, { "epoch": 0.7570628203599953, "grad_norm": 3.271723747253418, "learning_rate": 6.953317103497464e-05, "loss": 1.3065, "step": 12702 }, { "epoch": 0.7571820240791513, "grad_norm": 3.0315663814544678, "learning_rate": 6.95244652625053e-05, "loss": 1.3849, "step": 12704 }, { "epoch": 0.7573012277983073, "grad_norm": 2.9889707565307617, "learning_rate": 6.951575879157904e-05, "loss": 1.0991, "step": 12706 }, { "epoch": 0.7574204315174633, "grad_norm": 2.7293028831481934, "learning_rate": 6.950705162250732e-05, "loss": 1.2394, "step": 12708 }, { "epoch": 0.7575396352366194, "grad_norm": 2.9880411624908447, "learning_rate": 6.949834375560161e-05, "loss": 1.3015, "step": 12710 }, { "epoch": 0.7576588389557755, "grad_norm": 2.8299503326416016, "learning_rate": 6.948963519117343e-05, "loss": 1.3144, "step": 12712 }, { "epoch": 0.7577780426749314, "grad_norm": 3.277043342590332, "learning_rate": 6.948092592953432e-05, "loss": 1.2897, "step": 12714 }, { "epoch": 0.7578972463940875, "grad_norm": 3.2482736110687256, "learning_rate": 6.947221597099585e-05, "loss": 1.4036, "step": 12716 }, { "epoch": 0.7580164501132435, "grad_norm": 3.220205307006836, "learning_rate": 6.946350531586959e-05, "loss": 1.2725, "step": 12718 }, { "epoch": 0.7581356538323996, "grad_norm": 2.9848270416259766, "learning_rate": 6.945479396446712e-05, "loss": 1.1585, "step": 12720 }, { "epoch": 0.7582548575515556, "grad_norm": 3.1121039390563965, "learning_rate": 6.944608191710015e-05, "loss": 1.2049, "step": 12722 }, { "epoch": 0.7583740612707116, "grad_norm": 3.1951487064361572, "learning_rate": 6.943736917408027e-05, "loss": 1.4784, "step": 12724 }, { "epoch": 0.7584932649898677, "grad_norm": 3.2636139392852783, "learning_rate": 6.942865573571918e-05, "loss": 1.3738, "step": 12726 }, { "epoch": 0.7586124687090238, "grad_norm": 3.176032781600952, "learning_rate": 6.941994160232861e-05, "loss": 1.2675, "step": 12728 }, { "epoch": 0.7587316724281797, "grad_norm": 3.1885101795196533, "learning_rate": 6.941122677422031e-05, "loss": 1.3276, "step": 12730 }, { "epoch": 0.7588508761473358, "grad_norm": 3.2407968044281006, "learning_rate": 6.940251125170598e-05, "loss": 1.4615, "step": 12732 }, { "epoch": 0.7589700798664918, "grad_norm": 2.8559510707855225, "learning_rate": 6.939379503509744e-05, "loss": 1.3782, "step": 12734 }, { "epoch": 0.7590892835856479, "grad_norm": 3.0481314659118652, "learning_rate": 6.938507812470652e-05, "loss": 1.2905, "step": 12736 }, { "epoch": 0.759208487304804, "grad_norm": 3.082096815109253, "learning_rate": 6.937636052084502e-05, "loss": 1.438, "step": 12738 }, { "epoch": 0.7593276910239599, "grad_norm": 2.7417798042297363, "learning_rate": 6.936764222382481e-05, "loss": 1.3163, "step": 12740 }, { "epoch": 0.759446894743116, "grad_norm": 3.223320722579956, "learning_rate": 6.935892323395776e-05, "loss": 1.3283, "step": 12742 }, { "epoch": 0.7595660984622721, "grad_norm": 2.8892552852630615, "learning_rate": 6.93502035515558e-05, "loss": 1.4685, "step": 12744 }, { "epoch": 0.759685302181428, "grad_norm": 2.8151440620422363, "learning_rate": 6.934148317693084e-05, "loss": 1.3151, "step": 12746 }, { "epoch": 0.7598045059005841, "grad_norm": 3.0485928058624268, "learning_rate": 6.933276211039486e-05, "loss": 1.3764, "step": 12748 }, { "epoch": 0.7599237096197401, "grad_norm": 2.8821985721588135, "learning_rate": 6.932404035225984e-05, "loss": 1.4547, "step": 12750 }, { "epoch": 0.7600429133388962, "grad_norm": 2.7130672931671143, "learning_rate": 6.931531790283776e-05, "loss": 1.2076, "step": 12752 }, { "epoch": 0.7601621170580523, "grad_norm": 2.8475453853607178, "learning_rate": 6.93065947624407e-05, "loss": 1.2718, "step": 12754 }, { "epoch": 0.7602813207772082, "grad_norm": 3.2990002632141113, "learning_rate": 6.929787093138067e-05, "loss": 1.2673, "step": 12756 }, { "epoch": 0.7604005244963643, "grad_norm": 3.1092114448547363, "learning_rate": 6.928914640996978e-05, "loss": 1.3031, "step": 12758 }, { "epoch": 0.7605197282155203, "grad_norm": 3.3176395893096924, "learning_rate": 6.92804211985201e-05, "loss": 1.3926, "step": 12760 }, { "epoch": 0.7606389319346764, "grad_norm": 3.120790958404541, "learning_rate": 6.927169529734382e-05, "loss": 1.207, "step": 12762 }, { "epoch": 0.7607581356538324, "grad_norm": 2.9861490726470947, "learning_rate": 6.926296870675304e-05, "loss": 1.2222, "step": 12764 }, { "epoch": 0.7608773393729884, "grad_norm": 3.44265079498291, "learning_rate": 6.925424142705997e-05, "loss": 1.4897, "step": 12766 }, { "epoch": 0.7609965430921445, "grad_norm": 3.268561601638794, "learning_rate": 6.924551345857682e-05, "loss": 1.3285, "step": 12768 }, { "epoch": 0.7611157468113006, "grad_norm": 3.061586856842041, "learning_rate": 6.923678480161578e-05, "loss": 1.3452, "step": 12770 }, { "epoch": 0.7612349505304565, "grad_norm": 3.0824062824249268, "learning_rate": 6.922805545648914e-05, "loss": 1.232, "step": 12772 }, { "epoch": 0.7613541542496126, "grad_norm": 2.9860897064208984, "learning_rate": 6.921932542350917e-05, "loss": 1.2853, "step": 12774 }, { "epoch": 0.7614733579687686, "grad_norm": 2.8589820861816406, "learning_rate": 6.921059470298819e-05, "loss": 1.3537, "step": 12776 }, { "epoch": 0.7615925616879247, "grad_norm": 3.080498218536377, "learning_rate": 6.920186329523848e-05, "loss": 1.359, "step": 12778 }, { "epoch": 0.7617117654070807, "grad_norm": 3.189997911453247, "learning_rate": 6.919313120057242e-05, "loss": 1.4065, "step": 12780 }, { "epoch": 0.7618309691262367, "grad_norm": 3.033149003982544, "learning_rate": 6.918439841930241e-05, "loss": 1.5696, "step": 12782 }, { "epoch": 0.7619501728453928, "grad_norm": 3.034019708633423, "learning_rate": 6.917566495174082e-05, "loss": 1.2156, "step": 12784 }, { "epoch": 0.7620693765645488, "grad_norm": 3.3295986652374268, "learning_rate": 6.91669307982001e-05, "loss": 1.3415, "step": 12786 }, { "epoch": 0.7621885802837048, "grad_norm": 3.012657642364502, "learning_rate": 6.915819595899266e-05, "loss": 1.2288, "step": 12788 }, { "epoch": 0.7623077840028609, "grad_norm": 3.046541213989258, "learning_rate": 6.9149460434431e-05, "loss": 1.3682, "step": 12790 }, { "epoch": 0.7624269877220169, "grad_norm": 3.432305097579956, "learning_rate": 6.914072422482764e-05, "loss": 1.268, "step": 12792 }, { "epoch": 0.762546191441173, "grad_norm": 3.216722249984741, "learning_rate": 6.913198733049508e-05, "loss": 1.3358, "step": 12794 }, { "epoch": 0.762665395160329, "grad_norm": 3.4778122901916504, "learning_rate": 6.912324975174586e-05, "loss": 1.577, "step": 12796 }, { "epoch": 0.762784598879485, "grad_norm": 3.2495670318603516, "learning_rate": 6.911451148889261e-05, "loss": 1.2942, "step": 12798 }, { "epoch": 0.7629038025986411, "grad_norm": 3.383064031600952, "learning_rate": 6.910577254224785e-05, "loss": 1.2741, "step": 12800 }, { "epoch": 0.7630230063177971, "grad_norm": 2.873906373977661, "learning_rate": 6.909703291212423e-05, "loss": 1.3517, "step": 12802 }, { "epoch": 0.7631422100369532, "grad_norm": 3.4721219539642334, "learning_rate": 6.908829259883443e-05, "loss": 1.5692, "step": 12804 }, { "epoch": 0.7632614137561092, "grad_norm": 2.893767833709717, "learning_rate": 6.907955160269108e-05, "loss": 1.2844, "step": 12806 }, { "epoch": 0.7633806174752652, "grad_norm": 3.2490367889404297, "learning_rate": 6.907080992400688e-05, "loss": 1.3192, "step": 12808 }, { "epoch": 0.7634998211944213, "grad_norm": 3.271491765975952, "learning_rate": 6.906206756309456e-05, "loss": 1.3839, "step": 12810 }, { "epoch": 0.7636190249135772, "grad_norm": 3.3338897228240967, "learning_rate": 6.905332452026686e-05, "loss": 1.2614, "step": 12812 }, { "epoch": 0.7637382286327333, "grad_norm": 3.229372262954712, "learning_rate": 6.904458079583658e-05, "loss": 1.3842, "step": 12814 }, { "epoch": 0.7638574323518894, "grad_norm": 2.874793291091919, "learning_rate": 6.903583639011646e-05, "loss": 1.4801, "step": 12816 }, { "epoch": 0.7639766360710454, "grad_norm": 3.0257768630981445, "learning_rate": 6.902709130341936e-05, "loss": 1.3513, "step": 12818 }, { "epoch": 0.7640958397902015, "grad_norm": 2.922384023666382, "learning_rate": 6.901834553605808e-05, "loss": 1.2716, "step": 12820 }, { "epoch": 0.7642150435093575, "grad_norm": 3.0904693603515625, "learning_rate": 6.900959908834552e-05, "loss": 1.5404, "step": 12822 }, { "epoch": 0.7643342472285135, "grad_norm": 2.9032931327819824, "learning_rate": 6.900085196059456e-05, "loss": 1.2934, "step": 12824 }, { "epoch": 0.7644534509476696, "grad_norm": 2.605524778366089, "learning_rate": 6.899210415311812e-05, "loss": 1.3685, "step": 12826 }, { "epoch": 0.7645726546668256, "grad_norm": 2.967514991760254, "learning_rate": 6.898335566622913e-05, "loss": 1.2085, "step": 12828 }, { "epoch": 0.7646918583859816, "grad_norm": 3.117755889892578, "learning_rate": 6.897460650024055e-05, "loss": 1.368, "step": 12830 }, { "epoch": 0.7648110621051377, "grad_norm": 2.8210482597351074, "learning_rate": 6.896585665546538e-05, "loss": 1.2743, "step": 12832 }, { "epoch": 0.7649302658242937, "grad_norm": 2.914010524749756, "learning_rate": 6.895710613221662e-05, "loss": 1.2626, "step": 12834 }, { "epoch": 0.7650494695434498, "grad_norm": 3.3430733680725098, "learning_rate": 6.894835493080732e-05, "loss": 1.318, "step": 12836 }, { "epoch": 0.7651686732626058, "grad_norm": 2.9844226837158203, "learning_rate": 6.893960305155053e-05, "loss": 1.1222, "step": 12838 }, { "epoch": 0.7652878769817618, "grad_norm": 3.3292222023010254, "learning_rate": 6.893085049475936e-05, "loss": 1.2878, "step": 12840 }, { "epoch": 0.7654070807009179, "grad_norm": 3.0655441284179688, "learning_rate": 6.892209726074686e-05, "loss": 1.2625, "step": 12842 }, { "epoch": 0.7655262844200739, "grad_norm": 3.1228110790252686, "learning_rate": 6.891334334982623e-05, "loss": 1.4809, "step": 12844 }, { "epoch": 0.76564548813923, "grad_norm": 3.3401401042938232, "learning_rate": 6.890458876231058e-05, "loss": 1.2997, "step": 12846 }, { "epoch": 0.765764691858386, "grad_norm": 2.9619054794311523, "learning_rate": 6.889583349851311e-05, "loss": 1.2541, "step": 12848 }, { "epoch": 0.765883895577542, "grad_norm": 3.3284378051757812, "learning_rate": 6.888707755874704e-05, "loss": 1.2161, "step": 12850 }, { "epoch": 0.7660030992966981, "grad_norm": 2.802643060684204, "learning_rate": 6.887832094332556e-05, "loss": 1.3922, "step": 12852 }, { "epoch": 0.766122303015854, "grad_norm": 3.15224552154541, "learning_rate": 6.886956365256196e-05, "loss": 1.4171, "step": 12854 }, { "epoch": 0.7662415067350101, "grad_norm": 3.2773499488830566, "learning_rate": 6.88608056867695e-05, "loss": 1.4222, "step": 12856 }, { "epoch": 0.7663607104541662, "grad_norm": 3.151381254196167, "learning_rate": 6.885204704626151e-05, "loss": 1.4774, "step": 12858 }, { "epoch": 0.7664799141733222, "grad_norm": 3.176095962524414, "learning_rate": 6.884328773135127e-05, "loss": 1.369, "step": 12860 }, { "epoch": 0.7665991178924783, "grad_norm": 2.7990667819976807, "learning_rate": 6.883452774235218e-05, "loss": 1.3534, "step": 12862 }, { "epoch": 0.7667183216116343, "grad_norm": 3.0191569328308105, "learning_rate": 6.882576707957757e-05, "loss": 1.2772, "step": 12864 }, { "epoch": 0.7668375253307903, "grad_norm": 3.1132538318634033, "learning_rate": 6.881700574334086e-05, "loss": 1.3286, "step": 12866 }, { "epoch": 0.7669567290499464, "grad_norm": 3.076545000076294, "learning_rate": 6.880824373395549e-05, "loss": 1.3906, "step": 12868 }, { "epoch": 0.7670759327691024, "grad_norm": 2.7758281230926514, "learning_rate": 6.879948105173488e-05, "loss": 1.2528, "step": 12870 }, { "epoch": 0.7671951364882584, "grad_norm": 2.7836015224456787, "learning_rate": 6.879071769699252e-05, "loss": 1.2676, "step": 12872 }, { "epoch": 0.7673143402074145, "grad_norm": 3.2046585083007812, "learning_rate": 6.878195367004188e-05, "loss": 1.3712, "step": 12874 }, { "epoch": 0.7674335439265705, "grad_norm": 2.897977113723755, "learning_rate": 6.87731889711965e-05, "loss": 1.391, "step": 12876 }, { "epoch": 0.7675527476457266, "grad_norm": 3.086726427078247, "learning_rate": 6.876442360076992e-05, "loss": 1.3118, "step": 12878 }, { "epoch": 0.7676719513648825, "grad_norm": 3.031834602355957, "learning_rate": 6.875565755907571e-05, "loss": 1.2981, "step": 12880 }, { "epoch": 0.7677911550840386, "grad_norm": 3.072530746459961, "learning_rate": 6.874689084642746e-05, "loss": 1.3287, "step": 12882 }, { "epoch": 0.7679103588031947, "grad_norm": 3.4631872177124023, "learning_rate": 6.873812346313877e-05, "loss": 1.4892, "step": 12884 }, { "epoch": 0.7680295625223507, "grad_norm": 2.9670028686523438, "learning_rate": 6.87293554095233e-05, "loss": 1.3534, "step": 12886 }, { "epoch": 0.7681487662415067, "grad_norm": 2.9643259048461914, "learning_rate": 6.872058668589472e-05, "loss": 1.3221, "step": 12888 }, { "epoch": 0.7682679699606628, "grad_norm": 3.1987617015838623, "learning_rate": 6.871181729256669e-05, "loss": 1.484, "step": 12890 }, { "epoch": 0.7683871736798188, "grad_norm": 3.397918939590454, "learning_rate": 6.870304722985292e-05, "loss": 1.3234, "step": 12892 }, { "epoch": 0.7685063773989749, "grad_norm": 3.2610082626342773, "learning_rate": 6.869427649806716e-05, "loss": 1.4592, "step": 12894 }, { "epoch": 0.7686255811181308, "grad_norm": 3.516040802001953, "learning_rate": 6.86855050975232e-05, "loss": 1.3312, "step": 12896 }, { "epoch": 0.7687447848372869, "grad_norm": 3.251539707183838, "learning_rate": 6.867673302853474e-05, "loss": 1.4669, "step": 12898 }, { "epoch": 0.768863988556443, "grad_norm": 2.796858072280884, "learning_rate": 6.866796029141568e-05, "loss": 1.2587, "step": 12900 }, { "epoch": 0.768983192275599, "grad_norm": 3.1610119342803955, "learning_rate": 6.86591868864798e-05, "loss": 1.1659, "step": 12902 }, { "epoch": 0.769102395994755, "grad_norm": 2.7485837936401367, "learning_rate": 6.865041281404096e-05, "loss": 1.2845, "step": 12904 }, { "epoch": 0.769221599713911, "grad_norm": 2.8592288494110107, "learning_rate": 6.864163807441304e-05, "loss": 1.3006, "step": 12906 }, { "epoch": 0.7693408034330671, "grad_norm": 3.040501832962036, "learning_rate": 6.863286266790994e-05, "loss": 1.3226, "step": 12908 }, { "epoch": 0.7694600071522232, "grad_norm": 2.9913675785064697, "learning_rate": 6.862408659484559e-05, "loss": 1.3495, "step": 12910 }, { "epoch": 0.7695792108713791, "grad_norm": 3.3557159900665283, "learning_rate": 6.861530985553395e-05, "loss": 1.4191, "step": 12912 }, { "epoch": 0.7696984145905352, "grad_norm": 2.8575446605682373, "learning_rate": 6.860653245028899e-05, "loss": 1.2492, "step": 12914 }, { "epoch": 0.7698176183096913, "grad_norm": 2.981362819671631, "learning_rate": 6.85977543794247e-05, "loss": 1.3015, "step": 12916 }, { "epoch": 0.7699368220288473, "grad_norm": 3.1469264030456543, "learning_rate": 6.85889756432551e-05, "loss": 1.4209, "step": 12918 }, { "epoch": 0.7700560257480034, "grad_norm": 2.8933255672454834, "learning_rate": 6.858019624209423e-05, "loss": 1.2893, "step": 12920 }, { "epoch": 0.7701752294671593, "grad_norm": 3.2765917778015137, "learning_rate": 6.857141617625618e-05, "loss": 1.3953, "step": 12922 }, { "epoch": 0.7702944331863154, "grad_norm": 2.803557872772217, "learning_rate": 6.856263544605503e-05, "loss": 1.2517, "step": 12924 }, { "epoch": 0.7704136369054715, "grad_norm": 3.089639663696289, "learning_rate": 6.85538540518049e-05, "loss": 1.2658, "step": 12926 }, { "epoch": 0.7705328406246275, "grad_norm": 3.4039862155914307, "learning_rate": 6.854507199381992e-05, "loss": 1.3799, "step": 12928 }, { "epoch": 0.7706520443437835, "grad_norm": 3.2613472938537598, "learning_rate": 6.853628927241427e-05, "loss": 1.4993, "step": 12930 }, { "epoch": 0.7707712480629396, "grad_norm": 3.279583692550659, "learning_rate": 6.852750588790213e-05, "loss": 1.4146, "step": 12932 }, { "epoch": 0.7708904517820956, "grad_norm": 3.0791587829589844, "learning_rate": 6.851872184059774e-05, "loss": 1.2431, "step": 12934 }, { "epoch": 0.7710096555012517, "grad_norm": 3.083824872970581, "learning_rate": 6.850993713081525e-05, "loss": 1.4002, "step": 12936 }, { "epoch": 0.7711288592204076, "grad_norm": 2.9882876873016357, "learning_rate": 6.850115175886902e-05, "loss": 1.2434, "step": 12938 }, { "epoch": 0.7712480629395637, "grad_norm": 2.9485960006713867, "learning_rate": 6.84923657250733e-05, "loss": 1.3241, "step": 12940 }, { "epoch": 0.7713672666587198, "grad_norm": 2.658743381500244, "learning_rate": 6.848357902974237e-05, "loss": 1.2516, "step": 12942 }, { "epoch": 0.7714864703778758, "grad_norm": 3.465156078338623, "learning_rate": 6.847479167319057e-05, "loss": 1.351, "step": 12944 }, { "epoch": 0.7716056740970318, "grad_norm": 2.7719967365264893, "learning_rate": 6.846600365573226e-05, "loss": 1.2333, "step": 12946 }, { "epoch": 0.7717248778161878, "grad_norm": 3.157581090927124, "learning_rate": 6.845721497768183e-05, "loss": 1.168, "step": 12948 }, { "epoch": 0.7718440815353439, "grad_norm": 3.0040247440338135, "learning_rate": 6.844842563935366e-05, "loss": 1.3366, "step": 12950 }, { "epoch": 0.7719632852545, "grad_norm": 3.2464804649353027, "learning_rate": 6.843963564106219e-05, "loss": 1.3547, "step": 12952 }, { "epoch": 0.772082488973656, "grad_norm": 2.731462240219116, "learning_rate": 6.843084498312186e-05, "loss": 1.3541, "step": 12954 }, { "epoch": 0.772201692692812, "grad_norm": 3.0671064853668213, "learning_rate": 6.842205366584715e-05, "loss": 1.3735, "step": 12956 }, { "epoch": 0.7723208964119681, "grad_norm": 2.9829297065734863, "learning_rate": 6.841326168955255e-05, "loss": 1.147, "step": 12958 }, { "epoch": 0.7724401001311241, "grad_norm": 2.7939348220825195, "learning_rate": 6.840446905455258e-05, "loss": 1.2114, "step": 12960 }, { "epoch": 0.7725593038502802, "grad_norm": 3.2391891479492188, "learning_rate": 6.839567576116178e-05, "loss": 1.3954, "step": 12962 }, { "epoch": 0.7726785075694361, "grad_norm": 3.128955364227295, "learning_rate": 6.838688180969471e-05, "loss": 1.4567, "step": 12964 }, { "epoch": 0.7727977112885922, "grad_norm": 2.8507704734802246, "learning_rate": 6.837808720046598e-05, "loss": 1.3655, "step": 12966 }, { "epoch": 0.7729169150077483, "grad_norm": 3.297189235687256, "learning_rate": 6.836929193379018e-05, "loss": 1.2298, "step": 12968 }, { "epoch": 0.7730361187269043, "grad_norm": 2.997927665710449, "learning_rate": 6.836049600998195e-05, "loss": 1.3622, "step": 12970 }, { "epoch": 0.7731553224460603, "grad_norm": 2.6694226264953613, "learning_rate": 6.835169942935598e-05, "loss": 1.2861, "step": 12972 }, { "epoch": 0.7732745261652163, "grad_norm": 3.196902275085449, "learning_rate": 6.834290219222692e-05, "loss": 1.2243, "step": 12974 }, { "epoch": 0.7733937298843724, "grad_norm": 3.0140326023101807, "learning_rate": 6.833410429890949e-05, "loss": 1.2508, "step": 12976 }, { "epoch": 0.7735129336035285, "grad_norm": 3.104566812515259, "learning_rate": 6.832530574971842e-05, "loss": 1.2198, "step": 12978 }, { "epoch": 0.7736321373226844, "grad_norm": 3.026071786880493, "learning_rate": 6.831650654496846e-05, "loss": 1.3858, "step": 12980 }, { "epoch": 0.7737513410418405, "grad_norm": 2.8549365997314453, "learning_rate": 6.830770668497441e-05, "loss": 1.3817, "step": 12982 }, { "epoch": 0.7738705447609966, "grad_norm": 3.134326934814453, "learning_rate": 6.829890617005104e-05, "loss": 1.2472, "step": 12984 }, { "epoch": 0.7739897484801526, "grad_norm": 2.893359422683716, "learning_rate": 6.829010500051318e-05, "loss": 1.3824, "step": 12986 }, { "epoch": 0.7741089521993086, "grad_norm": 3.009632110595703, "learning_rate": 6.828130317667568e-05, "loss": 1.2127, "step": 12988 }, { "epoch": 0.7742281559184646, "grad_norm": 3.347508192062378, "learning_rate": 6.827250069885343e-05, "loss": 1.2122, "step": 12990 }, { "epoch": 0.7743473596376207, "grad_norm": 3.2070565223693848, "learning_rate": 6.826369756736132e-05, "loss": 1.2253, "step": 12992 }, { "epoch": 0.7744665633567768, "grad_norm": 2.837390184402466, "learning_rate": 6.825489378251424e-05, "loss": 1.3131, "step": 12994 }, { "epoch": 0.7745857670759327, "grad_norm": 3.5785844326019287, "learning_rate": 6.824608934462718e-05, "loss": 1.4178, "step": 12996 }, { "epoch": 0.7747049707950888, "grad_norm": 3.162116050720215, "learning_rate": 6.823728425401504e-05, "loss": 1.3325, "step": 12998 }, { "epoch": 0.7748241745142448, "grad_norm": 3.07413387298584, "learning_rate": 6.822847851099286e-05, "loss": 1.2642, "step": 13000 }, { "epoch": 0.7749433782334009, "grad_norm": 3.2513246536254883, "learning_rate": 6.821967211587563e-05, "loss": 1.3873, "step": 13002 }, { "epoch": 0.775062581952557, "grad_norm": 3.422253370285034, "learning_rate": 6.82108650689784e-05, "loss": 1.4769, "step": 13004 }, { "epoch": 0.7751817856717129, "grad_norm": 2.7022008895874023, "learning_rate": 6.820205737061621e-05, "loss": 1.4541, "step": 13006 }, { "epoch": 0.775300989390869, "grad_norm": 2.965954303741455, "learning_rate": 6.819324902110415e-05, "loss": 1.3416, "step": 13008 }, { "epoch": 0.7754201931100251, "grad_norm": 2.9712932109832764, "learning_rate": 6.818444002075735e-05, "loss": 1.2391, "step": 13010 }, { "epoch": 0.775539396829181, "grad_norm": 2.8302621841430664, "learning_rate": 6.817563036989088e-05, "loss": 1.2918, "step": 13012 }, { "epoch": 0.7756586005483371, "grad_norm": 2.789579391479492, "learning_rate": 6.816682006881994e-05, "loss": 1.1492, "step": 13014 }, { "epoch": 0.7757778042674931, "grad_norm": 2.936793088912964, "learning_rate": 6.815800911785967e-05, "loss": 1.3504, "step": 13016 }, { "epoch": 0.7758970079866492, "grad_norm": 3.2274041175842285, "learning_rate": 6.814919751732532e-05, "loss": 1.3261, "step": 13018 }, { "epoch": 0.7760162117058053, "grad_norm": 2.9420247077941895, "learning_rate": 6.814038526753205e-05, "loss": 1.4149, "step": 13020 }, { "epoch": 0.7761354154249612, "grad_norm": 3.20599365234375, "learning_rate": 6.813157236879514e-05, "loss": 1.4416, "step": 13022 }, { "epoch": 0.7762546191441173, "grad_norm": 3.0009918212890625, "learning_rate": 6.812275882142987e-05, "loss": 1.2797, "step": 13024 }, { "epoch": 0.7763738228632734, "grad_norm": 2.8643245697021484, "learning_rate": 6.81139446257515e-05, "loss": 1.2146, "step": 13026 }, { "epoch": 0.7764930265824294, "grad_norm": 3.066358804702759, "learning_rate": 6.810512978207534e-05, "loss": 1.3728, "step": 13028 }, { "epoch": 0.7766122303015854, "grad_norm": 3.0750315189361572, "learning_rate": 6.809631429071675e-05, "loss": 1.3904, "step": 13030 }, { "epoch": 0.7767314340207414, "grad_norm": 2.9391674995422363, "learning_rate": 6.808749815199107e-05, "loss": 1.2527, "step": 13032 }, { "epoch": 0.7768506377398975, "grad_norm": 3.4474189281463623, "learning_rate": 6.807868136621372e-05, "loss": 1.4475, "step": 13034 }, { "epoch": 0.7769698414590536, "grad_norm": 3.0570857524871826, "learning_rate": 6.806986393370007e-05, "loss": 1.3454, "step": 13036 }, { "epoch": 0.7770890451782095, "grad_norm": 3.146023988723755, "learning_rate": 6.806104585476555e-05, "loss": 1.3406, "step": 13038 }, { "epoch": 0.7772082488973656, "grad_norm": 3.182347536087036, "learning_rate": 6.805222712972565e-05, "loss": 1.488, "step": 13040 }, { "epoch": 0.7773274526165216, "grad_norm": 2.6610686779022217, "learning_rate": 6.80434077588958e-05, "loss": 1.3028, "step": 13042 }, { "epoch": 0.7774466563356777, "grad_norm": 2.85739803314209, "learning_rate": 6.803458774259151e-05, "loss": 1.2665, "step": 13044 }, { "epoch": 0.7775658600548337, "grad_norm": 3.146059989929199, "learning_rate": 6.802576708112834e-05, "loss": 1.3213, "step": 13046 }, { "epoch": 0.7776850637739897, "grad_norm": 3.0522005558013916, "learning_rate": 6.801694577482179e-05, "loss": 1.2551, "step": 13048 }, { "epoch": 0.7778042674931458, "grad_norm": 2.888519763946533, "learning_rate": 6.800812382398744e-05, "loss": 1.3196, "step": 13050 }, { "epoch": 0.7779234712123019, "grad_norm": 3.2150678634643555, "learning_rate": 6.799930122894089e-05, "loss": 1.2725, "step": 13052 }, { "epoch": 0.7780426749314578, "grad_norm": 3.0854856967926025, "learning_rate": 6.799047798999778e-05, "loss": 1.2905, "step": 13054 }, { "epoch": 0.7781618786506139, "grad_norm": 2.9505693912506104, "learning_rate": 6.79816541074737e-05, "loss": 1.2624, "step": 13056 }, { "epoch": 0.7782810823697699, "grad_norm": 2.961709499359131, "learning_rate": 6.797282958168432e-05, "loss": 1.253, "step": 13058 }, { "epoch": 0.778400286088926, "grad_norm": 3.074387311935425, "learning_rate": 6.796400441294533e-05, "loss": 1.3249, "step": 13060 }, { "epoch": 0.778519489808082, "grad_norm": 3.249105930328369, "learning_rate": 6.795517860157245e-05, "loss": 1.4179, "step": 13062 }, { "epoch": 0.778638693527238, "grad_norm": 3.0777416229248047, "learning_rate": 6.794635214788139e-05, "loss": 1.3927, "step": 13064 }, { "epoch": 0.7787578972463941, "grad_norm": 2.906653642654419, "learning_rate": 6.793752505218791e-05, "loss": 1.327, "step": 13066 }, { "epoch": 0.7788771009655501, "grad_norm": 2.765704393386841, "learning_rate": 6.79286973148078e-05, "loss": 1.1602, "step": 13068 }, { "epoch": 0.7789963046847062, "grad_norm": 3.1430394649505615, "learning_rate": 6.791986893605684e-05, "loss": 1.298, "step": 13070 }, { "epoch": 0.7791155084038622, "grad_norm": 3.054281711578369, "learning_rate": 6.791103991625085e-05, "loss": 1.2568, "step": 13072 }, { "epoch": 0.7792347121230182, "grad_norm": 3.799142599105835, "learning_rate": 6.790221025570568e-05, "loss": 1.3113, "step": 13074 }, { "epoch": 0.7793539158421743, "grad_norm": 3.106098175048828, "learning_rate": 6.78933799547372e-05, "loss": 1.3006, "step": 13076 }, { "epoch": 0.7794731195613304, "grad_norm": 2.9665133953094482, "learning_rate": 6.78845490136613e-05, "loss": 1.204, "step": 13078 }, { "epoch": 0.7795923232804863, "grad_norm": 2.9013564586639404, "learning_rate": 6.787571743279389e-05, "loss": 1.3746, "step": 13080 }, { "epoch": 0.7797115269996424, "grad_norm": 2.9929304122924805, "learning_rate": 6.78668852124509e-05, "loss": 1.3266, "step": 13082 }, { "epoch": 0.7798307307187984, "grad_norm": 2.839155912399292, "learning_rate": 6.785805235294831e-05, "loss": 1.2486, "step": 13084 }, { "epoch": 0.7799499344379545, "grad_norm": 3.0141329765319824, "learning_rate": 6.784921885460206e-05, "loss": 1.3737, "step": 13086 }, { "epoch": 0.7800691381571105, "grad_norm": 3.0415780544281006, "learning_rate": 6.784038471772822e-05, "loss": 1.2964, "step": 13088 }, { "epoch": 0.7801883418762665, "grad_norm": 3.282653570175171, "learning_rate": 6.783154994264273e-05, "loss": 1.4668, "step": 13090 }, { "epoch": 0.7803075455954226, "grad_norm": 2.850008010864258, "learning_rate": 6.782271452966172e-05, "loss": 1.306, "step": 13092 }, { "epoch": 0.7804267493145786, "grad_norm": 3.139850616455078, "learning_rate": 6.781387847910124e-05, "loss": 1.4296, "step": 13094 }, { "epoch": 0.7805459530337346, "grad_norm": 3.0283761024475098, "learning_rate": 6.780504179127734e-05, "loss": 1.2526, "step": 13096 }, { "epoch": 0.7806651567528907, "grad_norm": 2.8427529335021973, "learning_rate": 6.779620446650621e-05, "loss": 1.1851, "step": 13098 }, { "epoch": 0.7807843604720467, "grad_norm": 3.0063083171844482, "learning_rate": 6.778736650510396e-05, "loss": 1.2738, "step": 13100 }, { "epoch": 0.7809035641912028, "grad_norm": 3.3100016117095947, "learning_rate": 6.777852790738672e-05, "loss": 1.346, "step": 13102 }, { "epoch": 0.7810227679103589, "grad_norm": 2.8648056983947754, "learning_rate": 6.776968867367072e-05, "loss": 1.274, "step": 13104 }, { "epoch": 0.7811419716295148, "grad_norm": 2.9200439453125, "learning_rate": 6.776084880427218e-05, "loss": 1.3081, "step": 13106 }, { "epoch": 0.7812611753486709, "grad_norm": 3.1125950813293457, "learning_rate": 6.775200829950729e-05, "loss": 1.2918, "step": 13108 }, { "epoch": 0.7813803790678269, "grad_norm": 3.4504666328430176, "learning_rate": 6.774316715969232e-05, "loss": 1.32, "step": 13110 }, { "epoch": 0.781499582786983, "grad_norm": 2.962622880935669, "learning_rate": 6.773432538514357e-05, "loss": 1.2546, "step": 13112 }, { "epoch": 0.781618786506139, "grad_norm": 3.2771294116973877, "learning_rate": 6.772548297617731e-05, "loss": 1.3564, "step": 13114 }, { "epoch": 0.781737990225295, "grad_norm": 3.0810024738311768, "learning_rate": 6.77166399331099e-05, "loss": 1.3638, "step": 13116 }, { "epoch": 0.7818571939444511, "grad_norm": 2.779694080352783, "learning_rate": 6.770779625625763e-05, "loss": 1.1658, "step": 13118 }, { "epoch": 0.7819763976636072, "grad_norm": 3.0770082473754883, "learning_rate": 6.769895194593693e-05, "loss": 1.2796, "step": 13120 }, { "epoch": 0.7820956013827631, "grad_norm": 3.1066360473632812, "learning_rate": 6.769010700246414e-05, "loss": 1.3181, "step": 13122 }, { "epoch": 0.7822148051019192, "grad_norm": 3.295799732208252, "learning_rate": 6.768126142615571e-05, "loss": 1.4442, "step": 13124 }, { "epoch": 0.7823340088210752, "grad_norm": 3.3278372287750244, "learning_rate": 6.767241521732806e-05, "loss": 1.3728, "step": 13126 }, { "epoch": 0.7824532125402313, "grad_norm": 3.0060665607452393, "learning_rate": 6.766356837629764e-05, "loss": 1.2282, "step": 13128 }, { "epoch": 0.7825724162593873, "grad_norm": 3.016488790512085, "learning_rate": 6.765472090338096e-05, "loss": 1.1173, "step": 13130 }, { "epoch": 0.7826916199785433, "grad_norm": 2.7585833072662354, "learning_rate": 6.76458727988945e-05, "loss": 1.2516, "step": 13132 }, { "epoch": 0.7828108236976994, "grad_norm": 2.7920618057250977, "learning_rate": 6.763702406315479e-05, "loss": 1.1498, "step": 13134 }, { "epoch": 0.7829300274168554, "grad_norm": 3.0311062335968018, "learning_rate": 6.762817469647839e-05, "loss": 1.3022, "step": 13136 }, { "epoch": 0.7830492311360114, "grad_norm": 3.4541351795196533, "learning_rate": 6.761932469918188e-05, "loss": 1.4129, "step": 13138 }, { "epoch": 0.7831684348551675, "grad_norm": 3.260780096054077, "learning_rate": 6.761047407158185e-05, "loss": 1.4335, "step": 13140 }, { "epoch": 0.7832876385743235, "grad_norm": 3.2151455879211426, "learning_rate": 6.760162281399487e-05, "loss": 1.5234, "step": 13142 }, { "epoch": 0.7834068422934796, "grad_norm": 2.8289992809295654, "learning_rate": 6.759277092673766e-05, "loss": 1.2251, "step": 13144 }, { "epoch": 0.7835260460126356, "grad_norm": 3.453233242034912, "learning_rate": 6.75839184101268e-05, "loss": 1.3127, "step": 13146 }, { "epoch": 0.7836452497317916, "grad_norm": 3.280778169631958, "learning_rate": 6.757506526447905e-05, "loss": 1.4737, "step": 13148 }, { "epoch": 0.7837644534509477, "grad_norm": 3.0295615196228027, "learning_rate": 6.756621149011106e-05, "loss": 1.4598, "step": 13150 }, { "epoch": 0.7838836571701037, "grad_norm": 2.732438564300537, "learning_rate": 6.755735708733959e-05, "loss": 1.3001, "step": 13152 }, { "epoch": 0.7840028608892597, "grad_norm": 2.953652858734131, "learning_rate": 6.754850205648139e-05, "loss": 1.2614, "step": 13154 }, { "epoch": 0.7841220646084158, "grad_norm": 3.1526856422424316, "learning_rate": 6.753964639785322e-05, "loss": 1.4702, "step": 13156 }, { "epoch": 0.7842412683275718, "grad_norm": 2.957315683364868, "learning_rate": 6.75307901117719e-05, "loss": 1.2941, "step": 13158 }, { "epoch": 0.7843604720467279, "grad_norm": 3.346418857574463, "learning_rate": 6.752193319855423e-05, "loss": 1.4229, "step": 13160 }, { "epoch": 0.7844796757658838, "grad_norm": 2.9783871173858643, "learning_rate": 6.751307565851706e-05, "loss": 1.2164, "step": 13162 }, { "epoch": 0.7845988794850399, "grad_norm": 3.1261258125305176, "learning_rate": 6.750421749197726e-05, "loss": 1.391, "step": 13164 }, { "epoch": 0.784718083204196, "grad_norm": 2.9560890197753906, "learning_rate": 6.74953586992517e-05, "loss": 1.3824, "step": 13166 }, { "epoch": 0.784837286923352, "grad_norm": 2.9248697757720947, "learning_rate": 6.74864992806573e-05, "loss": 1.2721, "step": 13168 }, { "epoch": 0.784956490642508, "grad_norm": 2.9718403816223145, "learning_rate": 6.747763923651099e-05, "loss": 1.2343, "step": 13170 }, { "epoch": 0.7850756943616641, "grad_norm": 3.0633463859558105, "learning_rate": 6.746877856712974e-05, "loss": 1.4233, "step": 13172 }, { "epoch": 0.7851948980808201, "grad_norm": 3.0176286697387695, "learning_rate": 6.745991727283049e-05, "loss": 1.2902, "step": 13174 }, { "epoch": 0.7853141017999762, "grad_norm": 2.840947151184082, "learning_rate": 6.745105535393028e-05, "loss": 1.2346, "step": 13176 }, { "epoch": 0.7854333055191322, "grad_norm": 3.217099189758301, "learning_rate": 6.74421928107461e-05, "loss": 1.4899, "step": 13178 }, { "epoch": 0.7855525092382882, "grad_norm": 2.8520257472991943, "learning_rate": 6.743332964359502e-05, "loss": 1.3433, "step": 13180 }, { "epoch": 0.7856717129574443, "grad_norm": 3.1814002990722656, "learning_rate": 6.742446585279407e-05, "loss": 1.364, "step": 13182 }, { "epoch": 0.7857909166766003, "grad_norm": 2.784271717071533, "learning_rate": 6.741560143866037e-05, "loss": 1.2948, "step": 13184 }, { "epoch": 0.7859101203957564, "grad_norm": 3.0990114212036133, "learning_rate": 6.740673640151102e-05, "loss": 1.2674, "step": 13186 }, { "epoch": 0.7860293241149123, "grad_norm": 3.0327675342559814, "learning_rate": 6.739787074166314e-05, "loss": 1.3719, "step": 13188 }, { "epoch": 0.7861485278340684, "grad_norm": 3.2107391357421875, "learning_rate": 6.738900445943392e-05, "loss": 1.4073, "step": 13190 }, { "epoch": 0.7862677315532245, "grad_norm": 2.8091647624969482, "learning_rate": 6.73801375551405e-05, "loss": 1.3118, "step": 13192 }, { "epoch": 0.7863869352723805, "grad_norm": 3.629509925842285, "learning_rate": 6.73712700291001e-05, "loss": 1.4275, "step": 13194 }, { "epoch": 0.7865061389915365, "grad_norm": 3.2818527221679688, "learning_rate": 6.736240188162994e-05, "loss": 1.3585, "step": 13196 }, { "epoch": 0.7866253427106926, "grad_norm": 3.2443385124206543, "learning_rate": 6.735353311304724e-05, "loss": 1.3766, "step": 13198 }, { "epoch": 0.7867445464298486, "grad_norm": 3.116738796234131, "learning_rate": 6.734466372366931e-05, "loss": 1.3776, "step": 13200 }, { "epoch": 0.7868637501490047, "grad_norm": 3.4205548763275146, "learning_rate": 6.73357937138134e-05, "loss": 1.6315, "step": 13202 }, { "epoch": 0.7869829538681606, "grad_norm": 3.0832011699676514, "learning_rate": 6.732692308379684e-05, "loss": 1.2992, "step": 13204 }, { "epoch": 0.7871021575873167, "grad_norm": 3.1597774028778076, "learning_rate": 6.731805183393696e-05, "loss": 1.4458, "step": 13206 }, { "epoch": 0.7872213613064728, "grad_norm": 3.2048139572143555, "learning_rate": 6.730917996455112e-05, "loss": 1.3133, "step": 13208 }, { "epoch": 0.7873405650256288, "grad_norm": 2.9238693714141846, "learning_rate": 6.730030747595667e-05, "loss": 1.354, "step": 13210 }, { "epoch": 0.7874597687447848, "grad_norm": 3.310904026031494, "learning_rate": 6.729143436847105e-05, "loss": 1.3129, "step": 13212 }, { "epoch": 0.7875789724639409, "grad_norm": 2.929027795791626, "learning_rate": 6.728256064241164e-05, "loss": 1.1754, "step": 13214 }, { "epoch": 0.7876981761830969, "grad_norm": 2.986069440841675, "learning_rate": 6.727368629809591e-05, "loss": 1.2434, "step": 13216 }, { "epoch": 0.787817379902253, "grad_norm": 3.223642349243164, "learning_rate": 6.726481133584132e-05, "loss": 1.3005, "step": 13218 }, { "epoch": 0.787936583621409, "grad_norm": 2.9777565002441406, "learning_rate": 6.725593575596537e-05, "loss": 1.3728, "step": 13220 }, { "epoch": 0.788055787340565, "grad_norm": 2.932328701019287, "learning_rate": 6.724705955878556e-05, "loss": 1.1861, "step": 13222 }, { "epoch": 0.7881749910597211, "grad_norm": 3.1089346408843994, "learning_rate": 6.72381827446194e-05, "loss": 1.3825, "step": 13224 }, { "epoch": 0.7882941947788771, "grad_norm": 3.181640625, "learning_rate": 6.722930531378446e-05, "loss": 1.3085, "step": 13226 }, { "epoch": 0.7884133984980332, "grad_norm": 2.8368465900421143, "learning_rate": 6.722042726659835e-05, "loss": 1.3382, "step": 13228 }, { "epoch": 0.7885326022171891, "grad_norm": 2.8557097911834717, "learning_rate": 6.72115486033786e-05, "loss": 1.3512, "step": 13230 }, { "epoch": 0.7886518059363452, "grad_norm": 2.958594799041748, "learning_rate": 6.720266932444289e-05, "loss": 1.386, "step": 13232 }, { "epoch": 0.7887710096555013, "grad_norm": 3.0207715034484863, "learning_rate": 6.719378943010885e-05, "loss": 1.3623, "step": 13234 }, { "epoch": 0.7888902133746573, "grad_norm": 3.1009132862091064, "learning_rate": 6.71849089206941e-05, "loss": 1.1468, "step": 13236 }, { "epoch": 0.7890094170938133, "grad_norm": 3.212296724319458, "learning_rate": 6.71760277965164e-05, "loss": 1.2119, "step": 13238 }, { "epoch": 0.7891286208129694, "grad_norm": 3.2035248279571533, "learning_rate": 6.716714605789339e-05, "loss": 1.3777, "step": 13240 }, { "epoch": 0.7892478245321254, "grad_norm": 3.4759700298309326, "learning_rate": 6.715826370514283e-05, "loss": 1.4211, "step": 13242 }, { "epoch": 0.7893670282512815, "grad_norm": 3.117305040359497, "learning_rate": 6.714938073858248e-05, "loss": 1.2821, "step": 13244 }, { "epoch": 0.7894862319704374, "grad_norm": 3.1145360469818115, "learning_rate": 6.714049715853011e-05, "loss": 1.399, "step": 13246 }, { "epoch": 0.7896054356895935, "grad_norm": 2.689290761947632, "learning_rate": 6.71316129653035e-05, "loss": 1.3261, "step": 13248 }, { "epoch": 0.7897246394087496, "grad_norm": 3.456850051879883, "learning_rate": 6.71227281592205e-05, "loss": 1.3318, "step": 13250 }, { "epoch": 0.7898438431279056, "grad_norm": 2.9158778190612793, "learning_rate": 6.711384274059891e-05, "loss": 1.355, "step": 13252 }, { "epoch": 0.7899630468470616, "grad_norm": 3.0408294200897217, "learning_rate": 6.710495670975662e-05, "loss": 1.3434, "step": 13254 }, { "epoch": 0.7900822505662176, "grad_norm": 3.18326997756958, "learning_rate": 6.709607006701149e-05, "loss": 1.2233, "step": 13256 }, { "epoch": 0.7902014542853737, "grad_norm": 3.2073748111724854, "learning_rate": 6.708718281268145e-05, "loss": 1.3441, "step": 13258 }, { "epoch": 0.7903206580045298, "grad_norm": 3.006502866744995, "learning_rate": 6.707829494708441e-05, "loss": 1.1758, "step": 13260 }, { "epoch": 0.7904398617236857, "grad_norm": 3.239071846008301, "learning_rate": 6.706940647053833e-05, "loss": 1.3873, "step": 13262 }, { "epoch": 0.7905590654428418, "grad_norm": 3.1497955322265625, "learning_rate": 6.706051738336118e-05, "loss": 1.3555, "step": 13264 }, { "epoch": 0.7906782691619979, "grad_norm": 3.0619726181030273, "learning_rate": 6.705162768587096e-05, "loss": 1.3426, "step": 13266 }, { "epoch": 0.7907974728811539, "grad_norm": 3.0331220626831055, "learning_rate": 6.704273737838567e-05, "loss": 1.345, "step": 13268 }, { "epoch": 0.79091667660031, "grad_norm": 3.139463186264038, "learning_rate": 6.703384646122335e-05, "loss": 1.3091, "step": 13270 }, { "epoch": 0.7910358803194659, "grad_norm": 3.1906464099884033, "learning_rate": 6.702495493470206e-05, "loss": 1.3384, "step": 13272 }, { "epoch": 0.791155084038622, "grad_norm": 2.8175735473632812, "learning_rate": 6.70160627991399e-05, "loss": 1.2171, "step": 13274 }, { "epoch": 0.7912742877577781, "grad_norm": 4.027008533477783, "learning_rate": 6.700717005485493e-05, "loss": 1.356, "step": 13276 }, { "epoch": 0.791393491476934, "grad_norm": 3.467454195022583, "learning_rate": 6.69982767021653e-05, "loss": 1.2906, "step": 13278 }, { "epoch": 0.7915126951960901, "grad_norm": 3.343430519104004, "learning_rate": 6.698938274138918e-05, "loss": 1.3597, "step": 13280 }, { "epoch": 0.7916318989152461, "grad_norm": 3.137314558029175, "learning_rate": 6.698048817284468e-05, "loss": 1.4402, "step": 13282 }, { "epoch": 0.7917511026344022, "grad_norm": 2.8719892501831055, "learning_rate": 6.697159299685004e-05, "loss": 1.2943, "step": 13284 }, { "epoch": 0.7918703063535583, "grad_norm": 3.069509983062744, "learning_rate": 6.696269721372344e-05, "loss": 1.2804, "step": 13286 }, { "epoch": 0.7919895100727142, "grad_norm": 3.286771059036255, "learning_rate": 6.695380082378311e-05, "loss": 1.4356, "step": 13288 }, { "epoch": 0.7921087137918703, "grad_norm": 2.909682512283325, "learning_rate": 6.694490382734734e-05, "loss": 1.2537, "step": 13290 }, { "epoch": 0.7922279175110264, "grad_norm": 3.0842697620391846, "learning_rate": 6.693600622473438e-05, "loss": 1.3563, "step": 13292 }, { "epoch": 0.7923471212301824, "grad_norm": 3.1445815563201904, "learning_rate": 6.692710801626252e-05, "loss": 1.1783, "step": 13294 }, { "epoch": 0.7924663249493384, "grad_norm": 3.1124300956726074, "learning_rate": 6.69182092022501e-05, "loss": 1.2651, "step": 13296 }, { "epoch": 0.7925855286684944, "grad_norm": 3.3480234146118164, "learning_rate": 6.690930978301546e-05, "loss": 1.3608, "step": 13298 }, { "epoch": 0.7927047323876505, "grad_norm": 3.415719747543335, "learning_rate": 6.690040975887693e-05, "loss": 1.6197, "step": 13300 }, { "epoch": 0.7928239361068066, "grad_norm": 3.0838682651519775, "learning_rate": 6.689150913015293e-05, "loss": 1.4605, "step": 13302 }, { "epoch": 0.7929431398259625, "grad_norm": 3.0145175457000732, "learning_rate": 6.688260789716188e-05, "loss": 1.3788, "step": 13304 }, { "epoch": 0.7930623435451186, "grad_norm": 2.850703477859497, "learning_rate": 6.687370606022214e-05, "loss": 1.3535, "step": 13306 }, { "epoch": 0.7931815472642747, "grad_norm": 2.941255569458008, "learning_rate": 6.686480361965223e-05, "loss": 1.1741, "step": 13308 }, { "epoch": 0.7933007509834307, "grad_norm": 3.177475929260254, "learning_rate": 6.68559005757706e-05, "loss": 1.315, "step": 13310 }, { "epoch": 0.7934199547025867, "grad_norm": 3.079846143722534, "learning_rate": 6.684699692889571e-05, "loss": 1.3948, "step": 13312 }, { "epoch": 0.7935391584217427, "grad_norm": 2.8013498783111572, "learning_rate": 6.68380926793461e-05, "loss": 1.2399, "step": 13314 }, { "epoch": 0.7936583621408988, "grad_norm": 3.2051427364349365, "learning_rate": 6.682918782744032e-05, "loss": 1.2585, "step": 13316 }, { "epoch": 0.7937775658600549, "grad_norm": 3.0536224842071533, "learning_rate": 6.68202823734969e-05, "loss": 1.2947, "step": 13318 }, { "epoch": 0.7938967695792108, "grad_norm": 3.127406597137451, "learning_rate": 6.681137631783443e-05, "loss": 1.1892, "step": 13320 }, { "epoch": 0.7940159732983669, "grad_norm": 2.7035696506500244, "learning_rate": 6.680246966077151e-05, "loss": 1.2271, "step": 13322 }, { "epoch": 0.7941351770175229, "grad_norm": 2.8445258140563965, "learning_rate": 6.679356240262676e-05, "loss": 1.2459, "step": 13324 }, { "epoch": 0.794254380736679, "grad_norm": 3.0908408164978027, "learning_rate": 6.678465454371883e-05, "loss": 1.2563, "step": 13326 }, { "epoch": 0.7943735844558351, "grad_norm": 2.893000364303589, "learning_rate": 6.677574608436639e-05, "loss": 1.1788, "step": 13328 }, { "epoch": 0.794492788174991, "grad_norm": 3.2967960834503174, "learning_rate": 6.67668370248881e-05, "loss": 1.3499, "step": 13330 }, { "epoch": 0.7946119918941471, "grad_norm": 4.509522438049316, "learning_rate": 6.675792736560267e-05, "loss": 1.1366, "step": 13332 }, { "epoch": 0.7947311956133032, "grad_norm": 3.2227866649627686, "learning_rate": 6.674901710682886e-05, "loss": 1.27, "step": 13334 }, { "epoch": 0.7948503993324592, "grad_norm": 2.7819387912750244, "learning_rate": 6.67401062488854e-05, "loss": 1.29, "step": 13336 }, { "epoch": 0.7949696030516152, "grad_norm": 3.389017343521118, "learning_rate": 6.673119479209106e-05, "loss": 1.2333, "step": 13338 }, { "epoch": 0.7950888067707712, "grad_norm": 2.855109214782715, "learning_rate": 6.672228273676464e-05, "loss": 1.3278, "step": 13340 }, { "epoch": 0.7952080104899273, "grad_norm": 3.6280391216278076, "learning_rate": 6.671337008322495e-05, "loss": 1.3368, "step": 13342 }, { "epoch": 0.7953272142090834, "grad_norm": 3.4821832180023193, "learning_rate": 6.670445683179082e-05, "loss": 1.2839, "step": 13344 }, { "epoch": 0.7954464179282393, "grad_norm": 2.867886543273926, "learning_rate": 6.669554298278112e-05, "loss": 1.2907, "step": 13346 }, { "epoch": 0.7955656216473954, "grad_norm": 2.8418490886688232, "learning_rate": 6.668662853651472e-05, "loss": 1.2509, "step": 13348 }, { "epoch": 0.7956848253665514, "grad_norm": 3.048816204071045, "learning_rate": 6.667771349331055e-05, "loss": 1.3238, "step": 13350 }, { "epoch": 0.7958040290857075, "grad_norm": 3.236039876937866, "learning_rate": 6.666879785348748e-05, "loss": 1.3167, "step": 13352 }, { "epoch": 0.7959232328048635, "grad_norm": 3.047574281692505, "learning_rate": 6.665988161736448e-05, "loss": 1.2145, "step": 13354 }, { "epoch": 0.7960424365240195, "grad_norm": 3.099853038787842, "learning_rate": 6.665096478526053e-05, "loss": 1.2656, "step": 13356 }, { "epoch": 0.7961616402431756, "grad_norm": 3.2576348781585693, "learning_rate": 6.664204735749458e-05, "loss": 1.3316, "step": 13358 }, { "epoch": 0.7962808439623317, "grad_norm": 2.9594569206237793, "learning_rate": 6.663312933438567e-05, "loss": 1.438, "step": 13360 }, { "epoch": 0.7964000476814876, "grad_norm": 3.5618648529052734, "learning_rate": 6.662421071625283e-05, "loss": 1.3139, "step": 13362 }, { "epoch": 0.7965192514006437, "grad_norm": 2.860971450805664, "learning_rate": 6.661529150341506e-05, "loss": 1.2357, "step": 13364 }, { "epoch": 0.7966384551197997, "grad_norm": 3.368335247039795, "learning_rate": 6.660637169619148e-05, "loss": 1.2893, "step": 13366 }, { "epoch": 0.7967576588389558, "grad_norm": 3.1023223400115967, "learning_rate": 6.659745129490116e-05, "loss": 1.2385, "step": 13368 }, { "epoch": 0.7968768625581119, "grad_norm": 3.3376500606536865, "learning_rate": 6.658853029986322e-05, "loss": 1.3149, "step": 13370 }, { "epoch": 0.7969960662772678, "grad_norm": 3.431669235229492, "learning_rate": 6.65796087113968e-05, "loss": 1.3013, "step": 13372 }, { "epoch": 0.7971152699964239, "grad_norm": 2.77956485748291, "learning_rate": 6.657068652982105e-05, "loss": 1.267, "step": 13374 }, { "epoch": 0.7972344737155799, "grad_norm": 2.8445959091186523, "learning_rate": 6.656176375545514e-05, "loss": 1.3407, "step": 13376 }, { "epoch": 0.797353677434736, "grad_norm": 3.268670082092285, "learning_rate": 6.655284038861828e-05, "loss": 1.3249, "step": 13378 }, { "epoch": 0.797472881153892, "grad_norm": 3.1279473304748535, "learning_rate": 6.654391642962968e-05, "loss": 1.3537, "step": 13380 }, { "epoch": 0.797592084873048, "grad_norm": 3.182703971862793, "learning_rate": 6.653499187880859e-05, "loss": 1.1795, "step": 13382 }, { "epoch": 0.7977112885922041, "grad_norm": 2.9991519451141357, "learning_rate": 6.652606673647426e-05, "loss": 1.3348, "step": 13384 }, { "epoch": 0.7978304923113602, "grad_norm": 2.9437742233276367, "learning_rate": 6.651714100294599e-05, "loss": 1.3124, "step": 13386 }, { "epoch": 0.7979496960305161, "grad_norm": 3.3267569541931152, "learning_rate": 6.650821467854307e-05, "loss": 1.435, "step": 13388 }, { "epoch": 0.7980688997496722, "grad_norm": 2.751267433166504, "learning_rate": 6.649928776358483e-05, "loss": 1.3139, "step": 13390 }, { "epoch": 0.7981881034688282, "grad_norm": 2.9576611518859863, "learning_rate": 6.64903602583906e-05, "loss": 1.2518, "step": 13392 }, { "epoch": 0.7983073071879843, "grad_norm": 3.232517957687378, "learning_rate": 6.648143216327977e-05, "loss": 1.3189, "step": 13394 }, { "epoch": 0.7984265109071403, "grad_norm": 3.0577869415283203, "learning_rate": 6.647250347857172e-05, "loss": 1.4232, "step": 13396 }, { "epoch": 0.7985457146262963, "grad_norm": 2.9676451683044434, "learning_rate": 6.646357420458587e-05, "loss": 1.2043, "step": 13398 }, { "epoch": 0.7986649183454524, "grad_norm": 2.9753737449645996, "learning_rate": 6.645464434164166e-05, "loss": 1.2798, "step": 13400 }, { "epoch": 0.7987841220646085, "grad_norm": 2.8738458156585693, "learning_rate": 6.64457138900585e-05, "loss": 1.1958, "step": 13402 }, { "epoch": 0.7989033257837644, "grad_norm": 3.042874813079834, "learning_rate": 6.643678285015587e-05, "loss": 1.4359, "step": 13404 }, { "epoch": 0.7990225295029205, "grad_norm": 2.9862470626831055, "learning_rate": 6.642785122225331e-05, "loss": 1.3306, "step": 13406 }, { "epoch": 0.7991417332220765, "grad_norm": 3.213005542755127, "learning_rate": 6.641891900667028e-05, "loss": 1.3816, "step": 13408 }, { "epoch": 0.7992609369412326, "grad_norm": 2.9440722465515137, "learning_rate": 6.640998620372634e-05, "loss": 1.1143, "step": 13410 }, { "epoch": 0.7993801406603886, "grad_norm": 3.035351276397705, "learning_rate": 6.640105281374105e-05, "loss": 1.3244, "step": 13412 }, { "epoch": 0.7994993443795446, "grad_norm": 3.1318321228027344, "learning_rate": 6.6392118837034e-05, "loss": 1.3612, "step": 13414 }, { "epoch": 0.7996185480987007, "grad_norm": 3.0568275451660156, "learning_rate": 6.638318427392478e-05, "loss": 1.4332, "step": 13416 }, { "epoch": 0.7997377518178567, "grad_norm": 3.0644330978393555, "learning_rate": 6.6374249124733e-05, "loss": 1.125, "step": 13418 }, { "epoch": 0.7998569555370127, "grad_norm": 3.211216926574707, "learning_rate": 6.636531338977829e-05, "loss": 1.4302, "step": 13420 }, { "epoch": 0.7999761592561688, "grad_norm": 2.9934537410736084, "learning_rate": 6.635637706938032e-05, "loss": 1.3314, "step": 13422 }, { "epoch": 0.8000953629753248, "grad_norm": 3.2510008811950684, "learning_rate": 6.634744016385879e-05, "loss": 1.3686, "step": 13424 }, { "epoch": 0.8002145666944809, "grad_norm": 3.1044089794158936, "learning_rate": 6.63385026735334e-05, "loss": 1.2282, "step": 13426 }, { "epoch": 0.800333770413637, "grad_norm": 2.764463424682617, "learning_rate": 6.632956459872386e-05, "loss": 1.4272, "step": 13428 }, { "epoch": 0.8004529741327929, "grad_norm": 2.341984748840332, "learning_rate": 6.632062593974993e-05, "loss": 1.1652, "step": 13430 }, { "epoch": 0.800572177851949, "grad_norm": 3.173326253890991, "learning_rate": 6.631168669693137e-05, "loss": 1.3879, "step": 13432 }, { "epoch": 0.800691381571105, "grad_norm": 2.9578869342803955, "learning_rate": 6.630274687058795e-05, "loss": 1.2235, "step": 13434 }, { "epoch": 0.8008105852902611, "grad_norm": 3.1494922637939453, "learning_rate": 6.62938064610395e-05, "loss": 1.3645, "step": 13436 }, { "epoch": 0.8009297890094171, "grad_norm": 2.7637295722961426, "learning_rate": 6.628486546860586e-05, "loss": 1.2216, "step": 13438 }, { "epoch": 0.8010489927285731, "grad_norm": 3.2191319465637207, "learning_rate": 6.627592389360687e-05, "loss": 1.4257, "step": 13440 }, { "epoch": 0.8011681964477292, "grad_norm": 3.1262292861938477, "learning_rate": 6.626698173636236e-05, "loss": 1.2836, "step": 13442 }, { "epoch": 0.8012874001668852, "grad_norm": 3.2034945487976074, "learning_rate": 6.625803899719229e-05, "loss": 1.3216, "step": 13444 }, { "epoch": 0.8014066038860412, "grad_norm": 3.1763010025024414, "learning_rate": 6.624909567641653e-05, "loss": 1.439, "step": 13446 }, { "epoch": 0.8015258076051973, "grad_norm": 3.1676979064941406, "learning_rate": 6.624015177435502e-05, "loss": 1.3091, "step": 13448 }, { "epoch": 0.8016450113243533, "grad_norm": 2.88590931892395, "learning_rate": 6.623120729132771e-05, "loss": 1.4112, "step": 13450 }, { "epoch": 0.8017642150435094, "grad_norm": 3.3897480964660645, "learning_rate": 6.622226222765459e-05, "loss": 1.3115, "step": 13452 }, { "epoch": 0.8018834187626654, "grad_norm": 2.958160161972046, "learning_rate": 6.621331658365564e-05, "loss": 1.2411, "step": 13454 }, { "epoch": 0.8020026224818214, "grad_norm": 2.9623501300811768, "learning_rate": 6.620437035965088e-05, "loss": 1.1996, "step": 13456 }, { "epoch": 0.8021218262009775, "grad_norm": 2.727116584777832, "learning_rate": 6.619542355596035e-05, "loss": 1.1818, "step": 13458 }, { "epoch": 0.8022410299201335, "grad_norm": 3.0238687992095947, "learning_rate": 6.618647617290412e-05, "loss": 1.3201, "step": 13460 }, { "epoch": 0.8023602336392895, "grad_norm": 2.9962551593780518, "learning_rate": 6.617752821080225e-05, "loss": 1.4368, "step": 13462 }, { "epoch": 0.8024794373584456, "grad_norm": 2.9218013286590576, "learning_rate": 6.616857966997484e-05, "loss": 1.2582, "step": 13464 }, { "epoch": 0.8025986410776016, "grad_norm": 3.176387071609497, "learning_rate": 6.615963055074202e-05, "loss": 1.3692, "step": 13466 }, { "epoch": 0.8027178447967577, "grad_norm": 3.2429358959198, "learning_rate": 6.61506808534239e-05, "loss": 1.4743, "step": 13468 }, { "epoch": 0.8028370485159136, "grad_norm": 2.977552890777588, "learning_rate": 6.61417305783407e-05, "loss": 1.4021, "step": 13470 }, { "epoch": 0.8029562522350697, "grad_norm": 3.11628794670105, "learning_rate": 6.613277972581256e-05, "loss": 1.2422, "step": 13472 }, { "epoch": 0.8030754559542258, "grad_norm": 2.973510980606079, "learning_rate": 6.612382829615968e-05, "loss": 1.3195, "step": 13474 }, { "epoch": 0.8031946596733818, "grad_norm": 2.9840002059936523, "learning_rate": 6.611487628970232e-05, "loss": 1.1851, "step": 13476 }, { "epoch": 0.8033138633925379, "grad_norm": 3.1576268672943115, "learning_rate": 6.610592370676068e-05, "loss": 1.3327, "step": 13478 }, { "epoch": 0.8034330671116939, "grad_norm": 2.9039390087127686, "learning_rate": 6.609697054765505e-05, "loss": 1.2438, "step": 13480 }, { "epoch": 0.8035522708308499, "grad_norm": 2.9053242206573486, "learning_rate": 6.60880168127057e-05, "loss": 1.2283, "step": 13482 }, { "epoch": 0.803671474550006, "grad_norm": 3.0931155681610107, "learning_rate": 6.607906250223295e-05, "loss": 1.2532, "step": 13484 }, { "epoch": 0.803790678269162, "grad_norm": 3.323423147201538, "learning_rate": 6.607010761655711e-05, "loss": 1.2617, "step": 13486 }, { "epoch": 0.803909881988318, "grad_norm": 2.971703052520752, "learning_rate": 6.606115215599855e-05, "loss": 1.1895, "step": 13488 }, { "epoch": 0.8040290857074741, "grad_norm": 3.078037977218628, "learning_rate": 6.605219612087762e-05, "loss": 1.1603, "step": 13490 }, { "epoch": 0.8041482894266301, "grad_norm": 2.9021592140197754, "learning_rate": 6.604323951151472e-05, "loss": 1.2251, "step": 13492 }, { "epoch": 0.8042674931457862, "grad_norm": 3.3024864196777344, "learning_rate": 6.603428232823025e-05, "loss": 1.2452, "step": 13494 }, { "epoch": 0.8043866968649422, "grad_norm": 3.2543673515319824, "learning_rate": 6.602532457134463e-05, "loss": 1.2608, "step": 13496 }, { "epoch": 0.8045059005840982, "grad_norm": 3.2294914722442627, "learning_rate": 6.601636624117832e-05, "loss": 1.416, "step": 13498 }, { "epoch": 0.8046251043032543, "grad_norm": 3.2212822437286377, "learning_rate": 6.600740733805179e-05, "loss": 1.2484, "step": 13500 }, { "epoch": 0.8047443080224103, "grad_norm": 2.707008123397827, "learning_rate": 6.599844786228555e-05, "loss": 1.2297, "step": 13502 }, { "epoch": 0.8048635117415663, "grad_norm": 2.9864261150360107, "learning_rate": 6.598948781420007e-05, "loss": 1.2941, "step": 13504 }, { "epoch": 0.8049827154607224, "grad_norm": 3.127298355102539, "learning_rate": 6.598052719411592e-05, "loss": 1.2908, "step": 13506 }, { "epoch": 0.8051019191798784, "grad_norm": 3.0334627628326416, "learning_rate": 6.597156600235362e-05, "loss": 1.4672, "step": 13508 }, { "epoch": 0.8052211228990345, "grad_norm": 3.1419906616210938, "learning_rate": 6.596260423923376e-05, "loss": 1.4844, "step": 13510 }, { "epoch": 0.8053403266181904, "grad_norm": 3.4168949127197266, "learning_rate": 6.595364190507694e-05, "loss": 1.5211, "step": 13512 }, { "epoch": 0.8054595303373465, "grad_norm": 3.0415267944335938, "learning_rate": 6.594467900020376e-05, "loss": 1.2222, "step": 13514 }, { "epoch": 0.8055787340565026, "grad_norm": 3.047931432723999, "learning_rate": 6.593571552493486e-05, "loss": 1.2293, "step": 13516 }, { "epoch": 0.8056979377756586, "grad_norm": 3.269451141357422, "learning_rate": 6.592675147959088e-05, "loss": 1.3677, "step": 13518 }, { "epoch": 0.8058171414948146, "grad_norm": 3.339939594268799, "learning_rate": 6.591778686449252e-05, "loss": 1.3882, "step": 13520 }, { "epoch": 0.8059363452139707, "grad_norm": 3.1204183101654053, "learning_rate": 6.590882167996047e-05, "loss": 1.2349, "step": 13522 }, { "epoch": 0.8060555489331267, "grad_norm": 2.7759346961975098, "learning_rate": 6.589985592631542e-05, "loss": 1.3295, "step": 13524 }, { "epoch": 0.8061747526522828, "grad_norm": 2.9735054969787598, "learning_rate": 6.589088960387813e-05, "loss": 1.2035, "step": 13526 }, { "epoch": 0.8062939563714387, "grad_norm": 3.4996612071990967, "learning_rate": 6.588192271296936e-05, "loss": 1.3944, "step": 13528 }, { "epoch": 0.8064131600905948, "grad_norm": 2.690887451171875, "learning_rate": 6.587295525390985e-05, "loss": 1.4069, "step": 13530 }, { "epoch": 0.8065323638097509, "grad_norm": 2.6931447982788086, "learning_rate": 6.586398722702043e-05, "loss": 1.2599, "step": 13532 }, { "epoch": 0.8066515675289069, "grad_norm": 3.1060140132904053, "learning_rate": 6.585501863262192e-05, "loss": 1.3137, "step": 13534 }, { "epoch": 0.806770771248063, "grad_norm": 3.1979751586914062, "learning_rate": 6.584604947103514e-05, "loss": 1.2384, "step": 13536 }, { "epoch": 0.8068899749672189, "grad_norm": 3.1360023021698, "learning_rate": 6.583707974258096e-05, "loss": 1.2718, "step": 13538 }, { "epoch": 0.807009178686375, "grad_norm": 2.8849642276763916, "learning_rate": 6.582810944758024e-05, "loss": 1.2525, "step": 13540 }, { "epoch": 0.8071283824055311, "grad_norm": 3.316678524017334, "learning_rate": 6.581913858635389e-05, "loss": 1.5199, "step": 13542 }, { "epoch": 0.807247586124687, "grad_norm": 3.038295030593872, "learning_rate": 6.581016715922283e-05, "loss": 1.4904, "step": 13544 }, { "epoch": 0.8073667898438431, "grad_norm": 2.95731782913208, "learning_rate": 6.5801195166508e-05, "loss": 1.4153, "step": 13546 }, { "epoch": 0.8074859935629992, "grad_norm": 3.0068013668060303, "learning_rate": 6.579222260853036e-05, "loss": 1.217, "step": 13548 }, { "epoch": 0.8076051972821552, "grad_norm": 2.9332308769226074, "learning_rate": 6.578324948561087e-05, "loss": 1.2132, "step": 13550 }, { "epoch": 0.8077244010013113, "grad_norm": 3.1646158695220947, "learning_rate": 6.577427579807058e-05, "loss": 1.3281, "step": 13552 }, { "epoch": 0.8078436047204672, "grad_norm": 2.787832021713257, "learning_rate": 6.576530154623044e-05, "loss": 1.1919, "step": 13554 }, { "epoch": 0.8079628084396233, "grad_norm": 3.065610647201538, "learning_rate": 6.575632673041152e-05, "loss": 1.4153, "step": 13556 }, { "epoch": 0.8080820121587794, "grad_norm": 3.0589261054992676, "learning_rate": 6.574735135093491e-05, "loss": 1.1803, "step": 13558 }, { "epoch": 0.8082012158779354, "grad_norm": 3.0758056640625, "learning_rate": 6.573837540812165e-05, "loss": 1.352, "step": 13560 }, { "epoch": 0.8083204195970914, "grad_norm": 3.7635483741760254, "learning_rate": 6.572939890229284e-05, "loss": 1.4136, "step": 13562 }, { "epoch": 0.8084396233162474, "grad_norm": 3.326035737991333, "learning_rate": 6.572042183376962e-05, "loss": 1.3458, "step": 13564 }, { "epoch": 0.8085588270354035, "grad_norm": 3.525820732116699, "learning_rate": 6.571144420287314e-05, "loss": 1.4607, "step": 13566 }, { "epoch": 0.8086780307545596, "grad_norm": 2.801863193511963, "learning_rate": 6.570246600992453e-05, "loss": 1.2025, "step": 13568 }, { "epoch": 0.8087972344737155, "grad_norm": 3.742861032485962, "learning_rate": 6.569348725524499e-05, "loss": 1.3088, "step": 13570 }, { "epoch": 0.8089164381928716, "grad_norm": 3.1575980186462402, "learning_rate": 6.568450793915572e-05, "loss": 1.3368, "step": 13572 }, { "epoch": 0.8090356419120277, "grad_norm": 2.7836759090423584, "learning_rate": 6.567552806197792e-05, "loss": 1.1719, "step": 13574 }, { "epoch": 0.8091548456311837, "grad_norm": 3.135288715362549, "learning_rate": 6.566654762403286e-05, "loss": 1.2855, "step": 13576 }, { "epoch": 0.8092740493503398, "grad_norm": 2.8799915313720703, "learning_rate": 6.565756662564179e-05, "loss": 1.2923, "step": 13578 }, { "epoch": 0.8093932530694957, "grad_norm": 3.0103073120117188, "learning_rate": 6.564858506712599e-05, "loss": 1.2244, "step": 13580 }, { "epoch": 0.8095124567886518, "grad_norm": 2.945509672164917, "learning_rate": 6.563960294880677e-05, "loss": 1.2665, "step": 13582 }, { "epoch": 0.8096316605078079, "grad_norm": 3.8132712841033936, "learning_rate": 6.563062027100542e-05, "loss": 1.3382, "step": 13584 }, { "epoch": 0.8097508642269639, "grad_norm": 3.0158398151397705, "learning_rate": 6.562163703404333e-05, "loss": 1.217, "step": 13586 }, { "epoch": 0.8098700679461199, "grad_norm": 3.20436692237854, "learning_rate": 6.56126532382418e-05, "loss": 1.3114, "step": 13588 }, { "epoch": 0.809989271665276, "grad_norm": 3.2003366947174072, "learning_rate": 6.560366888392228e-05, "loss": 1.2674, "step": 13590 }, { "epoch": 0.810108475384432, "grad_norm": 3.4287397861480713, "learning_rate": 6.55946839714061e-05, "loss": 1.419, "step": 13592 }, { "epoch": 0.8102276791035881, "grad_norm": 2.487689733505249, "learning_rate": 6.558569850101475e-05, "loss": 1.2009, "step": 13594 }, { "epoch": 0.810346882822744, "grad_norm": 2.979980945587158, "learning_rate": 6.557671247306965e-05, "loss": 1.2365, "step": 13596 }, { "epoch": 0.8104660865419001, "grad_norm": 2.9267475605010986, "learning_rate": 6.556772588789221e-05, "loss": 1.4043, "step": 13598 }, { "epoch": 0.8105852902610562, "grad_norm": 3.1458277702331543, "learning_rate": 6.555873874580398e-05, "loss": 1.3741, "step": 13600 }, { "epoch": 0.8107044939802122, "grad_norm": 2.870468854904175, "learning_rate": 6.554975104712642e-05, "loss": 1.265, "step": 13602 }, { "epoch": 0.8108236976993682, "grad_norm": 3.389944553375244, "learning_rate": 6.554076279218106e-05, "loss": 1.3044, "step": 13604 }, { "epoch": 0.8109429014185242, "grad_norm": 3.1469154357910156, "learning_rate": 6.553177398128944e-05, "loss": 1.3425, "step": 13606 }, { "epoch": 0.8110621051376803, "grad_norm": 3.057192802429199, "learning_rate": 6.552278461477312e-05, "loss": 1.3354, "step": 13608 }, { "epoch": 0.8111813088568364, "grad_norm": 3.017202138900757, "learning_rate": 6.55137946929537e-05, "loss": 1.3789, "step": 13610 }, { "epoch": 0.8113005125759923, "grad_norm": 3.2252371311187744, "learning_rate": 6.550480421615275e-05, "loss": 1.1519, "step": 13612 }, { "epoch": 0.8114197162951484, "grad_norm": 3.4343936443328857, "learning_rate": 6.549581318469193e-05, "loss": 1.3161, "step": 13614 }, { "epoch": 0.8115389200143045, "grad_norm": 3.0687148571014404, "learning_rate": 6.548682159889283e-05, "loss": 1.1576, "step": 13616 }, { "epoch": 0.8116581237334605, "grad_norm": 3.0861504077911377, "learning_rate": 6.547782945907714e-05, "loss": 1.2704, "step": 13618 }, { "epoch": 0.8117773274526165, "grad_norm": 2.6193456649780273, "learning_rate": 6.546883676556652e-05, "loss": 1.239, "step": 13620 }, { "epoch": 0.8118965311717725, "grad_norm": 2.8998451232910156, "learning_rate": 6.545984351868269e-05, "loss": 1.2881, "step": 13622 }, { "epoch": 0.8120157348909286, "grad_norm": 2.8422203063964844, "learning_rate": 6.545084971874738e-05, "loss": 1.2539, "step": 13624 }, { "epoch": 0.8121349386100847, "grad_norm": 3.3420045375823975, "learning_rate": 6.54418553660823e-05, "loss": 1.3235, "step": 13626 }, { "epoch": 0.8122541423292406, "grad_norm": 3.05241060256958, "learning_rate": 6.543286046100921e-05, "loss": 1.4359, "step": 13628 }, { "epoch": 0.8123733460483967, "grad_norm": 2.774531602859497, "learning_rate": 6.542386500384992e-05, "loss": 1.1954, "step": 13630 }, { "epoch": 0.8124925497675527, "grad_norm": 3.08278489112854, "learning_rate": 6.541486899492617e-05, "loss": 1.199, "step": 13632 }, { "epoch": 0.8126117534867088, "grad_norm": 3.0724759101867676, "learning_rate": 6.540587243455985e-05, "loss": 1.2681, "step": 13634 }, { "epoch": 0.8127309572058649, "grad_norm": 2.962862968444824, "learning_rate": 6.539687532307275e-05, "loss": 1.2033, "step": 13636 }, { "epoch": 0.8128501609250208, "grad_norm": 3.057480812072754, "learning_rate": 6.538787766078673e-05, "loss": 1.4088, "step": 13638 }, { "epoch": 0.8129693646441769, "grad_norm": 3.191225051879883, "learning_rate": 6.537887944802369e-05, "loss": 1.1849, "step": 13640 }, { "epoch": 0.813088568363333, "grad_norm": 3.062727212905884, "learning_rate": 6.536988068510552e-05, "loss": 1.308, "step": 13642 }, { "epoch": 0.813207772082489, "grad_norm": 2.983151435852051, "learning_rate": 6.536088137235411e-05, "loss": 1.3648, "step": 13644 }, { "epoch": 0.813326975801645, "grad_norm": 3.260730743408203, "learning_rate": 6.535188151009143e-05, "loss": 1.4051, "step": 13646 }, { "epoch": 0.813446179520801, "grad_norm": 2.920832633972168, "learning_rate": 6.534288109863941e-05, "loss": 1.2442, "step": 13648 }, { "epoch": 0.8135653832399571, "grad_norm": 3.3347008228302, "learning_rate": 6.533388013832006e-05, "loss": 1.2517, "step": 13650 }, { "epoch": 0.8136845869591132, "grad_norm": 3.0796499252319336, "learning_rate": 6.532487862945534e-05, "loss": 1.2215, "step": 13652 }, { "epoch": 0.8138037906782691, "grad_norm": 3.1268234252929688, "learning_rate": 6.531587657236727e-05, "loss": 1.2776, "step": 13654 }, { "epoch": 0.8139229943974252, "grad_norm": 2.9841012954711914, "learning_rate": 6.530687396737791e-05, "loss": 1.2146, "step": 13656 }, { "epoch": 0.8140421981165812, "grad_norm": 2.8731870651245117, "learning_rate": 6.52978708148093e-05, "loss": 1.259, "step": 13658 }, { "epoch": 0.8141614018357373, "grad_norm": 3.0773398876190186, "learning_rate": 6.528886711498348e-05, "loss": 1.3844, "step": 13660 }, { "epoch": 0.8142806055548933, "grad_norm": 3.2796835899353027, "learning_rate": 6.52798628682226e-05, "loss": 1.405, "step": 13662 }, { "epoch": 0.8143998092740493, "grad_norm": 3.1146013736724854, "learning_rate": 6.527085807484873e-05, "loss": 1.3779, "step": 13664 }, { "epoch": 0.8145190129932054, "grad_norm": 3.419084310531616, "learning_rate": 6.526185273518402e-05, "loss": 1.324, "step": 13666 }, { "epoch": 0.8146382167123615, "grad_norm": 2.89192795753479, "learning_rate": 6.525284684955063e-05, "loss": 1.27, "step": 13668 }, { "epoch": 0.8147574204315174, "grad_norm": 3.252784013748169, "learning_rate": 6.52438404182707e-05, "loss": 1.4154, "step": 13670 }, { "epoch": 0.8148766241506735, "grad_norm": 3.06744122505188, "learning_rate": 6.523483344166648e-05, "loss": 1.3368, "step": 13672 }, { "epoch": 0.8149958278698295, "grad_norm": 3.0552139282226562, "learning_rate": 6.522582592006011e-05, "loss": 1.3367, "step": 13674 }, { "epoch": 0.8151150315889856, "grad_norm": 3.331223487854004, "learning_rate": 6.521681785377386e-05, "loss": 1.4332, "step": 13676 }, { "epoch": 0.8152342353081417, "grad_norm": 3.5742270946502686, "learning_rate": 6.520780924312997e-05, "loss": 1.2743, "step": 13678 }, { "epoch": 0.8153534390272976, "grad_norm": 2.843946933746338, "learning_rate": 6.519880008845071e-05, "loss": 1.1857, "step": 13680 }, { "epoch": 0.8154726427464537, "grad_norm": 3.2816474437713623, "learning_rate": 6.518979039005838e-05, "loss": 1.3725, "step": 13682 }, { "epoch": 0.8155918464656098, "grad_norm": 2.910595655441284, "learning_rate": 6.518078014827527e-05, "loss": 1.3466, "step": 13684 }, { "epoch": 0.8157110501847658, "grad_norm": 6.5019731521606445, "learning_rate": 6.517176936342372e-05, "loss": 1.3863, "step": 13686 }, { "epoch": 0.8158302539039218, "grad_norm": 3.1567962169647217, "learning_rate": 6.516275803582606e-05, "loss": 1.365, "step": 13688 }, { "epoch": 0.8159494576230778, "grad_norm": 2.8735666275024414, "learning_rate": 6.515374616580466e-05, "loss": 1.2838, "step": 13690 }, { "epoch": 0.8160686613422339, "grad_norm": 2.773430585861206, "learning_rate": 6.514473375368192e-05, "loss": 1.2951, "step": 13692 }, { "epoch": 0.81618786506139, "grad_norm": 3.2710719108581543, "learning_rate": 6.513572079978025e-05, "loss": 1.2928, "step": 13694 }, { "epoch": 0.8163070687805459, "grad_norm": 3.3031580448150635, "learning_rate": 6.512670730442204e-05, "loss": 1.4427, "step": 13696 }, { "epoch": 0.816426272499702, "grad_norm": 3.279176712036133, "learning_rate": 6.511769326792976e-05, "loss": 1.2871, "step": 13698 }, { "epoch": 0.816545476218858, "grad_norm": 2.914367914199829, "learning_rate": 6.510867869062586e-05, "loss": 1.2443, "step": 13700 }, { "epoch": 0.8166646799380141, "grad_norm": 3.2169687747955322, "learning_rate": 6.509966357283285e-05, "loss": 1.4331, "step": 13702 }, { "epoch": 0.8167838836571701, "grad_norm": 3.1830358505249023, "learning_rate": 6.509064791487319e-05, "loss": 1.3591, "step": 13704 }, { "epoch": 0.8169030873763261, "grad_norm": 3.1756784915924072, "learning_rate": 6.508163171706944e-05, "loss": 1.3433, "step": 13706 }, { "epoch": 0.8170222910954822, "grad_norm": 2.875854969024658, "learning_rate": 6.507261497974411e-05, "loss": 1.2001, "step": 13708 }, { "epoch": 0.8171414948146383, "grad_norm": 3.090297222137451, "learning_rate": 6.506359770321977e-05, "loss": 1.2757, "step": 13710 }, { "epoch": 0.8172606985337942, "grad_norm": 3.1037750244140625, "learning_rate": 6.505457988781901e-05, "loss": 1.3008, "step": 13712 }, { "epoch": 0.8173799022529503, "grad_norm": 3.225349187850952, "learning_rate": 6.504556153386442e-05, "loss": 1.3201, "step": 13714 }, { "epoch": 0.8174991059721063, "grad_norm": 3.3119046688079834, "learning_rate": 6.503654264167861e-05, "loss": 1.3419, "step": 13716 }, { "epoch": 0.8176183096912624, "grad_norm": 3.1657872200012207, "learning_rate": 6.502752321158423e-05, "loss": 1.2066, "step": 13718 }, { "epoch": 0.8177375134104184, "grad_norm": 3.259472608566284, "learning_rate": 6.501850324390391e-05, "loss": 1.3814, "step": 13720 }, { "epoch": 0.8178567171295744, "grad_norm": 2.9716975688934326, "learning_rate": 6.500948273896036e-05, "loss": 1.266, "step": 13722 }, { "epoch": 0.8179759208487305, "grad_norm": 3.0982539653778076, "learning_rate": 6.500046169707625e-05, "loss": 1.3705, "step": 13724 }, { "epoch": 0.8180951245678865, "grad_norm": 3.106292963027954, "learning_rate": 6.49914401185743e-05, "loss": 1.2987, "step": 13726 }, { "epoch": 0.8182143282870425, "grad_norm": 3.2717058658599854, "learning_rate": 6.498241800377724e-05, "loss": 1.3997, "step": 13728 }, { "epoch": 0.8183335320061986, "grad_norm": 3.030456066131592, "learning_rate": 6.497339535300785e-05, "loss": 1.3776, "step": 13730 }, { "epoch": 0.8184527357253546, "grad_norm": 3.226651906967163, "learning_rate": 6.496437216658887e-05, "loss": 1.2867, "step": 13732 }, { "epoch": 0.8185719394445107, "grad_norm": 2.792961597442627, "learning_rate": 6.495534844484309e-05, "loss": 1.2278, "step": 13734 }, { "epoch": 0.8186911431636668, "grad_norm": 2.978459358215332, "learning_rate": 6.494632418809332e-05, "loss": 1.2845, "step": 13736 }, { "epoch": 0.8188103468828227, "grad_norm": 2.741821527481079, "learning_rate": 6.493729939666242e-05, "loss": 1.4368, "step": 13738 }, { "epoch": 0.8189295506019788, "grad_norm": 3.5518834590911865, "learning_rate": 6.492827407087319e-05, "loss": 1.2766, "step": 13740 }, { "epoch": 0.8190487543211348, "grad_norm": 2.906925916671753, "learning_rate": 6.491924821104851e-05, "loss": 1.2399, "step": 13742 }, { "epoch": 0.8191679580402909, "grad_norm": 3.352945327758789, "learning_rate": 6.49102218175113e-05, "loss": 1.6385, "step": 13744 }, { "epoch": 0.8192871617594469, "grad_norm": 3.217627763748169, "learning_rate": 6.490119489058443e-05, "loss": 1.4661, "step": 13746 }, { "epoch": 0.8194063654786029, "grad_norm": 2.9777939319610596, "learning_rate": 6.489216743059085e-05, "loss": 1.3339, "step": 13748 }, { "epoch": 0.819525569197759, "grad_norm": 3.237298011779785, "learning_rate": 6.488313943785347e-05, "loss": 1.3065, "step": 13750 }, { "epoch": 0.819644772916915, "grad_norm": 3.1665871143341064, "learning_rate": 6.487411091269529e-05, "loss": 1.469, "step": 13752 }, { "epoch": 0.819763976636071, "grad_norm": 3.119290351867676, "learning_rate": 6.486508185543925e-05, "loss": 1.3021, "step": 13754 }, { "epoch": 0.8198831803552271, "grad_norm": 3.147866725921631, "learning_rate": 6.485605226640837e-05, "loss": 1.22, "step": 13756 }, { "epoch": 0.8200023840743831, "grad_norm": 3.0490269660949707, "learning_rate": 6.484702214592569e-05, "loss": 1.2931, "step": 13758 }, { "epoch": 0.8201215877935392, "grad_norm": 3.048096179962158, "learning_rate": 6.48379914943142e-05, "loss": 1.3699, "step": 13760 }, { "epoch": 0.8202407915126952, "grad_norm": 3.073692798614502, "learning_rate": 6.482896031189703e-05, "loss": 1.3542, "step": 13762 }, { "epoch": 0.8203599952318512, "grad_norm": 2.931121826171875, "learning_rate": 6.481992859899718e-05, "loss": 1.1962, "step": 13764 }, { "epoch": 0.8204791989510073, "grad_norm": 3.0020313262939453, "learning_rate": 6.481089635593778e-05, "loss": 1.3091, "step": 13766 }, { "epoch": 0.8205984026701633, "grad_norm": 3.199237823486328, "learning_rate": 6.480186358304196e-05, "loss": 1.4747, "step": 13768 }, { "epoch": 0.8207176063893193, "grad_norm": 2.919480562210083, "learning_rate": 6.479283028063282e-05, "loss": 1.3466, "step": 13770 }, { "epoch": 0.8208368101084754, "grad_norm": 3.1456377506256104, "learning_rate": 6.478379644903353e-05, "loss": 1.28, "step": 13772 }, { "epoch": 0.8209560138276314, "grad_norm": 2.9344089031219482, "learning_rate": 6.477476208856726e-05, "loss": 1.2742, "step": 13774 }, { "epoch": 0.8210752175467875, "grad_norm": 3.0320112705230713, "learning_rate": 6.47657271995572e-05, "loss": 1.3482, "step": 13776 }, { "epoch": 0.8211944212659436, "grad_norm": 2.9072048664093018, "learning_rate": 6.475669178232656e-05, "loss": 1.2386, "step": 13778 }, { "epoch": 0.8213136249850995, "grad_norm": 3.317452907562256, "learning_rate": 6.474765583719858e-05, "loss": 1.3911, "step": 13780 }, { "epoch": 0.8214328287042556, "grad_norm": 2.991393566131592, "learning_rate": 6.473861936449646e-05, "loss": 1.337, "step": 13782 }, { "epoch": 0.8215520324234116, "grad_norm": 3.877208948135376, "learning_rate": 6.472958236454352e-05, "loss": 1.4658, "step": 13784 }, { "epoch": 0.8216712361425677, "grad_norm": 3.12283992767334, "learning_rate": 6.472054483766301e-05, "loss": 1.3392, "step": 13786 }, { "epoch": 0.8217904398617237, "grad_norm": 3.116511821746826, "learning_rate": 6.471150678417826e-05, "loss": 1.173, "step": 13788 }, { "epoch": 0.8219096435808797, "grad_norm": 2.8414525985717773, "learning_rate": 6.470246820441256e-05, "loss": 1.1851, "step": 13790 }, { "epoch": 0.8220288473000358, "grad_norm": 3.4663732051849365, "learning_rate": 6.469342909868928e-05, "loss": 1.4479, "step": 13792 }, { "epoch": 0.8221480510191918, "grad_norm": 2.963639497756958, "learning_rate": 6.468438946733178e-05, "loss": 1.3066, "step": 13794 }, { "epoch": 0.8222672547383478, "grad_norm": 3.0042574405670166, "learning_rate": 6.46753493106634e-05, "loss": 1.3143, "step": 13796 }, { "epoch": 0.8223864584575039, "grad_norm": 3.173774242401123, "learning_rate": 6.466630862900757e-05, "loss": 1.214, "step": 13798 }, { "epoch": 0.8225056621766599, "grad_norm": 3.0891966819763184, "learning_rate": 6.46572674226877e-05, "loss": 1.1937, "step": 13800 }, { "epoch": 0.822624865895816, "grad_norm": 3.070112705230713, "learning_rate": 6.464822569202724e-05, "loss": 1.2182, "step": 13802 }, { "epoch": 0.822744069614972, "grad_norm": 3.244399070739746, "learning_rate": 6.463918343734961e-05, "loss": 1.3463, "step": 13804 }, { "epoch": 0.822863273334128, "grad_norm": 3.2125914096832275, "learning_rate": 6.463014065897832e-05, "loss": 1.188, "step": 13806 }, { "epoch": 0.8229824770532841, "grad_norm": 3.133265495300293, "learning_rate": 6.462109735723683e-05, "loss": 1.2681, "step": 13808 }, { "epoch": 0.8231016807724401, "grad_norm": 2.851421356201172, "learning_rate": 6.461205353244865e-05, "loss": 1.2466, "step": 13810 }, { "epoch": 0.8232208844915961, "grad_norm": 3.0624003410339355, "learning_rate": 6.460300918493735e-05, "loss": 1.3471, "step": 13812 }, { "epoch": 0.8233400882107522, "grad_norm": 2.8990540504455566, "learning_rate": 6.459396431502644e-05, "loss": 1.2582, "step": 13814 }, { "epoch": 0.8234592919299082, "grad_norm": 3.3828296661376953, "learning_rate": 6.458491892303949e-05, "loss": 1.3282, "step": 13816 }, { "epoch": 0.8235784956490643, "grad_norm": 3.224325656890869, "learning_rate": 6.457587300930008e-05, "loss": 1.2083, "step": 13818 }, { "epoch": 0.8236976993682202, "grad_norm": 2.8174946308135986, "learning_rate": 6.456682657413182e-05, "loss": 1.2999, "step": 13820 }, { "epoch": 0.8238169030873763, "grad_norm": 3.21258807182312, "learning_rate": 6.455777961785833e-05, "loss": 1.2722, "step": 13822 }, { "epoch": 0.8239361068065324, "grad_norm": 3.0921990871429443, "learning_rate": 6.454873214080328e-05, "loss": 1.2457, "step": 13824 }, { "epoch": 0.8240553105256884, "grad_norm": 3.5655031204223633, "learning_rate": 6.453968414329028e-05, "loss": 1.3665, "step": 13826 }, { "epoch": 0.8241745142448444, "grad_norm": 3.4333693981170654, "learning_rate": 6.453063562564304e-05, "loss": 1.2939, "step": 13828 }, { "epoch": 0.8242937179640005, "grad_norm": 2.9629929065704346, "learning_rate": 6.452158658818524e-05, "loss": 1.2248, "step": 13830 }, { "epoch": 0.8244129216831565, "grad_norm": 3.425814151763916, "learning_rate": 6.451253703124061e-05, "loss": 1.27, "step": 13832 }, { "epoch": 0.8245321254023126, "grad_norm": 2.982295274734497, "learning_rate": 6.450348695513289e-05, "loss": 1.3488, "step": 13834 }, { "epoch": 0.8246513291214685, "grad_norm": 3.188425064086914, "learning_rate": 6.449443636018579e-05, "loss": 1.2936, "step": 13836 }, { "epoch": 0.8247705328406246, "grad_norm": 2.906506299972534, "learning_rate": 6.448538524672314e-05, "loss": 1.4009, "step": 13838 }, { "epoch": 0.8248897365597807, "grad_norm": 2.7937185764312744, "learning_rate": 6.447633361506867e-05, "loss": 1.2086, "step": 13840 }, { "epoch": 0.8250089402789367, "grad_norm": 3.0669634342193604, "learning_rate": 6.446728146554623e-05, "loss": 1.2007, "step": 13842 }, { "epoch": 0.8251281439980928, "grad_norm": 3.4265177249908447, "learning_rate": 6.445822879847963e-05, "loss": 1.3562, "step": 13844 }, { "epoch": 0.8252473477172487, "grad_norm": 2.9959867000579834, "learning_rate": 6.444917561419272e-05, "loss": 1.2655, "step": 13846 }, { "epoch": 0.8253665514364048, "grad_norm": 3.2858834266662598, "learning_rate": 6.444012191300936e-05, "loss": 1.2961, "step": 13848 }, { "epoch": 0.8254857551555609, "grad_norm": 3.037508249282837, "learning_rate": 6.443106769525342e-05, "loss": 1.2772, "step": 13850 }, { "epoch": 0.8256049588747169, "grad_norm": 3.466071844100952, "learning_rate": 6.442201296124884e-05, "loss": 1.369, "step": 13852 }, { "epoch": 0.8257241625938729, "grad_norm": 2.696462869644165, "learning_rate": 6.441295771131949e-05, "loss": 1.3291, "step": 13854 }, { "epoch": 0.825843366313029, "grad_norm": 2.9421026706695557, "learning_rate": 6.440390194578934e-05, "loss": 1.2374, "step": 13856 }, { "epoch": 0.825962570032185, "grad_norm": 3.247556447982788, "learning_rate": 6.439484566498233e-05, "loss": 1.3665, "step": 13858 }, { "epoch": 0.8260817737513411, "grad_norm": 3.0057694911956787, "learning_rate": 6.438578886922244e-05, "loss": 1.3427, "step": 13860 }, { "epoch": 0.826200977470497, "grad_norm": 2.755557060241699, "learning_rate": 6.437673155883366e-05, "loss": 1.1828, "step": 13862 }, { "epoch": 0.8263201811896531, "grad_norm": 3.023024559020996, "learning_rate": 6.436767373414001e-05, "loss": 1.1461, "step": 13864 }, { "epoch": 0.8264393849088092, "grad_norm": 3.4322874546051025, "learning_rate": 6.43586153954655e-05, "loss": 1.4107, "step": 13866 }, { "epoch": 0.8265585886279652, "grad_norm": 2.8962597846984863, "learning_rate": 6.43495565431342e-05, "loss": 1.2681, "step": 13868 }, { "epoch": 0.8266777923471212, "grad_norm": 3.356720447540283, "learning_rate": 6.434049717747017e-05, "loss": 1.3162, "step": 13870 }, { "epoch": 0.8267969960662773, "grad_norm": 2.820786476135254, "learning_rate": 6.433143729879748e-05, "loss": 1.3736, "step": 13872 }, { "epoch": 0.8269161997854333, "grad_norm": 3.1370797157287598, "learning_rate": 6.432237690744023e-05, "loss": 1.3817, "step": 13874 }, { "epoch": 0.8270354035045894, "grad_norm": 2.9720494747161865, "learning_rate": 6.431331600372258e-05, "loss": 1.3139, "step": 13876 }, { "epoch": 0.8271546072237453, "grad_norm": 2.827810287475586, "learning_rate": 6.430425458796864e-05, "loss": 1.1342, "step": 13878 }, { "epoch": 0.8272738109429014, "grad_norm": 2.961381673812866, "learning_rate": 6.429519266050256e-05, "loss": 1.1747, "step": 13880 }, { "epoch": 0.8273930146620575, "grad_norm": 3.2094216346740723, "learning_rate": 6.428613022164854e-05, "loss": 1.2187, "step": 13882 }, { "epoch": 0.8275122183812135, "grad_norm": 3.1677603721618652, "learning_rate": 6.427706727173077e-05, "loss": 1.3018, "step": 13884 }, { "epoch": 0.8276314221003696, "grad_norm": 3.0042617321014404, "learning_rate": 6.426800381107343e-05, "loss": 1.3668, "step": 13886 }, { "epoch": 0.8277506258195255, "grad_norm": 3.016005754470825, "learning_rate": 6.42589398400008e-05, "loss": 1.3794, "step": 13888 }, { "epoch": 0.8278698295386816, "grad_norm": 3.0911357402801514, "learning_rate": 6.424987535883708e-05, "loss": 1.2789, "step": 13890 }, { "epoch": 0.8279890332578377, "grad_norm": 3.085688829421997, "learning_rate": 6.424081036790659e-05, "loss": 1.1392, "step": 13892 }, { "epoch": 0.8281082369769937, "grad_norm": 2.900104522705078, "learning_rate": 6.423174486753355e-05, "loss": 1.4783, "step": 13894 }, { "epoch": 0.8282274406961497, "grad_norm": 3.2360076904296875, "learning_rate": 6.422267885804235e-05, "loss": 1.3332, "step": 13896 }, { "epoch": 0.8283466444153058, "grad_norm": 3.3924574851989746, "learning_rate": 6.421361233975726e-05, "loss": 1.3305, "step": 13898 }, { "epoch": 0.8284658481344618, "grad_norm": 2.9824037551879883, "learning_rate": 6.42045453130026e-05, "loss": 1.2258, "step": 13900 }, { "epoch": 0.8285850518536179, "grad_norm": 3.047442674636841, "learning_rate": 6.419547777810276e-05, "loss": 1.3047, "step": 13902 }, { "epoch": 0.8287042555727738, "grad_norm": 3.121962070465088, "learning_rate": 6.418640973538211e-05, "loss": 1.1804, "step": 13904 }, { "epoch": 0.8288234592919299, "grad_norm": 3.382828950881958, "learning_rate": 6.417734118516505e-05, "loss": 1.3935, "step": 13906 }, { "epoch": 0.828942663011086, "grad_norm": 3.1392557621002197, "learning_rate": 6.416827212777599e-05, "loss": 1.2632, "step": 13908 }, { "epoch": 0.829061866730242, "grad_norm": 2.840834140777588, "learning_rate": 6.415920256353935e-05, "loss": 1.0949, "step": 13910 }, { "epoch": 0.829181070449398, "grad_norm": 3.095557689666748, "learning_rate": 6.41501324927796e-05, "loss": 1.4391, "step": 13912 }, { "epoch": 0.829300274168554, "grad_norm": 3.1813547611236572, "learning_rate": 6.414106191582119e-05, "loss": 1.2507, "step": 13914 }, { "epoch": 0.8294194778877101, "grad_norm": 2.9635775089263916, "learning_rate": 6.413199083298861e-05, "loss": 1.235, "step": 13916 }, { "epoch": 0.8295386816068662, "grad_norm": 3.2503342628479004, "learning_rate": 6.412291924460639e-05, "loss": 1.2693, "step": 13918 }, { "epoch": 0.8296578853260221, "grad_norm": 3.541882276535034, "learning_rate": 6.4113847150999e-05, "loss": 1.4202, "step": 13920 }, { "epoch": 0.8297770890451782, "grad_norm": 3.154932975769043, "learning_rate": 6.410477455249101e-05, "loss": 1.2915, "step": 13922 }, { "epoch": 0.8298962927643343, "grad_norm": 3.257984161376953, "learning_rate": 6.409570144940698e-05, "loss": 1.3718, "step": 13924 }, { "epoch": 0.8300154964834903, "grad_norm": 3.0016515254974365, "learning_rate": 6.408662784207149e-05, "loss": 1.2562, "step": 13926 }, { "epoch": 0.8301347002026463, "grad_norm": 2.9897594451904297, "learning_rate": 6.407755373080912e-05, "loss": 1.3443, "step": 13928 }, { "epoch": 0.8302539039218023, "grad_norm": 2.76971435546875, "learning_rate": 6.406847911594449e-05, "loss": 1.2632, "step": 13930 }, { "epoch": 0.8303731076409584, "grad_norm": 2.8729825019836426, "learning_rate": 6.405940399780222e-05, "loss": 1.2852, "step": 13932 }, { "epoch": 0.8304923113601145, "grad_norm": 3.1462161540985107, "learning_rate": 6.405032837670697e-05, "loss": 1.2487, "step": 13934 }, { "epoch": 0.8306115150792704, "grad_norm": 3.025230646133423, "learning_rate": 6.40412522529834e-05, "loss": 1.2143, "step": 13936 }, { "epoch": 0.8307307187984265, "grad_norm": 2.988044500350952, "learning_rate": 6.403217562695619e-05, "loss": 1.1717, "step": 13938 }, { "epoch": 0.8308499225175825, "grad_norm": 3.0952727794647217, "learning_rate": 6.402309849895004e-05, "loss": 1.179, "step": 13940 }, { "epoch": 0.8309691262367386, "grad_norm": 2.4958105087280273, "learning_rate": 6.401402086928971e-05, "loss": 1.2128, "step": 13942 }, { "epoch": 0.8310883299558947, "grad_norm": 2.650355339050293, "learning_rate": 6.400494273829987e-05, "loss": 1.1755, "step": 13944 }, { "epoch": 0.8312075336750506, "grad_norm": 3.383704662322998, "learning_rate": 6.399586410630532e-05, "loss": 1.2941, "step": 13946 }, { "epoch": 0.8313267373942067, "grad_norm": 2.816953659057617, "learning_rate": 6.398678497363085e-05, "loss": 1.2499, "step": 13948 }, { "epoch": 0.8314459411133628, "grad_norm": 3.3804538249969482, "learning_rate": 6.39777053406012e-05, "loss": 1.3324, "step": 13950 }, { "epoch": 0.8315651448325188, "grad_norm": 3.8238525390625, "learning_rate": 6.396862520754118e-05, "loss": 1.2689, "step": 13952 }, { "epoch": 0.8316843485516748, "grad_norm": 3.1947851181030273, "learning_rate": 6.395954457477568e-05, "loss": 1.2712, "step": 13954 }, { "epoch": 0.8318035522708308, "grad_norm": 2.8301172256469727, "learning_rate": 6.395046344262951e-05, "loss": 1.2762, "step": 13956 }, { "epoch": 0.8319227559899869, "grad_norm": 3.4151928424835205, "learning_rate": 6.394138181142753e-05, "loss": 1.3856, "step": 13958 }, { "epoch": 0.832041959709143, "grad_norm": 3.1892645359039307, "learning_rate": 6.393229968149462e-05, "loss": 1.3927, "step": 13960 }, { "epoch": 0.8321611634282989, "grad_norm": 3.342526435852051, "learning_rate": 6.392321705315569e-05, "loss": 1.2649, "step": 13962 }, { "epoch": 0.832280367147455, "grad_norm": 2.731693983078003, "learning_rate": 6.391413392673563e-05, "loss": 1.2467, "step": 13964 }, { "epoch": 0.8323995708666111, "grad_norm": 3.055880546569824, "learning_rate": 6.390505030255939e-05, "loss": 1.3245, "step": 13966 }, { "epoch": 0.8325187745857671, "grad_norm": 3.371291160583496, "learning_rate": 6.389596618095193e-05, "loss": 1.2632, "step": 13968 }, { "epoch": 0.8326379783049231, "grad_norm": 3.3135294914245605, "learning_rate": 6.388688156223823e-05, "loss": 1.3593, "step": 13970 }, { "epoch": 0.8327571820240791, "grad_norm": 2.920193672180176, "learning_rate": 6.387779644674326e-05, "loss": 1.1883, "step": 13972 }, { "epoch": 0.8328763857432352, "grad_norm": 3.0731797218322754, "learning_rate": 6.386871083479202e-05, "loss": 1.2714, "step": 13974 }, { "epoch": 0.8329955894623913, "grad_norm": 3.139577865600586, "learning_rate": 6.385962472670953e-05, "loss": 1.1859, "step": 13976 }, { "epoch": 0.8331147931815472, "grad_norm": 2.959803581237793, "learning_rate": 6.385053812282086e-05, "loss": 1.2264, "step": 13978 }, { "epoch": 0.8332339969007033, "grad_norm": 3.301942825317383, "learning_rate": 6.384145102345105e-05, "loss": 1.372, "step": 13980 }, { "epoch": 0.8333532006198593, "grad_norm": 3.08595871925354, "learning_rate": 6.383236342892517e-05, "loss": 1.3609, "step": 13982 }, { "epoch": 0.8334724043390154, "grad_norm": 3.2151379585266113, "learning_rate": 6.382327533956834e-05, "loss": 1.4546, "step": 13984 }, { "epoch": 0.8335916080581715, "grad_norm": 3.382795572280884, "learning_rate": 6.381418675570564e-05, "loss": 1.355, "step": 13986 }, { "epoch": 0.8337108117773274, "grad_norm": 3.174787998199463, "learning_rate": 6.380509767766223e-05, "loss": 1.3789, "step": 13988 }, { "epoch": 0.8338300154964835, "grad_norm": 3.220409631729126, "learning_rate": 6.379600810576322e-05, "loss": 1.4677, "step": 13990 }, { "epoch": 0.8339492192156396, "grad_norm": 3.3016722202301025, "learning_rate": 6.378691804033383e-05, "loss": 1.4671, "step": 13992 }, { "epoch": 0.8340684229347956, "grad_norm": 3.105311632156372, "learning_rate": 6.377782748169919e-05, "loss": 1.1393, "step": 13994 }, { "epoch": 0.8341876266539516, "grad_norm": 3.112492084503174, "learning_rate": 6.376873643018452e-05, "loss": 1.301, "step": 13996 }, { "epoch": 0.8343068303731076, "grad_norm": 2.886678695678711, "learning_rate": 6.375964488611503e-05, "loss": 1.4557, "step": 13998 }, { "epoch": 0.8344260340922637, "grad_norm": 3.458178758621216, "learning_rate": 6.375055284981598e-05, "loss": 1.4375, "step": 14000 }, { "epoch": 0.8345452378114198, "grad_norm": 2.738217353820801, "learning_rate": 6.374146032161261e-05, "loss": 1.1299, "step": 14002 }, { "epoch": 0.8346644415305757, "grad_norm": 3.1564464569091797, "learning_rate": 6.373236730183019e-05, "loss": 1.355, "step": 14004 }, { "epoch": 0.8347836452497318, "grad_norm": 3.093621253967285, "learning_rate": 6.372327379079399e-05, "loss": 1.172, "step": 14006 }, { "epoch": 0.8349028489688878, "grad_norm": 3.131972551345825, "learning_rate": 6.371417978882934e-05, "loss": 1.3295, "step": 14008 }, { "epoch": 0.8350220526880439, "grad_norm": 3.17535662651062, "learning_rate": 6.370508529626156e-05, "loss": 1.3906, "step": 14010 }, { "epoch": 0.8351412564071999, "grad_norm": 2.9105849266052246, "learning_rate": 6.369599031341598e-05, "loss": 1.3026, "step": 14012 }, { "epoch": 0.8352604601263559, "grad_norm": 3.1975555419921875, "learning_rate": 6.368689484061797e-05, "loss": 1.2219, "step": 14014 }, { "epoch": 0.835379663845512, "grad_norm": 3.1202199459075928, "learning_rate": 6.36777988781929e-05, "loss": 1.2341, "step": 14016 }, { "epoch": 0.8354988675646681, "grad_norm": 2.9445886611938477, "learning_rate": 6.366870242646618e-05, "loss": 1.2414, "step": 14018 }, { "epoch": 0.835618071283824, "grad_norm": 3.0964462757110596, "learning_rate": 6.36596054857632e-05, "loss": 1.4365, "step": 14020 }, { "epoch": 0.8357372750029801, "grad_norm": 2.9898815155029297, "learning_rate": 6.365050805640939e-05, "loss": 1.3435, "step": 14022 }, { "epoch": 0.8358564787221361, "grad_norm": 3.4276182651519775, "learning_rate": 6.36414101387302e-05, "loss": 1.4733, "step": 14024 }, { "epoch": 0.8359756824412922, "grad_norm": 3.1049070358276367, "learning_rate": 6.36323117330511e-05, "loss": 1.3941, "step": 14026 }, { "epoch": 0.8360948861604482, "grad_norm": 3.0973286628723145, "learning_rate": 6.362321283969756e-05, "loss": 1.2786, "step": 14028 }, { "epoch": 0.8362140898796042, "grad_norm": 2.9721145629882812, "learning_rate": 6.361411345899509e-05, "loss": 1.2644, "step": 14030 }, { "epoch": 0.8363332935987603, "grad_norm": 2.8804478645324707, "learning_rate": 6.360501359126921e-05, "loss": 1.3184, "step": 14032 }, { "epoch": 0.8364524973179163, "grad_norm": 2.923628568649292, "learning_rate": 6.359591323684544e-05, "loss": 1.3702, "step": 14034 }, { "epoch": 0.8365717010370723, "grad_norm": 2.6668553352355957, "learning_rate": 6.358681239604934e-05, "loss": 1.3338, "step": 14036 }, { "epoch": 0.8366909047562284, "grad_norm": 3.373401641845703, "learning_rate": 6.357771106920646e-05, "loss": 1.3972, "step": 14038 }, { "epoch": 0.8368101084753844, "grad_norm": 3.3508458137512207, "learning_rate": 6.356860925664241e-05, "loss": 1.2679, "step": 14040 }, { "epoch": 0.8369293121945405, "grad_norm": 3.327754497528076, "learning_rate": 6.355950695868278e-05, "loss": 1.3129, "step": 14042 }, { "epoch": 0.8370485159136966, "grad_norm": 2.960214138031006, "learning_rate": 6.35504041756532e-05, "loss": 1.381, "step": 14044 }, { "epoch": 0.8371677196328525, "grad_norm": 2.9191277027130127, "learning_rate": 6.354130090787928e-05, "loss": 1.2144, "step": 14046 }, { "epoch": 0.8372869233520086, "grad_norm": 3.077860116958618, "learning_rate": 6.353219715568673e-05, "loss": 1.3094, "step": 14048 }, { "epoch": 0.8374061270711646, "grad_norm": 2.982090950012207, "learning_rate": 6.352309291940116e-05, "loss": 1.3269, "step": 14050 }, { "epoch": 0.8375253307903207, "grad_norm": 2.8766629695892334, "learning_rate": 6.351398819934828e-05, "loss": 1.2596, "step": 14052 }, { "epoch": 0.8376445345094767, "grad_norm": 3.1874752044677734, "learning_rate": 6.350488299585384e-05, "loss": 1.1979, "step": 14054 }, { "epoch": 0.8377637382286327, "grad_norm": 3.5894923210144043, "learning_rate": 6.34957773092435e-05, "loss": 1.4122, "step": 14056 }, { "epoch": 0.8378829419477888, "grad_norm": 3.1663243770599365, "learning_rate": 6.348667113984304e-05, "loss": 1.2176, "step": 14058 }, { "epoch": 0.8380021456669448, "grad_norm": 3.3801262378692627, "learning_rate": 6.347756448797819e-05, "loss": 1.3964, "step": 14060 }, { "epoch": 0.8381213493861008, "grad_norm": 3.198756694793701, "learning_rate": 6.346845735397477e-05, "loss": 1.2458, "step": 14062 }, { "epoch": 0.8382405531052569, "grad_norm": 2.8368403911590576, "learning_rate": 6.345934973815856e-05, "loss": 1.3628, "step": 14064 }, { "epoch": 0.8383597568244129, "grad_norm": 3.1974081993103027, "learning_rate": 6.345024164085533e-05, "loss": 1.3018, "step": 14066 }, { "epoch": 0.838478960543569, "grad_norm": 3.0176827907562256, "learning_rate": 6.344113306239094e-05, "loss": 1.3131, "step": 14068 }, { "epoch": 0.838598164262725, "grad_norm": 3.205190896987915, "learning_rate": 6.343202400309125e-05, "loss": 1.2871, "step": 14070 }, { "epoch": 0.838717367981881, "grad_norm": 3.224518060684204, "learning_rate": 6.342291446328208e-05, "loss": 1.3169, "step": 14072 }, { "epoch": 0.8388365717010371, "grad_norm": 3.52123761177063, "learning_rate": 6.341380444328935e-05, "loss": 1.4183, "step": 14074 }, { "epoch": 0.8389557754201931, "grad_norm": 3.2277848720550537, "learning_rate": 6.340469394343894e-05, "loss": 1.2257, "step": 14076 }, { "epoch": 0.8390749791393491, "grad_norm": 3.381728410720825, "learning_rate": 6.339558296405676e-05, "loss": 1.2521, "step": 14078 }, { "epoch": 0.8391941828585052, "grad_norm": 3.3225157260894775, "learning_rate": 6.338647150546875e-05, "loss": 1.2986, "step": 14080 }, { "epoch": 0.8393133865776612, "grad_norm": 3.2173092365264893, "learning_rate": 6.337735956800085e-05, "loss": 1.3409, "step": 14082 }, { "epoch": 0.8394325902968173, "grad_norm": 3.497968912124634, "learning_rate": 6.336824715197904e-05, "loss": 1.5232, "step": 14084 }, { "epoch": 0.8395517940159734, "grad_norm": 3.3086421489715576, "learning_rate": 6.335913425772926e-05, "loss": 1.3045, "step": 14086 }, { "epoch": 0.8396709977351293, "grad_norm": 3.2022268772125244, "learning_rate": 6.335002088557755e-05, "loss": 1.5121, "step": 14088 }, { "epoch": 0.8397902014542854, "grad_norm": 2.742276906967163, "learning_rate": 6.334090703584994e-05, "loss": 1.3076, "step": 14090 }, { "epoch": 0.8399094051734414, "grad_norm": 3.2700347900390625, "learning_rate": 6.33317927088724e-05, "loss": 1.329, "step": 14092 }, { "epoch": 0.8400286088925975, "grad_norm": 3.263068914413452, "learning_rate": 6.332267790497106e-05, "loss": 1.2108, "step": 14094 }, { "epoch": 0.8401478126117535, "grad_norm": 3.402377128601074, "learning_rate": 6.331356262447195e-05, "loss": 1.1955, "step": 14096 }, { "epoch": 0.8402670163309095, "grad_norm": 3.332488536834717, "learning_rate": 6.330444686770112e-05, "loss": 1.4397, "step": 14098 }, { "epoch": 0.8403862200500656, "grad_norm": 3.2306299209594727, "learning_rate": 6.329533063498472e-05, "loss": 1.338, "step": 14100 }, { "epoch": 0.8405054237692215, "grad_norm": 3.1019864082336426, "learning_rate": 6.328621392664885e-05, "loss": 1.3206, "step": 14102 }, { "epoch": 0.8406246274883776, "grad_norm": 3.0979294776916504, "learning_rate": 6.327709674301965e-05, "loss": 1.2455, "step": 14104 }, { "epoch": 0.8407438312075337, "grad_norm": 3.1867988109588623, "learning_rate": 6.326797908442328e-05, "loss": 1.2857, "step": 14106 }, { "epoch": 0.8408630349266897, "grad_norm": 3.6790337562561035, "learning_rate": 6.325886095118591e-05, "loss": 1.3549, "step": 14108 }, { "epoch": 0.8409822386458458, "grad_norm": 2.922772169113159, "learning_rate": 6.32497423436337e-05, "loss": 1.3461, "step": 14110 }, { "epoch": 0.8411014423650018, "grad_norm": 3.1449780464172363, "learning_rate": 6.324062326209288e-05, "loss": 1.4023, "step": 14112 }, { "epoch": 0.8412206460841578, "grad_norm": 3.3892805576324463, "learning_rate": 6.323150370688967e-05, "loss": 1.4006, "step": 14114 }, { "epoch": 0.8413398498033139, "grad_norm": 3.091982126235962, "learning_rate": 6.322238367835029e-05, "loss": 1.2861, "step": 14116 }, { "epoch": 0.8414590535224699, "grad_norm": 2.8907454013824463, "learning_rate": 6.321326317680101e-05, "loss": 1.1511, "step": 14118 }, { "epoch": 0.8415782572416259, "grad_norm": 3.1768863201141357, "learning_rate": 6.32041422025681e-05, "loss": 1.1122, "step": 14120 }, { "epoch": 0.841697460960782, "grad_norm": 3.120384693145752, "learning_rate": 6.319502075597784e-05, "loss": 1.178, "step": 14122 }, { "epoch": 0.841816664679938, "grad_norm": 2.8001480102539062, "learning_rate": 6.318589883735655e-05, "loss": 1.3131, "step": 14124 }, { "epoch": 0.8419358683990941, "grad_norm": 3.281297206878662, "learning_rate": 6.317677644703054e-05, "loss": 1.317, "step": 14126 }, { "epoch": 0.84205507211825, "grad_norm": 3.322164535522461, "learning_rate": 6.316765358532615e-05, "loss": 1.2816, "step": 14128 }, { "epoch": 0.8421742758374061, "grad_norm": 3.1118035316467285, "learning_rate": 6.315853025256974e-05, "loss": 1.2072, "step": 14130 }, { "epoch": 0.8422934795565622, "grad_norm": 3.178877353668213, "learning_rate": 6.314940644908767e-05, "loss": 1.3189, "step": 14132 }, { "epoch": 0.8424126832757182, "grad_norm": 3.2339699268341064, "learning_rate": 6.314028217520637e-05, "loss": 1.3449, "step": 14134 }, { "epoch": 0.8425318869948742, "grad_norm": 3.2155611515045166, "learning_rate": 6.313115743125219e-05, "loss": 1.3192, "step": 14136 }, { "epoch": 0.8426510907140303, "grad_norm": 3.3409478664398193, "learning_rate": 6.31220322175516e-05, "loss": 1.377, "step": 14138 }, { "epoch": 0.8427702944331863, "grad_norm": 2.9698846340179443, "learning_rate": 6.311290653443102e-05, "loss": 1.384, "step": 14140 }, { "epoch": 0.8428894981523424, "grad_norm": 3.0806524753570557, "learning_rate": 6.310378038221688e-05, "loss": 1.2972, "step": 14142 }, { "epoch": 0.8430087018714983, "grad_norm": 3.2505924701690674, "learning_rate": 6.309465376123572e-05, "loss": 1.2824, "step": 14144 }, { "epoch": 0.8431279055906544, "grad_norm": 3.1494064331054688, "learning_rate": 6.308552667181397e-05, "loss": 1.2277, "step": 14146 }, { "epoch": 0.8432471093098105, "grad_norm": 3.06756329536438, "learning_rate": 6.307639911427816e-05, "loss": 1.2349, "step": 14148 }, { "epoch": 0.8433663130289665, "grad_norm": 3.226325511932373, "learning_rate": 6.30672710889548e-05, "loss": 1.4493, "step": 14150 }, { "epoch": 0.8434855167481226, "grad_norm": 2.9649903774261475, "learning_rate": 6.305814259617047e-05, "loss": 1.3092, "step": 14152 }, { "epoch": 0.8436047204672785, "grad_norm": 3.0367891788482666, "learning_rate": 6.30490136362517e-05, "loss": 1.3074, "step": 14154 }, { "epoch": 0.8437239241864346, "grad_norm": 3.230459451675415, "learning_rate": 6.303988420952506e-05, "loss": 1.2987, "step": 14156 }, { "epoch": 0.8438431279055907, "grad_norm": 3.0627856254577637, "learning_rate": 6.303075431631714e-05, "loss": 1.4158, "step": 14158 }, { "epoch": 0.8439623316247467, "grad_norm": 3.171194314956665, "learning_rate": 6.302162395695455e-05, "loss": 1.2758, "step": 14160 }, { "epoch": 0.8440815353439027, "grad_norm": 2.8828446865081787, "learning_rate": 6.301249313176392e-05, "loss": 1.1822, "step": 14162 }, { "epoch": 0.8442007390630588, "grad_norm": 2.948823928833008, "learning_rate": 6.300336184107191e-05, "loss": 1.2095, "step": 14164 }, { "epoch": 0.8443199427822148, "grad_norm": 2.742722511291504, "learning_rate": 6.299423008520515e-05, "loss": 1.2158, "step": 14166 }, { "epoch": 0.8444391465013709, "grad_norm": 3.500776529312134, "learning_rate": 6.29850978644903e-05, "loss": 1.3874, "step": 14168 }, { "epoch": 0.8445583502205268, "grad_norm": 3.116925001144409, "learning_rate": 6.297596517925408e-05, "loss": 1.2689, "step": 14170 }, { "epoch": 0.8446775539396829, "grad_norm": 3.1852400302886963, "learning_rate": 6.296683202982319e-05, "loss": 1.2196, "step": 14172 }, { "epoch": 0.844796757658839, "grad_norm": 3.0430781841278076, "learning_rate": 6.295769841652436e-05, "loss": 1.3182, "step": 14174 }, { "epoch": 0.844915961377995, "grad_norm": 3.3189609050750732, "learning_rate": 6.294856433968431e-05, "loss": 1.2916, "step": 14176 }, { "epoch": 0.845035165097151, "grad_norm": 3.149883270263672, "learning_rate": 6.293942979962982e-05, "loss": 1.4635, "step": 14178 }, { "epoch": 0.8451543688163071, "grad_norm": 3.142831325531006, "learning_rate": 6.293029479668767e-05, "loss": 1.3592, "step": 14180 }, { "epoch": 0.8452735725354631, "grad_norm": 3.3213937282562256, "learning_rate": 6.29211593311846e-05, "loss": 1.4201, "step": 14182 }, { "epoch": 0.8453927762546192, "grad_norm": 3.070859670639038, "learning_rate": 6.291202340344748e-05, "loss": 1.3209, "step": 14184 }, { "epoch": 0.8455119799737751, "grad_norm": 3.292224645614624, "learning_rate": 6.29028870138031e-05, "loss": 1.5854, "step": 14186 }, { "epoch": 0.8456311836929312, "grad_norm": 3.1120669841766357, "learning_rate": 6.289375016257828e-05, "loss": 1.2485, "step": 14188 }, { "epoch": 0.8457503874120873, "grad_norm": 3.5118510723114014, "learning_rate": 6.288461285009992e-05, "loss": 1.3983, "step": 14190 }, { "epoch": 0.8458695911312433, "grad_norm": 2.942084550857544, "learning_rate": 6.287547507669489e-05, "loss": 1.3822, "step": 14192 }, { "epoch": 0.8459887948503994, "grad_norm": 3.3387351036071777, "learning_rate": 6.286633684269003e-05, "loss": 1.4649, "step": 14194 }, { "epoch": 0.8461079985695553, "grad_norm": 2.9656901359558105, "learning_rate": 6.28571981484123e-05, "loss": 1.3869, "step": 14196 }, { "epoch": 0.8462272022887114, "grad_norm": 3.2256689071655273, "learning_rate": 6.284805899418861e-05, "loss": 1.3642, "step": 14198 }, { "epoch": 0.8463464060078675, "grad_norm": 2.803722381591797, "learning_rate": 6.283891938034587e-05, "loss": 1.3193, "step": 14200 }, { "epoch": 0.8464656097270234, "grad_norm": 2.8165814876556396, "learning_rate": 6.282977930721107e-05, "loss": 1.3269, "step": 14202 }, { "epoch": 0.8465848134461795, "grad_norm": 2.573960542678833, "learning_rate": 6.282063877511116e-05, "loss": 1.2999, "step": 14204 }, { "epoch": 0.8467040171653356, "grad_norm": 3.059307098388672, "learning_rate": 6.281149778437313e-05, "loss": 1.287, "step": 14206 }, { "epoch": 0.8468232208844916, "grad_norm": 3.453195333480835, "learning_rate": 6.280235633532401e-05, "loss": 1.2961, "step": 14208 }, { "epoch": 0.8469424246036477, "grad_norm": 3.126768112182617, "learning_rate": 6.279321442829078e-05, "loss": 1.2461, "step": 14210 }, { "epoch": 0.8470616283228036, "grad_norm": 2.7832679748535156, "learning_rate": 6.278407206360052e-05, "loss": 1.3187, "step": 14212 }, { "epoch": 0.8471808320419597, "grad_norm": 3.0999417304992676, "learning_rate": 6.277492924158025e-05, "loss": 1.408, "step": 14214 }, { "epoch": 0.8473000357611158, "grad_norm": 2.9742422103881836, "learning_rate": 6.276578596255704e-05, "loss": 1.4118, "step": 14216 }, { "epoch": 0.8474192394802718, "grad_norm": 3.4807381629943848, "learning_rate": 6.275664222685799e-05, "loss": 1.3104, "step": 14218 }, { "epoch": 0.8475384431994278, "grad_norm": 3.361999750137329, "learning_rate": 6.274749803481022e-05, "loss": 1.3394, "step": 14220 }, { "epoch": 0.8476576469185838, "grad_norm": 3.360304355621338, "learning_rate": 6.273835338674083e-05, "loss": 1.5804, "step": 14222 }, { "epoch": 0.8477768506377399, "grad_norm": 2.9771649837493896, "learning_rate": 6.272920828297695e-05, "loss": 1.2345, "step": 14224 }, { "epoch": 0.847896054356896, "grad_norm": 3.0909922122955322, "learning_rate": 6.272006272384573e-05, "loss": 1.2273, "step": 14226 }, { "epoch": 0.8480152580760519, "grad_norm": 3.339913845062256, "learning_rate": 6.271091670967436e-05, "loss": 1.1858, "step": 14228 }, { "epoch": 0.848134461795208, "grad_norm": 3.089726686477661, "learning_rate": 6.270177024079001e-05, "loss": 1.3296, "step": 14230 }, { "epoch": 0.8482536655143641, "grad_norm": 2.780428886413574, "learning_rate": 6.269262331751985e-05, "loss": 1.2235, "step": 14232 }, { "epoch": 0.8483728692335201, "grad_norm": 3.206249237060547, "learning_rate": 6.268347594019114e-05, "loss": 1.2543, "step": 14234 }, { "epoch": 0.8484920729526761, "grad_norm": 3.011131525039673, "learning_rate": 6.267432810913112e-05, "loss": 1.3914, "step": 14236 }, { "epoch": 0.8486112766718321, "grad_norm": 3.0325284004211426, "learning_rate": 6.266517982466698e-05, "loss": 1.3811, "step": 14238 }, { "epoch": 0.8487304803909882, "grad_norm": 2.93255615234375, "learning_rate": 6.265603108712606e-05, "loss": 1.2954, "step": 14240 }, { "epoch": 0.8488496841101443, "grad_norm": 2.9219155311584473, "learning_rate": 6.26468818968356e-05, "loss": 1.3187, "step": 14242 }, { "epoch": 0.8489688878293002, "grad_norm": 3.084414005279541, "learning_rate": 6.26377322541229e-05, "loss": 1.4516, "step": 14244 }, { "epoch": 0.8490880915484563, "grad_norm": 3.0479443073272705, "learning_rate": 6.262858215931527e-05, "loss": 1.4187, "step": 14246 }, { "epoch": 0.8492072952676123, "grad_norm": 2.9509475231170654, "learning_rate": 6.261943161274006e-05, "loss": 1.1408, "step": 14248 }, { "epoch": 0.8493264989867684, "grad_norm": 2.8620147705078125, "learning_rate": 6.261028061472458e-05, "loss": 1.2819, "step": 14250 }, { "epoch": 0.8494457027059245, "grad_norm": 3.0610833168029785, "learning_rate": 6.260112916559623e-05, "loss": 1.5307, "step": 14252 }, { "epoch": 0.8495649064250804, "grad_norm": 3.315896987915039, "learning_rate": 6.25919772656824e-05, "loss": 1.2439, "step": 14254 }, { "epoch": 0.8496841101442365, "grad_norm": 3.1746747493743896, "learning_rate": 6.258282491531044e-05, "loss": 1.2873, "step": 14256 }, { "epoch": 0.8498033138633926, "grad_norm": 3.3031535148620605, "learning_rate": 6.257367211480778e-05, "loss": 1.4306, "step": 14258 }, { "epoch": 0.8499225175825486, "grad_norm": 3.052018165588379, "learning_rate": 6.256451886450185e-05, "loss": 1.343, "step": 14260 }, { "epoch": 0.8500417213017046, "grad_norm": 3.1143178939819336, "learning_rate": 6.255536516472009e-05, "loss": 1.4158, "step": 14262 }, { "epoch": 0.8501609250208606, "grad_norm": 2.7988970279693604, "learning_rate": 6.254621101578996e-05, "loss": 1.2834, "step": 14264 }, { "epoch": 0.8502801287400167, "grad_norm": 3.1742002964019775, "learning_rate": 6.253705641803893e-05, "loss": 1.2789, "step": 14266 }, { "epoch": 0.8503993324591728, "grad_norm": 3.031446933746338, "learning_rate": 6.252790137179451e-05, "loss": 1.252, "step": 14268 }, { "epoch": 0.8505185361783287, "grad_norm": 3.298762083053589, "learning_rate": 6.251874587738418e-05, "loss": 1.2872, "step": 14270 }, { "epoch": 0.8506377398974848, "grad_norm": 3.172081470489502, "learning_rate": 6.250958993513549e-05, "loss": 1.2841, "step": 14272 }, { "epoch": 0.8507569436166409, "grad_norm": 2.867478609085083, "learning_rate": 6.250043354537597e-05, "loss": 1.2651, "step": 14274 }, { "epoch": 0.8508761473357969, "grad_norm": 3.0596745014190674, "learning_rate": 6.249127670843315e-05, "loss": 1.2443, "step": 14276 }, { "epoch": 0.8509953510549529, "grad_norm": 3.1140236854553223, "learning_rate": 6.248211942463464e-05, "loss": 1.2636, "step": 14278 }, { "epoch": 0.8511145547741089, "grad_norm": 3.147535562515259, "learning_rate": 6.247296169430802e-05, "loss": 1.3722, "step": 14280 }, { "epoch": 0.851233758493265, "grad_norm": 3.0535337924957275, "learning_rate": 6.246380351778088e-05, "loss": 1.3378, "step": 14282 }, { "epoch": 0.8513529622124211, "grad_norm": 3.157346248626709, "learning_rate": 6.245464489538083e-05, "loss": 1.3153, "step": 14284 }, { "epoch": 0.851472165931577, "grad_norm": 2.956727981567383, "learning_rate": 6.244548582743552e-05, "loss": 1.2519, "step": 14286 }, { "epoch": 0.8515913696507331, "grad_norm": 3.4986166954040527, "learning_rate": 6.243632631427261e-05, "loss": 1.299, "step": 14288 }, { "epoch": 0.8517105733698891, "grad_norm": 3.1609506607055664, "learning_rate": 6.242716635621975e-05, "loss": 1.3263, "step": 14290 }, { "epoch": 0.8518297770890452, "grad_norm": 2.957921028137207, "learning_rate": 6.241800595360465e-05, "loss": 1.2334, "step": 14292 }, { "epoch": 0.8519489808082013, "grad_norm": 3.0404958724975586, "learning_rate": 6.240884510675497e-05, "loss": 1.1576, "step": 14294 }, { "epoch": 0.8520681845273572, "grad_norm": 3.107147216796875, "learning_rate": 6.239968381599844e-05, "loss": 1.2841, "step": 14296 }, { "epoch": 0.8521873882465133, "grad_norm": 2.735804796218872, "learning_rate": 6.239052208166279e-05, "loss": 1.2108, "step": 14298 }, { "epoch": 0.8523065919656694, "grad_norm": 2.973708391189575, "learning_rate": 6.238135990407579e-05, "loss": 1.1953, "step": 14300 }, { "epoch": 0.8524257956848253, "grad_norm": 3.2319767475128174, "learning_rate": 6.237219728356517e-05, "loss": 1.3237, "step": 14302 }, { "epoch": 0.8525449994039814, "grad_norm": 2.8994483947753906, "learning_rate": 6.236303422045874e-05, "loss": 1.3439, "step": 14304 }, { "epoch": 0.8526642031231374, "grad_norm": 3.3031957149505615, "learning_rate": 6.235387071508427e-05, "loss": 1.3459, "step": 14306 }, { "epoch": 0.8527834068422935, "grad_norm": 3.1022446155548096, "learning_rate": 6.234470676776957e-05, "loss": 1.1591, "step": 14308 }, { "epoch": 0.8529026105614496, "grad_norm": 3.301426649093628, "learning_rate": 6.233554237884247e-05, "loss": 1.3409, "step": 14310 }, { "epoch": 0.8530218142806055, "grad_norm": 3.179616928100586, "learning_rate": 6.232637754863083e-05, "loss": 1.1517, "step": 14312 }, { "epoch": 0.8531410179997616, "grad_norm": 3.3884482383728027, "learning_rate": 6.231721227746246e-05, "loss": 1.4138, "step": 14314 }, { "epoch": 0.8532602217189176, "grad_norm": 3.1712889671325684, "learning_rate": 6.230804656566528e-05, "loss": 1.348, "step": 14316 }, { "epoch": 0.8533794254380737, "grad_norm": 2.957322359085083, "learning_rate": 6.229888041356717e-05, "loss": 1.2777, "step": 14318 }, { "epoch": 0.8534986291572297, "grad_norm": 2.9632132053375244, "learning_rate": 6.228971382149603e-05, "loss": 1.4245, "step": 14320 }, { "epoch": 0.8536178328763857, "grad_norm": 3.6651766300201416, "learning_rate": 6.228054678977977e-05, "loss": 1.3102, "step": 14322 }, { "epoch": 0.8537370365955418, "grad_norm": 3.1711981296539307, "learning_rate": 6.227137931874634e-05, "loss": 1.2121, "step": 14324 }, { "epoch": 0.8538562403146979, "grad_norm": 2.9318432807922363, "learning_rate": 6.226221140872368e-05, "loss": 1.2009, "step": 14326 }, { "epoch": 0.8539754440338538, "grad_norm": 3.2687504291534424, "learning_rate": 6.225304306003975e-05, "loss": 1.233, "step": 14328 }, { "epoch": 0.8540946477530099, "grad_norm": 3.03330397605896, "learning_rate": 6.224387427302256e-05, "loss": 1.0553, "step": 14330 }, { "epoch": 0.8542138514721659, "grad_norm": 2.9217050075531006, "learning_rate": 6.22347050480001e-05, "loss": 1.2404, "step": 14332 }, { "epoch": 0.854333055191322, "grad_norm": 3.0362210273742676, "learning_rate": 6.222553538530036e-05, "loss": 1.2266, "step": 14334 }, { "epoch": 0.854452258910478, "grad_norm": 2.885812997817993, "learning_rate": 6.221636528525142e-05, "loss": 1.3369, "step": 14336 }, { "epoch": 0.854571462629634, "grad_norm": 3.243873357772827, "learning_rate": 6.220719474818128e-05, "loss": 1.2349, "step": 14338 }, { "epoch": 0.8546906663487901, "grad_norm": 3.1100611686706543, "learning_rate": 6.219802377441801e-05, "loss": 1.274, "step": 14340 }, { "epoch": 0.8548098700679461, "grad_norm": 3.2674174308776855, "learning_rate": 6.218885236428971e-05, "loss": 1.2978, "step": 14342 }, { "epoch": 0.8549290737871021, "grad_norm": 3.14931321144104, "learning_rate": 6.217968051812445e-05, "loss": 1.3797, "step": 14344 }, { "epoch": 0.8550482775062582, "grad_norm": 2.8225932121276855, "learning_rate": 6.217050823625035e-05, "loss": 1.2562, "step": 14346 }, { "epoch": 0.8551674812254142, "grad_norm": 2.9007740020751953, "learning_rate": 6.216133551899551e-05, "loss": 1.2003, "step": 14348 }, { "epoch": 0.8552866849445703, "grad_norm": 3.367340326309204, "learning_rate": 6.215216236668811e-05, "loss": 1.2599, "step": 14350 }, { "epoch": 0.8554058886637264, "grad_norm": 2.8789446353912354, "learning_rate": 6.214298877965627e-05, "loss": 1.1731, "step": 14352 }, { "epoch": 0.8555250923828823, "grad_norm": 3.0413994789123535, "learning_rate": 6.213381475822819e-05, "loss": 1.1946, "step": 14354 }, { "epoch": 0.8556442961020384, "grad_norm": 3.1582555770874023, "learning_rate": 6.212464030273204e-05, "loss": 1.2037, "step": 14356 }, { "epoch": 0.8557634998211944, "grad_norm": 3.0105390548706055, "learning_rate": 6.211546541349602e-05, "loss": 1.175, "step": 14358 }, { "epoch": 0.8558827035403505, "grad_norm": 3.189775228500366, "learning_rate": 6.210629009084833e-05, "loss": 1.1493, "step": 14360 }, { "epoch": 0.8560019072595065, "grad_norm": 3.301785469055176, "learning_rate": 6.209711433511725e-05, "loss": 1.3023, "step": 14362 }, { "epoch": 0.8561211109786625, "grad_norm": 2.94958758354187, "learning_rate": 6.208793814663098e-05, "loss": 1.0505, "step": 14364 }, { "epoch": 0.8562403146978186, "grad_norm": 3.0118236541748047, "learning_rate": 6.207876152571781e-05, "loss": 1.2303, "step": 14366 }, { "epoch": 0.8563595184169747, "grad_norm": 3.152509927749634, "learning_rate": 6.206958447270602e-05, "loss": 1.4204, "step": 14368 }, { "epoch": 0.8564787221361306, "grad_norm": 3.5675244331359863, "learning_rate": 6.20604069879239e-05, "loss": 1.3077, "step": 14370 }, { "epoch": 0.8565979258552867, "grad_norm": 2.8275434970855713, "learning_rate": 6.205122907169974e-05, "loss": 1.306, "step": 14372 }, { "epoch": 0.8567171295744427, "grad_norm": 2.801872491836548, "learning_rate": 6.204205072436189e-05, "loss": 1.2281, "step": 14374 }, { "epoch": 0.8568363332935988, "grad_norm": 3.1452369689941406, "learning_rate": 6.203287194623869e-05, "loss": 1.2564, "step": 14376 }, { "epoch": 0.8569555370127548, "grad_norm": 3.4781925678253174, "learning_rate": 6.20236927376585e-05, "loss": 1.1888, "step": 14378 }, { "epoch": 0.8570747407319108, "grad_norm": 3.2317957878112793, "learning_rate": 6.201451309894967e-05, "loss": 1.3774, "step": 14380 }, { "epoch": 0.8571939444510669, "grad_norm": 2.8576881885528564, "learning_rate": 6.20053330304406e-05, "loss": 1.2494, "step": 14382 }, { "epoch": 0.8573131481702229, "grad_norm": 3.301307439804077, "learning_rate": 6.199615253245968e-05, "loss": 1.245, "step": 14384 }, { "epoch": 0.8574323518893789, "grad_norm": 3.1370413303375244, "learning_rate": 6.198697160533536e-05, "loss": 1.3948, "step": 14386 }, { "epoch": 0.857551555608535, "grad_norm": 2.700202703475952, "learning_rate": 6.197779024939604e-05, "loss": 1.2606, "step": 14388 }, { "epoch": 0.857670759327691, "grad_norm": 3.1781883239746094, "learning_rate": 6.196860846497018e-05, "loss": 1.3818, "step": 14390 }, { "epoch": 0.8577899630468471, "grad_norm": 3.0774903297424316, "learning_rate": 6.195942625238625e-05, "loss": 1.2504, "step": 14392 }, { "epoch": 0.8579091667660032, "grad_norm": 3.027529001235962, "learning_rate": 6.195024361197272e-05, "loss": 1.222, "step": 14394 }, { "epoch": 0.8580283704851591, "grad_norm": 3.1592164039611816, "learning_rate": 6.194106054405811e-05, "loss": 1.2516, "step": 14396 }, { "epoch": 0.8581475742043152, "grad_norm": 2.933600425720215, "learning_rate": 6.193187704897087e-05, "loss": 1.3344, "step": 14398 }, { "epoch": 0.8582667779234712, "grad_norm": 3.2930281162261963, "learning_rate": 6.192269312703959e-05, "loss": 1.2597, "step": 14400 }, { "epoch": 0.8583859816426272, "grad_norm": 4.193871974945068, "learning_rate": 6.191350877859278e-05, "loss": 1.3098, "step": 14402 }, { "epoch": 0.8585051853617833, "grad_norm": 3.046804428100586, "learning_rate": 6.1904324003959e-05, "loss": 1.148, "step": 14404 }, { "epoch": 0.8586243890809393, "grad_norm": 3.188260793685913, "learning_rate": 6.189513880346681e-05, "loss": 1.3347, "step": 14406 }, { "epoch": 0.8587435928000954, "grad_norm": 2.6832048892974854, "learning_rate": 6.188595317744482e-05, "loss": 1.2654, "step": 14408 }, { "epoch": 0.8588627965192513, "grad_norm": 3.1732616424560547, "learning_rate": 6.187676712622161e-05, "loss": 1.3876, "step": 14410 }, { "epoch": 0.8589820002384074, "grad_norm": 2.7976136207580566, "learning_rate": 6.186758065012581e-05, "loss": 1.3407, "step": 14412 }, { "epoch": 0.8591012039575635, "grad_norm": 3.1838555335998535, "learning_rate": 6.185839374948605e-05, "loss": 1.3131, "step": 14414 }, { "epoch": 0.8592204076767195, "grad_norm": 2.892820358276367, "learning_rate": 6.184920642463094e-05, "loss": 1.3808, "step": 14416 }, { "epoch": 0.8593396113958756, "grad_norm": 3.622649669647217, "learning_rate": 6.184001867588921e-05, "loss": 1.4668, "step": 14418 }, { "epoch": 0.8594588151150316, "grad_norm": 2.89424204826355, "learning_rate": 6.183083050358949e-05, "loss": 1.301, "step": 14420 }, { "epoch": 0.8595780188341876, "grad_norm": 2.9090840816497803, "learning_rate": 6.182164190806048e-05, "loss": 1.2452, "step": 14422 }, { "epoch": 0.8596972225533437, "grad_norm": 3.68251633644104, "learning_rate": 6.18124528896309e-05, "loss": 1.5791, "step": 14424 }, { "epoch": 0.8598164262724997, "grad_norm": 2.9174840450286865, "learning_rate": 6.180326344862947e-05, "loss": 1.3029, "step": 14426 }, { "epoch": 0.8599356299916557, "grad_norm": 2.980302095413208, "learning_rate": 6.179407358538492e-05, "loss": 1.2668, "step": 14428 }, { "epoch": 0.8600548337108118, "grad_norm": 3.155129909515381, "learning_rate": 6.178488330022599e-05, "loss": 1.3406, "step": 14430 }, { "epoch": 0.8601740374299678, "grad_norm": 3.1253933906555176, "learning_rate": 6.177569259348146e-05, "loss": 1.3739, "step": 14432 }, { "epoch": 0.8602932411491239, "grad_norm": 2.8908097743988037, "learning_rate": 6.176650146548013e-05, "loss": 1.1844, "step": 14434 }, { "epoch": 0.8604124448682798, "grad_norm": 3.029484272003174, "learning_rate": 6.175730991655077e-05, "loss": 1.3148, "step": 14436 }, { "epoch": 0.8605316485874359, "grad_norm": 2.9060745239257812, "learning_rate": 6.174811794702222e-05, "loss": 1.2751, "step": 14438 }, { "epoch": 0.860650852306592, "grad_norm": 3.0299556255340576, "learning_rate": 6.173892555722331e-05, "loss": 1.2789, "step": 14440 }, { "epoch": 0.860770056025748, "grad_norm": 3.0763795375823975, "learning_rate": 6.172973274748284e-05, "loss": 1.2719, "step": 14442 }, { "epoch": 0.860889259744904, "grad_norm": 3.2345404624938965, "learning_rate": 6.17205395181297e-05, "loss": 1.3112, "step": 14444 }, { "epoch": 0.8610084634640601, "grad_norm": 3.099865436553955, "learning_rate": 6.171134586949276e-05, "loss": 1.2125, "step": 14446 }, { "epoch": 0.8611276671832161, "grad_norm": 3.326162099838257, "learning_rate": 6.170215180190092e-05, "loss": 1.338, "step": 14448 }, { "epoch": 0.8612468709023722, "grad_norm": 3.0800669193267822, "learning_rate": 6.169295731568307e-05, "loss": 1.3552, "step": 14450 }, { "epoch": 0.8613660746215281, "grad_norm": 3.08740234375, "learning_rate": 6.168376241116812e-05, "loss": 1.3288, "step": 14452 }, { "epoch": 0.8614852783406842, "grad_norm": 2.784813165664673, "learning_rate": 6.167456708868503e-05, "loss": 1.3074, "step": 14454 }, { "epoch": 0.8616044820598403, "grad_norm": 3.358177423477173, "learning_rate": 6.166537134856271e-05, "loss": 1.3547, "step": 14456 }, { "epoch": 0.8617236857789963, "grad_norm": 2.9537246227264404, "learning_rate": 6.165617519113016e-05, "loss": 1.2509, "step": 14458 }, { "epoch": 0.8618428894981524, "grad_norm": 3.447658061981201, "learning_rate": 6.164697861671635e-05, "loss": 1.3528, "step": 14460 }, { "epoch": 0.8619620932173084, "grad_norm": 2.8855669498443604, "learning_rate": 6.163778162565024e-05, "loss": 1.3673, "step": 14462 }, { "epoch": 0.8620812969364644, "grad_norm": 3.029242753982544, "learning_rate": 6.162858421826087e-05, "loss": 1.3919, "step": 14464 }, { "epoch": 0.8622005006556205, "grad_norm": 2.980628728866577, "learning_rate": 6.161938639487727e-05, "loss": 1.3322, "step": 14466 }, { "epoch": 0.8623197043747765, "grad_norm": 3.5036468505859375, "learning_rate": 6.161018815582846e-05, "loss": 1.354, "step": 14468 }, { "epoch": 0.8624389080939325, "grad_norm": 3.119229793548584, "learning_rate": 6.160098950144349e-05, "loss": 1.2786, "step": 14470 }, { "epoch": 0.8625581118130886, "grad_norm": 3.1541125774383545, "learning_rate": 6.159179043205144e-05, "loss": 1.3287, "step": 14472 }, { "epoch": 0.8626773155322446, "grad_norm": 3.087190628051758, "learning_rate": 6.158259094798137e-05, "loss": 1.2369, "step": 14474 }, { "epoch": 0.8627965192514007, "grad_norm": 3.2433929443359375, "learning_rate": 6.15733910495624e-05, "loss": 1.4778, "step": 14476 }, { "epoch": 0.8629157229705566, "grad_norm": 3.1822621822357178, "learning_rate": 6.156419073712364e-05, "loss": 1.3754, "step": 14478 }, { "epoch": 0.8630349266897127, "grad_norm": 2.9485435485839844, "learning_rate": 6.15549900109942e-05, "loss": 1.2765, "step": 14480 }, { "epoch": 0.8631541304088688, "grad_norm": 2.924941062927246, "learning_rate": 6.154578887150324e-05, "loss": 1.4084, "step": 14482 }, { "epoch": 0.8632733341280248, "grad_norm": 3.3391122817993164, "learning_rate": 6.153658731897992e-05, "loss": 1.3347, "step": 14484 }, { "epoch": 0.8633925378471808, "grad_norm": 3.0338237285614014, "learning_rate": 6.152738535375338e-05, "loss": 1.1936, "step": 14486 }, { "epoch": 0.8635117415663369, "grad_norm": 2.9414327144622803, "learning_rate": 6.151818297615283e-05, "loss": 1.327, "step": 14488 }, { "epoch": 0.8636309452854929, "grad_norm": 2.7730729579925537, "learning_rate": 6.150898018650748e-05, "loss": 1.1734, "step": 14490 }, { "epoch": 0.863750149004649, "grad_norm": 2.950545310974121, "learning_rate": 6.149977698514651e-05, "loss": 1.3211, "step": 14492 }, { "epoch": 0.8638693527238049, "grad_norm": 2.9888806343078613, "learning_rate": 6.149057337239917e-05, "loss": 1.1118, "step": 14494 }, { "epoch": 0.863988556442961, "grad_norm": 3.1081595420837402, "learning_rate": 6.148136934859472e-05, "loss": 1.3162, "step": 14496 }, { "epoch": 0.8641077601621171, "grad_norm": 2.925595998764038, "learning_rate": 6.14721649140624e-05, "loss": 1.175, "step": 14498 }, { "epoch": 0.8642269638812731, "grad_norm": 3.188636064529419, "learning_rate": 6.14629600691315e-05, "loss": 1.2556, "step": 14500 }, { "epoch": 0.8643461676004291, "grad_norm": 3.177138566970825, "learning_rate": 6.145375481413129e-05, "loss": 1.2921, "step": 14502 }, { "epoch": 0.8644653713195851, "grad_norm": 3.014806032180786, "learning_rate": 6.144454914939108e-05, "loss": 1.2952, "step": 14504 }, { "epoch": 0.8645845750387412, "grad_norm": 3.281632661819458, "learning_rate": 6.14353430752402e-05, "loss": 1.2587, "step": 14506 }, { "epoch": 0.8647037787578973, "grad_norm": 3.031853437423706, "learning_rate": 6.142613659200796e-05, "loss": 1.28, "step": 14508 }, { "epoch": 0.8648229824770532, "grad_norm": 3.263594627380371, "learning_rate": 6.141692970002372e-05, "loss": 1.3377, "step": 14510 }, { "epoch": 0.8649421861962093, "grad_norm": 2.9658026695251465, "learning_rate": 6.140772239961685e-05, "loss": 1.2405, "step": 14512 }, { "epoch": 0.8650613899153654, "grad_norm": 2.9777510166168213, "learning_rate": 6.139851469111671e-05, "loss": 1.2007, "step": 14514 }, { "epoch": 0.8651805936345214, "grad_norm": 3.021294593811035, "learning_rate": 6.138930657485271e-05, "loss": 1.4211, "step": 14516 }, { "epoch": 0.8652997973536775, "grad_norm": 2.78212833404541, "learning_rate": 6.138009805115423e-05, "loss": 1.1815, "step": 14518 }, { "epoch": 0.8654190010728334, "grad_norm": 3.011545181274414, "learning_rate": 6.137088912035071e-05, "loss": 1.3923, "step": 14520 }, { "epoch": 0.8655382047919895, "grad_norm": 3.1150319576263428, "learning_rate": 6.13616797827716e-05, "loss": 1.3174, "step": 14522 }, { "epoch": 0.8656574085111456, "grad_norm": 3.0433340072631836, "learning_rate": 6.13524700387463e-05, "loss": 1.2282, "step": 14524 }, { "epoch": 0.8657766122303016, "grad_norm": 3.5008835792541504, "learning_rate": 6.134325988860433e-05, "loss": 1.3069, "step": 14526 }, { "epoch": 0.8658958159494576, "grad_norm": 2.9575109481811523, "learning_rate": 6.133404933267513e-05, "loss": 1.2529, "step": 14528 }, { "epoch": 0.8660150196686136, "grad_norm": 3.014582395553589, "learning_rate": 6.132483837128823e-05, "loss": 1.1876, "step": 14530 }, { "epoch": 0.8661342233877697, "grad_norm": 3.184757709503174, "learning_rate": 6.131562700477308e-05, "loss": 1.3643, "step": 14532 }, { "epoch": 0.8662534271069258, "grad_norm": 2.7411203384399414, "learning_rate": 6.130641523345927e-05, "loss": 1.2753, "step": 14534 }, { "epoch": 0.8663726308260817, "grad_norm": 3.375786781311035, "learning_rate": 6.129720305767628e-05, "loss": 1.268, "step": 14536 }, { "epoch": 0.8664918345452378, "grad_norm": 2.9936370849609375, "learning_rate": 6.128799047775367e-05, "loss": 1.2974, "step": 14538 }, { "epoch": 0.8666110382643939, "grad_norm": 2.7905755043029785, "learning_rate": 6.127877749402105e-05, "loss": 1.3138, "step": 14540 }, { "epoch": 0.8667302419835499, "grad_norm": 2.99900484085083, "learning_rate": 6.126956410680795e-05, "loss": 1.1825, "step": 14542 }, { "epoch": 0.866849445702706, "grad_norm": 3.256967067718506, "learning_rate": 6.1260350316444e-05, "loss": 1.3252, "step": 14544 }, { "epoch": 0.8669686494218619, "grad_norm": 2.845892906188965, "learning_rate": 6.125113612325878e-05, "loss": 1.442, "step": 14546 }, { "epoch": 0.867087853141018, "grad_norm": 3.412452459335327, "learning_rate": 6.124192152758194e-05, "loss": 1.4002, "step": 14548 }, { "epoch": 0.8672070568601741, "grad_norm": 3.196903944015503, "learning_rate": 6.123270652974308e-05, "loss": 1.3781, "step": 14550 }, { "epoch": 0.86732626057933, "grad_norm": 3.2301876544952393, "learning_rate": 6.122349113007188e-05, "loss": 1.2968, "step": 14552 }, { "epoch": 0.8674454642984861, "grad_norm": 3.5027918815612793, "learning_rate": 6.121427532889801e-05, "loss": 1.2711, "step": 14554 }, { "epoch": 0.8675646680176422, "grad_norm": 3.2584311962127686, "learning_rate": 6.120505912655114e-05, "loss": 1.1881, "step": 14556 }, { "epoch": 0.8676838717367982, "grad_norm": 3.1373884677886963, "learning_rate": 6.119584252336097e-05, "loss": 1.2942, "step": 14558 }, { "epoch": 0.8678030754559543, "grad_norm": 2.772801399230957, "learning_rate": 6.118662551965721e-05, "loss": 1.1886, "step": 14560 }, { "epoch": 0.8679222791751102, "grad_norm": 3.374382734298706, "learning_rate": 6.117740811576957e-05, "loss": 1.4054, "step": 14562 }, { "epoch": 0.8680414828942663, "grad_norm": 3.9277760982513428, "learning_rate": 6.116819031202781e-05, "loss": 1.3203, "step": 14564 }, { "epoch": 0.8681606866134224, "grad_norm": 2.9545609951019287, "learning_rate": 6.115897210876166e-05, "loss": 1.3607, "step": 14566 }, { "epoch": 0.8682798903325784, "grad_norm": 3.0549373626708984, "learning_rate": 6.11497535063009e-05, "loss": 1.3936, "step": 14568 }, { "epoch": 0.8683990940517344, "grad_norm": 2.840433120727539, "learning_rate": 6.11405345049753e-05, "loss": 1.2351, "step": 14570 }, { "epoch": 0.8685182977708904, "grad_norm": 3.043405294418335, "learning_rate": 6.113131510511468e-05, "loss": 1.3567, "step": 14572 }, { "epoch": 0.8686375014900465, "grad_norm": 2.9911859035491943, "learning_rate": 6.112209530704884e-05, "loss": 1.2161, "step": 14574 }, { "epoch": 0.8687567052092026, "grad_norm": 3.189143180847168, "learning_rate": 6.111287511110758e-05, "loss": 1.2103, "step": 14576 }, { "epoch": 0.8688759089283585, "grad_norm": 2.9387001991271973, "learning_rate": 6.110365451762075e-05, "loss": 1.3028, "step": 14578 }, { "epoch": 0.8689951126475146, "grad_norm": 3.2184066772460938, "learning_rate": 6.109443352691823e-05, "loss": 1.3586, "step": 14580 }, { "epoch": 0.8691143163666707, "grad_norm": 3.1595895290374756, "learning_rate": 6.108521213932984e-05, "loss": 1.2665, "step": 14582 }, { "epoch": 0.8692335200858267, "grad_norm": 2.887110948562622, "learning_rate": 6.10759903551855e-05, "loss": 1.2836, "step": 14584 }, { "epoch": 0.8693527238049827, "grad_norm": 3.0830531120300293, "learning_rate": 6.106676817481509e-05, "loss": 1.2972, "step": 14586 }, { "epoch": 0.8694719275241387, "grad_norm": 2.98633074760437, "learning_rate": 6.10575455985485e-05, "loss": 1.2696, "step": 14588 }, { "epoch": 0.8695911312432948, "grad_norm": 2.895937442779541, "learning_rate": 6.10483226267157e-05, "loss": 1.2271, "step": 14590 }, { "epoch": 0.8697103349624509, "grad_norm": 3.047222852706909, "learning_rate": 6.103909925964657e-05, "loss": 1.225, "step": 14592 }, { "epoch": 0.8698295386816068, "grad_norm": 3.1191136837005615, "learning_rate": 6.1029875497671106e-05, "loss": 1.261, "step": 14594 }, { "epoch": 0.8699487424007629, "grad_norm": 3.219860315322876, "learning_rate": 6.102065134111924e-05, "loss": 1.2147, "step": 14596 }, { "epoch": 0.8700679461199189, "grad_norm": 3.095388650894165, "learning_rate": 6.101142679032098e-05, "loss": 1.2918, "step": 14598 }, { "epoch": 0.870187149839075, "grad_norm": 3.117309331893921, "learning_rate": 6.100220184560631e-05, "loss": 1.3917, "step": 14600 }, { "epoch": 0.870306353558231, "grad_norm": 3.0035548210144043, "learning_rate": 6.0992976507305235e-05, "loss": 1.3467, "step": 14602 }, { "epoch": 0.870425557277387, "grad_norm": 2.651939868927002, "learning_rate": 6.098375077574777e-05, "loss": 1.2239, "step": 14604 }, { "epoch": 0.8705447609965431, "grad_norm": 3.355675458908081, "learning_rate": 6.0974524651263985e-05, "loss": 1.2394, "step": 14606 }, { "epoch": 0.8706639647156992, "grad_norm": 3.0277607440948486, "learning_rate": 6.096529813418388e-05, "loss": 1.3021, "step": 14608 }, { "epoch": 0.8707831684348551, "grad_norm": 3.330091953277588, "learning_rate": 6.0956071224837555e-05, "loss": 1.2441, "step": 14610 }, { "epoch": 0.8709023721540112, "grad_norm": 3.611927032470703, "learning_rate": 6.0946843923555064e-05, "loss": 1.2613, "step": 14612 }, { "epoch": 0.8710215758731672, "grad_norm": 2.9973158836364746, "learning_rate": 6.0937616230666514e-05, "loss": 1.2487, "step": 14614 }, { "epoch": 0.8711407795923233, "grad_norm": 3.2753753662109375, "learning_rate": 6.092838814650201e-05, "loss": 1.3753, "step": 14616 }, { "epoch": 0.8712599833114794, "grad_norm": 2.976909875869751, "learning_rate": 6.091915967139168e-05, "loss": 1.3345, "step": 14618 }, { "epoch": 0.8713791870306353, "grad_norm": 3.228844404220581, "learning_rate": 6.090993080566565e-05, "loss": 1.4305, "step": 14620 }, { "epoch": 0.8714983907497914, "grad_norm": 3.026048421859741, "learning_rate": 6.090070154965406e-05, "loss": 1.2828, "step": 14622 }, { "epoch": 0.8716175944689474, "grad_norm": 2.8607795238494873, "learning_rate": 6.0891471903687083e-05, "loss": 1.3679, "step": 14624 }, { "epoch": 0.8717367981881035, "grad_norm": 3.0685787200927734, "learning_rate": 6.088224186809489e-05, "loss": 1.3608, "step": 14626 }, { "epoch": 0.8718560019072595, "grad_norm": 2.6149990558624268, "learning_rate": 6.087301144320766e-05, "loss": 1.2446, "step": 14628 }, { "epoch": 0.8719752056264155, "grad_norm": 3.005690813064575, "learning_rate": 6.086378062935563e-05, "loss": 1.2197, "step": 14630 }, { "epoch": 0.8720944093455716, "grad_norm": 3.2668662071228027, "learning_rate": 6.085454942686898e-05, "loss": 1.3067, "step": 14632 }, { "epoch": 0.8722136130647277, "grad_norm": 3.3280389308929443, "learning_rate": 6.084531783607796e-05, "loss": 1.2973, "step": 14634 }, { "epoch": 0.8723328167838836, "grad_norm": 3.2716989517211914, "learning_rate": 6.083608585731283e-05, "loss": 1.2571, "step": 14636 }, { "epoch": 0.8724520205030397, "grad_norm": 3.2008087635040283, "learning_rate": 6.082685349090382e-05, "loss": 1.2004, "step": 14638 }, { "epoch": 0.8725712242221957, "grad_norm": 3.162691116333008, "learning_rate": 6.081762073718121e-05, "loss": 1.3503, "step": 14640 }, { "epoch": 0.8726904279413518, "grad_norm": 3.0078394412994385, "learning_rate": 6.080838759647531e-05, "loss": 1.282, "step": 14642 }, { "epoch": 0.8728096316605078, "grad_norm": 3.021949529647827, "learning_rate": 6.0799154069116404e-05, "loss": 1.4035, "step": 14644 }, { "epoch": 0.8729288353796638, "grad_norm": 3.2931957244873047, "learning_rate": 6.07899201554348e-05, "loss": 1.4947, "step": 14646 }, { "epoch": 0.8730480390988199, "grad_norm": 2.9533562660217285, "learning_rate": 6.0780685855760834e-05, "loss": 1.1752, "step": 14648 }, { "epoch": 0.873167242817976, "grad_norm": 2.9080727100372314, "learning_rate": 6.0771451170424864e-05, "loss": 1.2552, "step": 14650 }, { "epoch": 0.8732864465371319, "grad_norm": 2.9034199714660645, "learning_rate": 6.076221609975722e-05, "loss": 1.3067, "step": 14652 }, { "epoch": 0.873405650256288, "grad_norm": 3.0978050231933594, "learning_rate": 6.0752980644088276e-05, "loss": 1.322, "step": 14654 }, { "epoch": 0.873524853975444, "grad_norm": 3.223433494567871, "learning_rate": 6.074374480374844e-05, "loss": 1.2558, "step": 14656 }, { "epoch": 0.8736440576946001, "grad_norm": 3.0647144317626953, "learning_rate": 6.0734508579068085e-05, "loss": 1.3385, "step": 14658 }, { "epoch": 0.8737632614137562, "grad_norm": 3.3843839168548584, "learning_rate": 6.0725271970377614e-05, "loss": 1.101, "step": 14660 }, { "epoch": 0.8738824651329121, "grad_norm": 3.3264424800872803, "learning_rate": 6.0716034978007496e-05, "loss": 1.2784, "step": 14662 }, { "epoch": 0.8740016688520682, "grad_norm": 2.9482624530792236, "learning_rate": 6.0706797602288135e-05, "loss": 1.3123, "step": 14664 }, { "epoch": 0.8741208725712242, "grad_norm": 2.9093539714813232, "learning_rate": 6.069755984354999e-05, "loss": 1.2128, "step": 14666 }, { "epoch": 0.8742400762903803, "grad_norm": 3.049292802810669, "learning_rate": 6.068832170212352e-05, "loss": 1.3342, "step": 14668 }, { "epoch": 0.8743592800095363, "grad_norm": 2.758542060852051, "learning_rate": 6.067908317833922e-05, "loss": 1.2497, "step": 14670 }, { "epoch": 0.8744784837286923, "grad_norm": 3.0181968212127686, "learning_rate": 6.0669844272527565e-05, "loss": 1.4708, "step": 14672 }, { "epoch": 0.8745976874478484, "grad_norm": 2.796438455581665, "learning_rate": 6.0660604985019084e-05, "loss": 1.3618, "step": 14674 }, { "epoch": 0.8747168911670045, "grad_norm": 3.1158018112182617, "learning_rate": 6.065136531614429e-05, "loss": 1.4136, "step": 14676 }, { "epoch": 0.8748360948861604, "grad_norm": 2.8597214221954346, "learning_rate": 6.064212526623372e-05, "loss": 1.2249, "step": 14678 }, { "epoch": 0.8749552986053165, "grad_norm": 3.1876611709594727, "learning_rate": 6.063288483561791e-05, "loss": 1.4646, "step": 14680 }, { "epoch": 0.8750745023244725, "grad_norm": 2.9742226600646973, "learning_rate": 6.0623644024627436e-05, "loss": 1.4041, "step": 14682 }, { "epoch": 0.8751937060436286, "grad_norm": 2.9028661251068115, "learning_rate": 6.0614402833592844e-05, "loss": 1.2767, "step": 14684 }, { "epoch": 0.8753129097627846, "grad_norm": 3.3674004077911377, "learning_rate": 6.0605161262844766e-05, "loss": 1.3727, "step": 14686 }, { "epoch": 0.8754321134819406, "grad_norm": 3.088831901550293, "learning_rate": 6.059591931271379e-05, "loss": 1.2983, "step": 14688 }, { "epoch": 0.8755513172010967, "grad_norm": 3.2253823280334473, "learning_rate": 6.058667698353051e-05, "loss": 1.4241, "step": 14690 }, { "epoch": 0.8756705209202527, "grad_norm": 3.0398848056793213, "learning_rate": 6.057743427562558e-05, "loss": 1.3179, "step": 14692 }, { "epoch": 0.8757897246394087, "grad_norm": 3.1850883960723877, "learning_rate": 6.056819118932964e-05, "loss": 1.2715, "step": 14694 }, { "epoch": 0.8759089283585648, "grad_norm": 3.0126922130584717, "learning_rate": 6.055894772497335e-05, "loss": 1.2725, "step": 14696 }, { "epoch": 0.8760281320777208, "grad_norm": 2.666506767272949, "learning_rate": 6.0549703882887346e-05, "loss": 1.3769, "step": 14698 }, { "epoch": 0.8761473357968769, "grad_norm": 3.134078025817871, "learning_rate": 6.054045966340236e-05, "loss": 1.236, "step": 14700 }, { "epoch": 0.876266539516033, "grad_norm": 3.231548547744751, "learning_rate": 6.0531215066849055e-05, "loss": 1.347, "step": 14702 }, { "epoch": 0.8763857432351889, "grad_norm": 3.150926113128662, "learning_rate": 6.0521970093558156e-05, "loss": 1.3371, "step": 14704 }, { "epoch": 0.876504946954345, "grad_norm": 3.413640022277832, "learning_rate": 6.05127247438604e-05, "loss": 1.4117, "step": 14706 }, { "epoch": 0.876624150673501, "grad_norm": 3.5087759494781494, "learning_rate": 6.0503479018086495e-05, "loss": 1.2871, "step": 14708 }, { "epoch": 0.876743354392657, "grad_norm": 3.2286202907562256, "learning_rate": 6.049423291656722e-05, "loss": 1.2977, "step": 14710 }, { "epoch": 0.8768625581118131, "grad_norm": 3.2916371822357178, "learning_rate": 6.048498643963333e-05, "loss": 1.387, "step": 14712 }, { "epoch": 0.8769817618309691, "grad_norm": 3.18882417678833, "learning_rate": 6.047573958761559e-05, "loss": 1.3478, "step": 14714 }, { "epoch": 0.8771009655501252, "grad_norm": 2.6982991695404053, "learning_rate": 6.04664923608448e-05, "loss": 1.1965, "step": 14716 }, { "epoch": 0.8772201692692811, "grad_norm": 2.8173580169677734, "learning_rate": 6.0457244759651776e-05, "loss": 1.2104, "step": 14718 }, { "epoch": 0.8773393729884372, "grad_norm": 2.8902370929718018, "learning_rate": 6.044799678436733e-05, "loss": 1.2112, "step": 14720 }, { "epoch": 0.8774585767075933, "grad_norm": 2.9531033039093018, "learning_rate": 6.043874843532229e-05, "loss": 1.2371, "step": 14722 }, { "epoch": 0.8775777804267493, "grad_norm": 3.218222141265869, "learning_rate": 6.042949971284749e-05, "loss": 1.1948, "step": 14724 }, { "epoch": 0.8776969841459054, "grad_norm": 3.1756246089935303, "learning_rate": 6.0420250617273835e-05, "loss": 1.2984, "step": 14726 }, { "epoch": 0.8778161878650614, "grad_norm": 3.159470319747925, "learning_rate": 6.0411001148932134e-05, "loss": 1.1775, "step": 14728 }, { "epoch": 0.8779353915842174, "grad_norm": 3.0660171508789062, "learning_rate": 6.04017513081533e-05, "loss": 1.3379, "step": 14730 }, { "epoch": 0.8780545953033735, "grad_norm": 3.6810288429260254, "learning_rate": 6.039250109526824e-05, "loss": 1.2598, "step": 14732 }, { "epoch": 0.8781737990225295, "grad_norm": 3.0399045944213867, "learning_rate": 6.038325051060786e-05, "loss": 1.2296, "step": 14734 }, { "epoch": 0.8782930027416855, "grad_norm": 2.819394111633301, "learning_rate": 6.037399955450307e-05, "loss": 1.113, "step": 14736 }, { "epoch": 0.8784122064608416, "grad_norm": 3.2427430152893066, "learning_rate": 6.036474822728484e-05, "loss": 1.2823, "step": 14738 }, { "epoch": 0.8785314101799976, "grad_norm": 3.023047924041748, "learning_rate": 6.0355496529284106e-05, "loss": 1.2861, "step": 14740 }, { "epoch": 0.8786506138991537, "grad_norm": 2.858018159866333, "learning_rate": 6.0346244460831814e-05, "loss": 1.1948, "step": 14742 }, { "epoch": 0.8787698176183097, "grad_norm": 3.458176612854004, "learning_rate": 6.033699202225896e-05, "loss": 1.303, "step": 14744 }, { "epoch": 0.8788890213374657, "grad_norm": 3.1936709880828857, "learning_rate": 6.032773921389655e-05, "loss": 1.2597, "step": 14746 }, { "epoch": 0.8790082250566218, "grad_norm": 3.0636825561523438, "learning_rate": 6.031848603607555e-05, "loss": 1.2506, "step": 14748 }, { "epoch": 0.8791274287757778, "grad_norm": 2.7684786319732666, "learning_rate": 6.030923248912701e-05, "loss": 1.2209, "step": 14750 }, { "epoch": 0.8792466324949338, "grad_norm": 2.9465138912200928, "learning_rate": 6.0299978573381954e-05, "loss": 1.2751, "step": 14752 }, { "epoch": 0.8793658362140899, "grad_norm": 3.126288890838623, "learning_rate": 6.029072428917143e-05, "loss": 1.1584, "step": 14754 }, { "epoch": 0.8794850399332459, "grad_norm": 3.3710741996765137, "learning_rate": 6.028146963682648e-05, "loss": 1.4699, "step": 14756 }, { "epoch": 0.879604243652402, "grad_norm": 2.955930233001709, "learning_rate": 6.027221461667818e-05, "loss": 1.2498, "step": 14758 }, { "epoch": 0.8797234473715579, "grad_norm": 2.9057564735412598, "learning_rate": 6.026295922905763e-05, "loss": 1.2988, "step": 14760 }, { "epoch": 0.879842651090714, "grad_norm": 3.0328400135040283, "learning_rate": 6.02537034742959e-05, "loss": 1.3979, "step": 14762 }, { "epoch": 0.8799618548098701, "grad_norm": 3.214416742324829, "learning_rate": 6.024444735272412e-05, "loss": 1.2861, "step": 14764 }, { "epoch": 0.8800810585290261, "grad_norm": 3.3479666709899902, "learning_rate": 6.0235190864673406e-05, "loss": 1.4849, "step": 14766 }, { "epoch": 0.8802002622481822, "grad_norm": 3.275750160217285, "learning_rate": 6.0225934010474886e-05, "loss": 1.3221, "step": 14768 }, { "epoch": 0.8803194659673382, "grad_norm": 3.171750783920288, "learning_rate": 6.0216676790459735e-05, "loss": 1.3678, "step": 14770 }, { "epoch": 0.8804386696864942, "grad_norm": 3.083010673522949, "learning_rate": 6.0207419204959104e-05, "loss": 1.35, "step": 14772 }, { "epoch": 0.8805578734056503, "grad_norm": 2.962048292160034, "learning_rate": 6.019816125430414e-05, "loss": 1.3129, "step": 14774 }, { "epoch": 0.8806770771248063, "grad_norm": 3.1976206302642822, "learning_rate": 6.0188902938826065e-05, "loss": 1.4329, "step": 14776 }, { "epoch": 0.8807962808439623, "grad_norm": 3.222465753555298, "learning_rate": 6.0179644258856084e-05, "loss": 1.575, "step": 14778 }, { "epoch": 0.8809154845631184, "grad_norm": 3.2317845821380615, "learning_rate": 6.017038521472538e-05, "loss": 1.3193, "step": 14780 }, { "epoch": 0.8810346882822744, "grad_norm": 3.343311071395874, "learning_rate": 6.016112580676521e-05, "loss": 1.3012, "step": 14782 }, { "epoch": 0.8811538920014305, "grad_norm": 3.2002766132354736, "learning_rate": 6.01518660353068e-05, "loss": 1.21, "step": 14784 }, { "epoch": 0.8812730957205864, "grad_norm": 3.2004427909851074, "learning_rate": 6.0142605900681415e-05, "loss": 1.3083, "step": 14786 }, { "epoch": 0.8813922994397425, "grad_norm": 3.1810405254364014, "learning_rate": 6.013334540322031e-05, "loss": 1.2253, "step": 14788 }, { "epoch": 0.8815115031588986, "grad_norm": 3.1330904960632324, "learning_rate": 6.0124084543254764e-05, "loss": 1.1127, "step": 14790 }, { "epoch": 0.8816307068780546, "grad_norm": 3.279287099838257, "learning_rate": 6.0114823321116084e-05, "loss": 1.2859, "step": 14792 }, { "epoch": 0.8817499105972106, "grad_norm": 3.0909807682037354, "learning_rate": 6.0105561737135555e-05, "loss": 1.3616, "step": 14794 }, { "epoch": 0.8818691143163667, "grad_norm": 3.5020530223846436, "learning_rate": 6.009629979164451e-05, "loss": 1.248, "step": 14796 }, { "epoch": 0.8819883180355227, "grad_norm": 3.4301679134368896, "learning_rate": 6.008703748497429e-05, "loss": 1.6387, "step": 14798 }, { "epoch": 0.8821075217546788, "grad_norm": 2.8838207721710205, "learning_rate": 6.00777748174562e-05, "loss": 1.1739, "step": 14800 }, { "epoch": 0.8822267254738347, "grad_norm": 3.0945839881896973, "learning_rate": 6.0068511789421655e-05, "loss": 1.31, "step": 14802 }, { "epoch": 0.8823459291929908, "grad_norm": 3.0732717514038086, "learning_rate": 6.005924840120198e-05, "loss": 1.461, "step": 14804 }, { "epoch": 0.8824651329121469, "grad_norm": 2.8663558959960938, "learning_rate": 6.0049984653128565e-05, "loss": 1.0919, "step": 14806 }, { "epoch": 0.8825843366313029, "grad_norm": 3.238856315612793, "learning_rate": 6.004072054553283e-05, "loss": 1.2184, "step": 14808 }, { "epoch": 0.882703540350459, "grad_norm": 3.0264315605163574, "learning_rate": 6.003145607874615e-05, "loss": 1.2422, "step": 14810 }, { "epoch": 0.8828227440696149, "grad_norm": 3.2172231674194336, "learning_rate": 6.0022191253099955e-05, "loss": 1.2897, "step": 14812 }, { "epoch": 0.882941947788771, "grad_norm": 2.7402522563934326, "learning_rate": 6.0012926068925714e-05, "loss": 1.2827, "step": 14814 }, { "epoch": 0.8830611515079271, "grad_norm": 3.1645162105560303, "learning_rate": 6.000366052655485e-05, "loss": 1.2288, "step": 14816 }, { "epoch": 0.883180355227083, "grad_norm": 3.3454177379608154, "learning_rate": 5.999439462631881e-05, "loss": 1.2691, "step": 14818 }, { "epoch": 0.8832995589462391, "grad_norm": 3.2436366081237793, "learning_rate": 5.998512836854908e-05, "loss": 1.2725, "step": 14820 }, { "epoch": 0.8834187626653952, "grad_norm": 3.2999179363250732, "learning_rate": 5.997586175357714e-05, "loss": 1.2807, "step": 14822 }, { "epoch": 0.8835379663845512, "grad_norm": 3.109848737716675, "learning_rate": 5.9966594781734506e-05, "loss": 1.2199, "step": 14824 }, { "epoch": 0.8836571701037073, "grad_norm": 3.3368849754333496, "learning_rate": 5.995732745335265e-05, "loss": 1.3187, "step": 14826 }, { "epoch": 0.8837763738228632, "grad_norm": 3.0849547386169434, "learning_rate": 5.9948059768763156e-05, "loss": 1.376, "step": 14828 }, { "epoch": 0.8838955775420193, "grad_norm": 2.981924533843994, "learning_rate": 5.993879172829751e-05, "loss": 1.1607, "step": 14830 }, { "epoch": 0.8840147812611754, "grad_norm": 3.2624635696411133, "learning_rate": 5.992952333228728e-05, "loss": 1.2867, "step": 14832 }, { "epoch": 0.8841339849803314, "grad_norm": 3.1837735176086426, "learning_rate": 5.992025458106403e-05, "loss": 1.3347, "step": 14834 }, { "epoch": 0.8842531886994874, "grad_norm": 2.904888868331909, "learning_rate": 5.991098547495933e-05, "loss": 1.2484, "step": 14836 }, { "epoch": 0.8843723924186435, "grad_norm": 2.9344003200531006, "learning_rate": 5.990171601430477e-05, "loss": 1.2232, "step": 14838 }, { "epoch": 0.8844915961377995, "grad_norm": 3.0922093391418457, "learning_rate": 5.989244619943194e-05, "loss": 1.2651, "step": 14840 }, { "epoch": 0.8846107998569556, "grad_norm": 3.16152024269104, "learning_rate": 5.988317603067247e-05, "loss": 1.3717, "step": 14842 }, { "epoch": 0.8847300035761115, "grad_norm": 3.0886831283569336, "learning_rate": 5.9873905508357973e-05, "loss": 1.2611, "step": 14844 }, { "epoch": 0.8848492072952676, "grad_norm": 3.1117522716522217, "learning_rate": 5.986463463282011e-05, "loss": 1.2472, "step": 14846 }, { "epoch": 0.8849684110144237, "grad_norm": 2.9492270946502686, "learning_rate": 5.9855363404390505e-05, "loss": 1.3204, "step": 14848 }, { "epoch": 0.8850876147335797, "grad_norm": 2.5579936504364014, "learning_rate": 5.984609182340082e-05, "loss": 1.1696, "step": 14850 }, { "epoch": 0.8852068184527357, "grad_norm": 3.155569076538086, "learning_rate": 5.983681989018276e-05, "loss": 1.4447, "step": 14852 }, { "epoch": 0.8853260221718917, "grad_norm": 2.9421679973602295, "learning_rate": 5.982754760506799e-05, "loss": 1.1964, "step": 14854 }, { "epoch": 0.8854452258910478, "grad_norm": 3.233811140060425, "learning_rate": 5.981827496838822e-05, "loss": 1.3293, "step": 14856 }, { "epoch": 0.8855644296102039, "grad_norm": 3.0927205085754395, "learning_rate": 5.980900198047514e-05, "loss": 1.2165, "step": 14858 }, { "epoch": 0.8856836333293598, "grad_norm": 3.342527151107788, "learning_rate": 5.9799728641660525e-05, "loss": 1.4385, "step": 14860 }, { "epoch": 0.8858028370485159, "grad_norm": 3.1769754886627197, "learning_rate": 5.979045495227609e-05, "loss": 1.2587, "step": 14862 }, { "epoch": 0.885922040767672, "grad_norm": 2.9140844345092773, "learning_rate": 5.978118091265357e-05, "loss": 1.2522, "step": 14864 }, { "epoch": 0.886041244486828, "grad_norm": 2.846350908279419, "learning_rate": 5.977190652312474e-05, "loss": 1.2398, "step": 14866 }, { "epoch": 0.886160448205984, "grad_norm": 2.741870403289795, "learning_rate": 5.976263178402138e-05, "loss": 1.2629, "step": 14868 }, { "epoch": 0.88627965192514, "grad_norm": 2.9028737545013428, "learning_rate": 5.975335669567528e-05, "loss": 1.1995, "step": 14870 }, { "epoch": 0.8863988556442961, "grad_norm": 2.9476118087768555, "learning_rate": 5.974408125841824e-05, "loss": 1.1643, "step": 14872 }, { "epoch": 0.8865180593634522, "grad_norm": 2.991964101791382, "learning_rate": 5.973480547258208e-05, "loss": 1.2735, "step": 14874 }, { "epoch": 0.8866372630826082, "grad_norm": 2.942568778991699, "learning_rate": 5.972552933849862e-05, "loss": 1.3145, "step": 14876 }, { "epoch": 0.8867564668017642, "grad_norm": 3.2482612133026123, "learning_rate": 5.97162528564997e-05, "loss": 1.3104, "step": 14878 }, { "epoch": 0.8868756705209202, "grad_norm": 3.085080623626709, "learning_rate": 5.970697602691717e-05, "loss": 1.4296, "step": 14880 }, { "epoch": 0.8869948742400763, "grad_norm": 2.970402479171753, "learning_rate": 5.969769885008289e-05, "loss": 1.2486, "step": 14882 }, { "epoch": 0.8871140779592324, "grad_norm": 3.138028144836426, "learning_rate": 5.968842132632875e-05, "loss": 1.1822, "step": 14884 }, { "epoch": 0.8872332816783883, "grad_norm": 3.012903928756714, "learning_rate": 5.967914345598663e-05, "loss": 1.3403, "step": 14886 }, { "epoch": 0.8873524853975444, "grad_norm": 3.174286365509033, "learning_rate": 5.966986523938843e-05, "loss": 1.2914, "step": 14888 }, { "epoch": 0.8874716891167005, "grad_norm": 3.3534207344055176, "learning_rate": 5.966058667686606e-05, "loss": 1.351, "step": 14890 }, { "epoch": 0.8875908928358565, "grad_norm": 3.207479953765869, "learning_rate": 5.9651307768751465e-05, "loss": 1.2808, "step": 14892 }, { "epoch": 0.8877100965550125, "grad_norm": 3.214101552963257, "learning_rate": 5.9642028515376566e-05, "loss": 1.4336, "step": 14894 }, { "epoch": 0.8878293002741685, "grad_norm": 3.4142186641693115, "learning_rate": 5.96327489170733e-05, "loss": 1.2366, "step": 14896 }, { "epoch": 0.8879485039933246, "grad_norm": 2.9126760959625244, "learning_rate": 5.962346897417366e-05, "loss": 1.2943, "step": 14898 }, { "epoch": 0.8880677077124807, "grad_norm": 3.158895492553711, "learning_rate": 5.961418868700961e-05, "loss": 1.4027, "step": 14900 }, { "epoch": 0.8881869114316366, "grad_norm": 3.1002142429351807, "learning_rate": 5.9604908055913124e-05, "loss": 1.3423, "step": 14902 }, { "epoch": 0.8883061151507927, "grad_norm": 3.1214277744293213, "learning_rate": 5.959562708121622e-05, "loss": 1.1997, "step": 14904 }, { "epoch": 0.8884253188699487, "grad_norm": 3.403830051422119, "learning_rate": 5.9586345763250925e-05, "loss": 1.2735, "step": 14906 }, { "epoch": 0.8885445225891048, "grad_norm": 3.0048863887786865, "learning_rate": 5.957706410234921e-05, "loss": 1.2003, "step": 14908 }, { "epoch": 0.8886637263082608, "grad_norm": 2.90708065032959, "learning_rate": 5.9567782098843175e-05, "loss": 1.3338, "step": 14910 }, { "epoch": 0.8887829300274168, "grad_norm": 3.3757941722869873, "learning_rate": 5.955849975306482e-05, "loss": 1.2298, "step": 14912 }, { "epoch": 0.8889021337465729, "grad_norm": 3.3794922828674316, "learning_rate": 5.9549217065346216e-05, "loss": 1.3997, "step": 14914 }, { "epoch": 0.889021337465729, "grad_norm": 3.201664924621582, "learning_rate": 5.953993403601946e-05, "loss": 1.2564, "step": 14916 }, { "epoch": 0.889140541184885, "grad_norm": 3.2699198722839355, "learning_rate": 5.953065066541662e-05, "loss": 1.3198, "step": 14918 }, { "epoch": 0.889259744904041, "grad_norm": 3.139646053314209, "learning_rate": 5.9521366953869805e-05, "loss": 1.3654, "step": 14920 }, { "epoch": 0.889378948623197, "grad_norm": 3.1343047618865967, "learning_rate": 5.9512082901711106e-05, "loss": 1.5245, "step": 14922 }, { "epoch": 0.8894981523423531, "grad_norm": 3.240347146987915, "learning_rate": 5.950279850927266e-05, "loss": 1.3066, "step": 14924 }, { "epoch": 0.8896173560615092, "grad_norm": 2.7847023010253906, "learning_rate": 5.94935137768866e-05, "loss": 1.2833, "step": 14926 }, { "epoch": 0.8897365597806651, "grad_norm": 3.175248146057129, "learning_rate": 5.9484228704885056e-05, "loss": 1.2443, "step": 14928 }, { "epoch": 0.8898557634998212, "grad_norm": 3.098127603530884, "learning_rate": 5.947494329360021e-05, "loss": 1.2444, "step": 14930 }, { "epoch": 0.8899749672189773, "grad_norm": 2.9324686527252197, "learning_rate": 5.946565754336424e-05, "loss": 1.2535, "step": 14932 }, { "epoch": 0.8900941709381333, "grad_norm": 3.4425559043884277, "learning_rate": 5.945637145450928e-05, "loss": 1.2701, "step": 14934 }, { "epoch": 0.8902133746572893, "grad_norm": 3.1765758991241455, "learning_rate": 5.94470850273676e-05, "loss": 1.2468, "step": 14936 }, { "epoch": 0.8903325783764453, "grad_norm": 3.1113433837890625, "learning_rate": 5.943779826227135e-05, "loss": 1.4118, "step": 14938 }, { "epoch": 0.8904517820956014, "grad_norm": 3.085136890411377, "learning_rate": 5.942851115955275e-05, "loss": 1.1979, "step": 14940 }, { "epoch": 0.8905709858147575, "grad_norm": 3.194791793823242, "learning_rate": 5.941922371954406e-05, "loss": 1.2639, "step": 14942 }, { "epoch": 0.8906901895339134, "grad_norm": 3.0816168785095215, "learning_rate": 5.940993594257751e-05, "loss": 1.3382, "step": 14944 }, { "epoch": 0.8908093932530695, "grad_norm": 2.975353956222534, "learning_rate": 5.940064782898534e-05, "loss": 1.2466, "step": 14946 }, { "epoch": 0.8909285969722255, "grad_norm": 3.0031211376190186, "learning_rate": 5.939135937909985e-05, "loss": 1.3325, "step": 14948 }, { "epoch": 0.8910478006913816, "grad_norm": 3.1966278553009033, "learning_rate": 5.938207059325329e-05, "loss": 1.2119, "step": 14950 }, { "epoch": 0.8911670044105376, "grad_norm": 2.721402645111084, "learning_rate": 5.937278147177798e-05, "loss": 1.2592, "step": 14952 }, { "epoch": 0.8912862081296936, "grad_norm": 3.0581064224243164, "learning_rate": 5.9363492015006205e-05, "loss": 1.2674, "step": 14954 }, { "epoch": 0.8914054118488497, "grad_norm": 3.6539039611816406, "learning_rate": 5.935420222327028e-05, "loss": 1.3371, "step": 14956 }, { "epoch": 0.8915246155680058, "grad_norm": 3.347041368484497, "learning_rate": 5.9344912096902535e-05, "loss": 1.2933, "step": 14958 }, { "epoch": 0.8916438192871617, "grad_norm": 3.0959701538085938, "learning_rate": 5.9335621636235306e-05, "loss": 1.17, "step": 14960 }, { "epoch": 0.8917630230063178, "grad_norm": 3.3487281799316406, "learning_rate": 5.932633084160095e-05, "loss": 1.2143, "step": 14962 }, { "epoch": 0.8918822267254738, "grad_norm": 2.9741392135620117, "learning_rate": 5.931703971333185e-05, "loss": 1.1186, "step": 14964 }, { "epoch": 0.8920014304446299, "grad_norm": 2.920914649963379, "learning_rate": 5.930774825176034e-05, "loss": 1.1413, "step": 14966 }, { "epoch": 0.892120634163786, "grad_norm": 3.053100109100342, "learning_rate": 5.9298456457218845e-05, "loss": 1.3502, "step": 14968 }, { "epoch": 0.8922398378829419, "grad_norm": 2.9074795246124268, "learning_rate": 5.928916433003974e-05, "loss": 1.2812, "step": 14970 }, { "epoch": 0.892359041602098, "grad_norm": 3.6963987350463867, "learning_rate": 5.9279871870555434e-05, "loss": 1.2811, "step": 14972 }, { "epoch": 0.892478245321254, "grad_norm": 3.314669609069824, "learning_rate": 5.9270579079098366e-05, "loss": 1.3662, "step": 14974 }, { "epoch": 0.89259744904041, "grad_norm": 3.303596258163452, "learning_rate": 5.926128595600098e-05, "loss": 1.2941, "step": 14976 }, { "epoch": 0.8927166527595661, "grad_norm": 2.9979960918426514, "learning_rate": 5.9251992501595685e-05, "loss": 1.302, "step": 14978 }, { "epoch": 0.8928358564787221, "grad_norm": 3.034289598464966, "learning_rate": 5.924269871621497e-05, "loss": 1.2768, "step": 14980 }, { "epoch": 0.8929550601978782, "grad_norm": 2.841207504272461, "learning_rate": 5.923340460019132e-05, "loss": 1.2531, "step": 14982 }, { "epoch": 0.8930742639170343, "grad_norm": 3.0676820278167725, "learning_rate": 5.9224110153857174e-05, "loss": 1.1713, "step": 14984 }, { "epoch": 0.8931934676361902, "grad_norm": 2.945582866668701, "learning_rate": 5.921481537754505e-05, "loss": 1.2289, "step": 14986 }, { "epoch": 0.8933126713553463, "grad_norm": 2.8312265872955322, "learning_rate": 5.920552027158747e-05, "loss": 1.2845, "step": 14988 }, { "epoch": 0.8934318750745023, "grad_norm": 3.4757115840911865, "learning_rate": 5.9196224836316916e-05, "loss": 1.3895, "step": 14990 }, { "epoch": 0.8935510787936584, "grad_norm": 2.6735334396362305, "learning_rate": 5.918692907206592e-05, "loss": 1.1629, "step": 14992 }, { "epoch": 0.8936702825128144, "grad_norm": 2.8768038749694824, "learning_rate": 5.9177632979167066e-05, "loss": 1.1465, "step": 14994 }, { "epoch": 0.8937894862319704, "grad_norm": 3.2369306087493896, "learning_rate": 5.916833655795287e-05, "loss": 1.309, "step": 14996 }, { "epoch": 0.8939086899511265, "grad_norm": 3.27693247795105, "learning_rate": 5.915903980875591e-05, "loss": 1.1435, "step": 14998 }, { "epoch": 0.8940278936702825, "grad_norm": 3.258014678955078, "learning_rate": 5.914974273190875e-05, "loss": 1.3728, "step": 15000 }, { "epoch": 0.8941470973894385, "grad_norm": 2.9096052646636963, "learning_rate": 5.9140445327744e-05, "loss": 1.2332, "step": 15002 }, { "epoch": 0.8942663011085946, "grad_norm": 3.0527725219726562, "learning_rate": 5.913114759659423e-05, "loss": 1.3339, "step": 15004 }, { "epoch": 0.8943855048277506, "grad_norm": 3.006009101867676, "learning_rate": 5.9121849538792075e-05, "loss": 1.3147, "step": 15006 }, { "epoch": 0.8945047085469067, "grad_norm": 3.5298333168029785, "learning_rate": 5.911255115467016e-05, "loss": 1.3561, "step": 15008 }, { "epoch": 0.8946239122660627, "grad_norm": 3.3512299060821533, "learning_rate": 5.91032524445611e-05, "loss": 1.4582, "step": 15010 }, { "epoch": 0.8947431159852187, "grad_norm": 3.064276933670044, "learning_rate": 5.909395340879757e-05, "loss": 1.2212, "step": 15012 }, { "epoch": 0.8948623197043748, "grad_norm": 3.2891287803649902, "learning_rate": 5.9084654047712195e-05, "loss": 1.2812, "step": 15014 }, { "epoch": 0.8949815234235308, "grad_norm": 3.2462003231048584, "learning_rate": 5.907535436163767e-05, "loss": 1.2256, "step": 15016 }, { "epoch": 0.8951007271426868, "grad_norm": 3.2900664806365967, "learning_rate": 5.906605435090666e-05, "loss": 1.3698, "step": 15018 }, { "epoch": 0.8952199308618429, "grad_norm": 2.9239537715911865, "learning_rate": 5.905675401585188e-05, "loss": 1.3064, "step": 15020 }, { "epoch": 0.8953391345809989, "grad_norm": 3.0244224071502686, "learning_rate": 5.9047453356806014e-05, "loss": 1.2128, "step": 15022 }, { "epoch": 0.895458338300155, "grad_norm": 3.352764368057251, "learning_rate": 5.903815237410177e-05, "loss": 1.2217, "step": 15024 }, { "epoch": 0.8955775420193111, "grad_norm": 2.989612340927124, "learning_rate": 5.902885106807193e-05, "loss": 1.2798, "step": 15026 }, { "epoch": 0.895696745738467, "grad_norm": 2.7444376945495605, "learning_rate": 5.901954943904916e-05, "loss": 1.1185, "step": 15028 }, { "epoch": 0.8958159494576231, "grad_norm": 3.3348352909088135, "learning_rate": 5.9010247487366266e-05, "loss": 1.3116, "step": 15030 }, { "epoch": 0.8959351531767791, "grad_norm": 2.9261085987091064, "learning_rate": 5.900094521335599e-05, "loss": 1.3665, "step": 15032 }, { "epoch": 0.8960543568959352, "grad_norm": 3.4823474884033203, "learning_rate": 5.899164261735111e-05, "loss": 1.1522, "step": 15034 }, { "epoch": 0.8961735606150912, "grad_norm": 2.8625686168670654, "learning_rate": 5.898233969968438e-05, "loss": 1.2459, "step": 15036 }, { "epoch": 0.8962927643342472, "grad_norm": 3.1618196964263916, "learning_rate": 5.8973036460688655e-05, "loss": 1.343, "step": 15038 }, { "epoch": 0.8964119680534033, "grad_norm": 2.890392780303955, "learning_rate": 5.896373290069671e-05, "loss": 1.2445, "step": 15040 }, { "epoch": 0.8965311717725593, "grad_norm": 2.7245614528656006, "learning_rate": 5.895442902004137e-05, "loss": 1.2767, "step": 15042 }, { "epoch": 0.8966503754917153, "grad_norm": 3.312976121902466, "learning_rate": 5.8945124819055474e-05, "loss": 1.3653, "step": 15044 }, { "epoch": 0.8967695792108714, "grad_norm": 3.149735689163208, "learning_rate": 5.893582029807184e-05, "loss": 1.2145, "step": 15046 }, { "epoch": 0.8968887829300274, "grad_norm": 3.0042848587036133, "learning_rate": 5.8926515457423345e-05, "loss": 1.2033, "step": 15048 }, { "epoch": 0.8970079866491835, "grad_norm": 3.074526071548462, "learning_rate": 5.891721029744286e-05, "loss": 1.1317, "step": 15050 }, { "epoch": 0.8971271903683395, "grad_norm": 2.7973313331604004, "learning_rate": 5.890790481846325e-05, "loss": 1.3542, "step": 15052 }, { "epoch": 0.8972463940874955, "grad_norm": 3.68938946723938, "learning_rate": 5.88985990208174e-05, "loss": 1.2887, "step": 15054 }, { "epoch": 0.8973655978066516, "grad_norm": 2.8177413940429688, "learning_rate": 5.888929290483821e-05, "loss": 1.3212, "step": 15056 }, { "epoch": 0.8974848015258076, "grad_norm": 3.074862003326416, "learning_rate": 5.887998647085863e-05, "loss": 1.2812, "step": 15058 }, { "epoch": 0.8976040052449636, "grad_norm": 3.1106600761413574, "learning_rate": 5.887067971921153e-05, "loss": 1.2587, "step": 15060 }, { "epoch": 0.8977232089641197, "grad_norm": 3.013030767440796, "learning_rate": 5.886137265022985e-05, "loss": 1.3285, "step": 15062 }, { "epoch": 0.8978424126832757, "grad_norm": 3.2160794734954834, "learning_rate": 5.885206526424658e-05, "loss": 1.2829, "step": 15064 }, { "epoch": 0.8979616164024318, "grad_norm": 2.943115234375, "learning_rate": 5.884275756159463e-05, "loss": 1.1793, "step": 15066 }, { "epoch": 0.8980808201215877, "grad_norm": 3.2814552783966064, "learning_rate": 5.883344954260699e-05, "loss": 1.4, "step": 15068 }, { "epoch": 0.8982000238407438, "grad_norm": 4.383947849273682, "learning_rate": 5.8824141207616636e-05, "loss": 1.2095, "step": 15070 }, { "epoch": 0.8983192275598999, "grad_norm": 2.6673741340637207, "learning_rate": 5.881483255695658e-05, "loss": 1.1518, "step": 15072 }, { "epoch": 0.8984384312790559, "grad_norm": 3.238901376724243, "learning_rate": 5.880552359095978e-05, "loss": 1.2796, "step": 15074 }, { "epoch": 0.898557634998212, "grad_norm": 3.1335701942443848, "learning_rate": 5.8796214309959276e-05, "loss": 1.3642, "step": 15076 }, { "epoch": 0.898676838717368, "grad_norm": 2.7762296199798584, "learning_rate": 5.8786904714288094e-05, "loss": 1.1926, "step": 15078 }, { "epoch": 0.898796042436524, "grad_norm": 3.4214224815368652, "learning_rate": 5.8777594804279266e-05, "loss": 1.5172, "step": 15080 }, { "epoch": 0.8989152461556801, "grad_norm": 3.2912135124206543, "learning_rate": 5.8768284580265844e-05, "loss": 1.2644, "step": 15082 }, { "epoch": 0.899034449874836, "grad_norm": 2.7692222595214844, "learning_rate": 5.8758974042580875e-05, "loss": 1.1774, "step": 15084 }, { "epoch": 0.8991536535939921, "grad_norm": 3.4128940105438232, "learning_rate": 5.874966319155743e-05, "loss": 1.3049, "step": 15086 }, { "epoch": 0.8992728573131482, "grad_norm": 3.2781729698181152, "learning_rate": 5.8740352027528625e-05, "loss": 1.2879, "step": 15088 }, { "epoch": 0.8993920610323042, "grad_norm": 3.4312779903411865, "learning_rate": 5.87310405508275e-05, "loss": 1.3104, "step": 15090 }, { "epoch": 0.8995112647514603, "grad_norm": 3.152648687362671, "learning_rate": 5.8721728761787186e-05, "loss": 1.3408, "step": 15092 }, { "epoch": 0.8996304684706162, "grad_norm": 2.9223170280456543, "learning_rate": 5.871241666074079e-05, "loss": 1.3575, "step": 15094 }, { "epoch": 0.8997496721897723, "grad_norm": 3.0424225330352783, "learning_rate": 5.870310424802143e-05, "loss": 1.1837, "step": 15096 }, { "epoch": 0.8998688759089284, "grad_norm": 3.4768128395080566, "learning_rate": 5.869379152396228e-05, "loss": 1.2992, "step": 15098 }, { "epoch": 0.8999880796280844, "grad_norm": 3.0199968814849854, "learning_rate": 5.868447848889643e-05, "loss": 1.1779, "step": 15100 }, { "epoch": 0.9001072833472404, "grad_norm": 3.132741928100586, "learning_rate": 5.867516514315711e-05, "loss": 1.3175, "step": 15102 }, { "epoch": 0.9002264870663965, "grad_norm": 3.1710660457611084, "learning_rate": 5.866585148707743e-05, "loss": 1.3378, "step": 15104 }, { "epoch": 0.9003456907855525, "grad_norm": 3.202300548553467, "learning_rate": 5.8656537520990583e-05, "loss": 1.3427, "step": 15106 }, { "epoch": 0.9004648945047086, "grad_norm": 3.2919063568115234, "learning_rate": 5.864722324522979e-05, "loss": 1.2417, "step": 15108 }, { "epoch": 0.9005840982238645, "grad_norm": 3.3996803760528564, "learning_rate": 5.863790866012823e-05, "loss": 1.3436, "step": 15110 }, { "epoch": 0.9007033019430206, "grad_norm": 3.365311622619629, "learning_rate": 5.86285937660191e-05, "loss": 1.4104, "step": 15112 }, { "epoch": 0.9008225056621767, "grad_norm": 3.2857580184936523, "learning_rate": 5.8619278563235666e-05, "loss": 1.3633, "step": 15114 }, { "epoch": 0.9009417093813327, "grad_norm": 3.1130459308624268, "learning_rate": 5.860996305211116e-05, "loss": 1.2034, "step": 15116 }, { "epoch": 0.9010609131004887, "grad_norm": 4.026137828826904, "learning_rate": 5.86006472329788e-05, "loss": 1.3395, "step": 15118 }, { "epoch": 0.9011801168196448, "grad_norm": 3.011444568634033, "learning_rate": 5.8591331106171874e-05, "loss": 1.2153, "step": 15120 }, { "epoch": 0.9012993205388008, "grad_norm": 3.704615354537964, "learning_rate": 5.8582014672023635e-05, "loss": 1.2598, "step": 15122 }, { "epoch": 0.9014185242579569, "grad_norm": 3.4356167316436768, "learning_rate": 5.857269793086737e-05, "loss": 1.3576, "step": 15124 }, { "epoch": 0.9015377279771128, "grad_norm": 2.9427871704101562, "learning_rate": 5.8563380883036355e-05, "loss": 1.1855, "step": 15126 }, { "epoch": 0.9016569316962689, "grad_norm": 3.197446584701538, "learning_rate": 5.855406352886392e-05, "loss": 1.4682, "step": 15128 }, { "epoch": 0.901776135415425, "grad_norm": 2.9395670890808105, "learning_rate": 5.854474586868336e-05, "loss": 1.2733, "step": 15130 }, { "epoch": 0.901895339134581, "grad_norm": 2.599946975708008, "learning_rate": 5.853542790282801e-05, "loss": 1.0946, "step": 15132 }, { "epoch": 0.9020145428537371, "grad_norm": 2.5547473430633545, "learning_rate": 5.85261096316312e-05, "loss": 1.1513, "step": 15134 }, { "epoch": 0.902133746572893, "grad_norm": 3.418689727783203, "learning_rate": 5.851679105542627e-05, "loss": 1.3954, "step": 15136 }, { "epoch": 0.9022529502920491, "grad_norm": 3.049546480178833, "learning_rate": 5.850747217454659e-05, "loss": 1.2171, "step": 15138 }, { "epoch": 0.9023721540112052, "grad_norm": 2.6392312049865723, "learning_rate": 5.8498152989325514e-05, "loss": 1.1724, "step": 15140 }, { "epoch": 0.9024913577303612, "grad_norm": 3.3139235973358154, "learning_rate": 5.848883350009644e-05, "loss": 1.2982, "step": 15142 }, { "epoch": 0.9026105614495172, "grad_norm": 2.8079299926757812, "learning_rate": 5.847951370719274e-05, "loss": 1.1986, "step": 15144 }, { "epoch": 0.9027297651686733, "grad_norm": 3.3291585445404053, "learning_rate": 5.847019361094782e-05, "loss": 1.1404, "step": 15146 }, { "epoch": 0.9028489688878293, "grad_norm": 3.1915125846862793, "learning_rate": 5.846087321169511e-05, "loss": 1.3278, "step": 15148 }, { "epoch": 0.9029681726069854, "grad_norm": 3.2175133228302, "learning_rate": 5.845155250976799e-05, "loss": 1.4499, "step": 15150 }, { "epoch": 0.9030873763261413, "grad_norm": 3.016735792160034, "learning_rate": 5.8442231505499945e-05, "loss": 1.4238, "step": 15152 }, { "epoch": 0.9032065800452974, "grad_norm": 3.0943715572357178, "learning_rate": 5.843291019922439e-05, "loss": 1.3722, "step": 15154 }, { "epoch": 0.9033257837644535, "grad_norm": 2.8319218158721924, "learning_rate": 5.842358859127478e-05, "loss": 1.1898, "step": 15156 }, { "epoch": 0.9034449874836095, "grad_norm": 2.7979636192321777, "learning_rate": 5.8414266681984574e-05, "loss": 1.3091, "step": 15158 }, { "epoch": 0.9035641912027655, "grad_norm": 3.3610799312591553, "learning_rate": 5.840494447168727e-05, "loss": 1.378, "step": 15160 }, { "epoch": 0.9036833949219215, "grad_norm": 2.946685314178467, "learning_rate": 5.8395621960716354e-05, "loss": 1.4253, "step": 15162 }, { "epoch": 0.9038025986410776, "grad_norm": 3.0201284885406494, "learning_rate": 5.83862991494053e-05, "loss": 1.2736, "step": 15164 }, { "epoch": 0.9039218023602337, "grad_norm": 3.3805184364318848, "learning_rate": 5.837697603808764e-05, "loss": 1.4306, "step": 15166 }, { "epoch": 0.9040410060793896, "grad_norm": 3.711244821548462, "learning_rate": 5.8367652627096894e-05, "loss": 1.2552, "step": 15168 }, { "epoch": 0.9041602097985457, "grad_norm": 3.6846323013305664, "learning_rate": 5.835832891676656e-05, "loss": 1.3141, "step": 15170 }, { "epoch": 0.9042794135177018, "grad_norm": 3.141155958175659, "learning_rate": 5.8349004907430216e-05, "loss": 1.2356, "step": 15172 }, { "epoch": 0.9043986172368578, "grad_norm": 3.2444727420806885, "learning_rate": 5.833968059942141e-05, "loss": 1.2404, "step": 15174 }, { "epoch": 0.9045178209560139, "grad_norm": 3.067671537399292, "learning_rate": 5.833035599307367e-05, "loss": 1.2215, "step": 15176 }, { "epoch": 0.9046370246751698, "grad_norm": 2.7779011726379395, "learning_rate": 5.832103108872062e-05, "loss": 1.2053, "step": 15178 }, { "epoch": 0.9047562283943259, "grad_norm": 2.7182908058166504, "learning_rate": 5.8311705886695814e-05, "loss": 1.2506, "step": 15180 }, { "epoch": 0.904875432113482, "grad_norm": 3.114769697189331, "learning_rate": 5.830238038733283e-05, "loss": 1.2787, "step": 15182 }, { "epoch": 0.904994635832638, "grad_norm": 2.737308979034424, "learning_rate": 5.829305459096531e-05, "loss": 1.332, "step": 15184 }, { "epoch": 0.905113839551794, "grad_norm": 3.565666675567627, "learning_rate": 5.828372849792686e-05, "loss": 1.2683, "step": 15186 }, { "epoch": 0.90523304327095, "grad_norm": 3.0440196990966797, "learning_rate": 5.8274402108551085e-05, "loss": 1.4608, "step": 15188 }, { "epoch": 0.9053522469901061, "grad_norm": 2.8666718006134033, "learning_rate": 5.8265075423171633e-05, "loss": 1.2638, "step": 15190 }, { "epoch": 0.9054714507092622, "grad_norm": 2.9581832885742188, "learning_rate": 5.825574844212218e-05, "loss": 1.301, "step": 15192 }, { "epoch": 0.9055906544284181, "grad_norm": 2.92760968208313, "learning_rate": 5.824642116573633e-05, "loss": 1.2264, "step": 15194 }, { "epoch": 0.9057098581475742, "grad_norm": 2.798567533493042, "learning_rate": 5.823709359434779e-05, "loss": 1.2183, "step": 15196 }, { "epoch": 0.9058290618667303, "grad_norm": 3.1737208366394043, "learning_rate": 5.822776572829024e-05, "loss": 1.3536, "step": 15198 }, { "epoch": 0.9059482655858863, "grad_norm": 3.0986478328704834, "learning_rate": 5.8218437567897345e-05, "loss": 1.5414, "step": 15200 }, { "epoch": 0.9060674693050423, "grad_norm": 3.366485357284546, "learning_rate": 5.820910911350281e-05, "loss": 1.2767, "step": 15202 }, { "epoch": 0.9061866730241983, "grad_norm": 3.341937303543091, "learning_rate": 5.819978036544038e-05, "loss": 1.2656, "step": 15204 }, { "epoch": 0.9063058767433544, "grad_norm": 3.2856006622314453, "learning_rate": 5.819045132404374e-05, "loss": 1.3552, "step": 15206 }, { "epoch": 0.9064250804625105, "grad_norm": 3.006234884262085, "learning_rate": 5.818112198964664e-05, "loss": 1.2959, "step": 15208 }, { "epoch": 0.9065442841816664, "grad_norm": 3.0527944564819336, "learning_rate": 5.81717923625828e-05, "loss": 1.423, "step": 15210 }, { "epoch": 0.9066634879008225, "grad_norm": 3.177258253097534, "learning_rate": 5.8162462443185995e-05, "loss": 1.3819, "step": 15212 }, { "epoch": 0.9067826916199786, "grad_norm": 2.972639799118042, "learning_rate": 5.8153132231789975e-05, "loss": 1.1798, "step": 15214 }, { "epoch": 0.9069018953391346, "grad_norm": 3.1304843425750732, "learning_rate": 5.814380172872853e-05, "loss": 1.3518, "step": 15216 }, { "epoch": 0.9070210990582906, "grad_norm": 3.1595890522003174, "learning_rate": 5.813447093433543e-05, "loss": 1.1746, "step": 15218 }, { "epoch": 0.9071403027774466, "grad_norm": 3.1085574626922607, "learning_rate": 5.812513984894447e-05, "loss": 1.2285, "step": 15220 }, { "epoch": 0.9072595064966027, "grad_norm": 2.8077621459960938, "learning_rate": 5.811580847288947e-05, "loss": 1.1812, "step": 15222 }, { "epoch": 0.9073787102157588, "grad_norm": 2.7223598957061768, "learning_rate": 5.8106476806504214e-05, "loss": 1.2037, "step": 15224 }, { "epoch": 0.9074979139349147, "grad_norm": 3.149320125579834, "learning_rate": 5.809714485012254e-05, "loss": 1.1662, "step": 15226 }, { "epoch": 0.9076171176540708, "grad_norm": 3.0015976428985596, "learning_rate": 5.808781260407832e-05, "loss": 1.2826, "step": 15228 }, { "epoch": 0.9077363213732268, "grad_norm": 3.028233766555786, "learning_rate": 5.807848006870535e-05, "loss": 1.2493, "step": 15230 }, { "epoch": 0.9078555250923829, "grad_norm": 3.081735610961914, "learning_rate": 5.806914724433752e-05, "loss": 1.257, "step": 15232 }, { "epoch": 0.907974728811539, "grad_norm": 3.6782243251800537, "learning_rate": 5.805981413130867e-05, "loss": 1.2719, "step": 15234 }, { "epoch": 0.9080939325306949, "grad_norm": 3.4604012966156006, "learning_rate": 5.8050480729952694e-05, "loss": 1.4288, "step": 15236 }, { "epoch": 0.908213136249851, "grad_norm": 3.2553818225860596, "learning_rate": 5.8041147040603504e-05, "loss": 1.1786, "step": 15238 }, { "epoch": 0.9083323399690071, "grad_norm": 3.568058490753174, "learning_rate": 5.803181306359494e-05, "loss": 1.4181, "step": 15240 }, { "epoch": 0.908451543688163, "grad_norm": 3.180198907852173, "learning_rate": 5.802247879926096e-05, "loss": 1.3694, "step": 15242 }, { "epoch": 0.9085707474073191, "grad_norm": 3.182067632675171, "learning_rate": 5.801314424793546e-05, "loss": 1.2636, "step": 15244 }, { "epoch": 0.9086899511264751, "grad_norm": 3.1158792972564697, "learning_rate": 5.800380940995236e-05, "loss": 1.235, "step": 15246 }, { "epoch": 0.9088091548456312, "grad_norm": 3.132369041442871, "learning_rate": 5.799447428564563e-05, "loss": 1.3205, "step": 15248 }, { "epoch": 0.9089283585647873, "grad_norm": 3.0923948287963867, "learning_rate": 5.7985138875349196e-05, "loss": 1.2161, "step": 15250 }, { "epoch": 0.9090475622839432, "grad_norm": 3.0621981620788574, "learning_rate": 5.7975803179397034e-05, "loss": 1.303, "step": 15252 }, { "epoch": 0.9091667660030993, "grad_norm": 3.2923951148986816, "learning_rate": 5.79664671981231e-05, "loss": 1.1908, "step": 15254 }, { "epoch": 0.9092859697222553, "grad_norm": 3.4132564067840576, "learning_rate": 5.795713093186137e-05, "loss": 1.38, "step": 15256 }, { "epoch": 0.9094051734414114, "grad_norm": 2.8320505619049072, "learning_rate": 5.794779438094584e-05, "loss": 1.3096, "step": 15258 }, { "epoch": 0.9095243771605674, "grad_norm": 3.0178847312927246, "learning_rate": 5.7938457545710514e-05, "loss": 1.5144, "step": 15260 }, { "epoch": 0.9096435808797234, "grad_norm": 2.8596436977386475, "learning_rate": 5.7929120426489406e-05, "loss": 1.4131, "step": 15262 }, { "epoch": 0.9097627845988795, "grad_norm": 2.912071466445923, "learning_rate": 5.7919783023616526e-05, "loss": 1.1837, "step": 15264 }, { "epoch": 0.9098819883180356, "grad_norm": 3.278953790664673, "learning_rate": 5.79104453374259e-05, "loss": 1.4059, "step": 15266 }, { "epoch": 0.9100011920371915, "grad_norm": 3.0141730308532715, "learning_rate": 5.7901107368251605e-05, "loss": 1.2344, "step": 15268 }, { "epoch": 0.9101203957563476, "grad_norm": 3.2967300415039062, "learning_rate": 5.789176911642765e-05, "loss": 1.3166, "step": 15270 }, { "epoch": 0.9102395994755036, "grad_norm": 3.4668588638305664, "learning_rate": 5.7882430582288106e-05, "loss": 1.3167, "step": 15272 }, { "epoch": 0.9103588031946597, "grad_norm": 3.23982310295105, "learning_rate": 5.7873091766167066e-05, "loss": 1.3129, "step": 15274 }, { "epoch": 0.9104780069138158, "grad_norm": 3.3068809509277344, "learning_rate": 5.7863752668398586e-05, "loss": 1.1613, "step": 15276 }, { "epoch": 0.9105972106329717, "grad_norm": 2.9370834827423096, "learning_rate": 5.7854413289316756e-05, "loss": 1.3137, "step": 15278 }, { "epoch": 0.9107164143521278, "grad_norm": 2.7353055477142334, "learning_rate": 5.784507362925571e-05, "loss": 1.2978, "step": 15280 }, { "epoch": 0.9108356180712838, "grad_norm": 3.121877908706665, "learning_rate": 5.783573368854954e-05, "loss": 1.2471, "step": 15282 }, { "epoch": 0.9109548217904399, "grad_norm": 3.013842821121216, "learning_rate": 5.7826393467532344e-05, "loss": 1.4151, "step": 15284 }, { "epoch": 0.9110740255095959, "grad_norm": 3.0015921592712402, "learning_rate": 5.78170529665383e-05, "loss": 1.2141, "step": 15286 }, { "epoch": 0.9111932292287519, "grad_norm": 3.1073334217071533, "learning_rate": 5.780771218590152e-05, "loss": 1.4214, "step": 15288 }, { "epoch": 0.911312432947908, "grad_norm": 2.958012819290161, "learning_rate": 5.779837112595615e-05, "loss": 1.1984, "step": 15290 }, { "epoch": 0.9114316366670641, "grad_norm": 3.0303032398223877, "learning_rate": 5.778902978703636e-05, "loss": 1.2434, "step": 15292 }, { "epoch": 0.91155084038622, "grad_norm": 3.447953701019287, "learning_rate": 5.777968816947634e-05, "loss": 1.2691, "step": 15294 }, { "epoch": 0.9116700441053761, "grad_norm": 2.9384078979492188, "learning_rate": 5.7770346273610254e-05, "loss": 1.4318, "step": 15296 }, { "epoch": 0.9117892478245321, "grad_norm": 3.1570510864257812, "learning_rate": 5.77610040997723e-05, "loss": 1.3374, "step": 15298 }, { "epoch": 0.9119084515436882, "grad_norm": 3.242793321609497, "learning_rate": 5.775166164829666e-05, "loss": 1.2998, "step": 15300 }, { "epoch": 0.9120276552628442, "grad_norm": 3.1540310382843018, "learning_rate": 5.774231891951757e-05, "loss": 1.2999, "step": 15302 }, { "epoch": 0.9121468589820002, "grad_norm": 3.0553531646728516, "learning_rate": 5.7732975913769226e-05, "loss": 1.3062, "step": 15304 }, { "epoch": 0.9122660627011563, "grad_norm": 3.5247137546539307, "learning_rate": 5.772363263138589e-05, "loss": 1.3581, "step": 15306 }, { "epoch": 0.9123852664203124, "grad_norm": 3.3251776695251465, "learning_rate": 5.77142890727018e-05, "loss": 1.3718, "step": 15308 }, { "epoch": 0.9125044701394683, "grad_norm": 3.649008274078369, "learning_rate": 5.7704945238051176e-05, "loss": 1.3691, "step": 15310 }, { "epoch": 0.9126236738586244, "grad_norm": 3.026320695877075, "learning_rate": 5.7695601127768306e-05, "loss": 1.3886, "step": 15312 }, { "epoch": 0.9127428775777804, "grad_norm": 2.8757190704345703, "learning_rate": 5.768625674218747e-05, "loss": 1.235, "step": 15314 }, { "epoch": 0.9128620812969365, "grad_norm": 2.8564822673797607, "learning_rate": 5.7676912081642907e-05, "loss": 1.1328, "step": 15316 }, { "epoch": 0.9129812850160925, "grad_norm": 3.0785253047943115, "learning_rate": 5.766756714646895e-05, "loss": 1.2578, "step": 15318 }, { "epoch": 0.9131004887352485, "grad_norm": 3.3442182540893555, "learning_rate": 5.765822193699987e-05, "loss": 1.2823, "step": 15320 }, { "epoch": 0.9132196924544046, "grad_norm": 3.280653476715088, "learning_rate": 5.764887645357e-05, "loss": 1.3638, "step": 15322 }, { "epoch": 0.9133388961735606, "grad_norm": 2.8679964542388916, "learning_rate": 5.763953069651363e-05, "loss": 1.2128, "step": 15324 }, { "epoch": 0.9134580998927166, "grad_norm": 2.934523582458496, "learning_rate": 5.763018466616512e-05, "loss": 1.2985, "step": 15326 }, { "epoch": 0.9135773036118727, "grad_norm": 2.8405089378356934, "learning_rate": 5.76208383628588e-05, "loss": 1.1226, "step": 15328 }, { "epoch": 0.9136965073310287, "grad_norm": 2.9449453353881836, "learning_rate": 5.761149178692902e-05, "loss": 1.2728, "step": 15330 }, { "epoch": 0.9138157110501848, "grad_norm": 3.329824924468994, "learning_rate": 5.760214493871013e-05, "loss": 1.1775, "step": 15332 }, { "epoch": 0.9139349147693409, "grad_norm": 3.065464735031128, "learning_rate": 5.7592797818536516e-05, "loss": 1.4826, "step": 15334 }, { "epoch": 0.9140541184884968, "grad_norm": 3.1710729598999023, "learning_rate": 5.758345042674252e-05, "loss": 1.3247, "step": 15336 }, { "epoch": 0.9141733222076529, "grad_norm": 3.0712389945983887, "learning_rate": 5.757410276366257e-05, "loss": 1.4222, "step": 15338 }, { "epoch": 0.9142925259268089, "grad_norm": 2.739948034286499, "learning_rate": 5.7564754829631064e-05, "loss": 1.2665, "step": 15340 }, { "epoch": 0.914411729645965, "grad_norm": 3.2308714389801025, "learning_rate": 5.755540662498238e-05, "loss": 1.2653, "step": 15342 }, { "epoch": 0.914530933365121, "grad_norm": 3.3629846572875977, "learning_rate": 5.7546058150050965e-05, "loss": 1.3531, "step": 15344 }, { "epoch": 0.914650137084277, "grad_norm": 3.2479395866394043, "learning_rate": 5.7536709405171216e-05, "loss": 1.2483, "step": 15346 }, { "epoch": 0.9147693408034331, "grad_norm": 3.504380226135254, "learning_rate": 5.752736039067759e-05, "loss": 1.2557, "step": 15348 }, { "epoch": 0.914888544522589, "grad_norm": 2.9875004291534424, "learning_rate": 5.751801110690452e-05, "loss": 1.2057, "step": 15350 }, { "epoch": 0.9150077482417451, "grad_norm": 2.779503583908081, "learning_rate": 5.7508661554186485e-05, "loss": 1.1032, "step": 15352 }, { "epoch": 0.9151269519609012, "grad_norm": 3.2620279788970947, "learning_rate": 5.749931173285793e-05, "loss": 1.2085, "step": 15354 }, { "epoch": 0.9152461556800572, "grad_norm": 3.1750168800354004, "learning_rate": 5.748996164325332e-05, "loss": 1.2517, "step": 15356 }, { "epoch": 0.9153653593992133, "grad_norm": 3.2497150897979736, "learning_rate": 5.7480611285707184e-05, "loss": 1.2844, "step": 15358 }, { "epoch": 0.9154845631183693, "grad_norm": 3.16459584236145, "learning_rate": 5.747126066055396e-05, "loss": 1.2719, "step": 15360 }, { "epoch": 0.9156037668375253, "grad_norm": 3.1493866443634033, "learning_rate": 5.74619097681282e-05, "loss": 1.2255, "step": 15362 }, { "epoch": 0.9157229705566814, "grad_norm": 3.2047007083892822, "learning_rate": 5.745255860876439e-05, "loss": 1.2566, "step": 15364 }, { "epoch": 0.9158421742758374, "grad_norm": 2.936655044555664, "learning_rate": 5.744320718279707e-05, "loss": 1.2434, "step": 15366 }, { "epoch": 0.9159613779949934, "grad_norm": 3.4261109828948975, "learning_rate": 5.743385549056074e-05, "loss": 1.2467, "step": 15368 }, { "epoch": 0.9160805817141495, "grad_norm": 3.3168671131134033, "learning_rate": 5.7424503532389976e-05, "loss": 1.416, "step": 15370 }, { "epoch": 0.9161997854333055, "grad_norm": 2.9386916160583496, "learning_rate": 5.741515130861932e-05, "loss": 1.2042, "step": 15372 }, { "epoch": 0.9163189891524616, "grad_norm": 3.164290428161621, "learning_rate": 5.740579881958333e-05, "loss": 1.3165, "step": 15374 }, { "epoch": 0.9164381928716175, "grad_norm": 4.627039909362793, "learning_rate": 5.7396446065616584e-05, "loss": 1.29, "step": 15376 }, { "epoch": 0.9165573965907736, "grad_norm": 3.1528706550598145, "learning_rate": 5.738709304705364e-05, "loss": 1.33, "step": 15378 }, { "epoch": 0.9166766003099297, "grad_norm": 2.933501958847046, "learning_rate": 5.737773976422911e-05, "loss": 1.1797, "step": 15380 }, { "epoch": 0.9167958040290857, "grad_norm": 3.162675380706787, "learning_rate": 5.7368386217477586e-05, "loss": 1.4062, "step": 15382 }, { "epoch": 0.9169150077482418, "grad_norm": 3.045681953430176, "learning_rate": 5.735903240713368e-05, "loss": 1.3224, "step": 15384 }, { "epoch": 0.9170342114673978, "grad_norm": 3.0503790378570557, "learning_rate": 5.734967833353201e-05, "loss": 1.3395, "step": 15386 }, { "epoch": 0.9171534151865538, "grad_norm": 3.0221352577209473, "learning_rate": 5.734032399700719e-05, "loss": 1.3296, "step": 15388 }, { "epoch": 0.9172726189057099, "grad_norm": 2.7901437282562256, "learning_rate": 5.7330969397893875e-05, "loss": 1.2959, "step": 15390 }, { "epoch": 0.9173918226248658, "grad_norm": 3.16208553314209, "learning_rate": 5.732161453652669e-05, "loss": 1.1862, "step": 15392 }, { "epoch": 0.9175110263440219, "grad_norm": 3.2252681255340576, "learning_rate": 5.731225941324031e-05, "loss": 1.2474, "step": 15394 }, { "epoch": 0.917630230063178, "grad_norm": 3.148399829864502, "learning_rate": 5.7302904028369384e-05, "loss": 1.3361, "step": 15396 }, { "epoch": 0.917749433782334, "grad_norm": 3.185823440551758, "learning_rate": 5.729354838224861e-05, "loss": 1.3739, "step": 15398 }, { "epoch": 0.9178686375014901, "grad_norm": 3.225965738296509, "learning_rate": 5.728419247521264e-05, "loss": 1.361, "step": 15400 }, { "epoch": 0.9179878412206461, "grad_norm": 3.0729000568389893, "learning_rate": 5.7274836307596194e-05, "loss": 1.2671, "step": 15402 }, { "epoch": 0.9181070449398021, "grad_norm": 2.8598718643188477, "learning_rate": 5.726547987973396e-05, "loss": 1.372, "step": 15404 }, { "epoch": 0.9182262486589582, "grad_norm": 2.9910998344421387, "learning_rate": 5.725612319196064e-05, "loss": 1.2625, "step": 15406 }, { "epoch": 0.9183454523781142, "grad_norm": 2.997756004333496, "learning_rate": 5.724676624461098e-05, "loss": 1.344, "step": 15408 }, { "epoch": 0.9184646560972702, "grad_norm": 3.252655267715454, "learning_rate": 5.7237409038019704e-05, "loss": 1.4542, "step": 15410 }, { "epoch": 0.9185838598164263, "grad_norm": 3.4022505283355713, "learning_rate": 5.7228051572521524e-05, "loss": 1.1968, "step": 15412 }, { "epoch": 0.9187030635355823, "grad_norm": 2.997609853744507, "learning_rate": 5.721869384845122e-05, "loss": 1.1671, "step": 15414 }, { "epoch": 0.9188222672547384, "grad_norm": 3.026045322418213, "learning_rate": 5.7209335866143546e-05, "loss": 1.179, "step": 15416 }, { "epoch": 0.9189414709738943, "grad_norm": 3.209826707839966, "learning_rate": 5.719997762593324e-05, "loss": 1.3844, "step": 15418 }, { "epoch": 0.9190606746930504, "grad_norm": 3.3699710369110107, "learning_rate": 5.719061912815512e-05, "loss": 1.3054, "step": 15420 }, { "epoch": 0.9191798784122065, "grad_norm": 3.0541889667510986, "learning_rate": 5.718126037314395e-05, "loss": 1.212, "step": 15422 }, { "epoch": 0.9192990821313625, "grad_norm": 3.679924964904785, "learning_rate": 5.7171901361234524e-05, "loss": 1.1899, "step": 15424 }, { "epoch": 0.9194182858505185, "grad_norm": 2.8120813369750977, "learning_rate": 5.716254209276163e-05, "loss": 1.2818, "step": 15426 }, { "epoch": 0.9195374895696746, "grad_norm": 3.1225409507751465, "learning_rate": 5.7153182568060116e-05, "loss": 1.2757, "step": 15428 }, { "epoch": 0.9196566932888306, "grad_norm": 3.3709335327148438, "learning_rate": 5.7143822787464786e-05, "loss": 1.3085, "step": 15430 }, { "epoch": 0.9197758970079867, "grad_norm": 3.298165798187256, "learning_rate": 5.713446275131047e-05, "loss": 1.1669, "step": 15432 }, { "epoch": 0.9198951007271426, "grad_norm": 3.4127418994903564, "learning_rate": 5.7125102459932024e-05, "loss": 1.4453, "step": 15434 }, { "epoch": 0.9200143044462987, "grad_norm": 2.8061118125915527, "learning_rate": 5.7115741913664264e-05, "loss": 1.055, "step": 15436 }, { "epoch": 0.9201335081654548, "grad_norm": 2.9179840087890625, "learning_rate": 5.7106381112842075e-05, "loss": 1.3365, "step": 15438 }, { "epoch": 0.9202527118846108, "grad_norm": 3.3813095092773438, "learning_rate": 5.709702005780032e-05, "loss": 1.3694, "step": 15440 }, { "epoch": 0.9203719156037669, "grad_norm": 3.117173194885254, "learning_rate": 5.708765874887389e-05, "loss": 1.2668, "step": 15442 }, { "epoch": 0.9204911193229228, "grad_norm": 3.1887848377227783, "learning_rate": 5.7078297186397634e-05, "loss": 1.3639, "step": 15444 }, { "epoch": 0.9206103230420789, "grad_norm": 3.2498087882995605, "learning_rate": 5.706893537070648e-05, "loss": 1.3694, "step": 15446 }, { "epoch": 0.920729526761235, "grad_norm": 2.9732589721679688, "learning_rate": 5.705957330213533e-05, "loss": 1.3239, "step": 15448 }, { "epoch": 0.920848730480391, "grad_norm": 2.9234583377838135, "learning_rate": 5.705021098101907e-05, "loss": 1.2281, "step": 15450 }, { "epoch": 0.920967934199547, "grad_norm": 3.4258971214294434, "learning_rate": 5.704084840769266e-05, "loss": 1.2561, "step": 15452 }, { "epoch": 0.9210871379187031, "grad_norm": 3.0963237285614014, "learning_rate": 5.703148558249101e-05, "loss": 1.3758, "step": 15454 }, { "epoch": 0.9212063416378591, "grad_norm": 2.9843101501464844, "learning_rate": 5.702212250574905e-05, "loss": 1.2411, "step": 15456 }, { "epoch": 0.9213255453570152, "grad_norm": 2.998056650161743, "learning_rate": 5.701275917780174e-05, "loss": 1.3968, "step": 15458 }, { "epoch": 0.9214447490761711, "grad_norm": 2.8610408306121826, "learning_rate": 5.7003395598984055e-05, "loss": 1.1764, "step": 15460 }, { "epoch": 0.9215639527953272, "grad_norm": 3.4219419956207275, "learning_rate": 5.699403176963094e-05, "loss": 1.2995, "step": 15462 }, { "epoch": 0.9216831565144833, "grad_norm": 2.8024652004241943, "learning_rate": 5.698466769007739e-05, "loss": 1.314, "step": 15464 }, { "epoch": 0.9218023602336393, "grad_norm": 2.9171714782714844, "learning_rate": 5.6975303360658373e-05, "loss": 1.3556, "step": 15466 }, { "epoch": 0.9219215639527953, "grad_norm": 2.9659907817840576, "learning_rate": 5.6965938781708886e-05, "loss": 1.1759, "step": 15468 }, { "epoch": 0.9220407676719513, "grad_norm": 3.128098249435425, "learning_rate": 5.695657395356392e-05, "loss": 1.3262, "step": 15470 }, { "epoch": 0.9221599713911074, "grad_norm": 3.1744306087493896, "learning_rate": 5.694720887655852e-05, "loss": 1.3064, "step": 15472 }, { "epoch": 0.9222791751102635, "grad_norm": 3.1352531909942627, "learning_rate": 5.6937843551027695e-05, "loss": 1.1473, "step": 15474 }, { "epoch": 0.9223983788294194, "grad_norm": 3.2173044681549072, "learning_rate": 5.692847797730644e-05, "loss": 1.2009, "step": 15476 }, { "epoch": 0.9225175825485755, "grad_norm": 2.9206066131591797, "learning_rate": 5.691911215572986e-05, "loss": 1.2166, "step": 15478 }, { "epoch": 0.9226367862677316, "grad_norm": 3.0229244232177734, "learning_rate": 5.6909746086632945e-05, "loss": 1.2027, "step": 15480 }, { "epoch": 0.9227559899868876, "grad_norm": 2.67623233795166, "learning_rate": 5.690037977035077e-05, "loss": 1.2177, "step": 15482 }, { "epoch": 0.9228751937060437, "grad_norm": 2.937591314315796, "learning_rate": 5.689101320721839e-05, "loss": 1.1083, "step": 15484 }, { "epoch": 0.9229943974251996, "grad_norm": 3.1137404441833496, "learning_rate": 5.6881646397570906e-05, "loss": 1.3186, "step": 15486 }, { "epoch": 0.9231136011443557, "grad_norm": 3.2157938480377197, "learning_rate": 5.6872279341743395e-05, "loss": 1.2952, "step": 15488 }, { "epoch": 0.9232328048635118, "grad_norm": 3.0412755012512207, "learning_rate": 5.686291204007091e-05, "loss": 1.4471, "step": 15490 }, { "epoch": 0.9233520085826677, "grad_norm": 3.5427119731903076, "learning_rate": 5.68535444928886e-05, "loss": 1.3092, "step": 15492 }, { "epoch": 0.9234712123018238, "grad_norm": 3.2965147495269775, "learning_rate": 5.6844176700531546e-05, "loss": 1.3064, "step": 15494 }, { "epoch": 0.9235904160209799, "grad_norm": 3.1827445030212402, "learning_rate": 5.6834808663334895e-05, "loss": 1.444, "step": 15496 }, { "epoch": 0.9237096197401359, "grad_norm": 2.6059823036193848, "learning_rate": 5.682544038163373e-05, "loss": 1.3624, "step": 15498 }, { "epoch": 0.923828823459292, "grad_norm": 3.0988383293151855, "learning_rate": 5.681607185576322e-05, "loss": 1.194, "step": 15500 }, { "epoch": 0.9239480271784479, "grad_norm": 2.966618061065674, "learning_rate": 5.680670308605849e-05, "loss": 1.1836, "step": 15502 }, { "epoch": 0.924067230897604, "grad_norm": 3.2144689559936523, "learning_rate": 5.6797334072854705e-05, "loss": 1.2425, "step": 15504 }, { "epoch": 0.9241864346167601, "grad_norm": 2.9657208919525146, "learning_rate": 5.678796481648703e-05, "loss": 1.2626, "step": 15506 }, { "epoch": 0.9243056383359161, "grad_norm": 3.0800609588623047, "learning_rate": 5.6778595317290626e-05, "loss": 1.3185, "step": 15508 }, { "epoch": 0.9244248420550721, "grad_norm": 3.2166709899902344, "learning_rate": 5.676922557560067e-05, "loss": 1.4663, "step": 15510 }, { "epoch": 0.9245440457742281, "grad_norm": 3.005319356918335, "learning_rate": 5.6759855591752364e-05, "loss": 1.3058, "step": 15512 }, { "epoch": 0.9246632494933842, "grad_norm": 2.900754928588867, "learning_rate": 5.6750485366080874e-05, "loss": 1.2501, "step": 15514 }, { "epoch": 0.9247824532125403, "grad_norm": 3.2056386470794678, "learning_rate": 5.674111489892144e-05, "loss": 1.3295, "step": 15516 }, { "epoch": 0.9249016569316962, "grad_norm": 2.94614315032959, "learning_rate": 5.673174419060927e-05, "loss": 1.2285, "step": 15518 }, { "epoch": 0.9250208606508523, "grad_norm": 3.1156060695648193, "learning_rate": 5.6722373241479576e-05, "loss": 1.3048, "step": 15520 }, { "epoch": 0.9251400643700084, "grad_norm": 3.163262128829956, "learning_rate": 5.671300205186757e-05, "loss": 1.2882, "step": 15522 }, { "epoch": 0.9252592680891644, "grad_norm": 2.797483205795288, "learning_rate": 5.670363062210855e-05, "loss": 1.2492, "step": 15524 }, { "epoch": 0.9253784718083204, "grad_norm": 3.002603054046631, "learning_rate": 5.669425895253769e-05, "loss": 1.2438, "step": 15526 }, { "epoch": 0.9254976755274764, "grad_norm": 3.135908365249634, "learning_rate": 5.66848870434903e-05, "loss": 1.2798, "step": 15528 }, { "epoch": 0.9256168792466325, "grad_norm": 3.1318657398223877, "learning_rate": 5.667551489530163e-05, "loss": 1.1167, "step": 15530 }, { "epoch": 0.9257360829657886, "grad_norm": 3.22935152053833, "learning_rate": 5.666614250830694e-05, "loss": 1.2202, "step": 15532 }, { "epoch": 0.9258552866849445, "grad_norm": 2.9443745613098145, "learning_rate": 5.665676988284152e-05, "loss": 1.0659, "step": 15534 }, { "epoch": 0.9259744904041006, "grad_norm": 3.306680202484131, "learning_rate": 5.6647397019240686e-05, "loss": 1.1916, "step": 15536 }, { "epoch": 0.9260936941232566, "grad_norm": 2.8268883228302, "learning_rate": 5.663802391783972e-05, "loss": 1.1275, "step": 15538 }, { "epoch": 0.9262128978424127, "grad_norm": 3.34162974357605, "learning_rate": 5.6628650578973906e-05, "loss": 1.4362, "step": 15540 }, { "epoch": 0.9263321015615688, "grad_norm": 3.1464765071868896, "learning_rate": 5.661927700297859e-05, "loss": 1.1223, "step": 15542 }, { "epoch": 0.9264513052807247, "grad_norm": 3.4335880279541016, "learning_rate": 5.660990319018908e-05, "loss": 1.2715, "step": 15544 }, { "epoch": 0.9265705089998808, "grad_norm": 2.96972918510437, "learning_rate": 5.6600529140940726e-05, "loss": 1.39, "step": 15546 }, { "epoch": 0.9266897127190369, "grad_norm": 3.3001532554626465, "learning_rate": 5.659115485556886e-05, "loss": 1.3591, "step": 15548 }, { "epoch": 0.9268089164381929, "grad_norm": 3.0840303897857666, "learning_rate": 5.658178033440884e-05, "loss": 1.3148, "step": 15550 }, { "epoch": 0.9269281201573489, "grad_norm": 2.868647575378418, "learning_rate": 5.657240557779602e-05, "loss": 1.1353, "step": 15552 }, { "epoch": 0.9270473238765049, "grad_norm": 3.3482789993286133, "learning_rate": 5.6563030586065744e-05, "loss": 1.3095, "step": 15554 }, { "epoch": 0.927166527595661, "grad_norm": 3.413665533065796, "learning_rate": 5.655365535955343e-05, "loss": 1.2307, "step": 15556 }, { "epoch": 0.9272857313148171, "grad_norm": 3.426530361175537, "learning_rate": 5.654427989859442e-05, "loss": 1.4362, "step": 15558 }, { "epoch": 0.927404935033973, "grad_norm": 3.1702864170074463, "learning_rate": 5.653490420352414e-05, "loss": 1.3257, "step": 15560 }, { "epoch": 0.9275241387531291, "grad_norm": 3.1319620609283447, "learning_rate": 5.652552827467799e-05, "loss": 1.2847, "step": 15562 }, { "epoch": 0.9276433424722851, "grad_norm": 3.273188352584839, "learning_rate": 5.651615211239135e-05, "loss": 1.3174, "step": 15564 }, { "epoch": 0.9277625461914412, "grad_norm": 3.094571828842163, "learning_rate": 5.650677571699965e-05, "loss": 1.1464, "step": 15566 }, { "epoch": 0.9278817499105972, "grad_norm": 3.0346505641937256, "learning_rate": 5.649739908883833e-05, "loss": 1.3996, "step": 15568 }, { "epoch": 0.9280009536297532, "grad_norm": 2.9898481369018555, "learning_rate": 5.648802222824282e-05, "loss": 1.3384, "step": 15570 }, { "epoch": 0.9281201573489093, "grad_norm": 3.0921740531921387, "learning_rate": 5.647864513554855e-05, "loss": 1.439, "step": 15572 }, { "epoch": 0.9282393610680654, "grad_norm": 3.4514079093933105, "learning_rate": 5.646926781109096e-05, "loss": 1.3515, "step": 15574 }, { "epoch": 0.9283585647872213, "grad_norm": 3.350665807723999, "learning_rate": 5.645989025520555e-05, "loss": 1.4105, "step": 15576 }, { "epoch": 0.9284777685063774, "grad_norm": 3.4114272594451904, "learning_rate": 5.645051246822775e-05, "loss": 1.2069, "step": 15578 }, { "epoch": 0.9285969722255334, "grad_norm": 3.1260931491851807, "learning_rate": 5.644113445049305e-05, "loss": 1.3364, "step": 15580 }, { "epoch": 0.9287161759446895, "grad_norm": 3.185267210006714, "learning_rate": 5.643175620233694e-05, "loss": 1.3352, "step": 15582 }, { "epoch": 0.9288353796638456, "grad_norm": 3.3420944213867188, "learning_rate": 5.64223777240949e-05, "loss": 1.2834, "step": 15584 }, { "epoch": 0.9289545833830015, "grad_norm": 2.789187431335449, "learning_rate": 5.641299901610244e-05, "loss": 1.2344, "step": 15586 }, { "epoch": 0.9290737871021576, "grad_norm": 3.1403276920318604, "learning_rate": 5.6403620078695064e-05, "loss": 1.3046, "step": 15588 }, { "epoch": 0.9291929908213137, "grad_norm": 3.4431562423706055, "learning_rate": 5.6394240912208276e-05, "loss": 1.4347, "step": 15590 }, { "epoch": 0.9293121945404696, "grad_norm": 3.1693685054779053, "learning_rate": 5.638486151697762e-05, "loss": 1.3178, "step": 15592 }, { "epoch": 0.9294313982596257, "grad_norm": 3.0249545574188232, "learning_rate": 5.637548189333862e-05, "loss": 1.1894, "step": 15594 }, { "epoch": 0.9295506019787817, "grad_norm": 3.2907509803771973, "learning_rate": 5.6366102041626825e-05, "loss": 1.2321, "step": 15596 }, { "epoch": 0.9296698056979378, "grad_norm": 2.8405394554138184, "learning_rate": 5.635672196217776e-05, "loss": 1.1453, "step": 15598 }, { "epoch": 0.9297890094170939, "grad_norm": 3.071777820587158, "learning_rate": 5.634734165532704e-05, "loss": 1.4395, "step": 15600 }, { "epoch": 0.9299082131362498, "grad_norm": 3.063237190246582, "learning_rate": 5.633796112141017e-05, "loss": 1.3062, "step": 15602 }, { "epoch": 0.9300274168554059, "grad_norm": 2.734374761581421, "learning_rate": 5.632858036076273e-05, "loss": 1.2289, "step": 15604 }, { "epoch": 0.9301466205745619, "grad_norm": 2.9462521076202393, "learning_rate": 5.631919937372033e-05, "loss": 1.2705, "step": 15606 }, { "epoch": 0.930265824293718, "grad_norm": 3.0629727840423584, "learning_rate": 5.630981816061855e-05, "loss": 1.1999, "step": 15608 }, { "epoch": 0.930385028012874, "grad_norm": 3.074012041091919, "learning_rate": 5.630043672179297e-05, "loss": 1.2613, "step": 15610 }, { "epoch": 0.93050423173203, "grad_norm": 3.027146577835083, "learning_rate": 5.629105505757924e-05, "loss": 1.3054, "step": 15612 }, { "epoch": 0.9306234354511861, "grad_norm": 2.7321956157684326, "learning_rate": 5.6281673168312934e-05, "loss": 1.2305, "step": 15614 }, { "epoch": 0.9307426391703422, "grad_norm": 3.3182716369628906, "learning_rate": 5.627229105432967e-05, "loss": 1.3194, "step": 15616 }, { "epoch": 0.9308618428894981, "grad_norm": 3.398409843444824, "learning_rate": 5.626290871596511e-05, "loss": 1.3803, "step": 15618 }, { "epoch": 0.9309810466086542, "grad_norm": 3.0137481689453125, "learning_rate": 5.625352615355488e-05, "loss": 1.2947, "step": 15620 }, { "epoch": 0.9311002503278102, "grad_norm": 3.1121582984924316, "learning_rate": 5.624414336743462e-05, "loss": 1.2774, "step": 15622 }, { "epoch": 0.9312194540469663, "grad_norm": 2.6740682125091553, "learning_rate": 5.6234760357939985e-05, "loss": 1.2576, "step": 15624 }, { "epoch": 0.9313386577661223, "grad_norm": 3.061506986618042, "learning_rate": 5.622537712540664e-05, "loss": 1.1947, "step": 15626 }, { "epoch": 0.9314578614852783, "grad_norm": 3.3716578483581543, "learning_rate": 5.621599367017025e-05, "loss": 1.3593, "step": 15628 }, { "epoch": 0.9315770652044344, "grad_norm": 2.967833995819092, "learning_rate": 5.620660999256652e-05, "loss": 1.3247, "step": 15630 }, { "epoch": 0.9316962689235904, "grad_norm": 3.0820631980895996, "learning_rate": 5.61972260929311e-05, "loss": 1.166, "step": 15632 }, { "epoch": 0.9318154726427464, "grad_norm": 3.058987617492676, "learning_rate": 5.6187841971599696e-05, "loss": 1.2878, "step": 15634 }, { "epoch": 0.9319346763619025, "grad_norm": 3.1043481826782227, "learning_rate": 5.617845762890801e-05, "loss": 1.2614, "step": 15636 }, { "epoch": 0.9320538800810585, "grad_norm": 3.2794222831726074, "learning_rate": 5.616907306519176e-05, "loss": 1.4237, "step": 15638 }, { "epoch": 0.9321730838002146, "grad_norm": 3.098369598388672, "learning_rate": 5.615968828078666e-05, "loss": 1.4597, "step": 15640 }, { "epoch": 0.9322922875193707, "grad_norm": 2.9628336429595947, "learning_rate": 5.6150303276028425e-05, "loss": 1.1891, "step": 15642 }, { "epoch": 0.9324114912385266, "grad_norm": 3.2560672760009766, "learning_rate": 5.614091805125283e-05, "loss": 1.2731, "step": 15644 }, { "epoch": 0.9325306949576827, "grad_norm": 3.317594051361084, "learning_rate": 5.613153260679557e-05, "loss": 1.297, "step": 15646 }, { "epoch": 0.9326498986768387, "grad_norm": 3.370810031890869, "learning_rate": 5.612214694299239e-05, "loss": 1.3994, "step": 15648 }, { "epoch": 0.9327691023959948, "grad_norm": 2.9505727291107178, "learning_rate": 5.6112761060179085e-05, "loss": 1.264, "step": 15650 }, { "epoch": 0.9328883061151508, "grad_norm": 3.192077398300171, "learning_rate": 5.61033749586914e-05, "loss": 1.2644, "step": 15652 }, { "epoch": 0.9330075098343068, "grad_norm": 2.74185848236084, "learning_rate": 5.6093988638865116e-05, "loss": 1.2607, "step": 15654 }, { "epoch": 0.9331267135534629, "grad_norm": 3.0494768619537354, "learning_rate": 5.608460210103599e-05, "loss": 1.11, "step": 15656 }, { "epoch": 0.9332459172726189, "grad_norm": 3.083829641342163, "learning_rate": 5.607521534553984e-05, "loss": 1.1093, "step": 15658 }, { "epoch": 0.9333651209917749, "grad_norm": 3.231961965560913, "learning_rate": 5.606582837271246e-05, "loss": 1.2461, "step": 15660 }, { "epoch": 0.933484324710931, "grad_norm": 2.983362913131714, "learning_rate": 5.605644118288963e-05, "loss": 1.2389, "step": 15662 }, { "epoch": 0.933603528430087, "grad_norm": 3.089237928390503, "learning_rate": 5.604705377640719e-05, "loss": 1.216, "step": 15664 }, { "epoch": 0.9337227321492431, "grad_norm": 2.841566324234009, "learning_rate": 5.603766615360094e-05, "loss": 1.2945, "step": 15666 }, { "epoch": 0.9338419358683991, "grad_norm": 2.8349051475524902, "learning_rate": 5.602827831480671e-05, "loss": 1.2372, "step": 15668 }, { "epoch": 0.9339611395875551, "grad_norm": 3.203200340270996, "learning_rate": 5.6018890260360336e-05, "loss": 1.3351, "step": 15670 }, { "epoch": 0.9340803433067112, "grad_norm": 3.0604617595672607, "learning_rate": 5.600950199059768e-05, "loss": 1.3965, "step": 15672 }, { "epoch": 0.9341995470258672, "grad_norm": 3.3543524742126465, "learning_rate": 5.600011350585457e-05, "loss": 1.1884, "step": 15674 }, { "epoch": 0.9343187507450232, "grad_norm": 2.999610662460327, "learning_rate": 5.5990724806466864e-05, "loss": 1.2406, "step": 15676 }, { "epoch": 0.9344379544641793, "grad_norm": 3.2999608516693115, "learning_rate": 5.598133589277044e-05, "loss": 1.387, "step": 15678 }, { "epoch": 0.9345571581833353, "grad_norm": 2.9493329524993896, "learning_rate": 5.5971946765101156e-05, "loss": 1.3282, "step": 15680 }, { "epoch": 0.9346763619024914, "grad_norm": 3.0247864723205566, "learning_rate": 5.596255742379492e-05, "loss": 1.2186, "step": 15682 }, { "epoch": 0.9347955656216475, "grad_norm": 3.1443803310394287, "learning_rate": 5.59531678691876e-05, "loss": 1.3529, "step": 15684 }, { "epoch": 0.9349147693408034, "grad_norm": 3.051323175430298, "learning_rate": 5.5943778101615085e-05, "loss": 1.2831, "step": 15686 }, { "epoch": 0.9350339730599595, "grad_norm": 3.0490214824676514, "learning_rate": 5.5934388121413294e-05, "loss": 1.1966, "step": 15688 }, { "epoch": 0.9351531767791155, "grad_norm": 3.230173110961914, "learning_rate": 5.592499792891815e-05, "loss": 1.2727, "step": 15690 }, { "epoch": 0.9352723804982715, "grad_norm": 2.955677032470703, "learning_rate": 5.591560752446554e-05, "loss": 1.2483, "step": 15692 }, { "epoch": 0.9353915842174276, "grad_norm": 3.3618128299713135, "learning_rate": 5.590621690839142e-05, "loss": 1.1992, "step": 15694 }, { "epoch": 0.9355107879365836, "grad_norm": 3.249915838241577, "learning_rate": 5.589682608103172e-05, "loss": 1.4366, "step": 15696 }, { "epoch": 0.9356299916557397, "grad_norm": 2.90767765045166, "learning_rate": 5.588743504272237e-05, "loss": 1.2207, "step": 15698 }, { "epoch": 0.9357491953748956, "grad_norm": 2.9200921058654785, "learning_rate": 5.5878043793799305e-05, "loss": 1.2787, "step": 15700 }, { "epoch": 0.9358683990940517, "grad_norm": 3.083864450454712, "learning_rate": 5.5868652334598524e-05, "loss": 1.3022, "step": 15702 }, { "epoch": 0.9359876028132078, "grad_norm": 3.2716727256774902, "learning_rate": 5.585926066545597e-05, "loss": 1.2895, "step": 15704 }, { "epoch": 0.9361068065323638, "grad_norm": 3.2991373538970947, "learning_rate": 5.584986878670761e-05, "loss": 1.4014, "step": 15706 }, { "epoch": 0.9362260102515199, "grad_norm": 2.958019971847534, "learning_rate": 5.584047669868943e-05, "loss": 1.2285, "step": 15708 }, { "epoch": 0.9363452139706759, "grad_norm": 3.177643060684204, "learning_rate": 5.583108440173741e-05, "loss": 1.3652, "step": 15710 }, { "epoch": 0.9364644176898319, "grad_norm": 3.475184679031372, "learning_rate": 5.5821691896187546e-05, "loss": 1.3796, "step": 15712 }, { "epoch": 0.936583621408988, "grad_norm": 2.982447385787964, "learning_rate": 5.581229918237585e-05, "loss": 1.2788, "step": 15714 }, { "epoch": 0.936702825128144, "grad_norm": 3.0650365352630615, "learning_rate": 5.580290626063832e-05, "loss": 1.389, "step": 15716 }, { "epoch": 0.9368220288473, "grad_norm": 3.1114978790283203, "learning_rate": 5.5793513131310994e-05, "loss": 1.2426, "step": 15718 }, { "epoch": 0.9369412325664561, "grad_norm": 2.981067419052124, "learning_rate": 5.5784119794729874e-05, "loss": 1.1838, "step": 15720 }, { "epoch": 0.9370604362856121, "grad_norm": 3.261263847351074, "learning_rate": 5.577472625123099e-05, "loss": 1.1711, "step": 15722 }, { "epoch": 0.9371796400047682, "grad_norm": 3.1780154705047607, "learning_rate": 5.576533250115039e-05, "loss": 1.2601, "step": 15724 }, { "epoch": 0.9372988437239241, "grad_norm": 3.077414035797119, "learning_rate": 5.575593854482414e-05, "loss": 1.3471, "step": 15726 }, { "epoch": 0.9374180474430802, "grad_norm": 5.663681983947754, "learning_rate": 5.574654438258826e-05, "loss": 1.2663, "step": 15728 }, { "epoch": 0.9375372511622363, "grad_norm": 2.7897353172302246, "learning_rate": 5.573715001477883e-05, "loss": 1.1855, "step": 15730 }, { "epoch": 0.9376564548813923, "grad_norm": 2.888211250305176, "learning_rate": 5.57277554417319e-05, "loss": 1.2615, "step": 15732 }, { "epoch": 0.9377756586005483, "grad_norm": 2.9523918628692627, "learning_rate": 5.5718360663783585e-05, "loss": 1.1522, "step": 15734 }, { "epoch": 0.9378948623197044, "grad_norm": 3.1818642616271973, "learning_rate": 5.570896568126993e-05, "loss": 1.0957, "step": 15736 }, { "epoch": 0.9380140660388604, "grad_norm": 2.7937703132629395, "learning_rate": 5.569957049452703e-05, "loss": 1.2974, "step": 15738 }, { "epoch": 0.9381332697580165, "grad_norm": 2.7987701892852783, "learning_rate": 5.5690175103891006e-05, "loss": 1.2655, "step": 15740 }, { "epoch": 0.9382524734771724, "grad_norm": 3.176178455352783, "learning_rate": 5.5680779509697956e-05, "loss": 1.2047, "step": 15742 }, { "epoch": 0.9383716771963285, "grad_norm": 2.9319238662719727, "learning_rate": 5.567138371228396e-05, "loss": 1.1313, "step": 15744 }, { "epoch": 0.9384908809154846, "grad_norm": 3.056842803955078, "learning_rate": 5.5661987711985187e-05, "loss": 1.3136, "step": 15746 }, { "epoch": 0.9386100846346406, "grad_norm": 3.1626710891723633, "learning_rate": 5.565259150913774e-05, "loss": 1.4498, "step": 15748 }, { "epoch": 0.9387292883537967, "grad_norm": 3.214059352874756, "learning_rate": 5.564319510407776e-05, "loss": 1.2402, "step": 15750 }, { "epoch": 0.9388484920729526, "grad_norm": 2.9263064861297607, "learning_rate": 5.5633798497141374e-05, "loss": 1.0857, "step": 15752 }, { "epoch": 0.9389676957921087, "grad_norm": 3.2483181953430176, "learning_rate": 5.562440168866474e-05, "loss": 1.2825, "step": 15754 }, { "epoch": 0.9390868995112648, "grad_norm": 3.2084577083587646, "learning_rate": 5.561500467898401e-05, "loss": 1.2662, "step": 15756 }, { "epoch": 0.9392061032304208, "grad_norm": 2.7995705604553223, "learning_rate": 5.560560746843535e-05, "loss": 1.3002, "step": 15758 }, { "epoch": 0.9393253069495768, "grad_norm": 3.118224620819092, "learning_rate": 5.5596210057354934e-05, "loss": 1.2628, "step": 15760 }, { "epoch": 0.9394445106687329, "grad_norm": 3.290675163269043, "learning_rate": 5.558681244607895e-05, "loss": 1.2778, "step": 15762 }, { "epoch": 0.9395637143878889, "grad_norm": 3.3511362075805664, "learning_rate": 5.557741463494356e-05, "loss": 1.3447, "step": 15764 }, { "epoch": 0.939682918107045, "grad_norm": 2.923145294189453, "learning_rate": 5.556801662428497e-05, "loss": 1.1709, "step": 15766 }, { "epoch": 0.9398021218262009, "grad_norm": 3.1754586696624756, "learning_rate": 5.5558618414439367e-05, "loss": 1.2208, "step": 15768 }, { "epoch": 0.939921325545357, "grad_norm": 3.2359633445739746, "learning_rate": 5.554922000574295e-05, "loss": 1.283, "step": 15770 }, { "epoch": 0.9400405292645131, "grad_norm": 3.610313892364502, "learning_rate": 5.553982139853198e-05, "loss": 1.3069, "step": 15772 }, { "epoch": 0.9401597329836691, "grad_norm": 3.2084546089172363, "learning_rate": 5.553042259314263e-05, "loss": 1.3253, "step": 15774 }, { "epoch": 0.9402789367028251, "grad_norm": 2.8880505561828613, "learning_rate": 5.5521023589911124e-05, "loss": 1.1057, "step": 15776 }, { "epoch": 0.9403981404219812, "grad_norm": 3.133312702178955, "learning_rate": 5.551162438917373e-05, "loss": 1.3637, "step": 15778 }, { "epoch": 0.9405173441411372, "grad_norm": 3.225043535232544, "learning_rate": 5.550222499126669e-05, "loss": 1.2107, "step": 15780 }, { "epoch": 0.9406365478602933, "grad_norm": 3.239985704421997, "learning_rate": 5.5492825396526216e-05, "loss": 1.2994, "step": 15782 }, { "epoch": 0.9407557515794492, "grad_norm": 3.387606143951416, "learning_rate": 5.5483425605288585e-05, "loss": 1.2858, "step": 15784 }, { "epoch": 0.9408749552986053, "grad_norm": 3.2194459438323975, "learning_rate": 5.547402561789007e-05, "loss": 1.4164, "step": 15786 }, { "epoch": 0.9409941590177614, "grad_norm": 2.828577756881714, "learning_rate": 5.54646254346669e-05, "loss": 1.2152, "step": 15788 }, { "epoch": 0.9411133627369174, "grad_norm": 2.872097969055176, "learning_rate": 5.5455225055955416e-05, "loss": 1.167, "step": 15790 }, { "epoch": 0.9412325664560734, "grad_norm": 2.972569465637207, "learning_rate": 5.5445824482091854e-05, "loss": 1.1206, "step": 15792 }, { "epoch": 0.9413517701752294, "grad_norm": 3.136737108230591, "learning_rate": 5.5436423713412524e-05, "loss": 1.2909, "step": 15794 }, { "epoch": 0.9414709738943855, "grad_norm": 3.360825538635254, "learning_rate": 5.542702275025371e-05, "loss": 1.3412, "step": 15796 }, { "epoch": 0.9415901776135416, "grad_norm": 3.2171990871429443, "learning_rate": 5.541762159295172e-05, "loss": 1.3466, "step": 15798 }, { "epoch": 0.9417093813326975, "grad_norm": 3.2435998916625977, "learning_rate": 5.540822024184288e-05, "loss": 1.3659, "step": 15800 }, { "epoch": 0.9418285850518536, "grad_norm": 3.3713419437408447, "learning_rate": 5.539881869726348e-05, "loss": 1.3856, "step": 15802 }, { "epoch": 0.9419477887710097, "grad_norm": 3.281528949737549, "learning_rate": 5.5389416959549876e-05, "loss": 1.2166, "step": 15804 }, { "epoch": 0.9420669924901657, "grad_norm": 3.727426528930664, "learning_rate": 5.538001502903839e-05, "loss": 1.3849, "step": 15806 }, { "epoch": 0.9421861962093218, "grad_norm": 3.010080575942993, "learning_rate": 5.5370612906065344e-05, "loss": 1.1508, "step": 15808 }, { "epoch": 0.9423053999284777, "grad_norm": 3.0744171142578125, "learning_rate": 5.536121059096713e-05, "loss": 1.1569, "step": 15810 }, { "epoch": 0.9424246036476338, "grad_norm": 3.294039487838745, "learning_rate": 5.535180808408005e-05, "loss": 1.2255, "step": 15812 }, { "epoch": 0.9425438073667899, "grad_norm": 3.279574394226074, "learning_rate": 5.534240538574047e-05, "loss": 1.1996, "step": 15814 }, { "epoch": 0.9426630110859459, "grad_norm": 3.111966371536255, "learning_rate": 5.533300249628479e-05, "loss": 1.3756, "step": 15816 }, { "epoch": 0.9427822148051019, "grad_norm": 3.434668779373169, "learning_rate": 5.532359941604937e-05, "loss": 1.2936, "step": 15818 }, { "epoch": 0.9429014185242579, "grad_norm": 2.8241429328918457, "learning_rate": 5.531419614537057e-05, "loss": 1.1331, "step": 15820 }, { "epoch": 0.943020622243414, "grad_norm": 3.076125144958496, "learning_rate": 5.5304792684584785e-05, "loss": 1.0927, "step": 15822 }, { "epoch": 0.9431398259625701, "grad_norm": 3.056979179382324, "learning_rate": 5.529538903402842e-05, "loss": 1.4281, "step": 15824 }, { "epoch": 0.943259029681726, "grad_norm": 3.211071252822876, "learning_rate": 5.528598519403788e-05, "loss": 1.2442, "step": 15826 }, { "epoch": 0.9433782334008821, "grad_norm": 3.260789632797241, "learning_rate": 5.527658116494957e-05, "loss": 1.3793, "step": 15828 }, { "epoch": 0.9434974371200382, "grad_norm": 3.2661285400390625, "learning_rate": 5.526717694709989e-05, "loss": 1.2022, "step": 15830 }, { "epoch": 0.9436166408391942, "grad_norm": 3.356278896331787, "learning_rate": 5.525777254082527e-05, "loss": 1.3536, "step": 15832 }, { "epoch": 0.9437358445583502, "grad_norm": 3.2409842014312744, "learning_rate": 5.524836794646212e-05, "loss": 1.3233, "step": 15834 }, { "epoch": 0.9438550482775062, "grad_norm": 3.339261770248413, "learning_rate": 5.523896316434691e-05, "loss": 1.2954, "step": 15836 }, { "epoch": 0.9439742519966623, "grad_norm": 3.0756921768188477, "learning_rate": 5.522955819481607e-05, "loss": 1.3699, "step": 15838 }, { "epoch": 0.9440934557158184, "grad_norm": 3.0221924781799316, "learning_rate": 5.5220153038206026e-05, "loss": 1.2936, "step": 15840 }, { "epoch": 0.9442126594349743, "grad_norm": 3.2974960803985596, "learning_rate": 5.521074769485325e-05, "loss": 1.3764, "step": 15842 }, { "epoch": 0.9443318631541304, "grad_norm": 3.165290594100952, "learning_rate": 5.520134216509421e-05, "loss": 1.2848, "step": 15844 }, { "epoch": 0.9444510668732864, "grad_norm": 3.099182605743408, "learning_rate": 5.519193644926535e-05, "loss": 1.3447, "step": 15846 }, { "epoch": 0.9445702705924425, "grad_norm": 3.1708364486694336, "learning_rate": 5.5182530547703157e-05, "loss": 1.3055, "step": 15848 }, { "epoch": 0.9446894743115986, "grad_norm": 3.0888254642486572, "learning_rate": 5.517312446074413e-05, "loss": 1.1972, "step": 15850 }, { "epoch": 0.9448086780307545, "grad_norm": 3.374748706817627, "learning_rate": 5.516371818872473e-05, "loss": 1.2796, "step": 15852 }, { "epoch": 0.9449278817499106, "grad_norm": 3.1719133853912354, "learning_rate": 5.5154311731981456e-05, "loss": 1.3791, "step": 15854 }, { "epoch": 0.9450470854690667, "grad_norm": 3.0664637088775635, "learning_rate": 5.514490509085084e-05, "loss": 1.3353, "step": 15856 }, { "epoch": 0.9451662891882227, "grad_norm": 3.262282609939575, "learning_rate": 5.513549826566935e-05, "loss": 1.2728, "step": 15858 }, { "epoch": 0.9452854929073787, "grad_norm": 3.3548805713653564, "learning_rate": 5.5126091256773514e-05, "loss": 1.1985, "step": 15860 }, { "epoch": 0.9454046966265347, "grad_norm": 3.044506311416626, "learning_rate": 5.511668406449987e-05, "loss": 1.1554, "step": 15862 }, { "epoch": 0.9455239003456908, "grad_norm": 2.873929977416992, "learning_rate": 5.5107276689184916e-05, "loss": 1.265, "step": 15864 }, { "epoch": 0.9456431040648469, "grad_norm": 2.833883047103882, "learning_rate": 5.50978691311652e-05, "loss": 1.1381, "step": 15866 }, { "epoch": 0.9457623077840028, "grad_norm": 2.9496448040008545, "learning_rate": 5.508846139077727e-05, "loss": 1.243, "step": 15868 }, { "epoch": 0.9458815115031589, "grad_norm": 3.3413267135620117, "learning_rate": 5.507905346835768e-05, "loss": 1.1848, "step": 15870 }, { "epoch": 0.9460007152223149, "grad_norm": 3.1282434463500977, "learning_rate": 5.506964536424294e-05, "loss": 1.3268, "step": 15872 }, { "epoch": 0.946119918941471, "grad_norm": 2.9136078357696533, "learning_rate": 5.506023707876966e-05, "loss": 1.2628, "step": 15874 }, { "epoch": 0.946239122660627, "grad_norm": 3.1455395221710205, "learning_rate": 5.505082861227437e-05, "loss": 1.3101, "step": 15876 }, { "epoch": 0.946358326379783, "grad_norm": 2.846167802810669, "learning_rate": 5.504141996509366e-05, "loss": 1.2025, "step": 15878 }, { "epoch": 0.9464775300989391, "grad_norm": 3.287034511566162, "learning_rate": 5.5032011137564135e-05, "loss": 1.2611, "step": 15880 }, { "epoch": 0.9465967338180952, "grad_norm": 2.889730215072632, "learning_rate": 5.5022602130022325e-05, "loss": 1.2106, "step": 15882 }, { "epoch": 0.9467159375372511, "grad_norm": 2.985292911529541, "learning_rate": 5.501319294280487e-05, "loss": 1.2419, "step": 15884 }, { "epoch": 0.9468351412564072, "grad_norm": 2.9364027976989746, "learning_rate": 5.500378357624835e-05, "loss": 1.2465, "step": 15886 }, { "epoch": 0.9469543449755632, "grad_norm": 2.563567638397217, "learning_rate": 5.4994374030689364e-05, "loss": 1.3244, "step": 15888 }, { "epoch": 0.9470735486947193, "grad_norm": 2.930565357208252, "learning_rate": 5.498496430646453e-05, "loss": 1.2559, "step": 15890 }, { "epoch": 0.9471927524138753, "grad_norm": 3.008244037628174, "learning_rate": 5.497555440391047e-05, "loss": 1.1788, "step": 15892 }, { "epoch": 0.9473119561330313, "grad_norm": 3.0147712230682373, "learning_rate": 5.496614432336381e-05, "loss": 1.1809, "step": 15894 }, { "epoch": 0.9474311598521874, "grad_norm": 3.3506789207458496, "learning_rate": 5.4956734065161176e-05, "loss": 1.3602, "step": 15896 }, { "epoch": 0.9475503635713435, "grad_norm": 3.017305612564087, "learning_rate": 5.494732362963919e-05, "loss": 1.2017, "step": 15898 }, { "epoch": 0.9476695672904994, "grad_norm": 3.1680824756622314, "learning_rate": 5.493791301713453e-05, "loss": 1.2637, "step": 15900 }, { "epoch": 0.9477887710096555, "grad_norm": 3.175037145614624, "learning_rate": 5.4928502227983824e-05, "loss": 1.1815, "step": 15902 }, { "epoch": 0.9479079747288115, "grad_norm": 3.233792543411255, "learning_rate": 5.4919091262523716e-05, "loss": 1.2501, "step": 15904 }, { "epoch": 0.9480271784479676, "grad_norm": 3.2117257118225098, "learning_rate": 5.490968012109089e-05, "loss": 1.3394, "step": 15906 }, { "epoch": 0.9481463821671237, "grad_norm": 3.2206268310546875, "learning_rate": 5.4900268804022016e-05, "loss": 1.4051, "step": 15908 }, { "epoch": 0.9482655858862796, "grad_norm": 3.0615005493164062, "learning_rate": 5.489085731165374e-05, "loss": 1.2706, "step": 15910 }, { "epoch": 0.9483847896054357, "grad_norm": 3.0886921882629395, "learning_rate": 5.488144564432278e-05, "loss": 1.2104, "step": 15912 }, { "epoch": 0.9485039933245917, "grad_norm": 3.0463650226593018, "learning_rate": 5.487203380236582e-05, "loss": 1.2491, "step": 15914 }, { "epoch": 0.9486231970437478, "grad_norm": 3.004836082458496, "learning_rate": 5.486262178611953e-05, "loss": 1.4333, "step": 15916 }, { "epoch": 0.9487424007629038, "grad_norm": 2.7922165393829346, "learning_rate": 5.485320959592062e-05, "loss": 1.1751, "step": 15918 }, { "epoch": 0.9488616044820598, "grad_norm": 3.04604172706604, "learning_rate": 5.48437972321058e-05, "loss": 1.1831, "step": 15920 }, { "epoch": 0.9489808082012159, "grad_norm": 3.099897623062134, "learning_rate": 5.4834384695011764e-05, "loss": 1.2286, "step": 15922 }, { "epoch": 0.949100011920372, "grad_norm": 3.0966286659240723, "learning_rate": 5.482497198497526e-05, "loss": 1.2581, "step": 15924 }, { "epoch": 0.9492192156395279, "grad_norm": 2.7840161323547363, "learning_rate": 5.4815559102333003e-05, "loss": 1.1044, "step": 15926 }, { "epoch": 0.949338419358684, "grad_norm": 3.124732732772827, "learning_rate": 5.480614604742171e-05, "loss": 1.2518, "step": 15928 }, { "epoch": 0.94945762307784, "grad_norm": 3.1077587604522705, "learning_rate": 5.479673282057815e-05, "loss": 1.273, "step": 15930 }, { "epoch": 0.9495768267969961, "grad_norm": 2.8328661918640137, "learning_rate": 5.478731942213903e-05, "loss": 1.3374, "step": 15932 }, { "epoch": 0.9496960305161521, "grad_norm": 3.016751527786255, "learning_rate": 5.4777905852441114e-05, "loss": 1.3576, "step": 15934 }, { "epoch": 0.9498152342353081, "grad_norm": 2.871630907058716, "learning_rate": 5.476849211182114e-05, "loss": 1.2297, "step": 15936 }, { "epoch": 0.9499344379544642, "grad_norm": 3.086092948913574, "learning_rate": 5.475907820061592e-05, "loss": 1.2477, "step": 15938 }, { "epoch": 0.9500536416736202, "grad_norm": 2.872391939163208, "learning_rate": 5.474966411916218e-05, "loss": 1.2259, "step": 15940 }, { "epoch": 0.9501728453927762, "grad_norm": 3.1137897968292236, "learning_rate": 5.474024986779669e-05, "loss": 1.3102, "step": 15942 }, { "epoch": 0.9502920491119323, "grad_norm": 3.2766120433807373, "learning_rate": 5.473083544685625e-05, "loss": 1.3312, "step": 15944 }, { "epoch": 0.9504112528310883, "grad_norm": 3.4332942962646484, "learning_rate": 5.4721420856677667e-05, "loss": 1.3449, "step": 15946 }, { "epoch": 0.9505304565502444, "grad_norm": 3.134347438812256, "learning_rate": 5.471200609759768e-05, "loss": 1.2752, "step": 15948 }, { "epoch": 0.9506496602694005, "grad_norm": 3.16420841217041, "learning_rate": 5.470259116995311e-05, "loss": 1.3369, "step": 15950 }, { "epoch": 0.9507688639885564, "grad_norm": 2.989187717437744, "learning_rate": 5.469317607408078e-05, "loss": 1.2393, "step": 15952 }, { "epoch": 0.9508880677077125, "grad_norm": 3.3496017456054688, "learning_rate": 5.468376081031746e-05, "loss": 1.4021, "step": 15954 }, { "epoch": 0.9510072714268685, "grad_norm": 3.0497918128967285, "learning_rate": 5.4674345379e-05, "loss": 1.514, "step": 15956 }, { "epoch": 0.9511264751460246, "grad_norm": 3.344003438949585, "learning_rate": 5.466492978046522e-05, "loss": 1.2464, "step": 15958 }, { "epoch": 0.9512456788651806, "grad_norm": 3.1262340545654297, "learning_rate": 5.465551401504995e-05, "loss": 1.2374, "step": 15960 }, { "epoch": 0.9513648825843366, "grad_norm": 3.282036304473877, "learning_rate": 5.4646098083091004e-05, "loss": 1.3641, "step": 15962 }, { "epoch": 0.9514840863034927, "grad_norm": 2.710329294204712, "learning_rate": 5.463668198492523e-05, "loss": 1.1788, "step": 15964 }, { "epoch": 0.9516032900226487, "grad_norm": 6.736769676208496, "learning_rate": 5.462726572088949e-05, "loss": 1.4447, "step": 15966 }, { "epoch": 0.9517224937418047, "grad_norm": 3.4455020427703857, "learning_rate": 5.46178492913206e-05, "loss": 1.3453, "step": 15968 }, { "epoch": 0.9518416974609608, "grad_norm": 3.171947479248047, "learning_rate": 5.4608432696555455e-05, "loss": 1.1942, "step": 15970 }, { "epoch": 0.9519609011801168, "grad_norm": 2.931495428085327, "learning_rate": 5.459901593693091e-05, "loss": 1.2598, "step": 15972 }, { "epoch": 0.9520801048992729, "grad_norm": 3.030710458755493, "learning_rate": 5.458959901278381e-05, "loss": 1.3, "step": 15974 }, { "epoch": 0.9521993086184289, "grad_norm": 2.669421672821045, "learning_rate": 5.458018192445108e-05, "loss": 1.0797, "step": 15976 }, { "epoch": 0.9523185123375849, "grad_norm": 2.9703001976013184, "learning_rate": 5.4570764672269556e-05, "loss": 1.2157, "step": 15978 }, { "epoch": 0.952437716056741, "grad_norm": 3.2443926334381104, "learning_rate": 5.4561347256576137e-05, "loss": 1.3221, "step": 15980 }, { "epoch": 0.952556919775897, "grad_norm": 3.118225336074829, "learning_rate": 5.4551929677707715e-05, "loss": 1.4516, "step": 15982 }, { "epoch": 0.952676123495053, "grad_norm": 3.2107207775115967, "learning_rate": 5.454251193600121e-05, "loss": 1.2763, "step": 15984 }, { "epoch": 0.9527953272142091, "grad_norm": 3.2327022552490234, "learning_rate": 5.4533094031793495e-05, "loss": 1.2362, "step": 15986 }, { "epoch": 0.9529145309333651, "grad_norm": 3.019516706466675, "learning_rate": 5.452367596542151e-05, "loss": 1.3912, "step": 15988 }, { "epoch": 0.9530337346525212, "grad_norm": 3.169236421585083, "learning_rate": 5.451425773722216e-05, "loss": 1.4588, "step": 15990 }, { "epoch": 0.9531529383716772, "grad_norm": 3.021573543548584, "learning_rate": 5.4504839347532353e-05, "loss": 1.2122, "step": 15992 }, { "epoch": 0.9532721420908332, "grad_norm": 3.14024019241333, "learning_rate": 5.449542079668904e-05, "loss": 1.4137, "step": 15994 }, { "epoch": 0.9533913458099893, "grad_norm": 3.1764087677001953, "learning_rate": 5.4486002085029143e-05, "loss": 1.3118, "step": 15996 }, { "epoch": 0.9535105495291453, "grad_norm": 3.1180200576782227, "learning_rate": 5.44765832128896e-05, "loss": 1.2044, "step": 15998 }, { "epoch": 0.9536297532483013, "grad_norm": 2.6654140949249268, "learning_rate": 5.446716418060735e-05, "loss": 1.348, "step": 16000 }, { "epoch": 0.9537489569674574, "grad_norm": 2.6499836444854736, "learning_rate": 5.445774498851936e-05, "loss": 1.1655, "step": 16002 }, { "epoch": 0.9538681606866134, "grad_norm": 3.1651716232299805, "learning_rate": 5.444832563696258e-05, "loss": 1.2532, "step": 16004 }, { "epoch": 0.9539873644057695, "grad_norm": 3.4488253593444824, "learning_rate": 5.443890612627398e-05, "loss": 1.4274, "step": 16006 }, { "epoch": 0.9541065681249254, "grad_norm": 3.2444212436676025, "learning_rate": 5.4429486456790516e-05, "loss": 1.3047, "step": 16008 }, { "epoch": 0.9542257718440815, "grad_norm": 3.4096384048461914, "learning_rate": 5.4420066628849164e-05, "loss": 1.1881, "step": 16010 }, { "epoch": 0.9543449755632376, "grad_norm": 3.3171753883361816, "learning_rate": 5.4410646642786886e-05, "loss": 1.4184, "step": 16012 }, { "epoch": 0.9544641792823936, "grad_norm": 2.6922295093536377, "learning_rate": 5.4401226498940713e-05, "loss": 1.1819, "step": 16014 }, { "epoch": 0.9545833830015497, "grad_norm": 2.762592315673828, "learning_rate": 5.439180619764761e-05, "loss": 1.2627, "step": 16016 }, { "epoch": 0.9547025867207057, "grad_norm": 3.3179779052734375, "learning_rate": 5.438238573924457e-05, "loss": 1.2998, "step": 16018 }, { "epoch": 0.9548217904398617, "grad_norm": 2.803330421447754, "learning_rate": 5.437296512406859e-05, "loss": 1.2863, "step": 16020 }, { "epoch": 0.9549409941590178, "grad_norm": 3.353799819946289, "learning_rate": 5.436354435245669e-05, "loss": 1.2442, "step": 16022 }, { "epoch": 0.9550601978781738, "grad_norm": 3.1423797607421875, "learning_rate": 5.435412342474586e-05, "loss": 1.2464, "step": 16024 }, { "epoch": 0.9551794015973298, "grad_norm": 3.913179874420166, "learning_rate": 5.4344702341273166e-05, "loss": 1.1805, "step": 16026 }, { "epoch": 0.9552986053164859, "grad_norm": 2.963383913040161, "learning_rate": 5.433528110237559e-05, "loss": 1.3278, "step": 16028 }, { "epoch": 0.9554178090356419, "grad_norm": 3.0820045471191406, "learning_rate": 5.432585970839018e-05, "loss": 1.2023, "step": 16030 }, { "epoch": 0.955537012754798, "grad_norm": 2.9799957275390625, "learning_rate": 5.431643815965396e-05, "loss": 1.4351, "step": 16032 }, { "epoch": 0.9556562164739539, "grad_norm": 2.9516568183898926, "learning_rate": 5.430701645650399e-05, "loss": 1.3331, "step": 16034 }, { "epoch": 0.95577542019311, "grad_norm": 3.195455551147461, "learning_rate": 5.429759459927731e-05, "loss": 1.1853, "step": 16036 }, { "epoch": 0.9558946239122661, "grad_norm": 3.1032729148864746, "learning_rate": 5.428817258831095e-05, "loss": 1.2305, "step": 16038 }, { "epoch": 0.9560138276314221, "grad_norm": 3.42704176902771, "learning_rate": 5.427875042394199e-05, "loss": 1.3596, "step": 16040 }, { "epoch": 0.9561330313505781, "grad_norm": 3.0127391815185547, "learning_rate": 5.42693281065075e-05, "loss": 1.1869, "step": 16042 }, { "epoch": 0.9562522350697342, "grad_norm": 3.296001672744751, "learning_rate": 5.425990563634451e-05, "loss": 1.2708, "step": 16044 }, { "epoch": 0.9563714387888902, "grad_norm": 3.3000996112823486, "learning_rate": 5.425048301379014e-05, "loss": 1.462, "step": 16046 }, { "epoch": 0.9564906425080463, "grad_norm": 3.0627520084381104, "learning_rate": 5.424106023918145e-05, "loss": 1.139, "step": 16048 }, { "epoch": 0.9566098462272022, "grad_norm": 3.3697235584259033, "learning_rate": 5.423163731285552e-05, "loss": 1.2398, "step": 16050 }, { "epoch": 0.9567290499463583, "grad_norm": 3.4057743549346924, "learning_rate": 5.422221423514945e-05, "loss": 1.3387, "step": 16052 }, { "epoch": 0.9568482536655144, "grad_norm": 3.1674201488494873, "learning_rate": 5.421279100640033e-05, "loss": 1.3526, "step": 16054 }, { "epoch": 0.9569674573846704, "grad_norm": 3.1320853233337402, "learning_rate": 5.4203367626945246e-05, "loss": 1.2253, "step": 16056 }, { "epoch": 0.9570866611038265, "grad_norm": 3.3036539554595947, "learning_rate": 5.419394409712133e-05, "loss": 1.2165, "step": 16058 }, { "epoch": 0.9572058648229824, "grad_norm": 3.1344807147979736, "learning_rate": 5.418452041726569e-05, "loss": 1.2555, "step": 16060 }, { "epoch": 0.9573250685421385, "grad_norm": 2.7876334190368652, "learning_rate": 5.4175096587715435e-05, "loss": 1.3221, "step": 16062 }, { "epoch": 0.9574442722612946, "grad_norm": 2.868784189224243, "learning_rate": 5.416567260880768e-05, "loss": 1.1735, "step": 16064 }, { "epoch": 0.9575634759804506, "grad_norm": 2.906127691268921, "learning_rate": 5.415624848087959e-05, "loss": 1.3424, "step": 16066 }, { "epoch": 0.9576826796996066, "grad_norm": 3.389159917831421, "learning_rate": 5.414682420426826e-05, "loss": 1.3526, "step": 16068 }, { "epoch": 0.9578018834187627, "grad_norm": 3.114774227142334, "learning_rate": 5.413739977931082e-05, "loss": 1.1992, "step": 16070 }, { "epoch": 0.9579210871379187, "grad_norm": 2.8769643306732178, "learning_rate": 5.412797520634445e-05, "loss": 1.234, "step": 16072 }, { "epoch": 0.9580402908570748, "grad_norm": 2.78694224357605, "learning_rate": 5.411855048570629e-05, "loss": 1.1191, "step": 16074 }, { "epoch": 0.9581594945762307, "grad_norm": 3.4649524688720703, "learning_rate": 5.410912561773346e-05, "loss": 1.193, "step": 16076 }, { "epoch": 0.9582786982953868, "grad_norm": 3.036609172821045, "learning_rate": 5.409970060276317e-05, "loss": 1.2923, "step": 16078 }, { "epoch": 0.9583979020145429, "grad_norm": 3.3651206493377686, "learning_rate": 5.409027544113257e-05, "loss": 1.4359, "step": 16080 }, { "epoch": 0.9585171057336989, "grad_norm": 3.134479284286499, "learning_rate": 5.408085013317881e-05, "loss": 1.2233, "step": 16082 }, { "epoch": 0.9586363094528549, "grad_norm": 3.2211451530456543, "learning_rate": 5.4071424679239066e-05, "loss": 1.2722, "step": 16084 }, { "epoch": 0.958755513172011, "grad_norm": 3.189622640609741, "learning_rate": 5.406199907965055e-05, "loss": 1.1896, "step": 16086 }, { "epoch": 0.958874716891167, "grad_norm": 3.292433977127075, "learning_rate": 5.405257333475042e-05, "loss": 1.2471, "step": 16088 }, { "epoch": 0.9589939206103231, "grad_norm": 2.6605236530303955, "learning_rate": 5.4043147444875886e-05, "loss": 1.141, "step": 16090 }, { "epoch": 0.959113124329479, "grad_norm": 2.67333984375, "learning_rate": 5.403372141036413e-05, "loss": 1.2791, "step": 16092 }, { "epoch": 0.9592323280486351, "grad_norm": 3.2070469856262207, "learning_rate": 5.402429523155234e-05, "loss": 1.4551, "step": 16094 }, { "epoch": 0.9593515317677912, "grad_norm": 3.7059783935546875, "learning_rate": 5.401486890877777e-05, "loss": 1.4418, "step": 16096 }, { "epoch": 0.9594707354869472, "grad_norm": 2.8568997383117676, "learning_rate": 5.4005442442377585e-05, "loss": 1.2373, "step": 16098 }, { "epoch": 0.9595899392061032, "grad_norm": 3.1612823009490967, "learning_rate": 5.399601583268903e-05, "loss": 1.2677, "step": 16100 }, { "epoch": 0.9597091429252592, "grad_norm": 3.274641990661621, "learning_rate": 5.39865890800493e-05, "loss": 1.322, "step": 16102 }, { "epoch": 0.9598283466444153, "grad_norm": 2.865211009979248, "learning_rate": 5.397716218479565e-05, "loss": 1.1785, "step": 16104 }, { "epoch": 0.9599475503635714, "grad_norm": 2.884629011154175, "learning_rate": 5.3967735147265295e-05, "loss": 1.2763, "step": 16106 }, { "epoch": 0.9600667540827273, "grad_norm": 3.264984130859375, "learning_rate": 5.3958307967795485e-05, "loss": 1.4229, "step": 16108 }, { "epoch": 0.9601859578018834, "grad_norm": 3.017819881439209, "learning_rate": 5.394888064672345e-05, "loss": 1.2145, "step": 16110 }, { "epoch": 0.9603051615210395, "grad_norm": 3.1814160346984863, "learning_rate": 5.3939453184386454e-05, "loss": 1.1093, "step": 16112 }, { "epoch": 0.9604243652401955, "grad_norm": 2.953747272491455, "learning_rate": 5.393002558112172e-05, "loss": 1.1995, "step": 16114 }, { "epoch": 0.9605435689593516, "grad_norm": 3.618821620941162, "learning_rate": 5.3920597837266554e-05, "loss": 1.2401, "step": 16116 }, { "epoch": 0.9606627726785075, "grad_norm": 3.044543504714966, "learning_rate": 5.391116995315817e-05, "loss": 1.2091, "step": 16118 }, { "epoch": 0.9607819763976636, "grad_norm": 3.1553571224212646, "learning_rate": 5.390174192913384e-05, "loss": 1.3522, "step": 16120 }, { "epoch": 0.9609011801168197, "grad_norm": 3.0879600048065186, "learning_rate": 5.389231376553087e-05, "loss": 1.2834, "step": 16122 }, { "epoch": 0.9610203838359757, "grad_norm": 2.954619884490967, "learning_rate": 5.3882885462686514e-05, "loss": 1.2437, "step": 16124 }, { "epoch": 0.9611395875551317, "grad_norm": 3.0820932388305664, "learning_rate": 5.3873457020938066e-05, "loss": 1.3204, "step": 16126 }, { "epoch": 0.9612587912742877, "grad_norm": 2.624190330505371, "learning_rate": 5.386402844062281e-05, "loss": 1.1539, "step": 16128 }, { "epoch": 0.9613779949934438, "grad_norm": 3.225271224975586, "learning_rate": 5.385459972207804e-05, "loss": 1.4063, "step": 16130 }, { "epoch": 0.9614971987125999, "grad_norm": 3.0652413368225098, "learning_rate": 5.384517086564104e-05, "loss": 1.1238, "step": 16132 }, { "epoch": 0.9616164024317558, "grad_norm": 2.7512004375457764, "learning_rate": 5.38357418716491e-05, "loss": 1.1236, "step": 16134 }, { "epoch": 0.9617356061509119, "grad_norm": 2.8334341049194336, "learning_rate": 5.382631274043958e-05, "loss": 1.3124, "step": 16136 }, { "epoch": 0.961854809870068, "grad_norm": 3.0947015285491943, "learning_rate": 5.3816883472349756e-05, "loss": 1.1888, "step": 16138 }, { "epoch": 0.961974013589224, "grad_norm": 3.3773529529571533, "learning_rate": 5.3807454067716934e-05, "loss": 1.2648, "step": 16140 }, { "epoch": 0.96209321730838, "grad_norm": 2.888223171234131, "learning_rate": 5.3798024526878476e-05, "loss": 1.1467, "step": 16142 }, { "epoch": 0.962212421027536, "grad_norm": 3.2318780422210693, "learning_rate": 5.378859485017168e-05, "loss": 1.3218, "step": 16144 }, { "epoch": 0.9623316247466921, "grad_norm": 3.1781883239746094, "learning_rate": 5.3779165037933875e-05, "loss": 1.2189, "step": 16146 }, { "epoch": 0.9624508284658482, "grad_norm": 2.9293835163116455, "learning_rate": 5.376973509050242e-05, "loss": 1.1924, "step": 16148 }, { "epoch": 0.9625700321850041, "grad_norm": 3.0093629360198975, "learning_rate": 5.376030500821463e-05, "loss": 1.2388, "step": 16150 }, { "epoch": 0.9626892359041602, "grad_norm": 3.193122625350952, "learning_rate": 5.3750874791407856e-05, "loss": 1.239, "step": 16152 }, { "epoch": 0.9628084396233162, "grad_norm": 3.562462329864502, "learning_rate": 5.374144444041948e-05, "loss": 1.381, "step": 16154 }, { "epoch": 0.9629276433424723, "grad_norm": 3.1549673080444336, "learning_rate": 5.3732013955586835e-05, "loss": 1.2414, "step": 16156 }, { "epoch": 0.9630468470616284, "grad_norm": 3.0247135162353516, "learning_rate": 5.372258333724726e-05, "loss": 1.2899, "step": 16158 }, { "epoch": 0.9631660507807843, "grad_norm": 3.112062931060791, "learning_rate": 5.371315258573816e-05, "loss": 1.1751, "step": 16160 }, { "epoch": 0.9632852544999404, "grad_norm": 2.634948968887329, "learning_rate": 5.3703721701396884e-05, "loss": 1.119, "step": 16162 }, { "epoch": 0.9634044582190965, "grad_norm": 3.5208961963653564, "learning_rate": 5.36942906845608e-05, "loss": 1.412, "step": 16164 }, { "epoch": 0.9635236619382525, "grad_norm": 3.033757448196411, "learning_rate": 5.36848595355673e-05, "loss": 1.1576, "step": 16166 }, { "epoch": 0.9636428656574085, "grad_norm": 3.5429649353027344, "learning_rate": 5.367542825475378e-05, "loss": 1.2631, "step": 16168 }, { "epoch": 0.9637620693765645, "grad_norm": 3.364208459854126, "learning_rate": 5.3665996842457614e-05, "loss": 1.2861, "step": 16170 }, { "epoch": 0.9638812730957206, "grad_norm": 3.3235347270965576, "learning_rate": 5.365656529901619e-05, "loss": 1.3557, "step": 16172 }, { "epoch": 0.9640004768148767, "grad_norm": 2.9834446907043457, "learning_rate": 5.364713362476692e-05, "loss": 1.2447, "step": 16174 }, { "epoch": 0.9641196805340326, "grad_norm": 3.7791030406951904, "learning_rate": 5.36377018200472e-05, "loss": 1.2715, "step": 16176 }, { "epoch": 0.9642388842531887, "grad_norm": 3.3265914916992188, "learning_rate": 5.362826988519443e-05, "loss": 1.3098, "step": 16178 }, { "epoch": 0.9643580879723448, "grad_norm": 2.9907357692718506, "learning_rate": 5.361883782054604e-05, "loss": 1.2456, "step": 16180 }, { "epoch": 0.9644772916915008, "grad_norm": 3.2221765518188477, "learning_rate": 5.360940562643945e-05, "loss": 1.3909, "step": 16182 }, { "epoch": 0.9645964954106568, "grad_norm": 2.7772631645202637, "learning_rate": 5.359997330321206e-05, "loss": 1.2121, "step": 16184 }, { "epoch": 0.9647156991298128, "grad_norm": 3.0089545249938965, "learning_rate": 5.359054085120131e-05, "loss": 1.1495, "step": 16186 }, { "epoch": 0.9648349028489689, "grad_norm": 3.1906542778015137, "learning_rate": 5.358110827074463e-05, "loss": 1.2201, "step": 16188 }, { "epoch": 0.964954106568125, "grad_norm": 3.2043023109436035, "learning_rate": 5.3571675562179446e-05, "loss": 1.1859, "step": 16190 }, { "epoch": 0.9650733102872809, "grad_norm": 3.6861491203308105, "learning_rate": 5.3562242725843215e-05, "loss": 1.3188, "step": 16192 }, { "epoch": 0.965192514006437, "grad_norm": 3.021034002304077, "learning_rate": 5.355280976207337e-05, "loss": 1.2208, "step": 16194 }, { "epoch": 0.965311717725593, "grad_norm": 2.990293264389038, "learning_rate": 5.354337667120737e-05, "loss": 1.1165, "step": 16196 }, { "epoch": 0.9654309214447491, "grad_norm": 3.5199334621429443, "learning_rate": 5.3533943453582643e-05, "loss": 1.2624, "step": 16198 }, { "epoch": 0.9655501251639051, "grad_norm": 3.2437705993652344, "learning_rate": 5.3524510109536694e-05, "loss": 1.1659, "step": 16200 }, { "epoch": 0.9656693288830611, "grad_norm": 3.239995241165161, "learning_rate": 5.351507663940696e-05, "loss": 1.2507, "step": 16202 }, { "epoch": 0.9657885326022172, "grad_norm": 3.35823917388916, "learning_rate": 5.3505643043530896e-05, "loss": 1.2865, "step": 16204 }, { "epoch": 0.9659077363213733, "grad_norm": 2.9385361671447754, "learning_rate": 5.349620932224598e-05, "loss": 1.4207, "step": 16206 }, { "epoch": 0.9660269400405292, "grad_norm": 4.026708126068115, "learning_rate": 5.34867754758897e-05, "loss": 1.3585, "step": 16208 }, { "epoch": 0.9661461437596853, "grad_norm": 2.8679113388061523, "learning_rate": 5.3477341504799526e-05, "loss": 1.3936, "step": 16210 }, { "epoch": 0.9662653474788413, "grad_norm": 2.8924434185028076, "learning_rate": 5.3467907409312953e-05, "loss": 1.1387, "step": 16212 }, { "epoch": 0.9663845511979974, "grad_norm": 3.616018772125244, "learning_rate": 5.3458473189767475e-05, "loss": 1.2183, "step": 16214 }, { "epoch": 0.9665037549171535, "grad_norm": 3.2397587299346924, "learning_rate": 5.344903884650058e-05, "loss": 1.1256, "step": 16216 }, { "epoch": 0.9666229586363094, "grad_norm": 3.1434950828552246, "learning_rate": 5.3439604379849764e-05, "loss": 1.3132, "step": 16218 }, { "epoch": 0.9667421623554655, "grad_norm": 3.2577569484710693, "learning_rate": 5.343016979015252e-05, "loss": 1.19, "step": 16220 }, { "epoch": 0.9668613660746215, "grad_norm": 3.204223394393921, "learning_rate": 5.3420735077746367e-05, "loss": 1.129, "step": 16222 }, { "epoch": 0.9669805697937776, "grad_norm": 3.167015314102173, "learning_rate": 5.341130024296881e-05, "loss": 1.2198, "step": 16224 }, { "epoch": 0.9670997735129336, "grad_norm": 3.05538272857666, "learning_rate": 5.340186528615738e-05, "loss": 1.1782, "step": 16226 }, { "epoch": 0.9672189772320896, "grad_norm": 2.6597814559936523, "learning_rate": 5.33924302076496e-05, "loss": 1.2672, "step": 16228 }, { "epoch": 0.9673381809512457, "grad_norm": 3.0368716716766357, "learning_rate": 5.3382995007782966e-05, "loss": 1.2005, "step": 16230 }, { "epoch": 0.9674573846704018, "grad_norm": 3.451514959335327, "learning_rate": 5.337355968689505e-05, "loss": 1.1654, "step": 16232 }, { "epoch": 0.9675765883895577, "grad_norm": 3.0761263370513916, "learning_rate": 5.3364124245323346e-05, "loss": 1.2304, "step": 16234 }, { "epoch": 0.9676957921087138, "grad_norm": 2.8332982063293457, "learning_rate": 5.33546886834054e-05, "loss": 1.2543, "step": 16236 }, { "epoch": 0.9678149958278698, "grad_norm": 2.851426124572754, "learning_rate": 5.334525300147877e-05, "loss": 1.244, "step": 16238 }, { "epoch": 0.9679341995470259, "grad_norm": 2.9867172241210938, "learning_rate": 5.333581719988099e-05, "loss": 1.1439, "step": 16240 }, { "epoch": 0.9680534032661819, "grad_norm": 3.2622029781341553, "learning_rate": 5.332638127894961e-05, "loss": 1.1483, "step": 16242 }, { "epoch": 0.9681726069853379, "grad_norm": 2.952331304550171, "learning_rate": 5.331694523902221e-05, "loss": 1.3289, "step": 16244 }, { "epoch": 0.968291810704494, "grad_norm": 2.8509912490844727, "learning_rate": 5.3307509080436324e-05, "loss": 1.1547, "step": 16246 }, { "epoch": 0.96841101442365, "grad_norm": 2.786968946456909, "learning_rate": 5.329807280352951e-05, "loss": 1.2121, "step": 16248 }, { "epoch": 0.968530218142806, "grad_norm": 3.194891929626465, "learning_rate": 5.328863640863936e-05, "loss": 1.2041, "step": 16250 }, { "epoch": 0.9686494218619621, "grad_norm": 3.296937942504883, "learning_rate": 5.327919989610343e-05, "loss": 1.2116, "step": 16252 }, { "epoch": 0.9687686255811181, "grad_norm": 3.5543313026428223, "learning_rate": 5.3269763266259286e-05, "loss": 1.2639, "step": 16254 }, { "epoch": 0.9688878293002742, "grad_norm": 2.882669687271118, "learning_rate": 5.3260326519444534e-05, "loss": 1.2465, "step": 16256 }, { "epoch": 0.9690070330194303, "grad_norm": 3.1906626224517822, "learning_rate": 5.325088965599676e-05, "loss": 1.3103, "step": 16258 }, { "epoch": 0.9691262367385862, "grad_norm": 3.573251247406006, "learning_rate": 5.324145267625353e-05, "loss": 1.3626, "step": 16260 }, { "epoch": 0.9692454404577423, "grad_norm": 3.127976417541504, "learning_rate": 5.3232015580552454e-05, "loss": 1.2831, "step": 16262 }, { "epoch": 0.9693646441768983, "grad_norm": 3.095454454421997, "learning_rate": 5.3222578369231115e-05, "loss": 1.2252, "step": 16264 }, { "epoch": 0.9694838478960544, "grad_norm": 2.9654934406280518, "learning_rate": 5.3213141042627115e-05, "loss": 1.3208, "step": 16266 }, { "epoch": 0.9696030516152104, "grad_norm": 3.3300840854644775, "learning_rate": 5.320370360107807e-05, "loss": 1.2627, "step": 16268 }, { "epoch": 0.9697222553343664, "grad_norm": 2.8355023860931396, "learning_rate": 5.319426604492159e-05, "loss": 1.197, "step": 16270 }, { "epoch": 0.9698414590535225, "grad_norm": 3.258383274078369, "learning_rate": 5.318482837449528e-05, "loss": 1.4627, "step": 16272 }, { "epoch": 0.9699606627726786, "grad_norm": 3.0343823432922363, "learning_rate": 5.3175390590136764e-05, "loss": 1.276, "step": 16274 }, { "epoch": 0.9700798664918345, "grad_norm": 3.493626594543457, "learning_rate": 5.316595269218368e-05, "loss": 1.2949, "step": 16276 }, { "epoch": 0.9701990702109906, "grad_norm": 3.074716329574585, "learning_rate": 5.315651468097361e-05, "loss": 1.2298, "step": 16278 }, { "epoch": 0.9703182739301466, "grad_norm": 3.108182668685913, "learning_rate": 5.314707655684421e-05, "loss": 1.2794, "step": 16280 }, { "epoch": 0.9704374776493027, "grad_norm": 3.0252230167388916, "learning_rate": 5.313763832013313e-05, "loss": 1.1631, "step": 16282 }, { "epoch": 0.9705566813684587, "grad_norm": 2.920870304107666, "learning_rate": 5.312819997117798e-05, "loss": 1.2486, "step": 16284 }, { "epoch": 0.9706758850876147, "grad_norm": 3.4644904136657715, "learning_rate": 5.3118761510316416e-05, "loss": 1.3416, "step": 16286 }, { "epoch": 0.9707950888067708, "grad_norm": 2.819410562515259, "learning_rate": 5.3109322937886084e-05, "loss": 1.1071, "step": 16288 }, { "epoch": 0.9709142925259268, "grad_norm": 3.330054759979248, "learning_rate": 5.309988425422464e-05, "loss": 1.2195, "step": 16290 }, { "epoch": 0.9710334962450828, "grad_norm": 3.192375659942627, "learning_rate": 5.309044545966972e-05, "loss": 1.4858, "step": 16292 }, { "epoch": 0.9711526999642389, "grad_norm": 3.3320040702819824, "learning_rate": 5.3081006554559e-05, "loss": 1.2246, "step": 16294 }, { "epoch": 0.9712719036833949, "grad_norm": 3.0947415828704834, "learning_rate": 5.307156753923014e-05, "loss": 1.4262, "step": 16296 }, { "epoch": 0.971391107402551, "grad_norm": 3.671509265899658, "learning_rate": 5.3062128414020785e-05, "loss": 1.4703, "step": 16298 }, { "epoch": 0.971510311121707, "grad_norm": 2.94942569732666, "learning_rate": 5.305268917926861e-05, "loss": 1.1187, "step": 16300 }, { "epoch": 0.971629514840863, "grad_norm": 2.8763365745544434, "learning_rate": 5.3043249835311314e-05, "loss": 1.2208, "step": 16302 }, { "epoch": 0.9717487185600191, "grad_norm": 3.1541833877563477, "learning_rate": 5.303381038248656e-05, "loss": 1.4337, "step": 16304 }, { "epoch": 0.9718679222791751, "grad_norm": 3.00789475440979, "learning_rate": 5.302437082113203e-05, "loss": 1.1916, "step": 16306 }, { "epoch": 0.9719871259983311, "grad_norm": 3.0613579750061035, "learning_rate": 5.3014931151585414e-05, "loss": 1.3319, "step": 16308 }, { "epoch": 0.9721063297174872, "grad_norm": 3.147756576538086, "learning_rate": 5.300549137418438e-05, "loss": 1.3919, "step": 16310 }, { "epoch": 0.9722255334366432, "grad_norm": 3.267735719680786, "learning_rate": 5.299605148926663e-05, "loss": 1.3538, "step": 16312 }, { "epoch": 0.9723447371557993, "grad_norm": 3.4426333904266357, "learning_rate": 5.2986611497169893e-05, "loss": 1.2337, "step": 16314 }, { "epoch": 0.9724639408749552, "grad_norm": 3.453651189804077, "learning_rate": 5.297717139823183e-05, "loss": 1.3159, "step": 16316 }, { "epoch": 0.9725831445941113, "grad_norm": 3.3665847778320312, "learning_rate": 5.296773119279015e-05, "loss": 1.2885, "step": 16318 }, { "epoch": 0.9727023483132674, "grad_norm": 2.5642964839935303, "learning_rate": 5.295829088118258e-05, "loss": 1.1075, "step": 16320 }, { "epoch": 0.9728215520324234, "grad_norm": 2.8202638626098633, "learning_rate": 5.294885046374683e-05, "loss": 1.2091, "step": 16322 }, { "epoch": 0.9729407557515795, "grad_norm": 2.6968843936920166, "learning_rate": 5.2939409940820604e-05, "loss": 1.2507, "step": 16324 }, { "epoch": 0.9730599594707355, "grad_norm": 2.900831460952759, "learning_rate": 5.2929969312741625e-05, "loss": 1.2374, "step": 16326 }, { "epoch": 0.9731791631898915, "grad_norm": 2.913489818572998, "learning_rate": 5.2920528579847625e-05, "loss": 1.2285, "step": 16328 }, { "epoch": 0.9732983669090476, "grad_norm": 3.228269338607788, "learning_rate": 5.2911087742476325e-05, "loss": 1.3222, "step": 16330 }, { "epoch": 0.9734175706282036, "grad_norm": 3.201864242553711, "learning_rate": 5.290164680096544e-05, "loss": 1.4329, "step": 16332 }, { "epoch": 0.9735367743473596, "grad_norm": 2.80682373046875, "learning_rate": 5.289220575565274e-05, "loss": 1.2068, "step": 16334 }, { "epoch": 0.9736559780665157, "grad_norm": 2.88423490524292, "learning_rate": 5.288276460687595e-05, "loss": 1.1932, "step": 16336 }, { "epoch": 0.9737751817856717, "grad_norm": 3.273695468902588, "learning_rate": 5.287332335497279e-05, "loss": 1.3265, "step": 16338 }, { "epoch": 0.9738943855048278, "grad_norm": 2.9275262355804443, "learning_rate": 5.286388200028103e-05, "loss": 1.1281, "step": 16340 }, { "epoch": 0.9740135892239837, "grad_norm": 2.9066407680511475, "learning_rate": 5.2854440543138406e-05, "loss": 1.2678, "step": 16342 }, { "epoch": 0.9741327929431398, "grad_norm": 3.183478593826294, "learning_rate": 5.2844998983882674e-05, "loss": 1.3194, "step": 16344 }, { "epoch": 0.9742519966622959, "grad_norm": 2.994403123855591, "learning_rate": 5.2835557322851604e-05, "loss": 1.1322, "step": 16346 }, { "epoch": 0.9743712003814519, "grad_norm": 3.3963217735290527, "learning_rate": 5.282611556038295e-05, "loss": 1.3481, "step": 16348 }, { "epoch": 0.9744904041006079, "grad_norm": 3.424483060836792, "learning_rate": 5.281667369681446e-05, "loss": 1.2501, "step": 16350 }, { "epoch": 0.974609607819764, "grad_norm": 3.4748494625091553, "learning_rate": 5.280723173248393e-05, "loss": 1.2913, "step": 16352 }, { "epoch": 0.97472881153892, "grad_norm": 3.2711079120635986, "learning_rate": 5.279778966772911e-05, "loss": 1.4063, "step": 16354 }, { "epoch": 0.9748480152580761, "grad_norm": 3.1139070987701416, "learning_rate": 5.278834750288777e-05, "loss": 1.2473, "step": 16356 }, { "epoch": 0.974967218977232, "grad_norm": 2.72175669670105, "learning_rate": 5.2778905238297714e-05, "loss": 1.2517, "step": 16358 }, { "epoch": 0.9750864226963881, "grad_norm": 3.262882709503174, "learning_rate": 5.27694628742967e-05, "loss": 1.2278, "step": 16360 }, { "epoch": 0.9752056264155442, "grad_norm": 2.9906764030456543, "learning_rate": 5.2760020411222533e-05, "loss": 1.1022, "step": 16362 }, { "epoch": 0.9753248301347002, "grad_norm": 3.0451138019561768, "learning_rate": 5.275057784941299e-05, "loss": 1.2696, "step": 16364 }, { "epoch": 0.9754440338538563, "grad_norm": 3.0031838417053223, "learning_rate": 5.274113518920586e-05, "loss": 1.2695, "step": 16366 }, { "epoch": 0.9755632375730123, "grad_norm": 3.4277729988098145, "learning_rate": 5.273169243093895e-05, "loss": 1.2683, "step": 16368 }, { "epoch": 0.9756824412921683, "grad_norm": 2.907789945602417, "learning_rate": 5.2722249574950056e-05, "loss": 1.0963, "step": 16370 }, { "epoch": 0.9758016450113244, "grad_norm": 3.564305543899536, "learning_rate": 5.271280662157698e-05, "loss": 1.3858, "step": 16372 }, { "epoch": 0.9759208487304804, "grad_norm": 2.757537841796875, "learning_rate": 5.2703363571157535e-05, "loss": 1.0764, "step": 16374 }, { "epoch": 0.9760400524496364, "grad_norm": 2.708134889602661, "learning_rate": 5.2693920424029506e-05, "loss": 1.1386, "step": 16376 }, { "epoch": 0.9761592561687925, "grad_norm": 3.2841856479644775, "learning_rate": 5.2684477180530745e-05, "loss": 1.2999, "step": 16378 }, { "epoch": 0.9762784598879485, "grad_norm": 3.301480770111084, "learning_rate": 5.267503384099905e-05, "loss": 1.3163, "step": 16380 }, { "epoch": 0.9763976636071046, "grad_norm": 3.2475900650024414, "learning_rate": 5.2665590405772237e-05, "loss": 1.458, "step": 16382 }, { "epoch": 0.9765168673262605, "grad_norm": 3.1503212451934814, "learning_rate": 5.265614687518814e-05, "loss": 1.1323, "step": 16384 }, { "epoch": 0.9766360710454166, "grad_norm": 3.277817726135254, "learning_rate": 5.2646703249584574e-05, "loss": 1.4288, "step": 16386 }, { "epoch": 0.9767552747645727, "grad_norm": 3.161470890045166, "learning_rate": 5.2637259529299375e-05, "loss": 1.2932, "step": 16388 }, { "epoch": 0.9768744784837287, "grad_norm": 3.017385721206665, "learning_rate": 5.262781571467038e-05, "loss": 1.2664, "step": 16390 }, { "epoch": 0.9769936822028847, "grad_norm": 3.0924036502838135, "learning_rate": 5.261837180603544e-05, "loss": 1.3002, "step": 16392 }, { "epoch": 0.9771128859220408, "grad_norm": 3.1248300075531006, "learning_rate": 5.260892780373236e-05, "loss": 1.1988, "step": 16394 }, { "epoch": 0.9772320896411968, "grad_norm": 3.381127119064331, "learning_rate": 5.2599483708099016e-05, "loss": 1.1639, "step": 16396 }, { "epoch": 0.9773512933603529, "grad_norm": 3.3699557781219482, "learning_rate": 5.259003951947327e-05, "loss": 1.2965, "step": 16398 }, { "epoch": 0.9774704970795088, "grad_norm": 3.0001754760742188, "learning_rate": 5.258059523819292e-05, "loss": 1.2303, "step": 16400 }, { "epoch": 0.9775897007986649, "grad_norm": 3.0479674339294434, "learning_rate": 5.257115086459584e-05, "loss": 1.2848, "step": 16402 }, { "epoch": 0.977708904517821, "grad_norm": 3.1014976501464844, "learning_rate": 5.256170639901991e-05, "loss": 1.193, "step": 16404 }, { "epoch": 0.977828108236977, "grad_norm": 3.2047481536865234, "learning_rate": 5.2552261841802995e-05, "loss": 1.21, "step": 16406 }, { "epoch": 0.977947311956133, "grad_norm": 3.360109567642212, "learning_rate": 5.254281719328291e-05, "loss": 1.2229, "step": 16408 }, { "epoch": 0.978066515675289, "grad_norm": 3.1773035526275635, "learning_rate": 5.253337245379757e-05, "loss": 1.3695, "step": 16410 }, { "epoch": 0.9781857193944451, "grad_norm": 3.3567545413970947, "learning_rate": 5.252392762368484e-05, "loss": 1.2835, "step": 16412 }, { "epoch": 0.9783049231136012, "grad_norm": 3.551288604736328, "learning_rate": 5.251448270328256e-05, "loss": 1.2279, "step": 16414 }, { "epoch": 0.9784241268327571, "grad_norm": 2.9710729122161865, "learning_rate": 5.2505037692928646e-05, "loss": 1.1498, "step": 16416 }, { "epoch": 0.9785433305519132, "grad_norm": 3.392219305038452, "learning_rate": 5.2495592592960974e-05, "loss": 1.3291, "step": 16418 }, { "epoch": 0.9786625342710693, "grad_norm": 3.0603973865509033, "learning_rate": 5.248614740371739e-05, "loss": 1.2662, "step": 16420 }, { "epoch": 0.9787817379902253, "grad_norm": 2.8671395778656006, "learning_rate": 5.2476702125535816e-05, "loss": 1.3616, "step": 16422 }, { "epoch": 0.9789009417093814, "grad_norm": 3.096505880355835, "learning_rate": 5.246725675875415e-05, "loss": 1.2207, "step": 16424 }, { "epoch": 0.9790201454285373, "grad_norm": 2.936218738555908, "learning_rate": 5.2457811303710244e-05, "loss": 1.3099, "step": 16426 }, { "epoch": 0.9791393491476934, "grad_norm": 3.1525464057922363, "learning_rate": 5.244836576074204e-05, "loss": 1.3273, "step": 16428 }, { "epoch": 0.9792585528668495, "grad_norm": 3.036658763885498, "learning_rate": 5.24389201301874e-05, "loss": 1.3292, "step": 16430 }, { "epoch": 0.9793777565860055, "grad_norm": 3.126620292663574, "learning_rate": 5.242947441238425e-05, "loss": 1.2232, "step": 16432 }, { "epoch": 0.9794969603051615, "grad_norm": 3.1063432693481445, "learning_rate": 5.242002860767048e-05, "loss": 1.1312, "step": 16434 }, { "epoch": 0.9796161640243175, "grad_norm": 3.092928171157837, "learning_rate": 5.2410582716384004e-05, "loss": 1.1706, "step": 16436 }, { "epoch": 0.9797353677434736, "grad_norm": 3.049602508544922, "learning_rate": 5.240113673886274e-05, "loss": 1.3059, "step": 16438 }, { "epoch": 0.9798545714626297, "grad_norm": 3.065903663635254, "learning_rate": 5.2391690675444605e-05, "loss": 1.261, "step": 16440 }, { "epoch": 0.9799737751817856, "grad_norm": 2.9439480304718018, "learning_rate": 5.2382244526467516e-05, "loss": 1.1988, "step": 16442 }, { "epoch": 0.9800929789009417, "grad_norm": 2.9193363189697266, "learning_rate": 5.2372798292269385e-05, "loss": 1.2526, "step": 16444 }, { "epoch": 0.9802121826200978, "grad_norm": 3.2348248958587646, "learning_rate": 5.236335197318814e-05, "loss": 1.2178, "step": 16446 }, { "epoch": 0.9803313863392538, "grad_norm": 3.0251755714416504, "learning_rate": 5.23539055695617e-05, "loss": 1.274, "step": 16448 }, { "epoch": 0.9804505900584098, "grad_norm": 3.2456281185150146, "learning_rate": 5.2344459081728015e-05, "loss": 1.1944, "step": 16450 }, { "epoch": 0.9805697937775658, "grad_norm": 3.0367679595947266, "learning_rate": 5.233501251002499e-05, "loss": 1.3365, "step": 16452 }, { "epoch": 0.9806889974967219, "grad_norm": 3.1228673458099365, "learning_rate": 5.23255658547906e-05, "loss": 1.2748, "step": 16454 }, { "epoch": 0.980808201215878, "grad_norm": 3.475362777709961, "learning_rate": 5.231611911636276e-05, "loss": 1.2026, "step": 16456 }, { "epoch": 0.9809274049350339, "grad_norm": 3.0709216594696045, "learning_rate": 5.2306672295079406e-05, "loss": 1.1987, "step": 16458 }, { "epoch": 0.98104660865419, "grad_norm": 3.136085271835327, "learning_rate": 5.22972253912785e-05, "loss": 1.3241, "step": 16460 }, { "epoch": 0.9811658123733461, "grad_norm": 3.1586849689483643, "learning_rate": 5.2287778405297985e-05, "loss": 1.2836, "step": 16462 }, { "epoch": 0.9812850160925021, "grad_norm": 2.8329546451568604, "learning_rate": 5.22783313374758e-05, "loss": 1.1747, "step": 16464 }, { "epoch": 0.9814042198116582, "grad_norm": 3.0182785987854004, "learning_rate": 5.226888418814989e-05, "loss": 1.1782, "step": 16466 }, { "epoch": 0.9815234235308141, "grad_norm": 2.663968324661255, "learning_rate": 5.225943695765826e-05, "loss": 1.2839, "step": 16468 }, { "epoch": 0.9816426272499702, "grad_norm": 2.9966061115264893, "learning_rate": 5.2249989646338824e-05, "loss": 1.2179, "step": 16470 }, { "epoch": 0.9817618309691263, "grad_norm": 3.2863688468933105, "learning_rate": 5.224054225452957e-05, "loss": 1.1891, "step": 16472 }, { "epoch": 0.9818810346882823, "grad_norm": 2.918896436691284, "learning_rate": 5.223109478256845e-05, "loss": 1.1801, "step": 16474 }, { "epoch": 0.9820002384074383, "grad_norm": 3.353012800216675, "learning_rate": 5.2221647230793436e-05, "loss": 1.2188, "step": 16476 }, { "epoch": 0.9821194421265943, "grad_norm": 2.69626522064209, "learning_rate": 5.221219959954249e-05, "loss": 1.0411, "step": 16478 }, { "epoch": 0.9822386458457504, "grad_norm": 3.2778589725494385, "learning_rate": 5.22027518891536e-05, "loss": 1.4771, "step": 16480 }, { "epoch": 0.9823578495649065, "grad_norm": 3.1605184078216553, "learning_rate": 5.219330409996473e-05, "loss": 1.2256, "step": 16482 }, { "epoch": 0.9824770532840624, "grad_norm": 2.93318247795105, "learning_rate": 5.2183856232313876e-05, "loss": 1.278, "step": 16484 }, { "epoch": 0.9825962570032185, "grad_norm": 3.610872745513916, "learning_rate": 5.217440828653902e-05, "loss": 1.3662, "step": 16486 }, { "epoch": 0.9827154607223746, "grad_norm": 2.673689126968384, "learning_rate": 5.2164960262978146e-05, "loss": 1.276, "step": 16488 }, { "epoch": 0.9828346644415306, "grad_norm": 2.872788667678833, "learning_rate": 5.2155512161969225e-05, "loss": 1.2723, "step": 16490 }, { "epoch": 0.9829538681606866, "grad_norm": 3.032566785812378, "learning_rate": 5.214606398385026e-05, "loss": 1.269, "step": 16492 }, { "epoch": 0.9830730718798426, "grad_norm": 3.006793260574341, "learning_rate": 5.213661572895924e-05, "loss": 1.2338, "step": 16494 }, { "epoch": 0.9831922755989987, "grad_norm": 3.1387717723846436, "learning_rate": 5.212716739763417e-05, "loss": 1.2265, "step": 16496 }, { "epoch": 0.9833114793181548, "grad_norm": 3.1274800300598145, "learning_rate": 5.2117718990213025e-05, "loss": 1.1917, "step": 16498 }, { "epoch": 0.9834306830373107, "grad_norm": 3.0537619590759277, "learning_rate": 5.210827050703385e-05, "loss": 1.1721, "step": 16500 }, { "epoch": 0.9835498867564668, "grad_norm": 2.72160005569458, "learning_rate": 5.2098821948434616e-05, "loss": 1.0999, "step": 16502 }, { "epoch": 0.9836690904756228, "grad_norm": 2.7956855297088623, "learning_rate": 5.208937331475335e-05, "loss": 1.2157, "step": 16504 }, { "epoch": 0.9837882941947789, "grad_norm": 3.0215699672698975, "learning_rate": 5.2079924606328045e-05, "loss": 1.1929, "step": 16506 }, { "epoch": 0.983907497913935, "grad_norm": 3.3235955238342285, "learning_rate": 5.207047582349672e-05, "loss": 1.3182, "step": 16508 }, { "epoch": 0.9840267016330909, "grad_norm": 3.256088972091675, "learning_rate": 5.206102696659738e-05, "loss": 1.1744, "step": 16510 }, { "epoch": 0.984145905352247, "grad_norm": 3.347891330718994, "learning_rate": 5.2051578035968076e-05, "loss": 1.2697, "step": 16512 }, { "epoch": 0.9842651090714031, "grad_norm": 3.5441713333129883, "learning_rate": 5.204212903194678e-05, "loss": 1.3598, "step": 16514 }, { "epoch": 0.984384312790559, "grad_norm": 3.1349215507507324, "learning_rate": 5.2032679954871554e-05, "loss": 1.2675, "step": 16516 }, { "epoch": 0.9845035165097151, "grad_norm": 3.0065293312072754, "learning_rate": 5.2023230805080424e-05, "loss": 1.2137, "step": 16518 }, { "epoch": 0.9846227202288711, "grad_norm": 2.6911208629608154, "learning_rate": 5.2013781582911406e-05, "loss": 1.2638, "step": 16520 }, { "epoch": 0.9847419239480272, "grad_norm": 2.742213249206543, "learning_rate": 5.200433228870251e-05, "loss": 1.1921, "step": 16522 }, { "epoch": 0.9848611276671833, "grad_norm": 2.9433135986328125, "learning_rate": 5.19948829227918e-05, "loss": 1.2141, "step": 16524 }, { "epoch": 0.9849803313863392, "grad_norm": 3.2078800201416016, "learning_rate": 5.1985433485517295e-05, "loss": 1.2665, "step": 16526 }, { "epoch": 0.9850995351054953, "grad_norm": 3.199448823928833, "learning_rate": 5.197598397721706e-05, "loss": 1.3077, "step": 16528 }, { "epoch": 0.9852187388246513, "grad_norm": 3.0677311420440674, "learning_rate": 5.1966534398229095e-05, "loss": 1.2446, "step": 16530 }, { "epoch": 0.9853379425438074, "grad_norm": 3.2780489921569824, "learning_rate": 5.195708474889148e-05, "loss": 1.3346, "step": 16532 }, { "epoch": 0.9854571462629634, "grad_norm": 2.8563873767852783, "learning_rate": 5.194763502954224e-05, "loss": 1.2971, "step": 16534 }, { "epoch": 0.9855763499821194, "grad_norm": 3.3491647243499756, "learning_rate": 5.1938185240519446e-05, "loss": 1.1867, "step": 16536 }, { "epoch": 0.9856955537012755, "grad_norm": 2.8753128051757812, "learning_rate": 5.192873538216112e-05, "loss": 1.284, "step": 16538 }, { "epoch": 0.9858147574204316, "grad_norm": 2.8322601318359375, "learning_rate": 5.191928545480533e-05, "loss": 1.1775, "step": 16540 }, { "epoch": 0.9859339611395875, "grad_norm": 3.2725884914398193, "learning_rate": 5.1909835458790135e-05, "loss": 1.3962, "step": 16542 }, { "epoch": 0.9860531648587436, "grad_norm": 3.447408437728882, "learning_rate": 5.190038539445359e-05, "loss": 1.3229, "step": 16544 }, { "epoch": 0.9861723685778996, "grad_norm": 3.054896593093872, "learning_rate": 5.1890935262133763e-05, "loss": 1.1366, "step": 16546 }, { "epoch": 0.9862915722970557, "grad_norm": 3.047403573989868, "learning_rate": 5.188148506216871e-05, "loss": 1.1594, "step": 16548 }, { "epoch": 0.9864107760162117, "grad_norm": 3.1346137523651123, "learning_rate": 5.1872034794896504e-05, "loss": 1.2567, "step": 16550 }, { "epoch": 0.9865299797353677, "grad_norm": 3.160071611404419, "learning_rate": 5.1862584460655206e-05, "loss": 1.2931, "step": 16552 }, { "epoch": 0.9866491834545238, "grad_norm": 3.024150848388672, "learning_rate": 5.1853134059782874e-05, "loss": 1.2955, "step": 16554 }, { "epoch": 0.9867683871736799, "grad_norm": 3.2668399810791016, "learning_rate": 5.184368359261761e-05, "loss": 1.3216, "step": 16556 }, { "epoch": 0.9868875908928358, "grad_norm": 3.169198513031006, "learning_rate": 5.1834233059497474e-05, "loss": 1.2083, "step": 16558 }, { "epoch": 0.9870067946119919, "grad_norm": 3.2960829734802246, "learning_rate": 5.182478246076055e-05, "loss": 1.2627, "step": 16560 }, { "epoch": 0.9871259983311479, "grad_norm": 3.3145124912261963, "learning_rate": 5.1815331796744916e-05, "loss": 1.3772, "step": 16562 }, { "epoch": 0.987245202050304, "grad_norm": 2.99141263961792, "learning_rate": 5.180588106778864e-05, "loss": 1.2461, "step": 16564 }, { "epoch": 0.98736440576946, "grad_norm": 3.2677807807922363, "learning_rate": 5.179643027422982e-05, "loss": 1.3258, "step": 16566 }, { "epoch": 0.987483609488616, "grad_norm": 3.314669609069824, "learning_rate": 5.178697941640655e-05, "loss": 1.4392, "step": 16568 }, { "epoch": 0.9876028132077721, "grad_norm": 2.7229690551757812, "learning_rate": 5.1777528494656914e-05, "loss": 1.0988, "step": 16570 }, { "epoch": 0.9877220169269281, "grad_norm": 2.8409299850463867, "learning_rate": 5.1768077509318994e-05, "loss": 1.0979, "step": 16572 }, { "epoch": 0.9878412206460842, "grad_norm": 3.159416675567627, "learning_rate": 5.175862646073089e-05, "loss": 1.3207, "step": 16574 }, { "epoch": 0.9879604243652402, "grad_norm": 3.185983419418335, "learning_rate": 5.17491753492307e-05, "loss": 1.3151, "step": 16576 }, { "epoch": 0.9880796280843962, "grad_norm": 3.0390889644622803, "learning_rate": 5.173972417515655e-05, "loss": 1.3605, "step": 16578 }, { "epoch": 0.9881988318035523, "grad_norm": 3.119565010070801, "learning_rate": 5.173027293884648e-05, "loss": 1.2802, "step": 16580 }, { "epoch": 0.9883180355227084, "grad_norm": 2.8484551906585693, "learning_rate": 5.172082164063865e-05, "loss": 1.2018, "step": 16582 }, { "epoch": 0.9884372392418643, "grad_norm": 3.2026727199554443, "learning_rate": 5.171137028087113e-05, "loss": 1.2607, "step": 16584 }, { "epoch": 0.9885564429610204, "grad_norm": 2.915461540222168, "learning_rate": 5.170191885988204e-05, "loss": 1.2073, "step": 16586 }, { "epoch": 0.9886756466801764, "grad_norm": 3.088895320892334, "learning_rate": 5.169246737800949e-05, "loss": 1.2667, "step": 16588 }, { "epoch": 0.9887948503993325, "grad_norm": 3.018247365951538, "learning_rate": 5.1683015835591586e-05, "loss": 1.287, "step": 16590 }, { "epoch": 0.9889140541184885, "grad_norm": 3.8042190074920654, "learning_rate": 5.1673564232966466e-05, "loss": 1.2591, "step": 16592 }, { "epoch": 0.9890332578376445, "grad_norm": 3.309110403060913, "learning_rate": 5.166411257047221e-05, "loss": 1.2725, "step": 16594 }, { "epoch": 0.9891524615568006, "grad_norm": 3.2778372764587402, "learning_rate": 5.1654660848446966e-05, "loss": 1.3057, "step": 16596 }, { "epoch": 0.9892716652759566, "grad_norm": 3.0344619750976562, "learning_rate": 5.1645209067228825e-05, "loss": 1.166, "step": 16598 }, { "epoch": 0.9893908689951126, "grad_norm": 3.3293094635009766, "learning_rate": 5.1635757227155934e-05, "loss": 1.2801, "step": 16600 }, { "epoch": 0.9895100727142687, "grad_norm": 3.3461101055145264, "learning_rate": 5.1626305328566416e-05, "loss": 1.2767, "step": 16602 }, { "epoch": 0.9896292764334247, "grad_norm": 3.2351105213165283, "learning_rate": 5.161685337179839e-05, "loss": 1.2074, "step": 16604 }, { "epoch": 0.9897484801525808, "grad_norm": 3.3419899940490723, "learning_rate": 5.160740135718998e-05, "loss": 1.3921, "step": 16606 }, { "epoch": 0.9898676838717368, "grad_norm": 3.1322357654571533, "learning_rate": 5.159794928507934e-05, "loss": 1.3055, "step": 16608 }, { "epoch": 0.9899868875908928, "grad_norm": 3.234701633453369, "learning_rate": 5.158849715580458e-05, "loss": 1.3823, "step": 16610 }, { "epoch": 0.9901060913100489, "grad_norm": 2.961772918701172, "learning_rate": 5.1579044969703825e-05, "loss": 1.1942, "step": 16612 }, { "epoch": 0.9902252950292049, "grad_norm": 3.2992360591888428, "learning_rate": 5.156959272711525e-05, "loss": 1.2674, "step": 16614 }, { "epoch": 0.990344498748361, "grad_norm": 3.2703988552093506, "learning_rate": 5.1560140428376956e-05, "loss": 1.2664, "step": 16616 }, { "epoch": 0.990463702467517, "grad_norm": 3.0743978023529053, "learning_rate": 5.15506880738271e-05, "loss": 1.314, "step": 16618 }, { "epoch": 0.990582906186673, "grad_norm": 3.032210111618042, "learning_rate": 5.154123566380385e-05, "loss": 1.0913, "step": 16620 }, { "epoch": 0.9907021099058291, "grad_norm": 2.9977359771728516, "learning_rate": 5.153178319864531e-05, "loss": 1.3088, "step": 16622 }, { "epoch": 0.990821313624985, "grad_norm": 2.967378616333008, "learning_rate": 5.1522330678689643e-05, "loss": 1.2236, "step": 16624 }, { "epoch": 0.9909405173441411, "grad_norm": 3.3388819694519043, "learning_rate": 5.1512878104275007e-05, "loss": 1.2045, "step": 16626 }, { "epoch": 0.9910597210632972, "grad_norm": 3.074601888656616, "learning_rate": 5.150342547573954e-05, "loss": 1.3506, "step": 16628 }, { "epoch": 0.9911789247824532, "grad_norm": 2.800116777420044, "learning_rate": 5.149397279342141e-05, "loss": 1.2669, "step": 16630 }, { "epoch": 0.9912981285016093, "grad_norm": 2.902604579925537, "learning_rate": 5.148452005765874e-05, "loss": 1.4186, "step": 16632 }, { "epoch": 0.9914173322207653, "grad_norm": 2.8722712993621826, "learning_rate": 5.1475067268789725e-05, "loss": 1.3538, "step": 16634 }, { "epoch": 0.9915365359399213, "grad_norm": 3.077263832092285, "learning_rate": 5.14656144271525e-05, "loss": 1.2243, "step": 16636 }, { "epoch": 0.9916557396590774, "grad_norm": 3.114865779876709, "learning_rate": 5.145616153308524e-05, "loss": 1.2423, "step": 16638 }, { "epoch": 0.9917749433782334, "grad_norm": 3.14683198928833, "learning_rate": 5.1446708586926095e-05, "loss": 1.2027, "step": 16640 }, { "epoch": 0.9918941470973894, "grad_norm": 3.0944814682006836, "learning_rate": 5.1437255589013233e-05, "loss": 1.1204, "step": 16642 }, { "epoch": 0.9920133508165455, "grad_norm": 2.9456191062927246, "learning_rate": 5.142780253968481e-05, "loss": 1.2829, "step": 16644 }, { "epoch": 0.9921325545357015, "grad_norm": 3.431028127670288, "learning_rate": 5.141834943927902e-05, "loss": 1.4519, "step": 16646 }, { "epoch": 0.9922517582548576, "grad_norm": 3.0109612941741943, "learning_rate": 5.140889628813402e-05, "loss": 1.1313, "step": 16648 }, { "epoch": 0.9923709619740136, "grad_norm": 3.233456611633301, "learning_rate": 5.139944308658796e-05, "loss": 1.1406, "step": 16650 }, { "epoch": 0.9924901656931696, "grad_norm": 3.212908983230591, "learning_rate": 5.1389989834979056e-05, "loss": 1.3713, "step": 16652 }, { "epoch": 0.9926093694123257, "grad_norm": 3.61220121383667, "learning_rate": 5.138053653364546e-05, "loss": 1.3257, "step": 16654 }, { "epoch": 0.9927285731314817, "grad_norm": 2.924272060394287, "learning_rate": 5.137108318292533e-05, "loss": 1.1488, "step": 16656 }, { "epoch": 0.9928477768506377, "grad_norm": 2.838602066040039, "learning_rate": 5.136162978315687e-05, "loss": 1.2636, "step": 16658 }, { "epoch": 0.9929669805697938, "grad_norm": 3.3484551906585693, "learning_rate": 5.1352176334678256e-05, "loss": 1.3479, "step": 16660 }, { "epoch": 0.9930861842889498, "grad_norm": 3.0548694133758545, "learning_rate": 5.134272283782767e-05, "loss": 1.4083, "step": 16662 }, { "epoch": 0.9932053880081059, "grad_norm": 3.2531046867370605, "learning_rate": 5.133326929294329e-05, "loss": 1.2795, "step": 16664 }, { "epoch": 0.9933245917272618, "grad_norm": 3.1868906021118164, "learning_rate": 5.132381570036331e-05, "loss": 1.1956, "step": 16666 }, { "epoch": 0.9934437954464179, "grad_norm": 3.26896333694458, "learning_rate": 5.1314362060425915e-05, "loss": 1.1762, "step": 16668 }, { "epoch": 0.993562999165574, "grad_norm": 3.0974926948547363, "learning_rate": 5.130490837346929e-05, "loss": 1.3435, "step": 16670 }, { "epoch": 0.99368220288473, "grad_norm": 2.6913790702819824, "learning_rate": 5.129545463983162e-05, "loss": 1.2251, "step": 16672 }, { "epoch": 0.993801406603886, "grad_norm": 3.3542096614837646, "learning_rate": 5.1286000859851114e-05, "loss": 1.2909, "step": 16674 }, { "epoch": 0.9939206103230421, "grad_norm": 3.2960166931152344, "learning_rate": 5.1276547033865954e-05, "loss": 1.3316, "step": 16676 }, { "epoch": 0.9940398140421981, "grad_norm": 2.8502418994903564, "learning_rate": 5.126709316221433e-05, "loss": 1.1972, "step": 16678 }, { "epoch": 0.9941590177613542, "grad_norm": 3.076138973236084, "learning_rate": 5.1257639245234465e-05, "loss": 1.2842, "step": 16680 }, { "epoch": 0.9942782214805101, "grad_norm": 3.5893776416778564, "learning_rate": 5.124818528326453e-05, "loss": 1.2626, "step": 16682 }, { "epoch": 0.9943974251996662, "grad_norm": 3.0954976081848145, "learning_rate": 5.123873127664275e-05, "loss": 1.2462, "step": 16684 }, { "epoch": 0.9945166289188223, "grad_norm": 3.067242383956909, "learning_rate": 5.122927722570731e-05, "loss": 1.2594, "step": 16686 }, { "epoch": 0.9946358326379783, "grad_norm": 3.3217885494232178, "learning_rate": 5.12198231307964e-05, "loss": 1.1949, "step": 16688 }, { "epoch": 0.9947550363571344, "grad_norm": 3.1167337894439697, "learning_rate": 5.121036899224825e-05, "loss": 1.3219, "step": 16690 }, { "epoch": 0.9948742400762903, "grad_norm": 3.1112918853759766, "learning_rate": 5.120091481040107e-05, "loss": 1.3308, "step": 16692 }, { "epoch": 0.9949934437954464, "grad_norm": 3.236152410507202, "learning_rate": 5.119146058559305e-05, "loss": 1.3519, "step": 16694 }, { "epoch": 0.9951126475146025, "grad_norm": 2.8918702602386475, "learning_rate": 5.11820063181624e-05, "loss": 1.1059, "step": 16696 }, { "epoch": 0.9952318512337585, "grad_norm": 3.2317395210266113, "learning_rate": 5.117255200844737e-05, "loss": 1.3209, "step": 16698 }, { "epoch": 0.9953510549529145, "grad_norm": 2.945173501968384, "learning_rate": 5.116309765678612e-05, "loss": 1.2224, "step": 16700 }, { "epoch": 0.9954702586720706, "grad_norm": 2.8262085914611816, "learning_rate": 5.115364326351689e-05, "loss": 1.1988, "step": 16702 }, { "epoch": 0.9955894623912266, "grad_norm": 3.5190589427948, "learning_rate": 5.114418882897789e-05, "loss": 1.3338, "step": 16704 }, { "epoch": 0.9957086661103827, "grad_norm": 3.158529758453369, "learning_rate": 5.1134734353507354e-05, "loss": 1.3114, "step": 16706 }, { "epoch": 0.9958278698295386, "grad_norm": 2.9548490047454834, "learning_rate": 5.1125279837443476e-05, "loss": 1.3157, "step": 16708 }, { "epoch": 0.9959470735486947, "grad_norm": 2.7856907844543457, "learning_rate": 5.111582528112449e-05, "loss": 1.3036, "step": 16710 }, { "epoch": 0.9960662772678508, "grad_norm": 3.101792335510254, "learning_rate": 5.1106370684888615e-05, "loss": 1.3027, "step": 16712 }, { "epoch": 0.9961854809870068, "grad_norm": 2.904935359954834, "learning_rate": 5.1096916049074086e-05, "loss": 1.2373, "step": 16714 }, { "epoch": 0.9963046847061628, "grad_norm": 3.202547073364258, "learning_rate": 5.108746137401911e-05, "loss": 1.2261, "step": 16716 }, { "epoch": 0.9964238884253188, "grad_norm": 3.41943359375, "learning_rate": 5.1078006660061914e-05, "loss": 1.2709, "step": 16718 }, { "epoch": 0.9965430921444749, "grad_norm": 3.2389109134674072, "learning_rate": 5.106855190754073e-05, "loss": 1.3289, "step": 16720 }, { "epoch": 0.996662295863631, "grad_norm": 2.8555448055267334, "learning_rate": 5.1059097116793794e-05, "loss": 1.2804, "step": 16722 }, { "epoch": 0.996781499582787, "grad_norm": 3.0294582843780518, "learning_rate": 5.104964228815933e-05, "loss": 1.2479, "step": 16724 }, { "epoch": 0.996900703301943, "grad_norm": 3.1131322383880615, "learning_rate": 5.104018742197557e-05, "loss": 1.4332, "step": 16726 }, { "epoch": 0.9970199070210991, "grad_norm": 3.238335132598877, "learning_rate": 5.103073251858076e-05, "loss": 1.2782, "step": 16728 }, { "epoch": 0.9971391107402551, "grad_norm": 3.154752731323242, "learning_rate": 5.102127757831311e-05, "loss": 1.119, "step": 16730 }, { "epoch": 0.9972583144594112, "grad_norm": 3.2905218601226807, "learning_rate": 5.101182260151085e-05, "loss": 1.2921, "step": 16732 }, { "epoch": 0.9973775181785671, "grad_norm": 3.0036842823028564, "learning_rate": 5.100236758851226e-05, "loss": 1.1471, "step": 16734 }, { "epoch": 0.9974967218977232, "grad_norm": 3.109494209289551, "learning_rate": 5.0992912539655537e-05, "loss": 1.3059, "step": 16736 }, { "epoch": 0.9976159256168793, "grad_norm": 2.996920347213745, "learning_rate": 5.0983457455278935e-05, "loss": 1.2827, "step": 16738 }, { "epoch": 0.9977351293360353, "grad_norm": 2.6606314182281494, "learning_rate": 5.097400233572069e-05, "loss": 1.25, "step": 16740 }, { "epoch": 0.9978543330551913, "grad_norm": 3.089104652404785, "learning_rate": 5.096454718131907e-05, "loss": 1.4051, "step": 16742 }, { "epoch": 0.9979735367743474, "grad_norm": 3.1299126148223877, "learning_rate": 5.095509199241229e-05, "loss": 1.1724, "step": 16744 }, { "epoch": 0.9980927404935034, "grad_norm": 3.0894999504089355, "learning_rate": 5.094563676933859e-05, "loss": 1.1814, "step": 16746 }, { "epoch": 0.9982119442126595, "grad_norm": 3.233454704284668, "learning_rate": 5.0936181512436234e-05, "loss": 1.1522, "step": 16748 }, { "epoch": 0.9983311479318154, "grad_norm": 3.019683837890625, "learning_rate": 5.0926726222043466e-05, "loss": 1.1922, "step": 16750 }, { "epoch": 0.9984503516509715, "grad_norm": 3.2510266304016113, "learning_rate": 5.091727089849851e-05, "loss": 1.3852, "step": 16752 }, { "epoch": 0.9985695553701276, "grad_norm": 3.0148799419403076, "learning_rate": 5.090781554213966e-05, "loss": 1.11, "step": 16754 }, { "epoch": 0.9986887590892836, "grad_norm": 2.893336534500122, "learning_rate": 5.0898360153305135e-05, "loss": 1.0931, "step": 16756 }, { "epoch": 0.9988079628084396, "grad_norm": 3.0249176025390625, "learning_rate": 5.08889047323332e-05, "loss": 1.2595, "step": 16758 }, { "epoch": 0.9989271665275956, "grad_norm": 3.168421745300293, "learning_rate": 5.08794492795621e-05, "loss": 1.2669, "step": 16760 }, { "epoch": 0.9990463702467517, "grad_norm": 3.0028505325317383, "learning_rate": 5.086999379533008e-05, "loss": 1.1543, "step": 16762 }, { "epoch": 0.9991655739659078, "grad_norm": 3.226123809814453, "learning_rate": 5.0860538279975413e-05, "loss": 1.2779, "step": 16764 }, { "epoch": 0.9992847776850637, "grad_norm": 3.079768180847168, "learning_rate": 5.085108273383633e-05, "loss": 1.2346, "step": 16766 }, { "epoch": 0.9994039814042198, "grad_norm": 3.2366690635681152, "learning_rate": 5.084162715725112e-05, "loss": 1.1922, "step": 16768 }, { "epoch": 0.9995231851233759, "grad_norm": 3.338644027709961, "learning_rate": 5.083217155055804e-05, "loss": 1.2376, "step": 16770 }, { "epoch": 0.9996423888425319, "grad_norm": 3.170870304107666, "learning_rate": 5.082271591409531e-05, "loss": 1.278, "step": 16772 }, { "epoch": 0.999761592561688, "grad_norm": 2.923565149307251, "learning_rate": 5.081326024820125e-05, "loss": 1.1876, "step": 16774 }, { "epoch": 0.9998807962808439, "grad_norm": 3.1995606422424316, "learning_rate": 5.0803804553214064e-05, "loss": 1.3337, "step": 16776 }, { "epoch": 1.0, "grad_norm": 2.717222213745117, "learning_rate": 5.079434882947203e-05, "loss": 1.1574, "step": 16778 }, { "epoch": 1.000119203719156, "grad_norm": 3.254727602005005, "learning_rate": 5.078489307731344e-05, "loss": 1.2789, "step": 16780 }, { "epoch": 1.0002384074383122, "grad_norm": 3.062354564666748, "learning_rate": 5.0775437297076535e-05, "loss": 1.2192, "step": 16782 }, { "epoch": 1.0003576111574681, "grad_norm": 3.2920937538146973, "learning_rate": 5.076598148909957e-05, "loss": 1.2044, "step": 16784 }, { "epoch": 1.000476814876624, "grad_norm": 3.0293452739715576, "learning_rate": 5.075652565372084e-05, "loss": 1.1346, "step": 16786 }, { "epoch": 1.0005960185957803, "grad_norm": 2.7316832542419434, "learning_rate": 5.074706979127861e-05, "loss": 1.1639, "step": 16788 }, { "epoch": 1.0007152223149363, "grad_norm": 3.1646711826324463, "learning_rate": 5.0737613902111125e-05, "loss": 1.1887, "step": 16790 }, { "epoch": 1.0008344260340922, "grad_norm": 3.0205109119415283, "learning_rate": 5.072815798655667e-05, "loss": 1.3106, "step": 16792 }, { "epoch": 1.0009536297532482, "grad_norm": 3.394625186920166, "learning_rate": 5.07187020449535e-05, "loss": 1.2373, "step": 16794 }, { "epoch": 1.0010728334724044, "grad_norm": 3.1145567893981934, "learning_rate": 5.070924607763992e-05, "loss": 1.2313, "step": 16796 }, { "epoch": 1.0011920371915604, "grad_norm": 3.4546926021575928, "learning_rate": 5.069979008495416e-05, "loss": 1.1522, "step": 16798 }, { "epoch": 1.0013112409107163, "grad_norm": 2.7444100379943848, "learning_rate": 5.0690334067234525e-05, "loss": 1.2131, "step": 16800 }, { "epoch": 1.0014304446298725, "grad_norm": 2.9821605682373047, "learning_rate": 5.0680878024819276e-05, "loss": 1.229, "step": 16802 }, { "epoch": 1.0015496483490285, "grad_norm": 2.9097745418548584, "learning_rate": 5.067142195804669e-05, "loss": 1.21, "step": 16804 }, { "epoch": 1.0016688520681845, "grad_norm": 3.1945717334747314, "learning_rate": 5.0661965867255054e-05, "loss": 1.3113, "step": 16806 }, { "epoch": 1.0017880557873406, "grad_norm": 3.0360701084136963, "learning_rate": 5.065250975278263e-05, "loss": 1.2451, "step": 16808 }, { "epoch": 1.0019072595064966, "grad_norm": 3.0924251079559326, "learning_rate": 5.0643053614967686e-05, "loss": 1.1782, "step": 16810 }, { "epoch": 1.0020264632256526, "grad_norm": 3.41764497756958, "learning_rate": 5.063359745414853e-05, "loss": 1.1802, "step": 16812 }, { "epoch": 1.0021456669448088, "grad_norm": 2.881141424179077, "learning_rate": 5.0624141270663416e-05, "loss": 1.1585, "step": 16814 }, { "epoch": 1.0022648706639647, "grad_norm": 2.973438024520874, "learning_rate": 5.061468506485062e-05, "loss": 1.1884, "step": 16816 }, { "epoch": 1.0023840743831207, "grad_norm": 3.0315396785736084, "learning_rate": 5.0605228837048466e-05, "loss": 1.1046, "step": 16818 }, { "epoch": 1.0025032781022767, "grad_norm": 3.084143877029419, "learning_rate": 5.05957725875952e-05, "loss": 1.0675, "step": 16820 }, { "epoch": 1.0026224818214329, "grad_norm": 3.039233446121216, "learning_rate": 5.058631631682909e-05, "loss": 1.1181, "step": 16822 }, { "epoch": 1.0027416855405888, "grad_norm": 3.0336661338806152, "learning_rate": 5.0576860025088456e-05, "loss": 1.1499, "step": 16824 }, { "epoch": 1.0028608892597448, "grad_norm": 3.8087334632873535, "learning_rate": 5.056740371271156e-05, "loss": 1.2421, "step": 16826 }, { "epoch": 1.002980092978901, "grad_norm": 3.0627317428588867, "learning_rate": 5.0557947380036695e-05, "loss": 1.216, "step": 16828 }, { "epoch": 1.003099296698057, "grad_norm": 3.085515260696411, "learning_rate": 5.0548491027402133e-05, "loss": 1.0942, "step": 16830 }, { "epoch": 1.003218500417213, "grad_norm": 3.089484214782715, "learning_rate": 5.0539034655146176e-05, "loss": 1.0336, "step": 16832 }, { "epoch": 1.0033377041363691, "grad_norm": 3.3528177738189697, "learning_rate": 5.0529578263607114e-05, "loss": 1.0705, "step": 16834 }, { "epoch": 1.003456907855525, "grad_norm": 2.803252696990967, "learning_rate": 5.052012185312322e-05, "loss": 1.0595, "step": 16836 }, { "epoch": 1.003576111574681, "grad_norm": 3.093254804611206, "learning_rate": 5.051066542403278e-05, "loss": 1.1906, "step": 16838 }, { "epoch": 1.0036953152938373, "grad_norm": 2.9431891441345215, "learning_rate": 5.0501208976674097e-05, "loss": 1.2854, "step": 16840 }, { "epoch": 1.0038145190129932, "grad_norm": 2.981755495071411, "learning_rate": 5.049175251138545e-05, "loss": 1.1469, "step": 16842 }, { "epoch": 1.0039337227321492, "grad_norm": 3.067807674407959, "learning_rate": 5.048229602850514e-05, "loss": 1.226, "step": 16844 }, { "epoch": 1.0040529264513052, "grad_norm": 2.777862787246704, "learning_rate": 5.047283952837145e-05, "loss": 1.3359, "step": 16846 }, { "epoch": 1.0041721301704614, "grad_norm": 2.4138004779815674, "learning_rate": 5.0463383011322664e-05, "loss": 1.0874, "step": 16848 }, { "epoch": 1.0042913338896173, "grad_norm": 3.435925245285034, "learning_rate": 5.04539264776971e-05, "loss": 1.1644, "step": 16850 }, { "epoch": 1.0044105376087733, "grad_norm": 3.1413156986236572, "learning_rate": 5.0444469927833016e-05, "loss": 1.2306, "step": 16852 }, { "epoch": 1.0045297413279295, "grad_norm": 3.2232186794281006, "learning_rate": 5.0435013362068715e-05, "loss": 1.2492, "step": 16854 }, { "epoch": 1.0046489450470855, "grad_norm": 3.0055229663848877, "learning_rate": 5.042555678074251e-05, "loss": 1.233, "step": 16856 }, { "epoch": 1.0047681487662414, "grad_norm": 3.2092041969299316, "learning_rate": 5.0416100184192695e-05, "loss": 1.0983, "step": 16858 }, { "epoch": 1.0048873524853976, "grad_norm": 3.0821168422698975, "learning_rate": 5.040664357275754e-05, "loss": 1.172, "step": 16860 }, { "epoch": 1.0050065562045536, "grad_norm": 2.479412078857422, "learning_rate": 5.039718694677534e-05, "loss": 1.1044, "step": 16862 }, { "epoch": 1.0051257599237096, "grad_norm": 3.0237653255462646, "learning_rate": 5.038773030658444e-05, "loss": 1.1551, "step": 16864 }, { "epoch": 1.0052449636428658, "grad_norm": 3.1596028804779053, "learning_rate": 5.037827365252306e-05, "loss": 1.3127, "step": 16866 }, { "epoch": 1.0053641673620217, "grad_norm": 2.7279253005981445, "learning_rate": 5.0368816984929554e-05, "loss": 1.1143, "step": 16868 }, { "epoch": 1.0054833710811777, "grad_norm": 3.391951560974121, "learning_rate": 5.035936030414221e-05, "loss": 1.2808, "step": 16870 }, { "epoch": 1.0056025748003337, "grad_norm": 3.28147554397583, "learning_rate": 5.0349903610499314e-05, "loss": 1.2275, "step": 16872 }, { "epoch": 1.0057217785194899, "grad_norm": 3.1028764247894287, "learning_rate": 5.0340446904339154e-05, "loss": 1.1559, "step": 16874 }, { "epoch": 1.0058409822386458, "grad_norm": 3.095912218093872, "learning_rate": 5.033099018600006e-05, "loss": 1.3091, "step": 16876 }, { "epoch": 1.0059601859578018, "grad_norm": 3.3747403621673584, "learning_rate": 5.032153345582031e-05, "loss": 1.277, "step": 16878 }, { "epoch": 1.006079389676958, "grad_norm": 3.395404815673828, "learning_rate": 5.031207671413821e-05, "loss": 1.219, "step": 16880 }, { "epoch": 1.006198593396114, "grad_norm": 3.530212879180908, "learning_rate": 5.030261996129206e-05, "loss": 1.1587, "step": 16882 }, { "epoch": 1.00631779711527, "grad_norm": 3.3078222274780273, "learning_rate": 5.029316319762015e-05, "loss": 1.3026, "step": 16884 }, { "epoch": 1.006437000834426, "grad_norm": 3.187681198120117, "learning_rate": 5.02837064234608e-05, "loss": 1.3259, "step": 16886 }, { "epoch": 1.006556204553582, "grad_norm": 3.322608709335327, "learning_rate": 5.027424963915229e-05, "loss": 1.2617, "step": 16888 }, { "epoch": 1.006675408272738, "grad_norm": 3.2528672218322754, "learning_rate": 5.026479284503294e-05, "loss": 1.1389, "step": 16890 }, { "epoch": 1.0067946119918942, "grad_norm": 3.360015630722046, "learning_rate": 5.025533604144105e-05, "loss": 1.4056, "step": 16892 }, { "epoch": 1.0069138157110502, "grad_norm": 2.983346700668335, "learning_rate": 5.0245879228714897e-05, "loss": 1.1206, "step": 16894 }, { "epoch": 1.0070330194302062, "grad_norm": 3.1666204929351807, "learning_rate": 5.023642240719282e-05, "loss": 1.2022, "step": 16896 }, { "epoch": 1.0071522231493624, "grad_norm": 3.1945977210998535, "learning_rate": 5.022696557721309e-05, "loss": 1.2524, "step": 16898 }, { "epoch": 1.0072714268685183, "grad_norm": 2.9082958698272705, "learning_rate": 5.0217508739114025e-05, "loss": 1.1881, "step": 16900 }, { "epoch": 1.0073906305876743, "grad_norm": 2.90751314163208, "learning_rate": 5.020805189323394e-05, "loss": 1.1111, "step": 16902 }, { "epoch": 1.0075098343068303, "grad_norm": 3.224482297897339, "learning_rate": 5.019859503991113e-05, "loss": 1.2167, "step": 16904 }, { "epoch": 1.0076290380259865, "grad_norm": 3.171447515487671, "learning_rate": 5.018913817948388e-05, "loss": 1.2696, "step": 16906 }, { "epoch": 1.0077482417451424, "grad_norm": 3.6028425693511963, "learning_rate": 5.0179681312290515e-05, "loss": 1.1878, "step": 16908 }, { "epoch": 1.0078674454642984, "grad_norm": 3.1940534114837646, "learning_rate": 5.017022443866935e-05, "loss": 1.3309, "step": 16910 }, { "epoch": 1.0079866491834546, "grad_norm": 2.9738008975982666, "learning_rate": 5.016076755895867e-05, "loss": 1.1585, "step": 16912 }, { "epoch": 1.0081058529026106, "grad_norm": 3.319695234298706, "learning_rate": 5.015131067349676e-05, "loss": 1.3532, "step": 16914 }, { "epoch": 1.0082250566217665, "grad_norm": 3.144651412963867, "learning_rate": 5.014185378262199e-05, "loss": 1.2853, "step": 16916 }, { "epoch": 1.0083442603409227, "grad_norm": 3.361675977706909, "learning_rate": 5.013239688667258e-05, "loss": 1.1974, "step": 16918 }, { "epoch": 1.0084634640600787, "grad_norm": 3.4791057109832764, "learning_rate": 5.012293998598691e-05, "loss": 1.1874, "step": 16920 }, { "epoch": 1.0085826677792347, "grad_norm": 2.9622631072998047, "learning_rate": 5.011348308090325e-05, "loss": 1.252, "step": 16922 }, { "epoch": 1.0087018714983909, "grad_norm": 3.3661835193634033, "learning_rate": 5.010402617175991e-05, "loss": 1.1922, "step": 16924 }, { "epoch": 1.0088210752175468, "grad_norm": 3.0638163089752197, "learning_rate": 5.0094569258895195e-05, "loss": 1.1495, "step": 16926 }, { "epoch": 1.0089402789367028, "grad_norm": 3.332249641418457, "learning_rate": 5.0085112342647425e-05, "loss": 1.2266, "step": 16928 }, { "epoch": 1.0090594826558588, "grad_norm": 3.1159656047821045, "learning_rate": 5.007565542335487e-05, "loss": 1.1365, "step": 16930 }, { "epoch": 1.009178686375015, "grad_norm": 2.826061487197876, "learning_rate": 5.006619850135589e-05, "loss": 1.187, "step": 16932 }, { "epoch": 1.009297890094171, "grad_norm": 3.2838008403778076, "learning_rate": 5.005674157698875e-05, "loss": 1.1959, "step": 16934 }, { "epoch": 1.009417093813327, "grad_norm": 3.0023927688598633, "learning_rate": 5.004728465059178e-05, "loss": 1.1331, "step": 16936 }, { "epoch": 1.009536297532483, "grad_norm": 3.1121749877929688, "learning_rate": 5.003782772250325e-05, "loss": 1.125, "step": 16938 }, { "epoch": 1.009655501251639, "grad_norm": 3.084127902984619, "learning_rate": 5.002837079306152e-05, "loss": 1.2453, "step": 16940 }, { "epoch": 1.009774704970795, "grad_norm": 3.495288610458374, "learning_rate": 5.001891386260487e-05, "loss": 1.2033, "step": 16942 }, { "epoch": 1.0098939086899512, "grad_norm": 3.1807358264923096, "learning_rate": 5.000945693147159e-05, "loss": 1.3547, "step": 16944 }, { "epoch": 1.0100131124091072, "grad_norm": 2.8634066581726074, "learning_rate": 5e-05, "loss": 1.1588, "step": 16946 }, { "epoch": 1.0101323161282632, "grad_norm": 3.0234315395355225, "learning_rate": 4.999054306852844e-05, "loss": 1.1725, "step": 16948 }, { "epoch": 1.0102515198474193, "grad_norm": 2.8926823139190674, "learning_rate": 4.9981086137395144e-05, "loss": 1.1289, "step": 16950 }, { "epoch": 1.0103707235665753, "grad_norm": 3.1081793308258057, "learning_rate": 4.99716292069385e-05, "loss": 1.208, "step": 16952 }, { "epoch": 1.0104899272857313, "grad_norm": 2.8856871128082275, "learning_rate": 4.9962172277496746e-05, "loss": 1.2009, "step": 16954 }, { "epoch": 1.0106091310048873, "grad_norm": 3.2397377490997314, "learning_rate": 4.995271534940824e-05, "loss": 1.3912, "step": 16956 }, { "epoch": 1.0107283347240434, "grad_norm": 3.354198694229126, "learning_rate": 4.9943258423011254e-05, "loss": 1.4074, "step": 16958 }, { "epoch": 1.0108475384431994, "grad_norm": 3.2301700115203857, "learning_rate": 4.993380149864413e-05, "loss": 1.375, "step": 16960 }, { "epoch": 1.0109667421623554, "grad_norm": 3.475945472717285, "learning_rate": 4.9924344576645136e-05, "loss": 1.2534, "step": 16962 }, { "epoch": 1.0110859458815116, "grad_norm": 3.339609384536743, "learning_rate": 4.9914887657352594e-05, "loss": 1.2555, "step": 16964 }, { "epoch": 1.0112051496006675, "grad_norm": 3.1672911643981934, "learning_rate": 4.990543074110482e-05, "loss": 1.2139, "step": 16966 }, { "epoch": 1.0113243533198235, "grad_norm": 2.9417641162872314, "learning_rate": 4.98959738282401e-05, "loss": 1.1732, "step": 16968 }, { "epoch": 1.0114435570389797, "grad_norm": 3.536576271057129, "learning_rate": 4.9886516919096763e-05, "loss": 1.166, "step": 16970 }, { "epoch": 1.0115627607581357, "grad_norm": 2.9995667934417725, "learning_rate": 4.987706001401309e-05, "loss": 1.1926, "step": 16972 }, { "epoch": 1.0116819644772916, "grad_norm": 2.632122755050659, "learning_rate": 4.986760311332743e-05, "loss": 1.0877, "step": 16974 }, { "epoch": 1.0118011681964478, "grad_norm": 3.1582672595977783, "learning_rate": 4.985814621737803e-05, "loss": 1.2535, "step": 16976 }, { "epoch": 1.0119203719156038, "grad_norm": 2.8087897300720215, "learning_rate": 4.984868932650324e-05, "loss": 1.163, "step": 16978 }, { "epoch": 1.0120395756347598, "grad_norm": 3.068387746810913, "learning_rate": 4.983923244104136e-05, "loss": 1.1164, "step": 16980 }, { "epoch": 1.0121587793539157, "grad_norm": 3.280345916748047, "learning_rate": 4.982977556133065e-05, "loss": 1.1518, "step": 16982 }, { "epoch": 1.012277983073072, "grad_norm": 2.956946849822998, "learning_rate": 4.982031868770949e-05, "loss": 1.1495, "step": 16984 }, { "epoch": 1.012397186792228, "grad_norm": 3.338961362838745, "learning_rate": 4.981086182051612e-05, "loss": 1.1904, "step": 16986 }, { "epoch": 1.0125163905113839, "grad_norm": 2.8642096519470215, "learning_rate": 4.980140496008888e-05, "loss": 1.0609, "step": 16988 }, { "epoch": 1.01263559423054, "grad_norm": 3.2607972621917725, "learning_rate": 4.9791948106766055e-05, "loss": 1.212, "step": 16990 }, { "epoch": 1.012754797949696, "grad_norm": 3.1963908672332764, "learning_rate": 4.978249126088598e-05, "loss": 1.1658, "step": 16992 }, { "epoch": 1.012874001668852, "grad_norm": 3.094892740249634, "learning_rate": 4.9773034422786927e-05, "loss": 1.212, "step": 16994 }, { "epoch": 1.0129932053880082, "grad_norm": 2.8857908248901367, "learning_rate": 4.976357759280719e-05, "loss": 1.1567, "step": 16996 }, { "epoch": 1.0131124091071642, "grad_norm": 2.9760513305664062, "learning_rate": 4.9754120771285115e-05, "loss": 1.2378, "step": 16998 }, { "epoch": 1.0132316128263201, "grad_norm": 2.8459300994873047, "learning_rate": 4.974466395855897e-05, "loss": 1.1111, "step": 17000 }, { "epoch": 1.0133508165454763, "grad_norm": 2.9937326908111572, "learning_rate": 4.973520715496707e-05, "loss": 1.2161, "step": 17002 }, { "epoch": 1.0134700202646323, "grad_norm": 3.0873422622680664, "learning_rate": 4.972575036084771e-05, "loss": 1.1991, "step": 17004 }, { "epoch": 1.0135892239837883, "grad_norm": 3.0551040172576904, "learning_rate": 4.971629357653922e-05, "loss": 1.281, "step": 17006 }, { "epoch": 1.0137084277029442, "grad_norm": 3.1186683177948, "learning_rate": 4.9706836802379866e-05, "loss": 1.2548, "step": 17008 }, { "epoch": 1.0138276314221004, "grad_norm": 3.177783727645874, "learning_rate": 4.969738003870795e-05, "loss": 1.2841, "step": 17010 }, { "epoch": 1.0139468351412564, "grad_norm": 3.2947027683258057, "learning_rate": 4.968792328586181e-05, "loss": 1.3112, "step": 17012 }, { "epoch": 1.0140660388604124, "grad_norm": 3.363065242767334, "learning_rate": 4.9678466544179694e-05, "loss": 1.2452, "step": 17014 }, { "epoch": 1.0141852425795685, "grad_norm": 3.0235676765441895, "learning_rate": 4.966900981399995e-05, "loss": 1.1695, "step": 17016 }, { "epoch": 1.0143044462987245, "grad_norm": 3.0118353366851807, "learning_rate": 4.9659553095660844e-05, "loss": 1.1651, "step": 17018 }, { "epoch": 1.0144236500178805, "grad_norm": 3.103027820587158, "learning_rate": 4.96500963895007e-05, "loss": 1.1871, "step": 17020 }, { "epoch": 1.0145428537370367, "grad_norm": 3.359752893447876, "learning_rate": 4.9640639695857814e-05, "loss": 1.3877, "step": 17022 }, { "epoch": 1.0146620574561926, "grad_norm": 3.0577564239501953, "learning_rate": 4.963118301507045e-05, "loss": 1.2023, "step": 17024 }, { "epoch": 1.0147812611753486, "grad_norm": 3.287740707397461, "learning_rate": 4.962172634747695e-05, "loss": 1.2302, "step": 17026 }, { "epoch": 1.0149004648945048, "grad_norm": 3.0781071186065674, "learning_rate": 4.961226969341558e-05, "loss": 1.2496, "step": 17028 }, { "epoch": 1.0150196686136608, "grad_norm": 3.228476047515869, "learning_rate": 4.9602813053224665e-05, "loss": 1.3053, "step": 17030 }, { "epoch": 1.0151388723328167, "grad_norm": 3.320803642272949, "learning_rate": 4.959335642724247e-05, "loss": 1.2246, "step": 17032 }, { "epoch": 1.0152580760519727, "grad_norm": 3.039400339126587, "learning_rate": 4.9583899815807324e-05, "loss": 1.1059, "step": 17034 }, { "epoch": 1.015377279771129, "grad_norm": 3.2681496143341064, "learning_rate": 4.957444321925748e-05, "loss": 1.2869, "step": 17036 }, { "epoch": 1.0154964834902849, "grad_norm": 3.015479326248169, "learning_rate": 4.9564986637931296e-05, "loss": 1.1982, "step": 17038 }, { "epoch": 1.0156156872094408, "grad_norm": 3.3021743297576904, "learning_rate": 4.9555530072167e-05, "loss": 1.1809, "step": 17040 }, { "epoch": 1.015734890928597, "grad_norm": 3.1282317638397217, "learning_rate": 4.9546073522302914e-05, "loss": 1.2249, "step": 17042 }, { "epoch": 1.015854094647753, "grad_norm": 3.359095811843872, "learning_rate": 4.9536616988677354e-05, "loss": 1.2339, "step": 17044 }, { "epoch": 1.015973298366909, "grad_norm": 3.439924955368042, "learning_rate": 4.9527160471628555e-05, "loss": 1.2239, "step": 17046 }, { "epoch": 1.0160925020860652, "grad_norm": 3.181480646133423, "learning_rate": 4.9517703971494874e-05, "loss": 1.1807, "step": 17048 }, { "epoch": 1.0162117058052211, "grad_norm": 3.0218300819396973, "learning_rate": 4.950824748861455e-05, "loss": 1.0998, "step": 17050 }, { "epoch": 1.016330909524377, "grad_norm": 3.340940237045288, "learning_rate": 4.949879102332591e-05, "loss": 1.2567, "step": 17052 }, { "epoch": 1.0164501132435333, "grad_norm": 3.127087354660034, "learning_rate": 4.948933457596724e-05, "loss": 1.0597, "step": 17054 }, { "epoch": 1.0165693169626893, "grad_norm": 2.962757110595703, "learning_rate": 4.9479878146876794e-05, "loss": 1.1655, "step": 17056 }, { "epoch": 1.0166885206818452, "grad_norm": 3.2244558334350586, "learning_rate": 4.9470421736392905e-05, "loss": 1.2309, "step": 17058 }, { "epoch": 1.0168077244010014, "grad_norm": 2.7097554206848145, "learning_rate": 4.946096534485383e-05, "loss": 1.1506, "step": 17060 }, { "epoch": 1.0169269281201574, "grad_norm": 3.1432316303253174, "learning_rate": 4.9451508972597885e-05, "loss": 1.2069, "step": 17062 }, { "epoch": 1.0170461318393134, "grad_norm": 2.9582908153533936, "learning_rate": 4.944205261996332e-05, "loss": 1.075, "step": 17064 }, { "epoch": 1.0171653355584693, "grad_norm": 3.209038257598877, "learning_rate": 4.943259628728845e-05, "loss": 1.2308, "step": 17066 }, { "epoch": 1.0172845392776255, "grad_norm": 3.1284048557281494, "learning_rate": 4.942313997491157e-05, "loss": 1.3422, "step": 17068 }, { "epoch": 1.0174037429967815, "grad_norm": 3.18740177154541, "learning_rate": 4.941368368317092e-05, "loss": 1.2775, "step": 17070 }, { "epoch": 1.0175229467159375, "grad_norm": 3.2975668907165527, "learning_rate": 4.940422741240482e-05, "loss": 1.1593, "step": 17072 }, { "epoch": 1.0176421504350937, "grad_norm": 3.1223771572113037, "learning_rate": 4.9394771162951545e-05, "loss": 1.2882, "step": 17074 }, { "epoch": 1.0177613541542496, "grad_norm": 3.193232536315918, "learning_rate": 4.9385314935149386e-05, "loss": 1.1552, "step": 17076 }, { "epoch": 1.0178805578734056, "grad_norm": 3.060832977294922, "learning_rate": 4.937585872933659e-05, "loss": 1.1029, "step": 17078 }, { "epoch": 1.0179997615925618, "grad_norm": 2.9011223316192627, "learning_rate": 4.9366402545851485e-05, "loss": 1.2255, "step": 17080 }, { "epoch": 1.0181189653117177, "grad_norm": 3.184757709503174, "learning_rate": 4.9356946385032325e-05, "loss": 1.2461, "step": 17082 }, { "epoch": 1.0182381690308737, "grad_norm": 3.392746686935425, "learning_rate": 4.934749024721738e-05, "loss": 1.2249, "step": 17084 }, { "epoch": 1.01835737275003, "grad_norm": 3.065011501312256, "learning_rate": 4.9338034132744964e-05, "loss": 1.2976, "step": 17086 }, { "epoch": 1.0184765764691859, "grad_norm": 3.238507032394409, "learning_rate": 4.932857804195332e-05, "loss": 1.1286, "step": 17088 }, { "epoch": 1.0185957801883418, "grad_norm": 3.457050085067749, "learning_rate": 4.931912197518073e-05, "loss": 1.3992, "step": 17090 }, { "epoch": 1.0187149839074978, "grad_norm": 3.0389254093170166, "learning_rate": 4.930966593276547e-05, "loss": 1.156, "step": 17092 }, { "epoch": 1.018834187626654, "grad_norm": 3.6148080825805664, "learning_rate": 4.9300209915045855e-05, "loss": 1.1986, "step": 17094 }, { "epoch": 1.01895339134581, "grad_norm": 2.847245216369629, "learning_rate": 4.929075392236009e-05, "loss": 1.0951, "step": 17096 }, { "epoch": 1.019072595064966, "grad_norm": 3.1798033714294434, "learning_rate": 4.9281297955046504e-05, "loss": 1.1472, "step": 17098 }, { "epoch": 1.0191917987841221, "grad_norm": 3.2499444484710693, "learning_rate": 4.927184201344336e-05, "loss": 1.169, "step": 17100 }, { "epoch": 1.019311002503278, "grad_norm": 3.229480028152466, "learning_rate": 4.9262386097888894e-05, "loss": 1.1639, "step": 17102 }, { "epoch": 1.019430206222434, "grad_norm": 3.041682243347168, "learning_rate": 4.9252930208721406e-05, "loss": 1.1887, "step": 17104 }, { "epoch": 1.0195494099415903, "grad_norm": 3.138673782348633, "learning_rate": 4.924347434627916e-05, "loss": 1.136, "step": 17106 }, { "epoch": 1.0196686136607462, "grad_norm": 3.1038761138916016, "learning_rate": 4.923401851090044e-05, "loss": 1.2163, "step": 17108 }, { "epoch": 1.0197878173799022, "grad_norm": 3.4139487743377686, "learning_rate": 4.922456270292346e-05, "loss": 1.2186, "step": 17110 }, { "epoch": 1.0199070210990584, "grad_norm": 2.8307342529296875, "learning_rate": 4.921510692268657e-05, "loss": 1.1414, "step": 17112 }, { "epoch": 1.0200262248182144, "grad_norm": 3.2951407432556152, "learning_rate": 4.9205651170527974e-05, "loss": 1.1832, "step": 17114 }, { "epoch": 1.0201454285373703, "grad_norm": 3.2221875190734863, "learning_rate": 4.919619544678595e-05, "loss": 1.2191, "step": 17116 }, { "epoch": 1.0202646322565263, "grad_norm": 3.0500030517578125, "learning_rate": 4.9186739751798785e-05, "loss": 1.4531, "step": 17118 }, { "epoch": 1.0203838359756825, "grad_norm": 3.3008861541748047, "learning_rate": 4.917728408590469e-05, "loss": 1.1971, "step": 17120 }, { "epoch": 1.0205030396948385, "grad_norm": 3.160789728164673, "learning_rate": 4.9167828449441974e-05, "loss": 1.1858, "step": 17122 }, { "epoch": 1.0206222434139944, "grad_norm": 2.8656513690948486, "learning_rate": 4.9158372842748875e-05, "loss": 1.0584, "step": 17124 }, { "epoch": 1.0207414471331506, "grad_norm": 2.9269824028015137, "learning_rate": 4.914891726616368e-05, "loss": 1.1523, "step": 17126 }, { "epoch": 1.0208606508523066, "grad_norm": 3.203420877456665, "learning_rate": 4.913946172002461e-05, "loss": 1.1828, "step": 17128 }, { "epoch": 1.0209798545714626, "grad_norm": 3.350839853286743, "learning_rate": 4.913000620466993e-05, "loss": 1.3157, "step": 17130 }, { "epoch": 1.0210990582906188, "grad_norm": 3.4034628868103027, "learning_rate": 4.912055072043793e-05, "loss": 1.2946, "step": 17132 }, { "epoch": 1.0212182620097747, "grad_norm": 3.085667133331299, "learning_rate": 4.9111095267666814e-05, "loss": 1.1685, "step": 17134 }, { "epoch": 1.0213374657289307, "grad_norm": 3.1356444358825684, "learning_rate": 4.910163984669488e-05, "loss": 1.1343, "step": 17136 }, { "epoch": 1.0214566694480869, "grad_norm": 3.263720750808716, "learning_rate": 4.909218445786033e-05, "loss": 1.2404, "step": 17138 }, { "epoch": 1.0215758731672429, "grad_norm": 3.3409385681152344, "learning_rate": 4.908272910150149e-05, "loss": 1.1309, "step": 17140 }, { "epoch": 1.0216950768863988, "grad_norm": 2.8541314601898193, "learning_rate": 4.907327377795656e-05, "loss": 1.2387, "step": 17142 }, { "epoch": 1.0218142806055548, "grad_norm": 3.170389413833618, "learning_rate": 4.906381848756377e-05, "loss": 1.194, "step": 17144 }, { "epoch": 1.021933484324711, "grad_norm": 3.2249062061309814, "learning_rate": 4.905436323066143e-05, "loss": 1.2724, "step": 17146 }, { "epoch": 1.022052688043867, "grad_norm": 3.382598876953125, "learning_rate": 4.904490800758772e-05, "loss": 1.1515, "step": 17148 }, { "epoch": 1.022171891763023, "grad_norm": 3.2382266521453857, "learning_rate": 4.903545281868095e-05, "loss": 1.1782, "step": 17150 }, { "epoch": 1.0222910954821791, "grad_norm": 3.1483054161071777, "learning_rate": 4.902599766427931e-05, "loss": 1.3917, "step": 17152 }, { "epoch": 1.022410299201335, "grad_norm": 3.2470216751098633, "learning_rate": 4.901654254472107e-05, "loss": 1.2072, "step": 17154 }, { "epoch": 1.022529502920491, "grad_norm": 3.326575994491577, "learning_rate": 4.900708746034446e-05, "loss": 1.2546, "step": 17156 }, { "epoch": 1.0226487066396472, "grad_norm": 3.2726778984069824, "learning_rate": 4.899763241148776e-05, "loss": 1.1824, "step": 17158 }, { "epoch": 1.0227679103588032, "grad_norm": 3.409522294998169, "learning_rate": 4.898817739848916e-05, "loss": 1.2951, "step": 17160 }, { "epoch": 1.0228871140779592, "grad_norm": 3.096137285232544, "learning_rate": 4.897872242168691e-05, "loss": 1.1741, "step": 17162 }, { "epoch": 1.0230063177971154, "grad_norm": 2.8506710529327393, "learning_rate": 4.896926748141927e-05, "loss": 1.1442, "step": 17164 }, { "epoch": 1.0231255215162713, "grad_norm": 3.2045607566833496, "learning_rate": 4.8959812578024436e-05, "loss": 1.1744, "step": 17166 }, { "epoch": 1.0232447252354273, "grad_norm": 2.953977584838867, "learning_rate": 4.8950357711840675e-05, "loss": 1.2646, "step": 17168 }, { "epoch": 1.0233639289545833, "grad_norm": 3.3477299213409424, "learning_rate": 4.894090288320621e-05, "loss": 1.2485, "step": 17170 }, { "epoch": 1.0234831326737395, "grad_norm": 3.0774407386779785, "learning_rate": 4.893144809245928e-05, "loss": 1.1201, "step": 17172 }, { "epoch": 1.0236023363928954, "grad_norm": 3.0749142169952393, "learning_rate": 4.8921993339938104e-05, "loss": 1.0717, "step": 17174 }, { "epoch": 1.0237215401120514, "grad_norm": 3.8949222564697266, "learning_rate": 4.891253862598091e-05, "loss": 1.2228, "step": 17176 }, { "epoch": 1.0238407438312076, "grad_norm": 3.0827293395996094, "learning_rate": 4.8903083950925946e-05, "loss": 1.1686, "step": 17178 }, { "epoch": 1.0239599475503636, "grad_norm": 3.185879945755005, "learning_rate": 4.889362931511138e-05, "loss": 1.2653, "step": 17180 }, { "epoch": 1.0240791512695195, "grad_norm": 2.9915506839752197, "learning_rate": 4.8884174718875525e-05, "loss": 1.2639, "step": 17182 }, { "epoch": 1.0241983549886757, "grad_norm": 2.9277966022491455, "learning_rate": 4.887472016255653e-05, "loss": 1.0732, "step": 17184 }, { "epoch": 1.0243175587078317, "grad_norm": 3.22279691696167, "learning_rate": 4.886526564649266e-05, "loss": 1.149, "step": 17186 }, { "epoch": 1.0244367624269877, "grad_norm": 3.120129108428955, "learning_rate": 4.885581117102212e-05, "loss": 1.2572, "step": 17188 }, { "epoch": 1.0245559661461439, "grad_norm": 2.964555263519287, "learning_rate": 4.884635673648312e-05, "loss": 1.2025, "step": 17190 }, { "epoch": 1.0246751698652998, "grad_norm": 3.3729965686798096, "learning_rate": 4.88369023432139e-05, "loss": 1.4538, "step": 17192 }, { "epoch": 1.0247943735844558, "grad_norm": 2.7252116203308105, "learning_rate": 4.882744799155264e-05, "loss": 1.18, "step": 17194 }, { "epoch": 1.0249135773036118, "grad_norm": 2.926727294921875, "learning_rate": 4.88179936818376e-05, "loss": 1.1121, "step": 17196 }, { "epoch": 1.025032781022768, "grad_norm": 3.056241273880005, "learning_rate": 4.880853941440696e-05, "loss": 1.2666, "step": 17198 }, { "epoch": 1.025151984741924, "grad_norm": 2.7717185020446777, "learning_rate": 4.8799085189598944e-05, "loss": 1.2418, "step": 17200 }, { "epoch": 1.02527118846108, "grad_norm": 2.747128486633301, "learning_rate": 4.878963100775174e-05, "loss": 1.2781, "step": 17202 }, { "epoch": 1.025390392180236, "grad_norm": 3.102996587753296, "learning_rate": 4.878017686920361e-05, "loss": 1.0904, "step": 17204 }, { "epoch": 1.025509595899392, "grad_norm": 2.9988439083099365, "learning_rate": 4.877072277429272e-05, "loss": 1.1753, "step": 17206 }, { "epoch": 1.025628799618548, "grad_norm": 3.170386552810669, "learning_rate": 4.876126872335727e-05, "loss": 1.2805, "step": 17208 }, { "epoch": 1.0257480033377042, "grad_norm": 2.763878345489502, "learning_rate": 4.875181471673549e-05, "loss": 1.2033, "step": 17210 }, { "epoch": 1.0258672070568602, "grad_norm": 3.039726972579956, "learning_rate": 4.874236075476554e-05, "loss": 1.099, "step": 17212 }, { "epoch": 1.0259864107760162, "grad_norm": 3.1282286643981934, "learning_rate": 4.873290683778568e-05, "loss": 1.2519, "step": 17214 }, { "epoch": 1.0261056144951723, "grad_norm": 2.917635917663574, "learning_rate": 4.872345296613405e-05, "loss": 1.0773, "step": 17216 }, { "epoch": 1.0262248182143283, "grad_norm": 3.342862129211426, "learning_rate": 4.87139991401489e-05, "loss": 1.3335, "step": 17218 }, { "epoch": 1.0263440219334843, "grad_norm": 3.4496538639068604, "learning_rate": 4.8704545360168395e-05, "loss": 1.2047, "step": 17220 }, { "epoch": 1.0264632256526403, "grad_norm": 3.2248804569244385, "learning_rate": 4.8695091626530724e-05, "loss": 1.0665, "step": 17222 }, { "epoch": 1.0265824293717964, "grad_norm": 3.0818207263946533, "learning_rate": 4.8685637939574104e-05, "loss": 1.0565, "step": 17224 }, { "epoch": 1.0267016330909524, "grad_norm": 3.536486864089966, "learning_rate": 4.867618429963669e-05, "loss": 1.1855, "step": 17226 }, { "epoch": 1.0268208368101084, "grad_norm": 3.0980725288391113, "learning_rate": 4.866673070705672e-05, "loss": 1.1483, "step": 17228 }, { "epoch": 1.0269400405292646, "grad_norm": 3.202599287033081, "learning_rate": 4.8657277162172335e-05, "loss": 1.2445, "step": 17230 }, { "epoch": 1.0270592442484205, "grad_norm": 3.2704014778137207, "learning_rate": 4.864782366532175e-05, "loss": 1.1781, "step": 17232 }, { "epoch": 1.0271784479675765, "grad_norm": 2.778590202331543, "learning_rate": 4.863837021684315e-05, "loss": 1.2796, "step": 17234 }, { "epoch": 1.0272976516867327, "grad_norm": 3.473330497741699, "learning_rate": 4.862891681707468e-05, "loss": 1.2985, "step": 17236 }, { "epoch": 1.0274168554058887, "grad_norm": 3.0908682346343994, "learning_rate": 4.8619463466354565e-05, "loss": 1.2827, "step": 17238 }, { "epoch": 1.0275360591250446, "grad_norm": 3.1633095741271973, "learning_rate": 4.861001016502095e-05, "loss": 1.2348, "step": 17240 }, { "epoch": 1.0276552628442008, "grad_norm": 2.844005823135376, "learning_rate": 4.860055691341205e-05, "loss": 1.1528, "step": 17242 }, { "epoch": 1.0277744665633568, "grad_norm": 3.0605034828186035, "learning_rate": 4.859110371186598e-05, "loss": 1.2822, "step": 17244 }, { "epoch": 1.0278936702825128, "grad_norm": 3.065478801727295, "learning_rate": 4.8581650560720984e-05, "loss": 1.2376, "step": 17246 }, { "epoch": 1.0280128740016687, "grad_norm": 3.50140118598938, "learning_rate": 4.85721974603152e-05, "loss": 1.3107, "step": 17248 }, { "epoch": 1.028132077720825, "grad_norm": 3.1478171348571777, "learning_rate": 4.856274441098678e-05, "loss": 1.3112, "step": 17250 }, { "epoch": 1.028251281439981, "grad_norm": 3.1329267024993896, "learning_rate": 4.855329141307392e-05, "loss": 1.1902, "step": 17252 }, { "epoch": 1.0283704851591369, "grad_norm": 3.2246017456054688, "learning_rate": 4.854383846691477e-05, "loss": 1.2032, "step": 17254 }, { "epoch": 1.028489688878293, "grad_norm": 3.19482159614563, "learning_rate": 4.853438557284751e-05, "loss": 1.2714, "step": 17256 }, { "epoch": 1.028608892597449, "grad_norm": 3.227597951889038, "learning_rate": 4.852493273121027e-05, "loss": 1.2006, "step": 17258 }, { "epoch": 1.028728096316605, "grad_norm": 3.1089539527893066, "learning_rate": 4.8515479942341266e-05, "loss": 1.0686, "step": 17260 }, { "epoch": 1.0288473000357612, "grad_norm": 3.0918917655944824, "learning_rate": 4.85060272065786e-05, "loss": 1.1847, "step": 17262 }, { "epoch": 1.0289665037549172, "grad_norm": 2.6949613094329834, "learning_rate": 4.8496574524260466e-05, "loss": 1.1726, "step": 17264 }, { "epoch": 1.0290857074740731, "grad_norm": 3.132794141769409, "learning_rate": 4.848712189572501e-05, "loss": 1.1696, "step": 17266 }, { "epoch": 1.0292049111932293, "grad_norm": 3.0115225315093994, "learning_rate": 4.847766932131037e-05, "loss": 1.3236, "step": 17268 }, { "epoch": 1.0293241149123853, "grad_norm": 3.1523220539093018, "learning_rate": 4.84682168013547e-05, "loss": 1.2687, "step": 17270 }, { "epoch": 1.0294433186315413, "grad_norm": 2.958091974258423, "learning_rate": 4.845876433619616e-05, "loss": 1.2037, "step": 17272 }, { "epoch": 1.0295625223506972, "grad_norm": 2.8288285732269287, "learning_rate": 4.84493119261729e-05, "loss": 1.0904, "step": 17274 }, { "epoch": 1.0296817260698534, "grad_norm": 3.237248420715332, "learning_rate": 4.8439859571623035e-05, "loss": 1.0981, "step": 17276 }, { "epoch": 1.0298009297890094, "grad_norm": 3.32108473777771, "learning_rate": 4.8430407272884765e-05, "loss": 1.2608, "step": 17278 }, { "epoch": 1.0299201335081654, "grad_norm": 3.454502582550049, "learning_rate": 4.8420955030296186e-05, "loss": 1.1296, "step": 17280 }, { "epoch": 1.0300393372273215, "grad_norm": 3.3109042644500732, "learning_rate": 4.8411502844195435e-05, "loss": 1.1913, "step": 17282 }, { "epoch": 1.0301585409464775, "grad_norm": 3.441556453704834, "learning_rate": 4.840205071492068e-05, "loss": 1.347, "step": 17284 }, { "epoch": 1.0302777446656335, "grad_norm": 2.963562250137329, "learning_rate": 4.839259864281002e-05, "loss": 1.1729, "step": 17286 }, { "epoch": 1.0303969483847897, "grad_norm": 3.4715054035186768, "learning_rate": 4.838314662820162e-05, "loss": 1.2405, "step": 17288 }, { "epoch": 1.0305161521039456, "grad_norm": 3.064061403274536, "learning_rate": 4.837369467143358e-05, "loss": 1.1361, "step": 17290 }, { "epoch": 1.0306353558231016, "grad_norm": 3.29845929145813, "learning_rate": 4.836424277284408e-05, "loss": 1.3825, "step": 17292 }, { "epoch": 1.0307545595422578, "grad_norm": 3.2831223011016846, "learning_rate": 4.835479093277119e-05, "loss": 1.1929, "step": 17294 }, { "epoch": 1.0308737632614138, "grad_norm": 3.3333535194396973, "learning_rate": 4.8345339151553045e-05, "loss": 1.2112, "step": 17296 }, { "epoch": 1.0309929669805697, "grad_norm": 3.205996036529541, "learning_rate": 4.833588742952781e-05, "loss": 1.1214, "step": 17298 }, { "epoch": 1.031112170699726, "grad_norm": 3.3638627529144287, "learning_rate": 4.832643576703355e-05, "loss": 1.3453, "step": 17300 }, { "epoch": 1.031231374418882, "grad_norm": 3.2625961303710938, "learning_rate": 4.831698416440842e-05, "loss": 1.2639, "step": 17302 }, { "epoch": 1.0313505781380379, "grad_norm": 3.0472934246063232, "learning_rate": 4.8307532621990515e-05, "loss": 1.2512, "step": 17304 }, { "epoch": 1.0314697818571938, "grad_norm": 3.5727806091308594, "learning_rate": 4.829808114011798e-05, "loss": 1.2439, "step": 17306 }, { "epoch": 1.03158898557635, "grad_norm": 3.337183713912964, "learning_rate": 4.828862971912889e-05, "loss": 1.1486, "step": 17308 }, { "epoch": 1.031708189295506, "grad_norm": 3.2381458282470703, "learning_rate": 4.827917835936137e-05, "loss": 1.2586, "step": 17310 }, { "epoch": 1.031827393014662, "grad_norm": 3.1229772567749023, "learning_rate": 4.826972706115353e-05, "loss": 1.2003, "step": 17312 }, { "epoch": 1.0319465967338182, "grad_norm": 3.0072121620178223, "learning_rate": 4.826027582484346e-05, "loss": 1.2469, "step": 17314 }, { "epoch": 1.0320658004529741, "grad_norm": 3.283747673034668, "learning_rate": 4.82508246507693e-05, "loss": 1.3135, "step": 17316 }, { "epoch": 1.03218500417213, "grad_norm": 2.768303394317627, "learning_rate": 4.824137353926911e-05, "loss": 1.109, "step": 17318 }, { "epoch": 1.0323042078912863, "grad_norm": 2.767957925796509, "learning_rate": 4.823192249068101e-05, "loss": 1.2155, "step": 17320 }, { "epoch": 1.0324234116104423, "grad_norm": 3.011620044708252, "learning_rate": 4.822247150534309e-05, "loss": 1.2347, "step": 17322 }, { "epoch": 1.0325426153295982, "grad_norm": 2.8725969791412354, "learning_rate": 4.8213020583593465e-05, "loss": 1.2311, "step": 17324 }, { "epoch": 1.0326618190487544, "grad_norm": 3.306893825531006, "learning_rate": 4.8203569725770195e-05, "loss": 1.1278, "step": 17326 }, { "epoch": 1.0327810227679104, "grad_norm": 2.9745960235595703, "learning_rate": 4.819411893221137e-05, "loss": 1.1564, "step": 17328 }, { "epoch": 1.0329002264870664, "grad_norm": 2.8728034496307373, "learning_rate": 4.818466820325511e-05, "loss": 1.3735, "step": 17330 }, { "epoch": 1.0330194302062223, "grad_norm": 2.830827236175537, "learning_rate": 4.817521753923946e-05, "loss": 1.0633, "step": 17332 }, { "epoch": 1.0331386339253785, "grad_norm": 2.959315299987793, "learning_rate": 4.816576694050254e-05, "loss": 1.088, "step": 17334 }, { "epoch": 1.0332578376445345, "grad_norm": 2.967858076095581, "learning_rate": 4.815631640738239e-05, "loss": 1.2997, "step": 17336 }, { "epoch": 1.0333770413636905, "grad_norm": 3.0975887775421143, "learning_rate": 4.814686594021714e-05, "loss": 1.2013, "step": 17338 }, { "epoch": 1.0334962450828467, "grad_norm": 2.649099349975586, "learning_rate": 4.813741553934482e-05, "loss": 1.2592, "step": 17340 }, { "epoch": 1.0336154488020026, "grad_norm": 3.1297476291656494, "learning_rate": 4.812796520510351e-05, "loss": 1.0368, "step": 17342 }, { "epoch": 1.0337346525211586, "grad_norm": 3.3269448280334473, "learning_rate": 4.811851493783131e-05, "loss": 1.1255, "step": 17344 }, { "epoch": 1.0338538562403148, "grad_norm": 3.2054708003997803, "learning_rate": 4.8109064737866235e-05, "loss": 1.1683, "step": 17346 }, { "epoch": 1.0339730599594708, "grad_norm": 3.6441001892089844, "learning_rate": 4.8099614605546416e-05, "loss": 1.307, "step": 17348 }, { "epoch": 1.0340922636786267, "grad_norm": 3.5656840801239014, "learning_rate": 4.809016454120986e-05, "loss": 1.2945, "step": 17350 }, { "epoch": 1.034211467397783, "grad_norm": 3.3123018741607666, "learning_rate": 4.8080714545194674e-05, "loss": 1.2539, "step": 17352 }, { "epoch": 1.0343306711169389, "grad_norm": 3.128498077392578, "learning_rate": 4.80712646178389e-05, "loss": 1.3799, "step": 17354 }, { "epoch": 1.0344498748360949, "grad_norm": 2.8499414920806885, "learning_rate": 4.806181475948057e-05, "loss": 1.0812, "step": 17356 }, { "epoch": 1.0345690785552508, "grad_norm": 3.388272762298584, "learning_rate": 4.8052364970457766e-05, "loss": 1.1647, "step": 17358 }, { "epoch": 1.034688282274407, "grad_norm": 3.1713457107543945, "learning_rate": 4.804291525110852e-05, "loss": 1.2276, "step": 17360 }, { "epoch": 1.034807485993563, "grad_norm": 3.20147442817688, "learning_rate": 4.803346560177092e-05, "loss": 1.1468, "step": 17362 }, { "epoch": 1.034926689712719, "grad_norm": 3.0356907844543457, "learning_rate": 4.802401602278296e-05, "loss": 1.0673, "step": 17364 }, { "epoch": 1.0350458934318751, "grad_norm": 2.5142476558685303, "learning_rate": 4.801456651448271e-05, "loss": 1.0103, "step": 17366 }, { "epoch": 1.035165097151031, "grad_norm": 3.296107769012451, "learning_rate": 4.800511707720823e-05, "loss": 1.2958, "step": 17368 }, { "epoch": 1.035284300870187, "grad_norm": 3.1921966075897217, "learning_rate": 4.7995667711297505e-05, "loss": 1.2033, "step": 17370 }, { "epoch": 1.0354035045893433, "grad_norm": 2.905533790588379, "learning_rate": 4.7986218417088626e-05, "loss": 1.1108, "step": 17372 }, { "epoch": 1.0355227083084992, "grad_norm": 3.1296534538269043, "learning_rate": 4.797676919491959e-05, "loss": 1.1727, "step": 17374 }, { "epoch": 1.0356419120276552, "grad_norm": 3.372366428375244, "learning_rate": 4.796732004512846e-05, "loss": 1.207, "step": 17376 }, { "epoch": 1.0357611157468114, "grad_norm": 2.8933544158935547, "learning_rate": 4.795787096805321e-05, "loss": 1.128, "step": 17378 }, { "epoch": 1.0358803194659674, "grad_norm": 2.8584189414978027, "learning_rate": 4.794842196403194e-05, "loss": 1.0299, "step": 17380 }, { "epoch": 1.0359995231851233, "grad_norm": 2.9884049892425537, "learning_rate": 4.793897303340262e-05, "loss": 1.1552, "step": 17382 }, { "epoch": 1.0361187269042793, "grad_norm": 2.76293683052063, "learning_rate": 4.792952417650329e-05, "loss": 1.1401, "step": 17384 }, { "epoch": 1.0362379306234355, "grad_norm": 2.851893663406372, "learning_rate": 4.792007539367197e-05, "loss": 1.2153, "step": 17386 }, { "epoch": 1.0363571343425915, "grad_norm": 3.1102404594421387, "learning_rate": 4.7910626685246665e-05, "loss": 1.1991, "step": 17388 }, { "epoch": 1.0364763380617474, "grad_norm": 3.151638984680176, "learning_rate": 4.790117805156539e-05, "loss": 1.2945, "step": 17390 }, { "epoch": 1.0365955417809036, "grad_norm": 2.772782325744629, "learning_rate": 4.789172949296615e-05, "loss": 1.2061, "step": 17392 }, { "epoch": 1.0367147455000596, "grad_norm": 2.9336605072021484, "learning_rate": 4.788228100978698e-05, "loss": 1.1691, "step": 17394 }, { "epoch": 1.0368339492192156, "grad_norm": 3.1554596424102783, "learning_rate": 4.787283260236584e-05, "loss": 1.2846, "step": 17396 }, { "epoch": 1.0369531529383718, "grad_norm": 3.1638693809509277, "learning_rate": 4.7863384271040775e-05, "loss": 1.2941, "step": 17398 }, { "epoch": 1.0370723566575277, "grad_norm": 2.8916878700256348, "learning_rate": 4.785393601614977e-05, "loss": 1.0452, "step": 17400 }, { "epoch": 1.0371915603766837, "grad_norm": 3.469022512435913, "learning_rate": 4.784448783803079e-05, "loss": 1.1977, "step": 17402 }, { "epoch": 1.0373107640958399, "grad_norm": 2.8425538539886475, "learning_rate": 4.783503973702187e-05, "loss": 1.2159, "step": 17404 }, { "epoch": 1.0374299678149959, "grad_norm": 2.7914021015167236, "learning_rate": 4.782559171346098e-05, "loss": 1.1904, "step": 17406 }, { "epoch": 1.0375491715341518, "grad_norm": 3.247767686843872, "learning_rate": 4.7816143767686135e-05, "loss": 1.117, "step": 17408 }, { "epoch": 1.037668375253308, "grad_norm": 3.030214309692383, "learning_rate": 4.780669590003526e-05, "loss": 1.0633, "step": 17410 }, { "epoch": 1.037787578972464, "grad_norm": 3.206592559814453, "learning_rate": 4.779724811084641e-05, "loss": 1.3572, "step": 17412 }, { "epoch": 1.03790678269162, "grad_norm": 3.4833602905273438, "learning_rate": 4.778780040045753e-05, "loss": 1.2363, "step": 17414 }, { "epoch": 1.038025986410776, "grad_norm": 3.161945104598999, "learning_rate": 4.777835276920658e-05, "loss": 1.2755, "step": 17416 }, { "epoch": 1.0381451901299321, "grad_norm": 3.249742269515991, "learning_rate": 4.776890521743158e-05, "loss": 1.1369, "step": 17418 }, { "epoch": 1.038264393849088, "grad_norm": 3.0657942295074463, "learning_rate": 4.775945774547044e-05, "loss": 1.2087, "step": 17420 }, { "epoch": 1.038383597568244, "grad_norm": 3.215500593185425, "learning_rate": 4.775001035366118e-05, "loss": 1.3354, "step": 17422 }, { "epoch": 1.0385028012874002, "grad_norm": 3.0073318481445312, "learning_rate": 4.774056304234175e-05, "loss": 1.1405, "step": 17424 }, { "epoch": 1.0386220050065562, "grad_norm": 3.2818994522094727, "learning_rate": 4.7731115811850115e-05, "loss": 1.3516, "step": 17426 }, { "epoch": 1.0387412087257122, "grad_norm": 3.225615978240967, "learning_rate": 4.772166866252423e-05, "loss": 1.2212, "step": 17428 }, { "epoch": 1.0388604124448684, "grad_norm": 3.129911184310913, "learning_rate": 4.771222159470203e-05, "loss": 1.2828, "step": 17430 }, { "epoch": 1.0389796161640243, "grad_norm": 3.180367946624756, "learning_rate": 4.7702774608721524e-05, "loss": 1.1989, "step": 17432 }, { "epoch": 1.0390988198831803, "grad_norm": 3.1388556957244873, "learning_rate": 4.7693327704920605e-05, "loss": 1.1542, "step": 17434 }, { "epoch": 1.0392180236023365, "grad_norm": 3.3323042392730713, "learning_rate": 4.768388088363726e-05, "loss": 1.373, "step": 17436 }, { "epoch": 1.0393372273214925, "grad_norm": 3.074204206466675, "learning_rate": 4.7674434145209404e-05, "loss": 1.2306, "step": 17438 }, { "epoch": 1.0394564310406484, "grad_norm": 3.2658376693725586, "learning_rate": 4.7664987489975014e-05, "loss": 1.1797, "step": 17440 }, { "epoch": 1.0395756347598044, "grad_norm": 2.789961338043213, "learning_rate": 4.765554091827198e-05, "loss": 1.1034, "step": 17442 }, { "epoch": 1.0396948384789606, "grad_norm": 3.07729172706604, "learning_rate": 4.7646094430438306e-05, "loss": 1.1029, "step": 17444 }, { "epoch": 1.0398140421981166, "grad_norm": 3.13830304145813, "learning_rate": 4.7636648026811874e-05, "loss": 1.2812, "step": 17446 }, { "epoch": 1.0399332459172725, "grad_norm": 3.109227180480957, "learning_rate": 4.762720170773062e-05, "loss": 1.184, "step": 17448 }, { "epoch": 1.0400524496364287, "grad_norm": 3.078263521194458, "learning_rate": 4.76177554735325e-05, "loss": 1.2573, "step": 17450 }, { "epoch": 1.0401716533555847, "grad_norm": 2.979468822479248, "learning_rate": 4.76083093245554e-05, "loss": 1.0684, "step": 17452 }, { "epoch": 1.0402908570747407, "grad_norm": 3.315082550048828, "learning_rate": 4.7598863261137264e-05, "loss": 1.1267, "step": 17454 }, { "epoch": 1.0404100607938969, "grad_norm": 2.943948984146118, "learning_rate": 4.7589417283615994e-05, "loss": 1.1107, "step": 17456 }, { "epoch": 1.0405292645130528, "grad_norm": 2.6144590377807617, "learning_rate": 4.7579971392329536e-05, "loss": 1.3888, "step": 17458 }, { "epoch": 1.0406484682322088, "grad_norm": 3.2947487831115723, "learning_rate": 4.7570525587615775e-05, "loss": 1.2191, "step": 17460 }, { "epoch": 1.040767671951365, "grad_norm": 3.15824294090271, "learning_rate": 4.756107986981261e-05, "loss": 1.2463, "step": 17462 }, { "epoch": 1.040886875670521, "grad_norm": 3.003945827484131, "learning_rate": 4.755163423925799e-05, "loss": 1.0662, "step": 17464 }, { "epoch": 1.041006079389677, "grad_norm": 2.9386110305786133, "learning_rate": 4.754218869628976e-05, "loss": 1.0438, "step": 17466 }, { "epoch": 1.041125283108833, "grad_norm": 3.056476593017578, "learning_rate": 4.753274324124587e-05, "loss": 1.2924, "step": 17468 }, { "epoch": 1.041244486827989, "grad_norm": 3.210031270980835, "learning_rate": 4.752329787446418e-05, "loss": 1.1941, "step": 17470 }, { "epoch": 1.041363690547145, "grad_norm": 2.911062240600586, "learning_rate": 4.751385259628262e-05, "loss": 1.1427, "step": 17472 }, { "epoch": 1.041482894266301, "grad_norm": 2.9712491035461426, "learning_rate": 4.750440740703905e-05, "loss": 1.1569, "step": 17474 }, { "epoch": 1.0416020979854572, "grad_norm": 3.172682046890259, "learning_rate": 4.749496230707136e-05, "loss": 1.2615, "step": 17476 }, { "epoch": 1.0417213017046132, "grad_norm": 3.0889620780944824, "learning_rate": 4.748551729671744e-05, "loss": 1.232, "step": 17478 }, { "epoch": 1.0418405054237692, "grad_norm": 3.156688928604126, "learning_rate": 4.7476072376315164e-05, "loss": 1.2264, "step": 17480 }, { "epoch": 1.0419597091429253, "grad_norm": 2.952444076538086, "learning_rate": 4.746662754620244e-05, "loss": 1.1745, "step": 17482 }, { "epoch": 1.0420789128620813, "grad_norm": 2.9309682846069336, "learning_rate": 4.745718280671709e-05, "loss": 1.172, "step": 17484 }, { "epoch": 1.0421981165812373, "grad_norm": 3.1468214988708496, "learning_rate": 4.744773815819702e-05, "loss": 1.092, "step": 17486 }, { "epoch": 1.0423173203003935, "grad_norm": 3.0535969734191895, "learning_rate": 4.743829360098008e-05, "loss": 1.2247, "step": 17488 }, { "epoch": 1.0424365240195494, "grad_norm": 2.9922187328338623, "learning_rate": 4.742884913540416e-05, "loss": 1.1613, "step": 17490 }, { "epoch": 1.0425557277387054, "grad_norm": 3.1596434116363525, "learning_rate": 4.74194047618071e-05, "loss": 1.154, "step": 17492 }, { "epoch": 1.0426749314578614, "grad_norm": 3.0501582622528076, "learning_rate": 4.740996048052675e-05, "loss": 1.1747, "step": 17494 }, { "epoch": 1.0427941351770176, "grad_norm": 2.9666481018066406, "learning_rate": 4.740051629190099e-05, "loss": 1.2136, "step": 17496 }, { "epoch": 1.0429133388961735, "grad_norm": 3.0281527042388916, "learning_rate": 4.739107219626764e-05, "loss": 1.1348, "step": 17498 }, { "epoch": 1.0430325426153295, "grad_norm": 2.951165199279785, "learning_rate": 4.738162819396458e-05, "loss": 1.1027, "step": 17500 }, { "epoch": 1.0431517463344857, "grad_norm": 2.8730082511901855, "learning_rate": 4.737218428532962e-05, "loss": 1.2293, "step": 17502 }, { "epoch": 1.0432709500536417, "grad_norm": 3.200993776321411, "learning_rate": 4.7362740470700636e-05, "loss": 1.177, "step": 17504 }, { "epoch": 1.0433901537727976, "grad_norm": 2.922438383102417, "learning_rate": 4.735329675041545e-05, "loss": 1.1813, "step": 17506 }, { "epoch": 1.0435093574919538, "grad_norm": 3.5234391689300537, "learning_rate": 4.7343853124811874e-05, "loss": 1.3986, "step": 17508 }, { "epoch": 1.0436285612111098, "grad_norm": 3.9962494373321533, "learning_rate": 4.7334409594227775e-05, "loss": 1.2019, "step": 17510 }, { "epoch": 1.0437477649302658, "grad_norm": 2.941504716873169, "learning_rate": 4.732496615900096e-05, "loss": 1.2341, "step": 17512 }, { "epoch": 1.043866968649422, "grad_norm": 3.3422141075134277, "learning_rate": 4.731552281946927e-05, "loss": 1.1265, "step": 17514 }, { "epoch": 1.043986172368578, "grad_norm": 3.09714674949646, "learning_rate": 4.730607957597049e-05, "loss": 1.0774, "step": 17516 }, { "epoch": 1.044105376087734, "grad_norm": 3.0021708011627197, "learning_rate": 4.729663642884248e-05, "loss": 1.1297, "step": 17518 }, { "epoch": 1.0442245798068899, "grad_norm": 3.367769956588745, "learning_rate": 4.728719337842304e-05, "loss": 1.1739, "step": 17520 }, { "epoch": 1.044343783526046, "grad_norm": 3.140277147293091, "learning_rate": 4.7277750425049955e-05, "loss": 1.1715, "step": 17522 }, { "epoch": 1.044462987245202, "grad_norm": 3.1143875122070312, "learning_rate": 4.726830756906106e-05, "loss": 1.2762, "step": 17524 }, { "epoch": 1.044582190964358, "grad_norm": 2.708848237991333, "learning_rate": 4.725886481079414e-05, "loss": 1.2654, "step": 17526 }, { "epoch": 1.0447013946835142, "grad_norm": 3.197561264038086, "learning_rate": 4.724942215058703e-05, "loss": 1.1792, "step": 17528 }, { "epoch": 1.0448205984026702, "grad_norm": 2.9726486206054688, "learning_rate": 4.723997958877747e-05, "loss": 1.1003, "step": 17530 }, { "epoch": 1.0449398021218261, "grad_norm": 3.2089931964874268, "learning_rate": 4.7230537125703304e-05, "loss": 1.2498, "step": 17532 }, { "epoch": 1.0450590058409823, "grad_norm": 3.038966655731201, "learning_rate": 4.7221094761702304e-05, "loss": 1.1604, "step": 17534 }, { "epoch": 1.0451782095601383, "grad_norm": 3.1891651153564453, "learning_rate": 4.7211652497112235e-05, "loss": 1.1324, "step": 17536 }, { "epoch": 1.0452974132792943, "grad_norm": 2.8126001358032227, "learning_rate": 4.720221033227091e-05, "loss": 1.0704, "step": 17538 }, { "epoch": 1.0454166169984505, "grad_norm": 3.112536668777466, "learning_rate": 4.719276826751608e-05, "loss": 1.223, "step": 17540 }, { "epoch": 1.0455358207176064, "grad_norm": 3.1426172256469727, "learning_rate": 4.7183326303185554e-05, "loss": 1.2867, "step": 17542 }, { "epoch": 1.0456550244367624, "grad_norm": 4.236761093139648, "learning_rate": 4.717388443961705e-05, "loss": 1.3083, "step": 17544 }, { "epoch": 1.0457742281559184, "grad_norm": 3.274650812149048, "learning_rate": 4.716444267714841e-05, "loss": 1.1947, "step": 17546 }, { "epoch": 1.0458934318750746, "grad_norm": 2.959711790084839, "learning_rate": 4.7155001016117324e-05, "loss": 1.0757, "step": 17548 }, { "epoch": 1.0460126355942305, "grad_norm": 3.2065563201904297, "learning_rate": 4.71455594568616e-05, "loss": 1.2617, "step": 17550 }, { "epoch": 1.0461318393133865, "grad_norm": 2.8283638954162598, "learning_rate": 4.7136117999718995e-05, "loss": 1.1409, "step": 17552 }, { "epoch": 1.0462510430325427, "grad_norm": 3.1945931911468506, "learning_rate": 4.712667664502722e-05, "loss": 1.1198, "step": 17554 }, { "epoch": 1.0463702467516987, "grad_norm": 3.4417996406555176, "learning_rate": 4.711723539312407e-05, "loss": 1.4416, "step": 17556 }, { "epoch": 1.0464894504708546, "grad_norm": 2.9427402019500732, "learning_rate": 4.710779424434726e-05, "loss": 1.0938, "step": 17558 }, { "epoch": 1.0466086541900108, "grad_norm": 3.4004106521606445, "learning_rate": 4.709835319903457e-05, "loss": 1.1955, "step": 17560 }, { "epoch": 1.0467278579091668, "grad_norm": 2.7454605102539062, "learning_rate": 4.7088912257523686e-05, "loss": 1.1484, "step": 17562 }, { "epoch": 1.0468470616283228, "grad_norm": 3.064145565032959, "learning_rate": 4.707947142015238e-05, "loss": 1.1522, "step": 17564 }, { "epoch": 1.046966265347479, "grad_norm": 3.0332794189453125, "learning_rate": 4.707003068725839e-05, "loss": 1.1698, "step": 17566 }, { "epoch": 1.047085469066635, "grad_norm": 3.2805685997009277, "learning_rate": 4.706059005917941e-05, "loss": 1.2019, "step": 17568 }, { "epoch": 1.0472046727857909, "grad_norm": 3.1273953914642334, "learning_rate": 4.7051149536253186e-05, "loss": 1.0932, "step": 17570 }, { "epoch": 1.0473238765049468, "grad_norm": 3.135348081588745, "learning_rate": 4.704170911881742e-05, "loss": 1.3181, "step": 17572 }, { "epoch": 1.047443080224103, "grad_norm": 3.1179146766662598, "learning_rate": 4.703226880720987e-05, "loss": 1.1428, "step": 17574 }, { "epoch": 1.047562283943259, "grad_norm": 2.998138904571533, "learning_rate": 4.702282860176818e-05, "loss": 1.2744, "step": 17576 }, { "epoch": 1.047681487662415, "grad_norm": 2.9278573989868164, "learning_rate": 4.7013388502830125e-05, "loss": 1.1876, "step": 17578 }, { "epoch": 1.0478006913815712, "grad_norm": 3.1217565536499023, "learning_rate": 4.700394851073337e-05, "loss": 1.2441, "step": 17580 }, { "epoch": 1.0479198951007271, "grad_norm": 3.0344717502593994, "learning_rate": 4.699450862581563e-05, "loss": 1.1679, "step": 17582 }, { "epoch": 1.048039098819883, "grad_norm": 2.852924108505249, "learning_rate": 4.698506884841462e-05, "loss": 1.207, "step": 17584 }, { "epoch": 1.0481583025390393, "grad_norm": 3.369164228439331, "learning_rate": 4.697562917886798e-05, "loss": 1.1793, "step": 17586 }, { "epoch": 1.0482775062581953, "grad_norm": 2.862779378890991, "learning_rate": 4.696618961751345e-05, "loss": 1.1547, "step": 17588 }, { "epoch": 1.0483967099773512, "grad_norm": 3.2489678859710693, "learning_rate": 4.695675016468868e-05, "loss": 1.1815, "step": 17590 }, { "epoch": 1.0485159136965074, "grad_norm": 3.308391571044922, "learning_rate": 4.694731082073139e-05, "loss": 1.2315, "step": 17592 }, { "epoch": 1.0486351174156634, "grad_norm": 3.56225323677063, "learning_rate": 4.693787158597924e-05, "loss": 1.4517, "step": 17594 }, { "epoch": 1.0487543211348194, "grad_norm": 3.614960193634033, "learning_rate": 4.692843246076988e-05, "loss": 1.2869, "step": 17596 }, { "epoch": 1.0488735248539753, "grad_norm": 3.100738763809204, "learning_rate": 4.691899344544102e-05, "loss": 1.3982, "step": 17598 }, { "epoch": 1.0489927285731315, "grad_norm": 3.2533719539642334, "learning_rate": 4.690955454033029e-05, "loss": 1.2438, "step": 17600 }, { "epoch": 1.0491119322922875, "grad_norm": 3.0752310752868652, "learning_rate": 4.690011574577537e-05, "loss": 1.1717, "step": 17602 }, { "epoch": 1.0492311360114435, "grad_norm": 3.0130417346954346, "learning_rate": 4.6890677062113914e-05, "loss": 1.214, "step": 17604 }, { "epoch": 1.0493503397305997, "grad_norm": 3.2107009887695312, "learning_rate": 4.6881238489683596e-05, "loss": 1.1885, "step": 17606 }, { "epoch": 1.0494695434497556, "grad_norm": 3.142806053161621, "learning_rate": 4.687180002882202e-05, "loss": 1.1131, "step": 17608 }, { "epoch": 1.0495887471689116, "grad_norm": 3.3711042404174805, "learning_rate": 4.686236167986688e-05, "loss": 1.1515, "step": 17610 }, { "epoch": 1.0497079508880678, "grad_norm": 3.2053656578063965, "learning_rate": 4.68529234431558e-05, "loss": 1.2283, "step": 17612 }, { "epoch": 1.0498271546072238, "grad_norm": 2.9332010746002197, "learning_rate": 4.68434853190264e-05, "loss": 1.1708, "step": 17614 }, { "epoch": 1.0499463583263797, "grad_norm": 2.907822608947754, "learning_rate": 4.6834047307816355e-05, "loss": 1.2859, "step": 17616 }, { "epoch": 1.050065562045536, "grad_norm": 3.1467137336730957, "learning_rate": 4.682460940986324e-05, "loss": 1.2068, "step": 17618 }, { "epoch": 1.0501847657646919, "grad_norm": 3.2957916259765625, "learning_rate": 4.681517162550473e-05, "loss": 1.1882, "step": 17620 }, { "epoch": 1.0503039694838479, "grad_norm": 3.303086996078491, "learning_rate": 4.680573395507841e-05, "loss": 1.1879, "step": 17622 }, { "epoch": 1.0504231732030038, "grad_norm": 3.0036489963531494, "learning_rate": 4.6796296398921944e-05, "loss": 1.2268, "step": 17624 }, { "epoch": 1.05054237692216, "grad_norm": 3.192744016647339, "learning_rate": 4.67868589573729e-05, "loss": 1.2371, "step": 17626 }, { "epoch": 1.050661580641316, "grad_norm": 3.164273977279663, "learning_rate": 4.67774216307689e-05, "loss": 1.1123, "step": 17628 }, { "epoch": 1.050780784360472, "grad_norm": 3.3964972496032715, "learning_rate": 4.676798441944758e-05, "loss": 1.2926, "step": 17630 }, { "epoch": 1.0508999880796281, "grad_norm": 2.6193525791168213, "learning_rate": 4.675854732374648e-05, "loss": 1.0516, "step": 17632 }, { "epoch": 1.0510191917987841, "grad_norm": 3.114596128463745, "learning_rate": 4.674911034400326e-05, "loss": 1.232, "step": 17634 }, { "epoch": 1.05113839551794, "grad_norm": 3.213064670562744, "learning_rate": 4.673967348055546e-05, "loss": 1.1302, "step": 17636 }, { "epoch": 1.0512575992370963, "grad_norm": 2.8727455139160156, "learning_rate": 4.6730236733740726e-05, "loss": 1.2163, "step": 17638 }, { "epoch": 1.0513768029562522, "grad_norm": 3.2554304599761963, "learning_rate": 4.672080010389659e-05, "loss": 1.1306, "step": 17640 }, { "epoch": 1.0514960066754082, "grad_norm": 3.120389223098755, "learning_rate": 4.671136359136065e-05, "loss": 1.0857, "step": 17642 }, { "epoch": 1.0516152103945644, "grad_norm": 3.1134536266326904, "learning_rate": 4.670192719647049e-05, "loss": 1.1179, "step": 17644 }, { "epoch": 1.0517344141137204, "grad_norm": 3.019477128982544, "learning_rate": 4.669249091956368e-05, "loss": 1.0135, "step": 17646 }, { "epoch": 1.0518536178328763, "grad_norm": 3.113007068634033, "learning_rate": 4.668305476097781e-05, "loss": 1.2241, "step": 17648 }, { "epoch": 1.0519728215520323, "grad_norm": 2.8161470890045166, "learning_rate": 4.6673618721050385e-05, "loss": 1.0626, "step": 17650 }, { "epoch": 1.0520920252711885, "grad_norm": 3.021272659301758, "learning_rate": 4.6664182800119015e-05, "loss": 1.1016, "step": 17652 }, { "epoch": 1.0522112289903445, "grad_norm": 3.1723079681396484, "learning_rate": 4.665474699852125e-05, "loss": 1.1296, "step": 17654 }, { "epoch": 1.0523304327095004, "grad_norm": 3.4224939346313477, "learning_rate": 4.664531131659461e-05, "loss": 1.3119, "step": 17656 }, { "epoch": 1.0524496364286566, "grad_norm": 3.195574998855591, "learning_rate": 4.663587575467668e-05, "loss": 1.2501, "step": 17658 }, { "epoch": 1.0525688401478126, "grad_norm": 3.1188647747039795, "learning_rate": 4.6626440313104964e-05, "loss": 1.1374, "step": 17660 }, { "epoch": 1.0526880438669686, "grad_norm": 3.2969350814819336, "learning_rate": 4.6617004992217046e-05, "loss": 1.0905, "step": 17662 }, { "epoch": 1.0528072475861248, "grad_norm": 3.4511356353759766, "learning_rate": 4.660756979235042e-05, "loss": 1.3232, "step": 17664 }, { "epoch": 1.0529264513052807, "grad_norm": 3.492550849914551, "learning_rate": 4.6598134713842624e-05, "loss": 1.2408, "step": 17666 }, { "epoch": 1.0530456550244367, "grad_norm": 2.922933340072632, "learning_rate": 4.6588699757031184e-05, "loss": 1.1853, "step": 17668 }, { "epoch": 1.053164858743593, "grad_norm": 3.2506062984466553, "learning_rate": 4.657926492225365e-05, "loss": 1.0853, "step": 17670 }, { "epoch": 1.0532840624627489, "grad_norm": 2.727027654647827, "learning_rate": 4.6569830209847505e-05, "loss": 1.1784, "step": 17672 }, { "epoch": 1.0534032661819048, "grad_norm": 3.5738961696624756, "learning_rate": 4.6560395620150255e-05, "loss": 1.2835, "step": 17674 }, { "epoch": 1.053522469901061, "grad_norm": 3.078536033630371, "learning_rate": 4.6550961153499435e-05, "loss": 1.292, "step": 17676 }, { "epoch": 1.053641673620217, "grad_norm": 3.321230173110962, "learning_rate": 4.654152681023252e-05, "loss": 1.2476, "step": 17678 }, { "epoch": 1.053760877339373, "grad_norm": 3.165482521057129, "learning_rate": 4.653209259068705e-05, "loss": 1.1451, "step": 17680 }, { "epoch": 1.053880081058529, "grad_norm": 3.4408438205718994, "learning_rate": 4.652265849520047e-05, "loss": 1.2537, "step": 17682 }, { "epoch": 1.0539992847776851, "grad_norm": 3.3517980575561523, "learning_rate": 4.65132245241103e-05, "loss": 1.2804, "step": 17684 }, { "epoch": 1.054118488496841, "grad_norm": 3.121051073074341, "learning_rate": 4.6503790677754036e-05, "loss": 1.0447, "step": 17686 }, { "epoch": 1.054237692215997, "grad_norm": 2.636204481124878, "learning_rate": 4.649435695646912e-05, "loss": 1.1148, "step": 17688 }, { "epoch": 1.0543568959351532, "grad_norm": 3.0650103092193604, "learning_rate": 4.6484923360593055e-05, "loss": 1.145, "step": 17690 }, { "epoch": 1.0544760996543092, "grad_norm": 3.1486666202545166, "learning_rate": 4.647548989046331e-05, "loss": 1.2474, "step": 17692 }, { "epoch": 1.0545953033734652, "grad_norm": 3.171865463256836, "learning_rate": 4.646605654641736e-05, "loss": 1.2897, "step": 17694 }, { "epoch": 1.0547145070926214, "grad_norm": 3.197476863861084, "learning_rate": 4.645662332879264e-05, "loss": 1.2564, "step": 17696 }, { "epoch": 1.0548337108117773, "grad_norm": 3.334902286529541, "learning_rate": 4.644719023792664e-05, "loss": 1.3179, "step": 17698 }, { "epoch": 1.0549529145309333, "grad_norm": 3.0762016773223877, "learning_rate": 4.643775727415681e-05, "loss": 1.2631, "step": 17700 }, { "epoch": 1.0550721182500895, "grad_norm": 2.9828314781188965, "learning_rate": 4.642832443782057e-05, "loss": 1.2649, "step": 17702 }, { "epoch": 1.0551913219692455, "grad_norm": 3.416438102722168, "learning_rate": 4.6418891729255393e-05, "loss": 1.1932, "step": 17704 }, { "epoch": 1.0553105256884014, "grad_norm": 3.1842381954193115, "learning_rate": 4.64094591487987e-05, "loss": 1.245, "step": 17706 }, { "epoch": 1.0554297294075574, "grad_norm": 3.3635566234588623, "learning_rate": 4.640002669678795e-05, "loss": 1.1013, "step": 17708 }, { "epoch": 1.0555489331267136, "grad_norm": 3.3052449226379395, "learning_rate": 4.639059437356055e-05, "loss": 1.2339, "step": 17710 }, { "epoch": 1.0556681368458696, "grad_norm": 3.4260005950927734, "learning_rate": 4.638116217945397e-05, "loss": 1.102, "step": 17712 }, { "epoch": 1.0557873405650255, "grad_norm": 2.8881053924560547, "learning_rate": 4.6371730114805576e-05, "loss": 1.1635, "step": 17714 }, { "epoch": 1.0559065442841817, "grad_norm": 3.089008092880249, "learning_rate": 4.636229817995281e-05, "loss": 1.2269, "step": 17716 }, { "epoch": 1.0560257480033377, "grad_norm": 3.0114104747772217, "learning_rate": 4.6352866375233104e-05, "loss": 1.2358, "step": 17718 }, { "epoch": 1.0561449517224937, "grad_norm": 3.093768835067749, "learning_rate": 4.634343470098382e-05, "loss": 1.1943, "step": 17720 }, { "epoch": 1.0562641554416499, "grad_norm": 3.208815336227417, "learning_rate": 4.6334003157542404e-05, "loss": 1.2105, "step": 17722 }, { "epoch": 1.0563833591608058, "grad_norm": 3.1712746620178223, "learning_rate": 4.6324571745246225e-05, "loss": 1.1577, "step": 17724 }, { "epoch": 1.0565025628799618, "grad_norm": 3.119142532348633, "learning_rate": 4.631514046443271e-05, "loss": 1.2988, "step": 17726 }, { "epoch": 1.056621766599118, "grad_norm": 3.141331434249878, "learning_rate": 4.630570931543921e-05, "loss": 1.2209, "step": 17728 }, { "epoch": 1.056740970318274, "grad_norm": 3.2063732147216797, "learning_rate": 4.6296278298603135e-05, "loss": 1.2956, "step": 17730 }, { "epoch": 1.05686017403743, "grad_norm": 3.2910611629486084, "learning_rate": 4.628684741426187e-05, "loss": 1.3362, "step": 17732 }, { "epoch": 1.056979377756586, "grad_norm": 2.9719924926757812, "learning_rate": 4.627741666275275e-05, "loss": 1.3024, "step": 17734 }, { "epoch": 1.057098581475742, "grad_norm": 3.2257301807403564, "learning_rate": 4.626798604441319e-05, "loss": 1.2992, "step": 17736 }, { "epoch": 1.057217785194898, "grad_norm": 3.275947332382202, "learning_rate": 4.625855555958053e-05, "loss": 1.1817, "step": 17738 }, { "epoch": 1.057336988914054, "grad_norm": 3.191838026046753, "learning_rate": 4.6249125208592156e-05, "loss": 1.1093, "step": 17740 }, { "epoch": 1.0574561926332102, "grad_norm": 2.8905556201934814, "learning_rate": 4.623969499178537e-05, "loss": 1.1795, "step": 17742 }, { "epoch": 1.0575753963523662, "grad_norm": 2.9854114055633545, "learning_rate": 4.62302649094976e-05, "loss": 1.252, "step": 17744 }, { "epoch": 1.0576946000715222, "grad_norm": 3.2939555644989014, "learning_rate": 4.6220834962066136e-05, "loss": 1.212, "step": 17746 }, { "epoch": 1.0578138037906784, "grad_norm": 3.098623037338257, "learning_rate": 4.621140514982833e-05, "loss": 1.1047, "step": 17748 }, { "epoch": 1.0579330075098343, "grad_norm": 3.0867342948913574, "learning_rate": 4.6201975473121536e-05, "loss": 1.1531, "step": 17750 }, { "epoch": 1.0580522112289903, "grad_norm": 3.264770984649658, "learning_rate": 4.6192545932283064e-05, "loss": 1.2262, "step": 17752 }, { "epoch": 1.0581714149481465, "grad_norm": 3.1157257556915283, "learning_rate": 4.6183116527650256e-05, "loss": 1.2565, "step": 17754 }, { "epoch": 1.0582906186673025, "grad_norm": 3.011107921600342, "learning_rate": 4.6173687259560425e-05, "loss": 1.1554, "step": 17756 }, { "epoch": 1.0584098223864584, "grad_norm": 2.9702439308166504, "learning_rate": 4.616425812835091e-05, "loss": 1.2657, "step": 17758 }, { "epoch": 1.0585290261056144, "grad_norm": 3.296706199645996, "learning_rate": 4.615482913435899e-05, "loss": 1.3228, "step": 17760 }, { "epoch": 1.0586482298247706, "grad_norm": 2.957479953765869, "learning_rate": 4.614540027792198e-05, "loss": 1.1816, "step": 17762 }, { "epoch": 1.0587674335439266, "grad_norm": 3.530327081680298, "learning_rate": 4.613597155937721e-05, "loss": 1.3232, "step": 17764 }, { "epoch": 1.0588866372630825, "grad_norm": 2.8755998611450195, "learning_rate": 4.6126542979061946e-05, "loss": 1.2726, "step": 17766 }, { "epoch": 1.0590058409822387, "grad_norm": 3.4362809658050537, "learning_rate": 4.611711453731349e-05, "loss": 1.3204, "step": 17768 }, { "epoch": 1.0591250447013947, "grad_norm": 3.1237499713897705, "learning_rate": 4.610768623446913e-05, "loss": 1.2185, "step": 17770 }, { "epoch": 1.0592442484205506, "grad_norm": 3.027592658996582, "learning_rate": 4.609825807086617e-05, "loss": 1.2066, "step": 17772 }, { "epoch": 1.0593634521397068, "grad_norm": 2.8672268390655518, "learning_rate": 4.608883004684184e-05, "loss": 1.2221, "step": 17774 }, { "epoch": 1.0594826558588628, "grad_norm": 3.1290431022644043, "learning_rate": 4.6079402162733464e-05, "loss": 1.2389, "step": 17776 }, { "epoch": 1.0596018595780188, "grad_norm": 3.426675796508789, "learning_rate": 4.6069974418878284e-05, "loss": 1.2997, "step": 17778 }, { "epoch": 1.059721063297175, "grad_norm": 3.756638288497925, "learning_rate": 4.606054681561355e-05, "loss": 1.2582, "step": 17780 }, { "epoch": 1.059840267016331, "grad_norm": 2.986870765686035, "learning_rate": 4.605111935327656e-05, "loss": 1.2642, "step": 17782 }, { "epoch": 1.059959470735487, "grad_norm": 2.91593599319458, "learning_rate": 4.604169203220452e-05, "loss": 1.2924, "step": 17784 }, { "epoch": 1.0600786744546429, "grad_norm": 3.4909911155700684, "learning_rate": 4.603226485273471e-05, "loss": 1.3267, "step": 17786 }, { "epoch": 1.060197878173799, "grad_norm": 3.2268552780151367, "learning_rate": 4.602283781520435e-05, "loss": 1.4767, "step": 17788 }, { "epoch": 1.060317081892955, "grad_norm": 3.278815507888794, "learning_rate": 4.601341091995071e-05, "loss": 1.2304, "step": 17790 }, { "epoch": 1.060436285612111, "grad_norm": 3.073305130004883, "learning_rate": 4.6003984167310994e-05, "loss": 1.2372, "step": 17792 }, { "epoch": 1.0605554893312672, "grad_norm": 2.7022838592529297, "learning_rate": 4.599455755762242e-05, "loss": 1.2442, "step": 17794 }, { "epoch": 1.0606746930504232, "grad_norm": 2.8540005683898926, "learning_rate": 4.5985131091222256e-05, "loss": 1.1723, "step": 17796 }, { "epoch": 1.0607938967695791, "grad_norm": 2.9517035484313965, "learning_rate": 4.597570476844766e-05, "loss": 1.17, "step": 17798 }, { "epoch": 1.0609131004887353, "grad_norm": 2.804353952407837, "learning_rate": 4.5966278589635885e-05, "loss": 1.1592, "step": 17800 }, { "epoch": 1.0610323042078913, "grad_norm": 3.1530957221984863, "learning_rate": 4.595685255512412e-05, "loss": 1.1812, "step": 17802 }, { "epoch": 1.0611515079270473, "grad_norm": 3.4348456859588623, "learning_rate": 4.594742666524959e-05, "loss": 1.2061, "step": 17804 }, { "epoch": 1.0612707116462035, "grad_norm": 3.078996419906616, "learning_rate": 4.593800092034947e-05, "loss": 1.2532, "step": 17806 }, { "epoch": 1.0613899153653594, "grad_norm": 2.904794216156006, "learning_rate": 4.592857532076094e-05, "loss": 1.1687, "step": 17808 }, { "epoch": 1.0615091190845154, "grad_norm": 2.975543260574341, "learning_rate": 4.591914986682121e-05, "loss": 1.2915, "step": 17810 }, { "epoch": 1.0616283228036716, "grad_norm": 3.445850372314453, "learning_rate": 4.590972455886744e-05, "loss": 1.3815, "step": 17812 }, { "epoch": 1.0617475265228276, "grad_norm": 3.043344020843506, "learning_rate": 4.590029939723684e-05, "loss": 1.241, "step": 17814 }, { "epoch": 1.0618667302419835, "grad_norm": 3.600182056427002, "learning_rate": 4.589087438226654e-05, "loss": 1.2941, "step": 17816 }, { "epoch": 1.0619859339611395, "grad_norm": 3.183448076248169, "learning_rate": 4.588144951429373e-05, "loss": 1.3522, "step": 17818 }, { "epoch": 1.0621051376802957, "grad_norm": 3.1648857593536377, "learning_rate": 4.587202479365557e-05, "loss": 1.3147, "step": 17820 }, { "epoch": 1.0622243413994517, "grad_norm": 3.014413595199585, "learning_rate": 4.5862600220689186e-05, "loss": 1.1781, "step": 17822 }, { "epoch": 1.0623435451186076, "grad_norm": 3.2050905227661133, "learning_rate": 4.585317579573177e-05, "loss": 1.2606, "step": 17824 }, { "epoch": 1.0624627488377638, "grad_norm": 3.2927615642547607, "learning_rate": 4.584375151912042e-05, "loss": 1.1817, "step": 17826 }, { "epoch": 1.0625819525569198, "grad_norm": 3.0382602214813232, "learning_rate": 4.583432739119233e-05, "loss": 1.1112, "step": 17828 }, { "epoch": 1.0627011562760758, "grad_norm": 2.9729058742523193, "learning_rate": 4.582490341228457e-05, "loss": 1.2165, "step": 17830 }, { "epoch": 1.062820359995232, "grad_norm": 3.3668479919433594, "learning_rate": 4.581547958273432e-05, "loss": 1.1896, "step": 17832 }, { "epoch": 1.062939563714388, "grad_norm": 3.2002110481262207, "learning_rate": 4.580605590287866e-05, "loss": 1.2243, "step": 17834 }, { "epoch": 1.0630587674335439, "grad_norm": 3.2483294010162354, "learning_rate": 4.5796632373054766e-05, "loss": 1.2363, "step": 17836 }, { "epoch": 1.0631779711527, "grad_norm": 3.1146442890167236, "learning_rate": 4.5787208993599696e-05, "loss": 1.0494, "step": 17838 }, { "epoch": 1.063297174871856, "grad_norm": 2.994089365005493, "learning_rate": 4.5777785764850565e-05, "loss": 1.229, "step": 17840 }, { "epoch": 1.063416378591012, "grad_norm": 3.3794918060302734, "learning_rate": 4.576836268714449e-05, "loss": 1.3491, "step": 17842 }, { "epoch": 1.063535582310168, "grad_norm": 2.6837592124938965, "learning_rate": 4.575893976081855e-05, "loss": 1.2235, "step": 17844 }, { "epoch": 1.0636547860293242, "grad_norm": 3.4113359451293945, "learning_rate": 4.574951698620987e-05, "loss": 1.2616, "step": 17846 }, { "epoch": 1.0637739897484801, "grad_norm": 2.76741623878479, "learning_rate": 4.5740094363655486e-05, "loss": 1.1347, "step": 17848 }, { "epoch": 1.0638931934676361, "grad_norm": 3.368950605392456, "learning_rate": 4.5730671893492514e-05, "loss": 1.2276, "step": 17850 }, { "epoch": 1.0640123971867923, "grad_norm": 3.152711868286133, "learning_rate": 4.5721249576058027e-05, "loss": 1.1108, "step": 17852 }, { "epoch": 1.0641316009059483, "grad_norm": 3.1120314598083496, "learning_rate": 4.571182741168906e-05, "loss": 1.2476, "step": 17854 }, { "epoch": 1.0642508046251042, "grad_norm": 3.1983532905578613, "learning_rate": 4.570240540072271e-05, "loss": 1.3302, "step": 17856 }, { "epoch": 1.0643700083442604, "grad_norm": 3.016373872756958, "learning_rate": 4.569298354349601e-05, "loss": 1.2624, "step": 17858 }, { "epoch": 1.0644892120634164, "grad_norm": 3.3925821781158447, "learning_rate": 4.568356184034605e-05, "loss": 1.1839, "step": 17860 }, { "epoch": 1.0646084157825724, "grad_norm": 2.8961808681488037, "learning_rate": 4.567414029160982e-05, "loss": 1.0849, "step": 17862 }, { "epoch": 1.0647276195017286, "grad_norm": 3.331941843032837, "learning_rate": 4.5664718897624415e-05, "loss": 1.212, "step": 17864 }, { "epoch": 1.0648468232208845, "grad_norm": 3.162423849105835, "learning_rate": 4.565529765872686e-05, "loss": 1.2988, "step": 17866 }, { "epoch": 1.0649660269400405, "grad_norm": 2.877852439880371, "learning_rate": 4.564587657525414e-05, "loss": 1.1318, "step": 17868 }, { "epoch": 1.0650852306591965, "grad_norm": 3.3149845600128174, "learning_rate": 4.563645564754333e-05, "loss": 1.2137, "step": 17870 }, { "epoch": 1.0652044343783527, "grad_norm": 3.5096678733825684, "learning_rate": 4.562703487593142e-05, "loss": 1.3164, "step": 17872 }, { "epoch": 1.0653236380975086, "grad_norm": 3.4350054264068604, "learning_rate": 4.561761426075545e-05, "loss": 1.233, "step": 17874 }, { "epoch": 1.0654428418166646, "grad_norm": 3.2946174144744873, "learning_rate": 4.5608193802352395e-05, "loss": 1.2412, "step": 17876 }, { "epoch": 1.0655620455358208, "grad_norm": 3.1458709239959717, "learning_rate": 4.55987735010593e-05, "loss": 1.1141, "step": 17878 }, { "epoch": 1.0656812492549768, "grad_norm": 2.9319417476654053, "learning_rate": 4.558935335721312e-05, "loss": 1.1821, "step": 17880 }, { "epoch": 1.0658004529741327, "grad_norm": 3.2512705326080322, "learning_rate": 4.5579933371150855e-05, "loss": 1.2417, "step": 17882 }, { "epoch": 1.065919656693289, "grad_norm": 3.462449312210083, "learning_rate": 4.5570513543209516e-05, "loss": 1.2981, "step": 17884 }, { "epoch": 1.0660388604124449, "grad_norm": 3.0844430923461914, "learning_rate": 4.556109387372603e-05, "loss": 1.1699, "step": 17886 }, { "epoch": 1.0661580641316009, "grad_norm": 3.217393398284912, "learning_rate": 4.5551674363037435e-05, "loss": 1.375, "step": 17888 }, { "epoch": 1.066277267850757, "grad_norm": 3.1426608562469482, "learning_rate": 4.554225501148065e-05, "loss": 1.116, "step": 17890 }, { "epoch": 1.066396471569913, "grad_norm": 2.98970627784729, "learning_rate": 4.5532835819392664e-05, "loss": 1.1321, "step": 17892 }, { "epoch": 1.066515675289069, "grad_norm": 2.7681643962860107, "learning_rate": 4.552341678711042e-05, "loss": 1.1055, "step": 17894 }, { "epoch": 1.066634879008225, "grad_norm": 3.0710434913635254, "learning_rate": 4.5513997914970875e-05, "loss": 1.1657, "step": 17896 }, { "epoch": 1.0667540827273811, "grad_norm": 3.2051384449005127, "learning_rate": 4.550457920331099e-05, "loss": 1.1561, "step": 17898 }, { "epoch": 1.0668732864465371, "grad_norm": 3.3704707622528076, "learning_rate": 4.549516065246766e-05, "loss": 1.0885, "step": 17900 }, { "epoch": 1.066992490165693, "grad_norm": 2.93489408493042, "learning_rate": 4.5485742262777856e-05, "loss": 1.1929, "step": 17902 }, { "epoch": 1.0671116938848493, "grad_norm": 3.0157341957092285, "learning_rate": 4.5476324034578496e-05, "loss": 1.2086, "step": 17904 }, { "epoch": 1.0672308976040052, "grad_norm": 3.420356273651123, "learning_rate": 4.5466905968206516e-05, "loss": 1.1931, "step": 17906 }, { "epoch": 1.0673501013231612, "grad_norm": 3.070272207260132, "learning_rate": 4.545748806399879e-05, "loss": 1.2955, "step": 17908 }, { "epoch": 1.0674693050423174, "grad_norm": 3.3655149936676025, "learning_rate": 4.544807032229229e-05, "loss": 1.3203, "step": 17910 }, { "epoch": 1.0675885087614734, "grad_norm": 3.137378454208374, "learning_rate": 4.543865274342388e-05, "loss": 1.2823, "step": 17912 }, { "epoch": 1.0677077124806293, "grad_norm": 3.2856369018554688, "learning_rate": 4.5429235327730455e-05, "loss": 1.1567, "step": 17914 }, { "epoch": 1.0678269161997855, "grad_norm": 2.9897119998931885, "learning_rate": 4.541981807554894e-05, "loss": 1.1322, "step": 17916 }, { "epoch": 1.0679461199189415, "grad_norm": 3.25268292427063, "learning_rate": 4.541040098721619e-05, "loss": 1.195, "step": 17918 }, { "epoch": 1.0680653236380975, "grad_norm": 2.960862874984741, "learning_rate": 4.540098406306911e-05, "loss": 1.1391, "step": 17920 }, { "epoch": 1.0681845273572534, "grad_norm": 2.8439395427703857, "learning_rate": 4.539156730344454e-05, "loss": 1.1066, "step": 17922 }, { "epoch": 1.0683037310764096, "grad_norm": 3.241724729537964, "learning_rate": 4.538215070867941e-05, "loss": 1.1606, "step": 17924 }, { "epoch": 1.0684229347955656, "grad_norm": 2.8638226985931396, "learning_rate": 4.5372734279110536e-05, "loss": 1.1189, "step": 17926 }, { "epoch": 1.0685421385147216, "grad_norm": 3.3716399669647217, "learning_rate": 4.5363318015074777e-05, "loss": 1.1653, "step": 17928 }, { "epoch": 1.0686613422338778, "grad_norm": 2.912142276763916, "learning_rate": 4.535390191690902e-05, "loss": 1.1637, "step": 17930 }, { "epoch": 1.0687805459530337, "grad_norm": 2.968245506286621, "learning_rate": 4.534448598495006e-05, "loss": 1.327, "step": 17932 }, { "epoch": 1.0688997496721897, "grad_norm": 3.198545217514038, "learning_rate": 4.533507021953479e-05, "loss": 1.1757, "step": 17934 }, { "epoch": 1.069018953391346, "grad_norm": 3.383897542953491, "learning_rate": 4.532565462099999e-05, "loss": 1.2818, "step": 17936 }, { "epoch": 1.0691381571105019, "grad_norm": 2.954758405685425, "learning_rate": 4.531623918968255e-05, "loss": 1.2062, "step": 17938 }, { "epoch": 1.0692573608296578, "grad_norm": 3.5144078731536865, "learning_rate": 4.530682392591925e-05, "loss": 1.1627, "step": 17940 }, { "epoch": 1.069376564548814, "grad_norm": 2.987168312072754, "learning_rate": 4.52974088300469e-05, "loss": 1.1369, "step": 17942 }, { "epoch": 1.06949576826797, "grad_norm": 3.0748188495635986, "learning_rate": 4.528799390240234e-05, "loss": 1.1638, "step": 17944 }, { "epoch": 1.069614971987126, "grad_norm": 3.316538095474243, "learning_rate": 4.527857914332234e-05, "loss": 1.2507, "step": 17946 }, { "epoch": 1.069734175706282, "grad_norm": 3.5390913486480713, "learning_rate": 4.526916455314375e-05, "loss": 1.161, "step": 17948 }, { "epoch": 1.0698533794254381, "grad_norm": 2.9248836040496826, "learning_rate": 4.52597501322033e-05, "loss": 1.1459, "step": 17950 }, { "epoch": 1.069972583144594, "grad_norm": 3.475203275680542, "learning_rate": 4.525033588083783e-05, "loss": 1.2397, "step": 17952 }, { "epoch": 1.07009178686375, "grad_norm": 3.1015679836273193, "learning_rate": 4.524092179938408e-05, "loss": 1.3217, "step": 17954 }, { "epoch": 1.0702109905829063, "grad_norm": 3.146299123764038, "learning_rate": 4.523150788817886e-05, "loss": 1.1902, "step": 17956 }, { "epoch": 1.0703301943020622, "grad_norm": 3.079070806503296, "learning_rate": 4.5222094147558905e-05, "loss": 1.24, "step": 17958 }, { "epoch": 1.0704493980212182, "grad_norm": 3.704493284225464, "learning_rate": 4.521268057786098e-05, "loss": 1.2722, "step": 17960 }, { "epoch": 1.0705686017403744, "grad_norm": 3.030278205871582, "learning_rate": 4.520326717942188e-05, "loss": 1.1377, "step": 17962 }, { "epoch": 1.0706878054595304, "grad_norm": 2.715108871459961, "learning_rate": 4.51938539525783e-05, "loss": 1.0863, "step": 17964 }, { "epoch": 1.0708070091786863, "grad_norm": 3.1664769649505615, "learning_rate": 4.5184440897667015e-05, "loss": 1.1364, "step": 17966 }, { "epoch": 1.0709262128978425, "grad_norm": 2.9933032989501953, "learning_rate": 4.517502801502474e-05, "loss": 1.157, "step": 17968 }, { "epoch": 1.0710454166169985, "grad_norm": 3.403592824935913, "learning_rate": 4.5165615304988254e-05, "loss": 1.2159, "step": 17970 }, { "epoch": 1.0711646203361544, "grad_norm": 2.8911495208740234, "learning_rate": 4.515620276789423e-05, "loss": 1.095, "step": 17972 }, { "epoch": 1.0712838240553104, "grad_norm": 3.1487221717834473, "learning_rate": 4.51467904040794e-05, "loss": 1.1013, "step": 17974 }, { "epoch": 1.0714030277744666, "grad_norm": 3.315370559692383, "learning_rate": 4.513737821388049e-05, "loss": 1.2658, "step": 17976 }, { "epoch": 1.0715222314936226, "grad_norm": 3.078963279724121, "learning_rate": 4.5127966197634185e-05, "loss": 1.2189, "step": 17978 }, { "epoch": 1.0716414352127785, "grad_norm": 3.3086981773376465, "learning_rate": 4.5118554355677225e-05, "loss": 1.2355, "step": 17980 }, { "epoch": 1.0717606389319347, "grad_norm": 3.3611018657684326, "learning_rate": 4.510914268834626e-05, "loss": 1.2444, "step": 17982 }, { "epoch": 1.0718798426510907, "grad_norm": 3.4186816215515137, "learning_rate": 4.5099731195977995e-05, "loss": 1.4665, "step": 17984 }, { "epoch": 1.0719990463702467, "grad_norm": 3.6250245571136475, "learning_rate": 4.5090319878909124e-05, "loss": 1.2318, "step": 17986 }, { "epoch": 1.0721182500894029, "grad_norm": 3.1091554164886475, "learning_rate": 4.5080908737476296e-05, "loss": 1.1306, "step": 17988 }, { "epoch": 1.0722374538085588, "grad_norm": 3.4599642753601074, "learning_rate": 4.5071497772016195e-05, "loss": 1.1913, "step": 17990 }, { "epoch": 1.0723566575277148, "grad_norm": 2.925718069076538, "learning_rate": 4.506208698286547e-05, "loss": 1.2461, "step": 17992 }, { "epoch": 1.072475861246871, "grad_norm": 3.160799980163574, "learning_rate": 4.505267637036082e-05, "loss": 1.0871, "step": 17994 }, { "epoch": 1.072595064966027, "grad_norm": 3.067222833633423, "learning_rate": 4.5043265934838836e-05, "loss": 1.2179, "step": 17996 }, { "epoch": 1.072714268685183, "grad_norm": 3.4865143299102783, "learning_rate": 4.50338556766362e-05, "loss": 1.1928, "step": 17998 }, { "epoch": 1.072833472404339, "grad_norm": 3.027153253555298, "learning_rate": 4.502444559608953e-05, "loss": 1.2676, "step": 18000 }, { "epoch": 1.072952676123495, "grad_norm": 3.221334934234619, "learning_rate": 4.501503569353548e-05, "loss": 1.1755, "step": 18002 }, { "epoch": 1.073071879842651, "grad_norm": 3.2475404739379883, "learning_rate": 4.5005625969310654e-05, "loss": 1.2594, "step": 18004 }, { "epoch": 1.073191083561807, "grad_norm": 3.190591335296631, "learning_rate": 4.4996216423751656e-05, "loss": 1.2333, "step": 18006 }, { "epoch": 1.0733102872809632, "grad_norm": 3.422241687774658, "learning_rate": 4.498680705719514e-05, "loss": 1.281, "step": 18008 }, { "epoch": 1.0734294910001192, "grad_norm": 3.6493611335754395, "learning_rate": 4.4977397869977666e-05, "loss": 1.2408, "step": 18010 }, { "epoch": 1.0735486947192752, "grad_norm": 3.2305233478546143, "learning_rate": 4.496798886243588e-05, "loss": 1.1556, "step": 18012 }, { "epoch": 1.0736678984384314, "grad_norm": 3.4188644886016846, "learning_rate": 4.4958580034906335e-05, "loss": 1.086, "step": 18014 }, { "epoch": 1.0737871021575873, "grad_norm": 2.8812222480773926, "learning_rate": 4.494917138772564e-05, "loss": 1.154, "step": 18016 }, { "epoch": 1.0739063058767433, "grad_norm": 3.2165048122406006, "learning_rate": 4.4939762921230366e-05, "loss": 1.1378, "step": 18018 }, { "epoch": 1.0740255095958995, "grad_norm": 3.362659454345703, "learning_rate": 4.493035463575707e-05, "loss": 1.2184, "step": 18020 }, { "epoch": 1.0741447133150555, "grad_norm": 3.000129461288452, "learning_rate": 4.492094653164234e-05, "loss": 1.1641, "step": 18022 }, { "epoch": 1.0742639170342114, "grad_norm": 3.5193986892700195, "learning_rate": 4.491153860922273e-05, "loss": 1.306, "step": 18024 }, { "epoch": 1.0743831207533674, "grad_norm": 3.3992714881896973, "learning_rate": 4.490213086883481e-05, "loss": 1.1773, "step": 18026 }, { "epoch": 1.0745023244725236, "grad_norm": 2.8580269813537598, "learning_rate": 4.4892723310815095e-05, "loss": 1.2141, "step": 18028 }, { "epoch": 1.0746215281916796, "grad_norm": 3.363839864730835, "learning_rate": 4.4883315935500143e-05, "loss": 1.4048, "step": 18030 }, { "epoch": 1.0747407319108355, "grad_norm": 3.099078893661499, "learning_rate": 4.48739087432265e-05, "loss": 1.1239, "step": 18032 }, { "epoch": 1.0748599356299917, "grad_norm": 3.36566162109375, "learning_rate": 4.486450173433066e-05, "loss": 1.1896, "step": 18034 }, { "epoch": 1.0749791393491477, "grad_norm": 3.3872568607330322, "learning_rate": 4.4855094909149174e-05, "loss": 1.3118, "step": 18036 }, { "epoch": 1.0750983430683037, "grad_norm": 3.023135185241699, "learning_rate": 4.4845688268018535e-05, "loss": 1.1399, "step": 18038 }, { "epoch": 1.0752175467874598, "grad_norm": 3.497446060180664, "learning_rate": 4.483628181127528e-05, "loss": 1.2847, "step": 18040 }, { "epoch": 1.0753367505066158, "grad_norm": 3.214743137359619, "learning_rate": 4.4826875539255876e-05, "loss": 1.2584, "step": 18042 }, { "epoch": 1.0754559542257718, "grad_norm": 3.5805835723876953, "learning_rate": 4.481746945229685e-05, "loss": 1.2065, "step": 18044 }, { "epoch": 1.075575157944928, "grad_norm": 3.294168472290039, "learning_rate": 4.480806355073467e-05, "loss": 1.2013, "step": 18046 }, { "epoch": 1.075694361664084, "grad_norm": 3.432199239730835, "learning_rate": 4.4798657834905805e-05, "loss": 1.1524, "step": 18048 }, { "epoch": 1.07581356538324, "grad_norm": 2.8546290397644043, "learning_rate": 4.478925230514677e-05, "loss": 1.283, "step": 18050 }, { "epoch": 1.0759327691023959, "grad_norm": 3.5852465629577637, "learning_rate": 4.4779846961793986e-05, "loss": 1.1549, "step": 18052 }, { "epoch": 1.076051972821552, "grad_norm": 3.3263180255889893, "learning_rate": 4.477044180518395e-05, "loss": 1.2325, "step": 18054 }, { "epoch": 1.076171176540708, "grad_norm": 2.7669246196746826, "learning_rate": 4.4761036835653086e-05, "loss": 1.1802, "step": 18056 }, { "epoch": 1.076290380259864, "grad_norm": 3.286935806274414, "learning_rate": 4.4751632053537884e-05, "loss": 1.2924, "step": 18058 }, { "epoch": 1.0764095839790202, "grad_norm": 2.924419403076172, "learning_rate": 4.474222745917474e-05, "loss": 1.1874, "step": 18060 }, { "epoch": 1.0765287876981762, "grad_norm": 2.9543306827545166, "learning_rate": 4.473282305290012e-05, "loss": 1.217, "step": 18062 }, { "epoch": 1.0766479914173321, "grad_norm": 2.6317179203033447, "learning_rate": 4.472341883505045e-05, "loss": 1.05, "step": 18064 }, { "epoch": 1.0767671951364883, "grad_norm": 3.233510732650757, "learning_rate": 4.4714014805962126e-05, "loss": 1.1425, "step": 18066 }, { "epoch": 1.0768863988556443, "grad_norm": 3.1916706562042236, "learning_rate": 4.4704610965971585e-05, "loss": 1.192, "step": 18068 }, { "epoch": 1.0770056025748003, "grad_norm": 3.1034064292907715, "learning_rate": 4.469520731541522e-05, "loss": 1.3618, "step": 18070 }, { "epoch": 1.0771248062939565, "grad_norm": 3.5448596477508545, "learning_rate": 4.4685803854629444e-05, "loss": 1.271, "step": 18072 }, { "epoch": 1.0772440100131124, "grad_norm": 3.3767926692962646, "learning_rate": 4.467640058395064e-05, "loss": 1.1912, "step": 18074 }, { "epoch": 1.0773632137322684, "grad_norm": 3.4652645587921143, "learning_rate": 4.4666997503715216e-05, "loss": 1.2781, "step": 18076 }, { "epoch": 1.0774824174514244, "grad_norm": 3.2438716888427734, "learning_rate": 4.465759461425953e-05, "loss": 1.1737, "step": 18078 }, { "epoch": 1.0776016211705806, "grad_norm": 3.1069934368133545, "learning_rate": 4.464819191591996e-05, "loss": 1.1708, "step": 18080 }, { "epoch": 1.0777208248897365, "grad_norm": 3.2030811309814453, "learning_rate": 4.4638789409032895e-05, "loss": 1.1643, "step": 18082 }, { "epoch": 1.0778400286088925, "grad_norm": 3.0335888862609863, "learning_rate": 4.4629387093934654e-05, "loss": 1.2339, "step": 18084 }, { "epoch": 1.0779592323280487, "grad_norm": 3.4005019664764404, "learning_rate": 4.461998497096162e-05, "loss": 1.3286, "step": 18086 }, { "epoch": 1.0780784360472047, "grad_norm": 3.3058643341064453, "learning_rate": 4.461058304045012e-05, "loss": 1.228, "step": 18088 }, { "epoch": 1.0781976397663606, "grad_norm": 3.197357416152954, "learning_rate": 4.4601181302736525e-05, "loss": 1.1882, "step": 18090 }, { "epoch": 1.0783168434855168, "grad_norm": 3.1653008460998535, "learning_rate": 4.4591779758157146e-05, "loss": 1.2343, "step": 18092 }, { "epoch": 1.0784360472046728, "grad_norm": 3.167787790298462, "learning_rate": 4.4582378407048285e-05, "loss": 1.193, "step": 18094 }, { "epoch": 1.0785552509238288, "grad_norm": 2.843881368637085, "learning_rate": 4.4572977249746315e-05, "loss": 1.3295, "step": 18096 }, { "epoch": 1.078674454642985, "grad_norm": 3.3588907718658447, "learning_rate": 4.4563576286587495e-05, "loss": 1.1715, "step": 18098 }, { "epoch": 1.078793658362141, "grad_norm": 3.154634714126587, "learning_rate": 4.455417551790816e-05, "loss": 1.1186, "step": 18100 }, { "epoch": 1.0789128620812969, "grad_norm": 2.9207966327667236, "learning_rate": 4.454477494404459e-05, "loss": 1.2778, "step": 18102 }, { "epoch": 1.079032065800453, "grad_norm": 3.2088141441345215, "learning_rate": 4.45353745653331e-05, "loss": 1.0826, "step": 18104 }, { "epoch": 1.079151269519609, "grad_norm": 3.15031099319458, "learning_rate": 4.452597438210996e-05, "loss": 1.2551, "step": 18106 }, { "epoch": 1.079270473238765, "grad_norm": 3.1860311031341553, "learning_rate": 4.4516574394711426e-05, "loss": 1.3283, "step": 18108 }, { "epoch": 1.079389676957921, "grad_norm": 3.369203567504883, "learning_rate": 4.45071746034738e-05, "loss": 1.1685, "step": 18110 }, { "epoch": 1.0795088806770772, "grad_norm": 3.143239736557007, "learning_rate": 4.449777500873332e-05, "loss": 1.1967, "step": 18112 }, { "epoch": 1.0796280843962331, "grad_norm": 2.938065528869629, "learning_rate": 4.4488375610826274e-05, "loss": 1.2365, "step": 18114 }, { "epoch": 1.0797472881153891, "grad_norm": 2.7618038654327393, "learning_rate": 4.4478976410088874e-05, "loss": 1.2458, "step": 18116 }, { "epoch": 1.0798664918345453, "grad_norm": 3.2063777446746826, "learning_rate": 4.446957740685739e-05, "loss": 1.2756, "step": 18118 }, { "epoch": 1.0799856955537013, "grad_norm": 3.6793746948242188, "learning_rate": 4.4460178601468024e-05, "loss": 1.0886, "step": 18120 }, { "epoch": 1.0801048992728572, "grad_norm": 3.2300703525543213, "learning_rate": 4.445077999425705e-05, "loss": 1.2553, "step": 18122 }, { "epoch": 1.0802241029920134, "grad_norm": 2.8685975074768066, "learning_rate": 4.444138158556065e-05, "loss": 1.0876, "step": 18124 }, { "epoch": 1.0803433067111694, "grad_norm": 2.877033233642578, "learning_rate": 4.443198337571505e-05, "loss": 1.0743, "step": 18126 }, { "epoch": 1.0804625104303254, "grad_norm": 2.964221715927124, "learning_rate": 4.4422585365056465e-05, "loss": 1.2931, "step": 18128 }, { "epoch": 1.0805817141494816, "grad_norm": 3.2050321102142334, "learning_rate": 4.4413187553921066e-05, "loss": 1.2159, "step": 18130 }, { "epoch": 1.0807009178686375, "grad_norm": 3.685798168182373, "learning_rate": 4.440378994264507e-05, "loss": 1.2568, "step": 18132 }, { "epoch": 1.0808201215877935, "grad_norm": 3.0323541164398193, "learning_rate": 4.439439253156465e-05, "loss": 1.1818, "step": 18134 }, { "epoch": 1.0809393253069497, "grad_norm": 3.1107945442199707, "learning_rate": 4.4384995321016e-05, "loss": 1.138, "step": 18136 }, { "epoch": 1.0810585290261057, "grad_norm": 2.9604156017303467, "learning_rate": 4.437559831133528e-05, "loss": 1.2347, "step": 18138 }, { "epoch": 1.0811777327452616, "grad_norm": 3.393956184387207, "learning_rate": 4.436620150285864e-05, "loss": 1.3021, "step": 18140 }, { "epoch": 1.0812969364644176, "grad_norm": 2.93658447265625, "learning_rate": 4.435680489592225e-05, "loss": 1.1807, "step": 18142 }, { "epoch": 1.0814161401835738, "grad_norm": 3.388350486755371, "learning_rate": 4.434740849086226e-05, "loss": 1.2193, "step": 18144 }, { "epoch": 1.0815353439027298, "grad_norm": 3.275336980819702, "learning_rate": 4.433801228801482e-05, "loss": 1.2621, "step": 18146 }, { "epoch": 1.0816545476218857, "grad_norm": 3.0376667976379395, "learning_rate": 4.432861628771603e-05, "loss": 1.1889, "step": 18148 }, { "epoch": 1.081773751341042, "grad_norm": 3.50803804397583, "learning_rate": 4.4319220490302056e-05, "loss": 1.3123, "step": 18150 }, { "epoch": 1.081892955060198, "grad_norm": 2.8574886322021484, "learning_rate": 4.4309824896109006e-05, "loss": 1.2186, "step": 18152 }, { "epoch": 1.0820121587793539, "grad_norm": 3.1035468578338623, "learning_rate": 4.4300429505472976e-05, "loss": 1.213, "step": 18154 }, { "epoch": 1.08213136249851, "grad_norm": 3.2602221965789795, "learning_rate": 4.429103431873009e-05, "loss": 1.2031, "step": 18156 }, { "epoch": 1.082250566217666, "grad_norm": 2.8130531311035156, "learning_rate": 4.4281639336216426e-05, "loss": 1.0599, "step": 18158 }, { "epoch": 1.082369769936822, "grad_norm": 2.9886388778686523, "learning_rate": 4.427224455826811e-05, "loss": 1.3394, "step": 18160 }, { "epoch": 1.0824889736559782, "grad_norm": 3.148563861846924, "learning_rate": 4.426284998522118e-05, "loss": 1.0827, "step": 18162 }, { "epoch": 1.0826081773751342, "grad_norm": 3.187654733657837, "learning_rate": 4.425345561741176e-05, "loss": 1.0861, "step": 18164 }, { "epoch": 1.0827273810942901, "grad_norm": 3.185624599456787, "learning_rate": 4.424406145517589e-05, "loss": 1.2692, "step": 18166 }, { "epoch": 1.082846584813446, "grad_norm": 3.25386643409729, "learning_rate": 4.423466749884962e-05, "loss": 1.24, "step": 18168 }, { "epoch": 1.0829657885326023, "grad_norm": 3.3145415782928467, "learning_rate": 4.4225273748769025e-05, "loss": 1.2989, "step": 18170 }, { "epoch": 1.0830849922517582, "grad_norm": 3.220785140991211, "learning_rate": 4.421588020527014e-05, "loss": 1.2245, "step": 18172 }, { "epoch": 1.0832041959709142, "grad_norm": 2.788698673248291, "learning_rate": 4.420648686868902e-05, "loss": 1.0283, "step": 18174 }, { "epoch": 1.0833233996900704, "grad_norm": 3.0599277019500732, "learning_rate": 4.4197093739361675e-05, "loss": 1.2344, "step": 18176 }, { "epoch": 1.0834426034092264, "grad_norm": 3.197856903076172, "learning_rate": 4.4187700817624164e-05, "loss": 1.3201, "step": 18178 }, { "epoch": 1.0835618071283823, "grad_norm": 3.0136613845825195, "learning_rate": 4.417830810381246e-05, "loss": 1.1445, "step": 18180 }, { "epoch": 1.0836810108475385, "grad_norm": 3.275709390640259, "learning_rate": 4.416891559826261e-05, "loss": 1.1499, "step": 18182 }, { "epoch": 1.0838002145666945, "grad_norm": 3.213085412979126, "learning_rate": 4.4159523301310605e-05, "loss": 1.2617, "step": 18184 }, { "epoch": 1.0839194182858505, "grad_norm": 3.1937310695648193, "learning_rate": 4.4150131213292404e-05, "loss": 1.1426, "step": 18186 }, { "epoch": 1.0840386220050067, "grad_norm": 2.9808905124664307, "learning_rate": 4.414073933454405e-05, "loss": 1.2524, "step": 18188 }, { "epoch": 1.0841578257241626, "grad_norm": 3.0952329635620117, "learning_rate": 4.413134766540148e-05, "loss": 1.2626, "step": 18190 }, { "epoch": 1.0842770294433186, "grad_norm": 2.914005994796753, "learning_rate": 4.4121956206200706e-05, "loss": 1.1368, "step": 18192 }, { "epoch": 1.0843962331624746, "grad_norm": 3.251391887664795, "learning_rate": 4.411256495727765e-05, "loss": 1.1349, "step": 18194 }, { "epoch": 1.0845154368816308, "grad_norm": 3.3510985374450684, "learning_rate": 4.410317391896829e-05, "loss": 1.2193, "step": 18196 }, { "epoch": 1.0846346406007867, "grad_norm": 3.063220262527466, "learning_rate": 4.40937830916086e-05, "loss": 1.1614, "step": 18198 }, { "epoch": 1.0847538443199427, "grad_norm": 3.284296989440918, "learning_rate": 4.408439247553447e-05, "loss": 1.3373, "step": 18200 }, { "epoch": 1.084873048039099, "grad_norm": 3.620316982269287, "learning_rate": 4.4075002071081866e-05, "loss": 1.2692, "step": 18202 }, { "epoch": 1.0849922517582549, "grad_norm": 3.239436149597168, "learning_rate": 4.4065611878586704e-05, "loss": 1.314, "step": 18204 }, { "epoch": 1.0851114554774108, "grad_norm": 3.073960542678833, "learning_rate": 4.405622189838492e-05, "loss": 1.1414, "step": 18206 }, { "epoch": 1.085230659196567, "grad_norm": 3.2985055446624756, "learning_rate": 4.40468321308124e-05, "loss": 1.2064, "step": 18208 }, { "epoch": 1.085349862915723, "grad_norm": 2.9751670360565186, "learning_rate": 4.4037442576205094e-05, "loss": 1.177, "step": 18210 }, { "epoch": 1.085469066634879, "grad_norm": 3.0145654678344727, "learning_rate": 4.4028053234898856e-05, "loss": 1.1512, "step": 18212 }, { "epoch": 1.0855882703540352, "grad_norm": 3.474332571029663, "learning_rate": 4.401866410722957e-05, "loss": 1.3145, "step": 18214 }, { "epoch": 1.0857074740731911, "grad_norm": 3.477714776992798, "learning_rate": 4.4009275193533155e-05, "loss": 1.1669, "step": 18216 }, { "epoch": 1.085826677792347, "grad_norm": 2.718510866165161, "learning_rate": 4.399988649414545e-05, "loss": 1.0572, "step": 18218 }, { "epoch": 1.085945881511503, "grad_norm": 3.183443069458008, "learning_rate": 4.3990498009402336e-05, "loss": 1.2803, "step": 18220 }, { "epoch": 1.0860650852306593, "grad_norm": 3.4895548820495605, "learning_rate": 4.398110973963966e-05, "loss": 1.1889, "step": 18222 }, { "epoch": 1.0861842889498152, "grad_norm": 3.22674298286438, "learning_rate": 4.3971721685193305e-05, "loss": 1.2293, "step": 18224 }, { "epoch": 1.0863034926689712, "grad_norm": 3.096311092376709, "learning_rate": 4.3962333846399076e-05, "loss": 1.2536, "step": 18226 }, { "epoch": 1.0864226963881274, "grad_norm": 3.163907527923584, "learning_rate": 4.395294622359282e-05, "loss": 1.1569, "step": 18228 }, { "epoch": 1.0865419001072834, "grad_norm": 3.1236648559570312, "learning_rate": 4.394355881711039e-05, "loss": 1.1081, "step": 18230 }, { "epoch": 1.0866611038264393, "grad_norm": 3.396897315979004, "learning_rate": 4.393417162728756e-05, "loss": 1.1194, "step": 18232 }, { "epoch": 1.0867803075455955, "grad_norm": 2.9809036254882812, "learning_rate": 4.392478465446017e-05, "loss": 1.1481, "step": 18234 }, { "epoch": 1.0868995112647515, "grad_norm": 3.5294110774993896, "learning_rate": 4.391539789896401e-05, "loss": 1.3253, "step": 18236 }, { "epoch": 1.0870187149839075, "grad_norm": 3.1292569637298584, "learning_rate": 4.3906011361134895e-05, "loss": 1.2668, "step": 18238 }, { "epoch": 1.0871379187030636, "grad_norm": 3.1514954566955566, "learning_rate": 4.3896625041308595e-05, "loss": 1.1369, "step": 18240 }, { "epoch": 1.0872571224222196, "grad_norm": 2.9423468112945557, "learning_rate": 4.388723893982093e-05, "loss": 1.326, "step": 18242 }, { "epoch": 1.0873763261413756, "grad_norm": 3.2639427185058594, "learning_rate": 4.3877853057007615e-05, "loss": 1.1111, "step": 18244 }, { "epoch": 1.0874955298605316, "grad_norm": 3.3289196491241455, "learning_rate": 4.386846739320444e-05, "loss": 1.1475, "step": 18246 }, { "epoch": 1.0876147335796877, "grad_norm": 3.1694231033325195, "learning_rate": 4.3859081948747194e-05, "loss": 1.0556, "step": 18248 }, { "epoch": 1.0877339372988437, "grad_norm": 3.2055745124816895, "learning_rate": 4.3849696723971566e-05, "loss": 1.1634, "step": 18250 }, { "epoch": 1.0878531410179997, "grad_norm": 2.9250290393829346, "learning_rate": 4.384031171921334e-05, "loss": 1.3362, "step": 18252 }, { "epoch": 1.0879723447371559, "grad_norm": 3.0913844108581543, "learning_rate": 4.383092693480824e-05, "loss": 1.1722, "step": 18254 }, { "epoch": 1.0880915484563118, "grad_norm": 2.9907889366149902, "learning_rate": 4.3821542371092e-05, "loss": 1.209, "step": 18256 }, { "epoch": 1.0882107521754678, "grad_norm": 3.328577756881714, "learning_rate": 4.381215802840032e-05, "loss": 1.2043, "step": 18258 }, { "epoch": 1.088329955894624, "grad_norm": 2.8603734970092773, "learning_rate": 4.3802773907068914e-05, "loss": 1.1686, "step": 18260 }, { "epoch": 1.08844915961378, "grad_norm": 3.243654251098633, "learning_rate": 4.3793390007433506e-05, "loss": 1.131, "step": 18262 }, { "epoch": 1.088568363332936, "grad_norm": 2.984652519226074, "learning_rate": 4.3784006329829753e-05, "loss": 1.0632, "step": 18264 }, { "epoch": 1.0886875670520921, "grad_norm": 3.1986098289489746, "learning_rate": 4.377462287459337e-05, "loss": 1.2048, "step": 18266 }, { "epoch": 1.088806770771248, "grad_norm": 3.097644567489624, "learning_rate": 4.376523964206002e-05, "loss": 1.2066, "step": 18268 }, { "epoch": 1.088925974490404, "grad_norm": 3.1097772121429443, "learning_rate": 4.375585663256538e-05, "loss": 1.1938, "step": 18270 }, { "epoch": 1.08904517820956, "grad_norm": 3.5682480335235596, "learning_rate": 4.374647384644514e-05, "loss": 1.2275, "step": 18272 }, { "epoch": 1.0891643819287162, "grad_norm": 2.724923610687256, "learning_rate": 4.3737091284034895e-05, "loss": 1.0898, "step": 18274 }, { "epoch": 1.0892835856478722, "grad_norm": 3.337280035018921, "learning_rate": 4.3727708945670334e-05, "loss": 1.2739, "step": 18276 }, { "epoch": 1.0894027893670282, "grad_norm": 3.3552610874176025, "learning_rate": 4.371832683168707e-05, "loss": 1.2347, "step": 18278 }, { "epoch": 1.0895219930861844, "grad_norm": 3.3422749042510986, "learning_rate": 4.370894494242078e-05, "loss": 1.3905, "step": 18280 }, { "epoch": 1.0896411968053403, "grad_norm": 3.0378851890563965, "learning_rate": 4.3699563278207026e-05, "loss": 1.1893, "step": 18282 }, { "epoch": 1.0897604005244963, "grad_norm": 3.1251983642578125, "learning_rate": 4.369018183938146e-05, "loss": 1.1916, "step": 18284 }, { "epoch": 1.0898796042436525, "grad_norm": 3.122483491897583, "learning_rate": 4.368080062627967e-05, "loss": 1.1732, "step": 18286 }, { "epoch": 1.0899988079628085, "grad_norm": 3.432893753051758, "learning_rate": 4.367141963923728e-05, "loss": 1.1494, "step": 18288 }, { "epoch": 1.0901180116819644, "grad_norm": 3.4352474212646484, "learning_rate": 4.3662038878589865e-05, "loss": 1.2799, "step": 18290 }, { "epoch": 1.0902372154011206, "grad_norm": 3.081193208694458, "learning_rate": 4.3652658344672975e-05, "loss": 1.2048, "step": 18292 }, { "epoch": 1.0903564191202766, "grad_norm": 3.342033624649048, "learning_rate": 4.3643278037822246e-05, "loss": 1.2219, "step": 18294 }, { "epoch": 1.0904756228394326, "grad_norm": 3.2711942195892334, "learning_rate": 4.3633897958373186e-05, "loss": 1.2161, "step": 18296 }, { "epoch": 1.0905948265585885, "grad_norm": 3.283681631088257, "learning_rate": 4.362451810666139e-05, "loss": 1.125, "step": 18298 }, { "epoch": 1.0907140302777447, "grad_norm": 2.6656887531280518, "learning_rate": 4.361513848302238e-05, "loss": 1.0623, "step": 18300 }, { "epoch": 1.0908332339969007, "grad_norm": 3.214078426361084, "learning_rate": 4.360575908779174e-05, "loss": 1.2068, "step": 18302 }, { "epoch": 1.0909524377160567, "grad_norm": 3.5599000453948975, "learning_rate": 4.359637992130496e-05, "loss": 1.2857, "step": 18304 }, { "epoch": 1.0910716414352128, "grad_norm": 2.874769449234009, "learning_rate": 4.358700098389757e-05, "loss": 1.019, "step": 18306 }, { "epoch": 1.0911908451543688, "grad_norm": 3.496025800704956, "learning_rate": 4.357762227590511e-05, "loss": 1.0932, "step": 18308 }, { "epoch": 1.0913100488735248, "grad_norm": 3.024728298187256, "learning_rate": 4.3568243797663054e-05, "loss": 1.1472, "step": 18310 }, { "epoch": 1.091429252592681, "grad_norm": 3.2433691024780273, "learning_rate": 4.355886554950696e-05, "loss": 1.1477, "step": 18312 }, { "epoch": 1.091548456311837, "grad_norm": 3.2326347827911377, "learning_rate": 4.3549487531772256e-05, "loss": 1.1172, "step": 18314 }, { "epoch": 1.091667660030993, "grad_norm": 3.128736972808838, "learning_rate": 4.354010974479446e-05, "loss": 1.3969, "step": 18316 }, { "epoch": 1.091786863750149, "grad_norm": 3.2354578971862793, "learning_rate": 4.353073218890905e-05, "loss": 1.3711, "step": 18318 }, { "epoch": 1.091906067469305, "grad_norm": 2.990220546722412, "learning_rate": 4.352135486445147e-05, "loss": 1.2386, "step": 18320 }, { "epoch": 1.092025271188461, "grad_norm": 3.1480319499969482, "learning_rate": 4.35119777717572e-05, "loss": 1.2048, "step": 18322 }, { "epoch": 1.092144474907617, "grad_norm": 3.4317121505737305, "learning_rate": 4.3502600911161675e-05, "loss": 1.3066, "step": 18324 }, { "epoch": 1.0922636786267732, "grad_norm": 3.110863447189331, "learning_rate": 4.3493224283000367e-05, "loss": 1.1116, "step": 18326 }, { "epoch": 1.0923828823459292, "grad_norm": 3.0849740505218506, "learning_rate": 4.3483847887608665e-05, "loss": 1.1642, "step": 18328 }, { "epoch": 1.0925020860650851, "grad_norm": 3.299708843231201, "learning_rate": 4.347447172532203e-05, "loss": 1.0976, "step": 18330 }, { "epoch": 1.0926212897842413, "grad_norm": 3.371796131134033, "learning_rate": 4.346509579647588e-05, "loss": 1.2381, "step": 18332 }, { "epoch": 1.0927404935033973, "grad_norm": 3.186147928237915, "learning_rate": 4.345572010140559e-05, "loss": 1.198, "step": 18334 }, { "epoch": 1.0928596972225533, "grad_norm": 3.0309011936187744, "learning_rate": 4.344634464044659e-05, "loss": 1.1495, "step": 18336 }, { "epoch": 1.0929789009417095, "grad_norm": 3.0602786540985107, "learning_rate": 4.343696941393426e-05, "loss": 1.1819, "step": 18338 }, { "epoch": 1.0930981046608654, "grad_norm": 3.00095272064209, "learning_rate": 4.3427594422204e-05, "loss": 1.2208, "step": 18340 }, { "epoch": 1.0932173083800214, "grad_norm": 3.2363970279693604, "learning_rate": 4.341821966559117e-05, "loss": 1.2499, "step": 18342 }, { "epoch": 1.0933365120991776, "grad_norm": 3.2038276195526123, "learning_rate": 4.3408845144431145e-05, "loss": 1.4149, "step": 18344 }, { "epoch": 1.0934557158183336, "grad_norm": 2.994084596633911, "learning_rate": 4.339947085905928e-05, "loss": 1.1321, "step": 18346 }, { "epoch": 1.0935749195374895, "grad_norm": 2.8728232383728027, "learning_rate": 4.3390096809810924e-05, "loss": 1.2015, "step": 18348 }, { "epoch": 1.0936941232566455, "grad_norm": 3.2310686111450195, "learning_rate": 4.338072299702143e-05, "loss": 1.2691, "step": 18350 }, { "epoch": 1.0938133269758017, "grad_norm": 3.3358497619628906, "learning_rate": 4.3371349421026105e-05, "loss": 1.3295, "step": 18352 }, { "epoch": 1.0939325306949577, "grad_norm": 3.177185535430908, "learning_rate": 4.33619760821603e-05, "loss": 1.0543, "step": 18354 }, { "epoch": 1.0940517344141136, "grad_norm": 3.242262125015259, "learning_rate": 4.335260298075932e-05, "loss": 1.196, "step": 18356 }, { "epoch": 1.0941709381332698, "grad_norm": 3.1702940464019775, "learning_rate": 4.3343230117158484e-05, "loss": 1.191, "step": 18358 }, { "epoch": 1.0942901418524258, "grad_norm": 3.0584628582000732, "learning_rate": 4.333385749169307e-05, "loss": 1.2643, "step": 18360 }, { "epoch": 1.0944093455715818, "grad_norm": 3.113445520401001, "learning_rate": 4.332448510469839e-05, "loss": 1.1335, "step": 18362 }, { "epoch": 1.094528549290738, "grad_norm": 3.1294682025909424, "learning_rate": 4.331511295650972e-05, "loss": 1.1689, "step": 18364 }, { "epoch": 1.094647753009894, "grad_norm": 3.3297300338745117, "learning_rate": 4.3305741047462326e-05, "loss": 1.0391, "step": 18366 }, { "epoch": 1.09476695672905, "grad_norm": 3.3579752445220947, "learning_rate": 4.3296369377891474e-05, "loss": 1.1727, "step": 18368 }, { "epoch": 1.094886160448206, "grad_norm": 3.016097068786621, "learning_rate": 4.328699794813242e-05, "loss": 1.2422, "step": 18370 }, { "epoch": 1.095005364167362, "grad_norm": 2.8966472148895264, "learning_rate": 4.3277626758520435e-05, "loss": 1.0443, "step": 18372 }, { "epoch": 1.095124567886518, "grad_norm": 2.762221574783325, "learning_rate": 4.3268255809390726e-05, "loss": 1.0686, "step": 18374 }, { "epoch": 1.095243771605674, "grad_norm": 3.039405107498169, "learning_rate": 4.325888510107856e-05, "loss": 1.2652, "step": 18376 }, { "epoch": 1.0953629753248302, "grad_norm": 2.97204327583313, "learning_rate": 4.324951463391913e-05, "loss": 1.2219, "step": 18378 }, { "epoch": 1.0954821790439861, "grad_norm": 3.3516416549682617, "learning_rate": 4.324014440824765e-05, "loss": 1.1497, "step": 18380 }, { "epoch": 1.0956013827631421, "grad_norm": 3.3333494663238525, "learning_rate": 4.323077442439935e-05, "loss": 1.1344, "step": 18382 }, { "epoch": 1.0957205864822983, "grad_norm": 3.2081427574157715, "learning_rate": 4.3221404682709386e-05, "loss": 1.1541, "step": 18384 }, { "epoch": 1.0958397902014543, "grad_norm": 3.45320463180542, "learning_rate": 4.3212035183512976e-05, "loss": 1.1667, "step": 18386 }, { "epoch": 1.0959589939206102, "grad_norm": 3.3695883750915527, "learning_rate": 4.320266592714529e-05, "loss": 1.3519, "step": 18388 }, { "epoch": 1.0960781976397664, "grad_norm": 3.1617517471313477, "learning_rate": 4.3193296913941524e-05, "loss": 1.2661, "step": 18390 }, { "epoch": 1.0961974013589224, "grad_norm": 3.1366963386535645, "learning_rate": 4.3183928144236804e-05, "loss": 1.1277, "step": 18392 }, { "epoch": 1.0963166050780784, "grad_norm": 3.137357234954834, "learning_rate": 4.317455961836628e-05, "loss": 1.2902, "step": 18394 }, { "epoch": 1.0964358087972346, "grad_norm": 3.309896945953369, "learning_rate": 4.316519133666514e-05, "loss": 1.2173, "step": 18396 }, { "epoch": 1.0965550125163905, "grad_norm": 3.2527377605438232, "learning_rate": 4.315582329946846e-05, "loss": 1.2552, "step": 18398 }, { "epoch": 1.0966742162355465, "grad_norm": 3.384751796722412, "learning_rate": 4.314645550711141e-05, "loss": 1.135, "step": 18400 }, { "epoch": 1.0967934199547025, "grad_norm": 3.2778358459472656, "learning_rate": 4.313708795992909e-05, "loss": 1.2269, "step": 18402 }, { "epoch": 1.0969126236738587, "grad_norm": 3.2358264923095703, "learning_rate": 4.3127720658256624e-05, "loss": 1.2281, "step": 18404 }, { "epoch": 1.0970318273930146, "grad_norm": 2.912205934524536, "learning_rate": 4.3118353602429085e-05, "loss": 1.2407, "step": 18406 }, { "epoch": 1.0971510311121706, "grad_norm": 3.4322900772094727, "learning_rate": 4.310898679278162e-05, "loss": 1.2587, "step": 18408 }, { "epoch": 1.0972702348313268, "grad_norm": 3.1721153259277344, "learning_rate": 4.309962022964925e-05, "loss": 1.1965, "step": 18410 }, { "epoch": 1.0973894385504828, "grad_norm": 3.517383098602295, "learning_rate": 4.3090253913367067e-05, "loss": 1.2617, "step": 18412 }, { "epoch": 1.0975086422696387, "grad_norm": 2.8653602600097656, "learning_rate": 4.308088784427017e-05, "loss": 1.0933, "step": 18414 }, { "epoch": 1.097627845988795, "grad_norm": 3.1460206508636475, "learning_rate": 4.3071522022693554e-05, "loss": 1.0598, "step": 18416 }, { "epoch": 1.097747049707951, "grad_norm": 3.3216774463653564, "learning_rate": 4.306215644897232e-05, "loss": 1.2043, "step": 18418 }, { "epoch": 1.0978662534271069, "grad_norm": 3.0717201232910156, "learning_rate": 4.3052791123441484e-05, "loss": 1.2081, "step": 18420 }, { "epoch": 1.097985457146263, "grad_norm": 3.2144737243652344, "learning_rate": 4.3043426046436094e-05, "loss": 1.2164, "step": 18422 }, { "epoch": 1.098104660865419, "grad_norm": 3.5179197788238525, "learning_rate": 4.3034061218291146e-05, "loss": 1.2604, "step": 18424 }, { "epoch": 1.098223864584575, "grad_norm": 3.1950337886810303, "learning_rate": 4.3024696639341645e-05, "loss": 1.1199, "step": 18426 }, { "epoch": 1.098343068303731, "grad_norm": 2.8086841106414795, "learning_rate": 4.301533230992264e-05, "loss": 1.0627, "step": 18428 }, { "epoch": 1.0984622720228872, "grad_norm": 2.909480333328247, "learning_rate": 4.300596823036907e-05, "loss": 1.0371, "step": 18430 }, { "epoch": 1.0985814757420431, "grad_norm": 3.002244472503662, "learning_rate": 4.2996604401015963e-05, "loss": 1.1187, "step": 18432 }, { "epoch": 1.098700679461199, "grad_norm": 2.8181862831115723, "learning_rate": 4.298724082219826e-05, "loss": 1.144, "step": 18434 }, { "epoch": 1.0988198831803553, "grad_norm": 3.2696638107299805, "learning_rate": 4.297787749425096e-05, "loss": 1.1344, "step": 18436 }, { "epoch": 1.0989390868995113, "grad_norm": 2.7684779167175293, "learning_rate": 4.296851441750902e-05, "loss": 1.0222, "step": 18438 }, { "epoch": 1.0990582906186672, "grad_norm": 2.968843936920166, "learning_rate": 4.295915159230736e-05, "loss": 1.1212, "step": 18440 }, { "epoch": 1.0991774943378234, "grad_norm": 3.1779122352600098, "learning_rate": 4.2949789018980934e-05, "loss": 1.3096, "step": 18442 }, { "epoch": 1.0992966980569794, "grad_norm": 3.053821325302124, "learning_rate": 4.2940426697864676e-05, "loss": 1.196, "step": 18444 }, { "epoch": 1.0994159017761354, "grad_norm": 2.8790788650512695, "learning_rate": 4.2931064629293534e-05, "loss": 1.1467, "step": 18446 }, { "epoch": 1.0995351054952915, "grad_norm": 3.0868029594421387, "learning_rate": 4.292170281360237e-05, "loss": 1.2052, "step": 18448 }, { "epoch": 1.0996543092144475, "grad_norm": 3.369652032852173, "learning_rate": 4.2912341251126124e-05, "loss": 1.3122, "step": 18450 }, { "epoch": 1.0997735129336035, "grad_norm": 3.1090316772460938, "learning_rate": 4.2902979942199696e-05, "loss": 1.3255, "step": 18452 }, { "epoch": 1.0998927166527595, "grad_norm": 3.230189085006714, "learning_rate": 4.2893618887157936e-05, "loss": 1.1669, "step": 18454 }, { "epoch": 1.1000119203719156, "grad_norm": 4.1758952140808105, "learning_rate": 4.288425808633575e-05, "loss": 1.1762, "step": 18456 }, { "epoch": 1.1001311240910716, "grad_norm": 3.047778367996216, "learning_rate": 4.2874897540067994e-05, "loss": 1.1298, "step": 18458 }, { "epoch": 1.1002503278102276, "grad_norm": 3.1649868488311768, "learning_rate": 4.2865537248689544e-05, "loss": 1.3172, "step": 18460 }, { "epoch": 1.1003695315293838, "grad_norm": 2.8677611351013184, "learning_rate": 4.285617721253522e-05, "loss": 1.026, "step": 18462 }, { "epoch": 1.1004887352485397, "grad_norm": 3.2684216499328613, "learning_rate": 4.2846817431939895e-05, "loss": 1.2169, "step": 18464 }, { "epoch": 1.1006079389676957, "grad_norm": 3.1049327850341797, "learning_rate": 4.283745790723836e-05, "loss": 1.1406, "step": 18466 }, { "epoch": 1.100727142686852, "grad_norm": 3.0598063468933105, "learning_rate": 4.282809863876549e-05, "loss": 1.316, "step": 18468 }, { "epoch": 1.1008463464060079, "grad_norm": 3.5758068561553955, "learning_rate": 4.2818739626856074e-05, "loss": 1.3742, "step": 18470 }, { "epoch": 1.1009655501251638, "grad_norm": 2.9867303371429443, "learning_rate": 4.280938087184489e-05, "loss": 1.0836, "step": 18472 }, { "epoch": 1.10108475384432, "grad_norm": 3.1404149532318115, "learning_rate": 4.280002237406676e-05, "loss": 1.2385, "step": 18474 }, { "epoch": 1.101203957563476, "grad_norm": 3.2071523666381836, "learning_rate": 4.279066413385646e-05, "loss": 1.2174, "step": 18476 }, { "epoch": 1.101323161282632, "grad_norm": 3.5434858798980713, "learning_rate": 4.27813061515488e-05, "loss": 1.4047, "step": 18478 }, { "epoch": 1.1014423650017882, "grad_norm": 3.3894240856170654, "learning_rate": 4.277194842747848e-05, "loss": 1.2407, "step": 18480 }, { "epoch": 1.1015615687209441, "grad_norm": 3.144144296646118, "learning_rate": 4.276259096198031e-05, "loss": 1.1571, "step": 18482 }, { "epoch": 1.1016807724401, "grad_norm": 3.0222198963165283, "learning_rate": 4.275323375538904e-05, "loss": 1.0484, "step": 18484 }, { "epoch": 1.101799976159256, "grad_norm": 3.297799825668335, "learning_rate": 4.274387680803936e-05, "loss": 1.1693, "step": 18486 }, { "epoch": 1.1019191798784123, "grad_norm": 3.0565059185028076, "learning_rate": 4.273452012026606e-05, "loss": 1.0986, "step": 18488 }, { "epoch": 1.1020383835975682, "grad_norm": 3.2253897190093994, "learning_rate": 4.272516369240381e-05, "loss": 1.2808, "step": 18490 }, { "epoch": 1.1021575873167242, "grad_norm": 2.961958169937134, "learning_rate": 4.2715807524787375e-05, "loss": 1.2822, "step": 18492 }, { "epoch": 1.1022767910358804, "grad_norm": 3.4199306964874268, "learning_rate": 4.27064516177514e-05, "loss": 1.3571, "step": 18494 }, { "epoch": 1.1023959947550364, "grad_norm": 3.2049636840820312, "learning_rate": 4.269709597163062e-05, "loss": 1.1556, "step": 18496 }, { "epoch": 1.1025151984741923, "grad_norm": 3.321399688720703, "learning_rate": 4.2687740586759717e-05, "loss": 1.1701, "step": 18498 }, { "epoch": 1.1026344021933485, "grad_norm": 2.797175645828247, "learning_rate": 4.267838546347332e-05, "loss": 1.215, "step": 18500 }, { "epoch": 1.1027536059125045, "grad_norm": 3.3623461723327637, "learning_rate": 4.266903060210615e-05, "loss": 1.1783, "step": 18502 }, { "epoch": 1.1028728096316605, "grad_norm": 3.062037467956543, "learning_rate": 4.265967600299282e-05, "loss": 1.2251, "step": 18504 }, { "epoch": 1.1029920133508166, "grad_norm": 3.2519571781158447, "learning_rate": 4.2650321666468005e-05, "loss": 1.1239, "step": 18506 }, { "epoch": 1.1031112170699726, "grad_norm": 2.825723648071289, "learning_rate": 4.264096759286632e-05, "loss": 1.0435, "step": 18508 }, { "epoch": 1.1032304207891286, "grad_norm": 3.0290956497192383, "learning_rate": 4.2631613782522426e-05, "loss": 1.0607, "step": 18510 }, { "epoch": 1.1033496245082848, "grad_norm": 3.0915729999542236, "learning_rate": 4.262226023577089e-05, "loss": 1.2072, "step": 18512 }, { "epoch": 1.1034688282274407, "grad_norm": 3.1121208667755127, "learning_rate": 4.261290695294636e-05, "loss": 1.1933, "step": 18514 }, { "epoch": 1.1035880319465967, "grad_norm": 3.1891419887542725, "learning_rate": 4.260355393438344e-05, "loss": 1.2095, "step": 18516 }, { "epoch": 1.1037072356657527, "grad_norm": 3.1187849044799805, "learning_rate": 4.2594201180416685e-05, "loss": 1.2135, "step": 18518 }, { "epoch": 1.1038264393849089, "grad_norm": 2.776707410812378, "learning_rate": 4.258484869138069e-05, "loss": 1.2501, "step": 18520 }, { "epoch": 1.1039456431040648, "grad_norm": 3.7051727771759033, "learning_rate": 4.257549646761003e-05, "loss": 1.3038, "step": 18522 }, { "epoch": 1.1040648468232208, "grad_norm": 3.1486639976501465, "learning_rate": 4.256614450943928e-05, "loss": 1.2463, "step": 18524 }, { "epoch": 1.104184050542377, "grad_norm": 2.891390562057495, "learning_rate": 4.255679281720295e-05, "loss": 1.2641, "step": 18526 }, { "epoch": 1.104303254261533, "grad_norm": 2.6873910427093506, "learning_rate": 4.254744139123562e-05, "loss": 1.2131, "step": 18528 }, { "epoch": 1.104422457980689, "grad_norm": 3.059033155441284, "learning_rate": 4.253809023187182e-05, "loss": 1.1147, "step": 18530 }, { "epoch": 1.1045416616998451, "grad_norm": 2.5529520511627197, "learning_rate": 4.2528739339446046e-05, "loss": 1.1091, "step": 18532 }, { "epoch": 1.104660865419001, "grad_norm": 3.6663920879364014, "learning_rate": 4.2519388714292834e-05, "loss": 1.2205, "step": 18534 }, { "epoch": 1.104780069138157, "grad_norm": 3.2067160606384277, "learning_rate": 4.251003835674668e-05, "loss": 1.1972, "step": 18536 }, { "epoch": 1.1048992728573133, "grad_norm": 3.5279932022094727, "learning_rate": 4.250068826714209e-05, "loss": 1.2894, "step": 18538 }, { "epoch": 1.1050184765764692, "grad_norm": 3.4459030628204346, "learning_rate": 4.249133844581352e-05, "loss": 1.1407, "step": 18540 }, { "epoch": 1.1051376802956252, "grad_norm": 3.255676507949829, "learning_rate": 4.2481988893095485e-05, "loss": 1.2027, "step": 18542 }, { "epoch": 1.1052568840147812, "grad_norm": 3.34374737739563, "learning_rate": 4.247263960932243e-05, "loss": 1.2049, "step": 18544 }, { "epoch": 1.1053760877339374, "grad_norm": 3.2094478607177734, "learning_rate": 4.246329059482879e-05, "loss": 1.0787, "step": 18546 }, { "epoch": 1.1054952914530933, "grad_norm": 2.913778781890869, "learning_rate": 4.245394184994906e-05, "loss": 1.1057, "step": 18548 }, { "epoch": 1.1056144951722493, "grad_norm": 3.203503370285034, "learning_rate": 4.244459337501763e-05, "loss": 1.168, "step": 18550 }, { "epoch": 1.1057336988914055, "grad_norm": 3.238992214202881, "learning_rate": 4.243524517036895e-05, "loss": 1.207, "step": 18552 }, { "epoch": 1.1058529026105615, "grad_norm": 3.2504007816314697, "learning_rate": 4.242589723633742e-05, "loss": 1.118, "step": 18554 }, { "epoch": 1.1059721063297174, "grad_norm": 3.395110845565796, "learning_rate": 4.241654957325748e-05, "loss": 1.2353, "step": 18556 }, { "epoch": 1.1060913100488736, "grad_norm": 2.963261365890503, "learning_rate": 4.2407202181463516e-05, "loss": 1.2112, "step": 18558 }, { "epoch": 1.1062105137680296, "grad_norm": 3.1168253421783447, "learning_rate": 4.2397855061289874e-05, "loss": 1.3654, "step": 18560 }, { "epoch": 1.1063297174871856, "grad_norm": 3.0751893520355225, "learning_rate": 4.238850821307101e-05, "loss": 1.2157, "step": 18562 }, { "epoch": 1.1064489212063418, "grad_norm": 3.250741958618164, "learning_rate": 4.237916163714121e-05, "loss": 1.3359, "step": 18564 }, { "epoch": 1.1065681249254977, "grad_norm": 3.0373756885528564, "learning_rate": 4.236981533383489e-05, "loss": 1.1861, "step": 18566 }, { "epoch": 1.1066873286446537, "grad_norm": 2.8397581577301025, "learning_rate": 4.236046930348637e-05, "loss": 1.191, "step": 18568 }, { "epoch": 1.1068065323638097, "grad_norm": 3.321340799331665, "learning_rate": 4.235112354643002e-05, "loss": 1.235, "step": 18570 }, { "epoch": 1.1069257360829658, "grad_norm": 2.8278846740722656, "learning_rate": 4.234177806300013e-05, "loss": 1.1238, "step": 18572 }, { "epoch": 1.1070449398021218, "grad_norm": 3.2395403385162354, "learning_rate": 4.233243285353107e-05, "loss": 1.2513, "step": 18574 }, { "epoch": 1.1071641435212778, "grad_norm": 3.348597526550293, "learning_rate": 4.2323087918357105e-05, "loss": 1.2816, "step": 18576 }, { "epoch": 1.107283347240434, "grad_norm": 2.876657009124756, "learning_rate": 4.231374325781254e-05, "loss": 1.1822, "step": 18578 }, { "epoch": 1.10740255095959, "grad_norm": 3.4338631629943848, "learning_rate": 4.2304398872231705e-05, "loss": 1.1947, "step": 18580 }, { "epoch": 1.107521754678746, "grad_norm": 3.029911518096924, "learning_rate": 4.229505476194883e-05, "loss": 1.2377, "step": 18582 }, { "epoch": 1.107640958397902, "grad_norm": 2.878101110458374, "learning_rate": 4.228571092729821e-05, "loss": 1.1623, "step": 18584 }, { "epoch": 1.107760162117058, "grad_norm": 3.426748275756836, "learning_rate": 4.22763673686141e-05, "loss": 1.1361, "step": 18586 }, { "epoch": 1.107879365836214, "grad_norm": 3.317868947982788, "learning_rate": 4.226702408623078e-05, "loss": 1.1849, "step": 18588 }, { "epoch": 1.1079985695553702, "grad_norm": 2.8723182678222656, "learning_rate": 4.225768108048245e-05, "loss": 1.1726, "step": 18590 }, { "epoch": 1.1081177732745262, "grad_norm": 2.9854321479797363, "learning_rate": 4.224833835170335e-05, "loss": 1.22, "step": 18592 }, { "epoch": 1.1082369769936822, "grad_norm": 3.075544834136963, "learning_rate": 4.223899590022773e-05, "loss": 1.2361, "step": 18594 }, { "epoch": 1.1083561807128381, "grad_norm": 3.0865061283111572, "learning_rate": 4.2229653726389765e-05, "loss": 1.2374, "step": 18596 }, { "epoch": 1.1084753844319943, "grad_norm": 4.094607353210449, "learning_rate": 4.2220311830523675e-05, "loss": 1.149, "step": 18598 }, { "epoch": 1.1085945881511503, "grad_norm": 3.0983760356903076, "learning_rate": 4.221097021296364e-05, "loss": 1.2789, "step": 18600 }, { "epoch": 1.1087137918703063, "grad_norm": 3.055426836013794, "learning_rate": 4.2201628874043855e-05, "loss": 1.1546, "step": 18602 }, { "epoch": 1.1088329955894625, "grad_norm": 3.0787150859832764, "learning_rate": 4.219228781409851e-05, "loss": 1.2657, "step": 18604 }, { "epoch": 1.1089521993086184, "grad_norm": 3.008333683013916, "learning_rate": 4.218294703346171e-05, "loss": 1.2525, "step": 18606 }, { "epoch": 1.1090714030277744, "grad_norm": 3.0325326919555664, "learning_rate": 4.217360653246766e-05, "loss": 1.1388, "step": 18608 }, { "epoch": 1.1091906067469306, "grad_norm": 3.026806116104126, "learning_rate": 4.216426631145047e-05, "loss": 1.0729, "step": 18610 }, { "epoch": 1.1093098104660866, "grad_norm": 3.2676806449890137, "learning_rate": 4.2154926370744305e-05, "loss": 1.1601, "step": 18612 }, { "epoch": 1.1094290141852425, "grad_norm": 3.1168696880340576, "learning_rate": 4.2145586710683235e-05, "loss": 1.1689, "step": 18614 }, { "epoch": 1.1095482179043987, "grad_norm": 2.8685717582702637, "learning_rate": 4.2136247331601425e-05, "loss": 0.9754, "step": 18616 }, { "epoch": 1.1096674216235547, "grad_norm": 3.569197654724121, "learning_rate": 4.212690823383296e-05, "loss": 1.1726, "step": 18618 }, { "epoch": 1.1097866253427107, "grad_norm": 3.162541151046753, "learning_rate": 4.2117569417711906e-05, "loss": 1.2205, "step": 18620 }, { "epoch": 1.1099058290618666, "grad_norm": 3.035885810852051, "learning_rate": 4.210823088357237e-05, "loss": 1.0535, "step": 18622 }, { "epoch": 1.1100250327810228, "grad_norm": 3.1808342933654785, "learning_rate": 4.209889263174841e-05, "loss": 1.0852, "step": 18624 }, { "epoch": 1.1101442365001788, "grad_norm": 2.8239645957946777, "learning_rate": 4.208955466257411e-05, "loss": 1.1609, "step": 18626 }, { "epoch": 1.1102634402193348, "grad_norm": 3.1720128059387207, "learning_rate": 4.2080216976383485e-05, "loss": 1.1645, "step": 18628 }, { "epoch": 1.110382643938491, "grad_norm": 2.809173345565796, "learning_rate": 4.2070879573510606e-05, "loss": 1.2506, "step": 18630 }, { "epoch": 1.110501847657647, "grad_norm": 2.8889541625976562, "learning_rate": 4.206154245428949e-05, "loss": 1.2011, "step": 18632 }, { "epoch": 1.110621051376803, "grad_norm": 3.1360714435577393, "learning_rate": 4.205220561905416e-05, "loss": 1.2215, "step": 18634 }, { "epoch": 1.110740255095959, "grad_norm": 2.8157565593719482, "learning_rate": 4.204286906813865e-05, "loss": 1.2535, "step": 18636 }, { "epoch": 1.110859458815115, "grad_norm": 3.180894136428833, "learning_rate": 4.203353280187692e-05, "loss": 1.264, "step": 18638 }, { "epoch": 1.110978662534271, "grad_norm": 3.0042412281036377, "learning_rate": 4.202419682060298e-05, "loss": 1.2903, "step": 18640 }, { "epoch": 1.1110978662534272, "grad_norm": 3.274226665496826, "learning_rate": 4.2014861124650795e-05, "loss": 1.2305, "step": 18642 }, { "epoch": 1.1112170699725832, "grad_norm": 3.0612070560455322, "learning_rate": 4.2005525714354375e-05, "loss": 1.125, "step": 18644 }, { "epoch": 1.1113362736917392, "grad_norm": 3.1407148838043213, "learning_rate": 4.199619059004763e-05, "loss": 1.2134, "step": 18646 }, { "epoch": 1.1114554774108951, "grad_norm": 2.9389450550079346, "learning_rate": 4.1986855752064555e-05, "loss": 1.2057, "step": 18648 }, { "epoch": 1.1115746811300513, "grad_norm": 2.9398255348205566, "learning_rate": 4.197752120073906e-05, "loss": 1.1007, "step": 18650 }, { "epoch": 1.1116938848492073, "grad_norm": 2.818570137023926, "learning_rate": 4.196818693640507e-05, "loss": 1.1594, "step": 18652 }, { "epoch": 1.1118130885683633, "grad_norm": 3.354250907897949, "learning_rate": 4.195885295939652e-05, "loss": 1.2257, "step": 18654 }, { "epoch": 1.1119322922875194, "grad_norm": 3.2155568599700928, "learning_rate": 4.19495192700473e-05, "loss": 1.0895, "step": 18656 }, { "epoch": 1.1120514960066754, "grad_norm": 2.979051351547241, "learning_rate": 4.1940185868691344e-05, "loss": 1.0447, "step": 18658 }, { "epoch": 1.1121706997258314, "grad_norm": 3.2100741863250732, "learning_rate": 4.19308527556625e-05, "loss": 1.1769, "step": 18660 }, { "epoch": 1.1122899034449876, "grad_norm": 3.123199462890625, "learning_rate": 4.192151993129466e-05, "loss": 1.0282, "step": 18662 }, { "epoch": 1.1124091071641435, "grad_norm": 2.8367178440093994, "learning_rate": 4.191218739592171e-05, "loss": 1.2382, "step": 18664 }, { "epoch": 1.1125283108832995, "grad_norm": 3.086827516555786, "learning_rate": 4.1902855149877464e-05, "loss": 1.208, "step": 18666 }, { "epoch": 1.1126475146024557, "grad_norm": 3.207515239715576, "learning_rate": 4.189352319349581e-05, "loss": 1.1235, "step": 18668 }, { "epoch": 1.1127667183216117, "grad_norm": 3.2202274799346924, "learning_rate": 4.188419152711055e-05, "loss": 1.181, "step": 18670 }, { "epoch": 1.1128859220407676, "grad_norm": 3.2005844116210938, "learning_rate": 4.187486015105554e-05, "loss": 1.2208, "step": 18672 }, { "epoch": 1.1130051257599236, "grad_norm": 3.2516438961029053, "learning_rate": 4.1865529065664576e-05, "loss": 1.222, "step": 18674 }, { "epoch": 1.1131243294790798, "grad_norm": 3.3783979415893555, "learning_rate": 4.1856198271271485e-05, "loss": 1.3537, "step": 18676 }, { "epoch": 1.1132435331982358, "grad_norm": 3.3065483570098877, "learning_rate": 4.1846867768210044e-05, "loss": 1.2143, "step": 18678 }, { "epoch": 1.1133627369173917, "grad_norm": 3.0129764080047607, "learning_rate": 4.183753755681401e-05, "loss": 1.1402, "step": 18680 }, { "epoch": 1.113481940636548, "grad_norm": 3.0717878341674805, "learning_rate": 4.182820763741722e-05, "loss": 1.1337, "step": 18682 }, { "epoch": 1.113601144355704, "grad_norm": 2.9807932376861572, "learning_rate": 4.181887801035338e-05, "loss": 1.1296, "step": 18684 }, { "epoch": 1.1137203480748599, "grad_norm": 3.2975330352783203, "learning_rate": 4.180954867595627e-05, "loss": 1.1579, "step": 18686 }, { "epoch": 1.113839551794016, "grad_norm": 3.280879020690918, "learning_rate": 4.1800219634559624e-05, "loss": 1.2755, "step": 18688 }, { "epoch": 1.113958755513172, "grad_norm": 3.1200971603393555, "learning_rate": 4.1790890886497194e-05, "loss": 1.1274, "step": 18690 }, { "epoch": 1.114077959232328, "grad_norm": 3.0746538639068604, "learning_rate": 4.178156243210266e-05, "loss": 1.2782, "step": 18692 }, { "epoch": 1.1141971629514842, "grad_norm": 3.167433977127075, "learning_rate": 4.177223427170978e-05, "loss": 1.1546, "step": 18694 }, { "epoch": 1.1143163666706402, "grad_norm": 3.1881070137023926, "learning_rate": 4.1762906405652226e-05, "loss": 1.1566, "step": 18696 }, { "epoch": 1.1144355703897961, "grad_norm": 3.4178082942962646, "learning_rate": 4.175357883426368e-05, "loss": 1.2212, "step": 18698 }, { "epoch": 1.114554774108952, "grad_norm": 3.499516487121582, "learning_rate": 4.1744251557877846e-05, "loss": 1.1299, "step": 18700 }, { "epoch": 1.1146739778281083, "grad_norm": 3.3664145469665527, "learning_rate": 4.173492457682836e-05, "loss": 1.2479, "step": 18702 }, { "epoch": 1.1147931815472643, "grad_norm": 3.1368958950042725, "learning_rate": 4.172559789144892e-05, "loss": 1.1454, "step": 18704 }, { "epoch": 1.1149123852664202, "grad_norm": 3.290560483932495, "learning_rate": 4.171627150207314e-05, "loss": 1.1231, "step": 18706 }, { "epoch": 1.1150315889855764, "grad_norm": 3.197371482849121, "learning_rate": 4.170694540903469e-05, "loss": 1.2416, "step": 18708 }, { "epoch": 1.1151507927047324, "grad_norm": 3.2000255584716797, "learning_rate": 4.1697619612667175e-05, "loss": 1.1636, "step": 18710 }, { "epoch": 1.1152699964238884, "grad_norm": 2.8729772567749023, "learning_rate": 4.168829411330419e-05, "loss": 1.1506, "step": 18712 }, { "epoch": 1.1153892001430445, "grad_norm": 3.501983404159546, "learning_rate": 4.16789689112794e-05, "loss": 1.1933, "step": 18714 }, { "epoch": 1.1155084038622005, "grad_norm": 3.0961434841156006, "learning_rate": 4.166964400692633e-05, "loss": 1.1065, "step": 18716 }, { "epoch": 1.1156276075813565, "grad_norm": 3.346205234527588, "learning_rate": 4.166031940057861e-05, "loss": 1.2179, "step": 18718 }, { "epoch": 1.1157468113005127, "grad_norm": 3.23234224319458, "learning_rate": 4.165099509256978e-05, "loss": 1.2773, "step": 18720 }, { "epoch": 1.1158660150196686, "grad_norm": 3.2560038566589355, "learning_rate": 4.1641671083233454e-05, "loss": 1.2345, "step": 18722 }, { "epoch": 1.1159852187388246, "grad_norm": 2.845350503921509, "learning_rate": 4.163234737290314e-05, "loss": 1.1793, "step": 18724 }, { "epoch": 1.1161044224579806, "grad_norm": 2.8906238079071045, "learning_rate": 4.162302396191237e-05, "loss": 1.099, "step": 18726 }, { "epoch": 1.1162236261771368, "grad_norm": 3.228952407836914, "learning_rate": 4.1613700850594725e-05, "loss": 1.1927, "step": 18728 }, { "epoch": 1.1163428298962927, "grad_norm": 3.0528719425201416, "learning_rate": 4.1604378039283664e-05, "loss": 1.0958, "step": 18730 }, { "epoch": 1.1164620336154487, "grad_norm": 3.33988094329834, "learning_rate": 4.1595055528312746e-05, "loss": 1.0943, "step": 18732 }, { "epoch": 1.116581237334605, "grad_norm": 3.266011953353882, "learning_rate": 4.158573331801543e-05, "loss": 1.3063, "step": 18734 }, { "epoch": 1.1167004410537609, "grad_norm": 3.1743428707122803, "learning_rate": 4.157641140872523e-05, "loss": 1.3198, "step": 18736 }, { "epoch": 1.1168196447729168, "grad_norm": 3.0928573608398438, "learning_rate": 4.156708980077562e-05, "loss": 1.1954, "step": 18738 }, { "epoch": 1.116938848492073, "grad_norm": 3.0862483978271484, "learning_rate": 4.155776849450007e-05, "loss": 1.1473, "step": 18740 }, { "epoch": 1.117058052211229, "grad_norm": 3.0902774333953857, "learning_rate": 4.154844749023201e-05, "loss": 1.157, "step": 18742 }, { "epoch": 1.117177255930385, "grad_norm": 3.0497400760650635, "learning_rate": 4.15391267883049e-05, "loss": 1.1126, "step": 18744 }, { "epoch": 1.1172964596495412, "grad_norm": 3.2697830200195312, "learning_rate": 4.1529806389052194e-05, "loss": 1.2687, "step": 18746 }, { "epoch": 1.1174156633686971, "grad_norm": 3.5259509086608887, "learning_rate": 4.152048629280727e-05, "loss": 1.3084, "step": 18748 }, { "epoch": 1.117534867087853, "grad_norm": 2.948220729827881, "learning_rate": 4.1511166499903575e-05, "loss": 1.2218, "step": 18750 }, { "epoch": 1.117654070807009, "grad_norm": 3.25516676902771, "learning_rate": 4.150184701067449e-05, "loss": 1.2598, "step": 18752 }, { "epoch": 1.1177732745261653, "grad_norm": 2.967768907546997, "learning_rate": 4.149252782545343e-05, "loss": 1.3236, "step": 18754 }, { "epoch": 1.1178924782453212, "grad_norm": 2.788313388824463, "learning_rate": 4.148320894457375e-05, "loss": 1.1608, "step": 18756 }, { "epoch": 1.1180116819644772, "grad_norm": 2.749967575073242, "learning_rate": 4.147389036836881e-05, "loss": 1.0615, "step": 18758 }, { "epoch": 1.1181308856836334, "grad_norm": 3.059570789337158, "learning_rate": 4.146457209717201e-05, "loss": 1.3214, "step": 18760 }, { "epoch": 1.1182500894027894, "grad_norm": 2.8594558238983154, "learning_rate": 4.145525413131665e-05, "loss": 1.1104, "step": 18762 }, { "epoch": 1.1183692931219453, "grad_norm": 3.18788480758667, "learning_rate": 4.144593647113609e-05, "loss": 1.1949, "step": 18764 }, { "epoch": 1.1184884968411015, "grad_norm": 3.0542407035827637, "learning_rate": 4.143661911696364e-05, "loss": 1.2438, "step": 18766 }, { "epoch": 1.1186077005602575, "grad_norm": 3.099443197250366, "learning_rate": 4.142730206913264e-05, "loss": 1.1342, "step": 18768 }, { "epoch": 1.1187269042794135, "grad_norm": 3.1187870502471924, "learning_rate": 4.141798532797638e-05, "loss": 1.1543, "step": 18770 }, { "epoch": 1.1188461079985697, "grad_norm": 2.8469064235687256, "learning_rate": 4.140866889382814e-05, "loss": 1.0111, "step": 18772 }, { "epoch": 1.1189653117177256, "grad_norm": 2.9575693607330322, "learning_rate": 4.13993527670212e-05, "loss": 1.1136, "step": 18774 }, { "epoch": 1.1190845154368816, "grad_norm": 3.338273525238037, "learning_rate": 4.139003694788884e-05, "loss": 1.186, "step": 18776 }, { "epoch": 1.1192037191560376, "grad_norm": 3.2790820598602295, "learning_rate": 4.138072143676434e-05, "loss": 1.0964, "step": 18778 }, { "epoch": 1.1193229228751937, "grad_norm": 3.4195501804351807, "learning_rate": 4.1371406233980896e-05, "loss": 1.168, "step": 18780 }, { "epoch": 1.1194421265943497, "grad_norm": 3.044800281524658, "learning_rate": 4.136209133987179e-05, "loss": 1.0862, "step": 18782 }, { "epoch": 1.1195613303135057, "grad_norm": 3.1635255813598633, "learning_rate": 4.135277675477024e-05, "loss": 1.2133, "step": 18784 }, { "epoch": 1.1196805340326619, "grad_norm": 3.013427495956421, "learning_rate": 4.134346247900943e-05, "loss": 1.2604, "step": 18786 }, { "epoch": 1.1197997377518178, "grad_norm": 3.18829607963562, "learning_rate": 4.133414851292259e-05, "loss": 1.2647, "step": 18788 }, { "epoch": 1.1199189414709738, "grad_norm": 3.2739927768707275, "learning_rate": 4.13248348568429e-05, "loss": 1.0994, "step": 18790 }, { "epoch": 1.12003814519013, "grad_norm": 3.140840768814087, "learning_rate": 4.1315521511103575e-05, "loss": 1.1968, "step": 18792 }, { "epoch": 1.120157348909286, "grad_norm": 3.5159428119659424, "learning_rate": 4.130620847603773e-05, "loss": 1.2024, "step": 18794 }, { "epoch": 1.120276552628442, "grad_norm": 3.1746277809143066, "learning_rate": 4.129689575197857e-05, "loss": 1.2037, "step": 18796 }, { "epoch": 1.1203957563475981, "grad_norm": 2.9611096382141113, "learning_rate": 4.1287583339259214e-05, "loss": 1.1154, "step": 18798 }, { "epoch": 1.120514960066754, "grad_norm": 3.137561082839966, "learning_rate": 4.1278271238212826e-05, "loss": 1.1343, "step": 18800 }, { "epoch": 1.12063416378591, "grad_norm": 3.063007354736328, "learning_rate": 4.126895944917252e-05, "loss": 1.1171, "step": 18802 }, { "epoch": 1.120753367505066, "grad_norm": 3.3742666244506836, "learning_rate": 4.1259647972471394e-05, "loss": 1.2449, "step": 18804 }, { "epoch": 1.1208725712242222, "grad_norm": 3.180894136428833, "learning_rate": 4.125033680844257e-05, "loss": 1.3125, "step": 18806 }, { "epoch": 1.1209917749433782, "grad_norm": 3.250792980194092, "learning_rate": 4.124102595741912e-05, "loss": 1.0478, "step": 18808 }, { "epoch": 1.1211109786625342, "grad_norm": 3.182344913482666, "learning_rate": 4.1231715419734175e-05, "loss": 1.2403, "step": 18810 }, { "epoch": 1.1212301823816904, "grad_norm": 3.360288143157959, "learning_rate": 4.122240519572074e-05, "loss": 1.1713, "step": 18812 }, { "epoch": 1.1213493861008463, "grad_norm": 3.157890796661377, "learning_rate": 4.121309528571191e-05, "loss": 1.1926, "step": 18814 }, { "epoch": 1.1214685898200023, "grad_norm": 3.3148086071014404, "learning_rate": 4.120378569004074e-05, "loss": 1.134, "step": 18816 }, { "epoch": 1.1215877935391585, "grad_norm": 3.127373218536377, "learning_rate": 4.1194476409040234e-05, "loss": 1.1679, "step": 18818 }, { "epoch": 1.1217069972583145, "grad_norm": 3.2553281784057617, "learning_rate": 4.1185167443043446e-05, "loss": 1.2123, "step": 18820 }, { "epoch": 1.1218262009774704, "grad_norm": 3.039255142211914, "learning_rate": 4.117585879238336e-05, "loss": 1.2292, "step": 18822 }, { "epoch": 1.1219454046966266, "grad_norm": 3.4275450706481934, "learning_rate": 4.1166550457393024e-05, "loss": 1.2379, "step": 18824 }, { "epoch": 1.1220646084157826, "grad_norm": 3.380028009414673, "learning_rate": 4.1157242438405376e-05, "loss": 1.1538, "step": 18826 }, { "epoch": 1.1221838121349386, "grad_norm": 3.273376226425171, "learning_rate": 4.114793473575344e-05, "loss": 1.2572, "step": 18828 }, { "epoch": 1.1223030158540945, "grad_norm": 2.965040683746338, "learning_rate": 4.113862734977017e-05, "loss": 1.1274, "step": 18830 }, { "epoch": 1.1224222195732507, "grad_norm": 2.8342833518981934, "learning_rate": 4.112932028078848e-05, "loss": 1.2639, "step": 18832 }, { "epoch": 1.1225414232924067, "grad_norm": 3.3185415267944336, "learning_rate": 4.1120013529141396e-05, "loss": 1.1446, "step": 18834 }, { "epoch": 1.1226606270115627, "grad_norm": 3.1149613857269287, "learning_rate": 4.1110707095161785e-05, "loss": 1.1929, "step": 18836 }, { "epoch": 1.1227798307307189, "grad_norm": 2.9667413234710693, "learning_rate": 4.110140097918261e-05, "loss": 1.1426, "step": 18838 }, { "epoch": 1.1228990344498748, "grad_norm": 3.1188266277313232, "learning_rate": 4.109209518153675e-05, "loss": 1.1635, "step": 18840 }, { "epoch": 1.1230182381690308, "grad_norm": 3.430084466934204, "learning_rate": 4.1082789702557155e-05, "loss": 1.0504, "step": 18842 }, { "epoch": 1.123137441888187, "grad_norm": 3.2401390075683594, "learning_rate": 4.107348454257666e-05, "loss": 1.2958, "step": 18844 }, { "epoch": 1.123256645607343, "grad_norm": 3.489659547805786, "learning_rate": 4.106417970192816e-05, "loss": 1.1558, "step": 18846 }, { "epoch": 1.123375849326499, "grad_norm": 3.125960111618042, "learning_rate": 4.105487518094455e-05, "loss": 1.1379, "step": 18848 }, { "epoch": 1.1234950530456551, "grad_norm": 3.23531436920166, "learning_rate": 4.104557097995864e-05, "loss": 1.2624, "step": 18850 }, { "epoch": 1.123614256764811, "grad_norm": 3.4247751235961914, "learning_rate": 4.10362670993033e-05, "loss": 1.1936, "step": 18852 }, { "epoch": 1.123733460483967, "grad_norm": 3.072096109390259, "learning_rate": 4.1026963539311343e-05, "loss": 1.2146, "step": 18854 }, { "epoch": 1.123852664203123, "grad_norm": 3.1239566802978516, "learning_rate": 4.101766030031562e-05, "loss": 1.1247, "step": 18856 }, { "epoch": 1.1239718679222792, "grad_norm": 3.0956881046295166, "learning_rate": 4.100835738264891e-05, "loss": 1.1414, "step": 18858 }, { "epoch": 1.1240910716414352, "grad_norm": 3.171262741088867, "learning_rate": 4.099905478664402e-05, "loss": 1.2172, "step": 18860 }, { "epoch": 1.1242102753605911, "grad_norm": 3.074444055557251, "learning_rate": 4.098975251263375e-05, "loss": 1.1842, "step": 18862 }, { "epoch": 1.1243294790797473, "grad_norm": 3.1347532272338867, "learning_rate": 4.0980450560950844e-05, "loss": 1.2647, "step": 18864 }, { "epoch": 1.1244486827989033, "grad_norm": 3.288996458053589, "learning_rate": 4.0971148931928094e-05, "loss": 1.2462, "step": 18866 }, { "epoch": 1.1245678865180593, "grad_norm": 2.990246295928955, "learning_rate": 4.096184762589822e-05, "loss": 1.1556, "step": 18868 }, { "epoch": 1.1246870902372155, "grad_norm": 2.737293004989624, "learning_rate": 4.0952546643194e-05, "loss": 1.0654, "step": 18870 }, { "epoch": 1.1248062939563714, "grad_norm": 3.030027389526367, "learning_rate": 4.094324598414812e-05, "loss": 1.0671, "step": 18872 }, { "epoch": 1.1249254976755274, "grad_norm": 2.960914373397827, "learning_rate": 4.093394564909335e-05, "loss": 1.1205, "step": 18874 }, { "epoch": 1.1250447013946836, "grad_norm": 3.516536235809326, "learning_rate": 4.092464563836235e-05, "loss": 1.2579, "step": 18876 }, { "epoch": 1.1251639051138396, "grad_norm": 3.1025495529174805, "learning_rate": 4.091534595228781e-05, "loss": 1.3323, "step": 18878 }, { "epoch": 1.1252831088329955, "grad_norm": 3.086474657058716, "learning_rate": 4.090604659120246e-05, "loss": 1.1657, "step": 18880 }, { "epoch": 1.1254023125521515, "grad_norm": 3.2866530418395996, "learning_rate": 4.089674755543891e-05, "loss": 1.1348, "step": 18882 }, { "epoch": 1.1255215162713077, "grad_norm": 3.346238374710083, "learning_rate": 4.088744884532986e-05, "loss": 1.195, "step": 18884 }, { "epoch": 1.1256407199904637, "grad_norm": 2.893181324005127, "learning_rate": 4.087815046120792e-05, "loss": 1.1242, "step": 18886 }, { "epoch": 1.1257599237096199, "grad_norm": 3.4777302742004395, "learning_rate": 4.0868852403405783e-05, "loss": 1.3278, "step": 18888 }, { "epoch": 1.1258791274287758, "grad_norm": 3.4477005004882812, "learning_rate": 4.0859554672256025e-05, "loss": 1.4085, "step": 18890 }, { "epoch": 1.1259983311479318, "grad_norm": 3.3962936401367188, "learning_rate": 4.085025726809126e-05, "loss": 1.2327, "step": 18892 }, { "epoch": 1.1261175348670878, "grad_norm": 3.023510217666626, "learning_rate": 4.084096019124412e-05, "loss": 1.1262, "step": 18894 }, { "epoch": 1.126236738586244, "grad_norm": 2.9877593517303467, "learning_rate": 4.083166344204714e-05, "loss": 1.1468, "step": 18896 }, { "epoch": 1.1263559423054, "grad_norm": 3.178927183151245, "learning_rate": 4.0822367020832945e-05, "loss": 1.2264, "step": 18898 }, { "epoch": 1.126475146024556, "grad_norm": 3.3082115650177, "learning_rate": 4.081307092793407e-05, "loss": 1.379, "step": 18900 }, { "epoch": 1.126594349743712, "grad_norm": 2.9437806606292725, "learning_rate": 4.08037751636831e-05, "loss": 1.159, "step": 18902 }, { "epoch": 1.126713553462868, "grad_norm": 3.433178424835205, "learning_rate": 4.079447972841256e-05, "loss": 1.5094, "step": 18904 }, { "epoch": 1.126832757182024, "grad_norm": 3.437917947769165, "learning_rate": 4.0785184622454955e-05, "loss": 1.06, "step": 18906 }, { "epoch": 1.12695196090118, "grad_norm": 3.341301679611206, "learning_rate": 4.077588984614283e-05, "loss": 1.204, "step": 18908 }, { "epoch": 1.1270711646203362, "grad_norm": 3.0266687870025635, "learning_rate": 4.076659539980869e-05, "loss": 1.2298, "step": 18910 }, { "epoch": 1.1271903683394922, "grad_norm": 2.9244391918182373, "learning_rate": 4.075730128378503e-05, "loss": 1.0512, "step": 18912 }, { "epoch": 1.1273095720586483, "grad_norm": 3.2185182571411133, "learning_rate": 4.074800749840432e-05, "loss": 1.1722, "step": 18914 }, { "epoch": 1.1274287757778043, "grad_norm": 3.2559802532196045, "learning_rate": 4.073871404399904e-05, "loss": 1.0415, "step": 18916 }, { "epoch": 1.1275479794969603, "grad_norm": 3.262612819671631, "learning_rate": 4.072942092090163e-05, "loss": 1.2829, "step": 18918 }, { "epoch": 1.1276671832161163, "grad_norm": 3.3563132286071777, "learning_rate": 4.072012812944458e-05, "loss": 1.1827, "step": 18920 }, { "epoch": 1.1277863869352724, "grad_norm": 3.4366633892059326, "learning_rate": 4.071083566996028e-05, "loss": 1.38, "step": 18922 }, { "epoch": 1.1279055906544284, "grad_norm": 3.248866558074951, "learning_rate": 4.0701543542781166e-05, "loss": 1.3496, "step": 18924 }, { "epoch": 1.1280247943735844, "grad_norm": 3.1578850746154785, "learning_rate": 4.069225174823968e-05, "loss": 1.211, "step": 18926 }, { "epoch": 1.1281439980927406, "grad_norm": 3.2555177211761475, "learning_rate": 4.0682960286668164e-05, "loss": 1.1177, "step": 18928 }, { "epoch": 1.1282632018118965, "grad_norm": 3.3918018341064453, "learning_rate": 4.0673669158399056e-05, "loss": 1.283, "step": 18930 }, { "epoch": 1.1283824055310525, "grad_norm": 2.798682451248169, "learning_rate": 4.066437836376469e-05, "loss": 1.0829, "step": 18932 }, { "epoch": 1.1285016092502085, "grad_norm": 2.985614776611328, "learning_rate": 4.065508790309747e-05, "loss": 1.1442, "step": 18934 }, { "epoch": 1.1286208129693647, "grad_norm": 3.146247386932373, "learning_rate": 4.064579777672974e-05, "loss": 1.2148, "step": 18936 }, { "epoch": 1.1287400166885206, "grad_norm": 2.9603018760681152, "learning_rate": 4.063650798499381e-05, "loss": 1.0958, "step": 18938 }, { "epoch": 1.1288592204076768, "grad_norm": 3.461049795150757, "learning_rate": 4.062721852822203e-05, "loss": 1.2998, "step": 18940 }, { "epoch": 1.1289784241268328, "grad_norm": 2.870443344116211, "learning_rate": 4.06179294067467e-05, "loss": 0.9914, "step": 18942 }, { "epoch": 1.1290976278459888, "grad_norm": 3.3732848167419434, "learning_rate": 4.060864062090016e-05, "loss": 1.119, "step": 18944 }, { "epoch": 1.1292168315651447, "grad_norm": 3.409330368041992, "learning_rate": 4.059935217101466e-05, "loss": 1.2919, "step": 18946 }, { "epoch": 1.129336035284301, "grad_norm": 2.92379093170166, "learning_rate": 4.05900640574225e-05, "loss": 1.1215, "step": 18948 }, { "epoch": 1.129455239003457, "grad_norm": 3.258711338043213, "learning_rate": 4.058077628045596e-05, "loss": 1.1986, "step": 18950 }, { "epoch": 1.1295744427226129, "grad_norm": 3.4590728282928467, "learning_rate": 4.0571488840447256e-05, "loss": 1.1656, "step": 18952 }, { "epoch": 1.129693646441769, "grad_norm": 3.3497910499572754, "learning_rate": 4.056220173772867e-05, "loss": 1.1907, "step": 18954 }, { "epoch": 1.129812850160925, "grad_norm": 3.3386776447296143, "learning_rate": 4.05529149726324e-05, "loss": 1.2426, "step": 18956 }, { "epoch": 1.129932053880081, "grad_norm": 3.1749823093414307, "learning_rate": 4.054362854549072e-05, "loss": 1.2039, "step": 18958 }, { "epoch": 1.1300512575992372, "grad_norm": 3.3353590965270996, "learning_rate": 4.053434245663577e-05, "loss": 1.1696, "step": 18960 }, { "epoch": 1.1301704613183932, "grad_norm": 3.158798933029175, "learning_rate": 4.052505670639979e-05, "loss": 1.2372, "step": 18962 }, { "epoch": 1.1302896650375491, "grad_norm": 3.28739333152771, "learning_rate": 4.051577129511496e-05, "loss": 1.2516, "step": 18964 }, { "epoch": 1.1304088687567053, "grad_norm": 3.357590675354004, "learning_rate": 4.050648622311341e-05, "loss": 1.1175, "step": 18966 }, { "epoch": 1.1305280724758613, "grad_norm": 3.185281276702881, "learning_rate": 4.049720149072736e-05, "loss": 1.1246, "step": 18968 }, { "epoch": 1.1306472761950173, "grad_norm": 3.0843935012817383, "learning_rate": 4.04879170982889e-05, "loss": 1.0981, "step": 18970 }, { "epoch": 1.1307664799141732, "grad_norm": 3.4766314029693604, "learning_rate": 4.047863304613021e-05, "loss": 1.3052, "step": 18972 }, { "epoch": 1.1308856836333294, "grad_norm": 3.4445061683654785, "learning_rate": 4.046934933458338e-05, "loss": 1.2118, "step": 18974 }, { "epoch": 1.1310048873524854, "grad_norm": 2.9415996074676514, "learning_rate": 4.046006596398055e-05, "loss": 1.1246, "step": 18976 }, { "epoch": 1.1311240910716414, "grad_norm": 2.9373183250427246, "learning_rate": 4.045078293465379e-05, "loss": 1.3129, "step": 18978 }, { "epoch": 1.1312432947907975, "grad_norm": 2.956092357635498, "learning_rate": 4.044150024693519e-05, "loss": 1.215, "step": 18980 }, { "epoch": 1.1313624985099535, "grad_norm": 3.2732529640197754, "learning_rate": 4.043221790115686e-05, "loss": 1.1937, "step": 18982 }, { "epoch": 1.1314817022291095, "grad_norm": 3.2109200954437256, "learning_rate": 4.0422935897650794e-05, "loss": 1.1997, "step": 18984 }, { "epoch": 1.1316009059482657, "grad_norm": 3.1463303565979004, "learning_rate": 4.04136542367491e-05, "loss": 1.1789, "step": 18986 }, { "epoch": 1.1317201096674216, "grad_norm": 3.211155414581299, "learning_rate": 4.040437291878377e-05, "loss": 1.1263, "step": 18988 }, { "epoch": 1.1318393133865776, "grad_norm": 2.6177077293395996, "learning_rate": 4.039509194408689e-05, "loss": 1.0744, "step": 18990 }, { "epoch": 1.1319585171057338, "grad_norm": 3.0641326904296875, "learning_rate": 4.03858113129904e-05, "loss": 1.0988, "step": 18992 }, { "epoch": 1.1320777208248898, "grad_norm": 2.9738194942474365, "learning_rate": 4.0376531025826346e-05, "loss": 1.3839, "step": 18994 }, { "epoch": 1.1321969245440457, "grad_norm": 3.04799747467041, "learning_rate": 4.036725108292672e-05, "loss": 1.188, "step": 18996 }, { "epoch": 1.1323161282632017, "grad_norm": 3.130894422531128, "learning_rate": 4.0357971484623446e-05, "loss": 1.2656, "step": 18998 }, { "epoch": 1.132435331982358, "grad_norm": 3.3205370903015137, "learning_rate": 4.034869223124855e-05, "loss": 1.31, "step": 19000 }, { "epoch": 1.1325545357015139, "grad_norm": 2.9933691024780273, "learning_rate": 4.0339413323133946e-05, "loss": 1.0417, "step": 19002 }, { "epoch": 1.1326737394206698, "grad_norm": 3.6729538440704346, "learning_rate": 4.033013476061158e-05, "loss": 1.298, "step": 19004 }, { "epoch": 1.132792943139826, "grad_norm": 3.204357147216797, "learning_rate": 4.032085654401337e-05, "loss": 1.2188, "step": 19006 }, { "epoch": 1.132912146858982, "grad_norm": 3.437366247177124, "learning_rate": 4.031157867367126e-05, "loss": 1.3486, "step": 19008 }, { "epoch": 1.133031350578138, "grad_norm": 3.053231716156006, "learning_rate": 4.030230114991712e-05, "loss": 1.0574, "step": 19010 }, { "epoch": 1.1331505542972942, "grad_norm": 3.346543073654175, "learning_rate": 4.029302397308283e-05, "loss": 1.3793, "step": 19012 }, { "epoch": 1.1332697580164501, "grad_norm": 3.2033190727233887, "learning_rate": 4.0283747143500314e-05, "loss": 1.2075, "step": 19014 }, { "epoch": 1.133388961735606, "grad_norm": 3.1488711833953857, "learning_rate": 4.0274470661501385e-05, "loss": 1.1749, "step": 19016 }, { "epoch": 1.1335081654547623, "grad_norm": 3.0813820362091064, "learning_rate": 4.0265194527417925e-05, "loss": 1.1186, "step": 19018 }, { "epoch": 1.1336273691739183, "grad_norm": 2.8332154750823975, "learning_rate": 4.0255918741581754e-05, "loss": 1.1909, "step": 19020 }, { "epoch": 1.1337465728930742, "grad_norm": 3.1232423782348633, "learning_rate": 4.0246643304324734e-05, "loss": 1.3824, "step": 19022 }, { "epoch": 1.1338657766122302, "grad_norm": 3.3103392124176025, "learning_rate": 4.0237368215978624e-05, "loss": 1.1254, "step": 19024 }, { "epoch": 1.1339849803313864, "grad_norm": 3.210761308670044, "learning_rate": 4.022809347687527e-05, "loss": 1.0219, "step": 19026 }, { "epoch": 1.1341041840505424, "grad_norm": 2.770484209060669, "learning_rate": 4.021881908734646e-05, "loss": 1.2167, "step": 19028 }, { "epoch": 1.1342233877696983, "grad_norm": 3.1320202350616455, "learning_rate": 4.0209545047723915e-05, "loss": 1.1015, "step": 19030 }, { "epoch": 1.1343425914888545, "grad_norm": 3.3089609146118164, "learning_rate": 4.0200271358339487e-05, "loss": 1.3643, "step": 19032 }, { "epoch": 1.1344617952080105, "grad_norm": 3.029731273651123, "learning_rate": 4.019099801952485e-05, "loss": 1.1651, "step": 19034 }, { "epoch": 1.1345809989271665, "grad_norm": 3.1233129501342773, "learning_rate": 4.0181725031611795e-05, "loss": 1.2074, "step": 19036 }, { "epoch": 1.1347002026463227, "grad_norm": 3.0044455528259277, "learning_rate": 4.017245239493201e-05, "loss": 1.3003, "step": 19038 }, { "epoch": 1.1348194063654786, "grad_norm": 2.9645495414733887, "learning_rate": 4.016318010981725e-05, "loss": 1.2691, "step": 19040 }, { "epoch": 1.1349386100846346, "grad_norm": 2.7584068775177, "learning_rate": 4.0153908176599184e-05, "loss": 1.0744, "step": 19042 }, { "epoch": 1.1350578138037908, "grad_norm": 2.720879554748535, "learning_rate": 4.01446365956095e-05, "loss": 1.2791, "step": 19044 }, { "epoch": 1.1351770175229468, "grad_norm": 3.6442368030548096, "learning_rate": 4.013536536717991e-05, "loss": 1.1944, "step": 19046 }, { "epoch": 1.1352962212421027, "grad_norm": 3.3923470973968506, "learning_rate": 4.012609449164203e-05, "loss": 1.3106, "step": 19048 }, { "epoch": 1.1354154249612587, "grad_norm": 3.1930978298187256, "learning_rate": 4.0116823969327535e-05, "loss": 1.154, "step": 19050 }, { "epoch": 1.1355346286804149, "grad_norm": 3.34749436378479, "learning_rate": 4.010755380056806e-05, "loss": 1.306, "step": 19052 }, { "epoch": 1.1356538323995709, "grad_norm": 3.196274518966675, "learning_rate": 4.009828398569525e-05, "loss": 1.1745, "step": 19054 }, { "epoch": 1.1357730361187268, "grad_norm": 3.278008222579956, "learning_rate": 4.008901452504069e-05, "loss": 1.15, "step": 19056 }, { "epoch": 1.135892239837883, "grad_norm": 3.089272975921631, "learning_rate": 4.007974541893598e-05, "loss": 1.2177, "step": 19058 }, { "epoch": 1.136011443557039, "grad_norm": 3.416923999786377, "learning_rate": 4.007047666771274e-05, "loss": 1.2823, "step": 19060 }, { "epoch": 1.136130647276195, "grad_norm": 3.1333060264587402, "learning_rate": 4.0061208271702505e-05, "loss": 1.1667, "step": 19062 }, { "epoch": 1.1362498509953511, "grad_norm": 3.3383498191833496, "learning_rate": 4.005194023123686e-05, "loss": 1.2858, "step": 19064 }, { "epoch": 1.136369054714507, "grad_norm": 3.155750274658203, "learning_rate": 4.004267254664734e-05, "loss": 1.2047, "step": 19066 }, { "epoch": 1.136488258433663, "grad_norm": 3.4596922397613525, "learning_rate": 4.0033405218265505e-05, "loss": 1.3528, "step": 19068 }, { "epoch": 1.1366074621528193, "grad_norm": 2.9067904949188232, "learning_rate": 4.0024138246422876e-05, "loss": 1.1093, "step": 19070 }, { "epoch": 1.1367266658719752, "grad_norm": 3.1483469009399414, "learning_rate": 4.0014871631450935e-05, "loss": 1.0086, "step": 19072 }, { "epoch": 1.1368458695911312, "grad_norm": 3.2935285568237305, "learning_rate": 4.0005605373681204e-05, "loss": 1.261, "step": 19074 }, { "epoch": 1.1369650733102872, "grad_norm": 3.3334968090057373, "learning_rate": 3.999633947344516e-05, "loss": 1.1688, "step": 19076 }, { "epoch": 1.1370842770294434, "grad_norm": 2.9945859909057617, "learning_rate": 3.998707393107429e-05, "loss": 1.3331, "step": 19078 }, { "epoch": 1.1372034807485993, "grad_norm": 3.070683717727661, "learning_rate": 3.9977808746900037e-05, "loss": 1.0424, "step": 19080 }, { "epoch": 1.1373226844677553, "grad_norm": 3.4168918132781982, "learning_rate": 3.9968543921253856e-05, "loss": 1.1148, "step": 19082 }, { "epoch": 1.1374418881869115, "grad_norm": 3.1922643184661865, "learning_rate": 3.995927945446718e-05, "loss": 1.1902, "step": 19084 }, { "epoch": 1.1375610919060675, "grad_norm": 2.9758613109588623, "learning_rate": 3.995001534687145e-05, "loss": 1.1395, "step": 19086 }, { "epoch": 1.1376802956252234, "grad_norm": 3.2321674823760986, "learning_rate": 3.9940751598798044e-05, "loss": 1.1944, "step": 19088 }, { "epoch": 1.1377994993443796, "grad_norm": 3.0994691848754883, "learning_rate": 3.993148821057836e-05, "loss": 1.2846, "step": 19090 }, { "epoch": 1.1379187030635356, "grad_norm": 3.505521535873413, "learning_rate": 3.992222518254381e-05, "loss": 1.1757, "step": 19092 }, { "epoch": 1.1380379067826916, "grad_norm": 3.1766223907470703, "learning_rate": 3.991296251502573e-05, "loss": 1.3002, "step": 19094 }, { "epoch": 1.1381571105018478, "grad_norm": 3.3740756511688232, "learning_rate": 3.99037002083555e-05, "loss": 1.1809, "step": 19096 }, { "epoch": 1.1382763142210037, "grad_norm": 3.1186301708221436, "learning_rate": 3.989443826286445e-05, "loss": 1.1134, "step": 19098 }, { "epoch": 1.1383955179401597, "grad_norm": 3.1686651706695557, "learning_rate": 3.988517667888393e-05, "loss": 1.0691, "step": 19100 }, { "epoch": 1.1385147216593157, "grad_norm": 3.1441853046417236, "learning_rate": 3.9875915456745254e-05, "loss": 1.2704, "step": 19102 }, { "epoch": 1.1386339253784719, "grad_norm": 3.015497922897339, "learning_rate": 3.986665459677971e-05, "loss": 1.3409, "step": 19104 }, { "epoch": 1.1387531290976278, "grad_norm": 2.9213242530822754, "learning_rate": 3.9857394099318603e-05, "loss": 1.1646, "step": 19106 }, { "epoch": 1.1388723328167838, "grad_norm": 3.1030545234680176, "learning_rate": 3.98481339646932e-05, "loss": 1.1844, "step": 19108 }, { "epoch": 1.13899153653594, "grad_norm": 3.284888505935669, "learning_rate": 3.98388741932348e-05, "loss": 1.3302, "step": 19110 }, { "epoch": 1.139110740255096, "grad_norm": 3.3890559673309326, "learning_rate": 3.9829614785274624e-05, "loss": 1.1779, "step": 19112 }, { "epoch": 1.139229943974252, "grad_norm": 3.347074270248413, "learning_rate": 3.9820355741143935e-05, "loss": 1.1816, "step": 19114 }, { "epoch": 1.1393491476934081, "grad_norm": 3.1653919219970703, "learning_rate": 3.9811097061173954e-05, "loss": 1.2089, "step": 19116 }, { "epoch": 1.139468351412564, "grad_norm": 2.6841788291931152, "learning_rate": 3.9801838745695877e-05, "loss": 1.0524, "step": 19118 }, { "epoch": 1.13958755513172, "grad_norm": 2.9932618141174316, "learning_rate": 3.979258079504092e-05, "loss": 1.2695, "step": 19120 }, { "epoch": 1.1397067588508762, "grad_norm": 3.273310422897339, "learning_rate": 3.978332320954027e-05, "loss": 1.1305, "step": 19122 }, { "epoch": 1.1398259625700322, "grad_norm": 3.2503502368927, "learning_rate": 3.9774065989525126e-05, "loss": 1.153, "step": 19124 }, { "epoch": 1.1399451662891882, "grad_norm": 3.193352699279785, "learning_rate": 3.9764809135326606e-05, "loss": 1.2712, "step": 19126 }, { "epoch": 1.1400643700083442, "grad_norm": 3.188122510910034, "learning_rate": 3.97555526472759e-05, "loss": 1.2051, "step": 19128 }, { "epoch": 1.1401835737275003, "grad_norm": 2.9307632446289062, "learning_rate": 3.974629652570413e-05, "loss": 1.363, "step": 19130 }, { "epoch": 1.1403027774466563, "grad_norm": 3.2837719917297363, "learning_rate": 3.973704077094238e-05, "loss": 1.1566, "step": 19132 }, { "epoch": 1.1404219811658123, "grad_norm": 3.374230146408081, "learning_rate": 3.9727785383321835e-05, "loss": 1.1141, "step": 19134 }, { "epoch": 1.1405411848849685, "grad_norm": 3.3208580017089844, "learning_rate": 3.971853036317353e-05, "loss": 1.2772, "step": 19136 }, { "epoch": 1.1406603886041244, "grad_norm": 3.0948939323425293, "learning_rate": 3.970927571082859e-05, "loss": 1.087, "step": 19138 }, { "epoch": 1.1407795923232804, "grad_norm": 3.179791212081909, "learning_rate": 3.9700021426618044e-05, "loss": 1.103, "step": 19140 }, { "epoch": 1.1408987960424366, "grad_norm": 3.4027552604675293, "learning_rate": 3.969076751087299e-05, "loss": 1.2925, "step": 19142 }, { "epoch": 1.1410179997615926, "grad_norm": 2.9620635509490967, "learning_rate": 3.968151396392445e-05, "loss": 1.147, "step": 19144 }, { "epoch": 1.1411372034807485, "grad_norm": 3.2682974338531494, "learning_rate": 3.967226078610347e-05, "loss": 1.1725, "step": 19146 }, { "epoch": 1.1412564071999047, "grad_norm": 3.1486353874206543, "learning_rate": 3.966300797774105e-05, "loss": 1.2482, "step": 19148 }, { "epoch": 1.1413756109190607, "grad_norm": 3.225559711456299, "learning_rate": 3.96537555391682e-05, "loss": 1.259, "step": 19150 }, { "epoch": 1.1414948146382167, "grad_norm": 3.315182685852051, "learning_rate": 3.964450347071591e-05, "loss": 1.1393, "step": 19152 }, { "epoch": 1.1416140183573726, "grad_norm": 2.8140017986297607, "learning_rate": 3.963525177271516e-05, "loss": 1.1593, "step": 19154 }, { "epoch": 1.1417332220765288, "grad_norm": 3.3489582538604736, "learning_rate": 3.9626000445496934e-05, "loss": 1.1996, "step": 19156 }, { "epoch": 1.1418524257956848, "grad_norm": 3.106442451477051, "learning_rate": 3.961674948939215e-05, "loss": 1.2216, "step": 19158 }, { "epoch": 1.1419716295148408, "grad_norm": 2.9058306217193604, "learning_rate": 3.9607498904731765e-05, "loss": 1.189, "step": 19160 }, { "epoch": 1.142090833233997, "grad_norm": 3.1349503993988037, "learning_rate": 3.959824869184672e-05, "loss": 1.0353, "step": 19162 }, { "epoch": 1.142210036953153, "grad_norm": 3.0974411964416504, "learning_rate": 3.958899885106788e-05, "loss": 1.1309, "step": 19164 }, { "epoch": 1.142329240672309, "grad_norm": 3.270280122756958, "learning_rate": 3.9579749382726197e-05, "loss": 1.2466, "step": 19166 }, { "epoch": 1.142448444391465, "grad_norm": 3.0725228786468506, "learning_rate": 3.957050028715251e-05, "loss": 1.3557, "step": 19168 }, { "epoch": 1.142567648110621, "grad_norm": 3.0406131744384766, "learning_rate": 3.956125156467772e-05, "loss": 1.0502, "step": 19170 }, { "epoch": 1.142686851829777, "grad_norm": 3.189168691635132, "learning_rate": 3.955200321563267e-05, "loss": 1.1317, "step": 19172 }, { "epoch": 1.1428060555489332, "grad_norm": 3.4360949993133545, "learning_rate": 3.9542755240348235e-05, "loss": 1.1943, "step": 19174 }, { "epoch": 1.1429252592680892, "grad_norm": 2.9398598670959473, "learning_rate": 3.953350763915521e-05, "loss": 1.1329, "step": 19176 }, { "epoch": 1.1430444629872452, "grad_norm": 3.008194923400879, "learning_rate": 3.9524260412384415e-05, "loss": 1.0866, "step": 19178 }, { "epoch": 1.1431636667064011, "grad_norm": 3.253340244293213, "learning_rate": 3.95150135603667e-05, "loss": 1.2132, "step": 19180 }, { "epoch": 1.1432828704255573, "grad_norm": 3.344879150390625, "learning_rate": 3.9505767083432786e-05, "loss": 1.2027, "step": 19182 }, { "epoch": 1.1434020741447133, "grad_norm": 3.027362823486328, "learning_rate": 3.949652098191352e-05, "loss": 1.2802, "step": 19184 }, { "epoch": 1.1435212778638693, "grad_norm": 3.331888437271118, "learning_rate": 3.948727525613961e-05, "loss": 1.2434, "step": 19186 }, { "epoch": 1.1436404815830254, "grad_norm": 3.1185243129730225, "learning_rate": 3.9478029906441856e-05, "loss": 1.1377, "step": 19188 }, { "epoch": 1.1437596853021814, "grad_norm": 2.984264850616455, "learning_rate": 3.946878493315096e-05, "loss": 1.2397, "step": 19190 }, { "epoch": 1.1438788890213374, "grad_norm": 2.8868937492370605, "learning_rate": 3.945954033659765e-05, "loss": 1.1724, "step": 19192 }, { "epoch": 1.1439980927404936, "grad_norm": 3.0981993675231934, "learning_rate": 3.945029611711268e-05, "loss": 1.1274, "step": 19194 }, { "epoch": 1.1441172964596495, "grad_norm": 3.1445999145507812, "learning_rate": 3.944105227502667e-05, "loss": 1.2086, "step": 19196 }, { "epoch": 1.1442365001788055, "grad_norm": 3.437983989715576, "learning_rate": 3.943180881067038e-05, "loss": 1.092, "step": 19198 }, { "epoch": 1.1443557038979617, "grad_norm": 2.8990087509155273, "learning_rate": 3.9422565724374427e-05, "loss": 1.1992, "step": 19200 }, { "epoch": 1.1444749076171177, "grad_norm": 3.121332883834839, "learning_rate": 3.941332301646949e-05, "loss": 1.1424, "step": 19202 }, { "epoch": 1.1445941113362736, "grad_norm": 2.997218370437622, "learning_rate": 3.9404080687286216e-05, "loss": 1.3405, "step": 19204 }, { "epoch": 1.1447133150554296, "grad_norm": 3.0754759311676025, "learning_rate": 3.9394838737155245e-05, "loss": 1.2487, "step": 19206 }, { "epoch": 1.1448325187745858, "grad_norm": 3.400775909423828, "learning_rate": 3.938559716640716e-05, "loss": 1.1803, "step": 19208 }, { "epoch": 1.1449517224937418, "grad_norm": 3.200028657913208, "learning_rate": 3.937635597537258e-05, "loss": 1.0648, "step": 19210 }, { "epoch": 1.145070926212898, "grad_norm": 3.16560435295105, "learning_rate": 3.936711516438211e-05, "loss": 1.082, "step": 19212 }, { "epoch": 1.145190129932054, "grad_norm": 3.1589763164520264, "learning_rate": 3.935787473376629e-05, "loss": 1.151, "step": 19214 }, { "epoch": 1.14530933365121, "grad_norm": 3.2190709114074707, "learning_rate": 3.934863468385572e-05, "loss": 1.1834, "step": 19216 }, { "epoch": 1.1454285373703659, "grad_norm": 3.1321280002593994, "learning_rate": 3.9339395014980914e-05, "loss": 1.1335, "step": 19218 }, { "epoch": 1.145547741089522, "grad_norm": 3.1073265075683594, "learning_rate": 3.933015572747244e-05, "loss": 1.1113, "step": 19220 }, { "epoch": 1.145666944808678, "grad_norm": 2.99654483795166, "learning_rate": 3.93209168216608e-05, "loss": 1.1586, "step": 19222 }, { "epoch": 1.145786148527834, "grad_norm": 3.0879461765289307, "learning_rate": 3.931167829787649e-05, "loss": 1.0707, "step": 19224 }, { "epoch": 1.1459053522469902, "grad_norm": 3.3118271827697754, "learning_rate": 3.9302440156450036e-05, "loss": 1.1189, "step": 19226 }, { "epoch": 1.1460245559661462, "grad_norm": 3.272780656814575, "learning_rate": 3.929320239771187e-05, "loss": 1.2649, "step": 19228 }, { "epoch": 1.1461437596853021, "grad_norm": 3.267263412475586, "learning_rate": 3.928396502199252e-05, "loss": 1.2874, "step": 19230 }, { "epoch": 1.146262963404458, "grad_norm": 2.9874942302703857, "learning_rate": 3.927472802962238e-05, "loss": 1.2688, "step": 19232 }, { "epoch": 1.1463821671236143, "grad_norm": 3.382948160171509, "learning_rate": 3.9265491420931926e-05, "loss": 1.201, "step": 19234 }, { "epoch": 1.1465013708427703, "grad_norm": 3.149808406829834, "learning_rate": 3.925625519625159e-05, "loss": 1.1854, "step": 19236 }, { "epoch": 1.1466205745619265, "grad_norm": 3.0572831630706787, "learning_rate": 3.924701935591173e-05, "loss": 1.3073, "step": 19238 }, { "epoch": 1.1467397782810824, "grad_norm": 3.2336699962615967, "learning_rate": 3.92377839002428e-05, "loss": 1.1466, "step": 19240 }, { "epoch": 1.1468589820002384, "grad_norm": 3.4354162216186523, "learning_rate": 3.922854882957515e-05, "loss": 1.2727, "step": 19242 }, { "epoch": 1.1469781857193944, "grad_norm": 3.285104751586914, "learning_rate": 3.921931414423918e-05, "loss": 1.2934, "step": 19244 }, { "epoch": 1.1470973894385506, "grad_norm": 3.5714621543884277, "learning_rate": 3.92100798445652e-05, "loss": 1.1865, "step": 19246 }, { "epoch": 1.1472165931577065, "grad_norm": 2.8664495944976807, "learning_rate": 3.920084593088361e-05, "loss": 1.2246, "step": 19248 }, { "epoch": 1.1473357968768625, "grad_norm": 3.264364719390869, "learning_rate": 3.9191612403524686e-05, "loss": 1.1038, "step": 19250 }, { "epoch": 1.1474550005960187, "grad_norm": 2.974111318588257, "learning_rate": 3.9182379262818794e-05, "loss": 1.1778, "step": 19252 }, { "epoch": 1.1475742043151747, "grad_norm": 3.1236956119537354, "learning_rate": 3.9173146509096195e-05, "loss": 1.1339, "step": 19254 }, { "epoch": 1.1476934080343306, "grad_norm": 3.1923916339874268, "learning_rate": 3.9163914142687184e-05, "loss": 1.3872, "step": 19256 }, { "epoch": 1.1478126117534866, "grad_norm": 3.2164032459259033, "learning_rate": 3.9154682163922055e-05, "loss": 1.1998, "step": 19258 }, { "epoch": 1.1479318154726428, "grad_norm": 2.9705419540405273, "learning_rate": 3.914545057313102e-05, "loss": 1.1479, "step": 19260 }, { "epoch": 1.1480510191917987, "grad_norm": 2.9191012382507324, "learning_rate": 3.913621937064439e-05, "loss": 1.239, "step": 19262 }, { "epoch": 1.148170222910955, "grad_norm": 3.4268174171447754, "learning_rate": 3.912698855679234e-05, "loss": 1.2914, "step": 19264 }, { "epoch": 1.148289426630111, "grad_norm": 3.1619510650634766, "learning_rate": 3.911775813190512e-05, "loss": 1.0803, "step": 19266 }, { "epoch": 1.1484086303492669, "grad_norm": 3.365584373474121, "learning_rate": 3.9108528096312935e-05, "loss": 1.2846, "step": 19268 }, { "epoch": 1.1485278340684228, "grad_norm": 3.0232298374176025, "learning_rate": 3.909929845034595e-05, "loss": 1.168, "step": 19270 }, { "epoch": 1.148647037787579, "grad_norm": 3.296602725982666, "learning_rate": 3.909006919433436e-05, "loss": 1.2339, "step": 19272 }, { "epoch": 1.148766241506735, "grad_norm": 3.1629858016967773, "learning_rate": 3.908084032860831e-05, "loss": 1.1519, "step": 19274 }, { "epoch": 1.148885445225891, "grad_norm": 3.077545642852783, "learning_rate": 3.9071611853497995e-05, "loss": 1.1279, "step": 19276 }, { "epoch": 1.1490046489450472, "grad_norm": 3.246647357940674, "learning_rate": 3.906238376933349e-05, "loss": 1.2087, "step": 19278 }, { "epoch": 1.1491238526642031, "grad_norm": 3.318250894546509, "learning_rate": 3.905315607644494e-05, "loss": 1.2162, "step": 19280 }, { "epoch": 1.149243056383359, "grad_norm": 3.14495849609375, "learning_rate": 3.904392877516248e-05, "loss": 1.3397, "step": 19282 }, { "epoch": 1.149362260102515, "grad_norm": 3.4939122200012207, "learning_rate": 3.903470186581614e-05, "loss": 1.2375, "step": 19284 }, { "epoch": 1.1494814638216713, "grad_norm": 3.148881196975708, "learning_rate": 3.902547534873604e-05, "loss": 1.1774, "step": 19286 }, { "epoch": 1.1496006675408272, "grad_norm": 3.0442793369293213, "learning_rate": 3.9016249224252225e-05, "loss": 1.2374, "step": 19288 }, { "epoch": 1.1497198712599834, "grad_norm": 2.9311771392822266, "learning_rate": 3.900702349269478e-05, "loss": 1.2672, "step": 19290 }, { "epoch": 1.1498390749791394, "grad_norm": 3.1484620571136475, "learning_rate": 3.899779815439369e-05, "loss": 1.118, "step": 19292 }, { "epoch": 1.1499582786982954, "grad_norm": 2.8754289150238037, "learning_rate": 3.898857320967903e-05, "loss": 1.2635, "step": 19294 }, { "epoch": 1.1500774824174513, "grad_norm": 3.0817043781280518, "learning_rate": 3.897934865888078e-05, "loss": 1.2411, "step": 19296 }, { "epoch": 1.1501966861366075, "grad_norm": 3.3224079608917236, "learning_rate": 3.8970124502328905e-05, "loss": 1.2112, "step": 19298 }, { "epoch": 1.1503158898557635, "grad_norm": 3.2766754627227783, "learning_rate": 3.896090074035345e-05, "loss": 1.1196, "step": 19300 }, { "epoch": 1.1504350935749195, "grad_norm": 2.7153453826904297, "learning_rate": 3.895167737328432e-05, "loss": 1.0857, "step": 19302 }, { "epoch": 1.1505542972940757, "grad_norm": 3.3541676998138428, "learning_rate": 3.8942454401451505e-05, "loss": 1.1843, "step": 19304 }, { "epoch": 1.1506735010132316, "grad_norm": 3.4038448333740234, "learning_rate": 3.893323182518492e-05, "loss": 1.227, "step": 19306 }, { "epoch": 1.1507927047323876, "grad_norm": 2.8251445293426514, "learning_rate": 3.892400964481451e-05, "loss": 1.082, "step": 19308 }, { "epoch": 1.1509119084515436, "grad_norm": 3.3286068439483643, "learning_rate": 3.891478786067016e-05, "loss": 1.2629, "step": 19310 }, { "epoch": 1.1510311121706998, "grad_norm": 2.9440722465515137, "learning_rate": 3.890556647308178e-05, "loss": 1.1586, "step": 19312 }, { "epoch": 1.1511503158898557, "grad_norm": 3.085946559906006, "learning_rate": 3.889634548237926e-05, "loss": 1.1556, "step": 19314 }, { "epoch": 1.151269519609012, "grad_norm": 2.9823250770568848, "learning_rate": 3.8887124888892436e-05, "loss": 1.1369, "step": 19316 }, { "epoch": 1.1513887233281679, "grad_norm": 3.17620849609375, "learning_rate": 3.8877904692951176e-05, "loss": 1.2439, "step": 19318 }, { "epoch": 1.1515079270473239, "grad_norm": 3.2548670768737793, "learning_rate": 3.886868489488532e-05, "loss": 1.0879, "step": 19320 }, { "epoch": 1.1516271307664798, "grad_norm": 3.1971518993377686, "learning_rate": 3.88594654950247e-05, "loss": 1.1944, "step": 19322 }, { "epoch": 1.151746334485636, "grad_norm": 3.454511880874634, "learning_rate": 3.8850246493699105e-05, "loss": 1.1168, "step": 19324 }, { "epoch": 1.151865538204792, "grad_norm": 2.840205192565918, "learning_rate": 3.8841027891238353e-05, "loss": 1.2121, "step": 19326 }, { "epoch": 1.151984741923948, "grad_norm": 3.2947490215301514, "learning_rate": 3.883180968797222e-05, "loss": 1.0498, "step": 19328 }, { "epoch": 1.1521039456431041, "grad_norm": 3.189751148223877, "learning_rate": 3.882259188423043e-05, "loss": 1.1972, "step": 19330 }, { "epoch": 1.1522231493622601, "grad_norm": 3.5073938369750977, "learning_rate": 3.881337448034281e-05, "loss": 1.1508, "step": 19332 }, { "epoch": 1.152342353081416, "grad_norm": 3.366455316543579, "learning_rate": 3.880415747663904e-05, "loss": 1.1175, "step": 19334 }, { "epoch": 1.1524615568005723, "grad_norm": 3.170959711074829, "learning_rate": 3.879494087344886e-05, "loss": 1.1246, "step": 19336 }, { "epoch": 1.1525807605197282, "grad_norm": 3.096517324447632, "learning_rate": 3.8785724671101984e-05, "loss": 1.1126, "step": 19338 }, { "epoch": 1.1526999642388842, "grad_norm": 2.9065842628479004, "learning_rate": 3.8776508869928125e-05, "loss": 1.2845, "step": 19340 }, { "epoch": 1.1528191679580404, "grad_norm": 3.4622292518615723, "learning_rate": 3.8767293470256936e-05, "loss": 1.0943, "step": 19342 }, { "epoch": 1.1529383716771964, "grad_norm": 3.1812236309051514, "learning_rate": 3.8758078472418075e-05, "loss": 1.2054, "step": 19344 }, { "epoch": 1.1530575753963523, "grad_norm": 3.2800519466400146, "learning_rate": 3.8748863876741235e-05, "loss": 1.1065, "step": 19346 }, { "epoch": 1.1531767791155083, "grad_norm": 3.4141244888305664, "learning_rate": 3.8739649683556014e-05, "loss": 1.2261, "step": 19348 }, { "epoch": 1.1532959828346645, "grad_norm": 3.599546194076538, "learning_rate": 3.8730435893192055e-05, "loss": 1.0969, "step": 19350 }, { "epoch": 1.1534151865538205, "grad_norm": 3.361872911453247, "learning_rate": 3.872122250597895e-05, "loss": 1.209, "step": 19352 }, { "epoch": 1.1535343902729764, "grad_norm": 2.9560320377349854, "learning_rate": 3.871200952224633e-05, "loss": 1.1426, "step": 19354 }, { "epoch": 1.1536535939921326, "grad_norm": 3.161487102508545, "learning_rate": 3.870279694232374e-05, "loss": 1.2091, "step": 19356 }, { "epoch": 1.1537727977112886, "grad_norm": 3.137218475341797, "learning_rate": 3.869358476654075e-05, "loss": 1.2745, "step": 19358 }, { "epoch": 1.1538920014304446, "grad_norm": 3.244781494140625, "learning_rate": 3.868437299522693e-05, "loss": 1.0972, "step": 19360 }, { "epoch": 1.1540112051496008, "grad_norm": 3.0166609287261963, "learning_rate": 3.8675161628711776e-05, "loss": 1.2908, "step": 19362 }, { "epoch": 1.1541304088687567, "grad_norm": 3.1383721828460693, "learning_rate": 3.8665950667324874e-05, "loss": 1.1584, "step": 19364 }, { "epoch": 1.1542496125879127, "grad_norm": 3.1084048748016357, "learning_rate": 3.865674011139567e-05, "loss": 1.1586, "step": 19366 }, { "epoch": 1.154368816307069, "grad_norm": 4.109772682189941, "learning_rate": 3.864752996125369e-05, "loss": 1.102, "step": 19368 }, { "epoch": 1.1544880200262249, "grad_norm": 3.2956106662750244, "learning_rate": 3.86383202172284e-05, "loss": 1.2397, "step": 19370 }, { "epoch": 1.1546072237453808, "grad_norm": 3.1103570461273193, "learning_rate": 3.862911087964929e-05, "loss": 1.2266, "step": 19372 }, { "epoch": 1.1547264274645368, "grad_norm": 2.950165033340454, "learning_rate": 3.861990194884578e-05, "loss": 1.1704, "step": 19374 }, { "epoch": 1.154845631183693, "grad_norm": 3.2846922874450684, "learning_rate": 3.8610693425147304e-05, "loss": 1.1698, "step": 19376 }, { "epoch": 1.154964834902849, "grad_norm": 3.1807022094726562, "learning_rate": 3.860148530888331e-05, "loss": 1.234, "step": 19378 }, { "epoch": 1.155084038622005, "grad_norm": 3.0887372493743896, "learning_rate": 3.8592277600383164e-05, "loss": 1.1338, "step": 19380 }, { "epoch": 1.1552032423411611, "grad_norm": 2.9227707386016846, "learning_rate": 3.8583070299976296e-05, "loss": 1.1551, "step": 19382 }, { "epoch": 1.155322446060317, "grad_norm": 3.3144075870513916, "learning_rate": 3.857386340799205e-05, "loss": 1.3695, "step": 19384 }, { "epoch": 1.155441649779473, "grad_norm": 3.054675340652466, "learning_rate": 3.856465692475982e-05, "loss": 1.1305, "step": 19386 }, { "epoch": 1.1555608534986292, "grad_norm": 3.454700469970703, "learning_rate": 3.8555450850608934e-05, "loss": 1.1705, "step": 19388 }, { "epoch": 1.1556800572177852, "grad_norm": 3.0553019046783447, "learning_rate": 3.854624518586872e-05, "loss": 1.3323, "step": 19390 }, { "epoch": 1.1557992609369412, "grad_norm": 2.874239683151245, "learning_rate": 3.853703993086852e-05, "loss": 1.2618, "step": 19392 }, { "epoch": 1.1559184646560974, "grad_norm": 2.9513437747955322, "learning_rate": 3.8527835085937594e-05, "loss": 1.0772, "step": 19394 }, { "epoch": 1.1560376683752533, "grad_norm": 3.5096275806427, "learning_rate": 3.8518630651405284e-05, "loss": 1.2565, "step": 19396 }, { "epoch": 1.1561568720944093, "grad_norm": 3.0391223430633545, "learning_rate": 3.850942662760083e-05, "loss": 1.0603, "step": 19398 }, { "epoch": 1.1562760758135653, "grad_norm": 2.822591781616211, "learning_rate": 3.85002230148535e-05, "loss": 1.1479, "step": 19400 }, { "epoch": 1.1563952795327215, "grad_norm": 2.832811117172241, "learning_rate": 3.849101981349255e-05, "loss": 1.234, "step": 19402 }, { "epoch": 1.1565144832518774, "grad_norm": 3.0579869747161865, "learning_rate": 3.848181702384718e-05, "loss": 1.3691, "step": 19404 }, { "epoch": 1.1566336869710334, "grad_norm": 3.002504825592041, "learning_rate": 3.847261464624663e-05, "loss": 1.126, "step": 19406 }, { "epoch": 1.1567528906901896, "grad_norm": 3.1104722023010254, "learning_rate": 3.8463412681020095e-05, "loss": 1.1424, "step": 19408 }, { "epoch": 1.1568720944093456, "grad_norm": 3.2150275707244873, "learning_rate": 3.8454211128496774e-05, "loss": 1.3079, "step": 19410 }, { "epoch": 1.1569912981285015, "grad_norm": 3.078310251235962, "learning_rate": 3.8445009989005806e-05, "loss": 1.147, "step": 19412 }, { "epoch": 1.1571105018476577, "grad_norm": 3.2023019790649414, "learning_rate": 3.843580926287637e-05, "loss": 1.2356, "step": 19414 }, { "epoch": 1.1572297055668137, "grad_norm": 3.2403829097747803, "learning_rate": 3.842660895043762e-05, "loss": 1.173, "step": 19416 }, { "epoch": 1.1573489092859697, "grad_norm": 3.5196361541748047, "learning_rate": 3.8417409052018645e-05, "loss": 1.1836, "step": 19418 }, { "epoch": 1.1574681130051259, "grad_norm": 2.971849203109741, "learning_rate": 3.840820956794858e-05, "loss": 1.0927, "step": 19420 }, { "epoch": 1.1575873167242818, "grad_norm": 3.0743680000305176, "learning_rate": 3.839901049855652e-05, "loss": 1.1073, "step": 19422 }, { "epoch": 1.1577065204434378, "grad_norm": 2.996925115585327, "learning_rate": 3.838981184417156e-05, "loss": 1.3224, "step": 19424 }, { "epoch": 1.1578257241625938, "grad_norm": 3.4025604724884033, "learning_rate": 3.838061360512273e-05, "loss": 1.1365, "step": 19426 }, { "epoch": 1.15794492788175, "grad_norm": 3.33561635017395, "learning_rate": 3.837141578173913e-05, "loss": 1.2418, "step": 19428 }, { "epoch": 1.158064131600906, "grad_norm": 3.3618898391723633, "learning_rate": 3.8362218374349754e-05, "loss": 1.1057, "step": 19430 }, { "epoch": 1.158183335320062, "grad_norm": 3.455310344696045, "learning_rate": 3.835302138328366e-05, "loss": 1.2198, "step": 19432 }, { "epoch": 1.158302539039218, "grad_norm": 3.082813262939453, "learning_rate": 3.8343824808869853e-05, "loss": 1.2456, "step": 19434 }, { "epoch": 1.158421742758374, "grad_norm": 3.0161943435668945, "learning_rate": 3.8334628651437285e-05, "loss": 1.1356, "step": 19436 }, { "epoch": 1.15854094647753, "grad_norm": 3.0052366256713867, "learning_rate": 3.832543291131499e-05, "loss": 1.1807, "step": 19438 }, { "epoch": 1.1586601501966862, "grad_norm": 3.430878162384033, "learning_rate": 3.831623758883188e-05, "loss": 1.2861, "step": 19440 }, { "epoch": 1.1587793539158422, "grad_norm": 3.189859628677368, "learning_rate": 3.830704268431695e-05, "loss": 1.1541, "step": 19442 }, { "epoch": 1.1588985576349982, "grad_norm": 2.997441053390503, "learning_rate": 3.829784819809909e-05, "loss": 1.207, "step": 19444 }, { "epoch": 1.1590177613541544, "grad_norm": 3.1653289794921875, "learning_rate": 3.828865413050724e-05, "loss": 1.1186, "step": 19446 }, { "epoch": 1.1591369650733103, "grad_norm": 3.0595040321350098, "learning_rate": 3.8279460481870324e-05, "loss": 1.1524, "step": 19448 }, { "epoch": 1.1592561687924663, "grad_norm": 2.932965040206909, "learning_rate": 3.827026725251718e-05, "loss": 1.2637, "step": 19450 }, { "epoch": 1.1593753725116223, "grad_norm": 3.282493829727173, "learning_rate": 3.826107444277672e-05, "loss": 1.1741, "step": 19452 }, { "epoch": 1.1594945762307785, "grad_norm": 3.2932310104370117, "learning_rate": 3.825188205297778e-05, "loss": 1.1502, "step": 19454 }, { "epoch": 1.1596137799499344, "grad_norm": 3.515665054321289, "learning_rate": 3.8242690083449244e-05, "loss": 1.1726, "step": 19456 }, { "epoch": 1.1597329836690904, "grad_norm": 2.922144889831543, "learning_rate": 3.8233498534519884e-05, "loss": 1.0851, "step": 19458 }, { "epoch": 1.1598521873882466, "grad_norm": 2.6334915161132812, "learning_rate": 3.822430740651855e-05, "loss": 1.1676, "step": 19460 }, { "epoch": 1.1599713911074025, "grad_norm": 3.2446255683898926, "learning_rate": 3.821511669977404e-05, "loss": 1.2354, "step": 19462 }, { "epoch": 1.1600905948265585, "grad_norm": 3.278571605682373, "learning_rate": 3.82059264146151e-05, "loss": 1.2009, "step": 19464 }, { "epoch": 1.1602097985457147, "grad_norm": 3.3217504024505615, "learning_rate": 3.819673655137056e-05, "loss": 1.309, "step": 19466 }, { "epoch": 1.1603290022648707, "grad_norm": 2.9297549724578857, "learning_rate": 3.8187547110369106e-05, "loss": 1.1145, "step": 19468 }, { "epoch": 1.1604482059840266, "grad_norm": 3.4034793376922607, "learning_rate": 3.817835809193952e-05, "loss": 1.2264, "step": 19470 }, { "epoch": 1.1605674097031828, "grad_norm": 3.036074161529541, "learning_rate": 3.816916949641051e-05, "loss": 1.1251, "step": 19472 }, { "epoch": 1.1606866134223388, "grad_norm": 3.2067742347717285, "learning_rate": 3.81599813241108e-05, "loss": 1.2174, "step": 19474 }, { "epoch": 1.1608058171414948, "grad_norm": 2.8903472423553467, "learning_rate": 3.8150793575369066e-05, "loss": 1.1709, "step": 19476 }, { "epoch": 1.1609250208606507, "grad_norm": 3.354893684387207, "learning_rate": 3.8141606250513976e-05, "loss": 1.2548, "step": 19478 }, { "epoch": 1.161044224579807, "grad_norm": 2.9738430976867676, "learning_rate": 3.8132419349874216e-05, "loss": 1.1701, "step": 19480 }, { "epoch": 1.161163428298963, "grad_norm": 2.7008705139160156, "learning_rate": 3.81232328737784e-05, "loss": 1.0319, "step": 19482 }, { "epoch": 1.1612826320181189, "grad_norm": 2.9990806579589844, "learning_rate": 3.811404682255519e-05, "loss": 1.2321, "step": 19484 }, { "epoch": 1.161401835737275, "grad_norm": 2.803936243057251, "learning_rate": 3.810486119653319e-05, "loss": 1.0793, "step": 19486 }, { "epoch": 1.161521039456431, "grad_norm": 2.790809392929077, "learning_rate": 3.809567599604102e-05, "loss": 1.0308, "step": 19488 }, { "epoch": 1.161640243175587, "grad_norm": 3.414133071899414, "learning_rate": 3.808649122140723e-05, "loss": 1.1021, "step": 19490 }, { "epoch": 1.1617594468947432, "grad_norm": 3.3407626152038574, "learning_rate": 3.8077306872960414e-05, "loss": 1.334, "step": 19492 }, { "epoch": 1.1618786506138992, "grad_norm": 3.1104447841644287, "learning_rate": 3.806812295102915e-05, "loss": 1.2607, "step": 19494 }, { "epoch": 1.1619978543330551, "grad_norm": 4.117298126220703, "learning_rate": 3.805893945594191e-05, "loss": 1.301, "step": 19496 }, { "epoch": 1.1621170580522113, "grad_norm": 3.5797271728515625, "learning_rate": 3.804975638802729e-05, "loss": 1.1798, "step": 19498 }, { "epoch": 1.1622362617713673, "grad_norm": 3.170029401779175, "learning_rate": 3.804057374761376e-05, "loss": 1.2494, "step": 19500 }, { "epoch": 1.1623554654905233, "grad_norm": 3.052731513977051, "learning_rate": 3.803139153502983e-05, "loss": 1.0483, "step": 19502 }, { "epoch": 1.1624746692096792, "grad_norm": 3.340620279312134, "learning_rate": 3.802220975060397e-05, "loss": 1.324, "step": 19504 }, { "epoch": 1.1625938729288354, "grad_norm": 3.5457963943481445, "learning_rate": 3.801302839466466e-05, "loss": 1.2335, "step": 19506 }, { "epoch": 1.1627130766479914, "grad_norm": 3.266630172729492, "learning_rate": 3.800384746754033e-05, "loss": 1.0819, "step": 19508 }, { "epoch": 1.1628322803671474, "grad_norm": 2.95408034324646, "learning_rate": 3.799466696955942e-05, "loss": 1.174, "step": 19510 }, { "epoch": 1.1629514840863036, "grad_norm": 3.513890027999878, "learning_rate": 3.798548690105036e-05, "loss": 1.2043, "step": 19512 }, { "epoch": 1.1630706878054595, "grad_norm": 3.0574028491973877, "learning_rate": 3.797630726234152e-05, "loss": 1.2108, "step": 19514 }, { "epoch": 1.1631898915246155, "grad_norm": 3.524430751800537, "learning_rate": 3.796712805376132e-05, "loss": 1.1183, "step": 19516 }, { "epoch": 1.1633090952437717, "grad_norm": 2.7957682609558105, "learning_rate": 3.795794927563811e-05, "loss": 1.121, "step": 19518 }, { "epoch": 1.1634282989629277, "grad_norm": 3.4203994274139404, "learning_rate": 3.794877092830027e-05, "loss": 1.1791, "step": 19520 }, { "epoch": 1.1635475026820836, "grad_norm": 3.1531949043273926, "learning_rate": 3.793959301207613e-05, "loss": 1.1866, "step": 19522 }, { "epoch": 1.1636667064012398, "grad_norm": 2.9488275051116943, "learning_rate": 3.7930415527293994e-05, "loss": 1.3125, "step": 19524 }, { "epoch": 1.1637859101203958, "grad_norm": 3.134204387664795, "learning_rate": 3.7921238474282203e-05, "loss": 1.2459, "step": 19526 }, { "epoch": 1.1639051138395518, "grad_norm": 3.2348525524139404, "learning_rate": 3.791206185336902e-05, "loss": 1.2109, "step": 19528 }, { "epoch": 1.1640243175587077, "grad_norm": 3.165555715560913, "learning_rate": 3.790288566488277e-05, "loss": 1.1685, "step": 19530 }, { "epoch": 1.164143521277864, "grad_norm": 3.4543533325195312, "learning_rate": 3.789370990915167e-05, "loss": 1.2049, "step": 19532 }, { "epoch": 1.1642627249970199, "grad_norm": 3.098227024078369, "learning_rate": 3.7884534586503996e-05, "loss": 1.3076, "step": 19534 }, { "epoch": 1.1643819287161759, "grad_norm": 3.4130465984344482, "learning_rate": 3.787535969726796e-05, "loss": 1.2699, "step": 19536 }, { "epoch": 1.164501132435332, "grad_norm": 3.0677835941314697, "learning_rate": 3.786618524177182e-05, "loss": 1.0469, "step": 19538 }, { "epoch": 1.164620336154488, "grad_norm": 3.245513677597046, "learning_rate": 3.785701122034373e-05, "loss": 1.3287, "step": 19540 }, { "epoch": 1.164739539873644, "grad_norm": 3.0710654258728027, "learning_rate": 3.7847837633311886e-05, "loss": 1.148, "step": 19542 }, { "epoch": 1.1648587435928002, "grad_norm": 2.794783592224121, "learning_rate": 3.78386644810045e-05, "loss": 1.0839, "step": 19544 }, { "epoch": 1.1649779473119561, "grad_norm": 2.8955414295196533, "learning_rate": 3.782949176374966e-05, "loss": 1.1769, "step": 19546 }, { "epoch": 1.165097151031112, "grad_norm": 3.098747968673706, "learning_rate": 3.782031948187556e-05, "loss": 1.3056, "step": 19548 }, { "epoch": 1.1652163547502683, "grad_norm": 3.3348751068115234, "learning_rate": 3.781114763571029e-05, "loss": 1.1569, "step": 19550 }, { "epoch": 1.1653355584694243, "grad_norm": 3.3305230140686035, "learning_rate": 3.7801976225582e-05, "loss": 1.1202, "step": 19552 }, { "epoch": 1.1654547621885802, "grad_norm": 3.6008522510528564, "learning_rate": 3.779280525181874e-05, "loss": 1.2776, "step": 19554 }, { "epoch": 1.1655739659077362, "grad_norm": 3.0663275718688965, "learning_rate": 3.778363471474859e-05, "loss": 1.3591, "step": 19556 }, { "epoch": 1.1656931696268924, "grad_norm": 2.6086833477020264, "learning_rate": 3.777446461469965e-05, "loss": 1.0302, "step": 19558 }, { "epoch": 1.1658123733460484, "grad_norm": 3.582000255584717, "learning_rate": 3.7765294951999905e-05, "loss": 1.2267, "step": 19560 }, { "epoch": 1.1659315770652043, "grad_norm": 3.288569211959839, "learning_rate": 3.775612572697745e-05, "loss": 1.4426, "step": 19562 }, { "epoch": 1.1660507807843605, "grad_norm": 3.410222053527832, "learning_rate": 3.774695693996025e-05, "loss": 1.1784, "step": 19564 }, { "epoch": 1.1661699845035165, "grad_norm": 3.3351519107818604, "learning_rate": 3.773778859127633e-05, "loss": 1.1798, "step": 19566 }, { "epoch": 1.1662891882226725, "grad_norm": 3.185208320617676, "learning_rate": 3.772862068125369e-05, "loss": 1.2414, "step": 19568 }, { "epoch": 1.1664083919418287, "grad_norm": 4.899850845336914, "learning_rate": 3.7719453210220244e-05, "loss": 1.1734, "step": 19570 }, { "epoch": 1.1665275956609846, "grad_norm": 3.3028488159179688, "learning_rate": 3.7710286178503986e-05, "loss": 1.1982, "step": 19572 }, { "epoch": 1.1666467993801406, "grad_norm": 3.3383829593658447, "learning_rate": 3.770111958643283e-05, "loss": 1.1168, "step": 19574 }, { "epoch": 1.1667660030992968, "grad_norm": 3.051152229309082, "learning_rate": 3.769195343433473e-05, "loss": 1.1921, "step": 19576 }, { "epoch": 1.1668852068184528, "grad_norm": 3.223557472229004, "learning_rate": 3.7682787722537544e-05, "loss": 1.1872, "step": 19578 }, { "epoch": 1.1670044105376087, "grad_norm": 3.045193672180176, "learning_rate": 3.767362245136919e-05, "loss": 1.3507, "step": 19580 }, { "epoch": 1.1671236142567647, "grad_norm": 3.1610374450683594, "learning_rate": 3.766445762115755e-05, "loss": 1.1642, "step": 19582 }, { "epoch": 1.1672428179759209, "grad_norm": 3.4273018836975098, "learning_rate": 3.7655293232230446e-05, "loss": 1.348, "step": 19584 }, { "epoch": 1.1673620216950769, "grad_norm": 3.476478338241577, "learning_rate": 3.764612928491575e-05, "loss": 1.1538, "step": 19586 }, { "epoch": 1.167481225414233, "grad_norm": 3.002126693725586, "learning_rate": 3.7636965779541264e-05, "loss": 1.2703, "step": 19588 }, { "epoch": 1.167600429133389, "grad_norm": 3.191467523574829, "learning_rate": 3.762780271643484e-05, "loss": 1.121, "step": 19590 }, { "epoch": 1.167719632852545, "grad_norm": 3.534278154373169, "learning_rate": 3.7618640095924207e-05, "loss": 1.2008, "step": 19592 }, { "epoch": 1.167838836571701, "grad_norm": 2.8602797985076904, "learning_rate": 3.7609477918337214e-05, "loss": 1.0134, "step": 19594 }, { "epoch": 1.1679580402908571, "grad_norm": 3.0369367599487305, "learning_rate": 3.7600316184001574e-05, "loss": 1.0103, "step": 19596 }, { "epoch": 1.1680772440100131, "grad_norm": 3.3145201206207275, "learning_rate": 3.759115489324505e-05, "loss": 1.1733, "step": 19598 }, { "epoch": 1.168196447729169, "grad_norm": 3.380272626876831, "learning_rate": 3.758199404639539e-05, "loss": 1.1765, "step": 19600 }, { "epoch": 1.1683156514483253, "grad_norm": 3.3166913986206055, "learning_rate": 3.757283364378026e-05, "loss": 1.1777, "step": 19602 }, { "epoch": 1.1684348551674812, "grad_norm": 3.127389669418335, "learning_rate": 3.756367368572741e-05, "loss": 1.2496, "step": 19604 }, { "epoch": 1.1685540588866372, "grad_norm": 2.9629695415496826, "learning_rate": 3.7554514172564483e-05, "loss": 1.2613, "step": 19606 }, { "epoch": 1.1686732626057932, "grad_norm": 2.6883506774902344, "learning_rate": 3.754535510461919e-05, "loss": 1.1867, "step": 19608 }, { "epoch": 1.1687924663249494, "grad_norm": 2.881911277770996, "learning_rate": 3.753619648221914e-05, "loss": 1.1001, "step": 19610 }, { "epoch": 1.1689116700441053, "grad_norm": 3.1844377517700195, "learning_rate": 3.7527038305691994e-05, "loss": 1.3582, "step": 19612 }, { "epoch": 1.1690308737632615, "grad_norm": 3.0061867237091064, "learning_rate": 3.751788057536538e-05, "loss": 1.2896, "step": 19614 }, { "epoch": 1.1691500774824175, "grad_norm": 3.0646169185638428, "learning_rate": 3.750872329156685e-05, "loss": 1.2133, "step": 19616 }, { "epoch": 1.1692692812015735, "grad_norm": 3.1862969398498535, "learning_rate": 3.749956645462405e-05, "loss": 1.0661, "step": 19618 }, { "epoch": 1.1693884849207294, "grad_norm": 3.208378553390503, "learning_rate": 3.749041006486451e-05, "loss": 1.1687, "step": 19620 }, { "epoch": 1.1695076886398856, "grad_norm": 3.0024685859680176, "learning_rate": 3.7481254122615836e-05, "loss": 1.1301, "step": 19622 }, { "epoch": 1.1696268923590416, "grad_norm": 3.301429510116577, "learning_rate": 3.74720986282055e-05, "loss": 1.3305, "step": 19624 }, { "epoch": 1.1697460960781976, "grad_norm": 3.231762170791626, "learning_rate": 3.7462943581961075e-05, "loss": 1.2262, "step": 19626 }, { "epoch": 1.1698652997973538, "grad_norm": 3.046304702758789, "learning_rate": 3.745378898421007e-05, "loss": 1.2546, "step": 19628 }, { "epoch": 1.1699845035165097, "grad_norm": 3.323133945465088, "learning_rate": 3.7444634835279915e-05, "loss": 1.161, "step": 19630 }, { "epoch": 1.1701037072356657, "grad_norm": 2.7181789875030518, "learning_rate": 3.743548113549817e-05, "loss": 1.1653, "step": 19632 }, { "epoch": 1.1702229109548217, "grad_norm": 3.2409393787384033, "learning_rate": 3.7426327885192234e-05, "loss": 1.1753, "step": 19634 }, { "epoch": 1.1703421146739779, "grad_norm": 3.085200786590576, "learning_rate": 3.741717508468957e-05, "loss": 1.3707, "step": 19636 }, { "epoch": 1.1704613183931338, "grad_norm": 3.3828840255737305, "learning_rate": 3.740802273431761e-05, "loss": 1.268, "step": 19638 }, { "epoch": 1.17058052211229, "grad_norm": 2.984898567199707, "learning_rate": 3.7398870834403764e-05, "loss": 1.1096, "step": 19640 }, { "epoch": 1.170699725831446, "grad_norm": 3.270639419555664, "learning_rate": 3.738971938527542e-05, "loss": 1.1206, "step": 19642 }, { "epoch": 1.170818929550602, "grad_norm": 3.200378656387329, "learning_rate": 3.738056838725995e-05, "loss": 1.2813, "step": 19644 }, { "epoch": 1.170938133269758, "grad_norm": 3.2923669815063477, "learning_rate": 3.737141784068475e-05, "loss": 1.2195, "step": 19646 }, { "epoch": 1.1710573369889141, "grad_norm": 2.888704776763916, "learning_rate": 3.736226774587712e-05, "loss": 1.1094, "step": 19648 }, { "epoch": 1.17117654070807, "grad_norm": 2.954029083251953, "learning_rate": 3.735311810316442e-05, "loss": 1.0786, "step": 19650 }, { "epoch": 1.171295744427226, "grad_norm": 3.056438684463501, "learning_rate": 3.7343968912873936e-05, "loss": 1.1972, "step": 19652 }, { "epoch": 1.1714149481463823, "grad_norm": 3.2505037784576416, "learning_rate": 3.7334820175333017e-05, "loss": 1.2014, "step": 19654 }, { "epoch": 1.1715341518655382, "grad_norm": 2.7374274730682373, "learning_rate": 3.7325671890868896e-05, "loss": 1.0953, "step": 19656 }, { "epoch": 1.1716533555846942, "grad_norm": 3.21292781829834, "learning_rate": 3.731652405980887e-05, "loss": 1.2484, "step": 19658 }, { "epoch": 1.1717725593038502, "grad_norm": 2.9561378955841064, "learning_rate": 3.730737668248017e-05, "loss": 1.0964, "step": 19660 }, { "epoch": 1.1718917630230063, "grad_norm": 2.9894282817840576, "learning_rate": 3.729822975921001e-05, "loss": 1.0705, "step": 19662 }, { "epoch": 1.1720109667421623, "grad_norm": 3.2768070697784424, "learning_rate": 3.728908329032567e-05, "loss": 1.1386, "step": 19664 }, { "epoch": 1.1721301704613185, "grad_norm": 4.362641334533691, "learning_rate": 3.727993727615427e-05, "loss": 1.2282, "step": 19666 }, { "epoch": 1.1722493741804745, "grad_norm": 3.3320586681365967, "learning_rate": 3.7270791717023064e-05, "loss": 1.105, "step": 19668 }, { "epoch": 1.1723685778996304, "grad_norm": 3.274120807647705, "learning_rate": 3.7261646613259174e-05, "loss": 1.1702, "step": 19670 }, { "epoch": 1.1724877816187864, "grad_norm": 3.4660861492156982, "learning_rate": 3.725250196518979e-05, "loss": 1.19, "step": 19672 }, { "epoch": 1.1726069853379426, "grad_norm": 3.354259729385376, "learning_rate": 3.724335777314201e-05, "loss": 1.3117, "step": 19674 }, { "epoch": 1.1727261890570986, "grad_norm": 3.0830113887786865, "learning_rate": 3.723421403744296e-05, "loss": 1.0516, "step": 19676 }, { "epoch": 1.1728453927762545, "grad_norm": 2.9802322387695312, "learning_rate": 3.722507075841978e-05, "loss": 1.2041, "step": 19678 }, { "epoch": 1.1729645964954107, "grad_norm": 2.636870861053467, "learning_rate": 3.7215927936399495e-05, "loss": 1.0887, "step": 19680 }, { "epoch": 1.1730838002145667, "grad_norm": 3.1986148357391357, "learning_rate": 3.720678557170922e-05, "loss": 1.1657, "step": 19682 }, { "epoch": 1.1732030039337227, "grad_norm": 3.37805438041687, "learning_rate": 3.7197643664676e-05, "loss": 1.1263, "step": 19684 }, { "epoch": 1.1733222076528786, "grad_norm": 2.7288658618927, "learning_rate": 3.718850221562688e-05, "loss": 1.1543, "step": 19686 }, { "epoch": 1.1734414113720348, "grad_norm": 3.342956066131592, "learning_rate": 3.717936122488886e-05, "loss": 1.1113, "step": 19688 }, { "epoch": 1.1735606150911908, "grad_norm": 3.434636116027832, "learning_rate": 3.717022069278894e-05, "loss": 1.1527, "step": 19690 }, { "epoch": 1.173679818810347, "grad_norm": 3.1191940307617188, "learning_rate": 3.7161080619654147e-05, "loss": 1.0877, "step": 19692 }, { "epoch": 1.173799022529503, "grad_norm": 3.2676682472229004, "learning_rate": 3.7151941005811396e-05, "loss": 1.2095, "step": 19694 }, { "epoch": 1.173918226248659, "grad_norm": 3.6058757305145264, "learning_rate": 3.714280185158771e-05, "loss": 1.1781, "step": 19696 }, { "epoch": 1.174037429967815, "grad_norm": 3.1387436389923096, "learning_rate": 3.713366315730997e-05, "loss": 1.2048, "step": 19698 }, { "epoch": 1.174156633686971, "grad_norm": 3.084496259689331, "learning_rate": 3.7124524923305126e-05, "loss": 1.0712, "step": 19700 }, { "epoch": 1.174275837406127, "grad_norm": 3.483003854751587, "learning_rate": 3.7115387149900096e-05, "loss": 1.0843, "step": 19702 }, { "epoch": 1.174395041125283, "grad_norm": 3.221187114715576, "learning_rate": 3.710624983742173e-05, "loss": 1.1748, "step": 19704 }, { "epoch": 1.1745142448444392, "grad_norm": 3.1660165786743164, "learning_rate": 3.7097112986196924e-05, "loss": 1.0663, "step": 19706 }, { "epoch": 1.1746334485635952, "grad_norm": 3.06022572517395, "learning_rate": 3.7087976596552534e-05, "loss": 1.2888, "step": 19708 }, { "epoch": 1.1747526522827512, "grad_norm": 3.0850119590759277, "learning_rate": 3.707884066881541e-05, "loss": 1.127, "step": 19710 }, { "epoch": 1.1748718560019074, "grad_norm": 3.4042458534240723, "learning_rate": 3.706970520331235e-05, "loss": 1.0817, "step": 19712 }, { "epoch": 1.1749910597210633, "grad_norm": 2.9498541355133057, "learning_rate": 3.7060570200370184e-05, "loss": 1.1238, "step": 19714 }, { "epoch": 1.1751102634402193, "grad_norm": 3.0517349243164062, "learning_rate": 3.705143566031568e-05, "loss": 1.1342, "step": 19716 }, { "epoch": 1.1752294671593755, "grad_norm": 3.7954869270324707, "learning_rate": 3.7042301583475656e-05, "loss": 1.061, "step": 19718 }, { "epoch": 1.1753486708785315, "grad_norm": 3.307116746902466, "learning_rate": 3.703316797017682e-05, "loss": 1.2514, "step": 19720 }, { "epoch": 1.1754678745976874, "grad_norm": 3.0496225357055664, "learning_rate": 3.7024034820745926e-05, "loss": 1.1444, "step": 19722 }, { "epoch": 1.1755870783168434, "grad_norm": 2.8503031730651855, "learning_rate": 3.701490213550972e-05, "loss": 1.2267, "step": 19724 }, { "epoch": 1.1757062820359996, "grad_norm": 3.6544110774993896, "learning_rate": 3.7005769914794864e-05, "loss": 1.134, "step": 19726 }, { "epoch": 1.1758254857551556, "grad_norm": 3.045380115509033, "learning_rate": 3.699663815892811e-05, "loss": 1.3411, "step": 19728 }, { "epoch": 1.1759446894743115, "grad_norm": 3.5128707885742188, "learning_rate": 3.6987506868236073e-05, "loss": 1.1917, "step": 19730 }, { "epoch": 1.1760638931934677, "grad_norm": 3.1829850673675537, "learning_rate": 3.697837604304545e-05, "loss": 1.2865, "step": 19732 }, { "epoch": 1.1761830969126237, "grad_norm": 3.233513116836548, "learning_rate": 3.696924568368288e-05, "loss": 1.2746, "step": 19734 }, { "epoch": 1.1763023006317797, "grad_norm": 2.7957870960235596, "learning_rate": 3.696011579047496e-05, "loss": 1.1765, "step": 19736 }, { "epoch": 1.1764215043509358, "grad_norm": 3.332231044769287, "learning_rate": 3.695098636374832e-05, "loss": 1.3463, "step": 19738 }, { "epoch": 1.1765407080700918, "grad_norm": 3.4308011531829834, "learning_rate": 3.6941857403829526e-05, "loss": 1.2517, "step": 19740 }, { "epoch": 1.1766599117892478, "grad_norm": 3.1006693840026855, "learning_rate": 3.69327289110452e-05, "loss": 1.1393, "step": 19742 }, { "epoch": 1.176779115508404, "grad_norm": 3.370974540710449, "learning_rate": 3.6923600885721855e-05, "loss": 1.1635, "step": 19744 }, { "epoch": 1.17689831922756, "grad_norm": 3.0570132732391357, "learning_rate": 3.6914473328186046e-05, "loss": 1.2419, "step": 19746 }, { "epoch": 1.177017522946716, "grad_norm": 3.128643035888672, "learning_rate": 3.690534623876432e-05, "loss": 1.1794, "step": 19748 }, { "epoch": 1.1771367266658719, "grad_norm": 3.0886611938476562, "learning_rate": 3.689621961778312e-05, "loss": 1.1168, "step": 19750 }, { "epoch": 1.177255930385028, "grad_norm": 3.355475902557373, "learning_rate": 3.688709346556901e-05, "loss": 1.0911, "step": 19752 }, { "epoch": 1.177375134104184, "grad_norm": 3.238057851791382, "learning_rate": 3.687796778244841e-05, "loss": 1.2757, "step": 19754 }, { "epoch": 1.17749433782334, "grad_norm": 3.1695303916931152, "learning_rate": 3.686884256874783e-05, "loss": 1.2143, "step": 19756 }, { "epoch": 1.1776135415424962, "grad_norm": 3.209862232208252, "learning_rate": 3.685971782479364e-05, "loss": 1.2418, "step": 19758 }, { "epoch": 1.1777327452616522, "grad_norm": 7.509497165679932, "learning_rate": 3.685059355091233e-05, "loss": 1.195, "step": 19760 }, { "epoch": 1.1778519489808081, "grad_norm": 3.281076431274414, "learning_rate": 3.6841469747430267e-05, "loss": 1.2459, "step": 19762 }, { "epoch": 1.1779711526999643, "grad_norm": 3.205227851867676, "learning_rate": 3.683234641467386e-05, "loss": 1.1551, "step": 19764 }, { "epoch": 1.1780903564191203, "grad_norm": 3.329578161239624, "learning_rate": 3.682322355296948e-05, "loss": 1.1256, "step": 19766 }, { "epoch": 1.1782095601382763, "grad_norm": 3.0168302059173584, "learning_rate": 3.6814101162643465e-05, "loss": 1.1346, "step": 19768 }, { "epoch": 1.1783287638574325, "grad_norm": 2.9849393367767334, "learning_rate": 3.680497924402217e-05, "loss": 1.015, "step": 19770 }, { "epoch": 1.1784479675765884, "grad_norm": 3.4551689624786377, "learning_rate": 3.679585779743191e-05, "loss": 1.1459, "step": 19772 }, { "epoch": 1.1785671712957444, "grad_norm": 3.3769779205322266, "learning_rate": 3.678673682319901e-05, "loss": 1.3427, "step": 19774 }, { "epoch": 1.1786863750149004, "grad_norm": 3.4971606731414795, "learning_rate": 3.677761632164972e-05, "loss": 1.213, "step": 19776 }, { "epoch": 1.1788055787340566, "grad_norm": 3.4391353130340576, "learning_rate": 3.6768496293110345e-05, "loss": 1.2968, "step": 19778 }, { "epoch": 1.1789247824532125, "grad_norm": 2.957345724105835, "learning_rate": 3.6759376737907145e-05, "loss": 1.1753, "step": 19780 }, { "epoch": 1.1790439861723685, "grad_norm": 3.388770818710327, "learning_rate": 3.6750257656366314e-05, "loss": 1.2555, "step": 19782 }, { "epoch": 1.1791631898915247, "grad_norm": 3.396057367324829, "learning_rate": 3.674113904881411e-05, "loss": 1.2267, "step": 19784 }, { "epoch": 1.1792823936106807, "grad_norm": 3.0538113117218018, "learning_rate": 3.6732020915576725e-05, "loss": 1.1057, "step": 19786 }, { "epoch": 1.1794015973298366, "grad_norm": 3.3032069206237793, "learning_rate": 3.672290325698036e-05, "loss": 1.2351, "step": 19788 }, { "epoch": 1.1795208010489928, "grad_norm": 2.9443228244781494, "learning_rate": 3.6713786073351145e-05, "loss": 1.2776, "step": 19790 }, { "epoch": 1.1796400047681488, "grad_norm": 2.950470209121704, "learning_rate": 3.670466936501529e-05, "loss": 1.2132, "step": 19792 }, { "epoch": 1.1797592084873048, "grad_norm": 3.214409351348877, "learning_rate": 3.6695553132298896e-05, "loss": 1.2366, "step": 19794 }, { "epoch": 1.179878412206461, "grad_norm": 3.302177667617798, "learning_rate": 3.668643737552807e-05, "loss": 1.3441, "step": 19796 }, { "epoch": 1.179997615925617, "grad_norm": 3.387040138244629, "learning_rate": 3.6677322095028954e-05, "loss": 1.168, "step": 19798 }, { "epoch": 1.1801168196447729, "grad_norm": 2.980700969696045, "learning_rate": 3.666820729112759e-05, "loss": 1.1517, "step": 19800 }, { "epoch": 1.1802360233639289, "grad_norm": 2.5389676094055176, "learning_rate": 3.665909296415008e-05, "loss": 1.137, "step": 19802 }, { "epoch": 1.180355227083085, "grad_norm": 3.158174991607666, "learning_rate": 3.664997911442244e-05, "loss": 1.1901, "step": 19804 }, { "epoch": 1.180474430802241, "grad_norm": 3.2828664779663086, "learning_rate": 3.6640865742270755e-05, "loss": 1.0621, "step": 19806 }, { "epoch": 1.180593634521397, "grad_norm": 3.210643768310547, "learning_rate": 3.6631752848020993e-05, "loss": 1.1714, "step": 19808 }, { "epoch": 1.1807128382405532, "grad_norm": 3.5040202140808105, "learning_rate": 3.662264043199916e-05, "loss": 1.3029, "step": 19810 }, { "epoch": 1.1808320419597091, "grad_norm": 3.0805044174194336, "learning_rate": 3.661352849453127e-05, "loss": 1.1799, "step": 19812 }, { "epoch": 1.1809512456788651, "grad_norm": 3.1058897972106934, "learning_rate": 3.660441703594325e-05, "loss": 1.0991, "step": 19814 }, { "epoch": 1.1810704493980213, "grad_norm": 3.419046401977539, "learning_rate": 3.6595306056561074e-05, "loss": 1.1553, "step": 19816 }, { "epoch": 1.1811896531171773, "grad_norm": 3.259197235107422, "learning_rate": 3.658619555671065e-05, "loss": 1.345, "step": 19818 }, { "epoch": 1.1813088568363332, "grad_norm": 3.1734204292297363, "learning_rate": 3.657708553671793e-05, "loss": 1.2958, "step": 19820 }, { "epoch": 1.1814280605554894, "grad_norm": 3.022099733352661, "learning_rate": 3.6567975996908764e-05, "loss": 1.1214, "step": 19822 }, { "epoch": 1.1815472642746454, "grad_norm": 3.934709072113037, "learning_rate": 3.655886693760907e-05, "loss": 1.2107, "step": 19824 }, { "epoch": 1.1816664679938014, "grad_norm": 2.8267648220062256, "learning_rate": 3.654975835914469e-05, "loss": 1.2236, "step": 19826 }, { "epoch": 1.1817856717129573, "grad_norm": 3.2241790294647217, "learning_rate": 3.6540650261841456e-05, "loss": 1.1736, "step": 19828 }, { "epoch": 1.1819048754321135, "grad_norm": 3.2221028804779053, "learning_rate": 3.653154264602524e-05, "loss": 1.0779, "step": 19830 }, { "epoch": 1.1820240791512695, "grad_norm": 3.0488555431365967, "learning_rate": 3.6522435512021805e-05, "loss": 1.1582, "step": 19832 }, { "epoch": 1.1821432828704255, "grad_norm": 3.0629634857177734, "learning_rate": 3.651332886015697e-05, "loss": 1.3011, "step": 19834 }, { "epoch": 1.1822624865895817, "grad_norm": 3.0712528228759766, "learning_rate": 3.65042226907565e-05, "loss": 1.1621, "step": 19836 }, { "epoch": 1.1823816903087376, "grad_norm": 3.5152602195739746, "learning_rate": 3.649511700414618e-05, "loss": 1.1758, "step": 19838 }, { "epoch": 1.1825008940278936, "grad_norm": 3.2488365173339844, "learning_rate": 3.648601180065172e-05, "loss": 1.0382, "step": 19840 }, { "epoch": 1.1826200977470498, "grad_norm": 3.0867576599121094, "learning_rate": 3.6476907080598846e-05, "loss": 1.2396, "step": 19842 }, { "epoch": 1.1827393014662058, "grad_norm": 3.103381395339966, "learning_rate": 3.6467802844313296e-05, "loss": 1.176, "step": 19844 }, { "epoch": 1.1828585051853617, "grad_norm": 2.8624930381774902, "learning_rate": 3.6458699092120716e-05, "loss": 1.1129, "step": 19846 }, { "epoch": 1.182977708904518, "grad_norm": 3.319257974624634, "learning_rate": 3.6449595824346813e-05, "loss": 1.2261, "step": 19848 }, { "epoch": 1.183096912623674, "grad_norm": 3.1338319778442383, "learning_rate": 3.6440493041317216e-05, "loss": 1.1814, "step": 19850 }, { "epoch": 1.1832161163428299, "grad_norm": 3.1521873474121094, "learning_rate": 3.6431390743357596e-05, "loss": 1.0256, "step": 19852 }, { "epoch": 1.1833353200619858, "grad_norm": 3.1086907386779785, "learning_rate": 3.642228893079355e-05, "loss": 1.196, "step": 19854 }, { "epoch": 1.183454523781142, "grad_norm": 2.928494930267334, "learning_rate": 3.641318760395067e-05, "loss": 1.0901, "step": 19856 }, { "epoch": 1.183573727500298, "grad_norm": 3.278496503829956, "learning_rate": 3.6404086763154576e-05, "loss": 1.1216, "step": 19858 }, { "epoch": 1.183692931219454, "grad_norm": 3.2480127811431885, "learning_rate": 3.639498640873078e-05, "loss": 1.194, "step": 19860 }, { "epoch": 1.1838121349386101, "grad_norm": 3.26165771484375, "learning_rate": 3.638588654100491e-05, "loss": 1.2668, "step": 19862 }, { "epoch": 1.1839313386577661, "grad_norm": 3.181328535079956, "learning_rate": 3.637678716030244e-05, "loss": 1.1445, "step": 19864 }, { "epoch": 1.184050542376922, "grad_norm": 3.2584033012390137, "learning_rate": 3.6367688266948906e-05, "loss": 1.2655, "step": 19866 }, { "epoch": 1.1841697460960783, "grad_norm": 2.9907267093658447, "learning_rate": 3.635858986126982e-05, "loss": 1.1521, "step": 19868 }, { "epoch": 1.1842889498152342, "grad_norm": 2.9245800971984863, "learning_rate": 3.634949194359063e-05, "loss": 0.951, "step": 19870 }, { "epoch": 1.1844081535343902, "grad_norm": 3.238372802734375, "learning_rate": 3.634039451423682e-05, "loss": 1.3162, "step": 19872 }, { "epoch": 1.1845273572535464, "grad_norm": 3.1120433807373047, "learning_rate": 3.633129757353383e-05, "loss": 1.2541, "step": 19874 }, { "epoch": 1.1846465609727024, "grad_norm": 3.3162522315979004, "learning_rate": 3.632220112180711e-05, "loss": 1.3485, "step": 19876 }, { "epoch": 1.1847657646918583, "grad_norm": 2.9203460216522217, "learning_rate": 3.631310515938203e-05, "loss": 1.2192, "step": 19878 }, { "epoch": 1.1848849684110143, "grad_norm": 3.1657774448394775, "learning_rate": 3.630400968658403e-05, "loss": 1.197, "step": 19880 }, { "epoch": 1.1850041721301705, "grad_norm": 3.0581939220428467, "learning_rate": 3.629491470373845e-05, "loss": 1.0926, "step": 19882 }, { "epoch": 1.1851233758493265, "grad_norm": 2.8597402572631836, "learning_rate": 3.628582021117067e-05, "loss": 1.1242, "step": 19884 }, { "epoch": 1.1852425795684824, "grad_norm": 3.122600793838501, "learning_rate": 3.627672620920603e-05, "loss": 1.2159, "step": 19886 }, { "epoch": 1.1853617832876386, "grad_norm": 3.3450067043304443, "learning_rate": 3.6267632698169826e-05, "loss": 1.261, "step": 19888 }, { "epoch": 1.1854809870067946, "grad_norm": 3.038752555847168, "learning_rate": 3.625853967838741e-05, "loss": 1.2082, "step": 19890 }, { "epoch": 1.1856001907259506, "grad_norm": 3.2271461486816406, "learning_rate": 3.624944715018401e-05, "loss": 1.2447, "step": 19892 }, { "epoch": 1.1857193944451068, "grad_norm": 3.118680477142334, "learning_rate": 3.624035511388497e-05, "loss": 1.1399, "step": 19894 }, { "epoch": 1.1858385981642627, "grad_norm": 2.8593928813934326, "learning_rate": 3.623126356981549e-05, "loss": 1.0428, "step": 19896 }, { "epoch": 1.1859578018834187, "grad_norm": 3.3172225952148438, "learning_rate": 3.6222172518300826e-05, "loss": 1.359, "step": 19898 }, { "epoch": 1.186077005602575, "grad_norm": 3.127776622772217, "learning_rate": 3.621308195966619e-05, "loss": 1.1756, "step": 19900 }, { "epoch": 1.1861962093217309, "grad_norm": 3.8883559703826904, "learning_rate": 3.6203991894236775e-05, "loss": 1.2335, "step": 19902 }, { "epoch": 1.1863154130408868, "grad_norm": 2.9717133045196533, "learning_rate": 3.6194902322337785e-05, "loss": 1.0659, "step": 19904 }, { "epoch": 1.1864346167600428, "grad_norm": 2.8419811725616455, "learning_rate": 3.6185813244294355e-05, "loss": 1.1114, "step": 19906 }, { "epoch": 1.186553820479199, "grad_norm": 3.2479796409606934, "learning_rate": 3.617672466043168e-05, "loss": 0.9865, "step": 19908 }, { "epoch": 1.186673024198355, "grad_norm": 3.2742645740509033, "learning_rate": 3.6167636571074825e-05, "loss": 1.2057, "step": 19910 }, { "epoch": 1.186792227917511, "grad_norm": 3.4131276607513428, "learning_rate": 3.6158548976548955e-05, "loss": 1.1893, "step": 19912 }, { "epoch": 1.1869114316366671, "grad_norm": 3.0587925910949707, "learning_rate": 3.614946187717916e-05, "loss": 1.2384, "step": 19914 }, { "epoch": 1.187030635355823, "grad_norm": 3.414874315261841, "learning_rate": 3.614037527329048e-05, "loss": 1.2279, "step": 19916 }, { "epoch": 1.187149839074979, "grad_norm": 3.2608823776245117, "learning_rate": 3.6131289165208004e-05, "loss": 1.1242, "step": 19918 }, { "epoch": 1.1872690427941353, "grad_norm": 3.3420307636260986, "learning_rate": 3.6122203553256753e-05, "loss": 1.346, "step": 19920 }, { "epoch": 1.1873882465132912, "grad_norm": 3.155958652496338, "learning_rate": 3.611311843776179e-05, "loss": 1.2061, "step": 19922 }, { "epoch": 1.1875074502324472, "grad_norm": 3.496957778930664, "learning_rate": 3.6104033819048065e-05, "loss": 1.2424, "step": 19924 }, { "epoch": 1.1876266539516034, "grad_norm": 3.043757677078247, "learning_rate": 3.609494969744062e-05, "loss": 1.0582, "step": 19926 }, { "epoch": 1.1877458576707594, "grad_norm": 3.3103206157684326, "learning_rate": 3.608586607326439e-05, "loss": 1.0964, "step": 19928 }, { "epoch": 1.1878650613899153, "grad_norm": 2.917273759841919, "learning_rate": 3.6076782946844325e-05, "loss": 1.0877, "step": 19930 }, { "epoch": 1.1879842651090713, "grad_norm": 3.2377378940582275, "learning_rate": 3.60677003185054e-05, "loss": 1.1054, "step": 19932 }, { "epoch": 1.1881034688282275, "grad_norm": 2.9733176231384277, "learning_rate": 3.605861818857248e-05, "loss": 1.1595, "step": 19934 }, { "epoch": 1.1882226725473835, "grad_norm": 3.1446681022644043, "learning_rate": 3.6049536557370494e-05, "loss": 1.0451, "step": 19936 }, { "epoch": 1.1883418762665394, "grad_norm": 3.1355443000793457, "learning_rate": 3.604045542522431e-05, "loss": 1.379, "step": 19938 }, { "epoch": 1.1884610799856956, "grad_norm": 3.2358314990997314, "learning_rate": 3.6031374792458815e-05, "loss": 1.2314, "step": 19940 }, { "epoch": 1.1885802837048516, "grad_norm": 2.874121904373169, "learning_rate": 3.602229465939881e-05, "loss": 1.0128, "step": 19942 }, { "epoch": 1.1886994874240076, "grad_norm": 3.093839168548584, "learning_rate": 3.6013215026369176e-05, "loss": 1.2575, "step": 19944 }, { "epoch": 1.1888186911431637, "grad_norm": 3.2295620441436768, "learning_rate": 3.60041358936947e-05, "loss": 1.4112, "step": 19946 }, { "epoch": 1.1889378948623197, "grad_norm": 2.9746644496917725, "learning_rate": 3.5995057261700146e-05, "loss": 1.1546, "step": 19948 }, { "epoch": 1.1890570985814757, "grad_norm": 3.238814353942871, "learning_rate": 3.5985979130710316e-05, "loss": 1.1136, "step": 19950 }, { "epoch": 1.1891763023006319, "grad_norm": 3.218421459197998, "learning_rate": 3.597690150104996e-05, "loss": 1.205, "step": 19952 }, { "epoch": 1.1892955060197878, "grad_norm": 2.728602886199951, "learning_rate": 3.596782437304384e-05, "loss": 1.1367, "step": 19954 }, { "epoch": 1.1894147097389438, "grad_norm": 3.0573999881744385, "learning_rate": 3.5958747747016605e-05, "loss": 1.3125, "step": 19956 }, { "epoch": 1.1895339134580998, "grad_norm": 2.8444325923919678, "learning_rate": 3.594967162329305e-05, "loss": 1.0168, "step": 19958 }, { "epoch": 1.189653117177256, "grad_norm": 3.088073968887329, "learning_rate": 3.5940596002197795e-05, "loss": 1.1759, "step": 19960 }, { "epoch": 1.189772320896412, "grad_norm": 2.89634108543396, "learning_rate": 3.593152088405552e-05, "loss": 1.1651, "step": 19962 }, { "epoch": 1.1898915246155681, "grad_norm": 2.97711443901062, "learning_rate": 3.59224462691909e-05, "loss": 1.1253, "step": 19964 }, { "epoch": 1.190010728334724, "grad_norm": 2.9389264583587646, "learning_rate": 3.591337215792852e-05, "loss": 1.1521, "step": 19966 }, { "epoch": 1.19012993205388, "grad_norm": 3.031481981277466, "learning_rate": 3.590429855059302e-05, "loss": 1.067, "step": 19968 }, { "epoch": 1.190249135773036, "grad_norm": 2.7418854236602783, "learning_rate": 3.589522544750898e-05, "loss": 1.0415, "step": 19970 }, { "epoch": 1.1903683394921922, "grad_norm": 3.389064073562622, "learning_rate": 3.588615284900101e-05, "loss": 1.2706, "step": 19972 }, { "epoch": 1.1904875432113482, "grad_norm": 2.9758858680725098, "learning_rate": 3.587708075539363e-05, "loss": 1.1595, "step": 19974 }, { "epoch": 1.1906067469305042, "grad_norm": 2.679868221282959, "learning_rate": 3.5868009167011384e-05, "loss": 1.1253, "step": 19976 }, { "epoch": 1.1907259506496604, "grad_norm": 3.3059070110321045, "learning_rate": 3.5858938084178826e-05, "loss": 1.3364, "step": 19978 }, { "epoch": 1.1908451543688163, "grad_norm": 2.7874667644500732, "learning_rate": 3.5849867507220405e-05, "loss": 1.141, "step": 19980 }, { "epoch": 1.1909643580879723, "grad_norm": 3.352435827255249, "learning_rate": 3.584079743646066e-05, "loss": 1.1138, "step": 19982 }, { "epoch": 1.1910835618071283, "grad_norm": 2.9392142295837402, "learning_rate": 3.583172787222402e-05, "loss": 1.3296, "step": 19984 }, { "epoch": 1.1912027655262845, "grad_norm": 2.8484113216400146, "learning_rate": 3.5822658814834965e-05, "loss": 1.1964, "step": 19986 }, { "epoch": 1.1913219692454404, "grad_norm": 3.047499895095825, "learning_rate": 3.581359026461791e-05, "loss": 1.0481, "step": 19988 }, { "epoch": 1.1914411729645966, "grad_norm": 2.9603936672210693, "learning_rate": 3.5804522221897254e-05, "loss": 1.1515, "step": 19990 }, { "epoch": 1.1915603766837526, "grad_norm": 3.1262259483337402, "learning_rate": 3.579545468699742e-05, "loss": 1.1407, "step": 19992 }, { "epoch": 1.1916795804029086, "grad_norm": 2.8852198123931885, "learning_rate": 3.578638766024276e-05, "loss": 1.1986, "step": 19994 }, { "epoch": 1.1917987841220645, "grad_norm": 3.1220428943634033, "learning_rate": 3.5777321141957665e-05, "loss": 1.2166, "step": 19996 }, { "epoch": 1.1919179878412207, "grad_norm": 3.0627310276031494, "learning_rate": 3.576825513246643e-05, "loss": 1.1711, "step": 19998 }, { "epoch": 1.1920371915603767, "grad_norm": 2.9724910259246826, "learning_rate": 3.575918963209343e-05, "loss": 1.2937, "step": 20000 }, { "epoch": 1.1921563952795327, "grad_norm": 3.094949245452881, "learning_rate": 3.5750124641162905e-05, "loss": 1.1651, "step": 20002 }, { "epoch": 1.1922755989986888, "grad_norm": 2.829101324081421, "learning_rate": 3.574106015999922e-05, "loss": 1.1243, "step": 20004 }, { "epoch": 1.1923948027178448, "grad_norm": 3.2544405460357666, "learning_rate": 3.5731996188926584e-05, "loss": 1.0417, "step": 20006 }, { "epoch": 1.1925140064370008, "grad_norm": 2.912346363067627, "learning_rate": 3.572293272826924e-05, "loss": 1.0839, "step": 20008 }, { "epoch": 1.1926332101561568, "grad_norm": 3.0635745525360107, "learning_rate": 3.571386977835147e-05, "loss": 1.0688, "step": 20010 }, { "epoch": 1.192752413875313, "grad_norm": 3.1924850940704346, "learning_rate": 3.5704807339497436e-05, "loss": 1.0048, "step": 20012 }, { "epoch": 1.192871617594469, "grad_norm": 3.23732328414917, "learning_rate": 3.5695745412031365e-05, "loss": 1.186, "step": 20014 }, { "epoch": 1.192990821313625, "grad_norm": 3.183276414871216, "learning_rate": 3.5686683996277413e-05, "loss": 1.1605, "step": 20016 }, { "epoch": 1.193110025032781, "grad_norm": 2.834702730178833, "learning_rate": 3.567762309255977e-05, "loss": 1.076, "step": 20018 }, { "epoch": 1.193229228751937, "grad_norm": 3.3946609497070312, "learning_rate": 3.566856270120253e-05, "loss": 1.278, "step": 20020 }, { "epoch": 1.193348432471093, "grad_norm": 3.0084264278411865, "learning_rate": 3.565950282252984e-05, "loss": 1.0194, "step": 20022 }, { "epoch": 1.1934676361902492, "grad_norm": 3.156980276107788, "learning_rate": 3.5650443456865825e-05, "loss": 1.2783, "step": 20024 }, { "epoch": 1.1935868399094052, "grad_norm": 2.898461103439331, "learning_rate": 3.56413846045345e-05, "loss": 1.2121, "step": 20026 }, { "epoch": 1.1937060436285611, "grad_norm": 2.962383270263672, "learning_rate": 3.5632326265860005e-05, "loss": 1.2004, "step": 20028 }, { "epoch": 1.1938252473477173, "grad_norm": 3.217161178588867, "learning_rate": 3.5623268441166344e-05, "loss": 1.2085, "step": 20030 }, { "epoch": 1.1939444510668733, "grad_norm": 3.138028860092163, "learning_rate": 3.561421113077756e-05, "loss": 1.2111, "step": 20032 }, { "epoch": 1.1940636547860293, "grad_norm": 2.5921952724456787, "learning_rate": 3.560515433501769e-05, "loss": 1.1113, "step": 20034 }, { "epoch": 1.1941828585051852, "grad_norm": 3.0115249156951904, "learning_rate": 3.559609805421067e-05, "loss": 1.1993, "step": 20036 }, { "epoch": 1.1943020622243414, "grad_norm": 2.761462688446045, "learning_rate": 3.558704228868052e-05, "loss": 1.3646, "step": 20038 }, { "epoch": 1.1944212659434974, "grad_norm": 3.109833240509033, "learning_rate": 3.557798703875117e-05, "loss": 1.2538, "step": 20040 }, { "epoch": 1.1945404696626536, "grad_norm": 2.9685447216033936, "learning_rate": 3.556893230474659e-05, "loss": 1.1814, "step": 20042 }, { "epoch": 1.1946596733818096, "grad_norm": 3.1910407543182373, "learning_rate": 3.555987808699065e-05, "loss": 1.2263, "step": 20044 }, { "epoch": 1.1947788771009655, "grad_norm": 3.523646831512451, "learning_rate": 3.555082438580729e-05, "loss": 1.3303, "step": 20046 }, { "epoch": 1.1948980808201215, "grad_norm": 2.999948024749756, "learning_rate": 3.5541771201520366e-05, "loss": 1.1113, "step": 20048 }, { "epoch": 1.1950172845392777, "grad_norm": 3.0210120677948, "learning_rate": 3.5532718534453784e-05, "loss": 1.1271, "step": 20050 }, { "epoch": 1.1951364882584337, "grad_norm": 3.021327257156372, "learning_rate": 3.5523666384931345e-05, "loss": 1.0188, "step": 20052 }, { "epoch": 1.1952556919775896, "grad_norm": 3.3419973850250244, "learning_rate": 3.5514614753276875e-05, "loss": 1.0978, "step": 20054 }, { "epoch": 1.1953748956967458, "grad_norm": 2.921302080154419, "learning_rate": 3.5505563639814224e-05, "loss": 1.2491, "step": 20056 }, { "epoch": 1.1954940994159018, "grad_norm": 2.9758808612823486, "learning_rate": 3.549651304486712e-05, "loss": 1.2184, "step": 20058 }, { "epoch": 1.1956133031350578, "grad_norm": 3.0245096683502197, "learning_rate": 3.54874629687594e-05, "loss": 1.0961, "step": 20060 }, { "epoch": 1.1957325068542137, "grad_norm": 3.1758828163146973, "learning_rate": 3.547841341181476e-05, "loss": 1.0199, "step": 20062 }, { "epoch": 1.19585171057337, "grad_norm": 2.8854176998138428, "learning_rate": 3.546936437435696e-05, "loss": 1.0975, "step": 20064 }, { "epoch": 1.1959709142925259, "grad_norm": 3.158618450164795, "learning_rate": 3.5460315856709734e-05, "loss": 1.1495, "step": 20066 }, { "epoch": 1.196090118011682, "grad_norm": 2.9843039512634277, "learning_rate": 3.5451267859196734e-05, "loss": 1.1268, "step": 20068 }, { "epoch": 1.196209321730838, "grad_norm": 3.335982084274292, "learning_rate": 3.544222038214167e-05, "loss": 1.1279, "step": 20070 }, { "epoch": 1.196328525449994, "grad_norm": 2.9961376190185547, "learning_rate": 3.543317342586818e-05, "loss": 1.1363, "step": 20072 }, { "epoch": 1.19644772916915, "grad_norm": 3.3992340564727783, "learning_rate": 3.542412699069994e-05, "loss": 1.189, "step": 20074 }, { "epoch": 1.1965669328883062, "grad_norm": 3.2779653072357178, "learning_rate": 3.5415081076960526e-05, "loss": 1.298, "step": 20076 }, { "epoch": 1.1966861366074621, "grad_norm": 3.1799232959747314, "learning_rate": 3.540603568497358e-05, "loss": 1.1882, "step": 20078 }, { "epoch": 1.1968053403266181, "grad_norm": 2.9287235736846924, "learning_rate": 3.539699081506267e-05, "loss": 1.2207, "step": 20080 }, { "epoch": 1.1969245440457743, "grad_norm": 3.5012080669403076, "learning_rate": 3.5387946467551344e-05, "loss": 1.148, "step": 20082 }, { "epoch": 1.1970437477649303, "grad_norm": 2.897005558013916, "learning_rate": 3.5378902642763186e-05, "loss": 1.2425, "step": 20084 }, { "epoch": 1.1971629514840862, "grad_norm": 3.05395245552063, "learning_rate": 3.536985934102169e-05, "loss": 1.199, "step": 20086 }, { "epoch": 1.1972821552032424, "grad_norm": 3.1854236125946045, "learning_rate": 3.53608165626504e-05, "loss": 1.1389, "step": 20088 }, { "epoch": 1.1974013589223984, "grad_norm": 3.226592540740967, "learning_rate": 3.535177430797276e-05, "loss": 1.0718, "step": 20090 }, { "epoch": 1.1975205626415544, "grad_norm": 3.38265323638916, "learning_rate": 3.5342732577312304e-05, "loss": 1.1899, "step": 20092 }, { "epoch": 1.1976397663607106, "grad_norm": 3.1999599933624268, "learning_rate": 3.533369137099244e-05, "loss": 1.1703, "step": 20094 }, { "epoch": 1.1977589700798665, "grad_norm": 2.9837758541107178, "learning_rate": 3.532465068933661e-05, "loss": 1.0847, "step": 20096 }, { "epoch": 1.1978781737990225, "grad_norm": 3.3628735542297363, "learning_rate": 3.5315610532668255e-05, "loss": 1.3004, "step": 20098 }, { "epoch": 1.1979973775181785, "grad_norm": 3.230959892272949, "learning_rate": 3.5306570901310733e-05, "loss": 1.2681, "step": 20100 }, { "epoch": 1.1981165812373347, "grad_norm": 2.9268240928649902, "learning_rate": 3.529753179558745e-05, "loss": 1.2199, "step": 20102 }, { "epoch": 1.1982357849564906, "grad_norm": 2.7153515815734863, "learning_rate": 3.528849321582175e-05, "loss": 1.2841, "step": 20104 }, { "epoch": 1.1983549886756466, "grad_norm": 2.7825005054473877, "learning_rate": 3.5279455162337005e-05, "loss": 1.1331, "step": 20106 }, { "epoch": 1.1984741923948028, "grad_norm": 3.013857364654541, "learning_rate": 3.527041763545649e-05, "loss": 1.1104, "step": 20108 }, { "epoch": 1.1985933961139588, "grad_norm": 3.1796417236328125, "learning_rate": 3.5261380635503544e-05, "loss": 1.2253, "step": 20110 }, { "epoch": 1.1987125998331147, "grad_norm": 2.9885313510894775, "learning_rate": 3.5252344162801455e-05, "loss": 1.1742, "step": 20112 }, { "epoch": 1.198831803552271, "grad_norm": 2.9535584449768066, "learning_rate": 3.524330821767345e-05, "loss": 0.9939, "step": 20114 }, { "epoch": 1.198951007271427, "grad_norm": 3.410665988922119, "learning_rate": 3.523427280044281e-05, "loss": 1.1062, "step": 20116 }, { "epoch": 1.1990702109905829, "grad_norm": 3.1100831031799316, "learning_rate": 3.522523791143274e-05, "loss": 1.1203, "step": 20118 }, { "epoch": 1.199189414709739, "grad_norm": 3.370450019836426, "learning_rate": 3.5216203550966484e-05, "loss": 1.3414, "step": 20120 }, { "epoch": 1.199308618428895, "grad_norm": 2.981013536453247, "learning_rate": 3.520716971936718e-05, "loss": 1.0994, "step": 20122 }, { "epoch": 1.199427822148051, "grad_norm": 3.1334683895111084, "learning_rate": 3.5198136416958056e-05, "loss": 1.182, "step": 20124 }, { "epoch": 1.199547025867207, "grad_norm": 3.184147834777832, "learning_rate": 3.518910364406223e-05, "loss": 1.1865, "step": 20126 }, { "epoch": 1.1996662295863632, "grad_norm": 3.1507813930511475, "learning_rate": 3.5180071401002825e-05, "loss": 1.1773, "step": 20128 }, { "epoch": 1.1997854333055191, "grad_norm": 3.362966299057007, "learning_rate": 3.5171039688102996e-05, "loss": 1.2784, "step": 20130 }, { "epoch": 1.199904637024675, "grad_norm": 3.048384428024292, "learning_rate": 3.516200850568579e-05, "loss": 1.1571, "step": 20132 }, { "epoch": 1.2000238407438313, "grad_norm": 2.812079429626465, "learning_rate": 3.515297785407432e-05, "loss": 1.1386, "step": 20134 }, { "epoch": 1.2001430444629873, "grad_norm": 3.3111040592193604, "learning_rate": 3.514394773359163e-05, "loss": 1.2033, "step": 20136 }, { "epoch": 1.2002622481821432, "grad_norm": 3.502166509628296, "learning_rate": 3.5134918144560766e-05, "loss": 1.2191, "step": 20138 }, { "epoch": 1.2003814519012994, "grad_norm": 3.05312442779541, "learning_rate": 3.512588908730474e-05, "loss": 1.1767, "step": 20140 }, { "epoch": 1.2005006556204554, "grad_norm": 3.2071614265441895, "learning_rate": 3.5116860562146534e-05, "loss": 1.1749, "step": 20142 }, { "epoch": 1.2006198593396114, "grad_norm": 3.33844256401062, "learning_rate": 3.5107832569409175e-05, "loss": 1.1494, "step": 20144 }, { "epoch": 1.2007390630587675, "grad_norm": 3.3044557571411133, "learning_rate": 3.509880510941558e-05, "loss": 1.2043, "step": 20146 }, { "epoch": 1.2008582667779235, "grad_norm": 3.145085334777832, "learning_rate": 3.5089778182488706e-05, "loss": 1.2428, "step": 20148 }, { "epoch": 1.2009774704970795, "grad_norm": 3.2242562770843506, "learning_rate": 3.508075178895148e-05, "loss": 1.203, "step": 20150 }, { "epoch": 1.2010966742162354, "grad_norm": 3.219024896621704, "learning_rate": 3.507172592912683e-05, "loss": 1.21, "step": 20152 }, { "epoch": 1.2012158779353916, "grad_norm": 3.010685682296753, "learning_rate": 3.50627006033376e-05, "loss": 1.0653, "step": 20154 }, { "epoch": 1.2013350816545476, "grad_norm": 3.1685791015625, "learning_rate": 3.5053675811906685e-05, "loss": 1.0975, "step": 20156 }, { "epoch": 1.2014542853737036, "grad_norm": 3.3469083309173584, "learning_rate": 3.5044651555156926e-05, "loss": 1.3897, "step": 20158 }, { "epoch": 1.2015734890928598, "grad_norm": 3.2331278324127197, "learning_rate": 3.5035627833411134e-05, "loss": 1.0918, "step": 20160 }, { "epoch": 1.2016926928120157, "grad_norm": 3.237428665161133, "learning_rate": 3.502660464699216e-05, "loss": 1.1822, "step": 20162 }, { "epoch": 1.2018118965311717, "grad_norm": 2.961735725402832, "learning_rate": 3.501758199622275e-05, "loss": 1.1815, "step": 20164 }, { "epoch": 1.201931100250328, "grad_norm": 3.0589375495910645, "learning_rate": 3.50085598814257e-05, "loss": 1.1859, "step": 20166 }, { "epoch": 1.2020503039694839, "grad_norm": 3.2549407482147217, "learning_rate": 3.499953830292375e-05, "loss": 1.1546, "step": 20168 }, { "epoch": 1.2021695076886398, "grad_norm": 2.9964542388916016, "learning_rate": 3.499051726103966e-05, "loss": 1.1327, "step": 20170 }, { "epoch": 1.202288711407796, "grad_norm": 2.9373605251312256, "learning_rate": 3.49814967560961e-05, "loss": 1.1283, "step": 20172 }, { "epoch": 1.202407915126952, "grad_norm": 3.1122546195983887, "learning_rate": 3.4972476788415786e-05, "loss": 1.1391, "step": 20174 }, { "epoch": 1.202527118846108, "grad_norm": 3.2467799186706543, "learning_rate": 3.496345735832142e-05, "loss": 1.1813, "step": 20176 }, { "epoch": 1.202646322565264, "grad_norm": 2.9140737056732178, "learning_rate": 3.4954438466135596e-05, "loss": 1.2367, "step": 20178 }, { "epoch": 1.2027655262844201, "grad_norm": 2.7935681343078613, "learning_rate": 3.4945420112181003e-05, "loss": 1.0603, "step": 20180 }, { "epoch": 1.202884730003576, "grad_norm": 3.205920457839966, "learning_rate": 3.493640229678023e-05, "loss": 1.1831, "step": 20182 }, { "epoch": 1.203003933722732, "grad_norm": 3.3868765830993652, "learning_rate": 3.492738502025591e-05, "loss": 1.291, "step": 20184 }, { "epoch": 1.2031231374418883, "grad_norm": 3.128323793411255, "learning_rate": 3.491836828293058e-05, "loss": 1.1811, "step": 20186 }, { "epoch": 1.2032423411610442, "grad_norm": 3.463080644607544, "learning_rate": 3.4909352085126815e-05, "loss": 1.2364, "step": 20188 }, { "epoch": 1.2033615448802002, "grad_norm": 3.279404401779175, "learning_rate": 3.4900336427167166e-05, "loss": 1.1641, "step": 20190 }, { "epoch": 1.2034807485993564, "grad_norm": 3.001415491104126, "learning_rate": 3.4891321309374135e-05, "loss": 1.1486, "step": 20192 }, { "epoch": 1.2035999523185124, "grad_norm": 3.083688735961914, "learning_rate": 3.488230673207026e-05, "loss": 1.2466, "step": 20194 }, { "epoch": 1.2037191560376683, "grad_norm": 3.0990731716156006, "learning_rate": 3.487329269557797e-05, "loss": 1.1715, "step": 20196 }, { "epoch": 1.2038383597568245, "grad_norm": 3.292428970336914, "learning_rate": 3.486427920021977e-05, "loss": 1.1019, "step": 20198 }, { "epoch": 1.2039575634759805, "grad_norm": 3.066326141357422, "learning_rate": 3.4855266246318094e-05, "loss": 1.1113, "step": 20200 }, { "epoch": 1.2040767671951365, "grad_norm": 3.204730987548828, "learning_rate": 3.484625383419535e-05, "loss": 1.2054, "step": 20202 }, { "epoch": 1.2041959709142924, "grad_norm": 3.207397699356079, "learning_rate": 3.483724196417396e-05, "loss": 1.1679, "step": 20204 }, { "epoch": 1.2043151746334486, "grad_norm": 3.3260414600372314, "learning_rate": 3.48282306365763e-05, "loss": 1.0995, "step": 20206 }, { "epoch": 1.2044343783526046, "grad_norm": 3.34911847114563, "learning_rate": 3.481921985172475e-05, "loss": 1.168, "step": 20208 }, { "epoch": 1.2045535820717606, "grad_norm": 2.8532259464263916, "learning_rate": 3.481020960994163e-05, "loss": 1.1037, "step": 20210 }, { "epoch": 1.2046727857909167, "grad_norm": 2.9102015495300293, "learning_rate": 3.480119991154929e-05, "loss": 1.0862, "step": 20212 }, { "epoch": 1.2047919895100727, "grad_norm": 3.0736472606658936, "learning_rate": 3.4792190756870045e-05, "loss": 1.1349, "step": 20214 }, { "epoch": 1.2049111932292287, "grad_norm": 3.315106153488159, "learning_rate": 3.478318214622616e-05, "loss": 1.3476, "step": 20216 }, { "epoch": 1.2050303969483849, "grad_norm": 2.9270758628845215, "learning_rate": 3.4774174079939905e-05, "loss": 1.0672, "step": 20218 }, { "epoch": 1.2051496006675408, "grad_norm": 3.0644001960754395, "learning_rate": 3.476516655833354e-05, "loss": 1.0267, "step": 20220 }, { "epoch": 1.2052688043866968, "grad_norm": 3.4748282432556152, "learning_rate": 3.475615958172931e-05, "loss": 1.1979, "step": 20222 }, { "epoch": 1.205388008105853, "grad_norm": 3.079989194869995, "learning_rate": 3.474715315044937e-05, "loss": 1.4034, "step": 20224 }, { "epoch": 1.205507211825009, "grad_norm": 2.7207930088043213, "learning_rate": 3.473814726481599e-05, "loss": 1.2317, "step": 20226 }, { "epoch": 1.205626415544165, "grad_norm": 3.139629364013672, "learning_rate": 3.472914192515128e-05, "loss": 1.1314, "step": 20228 }, { "epoch": 1.205745619263321, "grad_norm": 3.289158821105957, "learning_rate": 3.4720137131777416e-05, "loss": 1.1409, "step": 20230 }, { "epoch": 1.205864822982477, "grad_norm": 3.3134148120880127, "learning_rate": 3.4711132885016535e-05, "loss": 1.1868, "step": 20232 }, { "epoch": 1.205984026701633, "grad_norm": 2.8814902305603027, "learning_rate": 3.4702129185190724e-05, "loss": 1.1429, "step": 20234 }, { "epoch": 1.206103230420789, "grad_norm": 3.467174530029297, "learning_rate": 3.4693126032622105e-05, "loss": 1.144, "step": 20236 }, { "epoch": 1.2062224341399452, "grad_norm": 3.2177140712738037, "learning_rate": 3.468412342763273e-05, "loss": 1.1649, "step": 20238 }, { "epoch": 1.2063416378591012, "grad_norm": 3.2436559200286865, "learning_rate": 3.467512137054468e-05, "loss": 1.2519, "step": 20240 }, { "epoch": 1.2064608415782572, "grad_norm": 3.3388521671295166, "learning_rate": 3.4666119861679945e-05, "loss": 1.1507, "step": 20242 }, { "epoch": 1.2065800452974134, "grad_norm": 3.4943573474884033, "learning_rate": 3.465711890136058e-05, "loss": 1.1462, "step": 20244 }, { "epoch": 1.2066992490165693, "grad_norm": 2.82682466506958, "learning_rate": 3.464811848990859e-05, "loss": 1.0276, "step": 20246 }, { "epoch": 1.2068184527357253, "grad_norm": 2.915846347808838, "learning_rate": 3.46391186276459e-05, "loss": 1.2559, "step": 20248 }, { "epoch": 1.2069376564548815, "grad_norm": 2.8110673427581787, "learning_rate": 3.4630119314894504e-05, "loss": 1.0533, "step": 20250 }, { "epoch": 1.2070568601740375, "grad_norm": 3.144368886947632, "learning_rate": 3.462112055197632e-05, "loss": 1.2368, "step": 20252 }, { "epoch": 1.2071760638931934, "grad_norm": 3.1105098724365234, "learning_rate": 3.4612122339213284e-05, "loss": 1.2341, "step": 20254 }, { "epoch": 1.2072952676123494, "grad_norm": 3.2024593353271484, "learning_rate": 3.460312467692726e-05, "loss": 1.2464, "step": 20256 }, { "epoch": 1.2074144713315056, "grad_norm": 3.0662460327148438, "learning_rate": 3.4594127565440166e-05, "loss": 1.117, "step": 20258 }, { "epoch": 1.2075336750506616, "grad_norm": 3.162642002105713, "learning_rate": 3.4585131005073836e-05, "loss": 1.1181, "step": 20260 }, { "epoch": 1.2076528787698175, "grad_norm": 3.0445871353149414, "learning_rate": 3.45761349961501e-05, "loss": 1.1184, "step": 20262 }, { "epoch": 1.2077720824889737, "grad_norm": 3.02816104888916, "learning_rate": 3.4567139538990805e-05, "loss": 1.2135, "step": 20264 }, { "epoch": 1.2078912862081297, "grad_norm": 3.2111868858337402, "learning_rate": 3.4558144633917714e-05, "loss": 1.0824, "step": 20266 }, { "epoch": 1.2080104899272857, "grad_norm": 3.4151055812835693, "learning_rate": 3.4549150281252636e-05, "loss": 1.1526, "step": 20268 }, { "epoch": 1.2081296936464418, "grad_norm": 2.7874529361724854, "learning_rate": 3.4540156481317295e-05, "loss": 1.1905, "step": 20270 }, { "epoch": 1.2082488973655978, "grad_norm": 2.8830225467681885, "learning_rate": 3.453116323443349e-05, "loss": 1.159, "step": 20272 }, { "epoch": 1.2083681010847538, "grad_norm": 3.3618102073669434, "learning_rate": 3.4522170540922874e-05, "loss": 1.1984, "step": 20274 }, { "epoch": 1.20848730480391, "grad_norm": 3.185298442840576, "learning_rate": 3.451317840110718e-05, "loss": 1.1699, "step": 20276 }, { "epoch": 1.208606508523066, "grad_norm": 3.114957094192505, "learning_rate": 3.4504186815308096e-05, "loss": 1.2099, "step": 20278 }, { "epoch": 1.208725712242222, "grad_norm": 3.3125717639923096, "learning_rate": 3.449519578384725e-05, "loss": 1.1209, "step": 20280 }, { "epoch": 1.2088449159613779, "grad_norm": 2.7712175846099854, "learning_rate": 3.448620530704631e-05, "loss": 1.2328, "step": 20282 }, { "epoch": 1.208964119680534, "grad_norm": 3.0454940795898438, "learning_rate": 3.4477215385226875e-05, "loss": 1.2207, "step": 20284 }, { "epoch": 1.20908332339969, "grad_norm": 3.3349125385284424, "learning_rate": 3.446822601871057e-05, "loss": 1.2368, "step": 20286 }, { "epoch": 1.209202527118846, "grad_norm": 2.811469554901123, "learning_rate": 3.445923720781894e-05, "loss": 1.2506, "step": 20288 }, { "epoch": 1.2093217308380022, "grad_norm": 3.009932041168213, "learning_rate": 3.44502489528736e-05, "loss": 1.1634, "step": 20290 }, { "epoch": 1.2094409345571582, "grad_norm": 3.3560619354248047, "learning_rate": 3.4441261254196036e-05, "loss": 1.207, "step": 20292 }, { "epoch": 1.2095601382763141, "grad_norm": 3.362335681915283, "learning_rate": 3.4432274112107787e-05, "loss": 1.3321, "step": 20294 }, { "epoch": 1.2096793419954703, "grad_norm": 2.667893648147583, "learning_rate": 3.442328752693038e-05, "loss": 1.083, "step": 20296 }, { "epoch": 1.2097985457146263, "grad_norm": 2.956402540206909, "learning_rate": 3.441430149898525e-05, "loss": 1.1465, "step": 20298 }, { "epoch": 1.2099177494337823, "grad_norm": 3.031895160675049, "learning_rate": 3.440531602859389e-05, "loss": 1.1751, "step": 20300 }, { "epoch": 1.2100369531529385, "grad_norm": 2.898526668548584, "learning_rate": 3.4396331116077727e-05, "loss": 1.1396, "step": 20302 }, { "epoch": 1.2101561568720944, "grad_norm": 3.1423137187957764, "learning_rate": 3.4387346761758205e-05, "loss": 1.3886, "step": 20304 }, { "epoch": 1.2102753605912504, "grad_norm": 3.001528739929199, "learning_rate": 3.43783629659567e-05, "loss": 1.1981, "step": 20306 }, { "epoch": 1.2103945643104064, "grad_norm": 2.971193552017212, "learning_rate": 3.436937972899458e-05, "loss": 1.1318, "step": 20308 }, { "epoch": 1.2105137680295626, "grad_norm": 2.8902692794799805, "learning_rate": 3.436039705119326e-05, "loss": 1.1965, "step": 20310 }, { "epoch": 1.2106329717487185, "grad_norm": 3.279951333999634, "learning_rate": 3.435141493287401e-05, "loss": 1.1779, "step": 20312 }, { "epoch": 1.2107521754678745, "grad_norm": 3.308410882949829, "learning_rate": 3.434243337435822e-05, "loss": 1.2412, "step": 20314 }, { "epoch": 1.2108713791870307, "grad_norm": 3.402902603149414, "learning_rate": 3.433345237596714e-05, "loss": 1.2128, "step": 20316 }, { "epoch": 1.2109905829061867, "grad_norm": 2.982635974884033, "learning_rate": 3.4324471938022094e-05, "loss": 1.246, "step": 20318 }, { "epoch": 1.2111097866253426, "grad_norm": 3.0721518993377686, "learning_rate": 3.4315492060844306e-05, "loss": 1.1535, "step": 20320 }, { "epoch": 1.2112289903444988, "grad_norm": 3.386112928390503, "learning_rate": 3.430651274475503e-05, "loss": 1.0597, "step": 20322 }, { "epoch": 1.2113481940636548, "grad_norm": 3.3130693435668945, "learning_rate": 3.429753399007548e-05, "loss": 1.306, "step": 20324 }, { "epoch": 1.2114673977828108, "grad_norm": 3.204667329788208, "learning_rate": 3.428855579712687e-05, "loss": 1.3225, "step": 20326 }, { "epoch": 1.211586601501967, "grad_norm": 2.882751226425171, "learning_rate": 3.4279578166230395e-05, "loss": 1.0318, "step": 20328 }, { "epoch": 1.211705805221123, "grad_norm": 3.0685882568359375, "learning_rate": 3.427060109770717e-05, "loss": 1.2433, "step": 20330 }, { "epoch": 1.211825008940279, "grad_norm": 3.4618284702301025, "learning_rate": 3.426162459187837e-05, "loss": 1.1642, "step": 20332 }, { "epoch": 1.2119442126594349, "grad_norm": 3.3345017433166504, "learning_rate": 3.42526486490651e-05, "loss": 1.1912, "step": 20334 }, { "epoch": 1.212063416378591, "grad_norm": 2.841486930847168, "learning_rate": 3.424367326958848e-05, "loss": 1.1535, "step": 20336 }, { "epoch": 1.212182620097747, "grad_norm": 3.1952059268951416, "learning_rate": 3.423469845376958e-05, "loss": 1.179, "step": 20338 }, { "epoch": 1.212301823816903, "grad_norm": 3.1926653385162354, "learning_rate": 3.4225724201929435e-05, "loss": 1.1774, "step": 20340 }, { "epoch": 1.2124210275360592, "grad_norm": 3.3519716262817383, "learning_rate": 3.4216750514389133e-05, "loss": 1.1544, "step": 20342 }, { "epoch": 1.2125402312552152, "grad_norm": 3.03021502494812, "learning_rate": 3.4207777391469645e-05, "loss": 1.0691, "step": 20344 }, { "epoch": 1.2126594349743711, "grad_norm": 3.052847385406494, "learning_rate": 3.419880483349201e-05, "loss": 1.2376, "step": 20346 }, { "epoch": 1.2127786386935273, "grad_norm": 2.882446050643921, "learning_rate": 3.4189832840777165e-05, "loss": 1.2196, "step": 20348 }, { "epoch": 1.2128978424126833, "grad_norm": 2.820040702819824, "learning_rate": 3.418086141364612e-05, "loss": 1.1013, "step": 20350 }, { "epoch": 1.2130170461318392, "grad_norm": 3.0672008991241455, "learning_rate": 3.417189055241978e-05, "loss": 1.1569, "step": 20352 }, { "epoch": 1.2131362498509954, "grad_norm": 3.201216459274292, "learning_rate": 3.416292025741905e-05, "loss": 1.2152, "step": 20354 }, { "epoch": 1.2132554535701514, "grad_norm": 2.9020919799804688, "learning_rate": 3.415395052896487e-05, "loss": 1.0136, "step": 20356 }, { "epoch": 1.2133746572893074, "grad_norm": 3.155773639678955, "learning_rate": 3.4144981367378085e-05, "loss": 1.1878, "step": 20358 }, { "epoch": 1.2134938610084633, "grad_norm": 3.0929324626922607, "learning_rate": 3.413601277297957e-05, "loss": 1.0398, "step": 20360 }, { "epoch": 1.2136130647276195, "grad_norm": 3.2776551246643066, "learning_rate": 3.4127044746090155e-05, "loss": 1.4216, "step": 20362 }, { "epoch": 1.2137322684467755, "grad_norm": 4.530462265014648, "learning_rate": 3.411807728703066e-05, "loss": 1.194, "step": 20364 }, { "epoch": 1.2138514721659317, "grad_norm": 3.066274881362915, "learning_rate": 3.4109110396121886e-05, "loss": 1.1918, "step": 20366 }, { "epoch": 1.2139706758850877, "grad_norm": 3.291743755340576, "learning_rate": 3.4100144073684585e-05, "loss": 1.2635, "step": 20368 }, { "epoch": 1.2140898796042436, "grad_norm": 3.396804094314575, "learning_rate": 3.4091178320039544e-05, "loss": 1.155, "step": 20370 }, { "epoch": 1.2142090833233996, "grad_norm": 3.269118070602417, "learning_rate": 3.4082213135507476e-05, "loss": 1.1293, "step": 20372 }, { "epoch": 1.2143282870425558, "grad_norm": 3.1444623470306396, "learning_rate": 3.407324852040913e-05, "loss": 1.1896, "step": 20374 }, { "epoch": 1.2144474907617118, "grad_norm": 3.2770586013793945, "learning_rate": 3.4064284475065145e-05, "loss": 1.1377, "step": 20376 }, { "epoch": 1.2145666944808677, "grad_norm": 3.318735361099243, "learning_rate": 3.405532099979625e-05, "loss": 1.1593, "step": 20378 }, { "epoch": 1.214685898200024, "grad_norm": 2.898845911026001, "learning_rate": 3.4046358094923084e-05, "loss": 1.0655, "step": 20380 }, { "epoch": 1.21480510191918, "grad_norm": 3.2402379512786865, "learning_rate": 3.403739576076625e-05, "loss": 1.2865, "step": 20382 }, { "epoch": 1.2149243056383359, "grad_norm": 2.919499158859253, "learning_rate": 3.4028433997646394e-05, "loss": 1.0977, "step": 20384 }, { "epoch": 1.2150435093574918, "grad_norm": 3.450674533843994, "learning_rate": 3.401947280588409e-05, "loss": 1.2263, "step": 20386 }, { "epoch": 1.215162713076648, "grad_norm": 3.3102684020996094, "learning_rate": 3.4010512185799945e-05, "loss": 1.1895, "step": 20388 }, { "epoch": 1.215281916795804, "grad_norm": 3.284682512283325, "learning_rate": 3.400155213771446e-05, "loss": 1.1781, "step": 20390 }, { "epoch": 1.2154011205149602, "grad_norm": 2.9722728729248047, "learning_rate": 3.3992592661948214e-05, "loss": 1.2476, "step": 20392 }, { "epoch": 1.2155203242341162, "grad_norm": 2.7896320819854736, "learning_rate": 3.3983633758821684e-05, "loss": 1.0607, "step": 20394 }, { "epoch": 1.2156395279532721, "grad_norm": 2.778357744216919, "learning_rate": 3.397467542865538e-05, "loss": 1.0982, "step": 20396 }, { "epoch": 1.215758731672428, "grad_norm": 3.5364041328430176, "learning_rate": 3.396571767176978e-05, "loss": 1.1956, "step": 20398 }, { "epoch": 1.2158779353915843, "grad_norm": 3.1539127826690674, "learning_rate": 3.3956760488485294e-05, "loss": 1.271, "step": 20400 }, { "epoch": 1.2159971391107403, "grad_norm": 3.079392671585083, "learning_rate": 3.3947803879122386e-05, "loss": 1.1488, "step": 20402 }, { "epoch": 1.2161163428298962, "grad_norm": 3.4086573123931885, "learning_rate": 3.393884784400145e-05, "loss": 1.2013, "step": 20404 }, { "epoch": 1.2162355465490524, "grad_norm": 3.2639520168304443, "learning_rate": 3.3929892383442904e-05, "loss": 1.232, "step": 20406 }, { "epoch": 1.2163547502682084, "grad_norm": 3.7060539722442627, "learning_rate": 3.392093749776706e-05, "loss": 1.2522, "step": 20408 }, { "epoch": 1.2164739539873644, "grad_norm": 3.1926677227020264, "learning_rate": 3.391198318729431e-05, "loss": 1.082, "step": 20410 }, { "epoch": 1.2165931577065203, "grad_norm": 2.675872325897217, "learning_rate": 3.390302945234498e-05, "loss": 1.0491, "step": 20412 }, { "epoch": 1.2167123614256765, "grad_norm": 3.0090293884277344, "learning_rate": 3.3894076293239336e-05, "loss": 1.0802, "step": 20414 }, { "epoch": 1.2168315651448325, "grad_norm": 3.3304529190063477, "learning_rate": 3.38851237102977e-05, "loss": 1.2121, "step": 20416 }, { "epoch": 1.2169507688639887, "grad_norm": 3.170633316040039, "learning_rate": 3.387617170384032e-05, "loss": 1.2649, "step": 20418 }, { "epoch": 1.2170699725831446, "grad_norm": 3.098526954650879, "learning_rate": 3.386722027418746e-05, "loss": 1.1491, "step": 20420 }, { "epoch": 1.2171891763023006, "grad_norm": 3.0663938522338867, "learning_rate": 3.385826942165929e-05, "loss": 1.09, "step": 20422 }, { "epoch": 1.2173083800214566, "grad_norm": 3.3773322105407715, "learning_rate": 3.38493191465761e-05, "loss": 1.0786, "step": 20424 }, { "epoch": 1.2174275837406128, "grad_norm": 3.269118547439575, "learning_rate": 3.3840369449258005e-05, "loss": 1.2293, "step": 20426 }, { "epoch": 1.2175467874597687, "grad_norm": 3.1180994510650635, "learning_rate": 3.383142033002517e-05, "loss": 1.1963, "step": 20428 }, { "epoch": 1.2176659911789247, "grad_norm": 3.3128507137298584, "learning_rate": 3.382247178919777e-05, "loss": 1.121, "step": 20430 }, { "epoch": 1.217785194898081, "grad_norm": 3.189406156539917, "learning_rate": 3.381352382709589e-05, "loss": 1.3205, "step": 20432 }, { "epoch": 1.2179043986172369, "grad_norm": 3.396000385284424, "learning_rate": 3.380457644403966e-05, "loss": 1.1814, "step": 20434 }, { "epoch": 1.2180236023363928, "grad_norm": 3.022205114364624, "learning_rate": 3.379562964034913e-05, "loss": 1.0596, "step": 20436 }, { "epoch": 1.2181428060555488, "grad_norm": 3.0580384731292725, "learning_rate": 3.378668341634438e-05, "loss": 1.1417, "step": 20438 }, { "epoch": 1.218262009774705, "grad_norm": 3.1103410720825195, "learning_rate": 3.377773777234543e-05, "loss": 1.1286, "step": 20440 }, { "epoch": 1.218381213493861, "grad_norm": 2.9327728748321533, "learning_rate": 3.3768792708672296e-05, "loss": 1.0809, "step": 20442 }, { "epoch": 1.2185004172130172, "grad_norm": 2.936263084411621, "learning_rate": 3.375984822564501e-05, "loss": 1.1266, "step": 20444 }, { "epoch": 1.2186196209321731, "grad_norm": 3.6464264392852783, "learning_rate": 3.3750904323583486e-05, "loss": 1.1845, "step": 20446 }, { "epoch": 1.218738824651329, "grad_norm": 3.178013801574707, "learning_rate": 3.374196100280772e-05, "loss": 1.2777, "step": 20448 }, { "epoch": 1.218858028370485, "grad_norm": 3.5180206298828125, "learning_rate": 3.373301826363763e-05, "loss": 1.1932, "step": 20450 }, { "epoch": 1.2189772320896413, "grad_norm": 3.193283796310425, "learning_rate": 3.372407610639315e-05, "loss": 1.2145, "step": 20452 }, { "epoch": 1.2190964358087972, "grad_norm": 3.4791345596313477, "learning_rate": 3.3715134531394134e-05, "loss": 1.3473, "step": 20454 }, { "epoch": 1.2192156395279532, "grad_norm": 2.974790334701538, "learning_rate": 3.37061935389605e-05, "loss": 1.1765, "step": 20456 }, { "epoch": 1.2193348432471094, "grad_norm": 3.0980334281921387, "learning_rate": 3.369725312941206e-05, "loss": 1.2716, "step": 20458 }, { "epoch": 1.2194540469662654, "grad_norm": 2.939133405685425, "learning_rate": 3.368831330306864e-05, "loss": 1.1184, "step": 20460 }, { "epoch": 1.2195732506854213, "grad_norm": 2.963883876800537, "learning_rate": 3.367937406025009e-05, "loss": 1.2112, "step": 20462 }, { "epoch": 1.2196924544045775, "grad_norm": 3.030092716217041, "learning_rate": 3.3670435401276144e-05, "loss": 1.1197, "step": 20464 }, { "epoch": 1.2198116581237335, "grad_norm": 3.220219612121582, "learning_rate": 3.366149732646661e-05, "loss": 1.1172, "step": 20466 }, { "epoch": 1.2199308618428895, "grad_norm": 2.8696751594543457, "learning_rate": 3.3652559836141206e-05, "loss": 1.0665, "step": 20468 }, { "epoch": 1.2200500655620456, "grad_norm": 3.1956589221954346, "learning_rate": 3.364362293061969e-05, "loss": 1.1222, "step": 20470 }, { "epoch": 1.2201692692812016, "grad_norm": 3.0275564193725586, "learning_rate": 3.363468661022173e-05, "loss": 1.2017, "step": 20472 }, { "epoch": 1.2202884730003576, "grad_norm": 3.1316962242126465, "learning_rate": 3.3625750875267024e-05, "loss": 1.3598, "step": 20474 }, { "epoch": 1.2204076767195136, "grad_norm": 3.286036491394043, "learning_rate": 3.361681572607525e-05, "loss": 1.076, "step": 20476 }, { "epoch": 1.2205268804386697, "grad_norm": 3.4855310916900635, "learning_rate": 3.3607881162966004e-05, "loss": 1.0947, "step": 20478 }, { "epoch": 1.2206460841578257, "grad_norm": 2.8348400592803955, "learning_rate": 3.359894718625895e-05, "loss": 1.1453, "step": 20480 }, { "epoch": 1.2207652878769817, "grad_norm": 3.7618720531463623, "learning_rate": 3.3590013796273655e-05, "loss": 1.2588, "step": 20482 }, { "epoch": 1.2208844915961379, "grad_norm": 2.7966558933258057, "learning_rate": 3.358108099332974e-05, "loss": 1.2037, "step": 20484 }, { "epoch": 1.2210036953152938, "grad_norm": 3.1535286903381348, "learning_rate": 3.3572148777746724e-05, "loss": 1.1342, "step": 20486 }, { "epoch": 1.2211228990344498, "grad_norm": 3.440143346786499, "learning_rate": 3.356321714984414e-05, "loss": 1.1677, "step": 20488 }, { "epoch": 1.221242102753606, "grad_norm": 3.2863667011260986, "learning_rate": 3.355428610994152e-05, "loss": 1.1114, "step": 20490 }, { "epoch": 1.221361306472762, "grad_norm": 3.075556993484497, "learning_rate": 3.354535565835836e-05, "loss": 1.356, "step": 20492 }, { "epoch": 1.221480510191918, "grad_norm": 2.96228289604187, "learning_rate": 3.353642579541414e-05, "loss": 1.298, "step": 20494 }, { "epoch": 1.2215997139110741, "grad_norm": 3.0015578269958496, "learning_rate": 3.352749652142827e-05, "loss": 1.212, "step": 20496 }, { "epoch": 1.22171891763023, "grad_norm": 2.774440288543701, "learning_rate": 3.3518567836720224e-05, "loss": 1.0429, "step": 20498 }, { "epoch": 1.221838121349386, "grad_norm": 2.839714527130127, "learning_rate": 3.350963974160942e-05, "loss": 1.0625, "step": 20500 }, { "epoch": 1.221957325068542, "grad_norm": 2.9588851928710938, "learning_rate": 3.3500712236415185e-05, "loss": 1.1469, "step": 20502 }, { "epoch": 1.2220765287876982, "grad_norm": 3.231844663619995, "learning_rate": 3.349178532145695e-05, "loss": 1.3051, "step": 20504 }, { "epoch": 1.2221957325068542, "grad_norm": 3.0727691650390625, "learning_rate": 3.348285899705402e-05, "loss": 1.186, "step": 20506 }, { "epoch": 1.2223149362260102, "grad_norm": 3.2541513442993164, "learning_rate": 3.3473933263525755e-05, "loss": 1.1433, "step": 20508 }, { "epoch": 1.2224341399451664, "grad_norm": 3.0422017574310303, "learning_rate": 3.346500812119142e-05, "loss": 1.0789, "step": 20510 }, { "epoch": 1.2225533436643223, "grad_norm": 3.3925962448120117, "learning_rate": 3.3456083570370336e-05, "loss": 1.2155, "step": 20512 }, { "epoch": 1.2226725473834783, "grad_norm": 3.2421059608459473, "learning_rate": 3.344715961138173e-05, "loss": 1.1498, "step": 20514 }, { "epoch": 1.2227917511026345, "grad_norm": 3.1406614780426025, "learning_rate": 3.3438236244544876e-05, "loss": 1.2933, "step": 20516 }, { "epoch": 1.2229109548217905, "grad_norm": 3.0822906494140625, "learning_rate": 3.3429313470178974e-05, "loss": 1.1592, "step": 20518 }, { "epoch": 1.2230301585409464, "grad_norm": 3.424175500869751, "learning_rate": 3.342039128860321e-05, "loss": 1.2277, "step": 20520 }, { "epoch": 1.2231493622601026, "grad_norm": 3.2755684852600098, "learning_rate": 3.3411469700136786e-05, "loss": 1.1959, "step": 20522 }, { "epoch": 1.2232685659792586, "grad_norm": 3.1280386447906494, "learning_rate": 3.3402548705098844e-05, "loss": 1.1551, "step": 20524 }, { "epoch": 1.2233877696984146, "grad_norm": 3.1363136768341064, "learning_rate": 3.339362830380854e-05, "loss": 1.1803, "step": 20526 }, { "epoch": 1.2235069734175705, "grad_norm": 3.1842875480651855, "learning_rate": 3.338470849658495e-05, "loss": 1.1543, "step": 20528 }, { "epoch": 1.2236261771367267, "grad_norm": 3.33760929107666, "learning_rate": 3.33757892837472e-05, "loss": 1.1452, "step": 20530 }, { "epoch": 1.2237453808558827, "grad_norm": 3.0418403148651123, "learning_rate": 3.3366870665614345e-05, "loss": 1.241, "step": 20532 }, { "epoch": 1.2238645845750387, "grad_norm": 2.873358964920044, "learning_rate": 3.335795264250543e-05, "loss": 1.1187, "step": 20534 }, { "epoch": 1.2239837882941949, "grad_norm": 3.2761144638061523, "learning_rate": 3.3349035214739486e-05, "loss": 1.2053, "step": 20536 }, { "epoch": 1.2241029920133508, "grad_norm": 3.1903164386749268, "learning_rate": 3.334011838263552e-05, "loss": 1.0543, "step": 20538 }, { "epoch": 1.2242221957325068, "grad_norm": 3.559507131576538, "learning_rate": 3.333120214651254e-05, "loss": 1.3009, "step": 20540 }, { "epoch": 1.224341399451663, "grad_norm": 3.354508638381958, "learning_rate": 3.3322286506689464e-05, "loss": 1.2659, "step": 20542 }, { "epoch": 1.224460603170819, "grad_norm": 3.2656471729278564, "learning_rate": 3.3313371463485274e-05, "loss": 1.3006, "step": 20544 }, { "epoch": 1.224579806889975, "grad_norm": 3.1795554161071777, "learning_rate": 3.33044570172189e-05, "loss": 1.0919, "step": 20546 }, { "epoch": 1.2246990106091311, "grad_norm": 3.5823709964752197, "learning_rate": 3.32955431682092e-05, "loss": 1.1584, "step": 20548 }, { "epoch": 1.224818214328287, "grad_norm": 3.083350419998169, "learning_rate": 3.328662991677507e-05, "loss": 1.4101, "step": 20550 }, { "epoch": 1.224937418047443, "grad_norm": 2.8528988361358643, "learning_rate": 3.327771726323538e-05, "loss": 1.1653, "step": 20552 }, { "epoch": 1.225056621766599, "grad_norm": 3.028564929962158, "learning_rate": 3.3268805207908956e-05, "loss": 1.2141, "step": 20554 }, { "epoch": 1.2251758254857552, "grad_norm": 3.209577798843384, "learning_rate": 3.325989375111461e-05, "loss": 1.1371, "step": 20556 }, { "epoch": 1.2252950292049112, "grad_norm": 3.4080464839935303, "learning_rate": 3.325098289317116e-05, "loss": 1.2757, "step": 20558 }, { "epoch": 1.2254142329240671, "grad_norm": 3.575289011001587, "learning_rate": 3.324207263439733e-05, "loss": 1.3214, "step": 20560 }, { "epoch": 1.2255334366432233, "grad_norm": 3.2122082710266113, "learning_rate": 3.323316297511192e-05, "loss": 1.2215, "step": 20562 }, { "epoch": 1.2256526403623793, "grad_norm": 3.063241958618164, "learning_rate": 3.322425391563364e-05, "loss": 1.1064, "step": 20564 }, { "epoch": 1.2257718440815353, "grad_norm": 2.8127381801605225, "learning_rate": 3.3215345456281175e-05, "loss": 1.043, "step": 20566 }, { "epoch": 1.2258910478006915, "grad_norm": 3.050380229949951, "learning_rate": 3.320643759737325e-05, "loss": 1.0126, "step": 20568 }, { "epoch": 1.2260102515198474, "grad_norm": 3.0811846256256104, "learning_rate": 3.3197530339228487e-05, "loss": 1.1375, "step": 20570 }, { "epoch": 1.2261294552390034, "grad_norm": 3.2376346588134766, "learning_rate": 3.3188623682165586e-05, "loss": 1.1413, "step": 20572 }, { "epoch": 1.2262486589581596, "grad_norm": 3.3356878757476807, "learning_rate": 3.317971762650311e-05, "loss": 1.3627, "step": 20574 }, { "epoch": 1.2263678626773156, "grad_norm": 3.171692132949829, "learning_rate": 3.31708121725597e-05, "loss": 1.076, "step": 20576 }, { "epoch": 1.2264870663964715, "grad_norm": 3.494732618331909, "learning_rate": 3.3161907320653915e-05, "loss": 1.2101, "step": 20578 }, { "epoch": 1.2266062701156275, "grad_norm": 3.390601396560669, "learning_rate": 3.3153003071104303e-05, "loss": 1.2321, "step": 20580 }, { "epoch": 1.2267254738347837, "grad_norm": 3.2332911491394043, "learning_rate": 3.314409942422942e-05, "loss": 1.3414, "step": 20582 }, { "epoch": 1.2268446775539397, "grad_norm": 3.084076166152954, "learning_rate": 3.3135196380347775e-05, "loss": 1.1323, "step": 20584 }, { "epoch": 1.2269638812730956, "grad_norm": 3.232203483581543, "learning_rate": 3.312629393977787e-05, "loss": 1.2219, "step": 20586 }, { "epoch": 1.2270830849922518, "grad_norm": 2.9088621139526367, "learning_rate": 3.311739210283813e-05, "loss": 1.2112, "step": 20588 }, { "epoch": 1.2272022887114078, "grad_norm": 2.83658504486084, "learning_rate": 3.310849086984707e-05, "loss": 1.0456, "step": 20590 }, { "epoch": 1.2273214924305638, "grad_norm": 3.1464297771453857, "learning_rate": 3.309959024112308e-05, "loss": 1.3137, "step": 20592 }, { "epoch": 1.22744069614972, "grad_norm": 3.0320050716400146, "learning_rate": 3.3090690216984556e-05, "loss": 1.1108, "step": 20594 }, { "epoch": 1.227559899868876, "grad_norm": 3.2578999996185303, "learning_rate": 3.308179079774991e-05, "loss": 1.1501, "step": 20596 }, { "epoch": 1.227679103588032, "grad_norm": 3.350775718688965, "learning_rate": 3.307289198373749e-05, "loss": 1.2763, "step": 20598 }, { "epoch": 1.227798307307188, "grad_norm": 2.916078805923462, "learning_rate": 3.3063993775265636e-05, "loss": 1.1992, "step": 20600 }, { "epoch": 1.227917511026344, "grad_norm": 3.3738930225372314, "learning_rate": 3.305509617265266e-05, "loss": 1.1798, "step": 20602 }, { "epoch": 1.2280367147455, "grad_norm": 3.113865613937378, "learning_rate": 3.3046199176216896e-05, "loss": 1.2886, "step": 20604 }, { "epoch": 1.228155918464656, "grad_norm": 3.078596830368042, "learning_rate": 3.3037302786276584e-05, "loss": 0.9944, "step": 20606 }, { "epoch": 1.2282751221838122, "grad_norm": 3.2823662757873535, "learning_rate": 3.3028407003149976e-05, "loss": 1.2033, "step": 20608 }, { "epoch": 1.2283943259029682, "grad_norm": 3.1019647121429443, "learning_rate": 3.301951182715534e-05, "loss": 1.2055, "step": 20610 }, { "epoch": 1.2285135296221241, "grad_norm": 2.9350597858428955, "learning_rate": 3.3010617258610846e-05, "loss": 1.0569, "step": 20612 }, { "epoch": 1.2286327333412803, "grad_norm": 3.101907253265381, "learning_rate": 3.300172329783471e-05, "loss": 1.1715, "step": 20614 }, { "epoch": 1.2287519370604363, "grad_norm": 3.2273988723754883, "learning_rate": 3.299282994514508e-05, "loss": 1.2876, "step": 20616 }, { "epoch": 1.2288711407795923, "grad_norm": 3.2671611309051514, "learning_rate": 3.2983937200860124e-05, "loss": 1.2338, "step": 20618 }, { "epoch": 1.2289903444987484, "grad_norm": 3.1525018215179443, "learning_rate": 3.2975045065297924e-05, "loss": 1.1998, "step": 20620 }, { "epoch": 1.2291095482179044, "grad_norm": 2.7827351093292236, "learning_rate": 3.296615353877666e-05, "loss": 1.1569, "step": 20622 }, { "epoch": 1.2292287519370604, "grad_norm": 3.481084108352661, "learning_rate": 3.295726262161434e-05, "loss": 1.2983, "step": 20624 }, { "epoch": 1.2293479556562166, "grad_norm": 2.901320457458496, "learning_rate": 3.294837231412904e-05, "loss": 1.2392, "step": 20626 }, { "epoch": 1.2294671593753725, "grad_norm": 3.283059597015381, "learning_rate": 3.293948261663883e-05, "loss": 1.1852, "step": 20628 }, { "epoch": 1.2295863630945285, "grad_norm": 3.194833517074585, "learning_rate": 3.293059352946167e-05, "loss": 1.1245, "step": 20630 }, { "epoch": 1.2297055668136845, "grad_norm": 3.3010339736938477, "learning_rate": 3.29217050529156e-05, "loss": 1.1668, "step": 20632 }, { "epoch": 1.2298247705328407, "grad_norm": 3.246389389038086, "learning_rate": 3.291281718731855e-05, "loss": 1.0526, "step": 20634 }, { "epoch": 1.2299439742519966, "grad_norm": 2.9413952827453613, "learning_rate": 3.290392993298852e-05, "loss": 1.058, "step": 20636 }, { "epoch": 1.2300631779711526, "grad_norm": 3.379206657409668, "learning_rate": 3.28950432902434e-05, "loss": 1.3299, "step": 20638 }, { "epoch": 1.2301823816903088, "grad_norm": 3.363706588745117, "learning_rate": 3.2886157259401095e-05, "loss": 1.2822, "step": 20640 }, { "epoch": 1.2303015854094648, "grad_norm": 2.993698835372925, "learning_rate": 3.2877271840779525e-05, "loss": 1.1934, "step": 20642 }, { "epoch": 1.2304207891286207, "grad_norm": 2.976536273956299, "learning_rate": 3.28683870346965e-05, "loss": 1.1647, "step": 20644 }, { "epoch": 1.230539992847777, "grad_norm": 2.8385274410247803, "learning_rate": 3.28595028414699e-05, "loss": 1.1131, "step": 20646 }, { "epoch": 1.230659196566933, "grad_norm": 2.977747678756714, "learning_rate": 3.2850619261417514e-05, "loss": 1.1267, "step": 20648 }, { "epoch": 1.2307784002860889, "grad_norm": 2.881298303604126, "learning_rate": 3.2841736294857176e-05, "loss": 1.0429, "step": 20650 }, { "epoch": 1.230897604005245, "grad_norm": 3.251544713973999, "learning_rate": 3.283285394210662e-05, "loss": 1.1991, "step": 20652 }, { "epoch": 1.231016807724401, "grad_norm": 2.987964630126953, "learning_rate": 3.282397220348362e-05, "loss": 1.0851, "step": 20654 }, { "epoch": 1.231136011443557, "grad_norm": 3.2911102771759033, "learning_rate": 3.28150910793059e-05, "loss": 1.2055, "step": 20656 }, { "epoch": 1.231255215162713, "grad_norm": 3.3044967651367188, "learning_rate": 3.280621056989116e-05, "loss": 1.2867, "step": 20658 }, { "epoch": 1.2313744188818692, "grad_norm": 3.0815587043762207, "learning_rate": 3.279733067555712e-05, "loss": 1.3097, "step": 20660 }, { "epoch": 1.2314936226010251, "grad_norm": 3.196460723876953, "learning_rate": 3.27884513966214e-05, "loss": 1.1927, "step": 20662 }, { "epoch": 1.231612826320181, "grad_norm": 3.425832509994507, "learning_rate": 3.277957273340167e-05, "loss": 1.2061, "step": 20664 }, { "epoch": 1.2317320300393373, "grad_norm": 3.2721478939056396, "learning_rate": 3.2770694686215555e-05, "loss": 1.2325, "step": 20666 }, { "epoch": 1.2318512337584933, "grad_norm": 2.8808796405792236, "learning_rate": 3.276181725538062e-05, "loss": 1.0816, "step": 20668 }, { "epoch": 1.2319704374776492, "grad_norm": 3.092179298400879, "learning_rate": 3.275294044121447e-05, "loss": 1.0439, "step": 20670 }, { "epoch": 1.2320896411968054, "grad_norm": 3.2337191104888916, "learning_rate": 3.2744064244034636e-05, "loss": 1.2396, "step": 20672 }, { "epoch": 1.2322088449159614, "grad_norm": 2.9896864891052246, "learning_rate": 3.273518866415869e-05, "loss": 1.2015, "step": 20674 }, { "epoch": 1.2323280486351174, "grad_norm": 3.3411221504211426, "learning_rate": 3.27263137019041e-05, "loss": 1.1756, "step": 20676 }, { "epoch": 1.2324472523542735, "grad_norm": 2.7856664657592773, "learning_rate": 3.2717439357588375e-05, "loss": 1.1041, "step": 20678 }, { "epoch": 1.2325664560734295, "grad_norm": 3.3279078006744385, "learning_rate": 3.2708565631528966e-05, "loss": 1.275, "step": 20680 }, { "epoch": 1.2326856597925855, "grad_norm": 3.134615898132324, "learning_rate": 3.2699692524043346e-05, "loss": 1.0909, "step": 20682 }, { "epoch": 1.2328048635117415, "grad_norm": 3.434630870819092, "learning_rate": 3.269082003544891e-05, "loss": 1.1545, "step": 20684 }, { "epoch": 1.2329240672308976, "grad_norm": 3.2137577533721924, "learning_rate": 3.2681948166063046e-05, "loss": 1.2257, "step": 20686 }, { "epoch": 1.2330432709500536, "grad_norm": 2.9224395751953125, "learning_rate": 3.267307691620317e-05, "loss": 1.2142, "step": 20688 }, { "epoch": 1.2331624746692096, "grad_norm": 3.0434107780456543, "learning_rate": 3.2664206286186596e-05, "loss": 1.1705, "step": 20690 }, { "epoch": 1.2332816783883658, "grad_norm": 3.2920243740081787, "learning_rate": 3.265533627633071e-05, "loss": 1.0808, "step": 20692 }, { "epoch": 1.2334008821075217, "grad_norm": 3.142596960067749, "learning_rate": 3.2646466886952756e-05, "loss": 1.2951, "step": 20694 }, { "epoch": 1.2335200858266777, "grad_norm": 2.981618881225586, "learning_rate": 3.263759811837007e-05, "loss": 1.1199, "step": 20696 }, { "epoch": 1.233639289545834, "grad_norm": 3.1136255264282227, "learning_rate": 3.2628729970899916e-05, "loss": 1.1549, "step": 20698 }, { "epoch": 1.2337584932649899, "grad_norm": 3.1976542472839355, "learning_rate": 3.261986244485951e-05, "loss": 1.221, "step": 20700 }, { "epoch": 1.2338776969841458, "grad_norm": 3.55218505859375, "learning_rate": 3.261099554056609e-05, "loss": 1.1536, "step": 20702 }, { "epoch": 1.233996900703302, "grad_norm": 3.3208959102630615, "learning_rate": 3.260212925833685e-05, "loss": 1.1876, "step": 20704 }, { "epoch": 1.234116104422458, "grad_norm": 3.098127603530884, "learning_rate": 3.2593263598489e-05, "loss": 1.1112, "step": 20706 }, { "epoch": 1.234235308141614, "grad_norm": 2.9243223667144775, "learning_rate": 3.258439856133964e-05, "loss": 1.0818, "step": 20708 }, { "epoch": 1.23435451186077, "grad_norm": 3.1182034015655518, "learning_rate": 3.257553414720594e-05, "loss": 1.0848, "step": 20710 }, { "epoch": 1.2344737155799261, "grad_norm": 3.004079580307007, "learning_rate": 3.2566670356405015e-05, "loss": 1.1617, "step": 20712 }, { "epoch": 1.234592919299082, "grad_norm": 3.480921745300293, "learning_rate": 3.2557807189253914e-05, "loss": 1.242, "step": 20714 }, { "epoch": 1.234712123018238, "grad_norm": 3.3662374019622803, "learning_rate": 3.254894464606974e-05, "loss": 1.113, "step": 20716 }, { "epoch": 1.2348313267373943, "grad_norm": 3.4252803325653076, "learning_rate": 3.254008272716951e-05, "loss": 1.1742, "step": 20718 }, { "epoch": 1.2349505304565502, "grad_norm": 3.2129077911376953, "learning_rate": 3.2531221432870274e-05, "loss": 1.2539, "step": 20720 }, { "epoch": 1.2350697341757062, "grad_norm": 3.144665241241455, "learning_rate": 3.2522360763489006e-05, "loss": 1.1163, "step": 20722 }, { "epoch": 1.2351889378948624, "grad_norm": 3.030500650405884, "learning_rate": 3.2513500719342717e-05, "loss": 1.0768, "step": 20724 }, { "epoch": 1.2353081416140184, "grad_norm": 3.2284839153289795, "learning_rate": 3.250464130074832e-05, "loss": 1.048, "step": 20726 }, { "epoch": 1.2354273453331743, "grad_norm": 2.9253077507019043, "learning_rate": 3.2495782508022754e-05, "loss": 1.0306, "step": 20728 }, { "epoch": 1.2355465490523305, "grad_norm": 3.3749449253082275, "learning_rate": 3.248692434148296e-05, "loss": 1.1171, "step": 20730 }, { "epoch": 1.2356657527714865, "grad_norm": 3.5594334602355957, "learning_rate": 3.247806680144578e-05, "loss": 1.392, "step": 20732 }, { "epoch": 1.2357849564906425, "grad_norm": 3.2697908878326416, "learning_rate": 3.246920988822811e-05, "loss": 1.2447, "step": 20734 }, { "epoch": 1.2359041602097984, "grad_norm": 3.52250337600708, "learning_rate": 3.2460353602146774e-05, "loss": 1.2343, "step": 20736 }, { "epoch": 1.2360233639289546, "grad_norm": 3.339881181716919, "learning_rate": 3.245149794351863e-05, "loss": 1.1723, "step": 20738 }, { "epoch": 1.2361425676481106, "grad_norm": 2.8397300243377686, "learning_rate": 3.244264291266042e-05, "loss": 1.0499, "step": 20740 }, { "epoch": 1.2362617713672668, "grad_norm": 3.1529152393341064, "learning_rate": 3.243378850988895e-05, "loss": 1.1004, "step": 20742 }, { "epoch": 1.2363809750864228, "grad_norm": 3.467627763748169, "learning_rate": 3.242493473552098e-05, "loss": 1.1724, "step": 20744 }, { "epoch": 1.2365001788055787, "grad_norm": 3.2900807857513428, "learning_rate": 3.241608158987321e-05, "loss": 1.1957, "step": 20746 }, { "epoch": 1.2366193825247347, "grad_norm": 3.1849114894866943, "learning_rate": 3.240722907326237e-05, "loss": 1.2672, "step": 20748 }, { "epoch": 1.2367385862438909, "grad_norm": 3.1053667068481445, "learning_rate": 3.239837718600513e-05, "loss": 1.1051, "step": 20750 }, { "epoch": 1.2368577899630468, "grad_norm": 3.287294864654541, "learning_rate": 3.238952592841818e-05, "loss": 1.2638, "step": 20752 }, { "epoch": 1.2369769936822028, "grad_norm": 3.102712392807007, "learning_rate": 3.238067530081811e-05, "loss": 1.1246, "step": 20754 }, { "epoch": 1.237096197401359, "grad_norm": 3.1447594165802, "learning_rate": 3.237182530352161e-05, "loss": 1.1975, "step": 20756 }, { "epoch": 1.237215401120515, "grad_norm": 3.3120172023773193, "learning_rate": 3.2362975936845206e-05, "loss": 1.1932, "step": 20758 }, { "epoch": 1.237334604839671, "grad_norm": 3.460688352584839, "learning_rate": 3.23541272011055e-05, "loss": 1.1776, "step": 20760 }, { "epoch": 1.237453808558827, "grad_norm": 2.664362668991089, "learning_rate": 3.2345279096619055e-05, "loss": 1.2102, "step": 20762 }, { "epoch": 1.237573012277983, "grad_norm": 3.3460636138916016, "learning_rate": 3.233643162370236e-05, "loss": 1.181, "step": 20764 }, { "epoch": 1.237692215997139, "grad_norm": 3.0117228031158447, "learning_rate": 3.232758478267196e-05, "loss": 1.3031, "step": 20766 }, { "epoch": 1.2378114197162953, "grad_norm": 3.3798162937164307, "learning_rate": 3.23187385738443e-05, "loss": 1.1687, "step": 20768 }, { "epoch": 1.2379306234354512, "grad_norm": 3.106981039047241, "learning_rate": 3.2309892997535874e-05, "loss": 1.2829, "step": 20770 }, { "epoch": 1.2380498271546072, "grad_norm": 3.2703399658203125, "learning_rate": 3.23010480540631e-05, "loss": 1.0904, "step": 20772 }, { "epoch": 1.2381690308737632, "grad_norm": 3.118708372116089, "learning_rate": 3.229220374374237e-05, "loss": 1.2847, "step": 20774 }, { "epoch": 1.2382882345929194, "grad_norm": 2.9581878185272217, "learning_rate": 3.228336006689013e-05, "loss": 1.3639, "step": 20776 }, { "epoch": 1.2384074383120753, "grad_norm": 3.2795302867889404, "learning_rate": 3.227451702382269e-05, "loss": 1.1112, "step": 20778 }, { "epoch": 1.2385266420312313, "grad_norm": 3.470715284347534, "learning_rate": 3.2265674614856447e-05, "loss": 1.1552, "step": 20780 }, { "epoch": 1.2386458457503875, "grad_norm": 3.1822471618652344, "learning_rate": 3.225683284030767e-05, "loss": 1.143, "step": 20782 }, { "epoch": 1.2387650494695435, "grad_norm": 3.006016492843628, "learning_rate": 3.224799170049273e-05, "loss": 1.0841, "step": 20784 }, { "epoch": 1.2388842531886994, "grad_norm": 3.232494831085205, "learning_rate": 3.223915119572782e-05, "loss": 1.1055, "step": 20786 }, { "epoch": 1.2390034569078554, "grad_norm": 3.0392680168151855, "learning_rate": 3.2230311326329274e-05, "loss": 1.1795, "step": 20788 }, { "epoch": 1.2391226606270116, "grad_norm": 3.4148521423339844, "learning_rate": 3.222147209261328e-05, "loss": 1.3786, "step": 20790 }, { "epoch": 1.2392418643461676, "grad_norm": 3.0216376781463623, "learning_rate": 3.221263349489605e-05, "loss": 1.22, "step": 20792 }, { "epoch": 1.2393610680653238, "grad_norm": 2.8782899379730225, "learning_rate": 3.22037955334938e-05, "loss": 1.0268, "step": 20794 }, { "epoch": 1.2394802717844797, "grad_norm": 3.013232946395874, "learning_rate": 3.219495820872265e-05, "loss": 1.1055, "step": 20796 }, { "epoch": 1.2395994755036357, "grad_norm": 3.0503783226013184, "learning_rate": 3.218612152089878e-05, "loss": 1.0908, "step": 20798 }, { "epoch": 1.2397186792227917, "grad_norm": 3.541567087173462, "learning_rate": 3.217728547033827e-05, "loss": 1.2379, "step": 20800 }, { "epoch": 1.2398378829419479, "grad_norm": 3.6671972274780273, "learning_rate": 3.216845005735727e-05, "loss": 1.2742, "step": 20802 }, { "epoch": 1.2399570866611038, "grad_norm": 3.3946924209594727, "learning_rate": 3.215961528227181e-05, "loss": 1.1844, "step": 20804 }, { "epoch": 1.2400762903802598, "grad_norm": 3.1318962574005127, "learning_rate": 3.2150781145397934e-05, "loss": 1.1952, "step": 20806 }, { "epoch": 1.240195494099416, "grad_norm": 3.1863150596618652, "learning_rate": 3.214194764705172e-05, "loss": 1.3756, "step": 20808 }, { "epoch": 1.240314697818572, "grad_norm": 3.1804800033569336, "learning_rate": 3.213311478754911e-05, "loss": 1.1867, "step": 20810 }, { "epoch": 1.240433901537728, "grad_norm": 3.614575147628784, "learning_rate": 3.2124282567206124e-05, "loss": 1.4239, "step": 20812 }, { "epoch": 1.240553105256884, "grad_norm": 3.0070950984954834, "learning_rate": 3.21154509863387e-05, "loss": 1.1633, "step": 20814 }, { "epoch": 1.24067230897604, "grad_norm": 3.136805295944214, "learning_rate": 3.2106620045262815e-05, "loss": 1.1518, "step": 20816 }, { "epoch": 1.240791512695196, "grad_norm": 2.7686376571655273, "learning_rate": 3.2097789744294335e-05, "loss": 1.0851, "step": 20818 }, { "epoch": 1.2409107164143522, "grad_norm": 3.506782054901123, "learning_rate": 3.208896008374916e-05, "loss": 1.2265, "step": 20820 }, { "epoch": 1.2410299201335082, "grad_norm": 2.917909622192383, "learning_rate": 3.208013106394318e-05, "loss": 1.1537, "step": 20822 }, { "epoch": 1.2411491238526642, "grad_norm": 3.0596771240234375, "learning_rate": 3.20713026851922e-05, "loss": 1.205, "step": 20824 }, { "epoch": 1.2412683275718202, "grad_norm": 2.9796812534332275, "learning_rate": 3.2062474947812096e-05, "loss": 1.1243, "step": 20826 }, { "epoch": 1.2413875312909763, "grad_norm": 3.3568527698516846, "learning_rate": 3.205364785211862e-05, "loss": 1.1775, "step": 20828 }, { "epoch": 1.2415067350101323, "grad_norm": 2.801192283630371, "learning_rate": 3.204482139842756e-05, "loss": 0.9953, "step": 20830 }, { "epoch": 1.2416259387292883, "grad_norm": 3.015524387359619, "learning_rate": 3.203599558705469e-05, "loss": 1.1801, "step": 20832 }, { "epoch": 1.2417451424484445, "grad_norm": 3.1770288944244385, "learning_rate": 3.20271704183157e-05, "loss": 1.1809, "step": 20834 }, { "epoch": 1.2418643461676004, "grad_norm": 3.261157751083374, "learning_rate": 3.201834589252632e-05, "loss": 1.3125, "step": 20836 }, { "epoch": 1.2419835498867564, "grad_norm": 3.255066394805908, "learning_rate": 3.200952201000224e-05, "loss": 1.2077, "step": 20838 }, { "epoch": 1.2421027536059124, "grad_norm": 3.0299994945526123, "learning_rate": 3.2000698771059114e-05, "loss": 1.1928, "step": 20840 }, { "epoch": 1.2422219573250686, "grad_norm": 3.064972162246704, "learning_rate": 3.199187617601256e-05, "loss": 1.3228, "step": 20842 }, { "epoch": 1.2423411610442245, "grad_norm": 3.3591549396514893, "learning_rate": 3.198305422517822e-05, "loss": 1.2522, "step": 20844 }, { "epoch": 1.2424603647633807, "grad_norm": 3.378911018371582, "learning_rate": 3.1974232918871665e-05, "loss": 1.178, "step": 20846 }, { "epoch": 1.2425795684825367, "grad_norm": 3.2323176860809326, "learning_rate": 3.19654122574085e-05, "loss": 1.1843, "step": 20848 }, { "epoch": 1.2426987722016927, "grad_norm": 2.91328763961792, "learning_rate": 3.1956592241104225e-05, "loss": 1.0752, "step": 20850 }, { "epoch": 1.2428179759208486, "grad_norm": 3.4265332221984863, "learning_rate": 3.1947772870274375e-05, "loss": 1.2593, "step": 20852 }, { "epoch": 1.2429371796400048, "grad_norm": 3.0374584197998047, "learning_rate": 3.193895414523446e-05, "loss": 1.1474, "step": 20854 }, { "epoch": 1.2430563833591608, "grad_norm": 3.2537567615509033, "learning_rate": 3.1930136066299945e-05, "loss": 1.2706, "step": 20856 }, { "epoch": 1.2431755870783168, "grad_norm": 3.2967963218688965, "learning_rate": 3.19213186337863e-05, "loss": 1.1416, "step": 20858 }, { "epoch": 1.243294790797473, "grad_norm": 2.9185831546783447, "learning_rate": 3.191250184800893e-05, "loss": 1.1404, "step": 20860 }, { "epoch": 1.243413994516629, "grad_norm": 3.2380127906799316, "learning_rate": 3.190368570928326e-05, "loss": 1.2372, "step": 20862 }, { "epoch": 1.243533198235785, "grad_norm": 2.874072313308716, "learning_rate": 3.189487021792469e-05, "loss": 1.0036, "step": 20864 }, { "epoch": 1.243652401954941, "grad_norm": 2.8806262016296387, "learning_rate": 3.188605537424853e-05, "loss": 1.2508, "step": 20866 }, { "epoch": 1.243771605674097, "grad_norm": 3.2283596992492676, "learning_rate": 3.187724117857015e-05, "loss": 1.2762, "step": 20868 }, { "epoch": 1.243890809393253, "grad_norm": 3.334977865219116, "learning_rate": 3.1868427631204855e-05, "loss": 1.2412, "step": 20870 }, { "epoch": 1.2440100131124092, "grad_norm": 3.062391996383667, "learning_rate": 3.1859614732467954e-05, "loss": 1.2303, "step": 20872 }, { "epoch": 1.2441292168315652, "grad_norm": 3.190532922744751, "learning_rate": 3.1850802482674693e-05, "loss": 1.2182, "step": 20874 }, { "epoch": 1.2442484205507212, "grad_norm": 3.313812017440796, "learning_rate": 3.1841990882140325e-05, "loss": 1.1866, "step": 20876 }, { "epoch": 1.2443676242698771, "grad_norm": 3.3626649379730225, "learning_rate": 3.183317993118009e-05, "loss": 1.2908, "step": 20878 }, { "epoch": 1.2444868279890333, "grad_norm": 3.2093029022216797, "learning_rate": 3.182436963010913e-05, "loss": 1.0789, "step": 20880 }, { "epoch": 1.2446060317081893, "grad_norm": 3.193743944168091, "learning_rate": 3.181555997924268e-05, "loss": 1.0312, "step": 20882 }, { "epoch": 1.2447252354273453, "grad_norm": 3.334170341491699, "learning_rate": 3.180675097889585e-05, "loss": 1.3048, "step": 20884 }, { "epoch": 1.2448444391465014, "grad_norm": 3.1979458332061768, "learning_rate": 3.179794262938379e-05, "loss": 1.1588, "step": 20886 }, { "epoch": 1.2449636428656574, "grad_norm": 3.424381732940674, "learning_rate": 3.17891349310216e-05, "loss": 1.2194, "step": 20888 }, { "epoch": 1.2450828465848134, "grad_norm": 3.267131805419922, "learning_rate": 3.1780327884124376e-05, "loss": 1.1762, "step": 20890 }, { "epoch": 1.2452020503039696, "grad_norm": 3.1860458850860596, "learning_rate": 3.177152148900715e-05, "loss": 1.2637, "step": 20892 }, { "epoch": 1.2453212540231255, "grad_norm": 3.6581554412841797, "learning_rate": 3.176271574598497e-05, "loss": 1.2328, "step": 20894 }, { "epoch": 1.2454404577422815, "grad_norm": 3.3139731884002686, "learning_rate": 3.175391065537285e-05, "loss": 1.2468, "step": 20896 }, { "epoch": 1.2455596614614377, "grad_norm": 3.3086094856262207, "learning_rate": 3.1745106217485764e-05, "loss": 1.0923, "step": 20898 }, { "epoch": 1.2456788651805937, "grad_norm": 3.244166612625122, "learning_rate": 3.1736302432638695e-05, "loss": 1.1769, "step": 20900 }, { "epoch": 1.2457980688997496, "grad_norm": 3.304036855697632, "learning_rate": 3.172749930114657e-05, "loss": 1.1799, "step": 20902 }, { "epoch": 1.2459172726189056, "grad_norm": 2.613598346710205, "learning_rate": 3.1718696823324326e-05, "loss": 1.0629, "step": 20904 }, { "epoch": 1.2460364763380618, "grad_norm": 3.2385752201080322, "learning_rate": 3.1709894999486826e-05, "loss": 1.1583, "step": 20906 }, { "epoch": 1.2461556800572178, "grad_norm": 3.412844181060791, "learning_rate": 3.170109382994898e-05, "loss": 1.198, "step": 20908 }, { "epoch": 1.2462748837763737, "grad_norm": 3.1406705379486084, "learning_rate": 3.1692293315025616e-05, "loss": 1.1969, "step": 20910 }, { "epoch": 1.24639408749553, "grad_norm": 3.3253121376037598, "learning_rate": 3.1683493455031544e-05, "loss": 1.0499, "step": 20912 }, { "epoch": 1.246513291214686, "grad_norm": 3.327425241470337, "learning_rate": 3.167469425028159e-05, "loss": 1.1403, "step": 20914 }, { "epoch": 1.2466324949338419, "grad_norm": 3.0620317459106445, "learning_rate": 3.1665895701090516e-05, "loss": 1.2104, "step": 20916 }, { "epoch": 1.246751698652998, "grad_norm": 2.982875347137451, "learning_rate": 3.165709780777308e-05, "loss": 1.1512, "step": 20918 }, { "epoch": 1.246870902372154, "grad_norm": 3.213540554046631, "learning_rate": 3.1648300570644016e-05, "loss": 1.1428, "step": 20920 }, { "epoch": 1.24699010609131, "grad_norm": 2.910566568374634, "learning_rate": 3.163950399001805e-05, "loss": 1.0238, "step": 20922 }, { "epoch": 1.2471093098104662, "grad_norm": 3.194608449935913, "learning_rate": 3.163070806620984e-05, "loss": 1.1376, "step": 20924 }, { "epoch": 1.2472285135296222, "grad_norm": 2.799060106277466, "learning_rate": 3.162191279953403e-05, "loss": 0.999, "step": 20926 }, { "epoch": 1.2473477172487781, "grad_norm": 3.1280765533447266, "learning_rate": 3.1613118190305304e-05, "loss": 1.1605, "step": 20928 }, { "epoch": 1.247466920967934, "grad_norm": 3.1398744583129883, "learning_rate": 3.1604324238838237e-05, "loss": 1.1102, "step": 20930 }, { "epoch": 1.2475861246870903, "grad_norm": 3.089301824569702, "learning_rate": 3.159553094544743e-05, "loss": 1.1245, "step": 20932 }, { "epoch": 1.2477053284062463, "grad_norm": 3.261359214782715, "learning_rate": 3.158673831044745e-05, "loss": 1.2504, "step": 20934 }, { "epoch": 1.2478245321254022, "grad_norm": 3.245281934738159, "learning_rate": 3.1577946334152865e-05, "loss": 1.2724, "step": 20936 }, { "epoch": 1.2479437358445584, "grad_norm": 3.240172863006592, "learning_rate": 3.156915501687815e-05, "loss": 1.2721, "step": 20938 }, { "epoch": 1.2480629395637144, "grad_norm": 2.5252678394317627, "learning_rate": 3.156036435893781e-05, "loss": 1.1945, "step": 20940 }, { "epoch": 1.2481821432828704, "grad_norm": 3.225022554397583, "learning_rate": 3.1551574360646355e-05, "loss": 1.1383, "step": 20942 }, { "epoch": 1.2483013470020266, "grad_norm": 3.1963016986846924, "learning_rate": 3.1542785022318184e-05, "loss": 1.1023, "step": 20944 }, { "epoch": 1.2484205507211825, "grad_norm": 2.876739740371704, "learning_rate": 3.1533996344267755e-05, "loss": 1.2072, "step": 20946 }, { "epoch": 1.2485397544403385, "grad_norm": 3.171152353286743, "learning_rate": 3.1525208326809444e-05, "loss": 1.1587, "step": 20948 }, { "epoch": 1.2486589581594947, "grad_norm": 3.216702938079834, "learning_rate": 3.151642097025765e-05, "loss": 1.1003, "step": 20950 }, { "epoch": 1.2487781618786506, "grad_norm": 3.5365922451019287, "learning_rate": 3.150763427492673e-05, "loss": 1.2514, "step": 20952 }, { "epoch": 1.2488973655978066, "grad_norm": 3.407926559448242, "learning_rate": 3.149884824113098e-05, "loss": 1.2402, "step": 20954 }, { "epoch": 1.2490165693169626, "grad_norm": 2.7983975410461426, "learning_rate": 3.1490062869184744e-05, "loss": 1.0781, "step": 20956 }, { "epoch": 1.2491357730361188, "grad_norm": 3.4003758430480957, "learning_rate": 3.148127815940228e-05, "loss": 1.3289, "step": 20958 }, { "epoch": 1.2492549767552747, "grad_norm": 3.2303032875061035, "learning_rate": 3.147249411209788e-05, "loss": 1.2459, "step": 20960 }, { "epoch": 1.2493741804744307, "grad_norm": 3.19229793548584, "learning_rate": 3.146371072758574e-05, "loss": 1.2839, "step": 20962 }, { "epoch": 1.249493384193587, "grad_norm": 2.659444570541382, "learning_rate": 3.145492800618009e-05, "loss": 1.009, "step": 20964 }, { "epoch": 1.2496125879127429, "grad_norm": 3.234934091567993, "learning_rate": 3.14461459481951e-05, "loss": 1.206, "step": 20966 }, { "epoch": 1.2497317916318988, "grad_norm": 3.2448978424072266, "learning_rate": 3.1437364553944985e-05, "loss": 1.2103, "step": 20968 }, { "epoch": 1.249850995351055, "grad_norm": 2.9424097537994385, "learning_rate": 3.142858382374384e-05, "loss": 1.1157, "step": 20970 }, { "epoch": 1.249970199070211, "grad_norm": 3.2407567501068115, "learning_rate": 3.141980375790578e-05, "loss": 1.2631, "step": 20972 }, { "epoch": 1.250089402789367, "grad_norm": 2.941877603530884, "learning_rate": 3.141102435674493e-05, "loss": 1.1612, "step": 20974 }, { "epoch": 1.2502086065085232, "grad_norm": 2.879389762878418, "learning_rate": 3.140224562057532e-05, "loss": 1.0385, "step": 20976 }, { "epoch": 1.2503278102276791, "grad_norm": 3.1153202056884766, "learning_rate": 3.139346754971102e-05, "loss": 1.1798, "step": 20978 }, { "epoch": 1.250447013946835, "grad_norm": 3.1325528621673584, "learning_rate": 3.138469014446605e-05, "loss": 1.4299, "step": 20980 }, { "epoch": 1.250566217665991, "grad_norm": 3.320643663406372, "learning_rate": 3.137591340515441e-05, "loss": 1.1351, "step": 20982 }, { "epoch": 1.2506854213851473, "grad_norm": 2.989783763885498, "learning_rate": 3.1367137332090076e-05, "loss": 1.1776, "step": 20984 }, { "epoch": 1.2508046251043032, "grad_norm": 3.670541286468506, "learning_rate": 3.1358361925586974e-05, "loss": 1.1838, "step": 20986 }, { "epoch": 1.2509238288234594, "grad_norm": 3.100236654281616, "learning_rate": 3.134958718595905e-05, "loss": 1.2536, "step": 20988 }, { "epoch": 1.2510430325426154, "grad_norm": 3.0991060733795166, "learning_rate": 3.134081311352021e-05, "loss": 1.2269, "step": 20990 }, { "epoch": 1.2511622362617714, "grad_norm": 3.2142553329467773, "learning_rate": 3.1332039708584325e-05, "loss": 1.2011, "step": 20992 }, { "epoch": 1.2512814399809273, "grad_norm": 2.720438003540039, "learning_rate": 3.132326697146525e-05, "loss": 1.2711, "step": 20994 }, { "epoch": 1.2514006437000833, "grad_norm": 3.029693126678467, "learning_rate": 3.131449490247682e-05, "loss": 1.0344, "step": 20996 }, { "epoch": 1.2515198474192395, "grad_norm": 3.127546787261963, "learning_rate": 3.130572350193285e-05, "loss": 1.1224, "step": 20998 }, { "epoch": 1.2516390511383955, "grad_norm": 3.527111768722534, "learning_rate": 3.129695277014709e-05, "loss": 1.1828, "step": 21000 }, { "epoch": 1.2517582548575517, "grad_norm": 3.007035970687866, "learning_rate": 3.128818270743333e-05, "loss": 1.136, "step": 21002 }, { "epoch": 1.2518774585767076, "grad_norm": 2.9009509086608887, "learning_rate": 3.1279413314105295e-05, "loss": 1.3229, "step": 21004 }, { "epoch": 1.2519966622958636, "grad_norm": 3.2201685905456543, "learning_rate": 3.127064459047671e-05, "loss": 1.2254, "step": 21006 }, { "epoch": 1.2521158660150196, "grad_norm": 3.216158866882324, "learning_rate": 3.126187653686123e-05, "loss": 1.0542, "step": 21008 }, { "epoch": 1.2522350697341758, "grad_norm": 3.1704587936401367, "learning_rate": 3.1253109153572554e-05, "loss": 1.0569, "step": 21010 }, { "epoch": 1.2523542734533317, "grad_norm": 3.4154648780822754, "learning_rate": 3.124434244092431e-05, "loss": 1.4432, "step": 21012 }, { "epoch": 1.252473477172488, "grad_norm": 3.5398550033569336, "learning_rate": 3.123557639923009e-05, "loss": 1.1734, "step": 21014 }, { "epoch": 1.2525926808916439, "grad_norm": 3.0603384971618652, "learning_rate": 3.122681102880352e-05, "loss": 1.1453, "step": 21016 }, { "epoch": 1.2527118846107999, "grad_norm": 3.3194429874420166, "learning_rate": 3.121804632995813e-05, "loss": 1.0019, "step": 21018 }, { "epoch": 1.2528310883299558, "grad_norm": 2.9360756874084473, "learning_rate": 3.12092823030075e-05, "loss": 1.2826, "step": 21020 }, { "epoch": 1.252950292049112, "grad_norm": 3.2891924381256104, "learning_rate": 3.1200518948265124e-05, "loss": 1.1724, "step": 21022 }, { "epoch": 1.253069495768268, "grad_norm": 2.9438674449920654, "learning_rate": 3.1191756266044526e-05, "loss": 1.1489, "step": 21024 }, { "epoch": 1.253188699487424, "grad_norm": 3.25154447555542, "learning_rate": 3.1182994256659135e-05, "loss": 1.1844, "step": 21026 }, { "epoch": 1.2533079032065801, "grad_norm": 3.3881423473358154, "learning_rate": 3.117423292042244e-05, "loss": 1.1947, "step": 21028 }, { "epoch": 1.2534271069257361, "grad_norm": 3.406653642654419, "learning_rate": 3.116547225764785e-05, "loss": 1.1829, "step": 21030 }, { "epoch": 1.253546310644892, "grad_norm": 3.0649616718292236, "learning_rate": 3.1156712268648736e-05, "loss": 1.1487, "step": 21032 }, { "epoch": 1.253665514364048, "grad_norm": 3.395899772644043, "learning_rate": 3.1147952953738515e-05, "loss": 1.2345, "step": 21034 }, { "epoch": 1.2537847180832042, "grad_norm": 3.294020652770996, "learning_rate": 3.11391943132305e-05, "loss": 1.2402, "step": 21036 }, { "epoch": 1.2539039218023602, "grad_norm": 3.1857919692993164, "learning_rate": 3.113043634743805e-05, "loss": 1.2933, "step": 21038 }, { "epoch": 1.2540231255215164, "grad_norm": 3.159834384918213, "learning_rate": 3.1121679056674446e-05, "loss": 1.2151, "step": 21040 }, { "epoch": 1.2541423292406724, "grad_norm": 3.041828155517578, "learning_rate": 3.1112922441252976e-05, "loss": 1.117, "step": 21042 }, { "epoch": 1.2542615329598283, "grad_norm": 3.52028226852417, "learning_rate": 3.1104166501486907e-05, "loss": 1.2038, "step": 21044 }, { "epoch": 1.2543807366789843, "grad_norm": 3.0731582641601562, "learning_rate": 3.109541123768943e-05, "loss": 1.1182, "step": 21046 }, { "epoch": 1.2544999403981405, "grad_norm": 3.2002389430999756, "learning_rate": 3.1086656650173784e-05, "loss": 1.1847, "step": 21048 }, { "epoch": 1.2546191441172965, "grad_norm": 3.3883676528930664, "learning_rate": 3.107790273925314e-05, "loss": 1.2434, "step": 21050 }, { "epoch": 1.2547383478364524, "grad_norm": 3.545915365219116, "learning_rate": 3.1069149505240656e-05, "loss": 1.2393, "step": 21052 }, { "epoch": 1.2548575515556086, "grad_norm": 3.6152396202087402, "learning_rate": 3.1060396948449456e-05, "loss": 1.1786, "step": 21054 }, { "epoch": 1.2549767552747646, "grad_norm": 3.0778558254241943, "learning_rate": 3.105164506919268e-05, "loss": 1.377, "step": 21056 }, { "epoch": 1.2550959589939206, "grad_norm": 3.011124849319458, "learning_rate": 3.1042893867783386e-05, "loss": 1.0378, "step": 21058 }, { "epoch": 1.2552151627130765, "grad_norm": 2.9876089096069336, "learning_rate": 3.103414334453463e-05, "loss": 1.2065, "step": 21060 }, { "epoch": 1.2553343664322327, "grad_norm": 3.402588129043579, "learning_rate": 3.102539349975947e-05, "loss": 1.1766, "step": 21062 }, { "epoch": 1.2554535701513887, "grad_norm": 3.134012460708618, "learning_rate": 3.1016644333770884e-05, "loss": 1.2105, "step": 21064 }, { "epoch": 1.255572773870545, "grad_norm": 2.710456132888794, "learning_rate": 3.1007895846881896e-05, "loss": 1.0395, "step": 21066 }, { "epoch": 1.2556919775897009, "grad_norm": 3.339836359024048, "learning_rate": 3.099914803940545e-05, "loss": 1.102, "step": 21068 }, { "epoch": 1.2558111813088568, "grad_norm": 3.551008939743042, "learning_rate": 3.0990400911654494e-05, "loss": 1.1387, "step": 21070 }, { "epoch": 1.2559303850280128, "grad_norm": 3.0233678817749023, "learning_rate": 3.098165446394193e-05, "loss": 1.2019, "step": 21072 }, { "epoch": 1.256049588747169, "grad_norm": 3.3308680057525635, "learning_rate": 3.097290869658066e-05, "loss": 1.109, "step": 21074 }, { "epoch": 1.256168792466325, "grad_norm": 3.0199179649353027, "learning_rate": 3.096416360988356e-05, "loss": 1.1588, "step": 21076 }, { "epoch": 1.256287996185481, "grad_norm": 3.2489261627197266, "learning_rate": 3.095541920416344e-05, "loss": 1.202, "step": 21078 }, { "epoch": 1.2564071999046371, "grad_norm": 2.898483991622925, "learning_rate": 3.094667547973315e-05, "loss": 1.2892, "step": 21080 }, { "epoch": 1.256526403623793, "grad_norm": 3.3889214992523193, "learning_rate": 3.0937932436905444e-05, "loss": 1.1779, "step": 21082 }, { "epoch": 1.256645607342949, "grad_norm": 3.151419162750244, "learning_rate": 3.092919007599313e-05, "loss": 1.0468, "step": 21084 }, { "epoch": 1.256764811062105, "grad_norm": 3.510946273803711, "learning_rate": 3.092044839730893e-05, "loss": 1.0819, "step": 21086 }, { "epoch": 1.2568840147812612, "grad_norm": 2.9917550086975098, "learning_rate": 3.091170740116559e-05, "loss": 1.2065, "step": 21088 }, { "epoch": 1.2570032185004172, "grad_norm": 3.467984437942505, "learning_rate": 3.090296708787578e-05, "loss": 1.2001, "step": 21090 }, { "epoch": 1.2571224222195734, "grad_norm": 3.9050495624542236, "learning_rate": 3.089422745775216e-05, "loss": 1.2105, "step": 21092 }, { "epoch": 1.2572416259387293, "grad_norm": 2.9837327003479004, "learning_rate": 3.088548851110742e-05, "loss": 1.1968, "step": 21094 }, { "epoch": 1.2573608296578853, "grad_norm": 3.3918845653533936, "learning_rate": 3.087675024825413e-05, "loss": 1.1504, "step": 21096 }, { "epoch": 1.2574800333770413, "grad_norm": 3.2603416442871094, "learning_rate": 3.086801266950493e-05, "loss": 1.0572, "step": 21098 }, { "epoch": 1.2575992370961975, "grad_norm": 2.958373785018921, "learning_rate": 3.0859275775172355e-05, "loss": 1.0879, "step": 21100 }, { "epoch": 1.2577184408153534, "grad_norm": 3.5069546699523926, "learning_rate": 3.0850539565569e-05, "loss": 1.1771, "step": 21102 }, { "epoch": 1.2578376445345094, "grad_norm": 3.3778672218322754, "learning_rate": 3.084180404100736e-05, "loss": 1.2614, "step": 21104 }, { "epoch": 1.2579568482536656, "grad_norm": 2.91333270072937, "learning_rate": 3.0833069201799924e-05, "loss": 1.2039, "step": 21106 }, { "epoch": 1.2580760519728216, "grad_norm": 2.8405206203460693, "learning_rate": 3.08243350482592e-05, "loss": 1.1322, "step": 21108 }, { "epoch": 1.2581952556919775, "grad_norm": 3.4640402793884277, "learning_rate": 3.0815601580697606e-05, "loss": 1.2305, "step": 21110 }, { "epoch": 1.2583144594111335, "grad_norm": 3.2548828125, "learning_rate": 3.0806868799427585e-05, "loss": 1.1885, "step": 21112 }, { "epoch": 1.2584336631302897, "grad_norm": 3.667426109313965, "learning_rate": 3.079813670476152e-05, "loss": 1.2077, "step": 21114 }, { "epoch": 1.2585528668494457, "grad_norm": 3.3170881271362305, "learning_rate": 3.078940529701183e-05, "loss": 1.1708, "step": 21116 }, { "epoch": 1.2586720705686019, "grad_norm": 3.1564242839813232, "learning_rate": 3.0780674576490845e-05, "loss": 1.197, "step": 21118 }, { "epoch": 1.2587912742877578, "grad_norm": 2.9218838214874268, "learning_rate": 3.0771944543510864e-05, "loss": 1.1174, "step": 21120 }, { "epoch": 1.2589104780069138, "grad_norm": 3.030113935470581, "learning_rate": 3.0763215198384235e-05, "loss": 1.241, "step": 21122 }, { "epoch": 1.2590296817260698, "grad_norm": 3.166745185852051, "learning_rate": 3.075448654142319e-05, "loss": 1.2222, "step": 21124 }, { "epoch": 1.259148885445226, "grad_norm": 3.186142921447754, "learning_rate": 3.074575857294004e-05, "loss": 1.1346, "step": 21126 }, { "epoch": 1.259268089164382, "grad_norm": 3.0311734676361084, "learning_rate": 3.0737031293246966e-05, "loss": 1.1362, "step": 21128 }, { "epoch": 1.259387292883538, "grad_norm": 3.109398603439331, "learning_rate": 3.072830470265619e-05, "loss": 1.3083, "step": 21130 }, { "epoch": 1.259506496602694, "grad_norm": 3.139688730239868, "learning_rate": 3.0719578801479884e-05, "loss": 1.0487, "step": 21132 }, { "epoch": 1.25962570032185, "grad_norm": 3.2349491119384766, "learning_rate": 3.0710853590030245e-05, "loss": 1.1527, "step": 21134 }, { "epoch": 1.259744904041006, "grad_norm": 3.460092782974243, "learning_rate": 3.070212906861934e-05, "loss": 1.0334, "step": 21136 }, { "epoch": 1.259864107760162, "grad_norm": 3.4869842529296875, "learning_rate": 3.0693405237559315e-05, "loss": 1.3123, "step": 21138 }, { "epoch": 1.2599833114793182, "grad_norm": 3.238363742828369, "learning_rate": 3.068468209716225e-05, "loss": 1.1106, "step": 21140 }, { "epoch": 1.2601025151984742, "grad_norm": 3.5246403217315674, "learning_rate": 3.067595964774017e-05, "loss": 1.224, "step": 21142 }, { "epoch": 1.2602217189176304, "grad_norm": 3.433967113494873, "learning_rate": 3.0667237889605146e-05, "loss": 1.1844, "step": 21144 }, { "epoch": 1.2603409226367863, "grad_norm": 3.283174753189087, "learning_rate": 3.065851682306916e-05, "loss": 1.2198, "step": 21146 }, { "epoch": 1.2604601263559423, "grad_norm": 3.1576032638549805, "learning_rate": 3.064979644844422e-05, "loss": 1.2323, "step": 21148 }, { "epoch": 1.2605793300750983, "grad_norm": 3.255600929260254, "learning_rate": 3.064107676604227e-05, "loss": 1.1732, "step": 21150 }, { "epoch": 1.2606985337942545, "grad_norm": 2.896028757095337, "learning_rate": 3.0632357776175217e-05, "loss": 1.146, "step": 21152 }, { "epoch": 1.2608177375134104, "grad_norm": 3.3652424812316895, "learning_rate": 3.0623639479154994e-05, "loss": 1.2002, "step": 21154 }, { "epoch": 1.2609369412325664, "grad_norm": 3.192854881286621, "learning_rate": 3.0614921875293485e-05, "loss": 1.1682, "step": 21156 }, { "epoch": 1.2610561449517226, "grad_norm": 3.088198184967041, "learning_rate": 3.0606204964902565e-05, "loss": 1.3328, "step": 21158 }, { "epoch": 1.2611753486708785, "grad_norm": 2.9249956607818604, "learning_rate": 3.0597488748294025e-05, "loss": 1.1142, "step": 21160 }, { "epoch": 1.2612945523900345, "grad_norm": 3.234302282333374, "learning_rate": 3.058877322577971e-05, "loss": 1.225, "step": 21162 }, { "epoch": 1.2614137561091905, "grad_norm": 3.1667580604553223, "learning_rate": 3.05800583976714e-05, "loss": 1.0424, "step": 21164 }, { "epoch": 1.2615329598283467, "grad_norm": 3.105545997619629, "learning_rate": 3.057134426428082e-05, "loss": 1.1656, "step": 21166 }, { "epoch": 1.2616521635475026, "grad_norm": 2.9097390174865723, "learning_rate": 3.056263082591975e-05, "loss": 1.1067, "step": 21168 }, { "epoch": 1.2617713672666588, "grad_norm": 3.4728150367736816, "learning_rate": 3.055391808289987e-05, "loss": 1.2189, "step": 21170 }, { "epoch": 1.2618905709858148, "grad_norm": 3.0983922481536865, "learning_rate": 3.0545206035532886e-05, "loss": 1.1687, "step": 21172 }, { "epoch": 1.2620097747049708, "grad_norm": 3.3449950218200684, "learning_rate": 3.053649468413043e-05, "loss": 1.1624, "step": 21174 }, { "epoch": 1.2621289784241267, "grad_norm": 3.10988450050354, "learning_rate": 3.0527784029004155e-05, "loss": 1.0598, "step": 21176 }, { "epoch": 1.262248182143283, "grad_norm": 3.1495399475097656, "learning_rate": 3.0519074070465694e-05, "loss": 1.2563, "step": 21178 }, { "epoch": 1.262367385862439, "grad_norm": 3.5193915367126465, "learning_rate": 3.0510364808826574e-05, "loss": 1.1708, "step": 21180 }, { "epoch": 1.2624865895815949, "grad_norm": 3.3528990745544434, "learning_rate": 3.0501656244398407e-05, "loss": 1.1787, "step": 21182 }, { "epoch": 1.262605793300751, "grad_norm": 3.0606167316436768, "learning_rate": 3.0492948377492693e-05, "loss": 1.1446, "step": 21184 }, { "epoch": 1.262724997019907, "grad_norm": 3.1384968757629395, "learning_rate": 3.0484241208420972e-05, "loss": 1.0759, "step": 21186 }, { "epoch": 1.262844200739063, "grad_norm": 3.241158962249756, "learning_rate": 3.0475534737494704e-05, "loss": 1.1499, "step": 21188 }, { "epoch": 1.262963404458219, "grad_norm": 2.9264838695526123, "learning_rate": 3.046682896502538e-05, "loss": 1.255, "step": 21190 }, { "epoch": 1.2630826081773752, "grad_norm": 3.608146905899048, "learning_rate": 3.0458123891324397e-05, "loss": 1.127, "step": 21192 }, { "epoch": 1.2632018118965311, "grad_norm": 3.0240297317504883, "learning_rate": 3.0449419516703192e-05, "loss": 1.1552, "step": 21194 }, { "epoch": 1.2633210156156873, "grad_norm": 3.319701910018921, "learning_rate": 3.0440715841473154e-05, "loss": 1.133, "step": 21196 }, { "epoch": 1.2634402193348433, "grad_norm": 3.1451120376586914, "learning_rate": 3.0432012865945613e-05, "loss": 1.1287, "step": 21198 }, { "epoch": 1.2635594230539993, "grad_norm": 2.9095826148986816, "learning_rate": 3.042331059043193e-05, "loss": 1.1715, "step": 21200 }, { "epoch": 1.2636786267731552, "grad_norm": 3.0887680053710938, "learning_rate": 3.04146090152434e-05, "loss": 1.1562, "step": 21202 }, { "epoch": 1.2637978304923114, "grad_norm": 3.281888484954834, "learning_rate": 3.0405908140691337e-05, "loss": 1.2747, "step": 21204 }, { "epoch": 1.2639170342114674, "grad_norm": 3.055680513381958, "learning_rate": 3.039720796708696e-05, "loss": 1.1313, "step": 21206 }, { "epoch": 1.2640362379306234, "grad_norm": 3.3076329231262207, "learning_rate": 3.038850849474153e-05, "loss": 1.1779, "step": 21208 }, { "epoch": 1.2641554416497796, "grad_norm": 3.5316240787506104, "learning_rate": 3.0379809723966268e-05, "loss": 1.143, "step": 21210 }, { "epoch": 1.2642746453689355, "grad_norm": 3.162278175354004, "learning_rate": 3.0371111655072315e-05, "loss": 1.1943, "step": 21212 }, { "epoch": 1.2643938490880915, "grad_norm": 3.1129353046417236, "learning_rate": 3.0362414288370877e-05, "loss": 1.1497, "step": 21214 }, { "epoch": 1.2645130528072475, "grad_norm": 3.4912078380584717, "learning_rate": 3.035371762417305e-05, "loss": 1.2788, "step": 21216 }, { "epoch": 1.2646322565264037, "grad_norm": 3.138853073120117, "learning_rate": 3.0345021662789975e-05, "loss": 1.0991, "step": 21218 }, { "epoch": 1.2647514602455596, "grad_norm": 3.0282723903656006, "learning_rate": 3.0336326404532718e-05, "loss": 1.0469, "step": 21220 }, { "epoch": 1.2648706639647158, "grad_norm": 3.4133806228637695, "learning_rate": 3.0327631849712355e-05, "loss": 1.1435, "step": 21222 }, { "epoch": 1.2649898676838718, "grad_norm": 3.1250250339508057, "learning_rate": 3.031893799863991e-05, "loss": 1.3567, "step": 21224 }, { "epoch": 1.2651090714030278, "grad_norm": 3.0224900245666504, "learning_rate": 3.0310244851626373e-05, "loss": 1.1834, "step": 21226 }, { "epoch": 1.2652282751221837, "grad_norm": 3.008594274520874, "learning_rate": 3.030155240898277e-05, "loss": 1.128, "step": 21228 }, { "epoch": 1.26534747884134, "grad_norm": 3.3930370807647705, "learning_rate": 3.0292860671020012e-05, "loss": 1.2256, "step": 21230 }, { "epoch": 1.2654666825604959, "grad_norm": 3.432690143585205, "learning_rate": 3.0284169638049064e-05, "loss": 1.3667, "step": 21232 }, { "epoch": 1.2655858862796519, "grad_norm": 2.9461162090301514, "learning_rate": 3.0275479310380818e-05, "loss": 1.0234, "step": 21234 }, { "epoch": 1.265705089998808, "grad_norm": 3.2469489574432373, "learning_rate": 3.0266789688326186e-05, "loss": 1.1498, "step": 21236 }, { "epoch": 1.265824293717964, "grad_norm": 3.4684550762176514, "learning_rate": 3.025810077219599e-05, "loss": 1.1334, "step": 21238 }, { "epoch": 1.26594349743712, "grad_norm": 3.4036524295806885, "learning_rate": 3.0249412562301065e-05, "loss": 1.1584, "step": 21240 }, { "epoch": 1.266062701156276, "grad_norm": 3.4067392349243164, "learning_rate": 3.0240725058952245e-05, "loss": 1.1415, "step": 21242 }, { "epoch": 1.2661819048754321, "grad_norm": 3.2685952186584473, "learning_rate": 3.0232038262460282e-05, "loss": 1.22, "step": 21244 }, { "epoch": 1.266301108594588, "grad_norm": 3.3553528785705566, "learning_rate": 3.0223352173135955e-05, "loss": 1.187, "step": 21246 }, { "epoch": 1.2664203123137443, "grad_norm": 2.8629555702209473, "learning_rate": 3.0214666791289974e-05, "loss": 1.2887, "step": 21248 }, { "epoch": 1.2665395160329003, "grad_norm": 3.4659948348999023, "learning_rate": 3.0205982117233066e-05, "loss": 1.2122, "step": 21250 }, { "epoch": 1.2666587197520562, "grad_norm": 3.0139522552490234, "learning_rate": 3.0197298151275894e-05, "loss": 1.2595, "step": 21252 }, { "epoch": 1.2667779234712122, "grad_norm": 3.134909152984619, "learning_rate": 3.0188614893729138e-05, "loss": 1.152, "step": 21254 }, { "epoch": 1.2668971271903684, "grad_norm": 3.3858892917633057, "learning_rate": 3.0179932344903406e-05, "loss": 1.2445, "step": 21256 }, { "epoch": 1.2670163309095244, "grad_norm": 2.9295945167541504, "learning_rate": 3.0171250505109294e-05, "loss": 1.2545, "step": 21258 }, { "epoch": 1.2671355346286803, "grad_norm": 3.152888059616089, "learning_rate": 3.0162569374657418e-05, "loss": 1.1829, "step": 21260 }, { "epoch": 1.2672547383478365, "grad_norm": 3.160374641418457, "learning_rate": 3.015388895385829e-05, "loss": 1.2259, "step": 21262 }, { "epoch": 1.2673739420669925, "grad_norm": 3.2561163902282715, "learning_rate": 3.014520924302246e-05, "loss": 1.201, "step": 21264 }, { "epoch": 1.2674931457861485, "grad_norm": 3.581284999847412, "learning_rate": 3.013653024246042e-05, "loss": 1.1324, "step": 21266 }, { "epoch": 1.2676123495053044, "grad_norm": 3.216115713119507, "learning_rate": 3.0127851952482677e-05, "loss": 1.2091, "step": 21268 }, { "epoch": 1.2677315532244606, "grad_norm": 3.263561725616455, "learning_rate": 3.011917437339965e-05, "loss": 1.2564, "step": 21270 }, { "epoch": 1.2678507569436166, "grad_norm": 3.750396966934204, "learning_rate": 3.0110497505521766e-05, "loss": 1.3917, "step": 21272 }, { "epoch": 1.2679699606627728, "grad_norm": 3.335003137588501, "learning_rate": 3.0101821349159464e-05, "loss": 1.0725, "step": 21274 }, { "epoch": 1.2680891643819288, "grad_norm": 3.0660791397094727, "learning_rate": 3.0093145904623064e-05, "loss": 1.0917, "step": 21276 }, { "epoch": 1.2682083681010847, "grad_norm": 3.2273690700531006, "learning_rate": 3.008447117222296e-05, "loss": 1.1617, "step": 21278 }, { "epoch": 1.2683275718202407, "grad_norm": 3.0675175189971924, "learning_rate": 3.007579715226946e-05, "loss": 1.1515, "step": 21280 }, { "epoch": 1.2684467755393969, "grad_norm": 3.2613701820373535, "learning_rate": 3.0067123845072864e-05, "loss": 1.2061, "step": 21282 }, { "epoch": 1.2685659792585529, "grad_norm": 3.28704833984375, "learning_rate": 3.0058451250943465e-05, "loss": 1.2418, "step": 21284 }, { "epoch": 1.2686851829777088, "grad_norm": 3.3614070415496826, "learning_rate": 3.0049779370191468e-05, "loss": 1.3304, "step": 21286 }, { "epoch": 1.268804386696865, "grad_norm": 2.9084010124206543, "learning_rate": 3.004110820312713e-05, "loss": 1.1271, "step": 21288 }, { "epoch": 1.268923590416021, "grad_norm": 2.925401210784912, "learning_rate": 3.0032437750060636e-05, "loss": 1.3193, "step": 21290 }, { "epoch": 1.269042794135177, "grad_norm": 3.1945137977600098, "learning_rate": 3.002376801130219e-05, "loss": 1.1055, "step": 21292 }, { "epoch": 1.269161997854333, "grad_norm": 2.7631537914276123, "learning_rate": 3.0015098987161878e-05, "loss": 0.9747, "step": 21294 }, { "epoch": 1.2692812015734891, "grad_norm": 3.2688980102539062, "learning_rate": 3.0006430677949863e-05, "loss": 1.1436, "step": 21296 }, { "epoch": 1.269400405292645, "grad_norm": 3.055026054382324, "learning_rate": 2.9997763083976227e-05, "loss": 1.1338, "step": 21298 }, { "epoch": 1.2695196090118013, "grad_norm": 3.4777729511260986, "learning_rate": 2.998909620555106e-05, "loss": 1.2863, "step": 21300 }, { "epoch": 1.2696388127309572, "grad_norm": 3.200784206390381, "learning_rate": 2.9980430042984375e-05, "loss": 1.1044, "step": 21302 }, { "epoch": 1.2697580164501132, "grad_norm": 2.864065408706665, "learning_rate": 2.9971764596586194e-05, "loss": 1.1963, "step": 21304 }, { "epoch": 1.2698772201692692, "grad_norm": 3.25478458404541, "learning_rate": 2.9963099866666543e-05, "loss": 1.0517, "step": 21306 }, { "epoch": 1.2699964238884254, "grad_norm": 3.477908134460449, "learning_rate": 2.9954435853535346e-05, "loss": 1.2453, "step": 21308 }, { "epoch": 1.2701156276075813, "grad_norm": 3.1741549968719482, "learning_rate": 2.9945772557502565e-05, "loss": 1.0819, "step": 21310 }, { "epoch": 1.2702348313267373, "grad_norm": 3.2064287662506104, "learning_rate": 2.9937109978878108e-05, "loss": 1.26, "step": 21312 }, { "epoch": 1.2703540350458935, "grad_norm": 3.1164937019348145, "learning_rate": 2.9928448117971887e-05, "loss": 1.0812, "step": 21314 }, { "epoch": 1.2704732387650495, "grad_norm": 3.267845869064331, "learning_rate": 2.9919786975093754e-05, "loss": 1.1767, "step": 21316 }, { "epoch": 1.2705924424842054, "grad_norm": 3.268549680709839, "learning_rate": 2.991112655055353e-05, "loss": 1.1192, "step": 21318 }, { "epoch": 1.2707116462033614, "grad_norm": 2.9072086811065674, "learning_rate": 2.9902466844661052e-05, "loss": 1.0788, "step": 21320 }, { "epoch": 1.2708308499225176, "grad_norm": 2.9913241863250732, "learning_rate": 2.9893807857726085e-05, "loss": 1.0819, "step": 21322 }, { "epoch": 1.2709500536416736, "grad_norm": 3.162214517593384, "learning_rate": 2.9885149590058425e-05, "loss": 1.1207, "step": 21324 }, { "epoch": 1.2710692573608298, "grad_norm": 3.13242506980896, "learning_rate": 2.9876492041967768e-05, "loss": 1.124, "step": 21326 }, { "epoch": 1.2711884610799857, "grad_norm": 3.1009066104888916, "learning_rate": 2.986783521376385e-05, "loss": 1.2538, "step": 21328 }, { "epoch": 1.2713076647991417, "grad_norm": 3.4910166263580322, "learning_rate": 2.9859179105756363e-05, "loss": 1.2168, "step": 21330 }, { "epoch": 1.2714268685182977, "grad_norm": 3.0740017890930176, "learning_rate": 2.9850523718254924e-05, "loss": 1.1181, "step": 21332 }, { "epoch": 1.2715460722374539, "grad_norm": 3.065824270248413, "learning_rate": 2.9841869051569204e-05, "loss": 1.3145, "step": 21334 }, { "epoch": 1.2716652759566098, "grad_norm": 3.1422572135925293, "learning_rate": 2.9833215106008794e-05, "loss": 1.184, "step": 21336 }, { "epoch": 1.271784479675766, "grad_norm": 3.575315475463867, "learning_rate": 2.9824561881883294e-05, "loss": 1.0659, "step": 21338 }, { "epoch": 1.271903683394922, "grad_norm": 2.6345932483673096, "learning_rate": 2.981590937950222e-05, "loss": 1.1557, "step": 21340 }, { "epoch": 1.272022887114078, "grad_norm": 3.0014679431915283, "learning_rate": 2.9807257599175143e-05, "loss": 1.1428, "step": 21342 }, { "epoch": 1.272142090833234, "grad_norm": 2.860452890396118, "learning_rate": 2.979860654121156e-05, "loss": 1.0825, "step": 21344 }, { "epoch": 1.27226129455239, "grad_norm": 2.889115333557129, "learning_rate": 2.978995620592092e-05, "loss": 1.1927, "step": 21346 }, { "epoch": 1.272380498271546, "grad_norm": 2.990312099456787, "learning_rate": 2.9781306593612702e-05, "loss": 1.1552, "step": 21348 }, { "epoch": 1.272499701990702, "grad_norm": 3.3604488372802734, "learning_rate": 2.977265770459632e-05, "loss": 1.2093, "step": 21350 }, { "epoch": 1.2726189057098583, "grad_norm": 3.278534173965454, "learning_rate": 2.976400953918118e-05, "loss": 1.3835, "step": 21352 }, { "epoch": 1.2727381094290142, "grad_norm": 2.902756690979004, "learning_rate": 2.9755362097676654e-05, "loss": 1.1537, "step": 21354 }, { "epoch": 1.2728573131481702, "grad_norm": 3.4778103828430176, "learning_rate": 2.974671538039211e-05, "loss": 1.2694, "step": 21356 }, { "epoch": 1.2729765168673262, "grad_norm": 2.901362180709839, "learning_rate": 2.973806938763683e-05, "loss": 1.1835, "step": 21358 }, { "epoch": 1.2730957205864823, "grad_norm": 3.1474006175994873, "learning_rate": 2.9729424119720144e-05, "loss": 1.1945, "step": 21360 }, { "epoch": 1.2732149243056383, "grad_norm": 3.4210081100463867, "learning_rate": 2.9720779576951323e-05, "loss": 1.1805, "step": 21362 }, { "epoch": 1.2733341280247945, "grad_norm": 3.9584736824035645, "learning_rate": 2.971213575963958e-05, "loss": 1.1612, "step": 21364 }, { "epoch": 1.2734533317439505, "grad_norm": 3.019907236099243, "learning_rate": 2.9703492668094167e-05, "loss": 1.0191, "step": 21366 }, { "epoch": 1.2735725354631064, "grad_norm": 3.103238344192505, "learning_rate": 2.9694850302624254e-05, "loss": 1.1672, "step": 21368 }, { "epoch": 1.2736917391822624, "grad_norm": 2.954777240753174, "learning_rate": 2.968620866353904e-05, "loss": 1.1952, "step": 21370 }, { "epoch": 1.2738109429014184, "grad_norm": 3.222465753555298, "learning_rate": 2.9677567751147627e-05, "loss": 0.9508, "step": 21372 }, { "epoch": 1.2739301466205746, "grad_norm": 2.9412951469421387, "learning_rate": 2.9668927565759154e-05, "loss": 1.0648, "step": 21374 }, { "epoch": 1.2740493503397305, "grad_norm": 2.7772440910339355, "learning_rate": 2.966028810768271e-05, "loss": 1.0497, "step": 21376 }, { "epoch": 1.2741685540588867, "grad_norm": 3.746971368789673, "learning_rate": 2.9651649377227343e-05, "loss": 1.3604, "step": 21378 }, { "epoch": 1.2742877577780427, "grad_norm": 3.2758896350860596, "learning_rate": 2.96430113747021e-05, "loss": 1.1544, "step": 21380 }, { "epoch": 1.2744069614971987, "grad_norm": 2.8490724563598633, "learning_rate": 2.9634374100415984e-05, "loss": 1.1625, "step": 21382 }, { "epoch": 1.2745261652163546, "grad_norm": 2.906024694442749, "learning_rate": 2.9625737554678002e-05, "loss": 1.149, "step": 21384 }, { "epoch": 1.2746453689355108, "grad_norm": 3.4042699337005615, "learning_rate": 2.9617101737797082e-05, "loss": 1.168, "step": 21386 }, { "epoch": 1.2747645726546668, "grad_norm": 3.3093347549438477, "learning_rate": 2.9608466650082188e-05, "loss": 1.1024, "step": 21388 }, { "epoch": 1.274883776373823, "grad_norm": 2.967427968978882, "learning_rate": 2.9599832291842205e-05, "loss": 1.1155, "step": 21390 }, { "epoch": 1.275002980092979, "grad_norm": 3.4614057540893555, "learning_rate": 2.959119866338601e-05, "loss": 1.2087, "step": 21392 }, { "epoch": 1.275122183812135, "grad_norm": 3.3370325565338135, "learning_rate": 2.9582565765022487e-05, "loss": 1.3227, "step": 21394 }, { "epoch": 1.275241387531291, "grad_norm": 3.543696641921997, "learning_rate": 2.957393359706042e-05, "loss": 1.1746, "step": 21396 }, { "epoch": 1.275360591250447, "grad_norm": 2.992805242538452, "learning_rate": 2.956530215980865e-05, "loss": 1.1574, "step": 21398 }, { "epoch": 1.275479794969603, "grad_norm": 3.1613826751708984, "learning_rate": 2.9556671453575923e-05, "loss": 1.1541, "step": 21400 }, { "epoch": 1.275598998688759, "grad_norm": 3.2697653770446777, "learning_rate": 2.954804147867103e-05, "loss": 1.1374, "step": 21402 }, { "epoch": 1.2757182024079152, "grad_norm": 3.1436734199523926, "learning_rate": 2.9539412235402657e-05, "loss": 1.209, "step": 21404 }, { "epoch": 1.2758374061270712, "grad_norm": 3.126173734664917, "learning_rate": 2.9530783724079503e-05, "loss": 1.3647, "step": 21406 }, { "epoch": 1.2759566098462272, "grad_norm": 3.291492223739624, "learning_rate": 2.9522155945010272e-05, "loss": 1.1278, "step": 21408 }, { "epoch": 1.2760758135653831, "grad_norm": 3.178358793258667, "learning_rate": 2.9513528898503563e-05, "loss": 1.2824, "step": 21410 }, { "epoch": 1.2761950172845393, "grad_norm": 3.0363097190856934, "learning_rate": 2.9504902584868032e-05, "loss": 1.2743, "step": 21412 }, { "epoch": 1.2763142210036953, "grad_norm": 3.545973062515259, "learning_rate": 2.9496277004412253e-05, "loss": 1.0992, "step": 21414 }, { "epoch": 1.2764334247228515, "grad_norm": 3.16743803024292, "learning_rate": 2.94876521574448e-05, "loss": 1.1719, "step": 21416 }, { "epoch": 1.2765526284420075, "grad_norm": 3.330281972885132, "learning_rate": 2.9479028044274206e-05, "loss": 1.2077, "step": 21418 }, { "epoch": 1.2766718321611634, "grad_norm": 3.1866350173950195, "learning_rate": 2.9470404665209006e-05, "loss": 1.1162, "step": 21420 }, { "epoch": 1.2767910358803194, "grad_norm": 2.807305097579956, "learning_rate": 2.946178202055767e-05, "loss": 1.0201, "step": 21422 }, { "epoch": 1.2769102395994756, "grad_norm": 3.469287395477295, "learning_rate": 2.9453160110628647e-05, "loss": 1.2036, "step": 21424 }, { "epoch": 1.2770294433186316, "grad_norm": 2.8772521018981934, "learning_rate": 2.944453893573041e-05, "loss": 1.0606, "step": 21426 }, { "epoch": 1.2771486470377875, "grad_norm": 2.923832416534424, "learning_rate": 2.943591849617132e-05, "loss": 1.1049, "step": 21428 }, { "epoch": 1.2772678507569437, "grad_norm": 3.269078254699707, "learning_rate": 2.9427298792259795e-05, "loss": 1.1674, "step": 21430 }, { "epoch": 1.2773870544760997, "grad_norm": 2.984375, "learning_rate": 2.9418679824304173e-05, "loss": 1.1303, "step": 21432 }, { "epoch": 1.2775062581952557, "grad_norm": 2.8936572074890137, "learning_rate": 2.9410061592612813e-05, "loss": 1.1149, "step": 21434 }, { "epoch": 1.2776254619144116, "grad_norm": 3.4214301109313965, "learning_rate": 2.940144409749399e-05, "loss": 1.2981, "step": 21436 }, { "epoch": 1.2777446656335678, "grad_norm": 2.8935327529907227, "learning_rate": 2.939282733925598e-05, "loss": 1.112, "step": 21438 }, { "epoch": 1.2778638693527238, "grad_norm": 3.144662380218506, "learning_rate": 2.9384211318207055e-05, "loss": 1.1762, "step": 21440 }, { "epoch": 1.27798307307188, "grad_norm": 3.435145139694214, "learning_rate": 2.937559603465541e-05, "loss": 1.2612, "step": 21442 }, { "epoch": 1.278102276791036, "grad_norm": 3.123324155807495, "learning_rate": 2.9366981488909274e-05, "loss": 1.2466, "step": 21444 }, { "epoch": 1.278221480510192, "grad_norm": 2.9511730670928955, "learning_rate": 2.935836768127679e-05, "loss": 1.1075, "step": 21446 }, { "epoch": 1.2783406842293479, "grad_norm": 2.9018139839172363, "learning_rate": 2.9349754612066132e-05, "loss": 1.1495, "step": 21448 }, { "epoch": 1.278459887948504, "grad_norm": 3.029947280883789, "learning_rate": 2.9341142281585415e-05, "loss": 1.1018, "step": 21450 }, { "epoch": 1.27857909166766, "grad_norm": 2.917698383331299, "learning_rate": 2.9332530690142702e-05, "loss": 1.1796, "step": 21452 }, { "epoch": 1.278698295386816, "grad_norm": 3.0581676959991455, "learning_rate": 2.9323919838046088e-05, "loss": 1.229, "step": 21454 }, { "epoch": 1.2788174991059722, "grad_norm": 3.1458396911621094, "learning_rate": 2.9315309725603596e-05, "loss": 1.1621, "step": 21456 }, { "epoch": 1.2789367028251282, "grad_norm": 3.2646288871765137, "learning_rate": 2.9306700353123264e-05, "loss": 1.267, "step": 21458 }, { "epoch": 1.2790559065442841, "grad_norm": 3.2355594635009766, "learning_rate": 2.9298091720913047e-05, "loss": 1.0989, "step": 21460 }, { "epoch": 1.27917511026344, "grad_norm": 3.238041639328003, "learning_rate": 2.9289483829280927e-05, "loss": 1.266, "step": 21462 }, { "epoch": 1.2792943139825963, "grad_norm": 3.071375608444214, "learning_rate": 2.9280876678534842e-05, "loss": 1.2064, "step": 21464 }, { "epoch": 1.2794135177017523, "grad_norm": 3.1660914421081543, "learning_rate": 2.927227026898266e-05, "loss": 1.0453, "step": 21466 }, { "epoch": 1.2795327214209085, "grad_norm": 3.037304162979126, "learning_rate": 2.9263664600932306e-05, "loss": 1.1225, "step": 21468 }, { "epoch": 1.2796519251400644, "grad_norm": 3.584038019180298, "learning_rate": 2.925505967469161e-05, "loss": 1.2892, "step": 21470 }, { "epoch": 1.2797711288592204, "grad_norm": 3.280775308609009, "learning_rate": 2.9246455490568424e-05, "loss": 1.1725, "step": 21472 }, { "epoch": 1.2798903325783764, "grad_norm": 3.2115235328674316, "learning_rate": 2.9237852048870517e-05, "loss": 1.2294, "step": 21474 }, { "epoch": 1.2800095362975326, "grad_norm": 3.0231571197509766, "learning_rate": 2.9229249349905684e-05, "loss": 1.2226, "step": 21476 }, { "epoch": 1.2801287400166885, "grad_norm": 2.8240609169006348, "learning_rate": 2.922064739398166e-05, "loss": 1.0198, "step": 21478 }, { "epoch": 1.2802479437358445, "grad_norm": 3.2774555683135986, "learning_rate": 2.9212046181406183e-05, "loss": 1.3185, "step": 21480 }, { "epoch": 1.2803671474550007, "grad_norm": 3.3061423301696777, "learning_rate": 2.9203445712486953e-05, "loss": 1.192, "step": 21482 }, { "epoch": 1.2804863511741567, "grad_norm": 2.8634088039398193, "learning_rate": 2.9194845987531603e-05, "loss": 1.2722, "step": 21484 }, { "epoch": 1.2806055548933126, "grad_norm": 2.9776649475097656, "learning_rate": 2.9186247006847804e-05, "loss": 1.1655, "step": 21486 }, { "epoch": 1.2807247586124686, "grad_norm": 3.0797276496887207, "learning_rate": 2.9177648770743164e-05, "loss": 1.1044, "step": 21488 }, { "epoch": 1.2808439623316248, "grad_norm": 3.111274480819702, "learning_rate": 2.916905127952527e-05, "loss": 1.0372, "step": 21490 }, { "epoch": 1.2809631660507808, "grad_norm": 3.286773681640625, "learning_rate": 2.916045453350167e-05, "loss": 1.2292, "step": 21492 }, { "epoch": 1.281082369769937, "grad_norm": 3.264786958694458, "learning_rate": 2.9151858532979947e-05, "loss": 1.1932, "step": 21494 }, { "epoch": 1.281201573489093, "grad_norm": 3.054255485534668, "learning_rate": 2.9143263278267553e-05, "loss": 1.1942, "step": 21496 }, { "epoch": 1.2813207772082489, "grad_norm": 3.250600814819336, "learning_rate": 2.913466876967198e-05, "loss": 1.0418, "step": 21498 }, { "epoch": 1.2814399809274049, "grad_norm": 2.850052833557129, "learning_rate": 2.912607500750073e-05, "loss": 1.1192, "step": 21500 }, { "epoch": 1.281559184646561, "grad_norm": 2.6467301845550537, "learning_rate": 2.9117481992061158e-05, "loss": 1.1113, "step": 21502 }, { "epoch": 1.281678388365717, "grad_norm": 3.616748809814453, "learning_rate": 2.910888972366072e-05, "loss": 1.1736, "step": 21504 }, { "epoch": 1.281797592084873, "grad_norm": 3.4428486824035645, "learning_rate": 2.910029820260678e-05, "loss": 1.0605, "step": 21506 }, { "epoch": 1.2819167958040292, "grad_norm": 3.0789833068847656, "learning_rate": 2.909170742920668e-05, "loss": 1.1311, "step": 21508 }, { "epoch": 1.2820359995231851, "grad_norm": 3.447246551513672, "learning_rate": 2.9083117403767734e-05, "loss": 1.2172, "step": 21510 }, { "epoch": 1.2821552032423411, "grad_norm": 3.2227563858032227, "learning_rate": 2.9074528126597257e-05, "loss": 1.2995, "step": 21512 }, { "epoch": 1.282274406961497, "grad_norm": 2.8347373008728027, "learning_rate": 2.9065939598002502e-05, "loss": 1.0803, "step": 21514 }, { "epoch": 1.2823936106806533, "grad_norm": 3.1121063232421875, "learning_rate": 2.9057351818290683e-05, "loss": 1.3594, "step": 21516 }, { "epoch": 1.2825128143998092, "grad_norm": 2.906308650970459, "learning_rate": 2.9048764787769088e-05, "loss": 1.1297, "step": 21518 }, { "epoch": 1.2826320181189654, "grad_norm": 3.2789218425750732, "learning_rate": 2.904017850674483e-05, "loss": 1.2038, "step": 21520 }, { "epoch": 1.2827512218381214, "grad_norm": 3.4616219997406006, "learning_rate": 2.903159297552511e-05, "loss": 1.1267, "step": 21522 }, { "epoch": 1.2828704255572774, "grad_norm": 2.7167751789093018, "learning_rate": 2.902300819441708e-05, "loss": 1.0371, "step": 21524 }, { "epoch": 1.2829896292764333, "grad_norm": 2.8671720027923584, "learning_rate": 2.9014424163727772e-05, "loss": 1.1224, "step": 21526 }, { "epoch": 1.2831088329955895, "grad_norm": 2.9346344470977783, "learning_rate": 2.9005840883764333e-05, "loss": 1.0794, "step": 21528 }, { "epoch": 1.2832280367147455, "grad_norm": 3.0305604934692383, "learning_rate": 2.89972583548338e-05, "loss": 1.2876, "step": 21530 }, { "epoch": 1.2833472404339015, "grad_norm": 3.0190463066101074, "learning_rate": 2.8988676577243196e-05, "loss": 1.2162, "step": 21532 }, { "epoch": 1.2834664441530577, "grad_norm": 3.6682679653167725, "learning_rate": 2.8980095551299512e-05, "loss": 1.2045, "step": 21534 }, { "epoch": 1.2835856478722136, "grad_norm": 3.14680814743042, "learning_rate": 2.897151527730974e-05, "loss": 1.0969, "step": 21536 }, { "epoch": 1.2837048515913696, "grad_norm": 3.2760326862335205, "learning_rate": 2.8962935755580788e-05, "loss": 1.1133, "step": 21538 }, { "epoch": 1.2838240553105256, "grad_norm": 2.920958995819092, "learning_rate": 2.895435698641965e-05, "loss": 1.1486, "step": 21540 }, { "epoch": 1.2839432590296818, "grad_norm": 2.7058115005493164, "learning_rate": 2.8945778970133143e-05, "loss": 1.0241, "step": 21542 }, { "epoch": 1.2840624627488377, "grad_norm": 3.4144554138183594, "learning_rate": 2.893720170702814e-05, "loss": 1.1464, "step": 21544 }, { "epoch": 1.284181666467994, "grad_norm": 3.058626413345337, "learning_rate": 2.892862519741153e-05, "loss": 1.1559, "step": 21546 }, { "epoch": 1.28430087018715, "grad_norm": 2.816383123397827, "learning_rate": 2.8920049441590058e-05, "loss": 1.0255, "step": 21548 }, { "epoch": 1.2844200739063059, "grad_norm": 3.5007221698760986, "learning_rate": 2.8911474439870567e-05, "loss": 1.2943, "step": 21550 }, { "epoch": 1.2845392776254618, "grad_norm": 2.989347457885742, "learning_rate": 2.890290019255978e-05, "loss": 1.2813, "step": 21552 }, { "epoch": 1.284658481344618, "grad_norm": 3.2592403888702393, "learning_rate": 2.8894326699964436e-05, "loss": 1.1467, "step": 21554 }, { "epoch": 1.284777685063774, "grad_norm": 2.710423231124878, "learning_rate": 2.8885753962391247e-05, "loss": 1.2079, "step": 21556 }, { "epoch": 1.28489688878293, "grad_norm": 3.2111833095550537, "learning_rate": 2.8877181980146872e-05, "loss": 1.1008, "step": 21558 }, { "epoch": 1.2850160925020861, "grad_norm": 3.31899094581604, "learning_rate": 2.8868610753537973e-05, "loss": 1.2806, "step": 21560 }, { "epoch": 1.2851352962212421, "grad_norm": 3.773799419403076, "learning_rate": 2.886004028287115e-05, "loss": 1.1749, "step": 21562 }, { "epoch": 1.285254499940398, "grad_norm": 3.1213252544403076, "learning_rate": 2.8851470568453048e-05, "loss": 1.14, "step": 21564 }, { "epoch": 1.285373703659554, "grad_norm": 3.084641695022583, "learning_rate": 2.884290161059017e-05, "loss": 1.0852, "step": 21566 }, { "epoch": 1.2854929073787102, "grad_norm": 3.286672592163086, "learning_rate": 2.8834333409589104e-05, "loss": 1.185, "step": 21568 }, { "epoch": 1.2856121110978662, "grad_norm": 3.4004037380218506, "learning_rate": 2.8825765965756367e-05, "loss": 1.2469, "step": 21570 }, { "epoch": 1.2857313148170224, "grad_norm": 2.980377197265625, "learning_rate": 2.8817199279398398e-05, "loss": 1.1407, "step": 21572 }, { "epoch": 1.2858505185361784, "grad_norm": 3.0767948627471924, "learning_rate": 2.88086333508217e-05, "loss": 1.108, "step": 21574 }, { "epoch": 1.2859697222553343, "grad_norm": 2.7635021209716797, "learning_rate": 2.8800068180332696e-05, "loss": 1.0198, "step": 21576 }, { "epoch": 1.2860889259744903, "grad_norm": 2.973980188369751, "learning_rate": 2.8791503768237787e-05, "loss": 1.1716, "step": 21578 }, { "epoch": 1.2862081296936465, "grad_norm": 3.1490774154663086, "learning_rate": 2.878294011484333e-05, "loss": 1.2171, "step": 21580 }, { "epoch": 1.2863273334128025, "grad_norm": 3.2185842990875244, "learning_rate": 2.8774377220455738e-05, "loss": 1.0116, "step": 21582 }, { "epoch": 1.2864465371319584, "grad_norm": 3.1502466201782227, "learning_rate": 2.8765815085381266e-05, "loss": 1.1647, "step": 21584 }, { "epoch": 1.2865657408511146, "grad_norm": 2.90159010887146, "learning_rate": 2.8757253709926246e-05, "loss": 1.0233, "step": 21586 }, { "epoch": 1.2866849445702706, "grad_norm": 3.118598699569702, "learning_rate": 2.8748693094396972e-05, "loss": 1.0851, "step": 21588 }, { "epoch": 1.2868041482894266, "grad_norm": 3.149385690689087, "learning_rate": 2.874013323909961e-05, "loss": 1.3437, "step": 21590 }, { "epoch": 1.2869233520085825, "grad_norm": 3.6094069480895996, "learning_rate": 2.8731574144340445e-05, "loss": 1.0683, "step": 21592 }, { "epoch": 1.2870425557277387, "grad_norm": 3.4282076358795166, "learning_rate": 2.8723015810425647e-05, "loss": 1.1301, "step": 21594 }, { "epoch": 1.2871617594468947, "grad_norm": 3.2142374515533447, "learning_rate": 2.871445823766136e-05, "loss": 1.3116, "step": 21596 }, { "epoch": 1.287280963166051, "grad_norm": 3.1859304904937744, "learning_rate": 2.8705901426353744e-05, "loss": 1.1754, "step": 21598 }, { "epoch": 1.2874001668852069, "grad_norm": 2.990550994873047, "learning_rate": 2.869734537680888e-05, "loss": 1.3294, "step": 21600 }, { "epoch": 1.2875193706043628, "grad_norm": 3.34171986579895, "learning_rate": 2.8688790089332863e-05, "loss": 1.2056, "step": 21602 }, { "epoch": 1.2876385743235188, "grad_norm": 3.0177114009857178, "learning_rate": 2.868023556423174e-05, "loss": 1.2448, "step": 21604 }, { "epoch": 1.287757778042675, "grad_norm": 3.017540454864502, "learning_rate": 2.867168180181153e-05, "loss": 1.2179, "step": 21606 }, { "epoch": 1.287876981761831, "grad_norm": 3.390739679336548, "learning_rate": 2.866312880237822e-05, "loss": 1.1504, "step": 21608 }, { "epoch": 1.287996185480987, "grad_norm": 2.799751043319702, "learning_rate": 2.8654576566237835e-05, "loss": 1.1226, "step": 21610 }, { "epoch": 1.2881153892001431, "grad_norm": 2.9520955085754395, "learning_rate": 2.864602509369624e-05, "loss": 1.1685, "step": 21612 }, { "epoch": 1.288234592919299, "grad_norm": 3.0780017375946045, "learning_rate": 2.863747438505941e-05, "loss": 1.0584, "step": 21614 }, { "epoch": 1.288353796638455, "grad_norm": 3.0009965896606445, "learning_rate": 2.862892444063321e-05, "loss": 1.1027, "step": 21616 }, { "epoch": 1.288473000357611, "grad_norm": 3.259470224380493, "learning_rate": 2.862037526072351e-05, "loss": 1.1502, "step": 21618 }, { "epoch": 1.2885922040767672, "grad_norm": 3.3231375217437744, "learning_rate": 2.8611826845636126e-05, "loss": 1.2657, "step": 21620 }, { "epoch": 1.2887114077959232, "grad_norm": 2.8784353733062744, "learning_rate": 2.860327919567688e-05, "loss": 1.0675, "step": 21622 }, { "epoch": 1.2888306115150794, "grad_norm": 3.0602645874023438, "learning_rate": 2.8594732311151546e-05, "loss": 0.9661, "step": 21624 }, { "epoch": 1.2889498152342354, "grad_norm": 3.2394964694976807, "learning_rate": 2.858618619236585e-05, "loss": 1.1908, "step": 21626 }, { "epoch": 1.2890690189533913, "grad_norm": 3.132161855697632, "learning_rate": 2.8577640839625585e-05, "loss": 1.1865, "step": 21628 }, { "epoch": 1.2891882226725473, "grad_norm": 3.4968223571777344, "learning_rate": 2.8569096253236383e-05, "loss": 1.3343, "step": 21630 }, { "epoch": 1.2893074263917035, "grad_norm": 3.3990960121154785, "learning_rate": 2.856055243350391e-05, "loss": 1.205, "step": 21632 }, { "epoch": 1.2894266301108595, "grad_norm": 2.931748867034912, "learning_rate": 2.8552009380733875e-05, "loss": 1.2007, "step": 21634 }, { "epoch": 1.2895458338300154, "grad_norm": 3.2601888179779053, "learning_rate": 2.85434670952318e-05, "loss": 1.179, "step": 21636 }, { "epoch": 1.2896650375491716, "grad_norm": 2.725506067276001, "learning_rate": 2.853492557730335e-05, "loss": 1.1046, "step": 21638 }, { "epoch": 1.2897842412683276, "grad_norm": 2.9696896076202393, "learning_rate": 2.852638482725405e-05, "loss": 1.1024, "step": 21640 }, { "epoch": 1.2899034449874835, "grad_norm": 3.2151401042938232, "learning_rate": 2.8517844845389436e-05, "loss": 1.0932, "step": 21642 }, { "epoch": 1.2900226487066395, "grad_norm": 3.0464069843292236, "learning_rate": 2.8509305632014994e-05, "loss": 1.0696, "step": 21644 }, { "epoch": 1.2901418524257957, "grad_norm": 2.966376304626465, "learning_rate": 2.850076718743625e-05, "loss": 1.1164, "step": 21646 }, { "epoch": 1.2902610561449517, "grad_norm": 3.4754035472869873, "learning_rate": 2.8492229511958613e-05, "loss": 1.218, "step": 21648 }, { "epoch": 1.2903802598641079, "grad_norm": 3.1593964099884033, "learning_rate": 2.848369260588749e-05, "loss": 1.199, "step": 21650 }, { "epoch": 1.2904994635832638, "grad_norm": 3.3559656143188477, "learning_rate": 2.847515646952834e-05, "loss": 1.2009, "step": 21652 }, { "epoch": 1.2906186673024198, "grad_norm": 3.146968126296997, "learning_rate": 2.8466621103186452e-05, "loss": 1.1053, "step": 21654 }, { "epoch": 1.2907378710215758, "grad_norm": 6.5501017570495605, "learning_rate": 2.845808650716722e-05, "loss": 1.0565, "step": 21656 }, { "epoch": 1.290857074740732, "grad_norm": 3.531506299972534, "learning_rate": 2.8449552681775936e-05, "loss": 1.2155, "step": 21658 }, { "epoch": 1.290976278459888, "grad_norm": 3.1491212844848633, "learning_rate": 2.8441019627317893e-05, "loss": 1.1098, "step": 21660 }, { "epoch": 1.291095482179044, "grad_norm": 2.8000540733337402, "learning_rate": 2.8432487344098336e-05, "loss": 1.1551, "step": 21662 }, { "epoch": 1.2912146858982, "grad_norm": 3.28436541557312, "learning_rate": 2.8423955832422504e-05, "loss": 1.208, "step": 21664 }, { "epoch": 1.291333889617356, "grad_norm": 3.1944024562835693, "learning_rate": 2.8415425092595593e-05, "loss": 1.2677, "step": 21666 }, { "epoch": 1.291453093336512, "grad_norm": 2.9223291873931885, "learning_rate": 2.840689512492277e-05, "loss": 0.9818, "step": 21668 }, { "epoch": 1.291572297055668, "grad_norm": 3.019423246383667, "learning_rate": 2.8398365929709197e-05, "loss": 1.2256, "step": 21670 }, { "epoch": 1.2916915007748242, "grad_norm": 3.397214889526367, "learning_rate": 2.838983750725996e-05, "loss": 1.2011, "step": 21672 }, { "epoch": 1.2918107044939802, "grad_norm": 3.231783866882324, "learning_rate": 2.838130985788021e-05, "loss": 1.1281, "step": 21674 }, { "epoch": 1.2919299082131364, "grad_norm": 3.2941391468048096, "learning_rate": 2.8372782981874963e-05, "loss": 1.1118, "step": 21676 }, { "epoch": 1.2920491119322923, "grad_norm": 3.520555257797241, "learning_rate": 2.836425687954924e-05, "loss": 1.1921, "step": 21678 }, { "epoch": 1.2921683156514483, "grad_norm": 3.251110553741455, "learning_rate": 2.8355731551208098e-05, "loss": 1.1774, "step": 21680 }, { "epoch": 1.2922875193706043, "grad_norm": 3.1825573444366455, "learning_rate": 2.8347206997156484e-05, "loss": 1.1252, "step": 21682 }, { "epoch": 1.2924067230897605, "grad_norm": 3.22188401222229, "learning_rate": 2.833868321769937e-05, "loss": 1.256, "step": 21684 }, { "epoch": 1.2925259268089164, "grad_norm": 3.1283915042877197, "learning_rate": 2.8330160213141664e-05, "loss": 1.1854, "step": 21686 }, { "epoch": 1.2926451305280724, "grad_norm": 3.379920244216919, "learning_rate": 2.8321637983788275e-05, "loss": 1.1128, "step": 21688 }, { "epoch": 1.2927643342472286, "grad_norm": 3.28776478767395, "learning_rate": 2.831311652994406e-05, "loss": 1.1777, "step": 21690 }, { "epoch": 1.2928835379663846, "grad_norm": 3.207660675048828, "learning_rate": 2.8304595851913873e-05, "loss": 1.1199, "step": 21692 }, { "epoch": 1.2930027416855405, "grad_norm": 3.081944465637207, "learning_rate": 2.8296075950002528e-05, "loss": 1.173, "step": 21694 }, { "epoch": 1.2931219454046965, "grad_norm": 3.392205238342285, "learning_rate": 2.8287556824514776e-05, "loss": 1.1581, "step": 21696 }, { "epoch": 1.2932411491238527, "grad_norm": 3.138176202774048, "learning_rate": 2.8279038475755447e-05, "loss": 1.1434, "step": 21698 }, { "epoch": 1.2933603528430087, "grad_norm": 3.3221898078918457, "learning_rate": 2.8270520904029192e-05, "loss": 1.2483, "step": 21700 }, { "epoch": 1.2934795565621648, "grad_norm": 3.1036057472229004, "learning_rate": 2.826200410964077e-05, "loss": 1.1117, "step": 21702 }, { "epoch": 1.2935987602813208, "grad_norm": 2.96309232711792, "learning_rate": 2.8253488092894832e-05, "loss": 0.9813, "step": 21704 }, { "epoch": 1.2937179640004768, "grad_norm": 3.157280921936035, "learning_rate": 2.8244972854096035e-05, "loss": 1.0469, "step": 21706 }, { "epoch": 1.2938371677196328, "grad_norm": 2.8189597129821777, "learning_rate": 2.823645839354899e-05, "loss": 1.0302, "step": 21708 }, { "epoch": 1.293956371438789, "grad_norm": 3.0280497074127197, "learning_rate": 2.8227944711558296e-05, "loss": 1.1289, "step": 21710 }, { "epoch": 1.294075575157945, "grad_norm": 3.1971352100372314, "learning_rate": 2.821943180842851e-05, "loss": 1.2761, "step": 21712 }, { "epoch": 1.294194778877101, "grad_norm": 3.380059242248535, "learning_rate": 2.821091968446415e-05, "loss": 1.2066, "step": 21714 }, { "epoch": 1.294313982596257, "grad_norm": 2.9632537364959717, "learning_rate": 2.8202408339969777e-05, "loss": 1.1151, "step": 21716 }, { "epoch": 1.294433186315413, "grad_norm": 3.126992702484131, "learning_rate": 2.8193897775249805e-05, "loss": 1.2106, "step": 21718 }, { "epoch": 1.294552390034569, "grad_norm": 3.5583250522613525, "learning_rate": 2.8185387990608736e-05, "loss": 1.1324, "step": 21720 }, { "epoch": 1.294671593753725, "grad_norm": 3.175574779510498, "learning_rate": 2.8176878986350996e-05, "loss": 1.0644, "step": 21722 }, { "epoch": 1.2947907974728812, "grad_norm": 3.3633108139038086, "learning_rate": 2.8168370762780922e-05, "loss": 1.1908, "step": 21724 }, { "epoch": 1.2949100011920371, "grad_norm": 3.222987413406372, "learning_rate": 2.815986332020294e-05, "loss": 1.2813, "step": 21726 }, { "epoch": 1.2950292049111933, "grad_norm": 3.353907823562622, "learning_rate": 2.8151356658921372e-05, "loss": 1.2641, "step": 21728 }, { "epoch": 1.2951484086303493, "grad_norm": 3.077535629272461, "learning_rate": 2.8142850779240538e-05, "loss": 1.0527, "step": 21730 }, { "epoch": 1.2952676123495053, "grad_norm": 3.164764404296875, "learning_rate": 2.8134345681464707e-05, "loss": 1.1543, "step": 21732 }, { "epoch": 1.2953868160686612, "grad_norm": 3.1922457218170166, "learning_rate": 2.8125841365898148e-05, "loss": 1.1086, "step": 21734 }, { "epoch": 1.2955060197878174, "grad_norm": 3.194805383682251, "learning_rate": 2.811733783284508e-05, "loss": 1.1966, "step": 21736 }, { "epoch": 1.2956252235069734, "grad_norm": 3.106027841567993, "learning_rate": 2.8108835082609714e-05, "loss": 1.0805, "step": 21738 }, { "epoch": 1.2957444272261296, "grad_norm": 3.0563220977783203, "learning_rate": 2.8100333115496215e-05, "loss": 1.2653, "step": 21740 }, { "epoch": 1.2958636309452856, "grad_norm": 2.97131085395813, "learning_rate": 2.809183193180871e-05, "loss": 1.1271, "step": 21742 }, { "epoch": 1.2959828346644415, "grad_norm": 3.007481575012207, "learning_rate": 2.8083331531851375e-05, "loss": 1.1572, "step": 21744 }, { "epoch": 1.2961020383835975, "grad_norm": 3.1227076053619385, "learning_rate": 2.807483191592821e-05, "loss": 1.2734, "step": 21746 }, { "epoch": 1.2962212421027535, "grad_norm": 3.419262409210205, "learning_rate": 2.8066333084343356e-05, "loss": 1.1829, "step": 21748 }, { "epoch": 1.2963404458219097, "grad_norm": 3.614431142807007, "learning_rate": 2.8057835037400804e-05, "loss": 1.1466, "step": 21750 }, { "epoch": 1.2964596495410656, "grad_norm": 2.980593204498291, "learning_rate": 2.8049337775404572e-05, "loss": 1.116, "step": 21752 }, { "epoch": 1.2965788532602218, "grad_norm": 3.291030168533325, "learning_rate": 2.8040841298658626e-05, "loss": 1.1978, "step": 21754 }, { "epoch": 1.2966980569793778, "grad_norm": 3.110745906829834, "learning_rate": 2.8032345607466926e-05, "loss": 1.2045, "step": 21756 }, { "epoch": 1.2968172606985338, "grad_norm": 2.793757200241089, "learning_rate": 2.802385070213338e-05, "loss": 1.0217, "step": 21758 }, { "epoch": 1.2969364644176897, "grad_norm": 3.4792282581329346, "learning_rate": 2.801535658296187e-05, "loss": 1.2268, "step": 21760 }, { "epoch": 1.297055668136846, "grad_norm": 2.977999448776245, "learning_rate": 2.800686325025631e-05, "loss": 1.2456, "step": 21762 }, { "epoch": 1.2971748718560019, "grad_norm": 3.0872652530670166, "learning_rate": 2.7998370704320463e-05, "loss": 1.3177, "step": 21764 }, { "epoch": 1.297294075575158, "grad_norm": 3.2049615383148193, "learning_rate": 2.7989878945458194e-05, "loss": 1.1484, "step": 21766 }, { "epoch": 1.297413279294314, "grad_norm": 2.9565441608428955, "learning_rate": 2.7981387973973276e-05, "loss": 1.1631, "step": 21768 }, { "epoch": 1.29753248301347, "grad_norm": 3.1477103233337402, "learning_rate": 2.7972897790169413e-05, "loss": 1.0931, "step": 21770 }, { "epoch": 1.297651686732626, "grad_norm": 3.3093619346618652, "learning_rate": 2.7964408394350373e-05, "loss": 1.0569, "step": 21772 }, { "epoch": 1.2977708904517822, "grad_norm": 3.437211036682129, "learning_rate": 2.795591978681985e-05, "loss": 1.2112, "step": 21774 }, { "epoch": 1.2978900941709381, "grad_norm": 3.398081064224243, "learning_rate": 2.7947431967881487e-05, "loss": 1.106, "step": 21776 }, { "epoch": 1.2980092978900941, "grad_norm": 3.2164111137390137, "learning_rate": 2.7938944937838923e-05, "loss": 1.1632, "step": 21778 }, { "epoch": 1.2981285016092503, "grad_norm": 3.0054192543029785, "learning_rate": 2.793045869699582e-05, "loss": 1.1331, "step": 21780 }, { "epoch": 1.2982477053284063, "grad_norm": 3.1019585132598877, "learning_rate": 2.7921973245655697e-05, "loss": 1.2063, "step": 21782 }, { "epoch": 1.2983669090475622, "grad_norm": 3.162550926208496, "learning_rate": 2.7913488584122106e-05, "loss": 1.1335, "step": 21784 }, { "epoch": 1.2984861127667182, "grad_norm": 3.341012954711914, "learning_rate": 2.7905004712698647e-05, "loss": 1.1563, "step": 21786 }, { "epoch": 1.2986053164858744, "grad_norm": 2.864474296569824, "learning_rate": 2.7896521631688716e-05, "loss": 1.1041, "step": 21788 }, { "epoch": 1.2987245202050304, "grad_norm": 3.354598045349121, "learning_rate": 2.7888039341395855e-05, "loss": 1.1992, "step": 21790 }, { "epoch": 1.2988437239241866, "grad_norm": 3.004857063293457, "learning_rate": 2.7879557842123482e-05, "loss": 1.0925, "step": 21792 }, { "epoch": 1.2989629276433425, "grad_norm": 3.379880905151367, "learning_rate": 2.7871077134175015e-05, "loss": 1.1718, "step": 21794 }, { "epoch": 1.2990821313624985, "grad_norm": 3.1507372856140137, "learning_rate": 2.7862597217853826e-05, "loss": 1.1577, "step": 21796 }, { "epoch": 1.2992013350816545, "grad_norm": 3.0134599208831787, "learning_rate": 2.7854118093463276e-05, "loss": 1.1889, "step": 21798 }, { "epoch": 1.2993205388008107, "grad_norm": 3.6217381954193115, "learning_rate": 2.78456397613067e-05, "loss": 1.2785, "step": 21800 }, { "epoch": 1.2994397425199666, "grad_norm": 3.317979335784912, "learning_rate": 2.7837162221687385e-05, "loss": 1.0757, "step": 21802 }, { "epoch": 1.2995589462391226, "grad_norm": 3.3293228149414062, "learning_rate": 2.782868547490861e-05, "loss": 1.0721, "step": 21804 }, { "epoch": 1.2996781499582788, "grad_norm": 2.9366767406463623, "learning_rate": 2.782020952127359e-05, "loss": 1.0659, "step": 21806 }, { "epoch": 1.2997973536774348, "grad_norm": 3.5831186771392822, "learning_rate": 2.7811734361085605e-05, "loss": 1.0897, "step": 21808 }, { "epoch": 1.2999165573965907, "grad_norm": 3.1807191371917725, "learning_rate": 2.7803259994647757e-05, "loss": 1.2424, "step": 21810 }, { "epoch": 1.3000357611157467, "grad_norm": 2.988478899002075, "learning_rate": 2.7794786422263263e-05, "loss": 1.1693, "step": 21812 }, { "epoch": 1.300154964834903, "grad_norm": 3.0417137145996094, "learning_rate": 2.778631364423524e-05, "loss": 1.155, "step": 21814 }, { "epoch": 1.3002741685540589, "grad_norm": 3.3644237518310547, "learning_rate": 2.7777841660866777e-05, "loss": 1.2183, "step": 21816 }, { "epoch": 1.300393372273215, "grad_norm": 3.2901763916015625, "learning_rate": 2.7769370472460948e-05, "loss": 1.3094, "step": 21818 }, { "epoch": 1.300512575992371, "grad_norm": 2.8543829917907715, "learning_rate": 2.77609000793208e-05, "loss": 1.2029, "step": 21820 }, { "epoch": 1.300631779711527, "grad_norm": 3.295596122741699, "learning_rate": 2.7752430481749346e-05, "loss": 1.1231, "step": 21822 }, { "epoch": 1.300750983430683, "grad_norm": 3.387350082397461, "learning_rate": 2.7743961680049557e-05, "loss": 1.2351, "step": 21824 }, { "epoch": 1.3008701871498392, "grad_norm": 3.335304021835327, "learning_rate": 2.7735493674524438e-05, "loss": 1.1328, "step": 21826 }, { "epoch": 1.3009893908689951, "grad_norm": 3.1295366287231445, "learning_rate": 2.7727026465476875e-05, "loss": 1.0591, "step": 21828 }, { "epoch": 1.301108594588151, "grad_norm": 2.7506558895111084, "learning_rate": 2.771856005320976e-05, "loss": 1.0901, "step": 21830 }, { "epoch": 1.3012277983073073, "grad_norm": 3.162874221801758, "learning_rate": 2.7710094438026024e-05, "loss": 1.0441, "step": 21832 }, { "epoch": 1.3013470020264633, "grad_norm": 3.184683084487915, "learning_rate": 2.7701629620228436e-05, "loss": 1.1832, "step": 21834 }, { "epoch": 1.3014662057456192, "grad_norm": 3.3574252128601074, "learning_rate": 2.7693165600119874e-05, "loss": 1.2336, "step": 21836 }, { "epoch": 1.3015854094647752, "grad_norm": 2.9993691444396973, "learning_rate": 2.768470237800309e-05, "loss": 1.1833, "step": 21838 }, { "epoch": 1.3017046131839314, "grad_norm": 3.29840350151062, "learning_rate": 2.767623995418086e-05, "loss": 1.1918, "step": 21840 }, { "epoch": 1.3018238169030873, "grad_norm": 2.988626480102539, "learning_rate": 2.7667778328955907e-05, "loss": 1.1208, "step": 21842 }, { "epoch": 1.3019430206222435, "grad_norm": 3.316290855407715, "learning_rate": 2.765931750263093e-05, "loss": 1.2168, "step": 21844 }, { "epoch": 1.3020622243413995, "grad_norm": 3.100983142852783, "learning_rate": 2.765085747550861e-05, "loss": 1.0766, "step": 21846 }, { "epoch": 1.3021814280605555, "grad_norm": 3.2556169033050537, "learning_rate": 2.7642398247891554e-05, "loss": 1.0769, "step": 21848 }, { "epoch": 1.3023006317797114, "grad_norm": 3.0250861644744873, "learning_rate": 2.7633939820082456e-05, "loss": 1.2231, "step": 21850 }, { "epoch": 1.3024198354988676, "grad_norm": 3.224331855773926, "learning_rate": 2.762548219238381e-05, "loss": 1.0729, "step": 21852 }, { "epoch": 1.3025390392180236, "grad_norm": 2.927126407623291, "learning_rate": 2.761702536509825e-05, "loss": 1.2004, "step": 21854 }, { "epoch": 1.3026582429371796, "grad_norm": 3.1722307205200195, "learning_rate": 2.7608569338528285e-05, "loss": 1.1628, "step": 21856 }, { "epoch": 1.3027774466563358, "grad_norm": 2.852107048034668, "learning_rate": 2.7600114112976372e-05, "loss": 1.0838, "step": 21858 }, { "epoch": 1.3028966503754917, "grad_norm": 3.099067211151123, "learning_rate": 2.759165968874503e-05, "loss": 1.0985, "step": 21860 }, { "epoch": 1.3030158540946477, "grad_norm": 2.656953811645508, "learning_rate": 2.7583206066136695e-05, "loss": 0.9823, "step": 21862 }, { "epoch": 1.3031350578138037, "grad_norm": 2.8834521770477295, "learning_rate": 2.7574753245453777e-05, "loss": 1.1667, "step": 21864 }, { "epoch": 1.3032542615329599, "grad_norm": 3.1035780906677246, "learning_rate": 2.7566301226998664e-05, "loss": 1.0822, "step": 21866 }, { "epoch": 1.3033734652521158, "grad_norm": 3.157979726791382, "learning_rate": 2.7557850011073715e-05, "loss": 1.2446, "step": 21868 }, { "epoch": 1.303492668971272, "grad_norm": 2.9536333084106445, "learning_rate": 2.754939959798123e-05, "loss": 1.0834, "step": 21870 }, { "epoch": 1.303611872690428, "grad_norm": 3.342585563659668, "learning_rate": 2.754094998802358e-05, "loss": 1.2566, "step": 21872 }, { "epoch": 1.303731076409584, "grad_norm": 3.202174663543701, "learning_rate": 2.753250118150297e-05, "loss": 1.052, "step": 21874 }, { "epoch": 1.30385028012874, "grad_norm": 3.4251434803009033, "learning_rate": 2.7524053178721642e-05, "loss": 1.2691, "step": 21876 }, { "epoch": 1.3039694838478961, "grad_norm": 3.2409117221832275, "learning_rate": 2.751560597998185e-05, "loss": 1.0801, "step": 21878 }, { "epoch": 1.304088687567052, "grad_norm": 3.1107423305511475, "learning_rate": 2.7507159585585767e-05, "loss": 1.1121, "step": 21880 }, { "epoch": 1.304207891286208, "grad_norm": 3.327024459838867, "learning_rate": 2.7498713995835545e-05, "loss": 1.2038, "step": 21882 }, { "epoch": 1.3043270950053643, "grad_norm": 3.0188169479370117, "learning_rate": 2.7490269211033304e-05, "loss": 1.109, "step": 21884 }, { "epoch": 1.3044462987245202, "grad_norm": 2.876811981201172, "learning_rate": 2.7481825231481155e-05, "loss": 1.0887, "step": 21886 }, { "epoch": 1.3045655024436762, "grad_norm": 3.2767789363861084, "learning_rate": 2.747338205748116e-05, "loss": 1.2291, "step": 21888 }, { "epoch": 1.3046847061628322, "grad_norm": 2.957214832305908, "learning_rate": 2.7464939689335367e-05, "loss": 1.1096, "step": 21890 }, { "epoch": 1.3048039098819884, "grad_norm": 3.267314910888672, "learning_rate": 2.745649812734578e-05, "loss": 1.2308, "step": 21892 }, { "epoch": 1.3049231136011443, "grad_norm": 2.8909995555877686, "learning_rate": 2.7448057371814373e-05, "loss": 1.2346, "step": 21894 }, { "epoch": 1.3050423173203005, "grad_norm": 3.285423994064331, "learning_rate": 2.7439617423043145e-05, "loss": 1.1844, "step": 21896 }, { "epoch": 1.3051615210394565, "grad_norm": 3.2558412551879883, "learning_rate": 2.743117828133396e-05, "loss": 1.0969, "step": 21898 }, { "epoch": 1.3052807247586125, "grad_norm": 3.070246934890747, "learning_rate": 2.7422739946988762e-05, "loss": 1.0892, "step": 21900 }, { "epoch": 1.3053999284777684, "grad_norm": 2.9582371711730957, "learning_rate": 2.7414302420309422e-05, "loss": 1.0312, "step": 21902 }, { "epoch": 1.3055191321969246, "grad_norm": 3.272002696990967, "learning_rate": 2.740586570159772e-05, "loss": 1.2686, "step": 21904 }, { "epoch": 1.3056383359160806, "grad_norm": 2.900458574295044, "learning_rate": 2.7397429791155528e-05, "loss": 1.106, "step": 21906 }, { "epoch": 1.3057575396352366, "grad_norm": 3.327395439147949, "learning_rate": 2.738899468928461e-05, "loss": 1.1334, "step": 21908 }, { "epoch": 1.3058767433543927, "grad_norm": 3.218301296234131, "learning_rate": 2.7380560396286713e-05, "loss": 1.2317, "step": 21910 }, { "epoch": 1.3059959470735487, "grad_norm": 3.0114500522613525, "learning_rate": 2.737212691246354e-05, "loss": 1.1886, "step": 21912 }, { "epoch": 1.3061151507927047, "grad_norm": 3.602515459060669, "learning_rate": 2.736369423811685e-05, "loss": 1.3869, "step": 21914 }, { "epoch": 1.3062343545118607, "grad_norm": 3.4006083011627197, "learning_rate": 2.735526237354824e-05, "loss": 1.1711, "step": 21916 }, { "epoch": 1.3063535582310168, "grad_norm": 3.3974432945251465, "learning_rate": 2.7346831319059363e-05, "loss": 1.2871, "step": 21918 }, { "epoch": 1.3064727619501728, "grad_norm": 3.4226601123809814, "learning_rate": 2.733840107495187e-05, "loss": 1.153, "step": 21920 }, { "epoch": 1.306591965669329, "grad_norm": 3.271874189376831, "learning_rate": 2.7329971641527275e-05, "loss": 1.1012, "step": 21922 }, { "epoch": 1.306711169388485, "grad_norm": 2.9485301971435547, "learning_rate": 2.732154301908717e-05, "loss": 1.0907, "step": 21924 }, { "epoch": 1.306830373107641, "grad_norm": 3.1085736751556396, "learning_rate": 2.7313115207933065e-05, "loss": 1.1707, "step": 21926 }, { "epoch": 1.306949576826797, "grad_norm": 2.99045729637146, "learning_rate": 2.7304688208366453e-05, "loss": 1.1484, "step": 21928 }, { "epoch": 1.307068780545953, "grad_norm": 3.171722650527954, "learning_rate": 2.7296262020688794e-05, "loss": 1.1297, "step": 21930 }, { "epoch": 1.307187984265109, "grad_norm": 3.3547096252441406, "learning_rate": 2.7287836645201525e-05, "loss": 1.1952, "step": 21932 }, { "epoch": 1.307307187984265, "grad_norm": 2.9358620643615723, "learning_rate": 2.7279412082206046e-05, "loss": 1.1396, "step": 21934 }, { "epoch": 1.3074263917034212, "grad_norm": 3.6499314308166504, "learning_rate": 2.727098833200374e-05, "loss": 1.1149, "step": 21936 }, { "epoch": 1.3075455954225772, "grad_norm": 3.443923234939575, "learning_rate": 2.7262565394895945e-05, "loss": 1.1114, "step": 21938 }, { "epoch": 1.3076647991417332, "grad_norm": 3.3154776096343994, "learning_rate": 2.7254143271183952e-05, "loss": 1.1844, "step": 21940 }, { "epoch": 1.3077840028608891, "grad_norm": 3.2343640327453613, "learning_rate": 2.724572196116913e-05, "loss": 1.1418, "step": 21942 }, { "epoch": 1.3079032065800453, "grad_norm": 3.3759958744049072, "learning_rate": 2.7237301465152643e-05, "loss": 1.0912, "step": 21944 }, { "epoch": 1.3080224102992013, "grad_norm": 3.3425159454345703, "learning_rate": 2.7228881783435783e-05, "loss": 1.1183, "step": 21946 }, { "epoch": 1.3081416140183575, "grad_norm": 3.344763994216919, "learning_rate": 2.7220462916319735e-05, "loss": 1.2327, "step": 21948 }, { "epoch": 1.3082608177375135, "grad_norm": 3.1862990856170654, "learning_rate": 2.7212044864105662e-05, "loss": 1.1111, "step": 21950 }, { "epoch": 1.3083800214566694, "grad_norm": 3.0038928985595703, "learning_rate": 2.720362762709472e-05, "loss": 1.1149, "step": 21952 }, { "epoch": 1.3084992251758254, "grad_norm": 3.141446590423584, "learning_rate": 2.7195211205588005e-05, "loss": 1.0835, "step": 21954 }, { "epoch": 1.3086184288949816, "grad_norm": 3.016065835952759, "learning_rate": 2.7186795599886617e-05, "loss": 1.071, "step": 21956 }, { "epoch": 1.3087376326141376, "grad_norm": 3.4757447242736816, "learning_rate": 2.7178380810291588e-05, "loss": 1.1969, "step": 21958 }, { "epoch": 1.3088568363332935, "grad_norm": 3.1204416751861572, "learning_rate": 2.7169966837103993e-05, "loss": 1.0784, "step": 21960 }, { "epoch": 1.3089760400524497, "grad_norm": 3.435133457183838, "learning_rate": 2.7161553680624786e-05, "loss": 1.2339, "step": 21962 }, { "epoch": 1.3090952437716057, "grad_norm": 3.5187737941741943, "learning_rate": 2.7153141341154918e-05, "loss": 1.2536, "step": 21964 }, { "epoch": 1.3092144474907617, "grad_norm": 2.8657779693603516, "learning_rate": 2.7144729818995395e-05, "loss": 1.2233, "step": 21966 }, { "epoch": 1.3093336512099176, "grad_norm": 3.6727094650268555, "learning_rate": 2.713631911444704e-05, "loss": 1.3292, "step": 21968 }, { "epoch": 1.3094528549290738, "grad_norm": 3.295992374420166, "learning_rate": 2.7127909227810805e-05, "loss": 1.1215, "step": 21970 }, { "epoch": 1.3095720586482298, "grad_norm": 3.549518346786499, "learning_rate": 2.711950015938751e-05, "loss": 1.1945, "step": 21972 }, { "epoch": 1.309691262367386, "grad_norm": 3.0565083026885986, "learning_rate": 2.7111091909477982e-05, "loss": 1.207, "step": 21974 }, { "epoch": 1.309810466086542, "grad_norm": 3.2912564277648926, "learning_rate": 2.7102684478383006e-05, "loss": 1.0968, "step": 21976 }, { "epoch": 1.309929669805698, "grad_norm": 3.347684621810913, "learning_rate": 2.709427786640335e-05, "loss": 1.1843, "step": 21978 }, { "epoch": 1.3100488735248539, "grad_norm": 3.202793836593628, "learning_rate": 2.7085872073839746e-05, "loss": 1.2472, "step": 21980 }, { "epoch": 1.31016807724401, "grad_norm": 3.48413348197937, "learning_rate": 2.7077467100992883e-05, "loss": 1.1576, "step": 21982 }, { "epoch": 1.310287280963166, "grad_norm": 3.598266363143921, "learning_rate": 2.706906294816348e-05, "loss": 1.2168, "step": 21984 }, { "epoch": 1.310406484682322, "grad_norm": 3.5683579444885254, "learning_rate": 2.7060659615652116e-05, "loss": 1.225, "step": 21986 }, { "epoch": 1.3105256884014782, "grad_norm": 3.1066291332244873, "learning_rate": 2.705225710375947e-05, "loss": 1.1847, "step": 21988 }, { "epoch": 1.3106448921206342, "grad_norm": 2.7943601608276367, "learning_rate": 2.7043855412786095e-05, "loss": 1.042, "step": 21990 }, { "epoch": 1.3107640958397901, "grad_norm": 3.116032600402832, "learning_rate": 2.7035454543032558e-05, "loss": 0.943, "step": 21992 }, { "epoch": 1.3108832995589461, "grad_norm": 2.954937696456909, "learning_rate": 2.7027054494799387e-05, "loss": 1.2164, "step": 21994 }, { "epoch": 1.3110025032781023, "grad_norm": 3.1096043586730957, "learning_rate": 2.701865526838707e-05, "loss": 1.0851, "step": 21996 }, { "epoch": 1.3111217069972583, "grad_norm": 2.859741687774658, "learning_rate": 2.7010256864096095e-05, "loss": 1.1163, "step": 21998 }, { "epoch": 1.3112409107164145, "grad_norm": 3.486400604248047, "learning_rate": 2.7001859282226883e-05, "loss": 1.071, "step": 22000 }, { "epoch": 1.3113601144355704, "grad_norm": 3.3228743076324463, "learning_rate": 2.6993462523079848e-05, "loss": 1.1908, "step": 22002 }, { "epoch": 1.3114793181547264, "grad_norm": 3.5650784969329834, "learning_rate": 2.6985066586955364e-05, "loss": 1.3435, "step": 22004 }, { "epoch": 1.3115985218738824, "grad_norm": 3.3456919193267822, "learning_rate": 2.6976671474153824e-05, "loss": 1.1, "step": 22006 }, { "epoch": 1.3117177255930386, "grad_norm": 2.729940414428711, "learning_rate": 2.696827718497551e-05, "loss": 1.1098, "step": 22008 }, { "epoch": 1.3118369293121945, "grad_norm": 3.473632335662842, "learning_rate": 2.6959883719720696e-05, "loss": 1.1585, "step": 22010 }, { "epoch": 1.3119561330313505, "grad_norm": 3.0334701538085938, "learning_rate": 2.6951491078689696e-05, "loss": 1.0524, "step": 22012 }, { "epoch": 1.3120753367505067, "grad_norm": 3.2229976654052734, "learning_rate": 2.6943099262182715e-05, "loss": 1.0893, "step": 22014 }, { "epoch": 1.3121945404696627, "grad_norm": 3.6443123817443848, "learning_rate": 2.6934708270499965e-05, "loss": 1.0757, "step": 22016 }, { "epoch": 1.3123137441888186, "grad_norm": 2.938180685043335, "learning_rate": 2.6926318103941618e-05, "loss": 1.0362, "step": 22018 }, { "epoch": 1.3124329479079746, "grad_norm": 3.2806291580200195, "learning_rate": 2.6917928762807808e-05, "loss": 1.2407, "step": 22020 }, { "epoch": 1.3125521516271308, "grad_norm": 3.29129695892334, "learning_rate": 2.6909540247398673e-05, "loss": 1.3189, "step": 22022 }, { "epoch": 1.3126713553462868, "grad_norm": 3.3043651580810547, "learning_rate": 2.6901152558014285e-05, "loss": 1.0424, "step": 22024 }, { "epoch": 1.312790559065443, "grad_norm": 3.1702067852020264, "learning_rate": 2.6892765694954696e-05, "loss": 1.1551, "step": 22026 }, { "epoch": 1.312909762784599, "grad_norm": 3.0860252380371094, "learning_rate": 2.688437965851992e-05, "loss": 1.128, "step": 22028 }, { "epoch": 1.313028966503755, "grad_norm": 3.271285057067871, "learning_rate": 2.6875994449010016e-05, "loss": 1.1892, "step": 22030 }, { "epoch": 1.3131481702229109, "grad_norm": 2.954832077026367, "learning_rate": 2.686761006672487e-05, "loss": 1.1293, "step": 22032 }, { "epoch": 1.313267373942067, "grad_norm": 3.075047731399536, "learning_rate": 2.6859226511964474e-05, "loss": 1.1416, "step": 22034 }, { "epoch": 1.313386577661223, "grad_norm": 2.775655746459961, "learning_rate": 2.6850843785028744e-05, "loss": 0.946, "step": 22036 }, { "epoch": 1.313505781380379, "grad_norm": 2.810865879058838, "learning_rate": 2.6842461886217497e-05, "loss": 1.0919, "step": 22038 }, { "epoch": 1.3136249850995352, "grad_norm": 3.7359495162963867, "learning_rate": 2.6834080815830635e-05, "loss": 1.0541, "step": 22040 }, { "epoch": 1.3137441888186911, "grad_norm": 3.287278890609741, "learning_rate": 2.6825700574167968e-05, "loss": 1.1706, "step": 22042 }, { "epoch": 1.3138633925378471, "grad_norm": 3.1878507137298584, "learning_rate": 2.681732116152928e-05, "loss": 1.1645, "step": 22044 }, { "epoch": 1.313982596257003, "grad_norm": 2.9037301540374756, "learning_rate": 2.6808942578214314e-05, "loss": 1.125, "step": 22046 }, { "epoch": 1.3141017999761593, "grad_norm": 3.1099679470062256, "learning_rate": 2.6800564824522857e-05, "loss": 1.2425, "step": 22048 }, { "epoch": 1.3142210036953152, "grad_norm": 3.227670192718506, "learning_rate": 2.6792187900754533e-05, "loss": 1.059, "step": 22050 }, { "epoch": 1.3143402074144714, "grad_norm": 3.3083927631378174, "learning_rate": 2.6783811807209076e-05, "loss": 1.1313, "step": 22052 }, { "epoch": 1.3144594111336274, "grad_norm": 3.3434033393859863, "learning_rate": 2.6775436544186115e-05, "loss": 1.1451, "step": 22054 }, { "epoch": 1.3145786148527834, "grad_norm": 3.0149409770965576, "learning_rate": 2.676706211198522e-05, "loss": 1.2744, "step": 22056 }, { "epoch": 1.3146978185719393, "grad_norm": 3.640063524246216, "learning_rate": 2.6758688510906027e-05, "loss": 1.2066, "step": 22058 }, { "epoch": 1.3148170222910955, "grad_norm": 3.3752970695495605, "learning_rate": 2.6750315741248054e-05, "loss": 1.2221, "step": 22060 }, { "epoch": 1.3149362260102515, "grad_norm": 3.1725034713745117, "learning_rate": 2.674194380331084e-05, "loss": 1.3733, "step": 22062 }, { "epoch": 1.3150554297294075, "grad_norm": 2.739271640777588, "learning_rate": 2.6733572697393872e-05, "loss": 0.999, "step": 22064 }, { "epoch": 1.3151746334485637, "grad_norm": 3.1289055347442627, "learning_rate": 2.6725202423796613e-05, "loss": 1.1605, "step": 22066 }, { "epoch": 1.3152938371677196, "grad_norm": 3.5962038040161133, "learning_rate": 2.67168329828185e-05, "loss": 1.2561, "step": 22068 }, { "epoch": 1.3154130408868756, "grad_norm": 3.156712293624878, "learning_rate": 2.6708464374758935e-05, "loss": 1.2531, "step": 22070 }, { "epoch": 1.3155322446060316, "grad_norm": 3.379467248916626, "learning_rate": 2.670009659991729e-05, "loss": 1.3428, "step": 22072 }, { "epoch": 1.3156514483251878, "grad_norm": 3.4044532775878906, "learning_rate": 2.669172965859289e-05, "loss": 1.1983, "step": 22074 }, { "epoch": 1.3157706520443437, "grad_norm": 3.2976748943328857, "learning_rate": 2.6683363551085082e-05, "loss": 1.1861, "step": 22076 }, { "epoch": 1.3158898557635, "grad_norm": 3.334650754928589, "learning_rate": 2.667499827769314e-05, "loss": 1.3113, "step": 22078 }, { "epoch": 1.316009059482656, "grad_norm": 3.176299571990967, "learning_rate": 2.6666633838716314e-05, "loss": 1.1201, "step": 22080 }, { "epoch": 1.3161282632018119, "grad_norm": 3.852665424346924, "learning_rate": 2.665827023445383e-05, "loss": 1.1658, "step": 22082 }, { "epoch": 1.3162474669209678, "grad_norm": 2.917126178741455, "learning_rate": 2.6649907465204887e-05, "loss": 1.001, "step": 22084 }, { "epoch": 1.316366670640124, "grad_norm": 3.12306809425354, "learning_rate": 2.664154553126864e-05, "loss": 1.2047, "step": 22086 }, { "epoch": 1.31648587435928, "grad_norm": 3.1929426193237305, "learning_rate": 2.6633184432944236e-05, "loss": 1.2711, "step": 22088 }, { "epoch": 1.316605078078436, "grad_norm": 3.4492032527923584, "learning_rate": 2.662482417053077e-05, "loss": 1.1651, "step": 22090 }, { "epoch": 1.3167242817975922, "grad_norm": 3.016145706176758, "learning_rate": 2.6616464744327302e-05, "loss": 1.0049, "step": 22092 }, { "epoch": 1.3168434855167481, "grad_norm": 3.1509885787963867, "learning_rate": 2.6608106154632932e-05, "loss": 1.213, "step": 22094 }, { "epoch": 1.316962689235904, "grad_norm": 3.4029011726379395, "learning_rate": 2.6599748401746606e-05, "loss": 1.1887, "step": 22096 }, { "epoch": 1.31708189295506, "grad_norm": 3.0834226608276367, "learning_rate": 2.6591391485967355e-05, "loss": 1.1012, "step": 22098 }, { "epoch": 1.3172010966742163, "grad_norm": 2.876680374145508, "learning_rate": 2.6583035407594147e-05, "loss": 1.0576, "step": 22100 }, { "epoch": 1.3173203003933722, "grad_norm": 3.337698221206665, "learning_rate": 2.657468016692584e-05, "loss": 1.2046, "step": 22102 }, { "epoch": 1.3174395041125284, "grad_norm": 3.046431064605713, "learning_rate": 2.6566325764261398e-05, "loss": 1.0853, "step": 22104 }, { "epoch": 1.3175587078316844, "grad_norm": 3.3317439556121826, "learning_rate": 2.655797219989965e-05, "loss": 1.1457, "step": 22106 }, { "epoch": 1.3176779115508404, "grad_norm": 3.0630109310150146, "learning_rate": 2.6549619474139455e-05, "loss": 1.0964, "step": 22108 }, { "epoch": 1.3177971152699963, "grad_norm": 3.171501636505127, "learning_rate": 2.6541267587279577e-05, "loss": 1.1261, "step": 22110 }, { "epoch": 1.3179163189891525, "grad_norm": 3.2472119331359863, "learning_rate": 2.6532916539618868e-05, "loss": 1.2776, "step": 22112 }, { "epoch": 1.3180355227083085, "grad_norm": 3.106854200363159, "learning_rate": 2.6524566331456004e-05, "loss": 1.2708, "step": 22114 }, { "epoch": 1.3181547264274647, "grad_norm": 3.0285866260528564, "learning_rate": 2.6516216963089698e-05, "loss": 1.1082, "step": 22116 }, { "epoch": 1.3182739301466206, "grad_norm": 3.110032320022583, "learning_rate": 2.65078684348187e-05, "loss": 1.1459, "step": 22118 }, { "epoch": 1.3183931338657766, "grad_norm": 2.492238759994507, "learning_rate": 2.6499520746941585e-05, "loss": 1.1369, "step": 22120 }, { "epoch": 1.3185123375849326, "grad_norm": 3.1041367053985596, "learning_rate": 2.649117389975705e-05, "loss": 1.1195, "step": 22122 }, { "epoch": 1.3186315413040886, "grad_norm": 3.56848406791687, "learning_rate": 2.6482827893563646e-05, "loss": 1.2378, "step": 22124 }, { "epoch": 1.3187507450232447, "grad_norm": 3.112034797668457, "learning_rate": 2.6474482728659953e-05, "loss": 1.1737, "step": 22126 }, { "epoch": 1.3188699487424007, "grad_norm": 2.931000232696533, "learning_rate": 2.6466138405344508e-05, "loss": 1.223, "step": 22128 }, { "epoch": 1.318989152461557, "grad_norm": 3.1799910068511963, "learning_rate": 2.645779492391582e-05, "loss": 1.195, "step": 22130 }, { "epoch": 1.3191083561807129, "grad_norm": 3.0343191623687744, "learning_rate": 2.6449452284672348e-05, "loss": 1.1602, "step": 22132 }, { "epoch": 1.3192275598998688, "grad_norm": 3.075960159301758, "learning_rate": 2.6441110487912547e-05, "loss": 1.0711, "step": 22134 }, { "epoch": 1.3193467636190248, "grad_norm": 3.230635166168213, "learning_rate": 2.6432769533934832e-05, "loss": 1.1441, "step": 22136 }, { "epoch": 1.319465967338181, "grad_norm": 3.3380091190338135, "learning_rate": 2.6424429423037554e-05, "loss": 1.133, "step": 22138 }, { "epoch": 1.319585171057337, "grad_norm": 3.369734287261963, "learning_rate": 2.6416090155519148e-05, "loss": 1.0773, "step": 22140 }, { "epoch": 1.3197043747764932, "grad_norm": 2.9999279975891113, "learning_rate": 2.6407751731677865e-05, "loss": 1.1726, "step": 22142 }, { "epoch": 1.3198235784956491, "grad_norm": 3.638514280319214, "learning_rate": 2.6399414151812006e-05, "loss": 1.2448, "step": 22144 }, { "epoch": 1.319942782214805, "grad_norm": 3.280574321746826, "learning_rate": 2.6391077416219874e-05, "loss": 1.0995, "step": 22146 }, { "epoch": 1.320061985933961, "grad_norm": 3.112412214279175, "learning_rate": 2.638274152519967e-05, "loss": 1.1605, "step": 22148 }, { "epoch": 1.3201811896531173, "grad_norm": 3.1034164428710938, "learning_rate": 2.637440647904962e-05, "loss": 1.1096, "step": 22150 }, { "epoch": 1.3203003933722732, "grad_norm": 3.4747915267944336, "learning_rate": 2.6366072278067877e-05, "loss": 1.149, "step": 22152 }, { "epoch": 1.3204195970914292, "grad_norm": 3.352090835571289, "learning_rate": 2.635773892255259e-05, "loss": 1.3196, "step": 22154 }, { "epoch": 1.3205388008105854, "grad_norm": 3.161238670349121, "learning_rate": 2.6349406412801858e-05, "loss": 1.2122, "step": 22156 }, { "epoch": 1.3206580045297414, "grad_norm": 3.5093231201171875, "learning_rate": 2.6341074749113814e-05, "loss": 1.2634, "step": 22158 }, { "epoch": 1.3207772082488973, "grad_norm": 3.2849130630493164, "learning_rate": 2.633274393178645e-05, "loss": 1.0877, "step": 22160 }, { "epoch": 1.3208964119680533, "grad_norm": 3.210423707962036, "learning_rate": 2.63244139611178e-05, "loss": 1.047, "step": 22162 }, { "epoch": 1.3210156156872095, "grad_norm": 3.3340256214141846, "learning_rate": 2.6316084837405906e-05, "loss": 1.142, "step": 22164 }, { "epoch": 1.3211348194063655, "grad_norm": 3.2599198818206787, "learning_rate": 2.630775656094865e-05, "loss": 1.1888, "step": 22166 }, { "epoch": 1.3212540231255216, "grad_norm": 3.2357232570648193, "learning_rate": 2.6299429132044027e-05, "loss": 1.2093, "step": 22168 }, { "epoch": 1.3213732268446776, "grad_norm": 3.500124454498291, "learning_rate": 2.629110255098991e-05, "loss": 1.1111, "step": 22170 }, { "epoch": 1.3214924305638336, "grad_norm": 3.6036980152130127, "learning_rate": 2.6282776818084176e-05, "loss": 1.2259, "step": 22172 }, { "epoch": 1.3216116342829896, "grad_norm": 3.0422236919403076, "learning_rate": 2.627445193362466e-05, "loss": 1.1035, "step": 22174 }, { "epoch": 1.3217308380021457, "grad_norm": 2.808380365371704, "learning_rate": 2.6266127897909178e-05, "loss": 1.0654, "step": 22176 }, { "epoch": 1.3218500417213017, "grad_norm": 3.3091559410095215, "learning_rate": 2.6257804711235506e-05, "loss": 1.1978, "step": 22178 }, { "epoch": 1.3219692454404577, "grad_norm": 3.1365926265716553, "learning_rate": 2.6249482373901367e-05, "loss": 1.1695, "step": 22180 }, { "epoch": 1.3220884491596139, "grad_norm": 3.192352771759033, "learning_rate": 2.624116088620455e-05, "loss": 1.2381, "step": 22182 }, { "epoch": 1.3222076528787698, "grad_norm": 3.4320623874664307, "learning_rate": 2.6232840248442658e-05, "loss": 1.3314, "step": 22184 }, { "epoch": 1.3223268565979258, "grad_norm": 3.2890548706054688, "learning_rate": 2.622452046091341e-05, "loss": 1.1777, "step": 22186 }, { "epoch": 1.3224460603170818, "grad_norm": 3.1434872150421143, "learning_rate": 2.621620152391443e-05, "loss": 1.1887, "step": 22188 }, { "epoch": 1.322565264036238, "grad_norm": 3.213120698928833, "learning_rate": 2.6207883437743256e-05, "loss": 1.162, "step": 22190 }, { "epoch": 1.322684467755394, "grad_norm": 2.9278066158294678, "learning_rate": 2.6199566202697518e-05, "loss": 1.112, "step": 22192 }, { "epoch": 1.3228036714745501, "grad_norm": 3.1669862270355225, "learning_rate": 2.6191249819074726e-05, "loss": 1.2048, "step": 22194 }, { "epoch": 1.322922875193706, "grad_norm": 3.5706541538238525, "learning_rate": 2.6182934287172388e-05, "loss": 1.0999, "step": 22196 }, { "epoch": 1.323042078912862, "grad_norm": 2.9394803047180176, "learning_rate": 2.6174619607287987e-05, "loss": 1.1748, "step": 22198 }, { "epoch": 1.323161282632018, "grad_norm": 3.5412216186523438, "learning_rate": 2.6166305779718952e-05, "loss": 1.1707, "step": 22200 }, { "epoch": 1.3232804863511742, "grad_norm": 3.3293468952178955, "learning_rate": 2.615799280476271e-05, "loss": 1.189, "step": 22202 }, { "epoch": 1.3233996900703302, "grad_norm": 3.5263671875, "learning_rate": 2.6149680682716636e-05, "loss": 1.2366, "step": 22204 }, { "epoch": 1.3235188937894862, "grad_norm": 2.886692762374878, "learning_rate": 2.614136941387809e-05, "loss": 0.9948, "step": 22206 }, { "epoch": 1.3236380975086424, "grad_norm": 3.0958330631256104, "learning_rate": 2.6133058998544367e-05, "loss": 1.0378, "step": 22208 }, { "epoch": 1.3237573012277983, "grad_norm": 3.0143213272094727, "learning_rate": 2.6124749437012814e-05, "loss": 1.1635, "step": 22210 }, { "epoch": 1.3238765049469543, "grad_norm": 3.359513282775879, "learning_rate": 2.6116440729580654e-05, "loss": 1.2204, "step": 22212 }, { "epoch": 1.3239957086661103, "grad_norm": 3.3743512630462646, "learning_rate": 2.6108132876545126e-05, "loss": 1.1835, "step": 22214 }, { "epoch": 1.3241149123852665, "grad_norm": 2.943601369857788, "learning_rate": 2.6099825878203433e-05, "loss": 1.0914, "step": 22216 }, { "epoch": 1.3242341161044224, "grad_norm": 3.4422481060028076, "learning_rate": 2.6091519734852742e-05, "loss": 1.1644, "step": 22218 }, { "epoch": 1.3243533198235786, "grad_norm": 2.9344589710235596, "learning_rate": 2.6083214446790187e-05, "loss": 1.112, "step": 22220 }, { "epoch": 1.3244725235427346, "grad_norm": 2.8053879737854004, "learning_rate": 2.6074910014312886e-05, "loss": 1.0616, "step": 22222 }, { "epoch": 1.3245917272618906, "grad_norm": 3.007737874984741, "learning_rate": 2.606660643771791e-05, "loss": 1.2045, "step": 22224 }, { "epoch": 1.3247109309810465, "grad_norm": 3.371788263320923, "learning_rate": 2.605830371730229e-05, "loss": 1.3411, "step": 22226 }, { "epoch": 1.3248301347002027, "grad_norm": 3.3994994163513184, "learning_rate": 2.6050001853363105e-05, "loss": 1.2608, "step": 22228 }, { "epoch": 1.3249493384193587, "grad_norm": 3.57938814163208, "learning_rate": 2.604170084619726e-05, "loss": 1.2191, "step": 22230 }, { "epoch": 1.3250685421385147, "grad_norm": 4.09218168258667, "learning_rate": 2.6033400696101763e-05, "loss": 1.1768, "step": 22232 }, { "epoch": 1.3251877458576709, "grad_norm": 2.8305625915527344, "learning_rate": 2.602510140337354e-05, "loss": 1.1284, "step": 22234 }, { "epoch": 1.3253069495768268, "grad_norm": 3.24371600151062, "learning_rate": 2.6016802968309427e-05, "loss": 1.1755, "step": 22236 }, { "epoch": 1.3254261532959828, "grad_norm": 3.046295642852783, "learning_rate": 2.600850539120636e-05, "loss": 1.1061, "step": 22238 }, { "epoch": 1.3255453570151388, "grad_norm": 3.039344310760498, "learning_rate": 2.6000208672361137e-05, "loss": 1.1195, "step": 22240 }, { "epoch": 1.325664560734295, "grad_norm": 3.1707422733306885, "learning_rate": 2.599191281207056e-05, "loss": 1.0101, "step": 22242 }, { "epoch": 1.325783764453451, "grad_norm": 2.941856622695923, "learning_rate": 2.5983617810631387e-05, "loss": 1.2192, "step": 22244 }, { "epoch": 1.325902968172607, "grad_norm": 3.5133368968963623, "learning_rate": 2.5975323668340422e-05, "loss": 1.1169, "step": 22246 }, { "epoch": 1.326022171891763, "grad_norm": 3.013538360595703, "learning_rate": 2.5967030385494306e-05, "loss": 1.2074, "step": 22248 }, { "epoch": 1.326141375610919, "grad_norm": 3.210219144821167, "learning_rate": 2.5958737962389718e-05, "loss": 1.2518, "step": 22250 }, { "epoch": 1.326260579330075, "grad_norm": 3.181339740753174, "learning_rate": 2.595044639932337e-05, "loss": 1.1101, "step": 22252 }, { "epoch": 1.3263797830492312, "grad_norm": 2.908398389816284, "learning_rate": 2.5942155696591802e-05, "loss": 1.0691, "step": 22254 }, { "epoch": 1.3264989867683872, "grad_norm": 3.3011996746063232, "learning_rate": 2.5933865854491655e-05, "loss": 1.1549, "step": 22256 }, { "epoch": 1.3266181904875431, "grad_norm": 3.254786729812622, "learning_rate": 2.592557687331947e-05, "loss": 0.9593, "step": 22258 }, { "epoch": 1.3267373942066993, "grad_norm": 3.2338101863861084, "learning_rate": 2.5917288753371765e-05, "loss": 1.2364, "step": 22260 }, { "epoch": 1.3268565979258553, "grad_norm": 3.0792596340179443, "learning_rate": 2.590900149494504e-05, "loss": 1.175, "step": 22262 }, { "epoch": 1.3269758016450113, "grad_norm": 3.3872740268707275, "learning_rate": 2.5900715098335763e-05, "loss": 1.3188, "step": 22264 }, { "epoch": 1.3270950053641672, "grad_norm": 2.966510534286499, "learning_rate": 2.589242956384036e-05, "loss": 1.2841, "step": 22266 }, { "epoch": 1.3272142090833234, "grad_norm": 3.1237683296203613, "learning_rate": 2.5884144891755235e-05, "loss": 1.045, "step": 22268 }, { "epoch": 1.3273334128024794, "grad_norm": 2.9756529331207275, "learning_rate": 2.5875861082376757e-05, "loss": 1.1844, "step": 22270 }, { "epoch": 1.3274526165216356, "grad_norm": 3.0196101665496826, "learning_rate": 2.5867578136001246e-05, "loss": 1.1961, "step": 22272 }, { "epoch": 1.3275718202407916, "grad_norm": 3.024698257446289, "learning_rate": 2.5859296052925065e-05, "loss": 1.0835, "step": 22274 }, { "epoch": 1.3276910239599475, "grad_norm": 2.9468953609466553, "learning_rate": 2.5851014833444447e-05, "loss": 1.1625, "step": 22276 }, { "epoch": 1.3278102276791035, "grad_norm": 3.357590913772583, "learning_rate": 2.5842734477855655e-05, "loss": 1.1509, "step": 22278 }, { "epoch": 1.3279294313982597, "grad_norm": 3.0911805629730225, "learning_rate": 2.5834454986454908e-05, "loss": 1.1233, "step": 22280 }, { "epoch": 1.3280486351174157, "grad_norm": 3.967603921890259, "learning_rate": 2.582617635953839e-05, "loss": 1.2672, "step": 22282 }, { "epoch": 1.3281678388365716, "grad_norm": 3.3645780086517334, "learning_rate": 2.5817898597402245e-05, "loss": 1.2278, "step": 22284 }, { "epoch": 1.3282870425557278, "grad_norm": 3.381659984588623, "learning_rate": 2.580962170034261e-05, "loss": 1.0985, "step": 22286 }, { "epoch": 1.3284062462748838, "grad_norm": 2.709866762161255, "learning_rate": 2.5801345668655574e-05, "loss": 1.1387, "step": 22288 }, { "epoch": 1.3285254499940398, "grad_norm": 3.325334072113037, "learning_rate": 2.5793070502637184e-05, "loss": 1.0552, "step": 22290 }, { "epoch": 1.3286446537131957, "grad_norm": 3.2198095321655273, "learning_rate": 2.5784796202583518e-05, "loss": 1.3098, "step": 22292 }, { "epoch": 1.328763857432352, "grad_norm": 3.0027828216552734, "learning_rate": 2.577652276879052e-05, "loss": 1.0587, "step": 22294 }, { "epoch": 1.328883061151508, "grad_norm": 3.3101394176483154, "learning_rate": 2.5768250201554168e-05, "loss": 1.1009, "step": 22296 }, { "epoch": 1.329002264870664, "grad_norm": 2.9763123989105225, "learning_rate": 2.5759978501170456e-05, "loss": 1.1536, "step": 22298 }, { "epoch": 1.32912146858982, "grad_norm": 3.225144863128662, "learning_rate": 2.5751707667935205e-05, "loss": 1.0403, "step": 22300 }, { "epoch": 1.329240672308976, "grad_norm": 3.2688188552856445, "learning_rate": 2.5743437702144356e-05, "loss": 1.3058, "step": 22302 }, { "epoch": 1.329359876028132, "grad_norm": 3.087677001953125, "learning_rate": 2.5735168604093733e-05, "loss": 1.1119, "step": 22304 }, { "epoch": 1.3294790797472882, "grad_norm": 3.1767492294311523, "learning_rate": 2.5726900374079154e-05, "loss": 1.2513, "step": 22306 }, { "epoch": 1.3295982834664442, "grad_norm": 3.0085742473602295, "learning_rate": 2.5718633012396397e-05, "loss": 1.1227, "step": 22308 }, { "epoch": 1.3297174871856001, "grad_norm": 3.5631260871887207, "learning_rate": 2.571036651934121e-05, "loss": 1.1203, "step": 22310 }, { "epoch": 1.3298366909047563, "grad_norm": 3.2706868648529053, "learning_rate": 2.5702100895209324e-05, "loss": 1.3302, "step": 22312 }, { "epoch": 1.3299558946239123, "grad_norm": 3.30181622505188, "learning_rate": 2.56938361402964e-05, "loss": 1.132, "step": 22314 }, { "epoch": 1.3300750983430683, "grad_norm": 3.317420482635498, "learning_rate": 2.568557225489816e-05, "loss": 1.0415, "step": 22316 }, { "epoch": 1.3301943020622242, "grad_norm": 3.4343373775482178, "learning_rate": 2.5677309239310155e-05, "loss": 1.1242, "step": 22318 }, { "epoch": 1.3303135057813804, "grad_norm": 3.3897554874420166, "learning_rate": 2.5669047093828035e-05, "loss": 1.2342, "step": 22320 }, { "epoch": 1.3304327095005364, "grad_norm": 2.9362869262695312, "learning_rate": 2.5660785818747345e-05, "loss": 1.179, "step": 22322 }, { "epoch": 1.3305519132196926, "grad_norm": 3.3774542808532715, "learning_rate": 2.5652525414363626e-05, "loss": 1.2475, "step": 22324 }, { "epoch": 1.3306711169388485, "grad_norm": 3.1177470684051514, "learning_rate": 2.564426588097238e-05, "loss": 1.1252, "step": 22326 }, { "epoch": 1.3307903206580045, "grad_norm": 3.2177743911743164, "learning_rate": 2.5636007218869074e-05, "loss": 1.1029, "step": 22328 }, { "epoch": 1.3309095243771605, "grad_norm": 2.833953857421875, "learning_rate": 2.5627749428349145e-05, "loss": 1.221, "step": 22330 }, { "epoch": 1.3310287280963167, "grad_norm": 3.656118631362915, "learning_rate": 2.561949250970801e-05, "loss": 1.1945, "step": 22332 }, { "epoch": 1.3311479318154726, "grad_norm": 3.208632707595825, "learning_rate": 2.561123646324105e-05, "loss": 1.2027, "step": 22334 }, { "epoch": 1.3312671355346286, "grad_norm": 3.0743203163146973, "learning_rate": 2.5602981289243578e-05, "loss": 1.0988, "step": 22336 }, { "epoch": 1.3313863392537848, "grad_norm": 3.3713133335113525, "learning_rate": 2.5594726988010985e-05, "loss": 1.1567, "step": 22338 }, { "epoch": 1.3315055429729408, "grad_norm": 3.927006244659424, "learning_rate": 2.5586473559838487e-05, "loss": 1.2146, "step": 22340 }, { "epoch": 1.3316247466920967, "grad_norm": 3.0981247425079346, "learning_rate": 2.5578221005021343e-05, "loss": 1.1339, "step": 22342 }, { "epoch": 1.3317439504112527, "grad_norm": 3.069761037826538, "learning_rate": 2.556996932385481e-05, "loss": 1.156, "step": 22344 }, { "epoch": 1.331863154130409, "grad_norm": 3.126096248626709, "learning_rate": 2.556171851663406e-05, "loss": 1.1352, "step": 22346 }, { "epoch": 1.3319823578495649, "grad_norm": 2.644994020462036, "learning_rate": 2.555346858365425e-05, "loss": 1.0315, "step": 22348 }, { "epoch": 1.332101561568721, "grad_norm": 3.070579767227173, "learning_rate": 2.5545219525210517e-05, "loss": 1.1804, "step": 22350 }, { "epoch": 1.332220765287877, "grad_norm": 3.164767026901245, "learning_rate": 2.5536971341597952e-05, "loss": 1.1101, "step": 22352 }, { "epoch": 1.332339969007033, "grad_norm": 3.2436017990112305, "learning_rate": 2.5528724033111623e-05, "loss": 1.1502, "step": 22354 }, { "epoch": 1.332459172726189, "grad_norm": 3.4956088066101074, "learning_rate": 2.5520477600046554e-05, "loss": 1.24, "step": 22356 }, { "epoch": 1.3325783764453452, "grad_norm": 3.537071704864502, "learning_rate": 2.551223204269777e-05, "loss": 1.2853, "step": 22358 }, { "epoch": 1.3326975801645011, "grad_norm": 3.2474184036254883, "learning_rate": 2.5503987361360203e-05, "loss": 1.2086, "step": 22360 }, { "epoch": 1.332816783883657, "grad_norm": 3.223691463470459, "learning_rate": 2.549574355632886e-05, "loss": 1.0756, "step": 22362 }, { "epoch": 1.3329359876028133, "grad_norm": 3.1262905597686768, "learning_rate": 2.548750062789857e-05, "loss": 1.2447, "step": 22364 }, { "epoch": 1.3330551913219693, "grad_norm": 3.3098390102386475, "learning_rate": 2.5479258576364274e-05, "loss": 1.1838, "step": 22366 }, { "epoch": 1.3331743950411252, "grad_norm": 3.006901264190674, "learning_rate": 2.5471017402020814e-05, "loss": 1.0367, "step": 22368 }, { "epoch": 1.3332935987602812, "grad_norm": 3.4616706371307373, "learning_rate": 2.5462777105162945e-05, "loss": 1.1578, "step": 22370 }, { "epoch": 1.3334128024794374, "grad_norm": 3.8066258430480957, "learning_rate": 2.5454537686085513e-05, "loss": 1.1612, "step": 22372 }, { "epoch": 1.3335320061985934, "grad_norm": 3.35823917388916, "learning_rate": 2.5446299145083254e-05, "loss": 1.2458, "step": 22374 }, { "epoch": 1.3336512099177495, "grad_norm": 3.0211291313171387, "learning_rate": 2.5438061482450874e-05, "loss": 1.0479, "step": 22376 }, { "epoch": 1.3337704136369055, "grad_norm": 3.1234724521636963, "learning_rate": 2.542982469848306e-05, "loss": 1.1522, "step": 22378 }, { "epoch": 1.3338896173560615, "grad_norm": 3.0707058906555176, "learning_rate": 2.542158879347452e-05, "loss": 1.1084, "step": 22380 }, { "epoch": 1.3340088210752175, "grad_norm": 3.1003620624542236, "learning_rate": 2.5413353767719805e-05, "loss": 1.2206, "step": 22382 }, { "epoch": 1.3341280247943736, "grad_norm": 3.1511056423187256, "learning_rate": 2.5405119621513566e-05, "loss": 1.1208, "step": 22384 }, { "epoch": 1.3342472285135296, "grad_norm": 3.0446019172668457, "learning_rate": 2.5396886355150373e-05, "loss": 1.1245, "step": 22386 }, { "epoch": 1.3343664322326856, "grad_norm": 3.003894567489624, "learning_rate": 2.5388653968924693e-05, "loss": 1.2413, "step": 22388 }, { "epoch": 1.3344856359518418, "grad_norm": 3.2937204837799072, "learning_rate": 2.5380422463131082e-05, "loss": 1.066, "step": 22390 }, { "epoch": 1.3346048396709977, "grad_norm": 2.9279370307922363, "learning_rate": 2.5372191838063996e-05, "loss": 1.2439, "step": 22392 }, { "epoch": 1.3347240433901537, "grad_norm": 3.0096819400787354, "learning_rate": 2.5363962094017868e-05, "loss": 1.1128, "step": 22394 }, { "epoch": 1.3348432471093097, "grad_norm": 3.250798225402832, "learning_rate": 2.5355733231287115e-05, "loss": 1.0753, "step": 22396 }, { "epoch": 1.3349624508284659, "grad_norm": 2.9638185501098633, "learning_rate": 2.5347505250166103e-05, "loss": 1.181, "step": 22398 }, { "epoch": 1.3350816545476218, "grad_norm": 3.4645004272460938, "learning_rate": 2.533927815094917e-05, "loss": 1.1791, "step": 22400 }, { "epoch": 1.335200858266778, "grad_norm": 3.308978319168091, "learning_rate": 2.5331051933930638e-05, "loss": 1.245, "step": 22402 }, { "epoch": 1.335320061985934, "grad_norm": 3.1140055656433105, "learning_rate": 2.532282659940478e-05, "loss": 1.316, "step": 22404 }, { "epoch": 1.33543926570509, "grad_norm": 3.5172741413116455, "learning_rate": 2.5314602147665824e-05, "loss": 1.1483, "step": 22406 }, { "epoch": 1.335558469424246, "grad_norm": 3.2916111946105957, "learning_rate": 2.530637857900803e-05, "loss": 1.2182, "step": 22408 }, { "epoch": 1.3356776731434021, "grad_norm": 3.24253511428833, "learning_rate": 2.5298155893725572e-05, "loss": 1.1947, "step": 22410 }, { "epoch": 1.335796876862558, "grad_norm": 2.730536460876465, "learning_rate": 2.528993409211259e-05, "loss": 1.1773, "step": 22412 }, { "epoch": 1.335916080581714, "grad_norm": 3.5943756103515625, "learning_rate": 2.52817131744632e-05, "loss": 1.1653, "step": 22414 }, { "epoch": 1.3360352843008703, "grad_norm": 3.4491031169891357, "learning_rate": 2.5273493141071513e-05, "loss": 1.191, "step": 22416 }, { "epoch": 1.3361544880200262, "grad_norm": 3.616508722305298, "learning_rate": 2.5265273992231575e-05, "loss": 1.0005, "step": 22418 }, { "epoch": 1.3362736917391822, "grad_norm": 3.1476759910583496, "learning_rate": 2.5257055728237413e-05, "loss": 1.1689, "step": 22420 }, { "epoch": 1.3363928954583382, "grad_norm": 2.9593677520751953, "learning_rate": 2.5248838349383024e-05, "loss": 1.1039, "step": 22422 }, { "epoch": 1.3365120991774944, "grad_norm": 3.2059504985809326, "learning_rate": 2.5240621855962353e-05, "loss": 1.0901, "step": 22424 }, { "epoch": 1.3366313028966503, "grad_norm": 3.170153856277466, "learning_rate": 2.523240624826939e-05, "loss": 1.1198, "step": 22426 }, { "epoch": 1.3367505066158065, "grad_norm": 3.2153308391571045, "learning_rate": 2.5224191526597985e-05, "loss": 1.0979, "step": 22428 }, { "epoch": 1.3368697103349625, "grad_norm": 3.290043830871582, "learning_rate": 2.5215977691241993e-05, "loss": 1.1159, "step": 22430 }, { "epoch": 1.3369889140541185, "grad_norm": 3.073277473449707, "learning_rate": 2.5207764742495317e-05, "loss": 1.0913, "step": 22432 }, { "epoch": 1.3371081177732744, "grad_norm": 3.2129065990448, "learning_rate": 2.5199552680651682e-05, "loss": 1.3057, "step": 22434 }, { "epoch": 1.3372273214924306, "grad_norm": 3.374943971633911, "learning_rate": 2.519134150600492e-05, "loss": 1.181, "step": 22436 }, { "epoch": 1.3373465252115866, "grad_norm": 3.498718500137329, "learning_rate": 2.5183131218848754e-05, "loss": 1.2463, "step": 22438 }, { "epoch": 1.3374657289307426, "grad_norm": 3.4912891387939453, "learning_rate": 2.5174921819476895e-05, "loss": 1.1124, "step": 22440 }, { "epoch": 1.3375849326498988, "grad_norm": 3.2062084674835205, "learning_rate": 2.5166713308183004e-05, "loss": 1.0992, "step": 22442 }, { "epoch": 1.3377041363690547, "grad_norm": 3.3794429302215576, "learning_rate": 2.515850568526078e-05, "loss": 1.1279, "step": 22444 }, { "epoch": 1.3378233400882107, "grad_norm": 3.348367214202881, "learning_rate": 2.515029895100378e-05, "loss": 1.1151, "step": 22446 }, { "epoch": 1.3379425438073667, "grad_norm": 3.1023061275482178, "learning_rate": 2.51420931057056e-05, "loss": 1.045, "step": 22448 }, { "epoch": 1.3380617475265228, "grad_norm": 3.6629526615142822, "learning_rate": 2.513388814965983e-05, "loss": 1.1668, "step": 22450 }, { "epoch": 1.3381809512456788, "grad_norm": 3.217946767807007, "learning_rate": 2.5125684083159918e-05, "loss": 1.2056, "step": 22452 }, { "epoch": 1.338300154964835, "grad_norm": 2.700348138809204, "learning_rate": 2.511748090649942e-05, "loss": 1.1275, "step": 22454 }, { "epoch": 1.338419358683991, "grad_norm": 3.4183921813964844, "learning_rate": 2.510927861997176e-05, "loss": 1.1315, "step": 22456 }, { "epoch": 1.338538562403147, "grad_norm": 3.0002100467681885, "learning_rate": 2.5101077223870367e-05, "loss": 0.9419, "step": 22458 }, { "epoch": 1.338657766122303, "grad_norm": 2.9889626502990723, "learning_rate": 2.5092876718488633e-05, "loss": 1.2199, "step": 22460 }, { "epoch": 1.338776969841459, "grad_norm": 3.101210832595825, "learning_rate": 2.508467710411992e-05, "loss": 1.2726, "step": 22462 }, { "epoch": 1.338896173560615, "grad_norm": 3.437269926071167, "learning_rate": 2.507647838105755e-05, "loss": 1.2784, "step": 22464 }, { "epoch": 1.339015377279771, "grad_norm": 3.107327461242676, "learning_rate": 2.5068280549594824e-05, "loss": 1.2776, "step": 22466 }, { "epoch": 1.3391345809989272, "grad_norm": 2.965144395828247, "learning_rate": 2.5060083610025008e-05, "loss": 1.044, "step": 22468 }, { "epoch": 1.3392537847180832, "grad_norm": 3.363640308380127, "learning_rate": 2.5051887562641307e-05, "loss": 1.2, "step": 22470 }, { "epoch": 1.3393729884372392, "grad_norm": 2.8066017627716064, "learning_rate": 2.5043692407736975e-05, "loss": 1.085, "step": 22472 }, { "epoch": 1.3394921921563951, "grad_norm": 3.1835906505584717, "learning_rate": 2.5035498145605162e-05, "loss": 1.0859, "step": 22474 }, { "epoch": 1.3396113958755513, "grad_norm": 3.3651528358459473, "learning_rate": 2.5027304776538963e-05, "loss": 1.1331, "step": 22476 }, { "epoch": 1.3397305995947073, "grad_norm": 3.2379419803619385, "learning_rate": 2.501911230083154e-05, "loss": 1.2601, "step": 22478 }, { "epoch": 1.3398498033138635, "grad_norm": 2.7723093032836914, "learning_rate": 2.5010920718775933e-05, "loss": 1.1506, "step": 22480 }, { "epoch": 1.3399690070330195, "grad_norm": 2.919090747833252, "learning_rate": 2.500273003066519e-05, "loss": 1.1069, "step": 22482 }, { "epoch": 1.3400882107521754, "grad_norm": 3.3796472549438477, "learning_rate": 2.4994540236792325e-05, "loss": 1.1175, "step": 22484 }, { "epoch": 1.3402074144713314, "grad_norm": 3.392883777618408, "learning_rate": 2.4986351337450314e-05, "loss": 1.0854, "step": 22486 }, { "epoch": 1.3403266181904876, "grad_norm": 3.210212230682373, "learning_rate": 2.4978163332932093e-05, "loss": 1.269, "step": 22488 }, { "epoch": 1.3404458219096436, "grad_norm": 3.6443557739257812, "learning_rate": 2.4969976223530583e-05, "loss": 1.1695, "step": 22490 }, { "epoch": 1.3405650256287998, "grad_norm": 3.2011165618896484, "learning_rate": 2.4961790009538666e-05, "loss": 1.2076, "step": 22492 }, { "epoch": 1.3406842293479557, "grad_norm": 3.3033061027526855, "learning_rate": 2.495360469124916e-05, "loss": 1.1495, "step": 22494 }, { "epoch": 1.3408034330671117, "grad_norm": 3.158714532852173, "learning_rate": 2.4945420268954955e-05, "loss": 1.2146, "step": 22496 }, { "epoch": 1.3409226367862677, "grad_norm": 3.178497076034546, "learning_rate": 2.493723674294875e-05, "loss": 1.1523, "step": 22498 }, { "epoch": 1.3410418405054236, "grad_norm": 3.4431850910186768, "learning_rate": 2.492905411352336e-05, "loss": 1.1616, "step": 22500 }, { "epoch": 1.3411610442245798, "grad_norm": 2.9163825511932373, "learning_rate": 2.4920872380971476e-05, "loss": 0.9748, "step": 22502 }, { "epoch": 1.3412802479437358, "grad_norm": 3.051762580871582, "learning_rate": 2.4912691545585805e-05, "loss": 1.0717, "step": 22504 }, { "epoch": 1.341399451662892, "grad_norm": 3.130382537841797, "learning_rate": 2.4904511607658985e-05, "loss": 1.2267, "step": 22506 }, { "epoch": 1.341518655382048, "grad_norm": 3.155917167663574, "learning_rate": 2.489633256748365e-05, "loss": 1.1639, "step": 22508 }, { "epoch": 1.341637859101204, "grad_norm": 3.5599944591522217, "learning_rate": 2.4888154425352388e-05, "loss": 1.2397, "step": 22510 }, { "epoch": 1.34175706282036, "grad_norm": 3.5478341579437256, "learning_rate": 2.4879977181557744e-05, "loss": 1.1806, "step": 22512 }, { "epoch": 1.341876266539516, "grad_norm": 3.449467420578003, "learning_rate": 2.4871800836392307e-05, "loss": 1.1542, "step": 22514 }, { "epoch": 1.341995470258672, "grad_norm": 3.1630187034606934, "learning_rate": 2.4863625390148487e-05, "loss": 1.2655, "step": 22516 }, { "epoch": 1.3421146739778282, "grad_norm": 3.163341760635376, "learning_rate": 2.4855450843118804e-05, "loss": 1.3009, "step": 22518 }, { "epoch": 1.3422338776969842, "grad_norm": 3.0002596378326416, "learning_rate": 2.4847277195595702e-05, "loss": 1.0912, "step": 22520 }, { "epoch": 1.3423530814161402, "grad_norm": 3.4231719970703125, "learning_rate": 2.4839104447871515e-05, "loss": 1.0879, "step": 22522 }, { "epoch": 1.3424722851352962, "grad_norm": 2.9102466106414795, "learning_rate": 2.4830932600238667e-05, "loss": 1.1957, "step": 22524 }, { "epoch": 1.3425914888544523, "grad_norm": 3.185988426208496, "learning_rate": 2.4822761652989468e-05, "loss": 1.1822, "step": 22526 }, { "epoch": 1.3427106925736083, "grad_norm": 3.2730560302734375, "learning_rate": 2.4814591606416237e-05, "loss": 1.1182, "step": 22528 }, { "epoch": 1.3428298962927643, "grad_norm": 2.7551002502441406, "learning_rate": 2.480642246081123e-05, "loss": 1.1055, "step": 22530 }, { "epoch": 1.3429491000119205, "grad_norm": 3.2592079639434814, "learning_rate": 2.4798254216466693e-05, "loss": 1.0545, "step": 22532 }, { "epoch": 1.3430683037310764, "grad_norm": 2.8837735652923584, "learning_rate": 2.4790086873674827e-05, "loss": 1.2124, "step": 22534 }, { "epoch": 1.3431875074502324, "grad_norm": 3.2351417541503906, "learning_rate": 2.4781920432727813e-05, "loss": 1.303, "step": 22536 }, { "epoch": 1.3433067111693884, "grad_norm": 2.9943490028381348, "learning_rate": 2.4773754893917784e-05, "loss": 1.2212, "step": 22538 }, { "epoch": 1.3434259148885446, "grad_norm": 3.081575870513916, "learning_rate": 2.4765590257536835e-05, "loss": 1.1058, "step": 22540 }, { "epoch": 1.3435451186077005, "grad_norm": 2.8863449096679688, "learning_rate": 2.475742652387708e-05, "loss": 1.0428, "step": 22542 }, { "epoch": 1.3436643223268567, "grad_norm": 3.176079034805298, "learning_rate": 2.4749263693230547e-05, "loss": 1.2758, "step": 22544 }, { "epoch": 1.3437835260460127, "grad_norm": 3.4360451698303223, "learning_rate": 2.474110176588924e-05, "loss": 1.2404, "step": 22546 }, { "epoch": 1.3439027297651687, "grad_norm": 3.464714288711548, "learning_rate": 2.473294074214515e-05, "loss": 1.3051, "step": 22548 }, { "epoch": 1.3440219334843246, "grad_norm": 3.169738292694092, "learning_rate": 2.4724780622290217e-05, "loss": 1.0238, "step": 22550 }, { "epoch": 1.3441411372034808, "grad_norm": 2.792898416519165, "learning_rate": 2.4716621406616364e-05, "loss": 1.154, "step": 22552 }, { "epoch": 1.3442603409226368, "grad_norm": 2.833340883255005, "learning_rate": 2.4708463095415467e-05, "loss": 1.2318, "step": 22554 }, { "epoch": 1.3443795446417928, "grad_norm": 3.17950177192688, "learning_rate": 2.470030568897938e-05, "loss": 1.2285, "step": 22556 }, { "epoch": 1.344498748360949, "grad_norm": 3.3831074237823486, "learning_rate": 2.46921491875999e-05, "loss": 1.1548, "step": 22558 }, { "epoch": 1.344617952080105, "grad_norm": 3.3047640323638916, "learning_rate": 2.468399359156887e-05, "loss": 1.2375, "step": 22560 }, { "epoch": 1.344737155799261, "grad_norm": 3.2887039184570312, "learning_rate": 2.4675838901177972e-05, "loss": 1.17, "step": 22562 }, { "epoch": 1.3448563595184169, "grad_norm": 3.435732126235962, "learning_rate": 2.4667685116718982e-05, "loss": 1.1066, "step": 22564 }, { "epoch": 1.344975563237573, "grad_norm": 3.30596661567688, "learning_rate": 2.4659532238483584e-05, "loss": 1.3146, "step": 22566 }, { "epoch": 1.345094766956729, "grad_norm": 3.186480760574341, "learning_rate": 2.4651380266763385e-05, "loss": 1.1999, "step": 22568 }, { "epoch": 1.3452139706758852, "grad_norm": 3.291217565536499, "learning_rate": 2.464322920185006e-05, "loss": 1.2077, "step": 22570 }, { "epoch": 1.3453331743950412, "grad_norm": 2.9462146759033203, "learning_rate": 2.4635079044035186e-05, "loss": 1.1508, "step": 22572 }, { "epoch": 1.3454523781141972, "grad_norm": 3.103003978729248, "learning_rate": 2.4626929793610314e-05, "loss": 1.0451, "step": 22574 }, { "epoch": 1.3455715818333531, "grad_norm": 3.3663170337677, "learning_rate": 2.461878145086696e-05, "loss": 1.1496, "step": 22576 }, { "epoch": 1.3456907855525093, "grad_norm": 3.121460437774658, "learning_rate": 2.461063401609668e-05, "loss": 1.1238, "step": 22578 }, { "epoch": 1.3458099892716653, "grad_norm": 2.884507656097412, "learning_rate": 2.4602487489590866e-05, "loss": 1.0799, "step": 22580 }, { "epoch": 1.3459291929908213, "grad_norm": 2.9628190994262695, "learning_rate": 2.4594341871640947e-05, "loss": 1.1248, "step": 22582 }, { "epoch": 1.3460483967099774, "grad_norm": 3.2574751377105713, "learning_rate": 2.458619716253839e-05, "loss": 1.1026, "step": 22584 }, { "epoch": 1.3461676004291334, "grad_norm": 3.490170955657959, "learning_rate": 2.4578053362574465e-05, "loss": 1.2441, "step": 22586 }, { "epoch": 1.3462868041482894, "grad_norm": 2.984247922897339, "learning_rate": 2.456991047204058e-05, "loss": 1.0426, "step": 22588 }, { "epoch": 1.3464060078674454, "grad_norm": 3.2358083724975586, "learning_rate": 2.4561768491227998e-05, "loss": 1.1487, "step": 22590 }, { "epoch": 1.3465252115866015, "grad_norm": 3.120473861694336, "learning_rate": 2.4553627420428e-05, "loss": 1.1225, "step": 22592 }, { "epoch": 1.3466444153057575, "grad_norm": 3.5456278324127197, "learning_rate": 2.454548725993181e-05, "loss": 1.1514, "step": 22594 }, { "epoch": 1.3467636190249137, "grad_norm": 3.0871527194976807, "learning_rate": 2.453734801003063e-05, "loss": 1.1099, "step": 22596 }, { "epoch": 1.3468828227440697, "grad_norm": 3.338651657104492, "learning_rate": 2.4529209671015635e-05, "loss": 1.108, "step": 22598 }, { "epoch": 1.3470020264632256, "grad_norm": 3.347717046737671, "learning_rate": 2.4521072243177956e-05, "loss": 1.2071, "step": 22600 }, { "epoch": 1.3471212301823816, "grad_norm": 3.2864091396331787, "learning_rate": 2.45129357268087e-05, "loss": 1.3993, "step": 22602 }, { "epoch": 1.3472404339015378, "grad_norm": 3.602938413619995, "learning_rate": 2.4504800122198912e-05, "loss": 1.2156, "step": 22604 }, { "epoch": 1.3473596376206938, "grad_norm": 3.458669424057007, "learning_rate": 2.4496665429639674e-05, "loss": 1.227, "step": 22606 }, { "epoch": 1.3474788413398497, "grad_norm": 3.312869071960449, "learning_rate": 2.448853164942198e-05, "loss": 1.1019, "step": 22608 }, { "epoch": 1.347598045059006, "grad_norm": 3.1076812744140625, "learning_rate": 2.448039878183679e-05, "loss": 1.1643, "step": 22610 }, { "epoch": 1.347717248778162, "grad_norm": 2.6593446731567383, "learning_rate": 2.447226682717505e-05, "loss": 1.0121, "step": 22612 }, { "epoch": 1.3478364524973179, "grad_norm": 3.366403102874756, "learning_rate": 2.4464135785727665e-05, "loss": 1.1982, "step": 22614 }, { "epoch": 1.3479556562164738, "grad_norm": 3.4685659408569336, "learning_rate": 2.445600565778552e-05, "loss": 1.3231, "step": 22616 }, { "epoch": 1.34807485993563, "grad_norm": 3.190779209136963, "learning_rate": 2.4447876443639443e-05, "loss": 1.3057, "step": 22618 }, { "epoch": 1.348194063654786, "grad_norm": 3.185072660446167, "learning_rate": 2.4439748143580254e-05, "loss": 1.2074, "step": 22620 }, { "epoch": 1.3483132673739422, "grad_norm": 3.3818228244781494, "learning_rate": 2.443162075789871e-05, "loss": 1.1545, "step": 22622 }, { "epoch": 1.3484324710930982, "grad_norm": 3.7902512550354004, "learning_rate": 2.4423494286885612e-05, "loss": 1.1982, "step": 22624 }, { "epoch": 1.3485516748122541, "grad_norm": 3.1566691398620605, "learning_rate": 2.441536873083161e-05, "loss": 1.0705, "step": 22626 }, { "epoch": 1.34867087853141, "grad_norm": 3.151864528656006, "learning_rate": 2.4407244090027387e-05, "loss": 1.134, "step": 22628 }, { "epoch": 1.3487900822505663, "grad_norm": 2.9819998741149902, "learning_rate": 2.4399120364763646e-05, "loss": 1.2932, "step": 22630 }, { "epoch": 1.3489092859697223, "grad_norm": 3.400951862335205, "learning_rate": 2.4390997555330924e-05, "loss": 1.2084, "step": 22632 }, { "epoch": 1.3490284896888782, "grad_norm": 2.84332013130188, "learning_rate": 2.4382875662019862e-05, "loss": 1.0905, "step": 22634 }, { "epoch": 1.3491476934080344, "grad_norm": 2.8403067588806152, "learning_rate": 2.4374754685120983e-05, "loss": 1.0501, "step": 22636 }, { "epoch": 1.3492668971271904, "grad_norm": 3.398928165435791, "learning_rate": 2.43666346249248e-05, "loss": 1.3024, "step": 22638 }, { "epoch": 1.3493861008463464, "grad_norm": 3.2342119216918945, "learning_rate": 2.4358515481721805e-05, "loss": 1.195, "step": 22640 }, { "epoch": 1.3495053045655023, "grad_norm": 3.18414044380188, "learning_rate": 2.4350397255802444e-05, "loss": 1.1983, "step": 22642 }, { "epoch": 1.3496245082846585, "grad_norm": 3.450336217880249, "learning_rate": 2.434227994745713e-05, "loss": 1.1389, "step": 22644 }, { "epoch": 1.3497437120038145, "grad_norm": 3.0350754261016846, "learning_rate": 2.4334163556976232e-05, "loss": 1.2123, "step": 22646 }, { "epoch": 1.3498629157229707, "grad_norm": 2.9553956985473633, "learning_rate": 2.4326048084650154e-05, "loss": 1.1443, "step": 22648 }, { "epoch": 1.3499821194421266, "grad_norm": 3.0800843238830566, "learning_rate": 2.4317933530769137e-05, "loss": 1.0704, "step": 22650 }, { "epoch": 1.3501013231612826, "grad_norm": 3.477191925048828, "learning_rate": 2.4309819895623535e-05, "loss": 1.1677, "step": 22652 }, { "epoch": 1.3502205268804386, "grad_norm": 2.9021623134613037, "learning_rate": 2.4301707179503592e-05, "loss": 1.1226, "step": 22654 }, { "epoch": 1.3503397305995948, "grad_norm": 2.736802816390991, "learning_rate": 2.4293595382699464e-05, "loss": 1.1759, "step": 22656 }, { "epoch": 1.3504589343187507, "grad_norm": 3.0938334465026855, "learning_rate": 2.4285484505501405e-05, "loss": 1.1748, "step": 22658 }, { "epoch": 1.3505781380379067, "grad_norm": 3.1578800678253174, "learning_rate": 2.4277374548199545e-05, "loss": 1.0584, "step": 22660 }, { "epoch": 1.350697341757063, "grad_norm": 3.5201187133789062, "learning_rate": 2.426926551108401e-05, "loss": 1.1111, "step": 22662 }, { "epoch": 1.3508165454762189, "grad_norm": 3.30652117729187, "learning_rate": 2.4261157394444883e-05, "loss": 1.1322, "step": 22664 }, { "epoch": 1.3509357491953748, "grad_norm": 3.3261661529541016, "learning_rate": 2.425305019857222e-05, "loss": 1.2938, "step": 22666 }, { "epoch": 1.3510549529145308, "grad_norm": 3.14056658744812, "learning_rate": 2.4244943923756026e-05, "loss": 1.1024, "step": 22668 }, { "epoch": 1.351174156633687, "grad_norm": 3.3462822437286377, "learning_rate": 2.4236838570286323e-05, "loss": 1.2435, "step": 22670 }, { "epoch": 1.351293360352843, "grad_norm": 3.3172848224639893, "learning_rate": 2.4228734138453075e-05, "loss": 1.1509, "step": 22672 }, { "epoch": 1.3514125640719992, "grad_norm": 3.3412654399871826, "learning_rate": 2.4220630628546144e-05, "loss": 1.2791, "step": 22674 }, { "epoch": 1.3515317677911551, "grad_norm": 3.4093527793884277, "learning_rate": 2.4212528040855474e-05, "loss": 1.0913, "step": 22676 }, { "epoch": 1.351650971510311, "grad_norm": 3.4201624393463135, "learning_rate": 2.420442637567091e-05, "loss": 1.1285, "step": 22678 }, { "epoch": 1.351770175229467, "grad_norm": 3.2332916259765625, "learning_rate": 2.4196325633282275e-05, "loss": 1.1752, "step": 22680 }, { "epoch": 1.3518893789486233, "grad_norm": 3.2123520374298096, "learning_rate": 2.4188225813979355e-05, "loss": 1.165, "step": 22682 }, { "epoch": 1.3520085826677792, "grad_norm": 3.3841712474823, "learning_rate": 2.418012691805191e-05, "loss": 1.1482, "step": 22684 }, { "epoch": 1.3521277863869352, "grad_norm": 3.3462162017822266, "learning_rate": 2.4172028945789672e-05, "loss": 1.1038, "step": 22686 }, { "epoch": 1.3522469901060914, "grad_norm": 3.0970447063446045, "learning_rate": 2.4163931897482323e-05, "loss": 1.116, "step": 22688 }, { "epoch": 1.3523661938252474, "grad_norm": 3.211019992828369, "learning_rate": 2.415583577341953e-05, "loss": 1.1685, "step": 22690 }, { "epoch": 1.3524853975444033, "grad_norm": 3.2497925758361816, "learning_rate": 2.41477405738909e-05, "loss": 1.2906, "step": 22692 }, { "epoch": 1.3526046012635593, "grad_norm": 3.0341062545776367, "learning_rate": 2.4139646299186075e-05, "loss": 1.2328, "step": 22694 }, { "epoch": 1.3527238049827155, "grad_norm": 3.4230754375457764, "learning_rate": 2.4131552949594542e-05, "loss": 1.0499, "step": 22696 }, { "epoch": 1.3528430087018715, "grad_norm": 3.5393216609954834, "learning_rate": 2.4123460525405895e-05, "loss": 1.2654, "step": 22698 }, { "epoch": 1.3529622124210277, "grad_norm": 2.7414610385894775, "learning_rate": 2.4115369026909618e-05, "loss": 1.1754, "step": 22700 }, { "epoch": 1.3530814161401836, "grad_norm": 3.27488374710083, "learning_rate": 2.410727845439511e-05, "loss": 1.2385, "step": 22702 }, { "epoch": 1.3532006198593396, "grad_norm": 3.205523729324341, "learning_rate": 2.4099188808151867e-05, "loss": 1.0726, "step": 22704 }, { "epoch": 1.3533198235784956, "grad_norm": 3.4442708492279053, "learning_rate": 2.4091100088469258e-05, "loss": 1.3102, "step": 22706 }, { "epoch": 1.3534390272976518, "grad_norm": 2.8860931396484375, "learning_rate": 2.4083012295636643e-05, "loss": 1.0855, "step": 22708 }, { "epoch": 1.3535582310168077, "grad_norm": 2.9539079666137695, "learning_rate": 2.4074925429943333e-05, "loss": 1.0413, "step": 22710 }, { "epoch": 1.3536774347359637, "grad_norm": 3.083590030670166, "learning_rate": 2.4066839491678683e-05, "loss": 1.0997, "step": 22712 }, { "epoch": 1.3537966384551199, "grad_norm": 3.4556543827056885, "learning_rate": 2.4058754481131897e-05, "loss": 1.2658, "step": 22714 }, { "epoch": 1.3539158421742759, "grad_norm": 3.0647060871124268, "learning_rate": 2.4050670398592195e-05, "loss": 1.1166, "step": 22716 }, { "epoch": 1.3540350458934318, "grad_norm": 2.989529609680176, "learning_rate": 2.404258724434884e-05, "loss": 1.2077, "step": 22718 }, { "epoch": 1.3541542496125878, "grad_norm": 3.0040605068206787, "learning_rate": 2.4034505018690912e-05, "loss": 1.1589, "step": 22720 }, { "epoch": 1.354273453331744, "grad_norm": 3.0446090698242188, "learning_rate": 2.4026423721907598e-05, "loss": 1.2133, "step": 22722 }, { "epoch": 1.3543926570509, "grad_norm": 2.953653335571289, "learning_rate": 2.4018343354287974e-05, "loss": 1.0614, "step": 22724 }, { "epoch": 1.3545118607700561, "grad_norm": 3.3238067626953125, "learning_rate": 2.4010263916121113e-05, "loss": 1.2136, "step": 22726 }, { "epoch": 1.3546310644892121, "grad_norm": 3.3172762393951416, "learning_rate": 2.400218540769602e-05, "loss": 1.124, "step": 22728 }, { "epoch": 1.354750268208368, "grad_norm": 2.9463629722595215, "learning_rate": 2.3994107829301716e-05, "loss": 1.1043, "step": 22730 }, { "epoch": 1.354869471927524, "grad_norm": 3.2217164039611816, "learning_rate": 2.3986031181227146e-05, "loss": 1.0795, "step": 22732 }, { "epoch": 1.3549886756466802, "grad_norm": 3.124488115310669, "learning_rate": 2.3977955463761252e-05, "loss": 1.1134, "step": 22734 }, { "epoch": 1.3551078793658362, "grad_norm": 3.2512850761413574, "learning_rate": 2.3969880677192925e-05, "loss": 1.0556, "step": 22736 }, { "epoch": 1.3552270830849922, "grad_norm": 3.4359123706817627, "learning_rate": 2.3961806821811004e-05, "loss": 1.1195, "step": 22738 }, { "epoch": 1.3553462868041484, "grad_norm": 3.1793367862701416, "learning_rate": 2.395373389790436e-05, "loss": 1.0701, "step": 22740 }, { "epoch": 1.3554654905233043, "grad_norm": 3.4936108589172363, "learning_rate": 2.394566190576178e-05, "loss": 1.2179, "step": 22742 }, { "epoch": 1.3555846942424603, "grad_norm": 3.121717691421509, "learning_rate": 2.3937590845672008e-05, "loss": 1.1542, "step": 22744 }, { "epoch": 1.3557038979616163, "grad_norm": 3.0711655616760254, "learning_rate": 2.3929520717923788e-05, "loss": 1.2112, "step": 22746 }, { "epoch": 1.3558231016807725, "grad_norm": 3.7098374366760254, "learning_rate": 2.392145152280581e-05, "loss": 1.0839, "step": 22748 }, { "epoch": 1.3559423053999284, "grad_norm": 3.7067971229553223, "learning_rate": 2.3913383260606736e-05, "loss": 1.2402, "step": 22750 }, { "epoch": 1.3560615091190846, "grad_norm": 2.9904260635375977, "learning_rate": 2.3905315931615195e-05, "loss": 1.175, "step": 22752 }, { "epoch": 1.3561807128382406, "grad_norm": 3.1327919960021973, "learning_rate": 2.3897249536119793e-05, "loss": 1.0671, "step": 22754 }, { "epoch": 1.3562999165573966, "grad_norm": 3.1026360988616943, "learning_rate": 2.388918407440906e-05, "loss": 1.247, "step": 22756 }, { "epoch": 1.3564191202765525, "grad_norm": 2.815274715423584, "learning_rate": 2.3881119546771585e-05, "loss": 1.1573, "step": 22758 }, { "epoch": 1.3565383239957087, "grad_norm": 3.4062976837158203, "learning_rate": 2.3873055953495814e-05, "loss": 1.1127, "step": 22760 }, { "epoch": 1.3566575277148647, "grad_norm": 3.0965607166290283, "learning_rate": 2.3864993294870204e-05, "loss": 1.123, "step": 22762 }, { "epoch": 1.3567767314340207, "grad_norm": 3.217691659927368, "learning_rate": 2.3856931571183244e-05, "loss": 1.0686, "step": 22764 }, { "epoch": 1.3568959351531769, "grad_norm": 2.9346163272857666, "learning_rate": 2.3848870782723247e-05, "loss": 1.2031, "step": 22766 }, { "epoch": 1.3570151388723328, "grad_norm": 3.3489670753479004, "learning_rate": 2.3840810929778638e-05, "loss": 1.1029, "step": 22768 }, { "epoch": 1.3571343425914888, "grad_norm": 3.09653639793396, "learning_rate": 2.3832752012637727e-05, "loss": 1.1306, "step": 22770 }, { "epoch": 1.3572535463106448, "grad_norm": 3.3226077556610107, "learning_rate": 2.3824694031588806e-05, "loss": 1.1032, "step": 22772 }, { "epoch": 1.357372750029801, "grad_norm": 3.1993672847747803, "learning_rate": 2.3816636986920137e-05, "loss": 1.179, "step": 22774 }, { "epoch": 1.357491953748957, "grad_norm": 2.918778896331787, "learning_rate": 2.3808580878919946e-05, "loss": 1.152, "step": 22776 }, { "epoch": 1.3576111574681131, "grad_norm": 3.1892507076263428, "learning_rate": 2.380052570787643e-05, "loss": 1.1494, "step": 22778 }, { "epoch": 1.357730361187269, "grad_norm": 3.262563705444336, "learning_rate": 2.3792471474077737e-05, "loss": 1.1264, "step": 22780 }, { "epoch": 1.357849564906425, "grad_norm": 3.4559130668640137, "learning_rate": 2.378441817781204e-05, "loss": 1.2398, "step": 22782 }, { "epoch": 1.357968768625581, "grad_norm": 3.3086395263671875, "learning_rate": 2.3776365819367363e-05, "loss": 1.2399, "step": 22784 }, { "epoch": 1.3580879723447372, "grad_norm": 3.2757022380828857, "learning_rate": 2.3768314399031827e-05, "loss": 1.0938, "step": 22786 }, { "epoch": 1.3582071760638932, "grad_norm": 3.505988121032715, "learning_rate": 2.376026391709343e-05, "loss": 1.174, "step": 22788 }, { "epoch": 1.3583263797830492, "grad_norm": 3.0375020503997803, "learning_rate": 2.375221437384017e-05, "loss": 1.0738, "step": 22790 }, { "epoch": 1.3584455835022053, "grad_norm": 2.9569742679595947, "learning_rate": 2.374416576956001e-05, "loss": 1.1146, "step": 22792 }, { "epoch": 1.3585647872213613, "grad_norm": 3.2249653339385986, "learning_rate": 2.3736118104540878e-05, "loss": 1.064, "step": 22794 }, { "epoch": 1.3586839909405173, "grad_norm": 3.025635004043579, "learning_rate": 2.372807137907066e-05, "loss": 1.1938, "step": 22796 }, { "epoch": 1.3588031946596733, "grad_norm": 2.9795777797698975, "learning_rate": 2.3720025593437217e-05, "loss": 1.1324, "step": 22798 }, { "epoch": 1.3589223983788294, "grad_norm": 3.3116021156311035, "learning_rate": 2.371198074792837e-05, "loss": 1.2959, "step": 22800 }, { "epoch": 1.3590416020979854, "grad_norm": 3.0736069679260254, "learning_rate": 2.3703936842831905e-05, "loss": 1.0643, "step": 22802 }, { "epoch": 1.3591608058171416, "grad_norm": 3.2873404026031494, "learning_rate": 2.3695893878435598e-05, "loss": 1.1635, "step": 22804 }, { "epoch": 1.3592800095362976, "grad_norm": 2.9978082180023193, "learning_rate": 2.3687851855027193e-05, "loss": 1.1318, "step": 22806 }, { "epoch": 1.3593992132554535, "grad_norm": 3.4984986782073975, "learning_rate": 2.3679810772894312e-05, "loss": 1.2271, "step": 22808 }, { "epoch": 1.3595184169746095, "grad_norm": 2.818577289581299, "learning_rate": 2.3671770632324675e-05, "loss": 1.1647, "step": 22810 }, { "epoch": 1.3596376206937657, "grad_norm": 2.6464431285858154, "learning_rate": 2.366373143360588e-05, "loss": 1.1411, "step": 22812 }, { "epoch": 1.3597568244129217, "grad_norm": 3.4396626949310303, "learning_rate": 2.3655693177025524e-05, "loss": 1.1452, "step": 22814 }, { "epoch": 1.3598760281320776, "grad_norm": 2.901057243347168, "learning_rate": 2.3647655862871153e-05, "loss": 1.0878, "step": 22816 }, { "epoch": 1.3599952318512338, "grad_norm": 3.1457111835479736, "learning_rate": 2.36396194914303e-05, "loss": 1.1234, "step": 22818 }, { "epoch": 1.3601144355703898, "grad_norm": 2.867274761199951, "learning_rate": 2.3631584062990448e-05, "loss": 1.0612, "step": 22820 }, { "epoch": 1.3602336392895458, "grad_norm": 3.4118480682373047, "learning_rate": 2.3623549577839055e-05, "loss": 1.2278, "step": 22822 }, { "epoch": 1.3603528430087017, "grad_norm": 3.31013822555542, "learning_rate": 2.3615516036263535e-05, "loss": 1.0843, "step": 22824 }, { "epoch": 1.360472046727858, "grad_norm": 2.8020927906036377, "learning_rate": 2.3607483438551265e-05, "loss": 1.0348, "step": 22826 }, { "epoch": 1.360591250447014, "grad_norm": 3.215700626373291, "learning_rate": 2.3599451784989647e-05, "loss": 1.1411, "step": 22828 }, { "epoch": 1.36071045416617, "grad_norm": 3.077193021774292, "learning_rate": 2.3591421075865932e-05, "loss": 1.011, "step": 22830 }, { "epoch": 1.360829657885326, "grad_norm": 3.2149949073791504, "learning_rate": 2.3583391311467463e-05, "loss": 1.1678, "step": 22832 }, { "epoch": 1.360948861604482, "grad_norm": 3.5613718032836914, "learning_rate": 2.3575362492081464e-05, "loss": 1.1766, "step": 22834 }, { "epoch": 1.361068065323638, "grad_norm": 3.499215841293335, "learning_rate": 2.3567334617995164e-05, "loss": 1.0447, "step": 22836 }, { "epoch": 1.3611872690427942, "grad_norm": 3.4011902809143066, "learning_rate": 2.355930768949574e-05, "loss": 1.0866, "step": 22838 }, { "epoch": 1.3613064727619502, "grad_norm": 3.077202558517456, "learning_rate": 2.3551281706870347e-05, "loss": 1.0666, "step": 22840 }, { "epoch": 1.3614256764811061, "grad_norm": 3.56003475189209, "learning_rate": 2.3543256670406105e-05, "loss": 1.1357, "step": 22842 }, { "epoch": 1.3615448802002623, "grad_norm": 3.3347554206848145, "learning_rate": 2.353523258039007e-05, "loss": 1.2518, "step": 22844 }, { "epoch": 1.3616640839194183, "grad_norm": 3.314713478088379, "learning_rate": 2.352720943710935e-05, "loss": 0.9917, "step": 22846 }, { "epoch": 1.3617832876385743, "grad_norm": 3.2439725399017334, "learning_rate": 2.351918724085089e-05, "loss": 1.1683, "step": 22848 }, { "epoch": 1.3619024913577302, "grad_norm": 3.127195358276367, "learning_rate": 2.3511165991901718e-05, "loss": 1.2272, "step": 22850 }, { "epoch": 1.3620216950768864, "grad_norm": 3.2854163646698, "learning_rate": 2.3503145690548796e-05, "loss": 1.1391, "step": 22852 }, { "epoch": 1.3621408987960424, "grad_norm": 3.148059129714966, "learning_rate": 2.3495126337078966e-05, "loss": 1.1032, "step": 22854 }, { "epoch": 1.3622601025151986, "grad_norm": 3.256564140319824, "learning_rate": 2.348710793177918e-05, "loss": 1.1262, "step": 22856 }, { "epoch": 1.3623793062343545, "grad_norm": 3.0497994422912598, "learning_rate": 2.3479090474936245e-05, "loss": 1.0354, "step": 22858 }, { "epoch": 1.3624985099535105, "grad_norm": 3.0087766647338867, "learning_rate": 2.347107396683699e-05, "loss": 1.0953, "step": 22860 }, { "epoch": 1.3626177136726665, "grad_norm": 3.2044837474823, "learning_rate": 2.346305840776819e-05, "loss": 1.3311, "step": 22862 }, { "epoch": 1.3627369173918227, "grad_norm": 2.955106019973755, "learning_rate": 2.345504379801658e-05, "loss": 1.0707, "step": 22864 }, { "epoch": 1.3628561211109786, "grad_norm": 3.1463334560394287, "learning_rate": 2.344703013786888e-05, "loss": 1.0942, "step": 22866 }, { "epoch": 1.3629753248301348, "grad_norm": 2.9680752754211426, "learning_rate": 2.343901742761176e-05, "loss": 1.086, "step": 22868 }, { "epoch": 1.3630945285492908, "grad_norm": 3.3952524662017822, "learning_rate": 2.3431005667531863e-05, "loss": 1.1909, "step": 22870 }, { "epoch": 1.3632137322684468, "grad_norm": 3.248330593109131, "learning_rate": 2.3422994857915782e-05, "loss": 1.086, "step": 22872 }, { "epoch": 1.3633329359876027, "grad_norm": 3.02400541305542, "learning_rate": 2.341498499905012e-05, "loss": 1.0097, "step": 22874 }, { "epoch": 1.3634521397067587, "grad_norm": 3.1130874156951904, "learning_rate": 2.3406976091221412e-05, "loss": 0.9792, "step": 22876 }, { "epoch": 1.363571343425915, "grad_norm": 3.3951611518859863, "learning_rate": 2.3398968134716155e-05, "loss": 1.2464, "step": 22878 }, { "epoch": 1.3636905471450709, "grad_norm": 3.053182601928711, "learning_rate": 2.339096112982082e-05, "loss": 1.1627, "step": 22880 }, { "epoch": 1.363809750864227, "grad_norm": 3.1577799320220947, "learning_rate": 2.338295507682185e-05, "loss": 1.1221, "step": 22882 }, { "epoch": 1.363928954583383, "grad_norm": 3.2445549964904785, "learning_rate": 2.337494997600564e-05, "loss": 1.0782, "step": 22884 }, { "epoch": 1.364048158302539, "grad_norm": 3.2263073921203613, "learning_rate": 2.336694582765857e-05, "loss": 1.2292, "step": 22886 }, { "epoch": 1.364167362021695, "grad_norm": 3.3261477947235107, "learning_rate": 2.3358942632066972e-05, "loss": 1.1179, "step": 22888 }, { "epoch": 1.3642865657408512, "grad_norm": 2.6390318870544434, "learning_rate": 2.335094038951713e-05, "loss": 1.0323, "step": 22890 }, { "epoch": 1.3644057694600071, "grad_norm": 3.2797257900238037, "learning_rate": 2.3342939100295362e-05, "loss": 1.2376, "step": 22892 }, { "epoch": 1.3645249731791633, "grad_norm": 3.1070642471313477, "learning_rate": 2.3334938764687836e-05, "loss": 1.1595, "step": 22894 }, { "epoch": 1.3646441768983193, "grad_norm": 3.3035266399383545, "learning_rate": 2.33269393829808e-05, "loss": 1.2113, "step": 22896 }, { "epoch": 1.3647633806174753, "grad_norm": 3.0669209957122803, "learning_rate": 2.3318940955460423e-05, "loss": 1.2416, "step": 22898 }, { "epoch": 1.3648825843366312, "grad_norm": 3.046535015106201, "learning_rate": 2.3310943482412777e-05, "loss": 1.0515, "step": 22900 }, { "epoch": 1.3650017880557872, "grad_norm": 3.17669939994812, "learning_rate": 2.3302946964124016e-05, "loss": 1.2075, "step": 22902 }, { "epoch": 1.3651209917749434, "grad_norm": 3.092914342880249, "learning_rate": 2.3294951400880188e-05, "loss": 1.0245, "step": 22904 }, { "epoch": 1.3652401954940994, "grad_norm": 3.24975848197937, "learning_rate": 2.3286956792967317e-05, "loss": 1.0755, "step": 22906 }, { "epoch": 1.3653593992132556, "grad_norm": 3.14797043800354, "learning_rate": 2.3278963140671384e-05, "loss": 1.0975, "step": 22908 }, { "epoch": 1.3654786029324115, "grad_norm": 3.208814859390259, "learning_rate": 2.32709704442784e-05, "loss": 1.1633, "step": 22910 }, { "epoch": 1.3655978066515675, "grad_norm": 3.549417495727539, "learning_rate": 2.326297870407424e-05, "loss": 1.0279, "step": 22912 }, { "epoch": 1.3657170103707235, "grad_norm": 2.749542713165283, "learning_rate": 2.3254987920344785e-05, "loss": 1.1901, "step": 22914 }, { "epoch": 1.3658362140898797, "grad_norm": 3.227518320083618, "learning_rate": 2.324699809337596e-05, "loss": 1.1424, "step": 22916 }, { "epoch": 1.3659554178090356, "grad_norm": 2.9877026081085205, "learning_rate": 2.3239009223453512e-05, "loss": 1.1637, "step": 22918 }, { "epoch": 1.3660746215281918, "grad_norm": 3.175614833831787, "learning_rate": 2.3231021310863283e-05, "loss": 1.129, "step": 22920 }, { "epoch": 1.3661938252473478, "grad_norm": 3.2635560035705566, "learning_rate": 2.3223034355891017e-05, "loss": 1.0972, "step": 22922 }, { "epoch": 1.3663130289665038, "grad_norm": 3.302276849746704, "learning_rate": 2.3215048358822416e-05, "loss": 1.1343, "step": 22924 }, { "epoch": 1.3664322326856597, "grad_norm": 3.516378164291382, "learning_rate": 2.320706331994319e-05, "loss": 1.2495, "step": 22926 }, { "epoch": 1.366551436404816, "grad_norm": 3.615473508834839, "learning_rate": 2.3199079239538975e-05, "loss": 1.1674, "step": 22928 }, { "epoch": 1.3666706401239719, "grad_norm": 2.9929637908935547, "learning_rate": 2.3191096117895393e-05, "loss": 1.1721, "step": 22930 }, { "epoch": 1.3667898438431278, "grad_norm": 3.267703056335449, "learning_rate": 2.318311395529803e-05, "loss": 1.1271, "step": 22932 }, { "epoch": 1.366909047562284, "grad_norm": 3.252892255783081, "learning_rate": 2.3175132752032434e-05, "loss": 1.271, "step": 22934 }, { "epoch": 1.36702825128144, "grad_norm": 3.0634007453918457, "learning_rate": 2.3167152508384106e-05, "loss": 1.1587, "step": 22936 }, { "epoch": 1.367147455000596, "grad_norm": 3.185858726501465, "learning_rate": 2.315917322463855e-05, "loss": 1.2295, "step": 22938 }, { "epoch": 1.367266658719752, "grad_norm": 3.4183905124664307, "learning_rate": 2.3151194901081225e-05, "loss": 1.2132, "step": 22940 }, { "epoch": 1.3673858624389081, "grad_norm": 3.267038345336914, "learning_rate": 2.3143217537997485e-05, "loss": 1.1688, "step": 22942 }, { "epoch": 1.367505066158064, "grad_norm": 3.1525447368621826, "learning_rate": 2.3135241135672757e-05, "loss": 1.0939, "step": 22944 }, { "epoch": 1.3676242698772203, "grad_norm": 3.1449029445648193, "learning_rate": 2.312726569439238e-05, "loss": 1.1992, "step": 22946 }, { "epoch": 1.3677434735963763, "grad_norm": 3.3224728107452393, "learning_rate": 2.3119291214441647e-05, "loss": 1.2307, "step": 22948 }, { "epoch": 1.3678626773155322, "grad_norm": 3.047239065170288, "learning_rate": 2.311131769610584e-05, "loss": 1.1335, "step": 22950 }, { "epoch": 1.3679818810346882, "grad_norm": 3.5471842288970947, "learning_rate": 2.310334513967019e-05, "loss": 1.2693, "step": 22952 }, { "epoch": 1.3681010847538444, "grad_norm": 3.2686495780944824, "learning_rate": 2.3095373545419902e-05, "loss": 0.9492, "step": 22954 }, { "epoch": 1.3682202884730004, "grad_norm": 3.1140682697296143, "learning_rate": 2.3087402913640187e-05, "loss": 1.0322, "step": 22956 }, { "epoch": 1.3683394921921563, "grad_norm": 3.026885509490967, "learning_rate": 2.3079433244616134e-05, "loss": 1.1697, "step": 22958 }, { "epoch": 1.3684586959113125, "grad_norm": 3.4172487258911133, "learning_rate": 2.307146453863284e-05, "loss": 1.1588, "step": 22960 }, { "epoch": 1.3685778996304685, "grad_norm": 3.091252326965332, "learning_rate": 2.3063496795975437e-05, "loss": 1.1106, "step": 22962 }, { "epoch": 1.3686971033496245, "grad_norm": 2.9929347038269043, "learning_rate": 2.3055530016928877e-05, "loss": 0.9665, "step": 22964 }, { "epoch": 1.3688163070687804, "grad_norm": 3.232865810394287, "learning_rate": 2.3047564201778214e-05, "loss": 1.1082, "step": 22966 }, { "epoch": 1.3689355107879366, "grad_norm": 3.4769327640533447, "learning_rate": 2.3039599350808398e-05, "loss": 1.0906, "step": 22968 }, { "epoch": 1.3690547145070926, "grad_norm": 3.259612798690796, "learning_rate": 2.3031635464304356e-05, "loss": 1.1475, "step": 22970 }, { "epoch": 1.3691739182262488, "grad_norm": 3.3037750720977783, "learning_rate": 2.3023672542550984e-05, "loss": 1.1852, "step": 22972 }, { "epoch": 1.3692931219454048, "grad_norm": 3.203545331954956, "learning_rate": 2.3015710585833144e-05, "loss": 1.2016, "step": 22974 }, { "epoch": 1.3694123256645607, "grad_norm": 3.336582899093628, "learning_rate": 2.300774959443566e-05, "loss": 1.1462, "step": 22976 }, { "epoch": 1.3695315293837167, "grad_norm": 3.2808597087860107, "learning_rate": 2.2999789568643314e-05, "loss": 1.1481, "step": 22978 }, { "epoch": 1.3696507331028729, "grad_norm": 2.8951642513275146, "learning_rate": 2.2991830508740908e-05, "loss": 1.1133, "step": 22980 }, { "epoch": 1.3697699368220289, "grad_norm": 3.066680431365967, "learning_rate": 2.29838724150131e-05, "loss": 1.2067, "step": 22982 }, { "epoch": 1.3698891405411848, "grad_norm": 3.5408143997192383, "learning_rate": 2.2975915287744626e-05, "loss": 1.0497, "step": 22984 }, { "epoch": 1.370008344260341, "grad_norm": 3.340919017791748, "learning_rate": 2.296795912722014e-05, "loss": 1.0578, "step": 22986 }, { "epoch": 1.370127547979497, "grad_norm": 3.227581739425659, "learning_rate": 2.296000393372421e-05, "loss": 1.1671, "step": 22988 }, { "epoch": 1.370246751698653, "grad_norm": 3.34852933883667, "learning_rate": 2.295204970754147e-05, "loss": 1.1614, "step": 22990 }, { "epoch": 1.370365955417809, "grad_norm": 3.0375478267669678, "learning_rate": 2.294409644895646e-05, "loss": 0.9971, "step": 22992 }, { "epoch": 1.3704851591369651, "grad_norm": 2.7897531986236572, "learning_rate": 2.2936144158253692e-05, "loss": 1.2052, "step": 22994 }, { "epoch": 1.370604362856121, "grad_norm": 3.4361979961395264, "learning_rate": 2.2928192835717644e-05, "loss": 1.15, "step": 22996 }, { "epoch": 1.3707235665752773, "grad_norm": 3.117781639099121, "learning_rate": 2.2920242481632766e-05, "loss": 1.1038, "step": 22998 }, { "epoch": 1.3708427702944332, "grad_norm": 2.875145435333252, "learning_rate": 2.291229309628346e-05, "loss": 1.076, "step": 23000 }, { "epoch": 1.3709619740135892, "grad_norm": 3.1130623817443848, "learning_rate": 2.2904344679954093e-05, "loss": 1.1376, "step": 23002 }, { "epoch": 1.3710811777327452, "grad_norm": 2.9954724311828613, "learning_rate": 2.2896397232929068e-05, "loss": 1.194, "step": 23004 }, { "epoch": 1.3712003814519014, "grad_norm": 3.1422312259674072, "learning_rate": 2.2888450755492602e-05, "loss": 1.3612, "step": 23006 }, { "epoch": 1.3713195851710573, "grad_norm": 3.3675737380981445, "learning_rate": 2.2880505247929037e-05, "loss": 1.1759, "step": 23008 }, { "epoch": 1.3714387888902133, "grad_norm": 2.979466676712036, "learning_rate": 2.2872560710522582e-05, "loss": 1.1986, "step": 23010 }, { "epoch": 1.3715579926093695, "grad_norm": 3.3456034660339355, "learning_rate": 2.2864617143557456e-05, "loss": 1.141, "step": 23012 }, { "epoch": 1.3716771963285255, "grad_norm": 3.1956915855407715, "learning_rate": 2.285667454731781e-05, "loss": 1.0372, "step": 23014 }, { "epoch": 1.3717964000476814, "grad_norm": 3.1870474815368652, "learning_rate": 2.2848732922087786e-05, "loss": 1.1171, "step": 23016 }, { "epoch": 1.3719156037668374, "grad_norm": 3.0528767108917236, "learning_rate": 2.2840792268151483e-05, "loss": 1.046, "step": 23018 }, { "epoch": 1.3720348074859936, "grad_norm": 2.8483707904815674, "learning_rate": 2.283285258579297e-05, "loss": 1.2186, "step": 23020 }, { "epoch": 1.3721540112051496, "grad_norm": 3.079176425933838, "learning_rate": 2.282491387529626e-05, "loss": 1.176, "step": 23022 }, { "epoch": 1.3722732149243058, "grad_norm": 2.7858834266662598, "learning_rate": 2.281697613694535e-05, "loss": 1.1279, "step": 23024 }, { "epoch": 1.3723924186434617, "grad_norm": 3.2592830657958984, "learning_rate": 2.2809039371024244e-05, "loss": 1.2212, "step": 23026 }, { "epoch": 1.3725116223626177, "grad_norm": 3.1807825565338135, "learning_rate": 2.2801103577816795e-05, "loss": 1.3252, "step": 23028 }, { "epoch": 1.3726308260817737, "grad_norm": 3.0132203102111816, "learning_rate": 2.279316875760695e-05, "loss": 1.103, "step": 23030 }, { "epoch": 1.3727500298009299, "grad_norm": 3.4417338371276855, "learning_rate": 2.2785234910678565e-05, "loss": 1.2319, "step": 23032 }, { "epoch": 1.3728692335200858, "grad_norm": 3.2219722270965576, "learning_rate": 2.27773020373154e-05, "loss": 1.1696, "step": 23034 }, { "epoch": 1.3729884372392418, "grad_norm": 3.2826342582702637, "learning_rate": 2.2769370137801304e-05, "loss": 1.2092, "step": 23036 }, { "epoch": 1.373107640958398, "grad_norm": 2.8976616859436035, "learning_rate": 2.2761439212420012e-05, "loss": 1.013, "step": 23038 }, { "epoch": 1.373226844677554, "grad_norm": 3.068502426147461, "learning_rate": 2.275350926145523e-05, "loss": 1.0761, "step": 23040 }, { "epoch": 1.37334604839671, "grad_norm": 3.231372356414795, "learning_rate": 2.2745580285190626e-05, "loss": 1.1141, "step": 23042 }, { "epoch": 1.373465252115866, "grad_norm": 2.946776866912842, "learning_rate": 2.2737652283909905e-05, "loss": 1.1489, "step": 23044 }, { "epoch": 1.373584455835022, "grad_norm": 3.4352638721466064, "learning_rate": 2.2729725257896618e-05, "loss": 1.2238, "step": 23046 }, { "epoch": 1.373703659554178, "grad_norm": 3.5309360027313232, "learning_rate": 2.272179920743434e-05, "loss": 1.1739, "step": 23048 }, { "epoch": 1.3738228632733342, "grad_norm": 2.652594804763794, "learning_rate": 2.2713874132806677e-05, "loss": 1.0881, "step": 23050 }, { "epoch": 1.3739420669924902, "grad_norm": 3.116150140762329, "learning_rate": 2.2705950034297058e-05, "loss": 1.1774, "step": 23052 }, { "epoch": 1.3740612707116462, "grad_norm": 3.0929408073425293, "learning_rate": 2.269802691218901e-05, "loss": 1.1257, "step": 23054 }, { "epoch": 1.3741804744308022, "grad_norm": 3.478344440460205, "learning_rate": 2.2690104766765956e-05, "loss": 1.169, "step": 23056 }, { "epoch": 1.3742996781499583, "grad_norm": 3.859905481338501, "learning_rate": 2.268218359831129e-05, "loss": 1.1186, "step": 23058 }, { "epoch": 1.3744188818691143, "grad_norm": 3.2033863067626953, "learning_rate": 2.267426340710838e-05, "loss": 1.1436, "step": 23060 }, { "epoch": 1.3745380855882703, "grad_norm": 2.971677541732788, "learning_rate": 2.266634419344057e-05, "loss": 1.1697, "step": 23062 }, { "epoch": 1.3746572893074265, "grad_norm": 3.414510488510132, "learning_rate": 2.2658425957591145e-05, "loss": 1.1923, "step": 23064 }, { "epoch": 1.3747764930265824, "grad_norm": 3.2501296997070312, "learning_rate": 2.265050869984337e-05, "loss": 1.2204, "step": 23066 }, { "epoch": 1.3748956967457384, "grad_norm": 3.300997734069824, "learning_rate": 2.264259242048048e-05, "loss": 1.3233, "step": 23068 }, { "epoch": 1.3750149004648944, "grad_norm": 3.029615879058838, "learning_rate": 2.2634677119785637e-05, "loss": 1.192, "step": 23070 }, { "epoch": 1.3751341041840506, "grad_norm": 2.8447558879852295, "learning_rate": 2.2626762798042047e-05, "loss": 1.0417, "step": 23072 }, { "epoch": 1.3752533079032065, "grad_norm": 3.117591619491577, "learning_rate": 2.2618849455532803e-05, "loss": 1.2049, "step": 23074 }, { "epoch": 1.3753725116223627, "grad_norm": 3.26369309425354, "learning_rate": 2.2610937092540994e-05, "loss": 1.1006, "step": 23076 }, { "epoch": 1.3754917153415187, "grad_norm": 3.0035557746887207, "learning_rate": 2.2603025709349685e-05, "loss": 1.0521, "step": 23078 }, { "epoch": 1.3756109190606747, "grad_norm": 3.0084497928619385, "learning_rate": 2.2595115306241883e-05, "loss": 1.1107, "step": 23080 }, { "epoch": 1.3757301227798306, "grad_norm": 3.3512582778930664, "learning_rate": 2.2587205883500568e-05, "loss": 1.0984, "step": 23082 }, { "epoch": 1.3758493264989868, "grad_norm": 3.657136917114258, "learning_rate": 2.2579297441408694e-05, "loss": 1.3346, "step": 23084 }, { "epoch": 1.3759685302181428, "grad_norm": 3.561892032623291, "learning_rate": 2.2571389980249165e-05, "loss": 1.2444, "step": 23086 }, { "epoch": 1.3760877339372988, "grad_norm": 2.814969062805176, "learning_rate": 2.2563483500304844e-05, "loss": 1.0663, "step": 23088 }, { "epoch": 1.376206937656455, "grad_norm": 3.367166042327881, "learning_rate": 2.255557800185863e-05, "loss": 1.1259, "step": 23090 }, { "epoch": 1.376326141375611, "grad_norm": 3.241997718811035, "learning_rate": 2.2547673485193273e-05, "loss": 1.123, "step": 23092 }, { "epoch": 1.376445345094767, "grad_norm": 3.278217077255249, "learning_rate": 2.2539769950591537e-05, "loss": 1.2035, "step": 23094 }, { "epoch": 1.3765645488139229, "grad_norm": 3.3333723545074463, "learning_rate": 2.253186739833623e-05, "loss": 1.2378, "step": 23096 }, { "epoch": 1.376683752533079, "grad_norm": 3.3050098419189453, "learning_rate": 2.252396582870997e-05, "loss": 1.276, "step": 23098 }, { "epoch": 1.376802956252235, "grad_norm": 2.8360824584960938, "learning_rate": 2.2516065241995477e-05, "loss": 1.1235, "step": 23100 }, { "epoch": 1.3769221599713912, "grad_norm": 3.208254337310791, "learning_rate": 2.2508165638475363e-05, "loss": 1.0232, "step": 23102 }, { "epoch": 1.3770413636905472, "grad_norm": 3.140101909637451, "learning_rate": 2.250026701843223e-05, "loss": 1.1452, "step": 23104 }, { "epoch": 1.3771605674097032, "grad_norm": 2.600229263305664, "learning_rate": 2.249236938214863e-05, "loss": 1.2238, "step": 23106 }, { "epoch": 1.3772797711288591, "grad_norm": 3.5901455879211426, "learning_rate": 2.2484472729907103e-05, "loss": 1.0476, "step": 23108 }, { "epoch": 1.3773989748480153, "grad_norm": 3.1515872478485107, "learning_rate": 2.247657706199012e-05, "loss": 1.0668, "step": 23110 }, { "epoch": 1.3775181785671713, "grad_norm": 2.9752864837646484, "learning_rate": 2.2468682378680134e-05, "loss": 1.134, "step": 23112 }, { "epoch": 1.3776373822863273, "grad_norm": 3.332322120666504, "learning_rate": 2.2460788680259608e-05, "loss": 1.176, "step": 23114 }, { "epoch": 1.3777565860054835, "grad_norm": 2.874525547027588, "learning_rate": 2.245289596701086e-05, "loss": 1.1824, "step": 23116 }, { "epoch": 1.3778757897246394, "grad_norm": 3.1142759323120117, "learning_rate": 2.24450042392163e-05, "loss": 1.1163, "step": 23118 }, { "epoch": 1.3779949934437954, "grad_norm": 2.7359230518341064, "learning_rate": 2.2437113497158207e-05, "loss": 1.1305, "step": 23120 }, { "epoch": 1.3781141971629514, "grad_norm": 3.3898990154266357, "learning_rate": 2.2429223741118875e-05, "loss": 1.2016, "step": 23122 }, { "epoch": 1.3782334008821076, "grad_norm": 3.3741443157196045, "learning_rate": 2.2421334971380543e-05, "loss": 1.1936, "step": 23124 }, { "epoch": 1.3783526046012635, "grad_norm": 3.5166690349578857, "learning_rate": 2.241344718822542e-05, "loss": 1.2349, "step": 23126 }, { "epoch": 1.3784718083204197, "grad_norm": 3.1857762336730957, "learning_rate": 2.2405560391935672e-05, "loss": 1.2305, "step": 23128 }, { "epoch": 1.3785910120395757, "grad_norm": 3.458677291870117, "learning_rate": 2.2397674582793443e-05, "loss": 1.1664, "step": 23130 }, { "epoch": 1.3787102157587316, "grad_norm": 3.5987629890441895, "learning_rate": 2.238978976108083e-05, "loss": 1.1778, "step": 23132 }, { "epoch": 1.3788294194778876, "grad_norm": 3.364544630050659, "learning_rate": 2.2381905927079892e-05, "loss": 1.166, "step": 23134 }, { "epoch": 1.3789486231970438, "grad_norm": 3.4108383655548096, "learning_rate": 2.2374023081072697e-05, "loss": 1.1232, "step": 23136 }, { "epoch": 1.3790678269161998, "grad_norm": 3.0409512519836426, "learning_rate": 2.236614122334123e-05, "loss": 1.0297, "step": 23138 }, { "epoch": 1.3791870306353557, "grad_norm": 3.0177361965179443, "learning_rate": 2.2358260354167406e-05, "loss": 1.0767, "step": 23140 }, { "epoch": 1.379306234354512, "grad_norm": 3.2128653526306152, "learning_rate": 2.23503804738332e-05, "loss": 1.062, "step": 23142 }, { "epoch": 1.379425438073668, "grad_norm": 2.8326127529144287, "learning_rate": 2.2342501582620484e-05, "loss": 1.0241, "step": 23144 }, { "epoch": 1.3795446417928239, "grad_norm": 3.3275678157806396, "learning_rate": 2.2334623680811118e-05, "loss": 1.0937, "step": 23146 }, { "epoch": 1.3796638455119798, "grad_norm": 3.133054256439209, "learning_rate": 2.232674676868692e-05, "loss": 1.1971, "step": 23148 }, { "epoch": 1.379783049231136, "grad_norm": 2.793884754180908, "learning_rate": 2.2318870846529673e-05, "loss": 1.2871, "step": 23150 }, { "epoch": 1.379902252950292, "grad_norm": 3.5201053619384766, "learning_rate": 2.2310995914621125e-05, "loss": 1.1896, "step": 23152 }, { "epoch": 1.3800214566694482, "grad_norm": 3.49096941947937, "learning_rate": 2.230312197324299e-05, "loss": 1.293, "step": 23154 }, { "epoch": 1.3801406603886042, "grad_norm": 3.3780927658081055, "learning_rate": 2.2295249022676946e-05, "loss": 1.0788, "step": 23156 }, { "epoch": 1.3802598641077601, "grad_norm": 3.052281141281128, "learning_rate": 2.2287377063204616e-05, "loss": 1.0376, "step": 23158 }, { "epoch": 1.380379067826916, "grad_norm": 3.314563512802124, "learning_rate": 2.2279506095107667e-05, "loss": 1.1005, "step": 23160 }, { "epoch": 1.3804982715460723, "grad_norm": 2.964094877243042, "learning_rate": 2.227163611866759e-05, "loss": 1.0772, "step": 23162 }, { "epoch": 1.3806174752652283, "grad_norm": 3.0477540493011475, "learning_rate": 2.2263767134165975e-05, "loss": 1.2509, "step": 23164 }, { "epoch": 1.3807366789843842, "grad_norm": 3.352689504623413, "learning_rate": 2.225589914188433e-05, "loss": 1.3356, "step": 23166 }, { "epoch": 1.3808558827035404, "grad_norm": 3.0930593013763428, "learning_rate": 2.2248032142104063e-05, "loss": 1.1293, "step": 23168 }, { "epoch": 1.3809750864226964, "grad_norm": 3.1547012329101562, "learning_rate": 2.2240166135106656e-05, "loss": 1.2469, "step": 23170 }, { "epoch": 1.3810942901418524, "grad_norm": 3.190542459487915, "learning_rate": 2.223230112117348e-05, "loss": 1.0642, "step": 23172 }, { "epoch": 1.3812134938610083, "grad_norm": 3.483499526977539, "learning_rate": 2.222443710058591e-05, "loss": 1.2373, "step": 23174 }, { "epoch": 1.3813326975801645, "grad_norm": 3.2637948989868164, "learning_rate": 2.221657407362523e-05, "loss": 1.0927, "step": 23176 }, { "epoch": 1.3814519012993205, "grad_norm": 2.817891836166382, "learning_rate": 2.22087120405728e-05, "loss": 0.983, "step": 23178 }, { "epoch": 1.3815711050184767, "grad_norm": 3.319660186767578, "learning_rate": 2.2200851001709782e-05, "loss": 1.1333, "step": 23180 }, { "epoch": 1.3816903087376327, "grad_norm": 3.1911802291870117, "learning_rate": 2.2192990957317462e-05, "loss": 1.1462, "step": 23182 }, { "epoch": 1.3818095124567886, "grad_norm": 2.9489574432373047, "learning_rate": 2.2185131907677013e-05, "loss": 1.1617, "step": 23184 }, { "epoch": 1.3819287161759446, "grad_norm": 3.323275089263916, "learning_rate": 2.2177273853069525e-05, "loss": 1.206, "step": 23186 }, { "epoch": 1.3820479198951008, "grad_norm": 3.0559427738189697, "learning_rate": 2.2169416793776166e-05, "loss": 1.0364, "step": 23188 }, { "epoch": 1.3821671236142568, "grad_norm": 3.387889862060547, "learning_rate": 2.216156073007799e-05, "loss": 1.0905, "step": 23190 }, { "epoch": 1.3822863273334127, "grad_norm": 3.178250312805176, "learning_rate": 2.215370566225603e-05, "loss": 1.0376, "step": 23192 }, { "epoch": 1.382405531052569, "grad_norm": 3.462714672088623, "learning_rate": 2.21458515905913e-05, "loss": 1.253, "step": 23194 }, { "epoch": 1.3825247347717249, "grad_norm": 3.6721601486206055, "learning_rate": 2.2137998515364754e-05, "loss": 1.2265, "step": 23196 }, { "epoch": 1.3826439384908809, "grad_norm": 2.898428201675415, "learning_rate": 2.2130146436857337e-05, "loss": 1.1619, "step": 23198 }, { "epoch": 1.3827631422100368, "grad_norm": 3.3485143184661865, "learning_rate": 2.2122295355349915e-05, "loss": 1.267, "step": 23200 }, { "epoch": 1.382882345929193, "grad_norm": 2.8439080715179443, "learning_rate": 2.2114445271123408e-05, "loss": 1.1074, "step": 23202 }, { "epoch": 1.383001549648349, "grad_norm": 3.581376791000366, "learning_rate": 2.2106596184458567e-05, "loss": 1.3065, "step": 23204 }, { "epoch": 1.3831207533675052, "grad_norm": 3.1227619647979736, "learning_rate": 2.2098748095636234e-05, "loss": 1.209, "step": 23206 }, { "epoch": 1.3832399570866611, "grad_norm": 3.278205156326294, "learning_rate": 2.2090901004937147e-05, "loss": 1.2986, "step": 23208 }, { "epoch": 1.3833591608058171, "grad_norm": 3.522030830383301, "learning_rate": 2.208305491264202e-05, "loss": 1.2688, "step": 23210 }, { "epoch": 1.383478364524973, "grad_norm": 3.1168906688690186, "learning_rate": 2.2075209819031535e-05, "loss": 1.0543, "step": 23212 }, { "epoch": 1.3835975682441293, "grad_norm": 3.065937042236328, "learning_rate": 2.206736572438634e-05, "loss": 1.1454, "step": 23214 }, { "epoch": 1.3837167719632852, "grad_norm": 3.140497922897339, "learning_rate": 2.205952262898704e-05, "loss": 1.2417, "step": 23216 }, { "epoch": 1.3838359756824412, "grad_norm": 3.3533530235290527, "learning_rate": 2.2051680533114215e-05, "loss": 1.1315, "step": 23218 }, { "epoch": 1.3839551794015974, "grad_norm": 3.264799118041992, "learning_rate": 2.20438394370484e-05, "loss": 1.1864, "step": 23220 }, { "epoch": 1.3840743831207534, "grad_norm": 3.0790324211120605, "learning_rate": 2.2035999341070086e-05, "loss": 1.0927, "step": 23222 }, { "epoch": 1.3841935868399093, "grad_norm": 3.1456241607666016, "learning_rate": 2.2028160245459785e-05, "loss": 1.1697, "step": 23224 }, { "epoch": 1.3843127905590653, "grad_norm": 2.8751795291900635, "learning_rate": 2.202032215049788e-05, "loss": 1.0904, "step": 23226 }, { "epoch": 1.3844319942782215, "grad_norm": 2.773155450820923, "learning_rate": 2.2012485056464767e-05, "loss": 1.0288, "step": 23228 }, { "epoch": 1.3845511979973775, "grad_norm": 3.2386367321014404, "learning_rate": 2.2004648963640857e-05, "loss": 1.3818, "step": 23230 }, { "epoch": 1.3846704017165337, "grad_norm": 3.086543560028076, "learning_rate": 2.19968138723064e-05, "loss": 1.1484, "step": 23232 }, { "epoch": 1.3847896054356896, "grad_norm": 3.089897394180298, "learning_rate": 2.1988979782741735e-05, "loss": 1.2712, "step": 23234 }, { "epoch": 1.3849088091548456, "grad_norm": 2.895669937133789, "learning_rate": 2.198114669522711e-05, "loss": 1.0132, "step": 23236 }, { "epoch": 1.3850280128740016, "grad_norm": 3.076951265335083, "learning_rate": 2.1973314610042734e-05, "loss": 1.0877, "step": 23238 }, { "epoch": 1.3851472165931578, "grad_norm": 3.255399227142334, "learning_rate": 2.1965483527468767e-05, "loss": 1.2909, "step": 23240 }, { "epoch": 1.3852664203123137, "grad_norm": 3.084622859954834, "learning_rate": 2.195765344778541e-05, "loss": 1.1517, "step": 23242 }, { "epoch": 1.38538562403147, "grad_norm": 3.299772262573242, "learning_rate": 2.1949824371272715e-05, "loss": 1.0854, "step": 23244 }, { "epoch": 1.385504827750626, "grad_norm": 3.381722927093506, "learning_rate": 2.194199629821076e-05, "loss": 1.232, "step": 23246 }, { "epoch": 1.3856240314697819, "grad_norm": 3.4226009845733643, "learning_rate": 2.193416922887963e-05, "loss": 1.2579, "step": 23248 }, { "epoch": 1.3857432351889378, "grad_norm": 3.1603710651397705, "learning_rate": 2.1926343163559255e-05, "loss": 1.0843, "step": 23250 }, { "epoch": 1.3858624389080938, "grad_norm": 3.05085825920105, "learning_rate": 2.191851810252966e-05, "loss": 1.1451, "step": 23252 }, { "epoch": 1.38598164262725, "grad_norm": 3.4235355854034424, "learning_rate": 2.1910694046070753e-05, "loss": 1.3062, "step": 23254 }, { "epoch": 1.386100846346406, "grad_norm": 3.23447847366333, "learning_rate": 2.190287099446242e-05, "loss": 1.1515, "step": 23256 }, { "epoch": 1.3862200500655621, "grad_norm": 3.6351804733276367, "learning_rate": 2.189504894798453e-05, "loss": 1.3296, "step": 23258 }, { "epoch": 1.3863392537847181, "grad_norm": 3.4107465744018555, "learning_rate": 2.1887227906916896e-05, "loss": 1.2432, "step": 23260 }, { "epoch": 1.386458457503874, "grad_norm": 2.8753812313079834, "learning_rate": 2.187940787153931e-05, "loss": 1.2243, "step": 23262 }, { "epoch": 1.38657766122303, "grad_norm": 2.8245649337768555, "learning_rate": 2.187158884213152e-05, "loss": 1.0561, "step": 23264 }, { "epoch": 1.3866968649421862, "grad_norm": 3.125474452972412, "learning_rate": 2.1863770818973233e-05, "loss": 1.1093, "step": 23266 }, { "epoch": 1.3868160686613422, "grad_norm": 3.034031629562378, "learning_rate": 2.1855953802344116e-05, "loss": 1.2154, "step": 23268 }, { "epoch": 1.3869352723804984, "grad_norm": 3.4853289127349854, "learning_rate": 2.1848137792523836e-05, "loss": 1.2248, "step": 23270 }, { "epoch": 1.3870544760996544, "grad_norm": 3.079810619354248, "learning_rate": 2.1840322789792018e-05, "loss": 1.1964, "step": 23272 }, { "epoch": 1.3871736798188103, "grad_norm": 3.0390737056732178, "learning_rate": 2.1832508794428152e-05, "loss": 1.0677, "step": 23274 }, { "epoch": 1.3872928835379663, "grad_norm": 2.810565948486328, "learning_rate": 2.182469580671185e-05, "loss": 1.0637, "step": 23276 }, { "epoch": 1.3874120872571223, "grad_norm": 3.168876886367798, "learning_rate": 2.1816883826922574e-05, "loss": 1.1813, "step": 23278 }, { "epoch": 1.3875312909762785, "grad_norm": 3.415945291519165, "learning_rate": 2.180907285533979e-05, "loss": 1.2568, "step": 23280 }, { "epoch": 1.3876504946954344, "grad_norm": 3.206270217895508, "learning_rate": 2.1801262892242924e-05, "loss": 1.102, "step": 23282 }, { "epoch": 1.3877696984145906, "grad_norm": 3.45436954498291, "learning_rate": 2.179345393791137e-05, "loss": 1.0892, "step": 23284 }, { "epoch": 1.3878889021337466, "grad_norm": 3.1544320583343506, "learning_rate": 2.178564599262447e-05, "loss": 1.093, "step": 23286 }, { "epoch": 1.3880081058529026, "grad_norm": 3.0667691230773926, "learning_rate": 2.1777839056661554e-05, "loss": 1.0886, "step": 23288 }, { "epoch": 1.3881273095720585, "grad_norm": 3.288271903991699, "learning_rate": 2.177003313030189e-05, "loss": 1.141, "step": 23290 }, { "epoch": 1.3882465132912147, "grad_norm": 2.8380494117736816, "learning_rate": 2.1762228213824716e-05, "loss": 1.1313, "step": 23292 }, { "epoch": 1.3883657170103707, "grad_norm": 3.1246228218078613, "learning_rate": 2.1754424307509286e-05, "loss": 1.0985, "step": 23294 }, { "epoch": 1.388484920729527, "grad_norm": 3.3995814323425293, "learning_rate": 2.1746621411634706e-05, "loss": 1.0679, "step": 23296 }, { "epoch": 1.3886041244486829, "grad_norm": 3.488868236541748, "learning_rate": 2.173881952648016e-05, "loss": 1.0886, "step": 23298 }, { "epoch": 1.3887233281678388, "grad_norm": 3.440934181213379, "learning_rate": 2.1731018652324737e-05, "loss": 1.2472, "step": 23300 }, { "epoch": 1.3888425318869948, "grad_norm": 3.1480867862701416, "learning_rate": 2.17232187894475e-05, "loss": 1.2473, "step": 23302 }, { "epoch": 1.388961735606151, "grad_norm": 3.249053478240967, "learning_rate": 2.171541993812747e-05, "loss": 1.2166, "step": 23304 }, { "epoch": 1.389080939325307, "grad_norm": 3.4848928451538086, "learning_rate": 2.1707622098643643e-05, "loss": 1.3018, "step": 23306 }, { "epoch": 1.389200143044463, "grad_norm": 3.3517990112304688, "learning_rate": 2.1699825271274977e-05, "loss": 1.1212, "step": 23308 }, { "epoch": 1.3893193467636191, "grad_norm": 3.280052661895752, "learning_rate": 2.1692029456300373e-05, "loss": 1.0777, "step": 23310 }, { "epoch": 1.389438550482775, "grad_norm": 2.995560884475708, "learning_rate": 2.1684234653998765e-05, "loss": 1.1147, "step": 23312 }, { "epoch": 1.389557754201931, "grad_norm": 3.165534496307373, "learning_rate": 2.167644086464893e-05, "loss": 1.1205, "step": 23314 }, { "epoch": 1.389676957921087, "grad_norm": 3.4611732959747314, "learning_rate": 2.166864808852973e-05, "loss": 1.2687, "step": 23316 }, { "epoch": 1.3897961616402432, "grad_norm": 3.4413909912109375, "learning_rate": 2.1660856325919936e-05, "loss": 1.1688, "step": 23318 }, { "epoch": 1.3899153653593992, "grad_norm": 3.155198812484741, "learning_rate": 2.165306557709824e-05, "loss": 1.2001, "step": 23320 }, { "epoch": 1.3900345690785554, "grad_norm": 2.993300199508667, "learning_rate": 2.16452758423434e-05, "loss": 1.1793, "step": 23322 }, { "epoch": 1.3901537727977114, "grad_norm": 3.03887677192688, "learning_rate": 2.1637487121934054e-05, "loss": 1.1959, "step": 23324 }, { "epoch": 1.3902729765168673, "grad_norm": 3.1006481647491455, "learning_rate": 2.162969941614883e-05, "loss": 1.2087, "step": 23326 }, { "epoch": 1.3903921802360233, "grad_norm": 3.1713876724243164, "learning_rate": 2.162191272526633e-05, "loss": 1.1859, "step": 23328 }, { "epoch": 1.3905113839551795, "grad_norm": 3.3226518630981445, "learning_rate": 2.161412704956511e-05, "loss": 1.153, "step": 23330 }, { "epoch": 1.3906305876743354, "grad_norm": 3.0865509510040283, "learning_rate": 2.160634238932368e-05, "loss": 1.2597, "step": 23332 }, { "epoch": 1.3907497913934914, "grad_norm": 3.6455721855163574, "learning_rate": 2.1598558744820518e-05, "loss": 1.2371, "step": 23334 }, { "epoch": 1.3908689951126476, "grad_norm": 3.2687785625457764, "learning_rate": 2.1590776116334115e-05, "loss": 1.1073, "step": 23336 }, { "epoch": 1.3909881988318036, "grad_norm": 3.111870527267456, "learning_rate": 2.1582994504142816e-05, "loss": 1.0391, "step": 23338 }, { "epoch": 1.3911074025509595, "grad_norm": 3.0246329307556152, "learning_rate": 2.1575213908525054e-05, "loss": 1.0546, "step": 23340 }, { "epoch": 1.3912266062701155, "grad_norm": 3.353451728820801, "learning_rate": 2.156743432975914e-05, "loss": 1.3651, "step": 23342 }, { "epoch": 1.3913458099892717, "grad_norm": 2.672490119934082, "learning_rate": 2.1559655768123388e-05, "loss": 1.1268, "step": 23344 }, { "epoch": 1.3914650137084277, "grad_norm": 3.1643357276916504, "learning_rate": 2.155187822389605e-05, "loss": 1.1357, "step": 23346 }, { "epoch": 1.3915842174275839, "grad_norm": 3.347956895828247, "learning_rate": 2.1544101697355363e-05, "loss": 1.1947, "step": 23348 }, { "epoch": 1.3917034211467398, "grad_norm": 3.0799083709716797, "learning_rate": 2.153632618877952e-05, "loss": 1.1968, "step": 23350 }, { "epoch": 1.3918226248658958, "grad_norm": 3.196112632751465, "learning_rate": 2.1528551698446673e-05, "loss": 1.2346, "step": 23352 }, { "epoch": 1.3919418285850518, "grad_norm": 3.4786953926086426, "learning_rate": 2.1520778226634953e-05, "loss": 1.0993, "step": 23354 }, { "epoch": 1.392061032304208, "grad_norm": 3.102783679962158, "learning_rate": 2.151300577362242e-05, "loss": 1.1055, "step": 23356 }, { "epoch": 1.392180236023364, "grad_norm": 2.905117988586426, "learning_rate": 2.1505234339687163e-05, "loss": 1.0835, "step": 23358 }, { "epoch": 1.39229943974252, "grad_norm": 3.466299533843994, "learning_rate": 2.1497463925107136e-05, "loss": 1.0635, "step": 23360 }, { "epoch": 1.392418643461676, "grad_norm": 3.5010457038879395, "learning_rate": 2.1489694530160363e-05, "loss": 1.1762, "step": 23362 }, { "epoch": 1.392537847180832, "grad_norm": 2.826883316040039, "learning_rate": 2.1481926155124787e-05, "loss": 1.1816, "step": 23364 }, { "epoch": 1.392657050899988, "grad_norm": 3.1681628227233887, "learning_rate": 2.1474158800278243e-05, "loss": 1.1808, "step": 23366 }, { "epoch": 1.392776254619144, "grad_norm": 3.0694825649261475, "learning_rate": 2.146639246589866e-05, "loss": 1.2128, "step": 23368 }, { "epoch": 1.3928954583383002, "grad_norm": 3.152637004852295, "learning_rate": 2.1458627152263844e-05, "loss": 1.1369, "step": 23370 }, { "epoch": 1.3930146620574562, "grad_norm": 3.3770344257354736, "learning_rate": 2.1450862859651588e-05, "loss": 1.153, "step": 23372 }, { "epoch": 1.3931338657766124, "grad_norm": 3.057598829269409, "learning_rate": 2.144309958833963e-05, "loss": 1.1566, "step": 23374 }, { "epoch": 1.3932530694957683, "grad_norm": 3.0295639038085938, "learning_rate": 2.1435337338605743e-05, "loss": 1.0767, "step": 23376 }, { "epoch": 1.3933722732149243, "grad_norm": 3.1209754943847656, "learning_rate": 2.1427576110727554e-05, "loss": 1.1608, "step": 23378 }, { "epoch": 1.3934914769340803, "grad_norm": 2.939558744430542, "learning_rate": 2.141981590498271e-05, "loss": 1.0545, "step": 23380 }, { "epoch": 1.3936106806532365, "grad_norm": 3.0081892013549805, "learning_rate": 2.141205672164887e-05, "loss": 1.1178, "step": 23382 }, { "epoch": 1.3937298843723924, "grad_norm": 3.148930788040161, "learning_rate": 2.140429856100354e-05, "loss": 1.2905, "step": 23384 }, { "epoch": 1.3938490880915484, "grad_norm": 3.1690523624420166, "learning_rate": 2.1396541423324305e-05, "loss": 1.1263, "step": 23386 }, { "epoch": 1.3939682918107046, "grad_norm": 3.1064701080322266, "learning_rate": 2.1388785308888653e-05, "loss": 1.0465, "step": 23388 }, { "epoch": 1.3940874955298606, "grad_norm": 3.281059980392456, "learning_rate": 2.1381030217974042e-05, "loss": 1.2932, "step": 23390 }, { "epoch": 1.3942066992490165, "grad_norm": 2.899080276489258, "learning_rate": 2.1373276150857895e-05, "loss": 1.1065, "step": 23392 }, { "epoch": 1.3943259029681725, "grad_norm": 3.446307897567749, "learning_rate": 2.1365523107817608e-05, "loss": 1.1789, "step": 23394 }, { "epoch": 1.3944451066873287, "grad_norm": 3.3382418155670166, "learning_rate": 2.135777108913053e-05, "loss": 1.0732, "step": 23396 }, { "epoch": 1.3945643104064847, "grad_norm": 3.250370979309082, "learning_rate": 2.1350020095073965e-05, "loss": 1.086, "step": 23398 }, { "epoch": 1.3946835141256408, "grad_norm": 3.2607553005218506, "learning_rate": 2.1342270125925236e-05, "loss": 1.1209, "step": 23400 }, { "epoch": 1.3948027178447968, "grad_norm": 3.1032254695892334, "learning_rate": 2.133452118196152e-05, "loss": 1.0187, "step": 23402 }, { "epoch": 1.3949219215639528, "grad_norm": 2.745380163192749, "learning_rate": 2.132677326346008e-05, "loss": 1.0717, "step": 23404 }, { "epoch": 1.3950411252831088, "grad_norm": 3.12056565284729, "learning_rate": 2.1319026370698065e-05, "loss": 1.0605, "step": 23406 }, { "epoch": 1.395160329002265, "grad_norm": 3.4501261711120605, "learning_rate": 2.131128050395261e-05, "loss": 1.1646, "step": 23408 }, { "epoch": 1.395279532721421, "grad_norm": 3.070753574371338, "learning_rate": 2.1303535663500805e-05, "loss": 1.0829, "step": 23410 }, { "epoch": 1.3953987364405769, "grad_norm": 3.4439032077789307, "learning_rate": 2.129579184961971e-05, "loss": 1.1525, "step": 23412 }, { "epoch": 1.395517940159733, "grad_norm": 3.1028754711151123, "learning_rate": 2.1288049062586357e-05, "loss": 1.1106, "step": 23414 }, { "epoch": 1.395637143878889, "grad_norm": 2.7937304973602295, "learning_rate": 2.1280307302677726e-05, "loss": 1.0409, "step": 23416 }, { "epoch": 1.395756347598045, "grad_norm": 3.3621346950531006, "learning_rate": 2.127256657017076e-05, "loss": 1.105, "step": 23418 }, { "epoch": 1.395875551317201, "grad_norm": 3.291642665863037, "learning_rate": 2.126482686534237e-05, "loss": 1.2511, "step": 23420 }, { "epoch": 1.3959947550363572, "grad_norm": 3.2775819301605225, "learning_rate": 2.1257088188469466e-05, "loss": 1.0919, "step": 23422 }, { "epoch": 1.3961139587555131, "grad_norm": 2.977208375930786, "learning_rate": 2.1249350539828843e-05, "loss": 1.0895, "step": 23424 }, { "epoch": 1.3962331624746693, "grad_norm": 3.055870532989502, "learning_rate": 2.1241613919697307e-05, "loss": 1.0397, "step": 23426 }, { "epoch": 1.3963523661938253, "grad_norm": 3.161024808883667, "learning_rate": 2.1233878328351668e-05, "loss": 1.1691, "step": 23428 }, { "epoch": 1.3964715699129813, "grad_norm": 3.413445472717285, "learning_rate": 2.122614376606859e-05, "loss": 1.1627, "step": 23430 }, { "epoch": 1.3965907736321372, "grad_norm": 2.9620516300201416, "learning_rate": 2.121841023312482e-05, "loss": 1.0505, "step": 23432 }, { "epoch": 1.3967099773512934, "grad_norm": 3.557490587234497, "learning_rate": 2.1210677729796984e-05, "loss": 1.2472, "step": 23434 }, { "epoch": 1.3968291810704494, "grad_norm": 3.608347177505493, "learning_rate": 2.120294625636171e-05, "loss": 1.1838, "step": 23436 }, { "epoch": 1.3969483847896054, "grad_norm": 3.1131997108459473, "learning_rate": 2.119521581309558e-05, "loss": 1.1507, "step": 23438 }, { "epoch": 1.3970675885087616, "grad_norm": 3.3945047855377197, "learning_rate": 2.118748640027513e-05, "loss": 1.1943, "step": 23440 }, { "epoch": 1.3971867922279175, "grad_norm": 2.8401477336883545, "learning_rate": 2.1179758018176875e-05, "loss": 0.9699, "step": 23442 }, { "epoch": 1.3973059959470735, "grad_norm": 3.205261707305908, "learning_rate": 2.1172030667077262e-05, "loss": 1.1728, "step": 23444 }, { "epoch": 1.3974251996662295, "grad_norm": 2.775994062423706, "learning_rate": 2.116430434725279e-05, "loss": 1.0181, "step": 23446 }, { "epoch": 1.3975444033853857, "grad_norm": 3.04939603805542, "learning_rate": 2.115657905897977e-05, "loss": 1.1493, "step": 23448 }, { "epoch": 1.3976636071045416, "grad_norm": 2.809096097946167, "learning_rate": 2.1148854802534618e-05, "loss": 1.0043, "step": 23450 }, { "epoch": 1.3977828108236978, "grad_norm": 3.4141645431518555, "learning_rate": 2.114113157819367e-05, "loss": 1.0694, "step": 23452 }, { "epoch": 1.3979020145428538, "grad_norm": 3.500737428665161, "learning_rate": 2.1133409386233137e-05, "loss": 1.1455, "step": 23454 }, { "epoch": 1.3980212182620098, "grad_norm": 3.127333164215088, "learning_rate": 2.112568822692934e-05, "loss": 1.1634, "step": 23456 }, { "epoch": 1.3981404219811657, "grad_norm": 3.4970297813415527, "learning_rate": 2.1117968100558473e-05, "loss": 1.1819, "step": 23458 }, { "epoch": 1.398259625700322, "grad_norm": 3.188183069229126, "learning_rate": 2.11102490073967e-05, "loss": 1.2613, "step": 23460 }, { "epoch": 1.3983788294194779, "grad_norm": 3.5299198627471924, "learning_rate": 2.1102530947720166e-05, "loss": 1.1257, "step": 23462 }, { "epoch": 1.3984980331386339, "grad_norm": 3.2880778312683105, "learning_rate": 2.1094813921804973e-05, "loss": 1.2342, "step": 23464 }, { "epoch": 1.39861723685779, "grad_norm": 3.2701480388641357, "learning_rate": 2.1087097929927164e-05, "loss": 1.092, "step": 23466 }, { "epoch": 1.398736440576946, "grad_norm": 3.3675155639648438, "learning_rate": 2.1079382972362806e-05, "loss": 1.1572, "step": 23468 }, { "epoch": 1.398855644296102, "grad_norm": 3.4603896141052246, "learning_rate": 2.1071669049387888e-05, "loss": 1.2329, "step": 23470 }, { "epoch": 1.398974848015258, "grad_norm": 3.4706597328186035, "learning_rate": 2.106395616127831e-05, "loss": 1.1559, "step": 23472 }, { "epoch": 1.3990940517344141, "grad_norm": 3.3202710151672363, "learning_rate": 2.1056244308310042e-05, "loss": 1.1485, "step": 23474 }, { "epoch": 1.3992132554535701, "grad_norm": 3.3645589351654053, "learning_rate": 2.1048533490758944e-05, "loss": 1.1385, "step": 23476 }, { "epoch": 1.3993324591727263, "grad_norm": 3.3563895225524902, "learning_rate": 2.1040823708900854e-05, "loss": 1.2336, "step": 23478 }, { "epoch": 1.3994516628918823, "grad_norm": 2.631439685821533, "learning_rate": 2.103311496301159e-05, "loss": 1.0645, "step": 23480 }, { "epoch": 1.3995708666110382, "grad_norm": 3.3240883350372314, "learning_rate": 2.1025407253366907e-05, "loss": 1.162, "step": 23482 }, { "epoch": 1.3996900703301942, "grad_norm": 3.402008533477783, "learning_rate": 2.101770058024255e-05, "loss": 1.2172, "step": 23484 }, { "epoch": 1.3998092740493504, "grad_norm": 3.102379322052002, "learning_rate": 2.10099949439142e-05, "loss": 1.0412, "step": 23486 }, { "epoch": 1.3999284777685064, "grad_norm": 3.153730630874634, "learning_rate": 2.1002290344657523e-05, "loss": 1.0861, "step": 23488 }, { "epoch": 1.4000476814876623, "grad_norm": 2.8870818614959717, "learning_rate": 2.099458678274811e-05, "loss": 1.0657, "step": 23490 }, { "epoch": 1.4001668852068185, "grad_norm": 2.8505818843841553, "learning_rate": 2.0986884258461608e-05, "loss": 1.2254, "step": 23492 }, { "epoch": 1.4002860889259745, "grad_norm": 3.0505027770996094, "learning_rate": 2.097918277207349e-05, "loss": 1.0343, "step": 23494 }, { "epoch": 1.4004052926451305, "grad_norm": 2.580644130706787, "learning_rate": 2.0971482323859315e-05, "loss": 1.2926, "step": 23496 }, { "epoch": 1.4005244963642864, "grad_norm": 3.1409428119659424, "learning_rate": 2.0963782914094553e-05, "loss": 1.1339, "step": 23498 }, { "epoch": 1.4006437000834426, "grad_norm": 3.157839298248291, "learning_rate": 2.0956084543054588e-05, "loss": 1.1971, "step": 23500 }, { "epoch": 1.4007629038025986, "grad_norm": 3.2276229858398438, "learning_rate": 2.094838721101487e-05, "loss": 1.2536, "step": 23502 }, { "epoch": 1.4008821075217548, "grad_norm": 3.4279816150665283, "learning_rate": 2.094069091825074e-05, "loss": 1.1911, "step": 23504 }, { "epoch": 1.4010013112409108, "grad_norm": 3.0741546154022217, "learning_rate": 2.0932995665037518e-05, "loss": 1.1004, "step": 23506 }, { "epoch": 1.4011205149600667, "grad_norm": 3.1451447010040283, "learning_rate": 2.092530145165048e-05, "loss": 1.1389, "step": 23508 }, { "epoch": 1.4012397186792227, "grad_norm": 3.237274646759033, "learning_rate": 2.091760827836492e-05, "loss": 1.2121, "step": 23510 }, { "epoch": 1.401358922398379, "grad_norm": 3.196333885192871, "learning_rate": 2.0909916145456e-05, "loss": 1.1163, "step": 23512 }, { "epoch": 1.4014781261175349, "grad_norm": 2.7598400115966797, "learning_rate": 2.0902225053198886e-05, "loss": 1.0622, "step": 23514 }, { "epoch": 1.4015973298366908, "grad_norm": 3.2228474617004395, "learning_rate": 2.0894535001868782e-05, "loss": 1.0823, "step": 23516 }, { "epoch": 1.401716533555847, "grad_norm": 3.3390181064605713, "learning_rate": 2.0886845991740704e-05, "loss": 1.1328, "step": 23518 }, { "epoch": 1.401835737275003, "grad_norm": 3.7349021434783936, "learning_rate": 2.087915802308978e-05, "loss": 1.2816, "step": 23520 }, { "epoch": 1.401954940994159, "grad_norm": 3.18251895904541, "learning_rate": 2.0871471096191002e-05, "loss": 1.0532, "step": 23522 }, { "epoch": 1.402074144713315, "grad_norm": 3.122511625289917, "learning_rate": 2.086378521131937e-05, "loss": 1.0579, "step": 23524 }, { "epoch": 1.4021933484324711, "grad_norm": 2.772904872894287, "learning_rate": 2.0856100368749825e-05, "loss": 0.9423, "step": 23526 }, { "epoch": 1.402312552151627, "grad_norm": 3.725087881088257, "learning_rate": 2.0848416568757283e-05, "loss": 1.16, "step": 23528 }, { "epoch": 1.4024317558707833, "grad_norm": 3.0964720249176025, "learning_rate": 2.0840733811616625e-05, "loss": 1.1704, "step": 23530 }, { "epoch": 1.4025509595899393, "grad_norm": 3.2417914867401123, "learning_rate": 2.083305209760267e-05, "loss": 1.09, "step": 23532 }, { "epoch": 1.4026701633090952, "grad_norm": 3.353034734725952, "learning_rate": 2.082537142699027e-05, "loss": 1.1715, "step": 23534 }, { "epoch": 1.4027893670282512, "grad_norm": 3.220659017562866, "learning_rate": 2.081769180005412e-05, "loss": 1.116, "step": 23536 }, { "epoch": 1.4029085707474074, "grad_norm": 2.966850996017456, "learning_rate": 2.0810013217068992e-05, "loss": 1.0875, "step": 23538 }, { "epoch": 1.4030277744665633, "grad_norm": 3.1421313285827637, "learning_rate": 2.0802335678309566e-05, "loss": 1.1811, "step": 23540 }, { "epoch": 1.4031469781857193, "grad_norm": 3.223553419113159, "learning_rate": 2.0794659184050492e-05, "loss": 1.2062, "step": 23542 }, { "epoch": 1.4032661819048755, "grad_norm": 3.162241220474243, "learning_rate": 2.0786983734566385e-05, "loss": 1.1593, "step": 23544 }, { "epoch": 1.4033853856240315, "grad_norm": 3.0110995769500732, "learning_rate": 2.077930933013182e-05, "loss": 1.1472, "step": 23546 }, { "epoch": 1.4035045893431874, "grad_norm": 3.4071826934814453, "learning_rate": 2.0771635971021337e-05, "loss": 1.2579, "step": 23548 }, { "epoch": 1.4036237930623434, "grad_norm": 2.935516595840454, "learning_rate": 2.076396365750944e-05, "loss": 1.0989, "step": 23550 }, { "epoch": 1.4037429967814996, "grad_norm": 3.0947842597961426, "learning_rate": 2.0756292389870595e-05, "loss": 1.1653, "step": 23552 }, { "epoch": 1.4038622005006556, "grad_norm": 3.0351920127868652, "learning_rate": 2.0748622168379206e-05, "loss": 1.0965, "step": 23554 }, { "epoch": 1.4039814042198118, "grad_norm": 3.428417444229126, "learning_rate": 2.0740952993309725e-05, "loss": 1.2405, "step": 23556 }, { "epoch": 1.4041006079389677, "grad_norm": 2.805337429046631, "learning_rate": 2.0733284864936442e-05, "loss": 1.2191, "step": 23558 }, { "epoch": 1.4042198116581237, "grad_norm": 2.8782191276550293, "learning_rate": 2.072561778353368e-05, "loss": 0.9928, "step": 23560 }, { "epoch": 1.4043390153772797, "grad_norm": 3.0937108993530273, "learning_rate": 2.0717951749375764e-05, "loss": 1.2552, "step": 23562 }, { "epoch": 1.4044582190964359, "grad_norm": 3.1439874172210693, "learning_rate": 2.0710286762736864e-05, "loss": 1.0742, "step": 23564 }, { "epoch": 1.4045774228155918, "grad_norm": 3.3175277709960938, "learning_rate": 2.0702622823891242e-05, "loss": 1.326, "step": 23566 }, { "epoch": 1.4046966265347478, "grad_norm": 3.2591140270233154, "learning_rate": 2.069495993311304e-05, "loss": 1.0372, "step": 23568 }, { "epoch": 1.404815830253904, "grad_norm": 3.363832473754883, "learning_rate": 2.0687298090676387e-05, "loss": 1.202, "step": 23570 }, { "epoch": 1.40493503397306, "grad_norm": 3.1127212047576904, "learning_rate": 2.0679637296855377e-05, "loss": 1.2439, "step": 23572 }, { "epoch": 1.405054237692216, "grad_norm": 3.3012101650238037, "learning_rate": 2.0671977551924055e-05, "loss": 1.077, "step": 23574 }, { "epoch": 1.405173441411372, "grad_norm": 3.094167470932007, "learning_rate": 2.0664318856156438e-05, "loss": 1.0518, "step": 23576 }, { "epoch": 1.405292645130528, "grad_norm": 3.005124092102051, "learning_rate": 2.0656661209826495e-05, "loss": 1.1411, "step": 23578 }, { "epoch": 1.405411848849684, "grad_norm": 3.36907696723938, "learning_rate": 2.064900461320821e-05, "loss": 1.1176, "step": 23580 }, { "epoch": 1.4055310525688403, "grad_norm": 3.2057442665100098, "learning_rate": 2.0641349066575423e-05, "loss": 1.1423, "step": 23582 }, { "epoch": 1.4056502562879962, "grad_norm": 3.3160929679870605, "learning_rate": 2.063369457020204e-05, "loss": 1.1555, "step": 23584 }, { "epoch": 1.4057694600071522, "grad_norm": 3.3757059574127197, "learning_rate": 2.0626041124361883e-05, "loss": 1.254, "step": 23586 }, { "epoch": 1.4058886637263082, "grad_norm": 3.1526832580566406, "learning_rate": 2.061838872932873e-05, "loss": 1.0416, "step": 23588 }, { "epoch": 1.4060078674454644, "grad_norm": 3.1775131225585938, "learning_rate": 2.061073738537635e-05, "loss": 1.1983, "step": 23590 }, { "epoch": 1.4061270711646203, "grad_norm": 3.1479525566101074, "learning_rate": 2.060308709277844e-05, "loss": 1.1555, "step": 23592 }, { "epoch": 1.4062462748837763, "grad_norm": 3.476522922515869, "learning_rate": 2.0595437851808692e-05, "loss": 1.1816, "step": 23594 }, { "epoch": 1.4063654786029325, "grad_norm": 3.2431676387786865, "learning_rate": 2.0587789662740716e-05, "loss": 1.0281, "step": 23596 }, { "epoch": 1.4064846823220885, "grad_norm": 3.228755235671997, "learning_rate": 2.0580142525848172e-05, "loss": 1.0458, "step": 23598 }, { "epoch": 1.4066038860412444, "grad_norm": 2.7304961681365967, "learning_rate": 2.0572496441404555e-05, "loss": 1.034, "step": 23600 }, { "epoch": 1.4067230897604004, "grad_norm": 3.0170955657958984, "learning_rate": 2.0564851409683446e-05, "loss": 1.1751, "step": 23602 }, { "epoch": 1.4068422934795566, "grad_norm": 3.0867857933044434, "learning_rate": 2.0557207430958325e-05, "loss": 1.0635, "step": 23604 }, { "epoch": 1.4069614971987126, "grad_norm": 3.359954833984375, "learning_rate": 2.05495645055026e-05, "loss": 1.1348, "step": 23606 }, { "epoch": 1.4070807009178687, "grad_norm": 3.2071104049682617, "learning_rate": 2.0541922633589732e-05, "loss": 1.1356, "step": 23608 }, { "epoch": 1.4071999046370247, "grad_norm": 3.222545623779297, "learning_rate": 2.0534281815493078e-05, "loss": 1.2151, "step": 23610 }, { "epoch": 1.4073191083561807, "grad_norm": 3.2252562046051025, "learning_rate": 2.0526642051485982e-05, "loss": 1.0162, "step": 23612 }, { "epoch": 1.4074383120753367, "grad_norm": 3.422990560531616, "learning_rate": 2.0519003341841748e-05, "loss": 1.2211, "step": 23614 }, { "epoch": 1.4075575157944928, "grad_norm": 2.900330066680908, "learning_rate": 2.051136568683363e-05, "loss": 0.9927, "step": 23616 }, { "epoch": 1.4076767195136488, "grad_norm": 3.0175352096557617, "learning_rate": 2.0503729086734853e-05, "loss": 1.265, "step": 23618 }, { "epoch": 1.407795923232805, "grad_norm": 3.395827531814575, "learning_rate": 2.0496093541818607e-05, "loss": 1.1758, "step": 23620 }, { "epoch": 1.407915126951961, "grad_norm": 3.224926710128784, "learning_rate": 2.048845905235804e-05, "loss": 1.1238, "step": 23622 }, { "epoch": 1.408034330671117, "grad_norm": 3.193485736846924, "learning_rate": 2.0480825618626248e-05, "loss": 1.165, "step": 23624 }, { "epoch": 1.408153534390273, "grad_norm": 3.486690044403076, "learning_rate": 2.047319324089636e-05, "loss": 1.1687, "step": 23626 }, { "epoch": 1.4082727381094289, "grad_norm": 3.4437572956085205, "learning_rate": 2.0465561919441338e-05, "loss": 1.1756, "step": 23628 }, { "epoch": 1.408391941828585, "grad_norm": 3.3071048259735107, "learning_rate": 2.0457931654534235e-05, "loss": 1.2522, "step": 23630 }, { "epoch": 1.408511145547741, "grad_norm": 2.878812074661255, "learning_rate": 2.0450302446447995e-05, "loss": 1.0017, "step": 23632 }, { "epoch": 1.4086303492668972, "grad_norm": 3.4421327114105225, "learning_rate": 2.044267429545554e-05, "loss": 1.1412, "step": 23634 }, { "epoch": 1.4087495529860532, "grad_norm": 3.4012093544006348, "learning_rate": 2.0435047201829756e-05, "loss": 1.1798, "step": 23636 }, { "epoch": 1.4088687567052092, "grad_norm": 3.398836612701416, "learning_rate": 2.0427421165843485e-05, "loss": 1.3114, "step": 23638 }, { "epoch": 1.4089879604243651, "grad_norm": 3.2555811405181885, "learning_rate": 2.041979618776954e-05, "loss": 1.2215, "step": 23640 }, { "epoch": 1.4091071641435213, "grad_norm": 3.279481887817383, "learning_rate": 2.041217226788068e-05, "loss": 1.1903, "step": 23642 }, { "epoch": 1.4092263678626773, "grad_norm": 3.6150028705596924, "learning_rate": 2.0404549406449686e-05, "loss": 1.0599, "step": 23644 }, { "epoch": 1.4093455715818335, "grad_norm": 3.259592294692993, "learning_rate": 2.0396927603749176e-05, "loss": 1.2549, "step": 23646 }, { "epoch": 1.4094647753009895, "grad_norm": 3.111865758895874, "learning_rate": 2.0389306860051882e-05, "loss": 1.2481, "step": 23648 }, { "epoch": 1.4095839790201454, "grad_norm": 3.1388344764709473, "learning_rate": 2.0381687175630404e-05, "loss": 1.0975, "step": 23650 }, { "epoch": 1.4097031827393014, "grad_norm": 2.988983631134033, "learning_rate": 2.037406855075728e-05, "loss": 1.2639, "step": 23652 }, { "epoch": 1.4098223864584574, "grad_norm": 3.3206264972686768, "learning_rate": 2.0366450985705104e-05, "loss": 1.1318, "step": 23654 }, { "epoch": 1.4099415901776136, "grad_norm": 3.0561304092407227, "learning_rate": 2.0358834480746365e-05, "loss": 1.1064, "step": 23656 }, { "epoch": 1.4100607938967695, "grad_norm": 3.1263809204101562, "learning_rate": 2.0351219036153536e-05, "loss": 1.101, "step": 23658 }, { "epoch": 1.4101799976159257, "grad_norm": 3.31127667427063, "learning_rate": 2.0343604652199046e-05, "loss": 1.1318, "step": 23660 }, { "epoch": 1.4102992013350817, "grad_norm": 3.195420503616333, "learning_rate": 2.033599132915528e-05, "loss": 1.0692, "step": 23662 }, { "epoch": 1.4104184050542377, "grad_norm": 3.085244655609131, "learning_rate": 2.0328379067294602e-05, "loss": 0.9619, "step": 23664 }, { "epoch": 1.4105376087733936, "grad_norm": 3.0912468433380127, "learning_rate": 2.0320767866889307e-05, "loss": 1.0368, "step": 23666 }, { "epoch": 1.4106568124925498, "grad_norm": 2.6510658264160156, "learning_rate": 2.0313157728211727e-05, "loss": 1.114, "step": 23668 }, { "epoch": 1.4107760162117058, "grad_norm": 3.2342095375061035, "learning_rate": 2.0305548651534035e-05, "loss": 1.0828, "step": 23670 }, { "epoch": 1.410895219930862, "grad_norm": 3.3452205657958984, "learning_rate": 2.0297940637128476e-05, "loss": 1.1112, "step": 23672 }, { "epoch": 1.411014423650018, "grad_norm": 3.1146185398101807, "learning_rate": 2.029033368526721e-05, "loss": 1.1592, "step": 23674 }, { "epoch": 1.411133627369174, "grad_norm": 3.057716131210327, "learning_rate": 2.028272779622236e-05, "loss": 1.1424, "step": 23676 }, { "epoch": 1.4112528310883299, "grad_norm": 3.2641425132751465, "learning_rate": 2.0275122970266014e-05, "loss": 1.0619, "step": 23678 }, { "epoch": 1.411372034807486, "grad_norm": 3.1831538677215576, "learning_rate": 2.0267519207670223e-05, "loss": 1.1616, "step": 23680 }, { "epoch": 1.411491238526642, "grad_norm": 3.4318935871124268, "learning_rate": 2.0259916508706988e-05, "loss": 1.1264, "step": 23682 }, { "epoch": 1.411610442245798, "grad_norm": 3.573258876800537, "learning_rate": 2.0252314873648302e-05, "loss": 1.1728, "step": 23684 }, { "epoch": 1.4117296459649542, "grad_norm": 3.273097515106201, "learning_rate": 2.0244714302766094e-05, "loss": 1.1797, "step": 23686 }, { "epoch": 1.4118488496841102, "grad_norm": 3.1593246459960938, "learning_rate": 2.0237114796332247e-05, "loss": 1.0382, "step": 23688 }, { "epoch": 1.4119680534032661, "grad_norm": 3.5293939113616943, "learning_rate": 2.0229516354618665e-05, "loss": 1.179, "step": 23690 }, { "epoch": 1.4120872571224221, "grad_norm": 3.2140257358551025, "learning_rate": 2.0221918977897108e-05, "loss": 1.018, "step": 23692 }, { "epoch": 1.4122064608415783, "grad_norm": 3.299773931503296, "learning_rate": 2.0214322666439418e-05, "loss": 1.2111, "step": 23694 }, { "epoch": 1.4123256645607343, "grad_norm": 3.582674741744995, "learning_rate": 2.020672742051733e-05, "loss": 1.1229, "step": 23696 }, { "epoch": 1.4124448682798905, "grad_norm": 3.2045562267303467, "learning_rate": 2.01991332404025e-05, "loss": 1.128, "step": 23698 }, { "epoch": 1.4125640719990464, "grad_norm": 2.9768073558807373, "learning_rate": 2.019154012636666e-05, "loss": 1.2048, "step": 23700 }, { "epoch": 1.4126832757182024, "grad_norm": 3.630784511566162, "learning_rate": 2.0183948078681415e-05, "loss": 1.0905, "step": 23702 }, { "epoch": 1.4128024794373584, "grad_norm": 2.943227529525757, "learning_rate": 2.017635709761836e-05, "loss": 1.0761, "step": 23704 }, { "epoch": 1.4129216831565146, "grad_norm": 3.6806981563568115, "learning_rate": 2.0168767183449032e-05, "loss": 1.1995, "step": 23706 }, { "epoch": 1.4130408868756705, "grad_norm": 3.4028360843658447, "learning_rate": 2.0161178336445013e-05, "loss": 1.2077, "step": 23708 }, { "epoch": 1.4131600905948265, "grad_norm": 3.2553107738494873, "learning_rate": 2.015359055687771e-05, "loss": 1.2032, "step": 23710 }, { "epoch": 1.4132792943139827, "grad_norm": 3.1668248176574707, "learning_rate": 2.014600384501858e-05, "loss": 1.1422, "step": 23712 }, { "epoch": 1.4133984980331387, "grad_norm": 3.1748251914978027, "learning_rate": 2.013841820113907e-05, "loss": 1.118, "step": 23714 }, { "epoch": 1.4135177017522946, "grad_norm": 3.1415796279907227, "learning_rate": 2.0130833625510474e-05, "loss": 1.0536, "step": 23716 }, { "epoch": 1.4136369054714506, "grad_norm": 3.1665375232696533, "learning_rate": 2.012325011840418e-05, "loss": 1.198, "step": 23718 }, { "epoch": 1.4137561091906068, "grad_norm": 2.9214441776275635, "learning_rate": 2.0115667680091445e-05, "loss": 1.1347, "step": 23720 }, { "epoch": 1.4138753129097628, "grad_norm": 3.4792816638946533, "learning_rate": 2.0108086310843527e-05, "loss": 1.06, "step": 23722 }, { "epoch": 1.413994516628919, "grad_norm": 3.3652045726776123, "learning_rate": 2.0100506010931636e-05, "loss": 1.1843, "step": 23724 }, { "epoch": 1.414113720348075, "grad_norm": 3.45684552192688, "learning_rate": 2.0092926780626946e-05, "loss": 1.1651, "step": 23726 }, { "epoch": 1.414232924067231, "grad_norm": 3.2112677097320557, "learning_rate": 2.008534862020059e-05, "loss": 1.1301, "step": 23728 }, { "epoch": 1.4143521277863869, "grad_norm": 2.729513168334961, "learning_rate": 2.007777152992365e-05, "loss": 1.0257, "step": 23730 }, { "epoch": 1.414471331505543, "grad_norm": 3.4258954524993896, "learning_rate": 2.007019551006723e-05, "loss": 1.1102, "step": 23732 }, { "epoch": 1.414590535224699, "grad_norm": 2.968120574951172, "learning_rate": 2.006262056090229e-05, "loss": 1.0034, "step": 23734 }, { "epoch": 1.414709738943855, "grad_norm": 2.928940773010254, "learning_rate": 2.0055046682699858e-05, "loss": 1.1426, "step": 23736 }, { "epoch": 1.4148289426630112, "grad_norm": 3.9069652557373047, "learning_rate": 2.004747387573088e-05, "loss": 1.191, "step": 23738 }, { "epoch": 1.4149481463821671, "grad_norm": 3.263450860977173, "learning_rate": 2.003990214026621e-05, "loss": 1.1059, "step": 23740 }, { "epoch": 1.4150673501013231, "grad_norm": 3.4161489009857178, "learning_rate": 2.003233147657676e-05, "loss": 1.1889, "step": 23742 }, { "epoch": 1.415186553820479, "grad_norm": 3.28498911857605, "learning_rate": 2.0024761884933347e-05, "loss": 1.2794, "step": 23744 }, { "epoch": 1.4153057575396353, "grad_norm": 3.302121162414551, "learning_rate": 2.0017193365606762e-05, "loss": 1.1299, "step": 23746 }, { "epoch": 1.4154249612587912, "grad_norm": 3.3390228748321533, "learning_rate": 2.000962591886776e-05, "loss": 1.1472, "step": 23748 }, { "epoch": 1.4155441649779474, "grad_norm": 2.8668622970581055, "learning_rate": 2.0002059544987047e-05, "loss": 1.1591, "step": 23750 }, { "epoch": 1.4156633686971034, "grad_norm": 3.19535493850708, "learning_rate": 1.999449424423528e-05, "loss": 1.2434, "step": 23752 }, { "epoch": 1.4157825724162594, "grad_norm": 3.2890405654907227, "learning_rate": 1.9986930016883154e-05, "loss": 1.1466, "step": 23754 }, { "epoch": 1.4159017761354153, "grad_norm": 3.4345250129699707, "learning_rate": 1.997936686320121e-05, "loss": 1.1678, "step": 23756 }, { "epoch": 1.4160209798545715, "grad_norm": 3.293358087539673, "learning_rate": 1.9971804783460013e-05, "loss": 1.1808, "step": 23758 }, { "epoch": 1.4161401835737275, "grad_norm": 3.10137677192688, "learning_rate": 1.9964243777930135e-05, "loss": 1.1893, "step": 23760 }, { "epoch": 1.4162593872928835, "grad_norm": 3.37878680229187, "learning_rate": 1.9956683846881984e-05, "loss": 1.2262, "step": 23762 }, { "epoch": 1.4163785910120397, "grad_norm": 3.182863235473633, "learning_rate": 1.9949124990586066e-05, "loss": 0.9646, "step": 23764 }, { "epoch": 1.4164977947311956, "grad_norm": 3.301755905151367, "learning_rate": 1.9941567209312767e-05, "loss": 1.0499, "step": 23766 }, { "epoch": 1.4166169984503516, "grad_norm": 3.369375467300415, "learning_rate": 1.9934010503332446e-05, "loss": 1.207, "step": 23768 }, { "epoch": 1.4167362021695076, "grad_norm": 3.334930419921875, "learning_rate": 1.9926454872915446e-05, "loss": 1.0684, "step": 23770 }, { "epoch": 1.4168554058886638, "grad_norm": 3.041339635848999, "learning_rate": 1.9918900318332044e-05, "loss": 1.1207, "step": 23772 }, { "epoch": 1.4169746096078197, "grad_norm": 3.454810857772827, "learning_rate": 1.9911346839852503e-05, "loss": 1.1084, "step": 23774 }, { "epoch": 1.417093813326976, "grad_norm": 3.017397403717041, "learning_rate": 1.9903794437747014e-05, "loss": 1.3156, "step": 23776 }, { "epoch": 1.417213017046132, "grad_norm": 3.2344014644622803, "learning_rate": 1.9896243112285807e-05, "loss": 1.099, "step": 23778 }, { "epoch": 1.4173322207652879, "grad_norm": 3.1525213718414307, "learning_rate": 1.9888692863738944e-05, "loss": 1.299, "step": 23780 }, { "epoch": 1.4174514244844438, "grad_norm": 3.0830838680267334, "learning_rate": 1.9881143692376586e-05, "loss": 1.1849, "step": 23782 }, { "epoch": 1.4175706282036, "grad_norm": 3.38771653175354, "learning_rate": 1.987359559846878e-05, "loss": 1.2482, "step": 23784 }, { "epoch": 1.417689831922756, "grad_norm": 3.050875663757324, "learning_rate": 1.9866048582285505e-05, "loss": 1.2386, "step": 23786 }, { "epoch": 1.417809035641912, "grad_norm": 3.0085160732269287, "learning_rate": 1.9858502644096794e-05, "loss": 1.3028, "step": 23788 }, { "epoch": 1.4179282393610682, "grad_norm": 3.3115711212158203, "learning_rate": 1.9850957784172565e-05, "loss": 1.0382, "step": 23790 }, { "epoch": 1.4180474430802241, "grad_norm": 3.5312564373016357, "learning_rate": 1.9843414002782736e-05, "loss": 1.1144, "step": 23792 }, { "epoch": 1.41816664679938, "grad_norm": 3.5399739742279053, "learning_rate": 1.9835871300197145e-05, "loss": 1.2204, "step": 23794 }, { "epoch": 1.418285850518536, "grad_norm": 3.276090621948242, "learning_rate": 1.982832967668568e-05, "loss": 1.0759, "step": 23796 }, { "epoch": 1.4184050542376923, "grad_norm": 3.093083143234253, "learning_rate": 1.9820789132518076e-05, "loss": 1.089, "step": 23798 }, { "epoch": 1.4185242579568482, "grad_norm": 3.123539447784424, "learning_rate": 1.9813249667964083e-05, "loss": 1.1591, "step": 23800 }, { "epoch": 1.4186434616760044, "grad_norm": 3.337310791015625, "learning_rate": 1.9805711283293472e-05, "loss": 1.1401, "step": 23802 }, { "epoch": 1.4187626653951604, "grad_norm": 3.171119451522827, "learning_rate": 1.9798173978775836e-05, "loss": 1.2127, "step": 23804 }, { "epoch": 1.4188818691143164, "grad_norm": 3.2776999473571777, "learning_rate": 1.9790637754680875e-05, "loss": 1.1549, "step": 23806 }, { "epoch": 1.4190010728334723, "grad_norm": 3.1552369594573975, "learning_rate": 1.9783102611278158e-05, "loss": 1.1545, "step": 23808 }, { "epoch": 1.4191202765526285, "grad_norm": 3.236600160598755, "learning_rate": 1.9775568548837248e-05, "loss": 1.146, "step": 23810 }, { "epoch": 1.4192394802717845, "grad_norm": 3.3276731967926025, "learning_rate": 1.9768035567627658e-05, "loss": 1.076, "step": 23812 }, { "epoch": 1.4193586839909405, "grad_norm": 3.2730112075805664, "learning_rate": 1.976050366791888e-05, "loss": 1.1369, "step": 23814 }, { "epoch": 1.4194778877100966, "grad_norm": 2.9855735301971436, "learning_rate": 1.9752972849980343e-05, "loss": 0.9469, "step": 23816 }, { "epoch": 1.4195970914292526, "grad_norm": 2.8504433631896973, "learning_rate": 1.9745443114081464e-05, "loss": 1.2314, "step": 23818 }, { "epoch": 1.4197162951484086, "grad_norm": 3.276034355163574, "learning_rate": 1.973791446049159e-05, "loss": 1.1856, "step": 23820 }, { "epoch": 1.4198354988675645, "grad_norm": 3.1248340606689453, "learning_rate": 1.9730386889480047e-05, "loss": 0.991, "step": 23822 }, { "epoch": 1.4199547025867207, "grad_norm": 3.552736759185791, "learning_rate": 1.972286040131616e-05, "loss": 1.2548, "step": 23824 }, { "epoch": 1.4200739063058767, "grad_norm": 3.0349156856536865, "learning_rate": 1.9715334996269118e-05, "loss": 1.0709, "step": 23826 }, { "epoch": 1.420193110025033, "grad_norm": 3.1113173961639404, "learning_rate": 1.9707810674608178e-05, "loss": 1.2572, "step": 23828 }, { "epoch": 1.4203123137441889, "grad_norm": 2.8039350509643555, "learning_rate": 1.970028743660251e-05, "loss": 1.0532, "step": 23830 }, { "epoch": 1.4204315174633448, "grad_norm": 3.208287239074707, "learning_rate": 1.96927652825212e-05, "loss": 1.1845, "step": 23832 }, { "epoch": 1.4205507211825008, "grad_norm": 3.644259214401245, "learning_rate": 1.968524421263338e-05, "loss": 1.2908, "step": 23834 }, { "epoch": 1.420669924901657, "grad_norm": 3.0191071033477783, "learning_rate": 1.96777242272081e-05, "loss": 1.0879, "step": 23836 }, { "epoch": 1.420789128620813, "grad_norm": 2.7017781734466553, "learning_rate": 1.967020532651437e-05, "loss": 1.1432, "step": 23838 }, { "epoch": 1.420908332339969, "grad_norm": 3.087909698486328, "learning_rate": 1.9662687510821147e-05, "loss": 1.228, "step": 23840 }, { "epoch": 1.4210275360591251, "grad_norm": 2.9301252365112305, "learning_rate": 1.965517078039743e-05, "loss": 1.0435, "step": 23842 }, { "epoch": 1.421146739778281, "grad_norm": 3.2003090381622314, "learning_rate": 1.964765513551206e-05, "loss": 1.0861, "step": 23844 }, { "epoch": 1.421265943497437, "grad_norm": 3.1639742851257324, "learning_rate": 1.9640140576433898e-05, "loss": 1.1559, "step": 23846 }, { "epoch": 1.421385147216593, "grad_norm": 3.365664482116699, "learning_rate": 1.9632627103431816e-05, "loss": 1.1373, "step": 23848 }, { "epoch": 1.4215043509357492, "grad_norm": 2.7010338306427, "learning_rate": 1.9625114716774528e-05, "loss": 0.9869, "step": 23850 }, { "epoch": 1.4216235546549052, "grad_norm": 2.779971122741699, "learning_rate": 1.961760341673083e-05, "loss": 1.1666, "step": 23852 }, { "epoch": 1.4217427583740614, "grad_norm": 3.456134557723999, "learning_rate": 1.961009320356941e-05, "loss": 1.2382, "step": 23854 }, { "epoch": 1.4218619620932174, "grad_norm": 3.3653860092163086, "learning_rate": 1.9602584077558938e-05, "loss": 1.0847, "step": 23856 }, { "epoch": 1.4219811658123733, "grad_norm": 2.80005145072937, "learning_rate": 1.959507603896803e-05, "loss": 1.1184, "step": 23858 }, { "epoch": 1.4221003695315293, "grad_norm": 3.5953714847564697, "learning_rate": 1.9587569088065288e-05, "loss": 0.9978, "step": 23860 }, { "epoch": 1.4222195732506855, "grad_norm": 3.381436347961426, "learning_rate": 1.9580063225119254e-05, "loss": 1.1592, "step": 23862 }, { "epoch": 1.4223387769698415, "grad_norm": 3.02982759475708, "learning_rate": 1.957255845039842e-05, "loss": 0.9893, "step": 23864 }, { "epoch": 1.4224579806889974, "grad_norm": 2.8849732875823975, "learning_rate": 1.956505476417131e-05, "loss": 1.1831, "step": 23866 }, { "epoch": 1.4225771844081536, "grad_norm": 2.934574842453003, "learning_rate": 1.955755216670629e-05, "loss": 1.1134, "step": 23868 }, { "epoch": 1.4226963881273096, "grad_norm": 3.050224542617798, "learning_rate": 1.955005065827181e-05, "loss": 1.2765, "step": 23870 }, { "epoch": 1.4228155918464656, "grad_norm": 3.084099769592285, "learning_rate": 1.9542550239136186e-05, "loss": 1.1032, "step": 23872 }, { "epoch": 1.4229347955656215, "grad_norm": 3.421027183532715, "learning_rate": 1.953505090956776e-05, "loss": 1.0851, "step": 23874 }, { "epoch": 1.4230539992847777, "grad_norm": 3.3427889347076416, "learning_rate": 1.9527552669834798e-05, "loss": 1.2315, "step": 23876 }, { "epoch": 1.4231732030039337, "grad_norm": 3.275639295578003, "learning_rate": 1.952005552020554e-05, "loss": 1.0871, "step": 23878 }, { "epoch": 1.4232924067230899, "grad_norm": 3.6178624629974365, "learning_rate": 1.9512559460948172e-05, "loss": 1.2323, "step": 23880 }, { "epoch": 1.4234116104422458, "grad_norm": 3.238130569458008, "learning_rate": 1.9505064492330872e-05, "loss": 1.0882, "step": 23882 }, { "epoch": 1.4235308141614018, "grad_norm": 3.180267572402954, "learning_rate": 1.9497570614621746e-05, "loss": 1.191, "step": 23884 }, { "epoch": 1.4236500178805578, "grad_norm": 3.017716884613037, "learning_rate": 1.949007782808887e-05, "loss": 1.2169, "step": 23886 }, { "epoch": 1.423769221599714, "grad_norm": 2.82297420501709, "learning_rate": 1.948258613300033e-05, "loss": 1.0741, "step": 23888 }, { "epoch": 1.42388842531887, "grad_norm": 3.336500406265259, "learning_rate": 1.9475095529624083e-05, "loss": 1.1323, "step": 23890 }, { "epoch": 1.424007629038026, "grad_norm": 3.2012319564819336, "learning_rate": 1.946760601822809e-05, "loss": 1.1823, "step": 23892 }, { "epoch": 1.424126832757182, "grad_norm": 3.192823648452759, "learning_rate": 1.946011759908033e-05, "loss": 1.1358, "step": 23894 }, { "epoch": 1.424246036476338, "grad_norm": 3.5892622470855713, "learning_rate": 1.9452630272448625e-05, "loss": 1.2452, "step": 23896 }, { "epoch": 1.424365240195494, "grad_norm": 3.4297266006469727, "learning_rate": 1.9445144038600865e-05, "loss": 1.2139, "step": 23898 }, { "epoch": 1.42448444391465, "grad_norm": 3.345292806625366, "learning_rate": 1.9437658897804844e-05, "loss": 1.049, "step": 23900 }, { "epoch": 1.4246036476338062, "grad_norm": 3.1768717765808105, "learning_rate": 1.943017485032833e-05, "loss": 1.2369, "step": 23902 }, { "epoch": 1.4247228513529622, "grad_norm": 3.4335086345672607, "learning_rate": 1.942269189643906e-05, "loss": 1.0646, "step": 23904 }, { "epoch": 1.4248420550721184, "grad_norm": 3.2224032878875732, "learning_rate": 1.9415210036404718e-05, "loss": 1.1847, "step": 23906 }, { "epoch": 1.4249612587912743, "grad_norm": 3.460601806640625, "learning_rate": 1.9407729270492958e-05, "loss": 1.1714, "step": 23908 }, { "epoch": 1.4250804625104303, "grad_norm": 3.648017168045044, "learning_rate": 1.9400249598971375e-05, "loss": 1.2289, "step": 23910 }, { "epoch": 1.4251996662295863, "grad_norm": 3.078134059906006, "learning_rate": 1.9392771022107593e-05, "loss": 1.0388, "step": 23912 }, { "epoch": 1.4253188699487425, "grad_norm": 3.2351696491241455, "learning_rate": 1.9385293540169082e-05, "loss": 1.1509, "step": 23914 }, { "epoch": 1.4254380736678984, "grad_norm": 2.8919804096221924, "learning_rate": 1.9377817153423382e-05, "loss": 1.102, "step": 23916 }, { "epoch": 1.4255572773870544, "grad_norm": 3.351801872253418, "learning_rate": 1.9370341862137935e-05, "loss": 1.2497, "step": 23918 }, { "epoch": 1.4256764811062106, "grad_norm": 3.0718233585357666, "learning_rate": 1.9362867666580157e-05, "loss": 1.1515, "step": 23920 }, { "epoch": 1.4257956848253666, "grad_norm": 3.5658934116363525, "learning_rate": 1.9355394567017432e-05, "loss": 1.2498, "step": 23922 }, { "epoch": 1.4259148885445225, "grad_norm": 3.1791484355926514, "learning_rate": 1.934792256371709e-05, "loss": 1.212, "step": 23924 }, { "epoch": 1.4260340922636785, "grad_norm": 3.6010663509368896, "learning_rate": 1.934045165694643e-05, "loss": 1.1012, "step": 23926 }, { "epoch": 1.4261532959828347, "grad_norm": 3.0732014179229736, "learning_rate": 1.9332981846972704e-05, "loss": 1.2557, "step": 23928 }, { "epoch": 1.4262724997019907, "grad_norm": 3.2884035110473633, "learning_rate": 1.9325513134063173e-05, "loss": 1.1538, "step": 23930 }, { "epoch": 1.4263917034211469, "grad_norm": 3.0408427715301514, "learning_rate": 1.931804551848495e-05, "loss": 1.1104, "step": 23932 }, { "epoch": 1.4265109071403028, "grad_norm": 3.229534387588501, "learning_rate": 1.9310579000505242e-05, "loss": 1.1303, "step": 23934 }, { "epoch": 1.4266301108594588, "grad_norm": 3.4633607864379883, "learning_rate": 1.9303113580391137e-05, "loss": 1.171, "step": 23936 }, { "epoch": 1.4267493145786148, "grad_norm": 2.8555822372436523, "learning_rate": 1.9295649258409653e-05, "loss": 1.0603, "step": 23938 }, { "epoch": 1.426868518297771, "grad_norm": 3.2644846439361572, "learning_rate": 1.928818603482787e-05, "loss": 0.9902, "step": 23940 }, { "epoch": 1.426987722016927, "grad_norm": 3.4511704444885254, "learning_rate": 1.9280723909912753e-05, "loss": 1.2438, "step": 23942 }, { "epoch": 1.4271069257360829, "grad_norm": 2.7962985038757324, "learning_rate": 1.9273262883931244e-05, "loss": 1.2997, "step": 23944 }, { "epoch": 1.427226129455239, "grad_norm": 3.1488969326019287, "learning_rate": 1.9265802957150246e-05, "loss": 1.0535, "step": 23946 }, { "epoch": 1.427345333174395, "grad_norm": 3.237318277359009, "learning_rate": 1.925834412983664e-05, "loss": 1.1833, "step": 23948 }, { "epoch": 1.427464536893551, "grad_norm": 3.2917721271514893, "learning_rate": 1.9250886402257245e-05, "loss": 1.0198, "step": 23950 }, { "epoch": 1.427583740612707, "grad_norm": 3.149111032485962, "learning_rate": 1.924342977467885e-05, "loss": 1.0873, "step": 23952 }, { "epoch": 1.4277029443318632, "grad_norm": 3.1991190910339355, "learning_rate": 1.9235974247368206e-05, "loss": 1.1366, "step": 23954 }, { "epoch": 1.4278221480510191, "grad_norm": 3.4415042400360107, "learning_rate": 1.9228519820592e-05, "loss": 1.1155, "step": 23956 }, { "epoch": 1.4279413517701753, "grad_norm": 3.5171267986297607, "learning_rate": 1.9221066494616956e-05, "loss": 1.3389, "step": 23958 }, { "epoch": 1.4280605554893313, "grad_norm": 3.2813894748687744, "learning_rate": 1.9213614269709646e-05, "loss": 1.2172, "step": 23960 }, { "epoch": 1.4281797592084873, "grad_norm": 2.626354455947876, "learning_rate": 1.9206163146136695e-05, "loss": 0.9426, "step": 23962 }, { "epoch": 1.4282989629276432, "grad_norm": 3.3559651374816895, "learning_rate": 1.9198713124164658e-05, "loss": 1.0496, "step": 23964 }, { "epoch": 1.4284181666467994, "grad_norm": 3.3530778884887695, "learning_rate": 1.9191264204060034e-05, "loss": 1.1758, "step": 23966 }, { "epoch": 1.4285373703659554, "grad_norm": 3.2690107822418213, "learning_rate": 1.9183816386089298e-05, "loss": 1.2261, "step": 23968 }, { "epoch": 1.4286565740851114, "grad_norm": 3.150587320327759, "learning_rate": 1.917636967051889e-05, "loss": 0.9823, "step": 23970 }, { "epoch": 1.4287757778042676, "grad_norm": 3.0897319316864014, "learning_rate": 1.91689240576152e-05, "loss": 1.2222, "step": 23972 }, { "epoch": 1.4288949815234235, "grad_norm": 2.9546258449554443, "learning_rate": 1.9161479547644567e-05, "loss": 1.2142, "step": 23974 }, { "epoch": 1.4290141852425795, "grad_norm": 3.334789991378784, "learning_rate": 1.9154036140873355e-05, "loss": 1.1412, "step": 23976 }, { "epoch": 1.4291333889617355, "grad_norm": 3.0499370098114014, "learning_rate": 1.914659383756778e-05, "loss": 0.9802, "step": 23978 }, { "epoch": 1.4292525926808917, "grad_norm": 2.914818048477173, "learning_rate": 1.9139152637994117e-05, "loss": 1.0263, "step": 23980 }, { "epoch": 1.4293717964000476, "grad_norm": 3.0861682891845703, "learning_rate": 1.913171254241858e-05, "loss": 1.24, "step": 23982 }, { "epoch": 1.4294910001192038, "grad_norm": 3.3533153533935547, "learning_rate": 1.9124273551107258e-05, "loss": 1.206, "step": 23984 }, { "epoch": 1.4296102038383598, "grad_norm": 3.115255832672119, "learning_rate": 1.9116835664326326e-05, "loss": 1.0666, "step": 23986 }, { "epoch": 1.4297294075575158, "grad_norm": 3.1902174949645996, "learning_rate": 1.9109398882341856e-05, "loss": 1.0354, "step": 23988 }, { "epoch": 1.4298486112766717, "grad_norm": 2.9076571464538574, "learning_rate": 1.9101963205419876e-05, "loss": 1.1077, "step": 23990 }, { "epoch": 1.429967814995828, "grad_norm": 2.9659523963928223, "learning_rate": 1.909452863382637e-05, "loss": 1.2983, "step": 23992 }, { "epoch": 1.430087018714984, "grad_norm": 3.397559881210327, "learning_rate": 1.9087095167827353e-05, "loss": 1.2079, "step": 23994 }, { "epoch": 1.4302062224341399, "grad_norm": 2.9152653217315674, "learning_rate": 1.9079662807688686e-05, "loss": 1.0526, "step": 23996 }, { "epoch": 1.430325426153296, "grad_norm": 2.9722602367401123, "learning_rate": 1.907223155367626e-05, "loss": 1.1059, "step": 23998 }, { "epoch": 1.430444629872452, "grad_norm": 3.2929279804229736, "learning_rate": 1.906480140605597e-05, "loss": 1.2832, "step": 24000 }, { "epoch": 1.430563833591608, "grad_norm": 3.25102162361145, "learning_rate": 1.9057372365093533e-05, "loss": 1.2152, "step": 24002 }, { "epoch": 1.430683037310764, "grad_norm": 3.3223047256469727, "learning_rate": 1.9049944431054778e-05, "loss": 1.2412, "step": 24004 }, { "epoch": 1.4308022410299202, "grad_norm": 3.480893611907959, "learning_rate": 1.9042517604205406e-05, "loss": 1.1687, "step": 24006 }, { "epoch": 1.4309214447490761, "grad_norm": 2.9914796352386475, "learning_rate": 1.9035091884811097e-05, "loss": 1.1227, "step": 24008 }, { "epoch": 1.4310406484682323, "grad_norm": 3.136693000793457, "learning_rate": 1.90276672731375e-05, "loss": 1.1302, "step": 24010 }, { "epoch": 1.4311598521873883, "grad_norm": 3.4471652507781982, "learning_rate": 1.9020243769450214e-05, "loss": 1.0046, "step": 24012 }, { "epoch": 1.4312790559065443, "grad_norm": 3.155003786087036, "learning_rate": 1.90128213740148e-05, "loss": 1.2242, "step": 24014 }, { "epoch": 1.4313982596257002, "grad_norm": 2.8533365726470947, "learning_rate": 1.9005400087096792e-05, "loss": 1.0064, "step": 24016 }, { "epoch": 1.4315174633448564, "grad_norm": 3.1662116050720215, "learning_rate": 1.8997979908961667e-05, "loss": 1.0796, "step": 24018 }, { "epoch": 1.4316366670640124, "grad_norm": 3.1150434017181396, "learning_rate": 1.8990560839874856e-05, "loss": 0.9946, "step": 24020 }, { "epoch": 1.4317558707831686, "grad_norm": 3.2946043014526367, "learning_rate": 1.8983142880101818e-05, "loss": 1.0647, "step": 24022 }, { "epoch": 1.4318750745023245, "grad_norm": 3.112776756286621, "learning_rate": 1.8975726029907866e-05, "loss": 1.1268, "step": 24024 }, { "epoch": 1.4319942782214805, "grad_norm": 3.439210891723633, "learning_rate": 1.896831028955832e-05, "loss": 1.1553, "step": 24026 }, { "epoch": 1.4321134819406365, "grad_norm": 3.1769955158233643, "learning_rate": 1.8960895659318523e-05, "loss": 1.2779, "step": 24028 }, { "epoch": 1.4322326856597924, "grad_norm": 3.1605825424194336, "learning_rate": 1.895348213945366e-05, "loss": 1.2874, "step": 24030 }, { "epoch": 1.4323518893789486, "grad_norm": 3.0634119510650635, "learning_rate": 1.8946069730228977e-05, "loss": 1.2072, "step": 24032 }, { "epoch": 1.4324710930981046, "grad_norm": 3.3457908630371094, "learning_rate": 1.893865843190963e-05, "loss": 1.1318, "step": 24034 }, { "epoch": 1.4325902968172608, "grad_norm": 3.066375494003296, "learning_rate": 1.8931248244760745e-05, "loss": 1.0343, "step": 24036 }, { "epoch": 1.4327095005364168, "grad_norm": 3.0421178340911865, "learning_rate": 1.8923839169047397e-05, "loss": 1.1455, "step": 24038 }, { "epoch": 1.4328287042555727, "grad_norm": 3.019226551055908, "learning_rate": 1.8916431205034678e-05, "loss": 1.1919, "step": 24040 }, { "epoch": 1.4329479079747287, "grad_norm": 3.1704883575439453, "learning_rate": 1.8909024352987554e-05, "loss": 1.0738, "step": 24042 }, { "epoch": 1.433067111693885, "grad_norm": 3.5674784183502197, "learning_rate": 1.890161861317099e-05, "loss": 1.2326, "step": 24044 }, { "epoch": 1.4331863154130409, "grad_norm": 3.2408783435821533, "learning_rate": 1.8894213985849957e-05, "loss": 1.3261, "step": 24046 }, { "epoch": 1.433305519132197, "grad_norm": 3.589801073074341, "learning_rate": 1.888681047128929e-05, "loss": 1.3754, "step": 24048 }, { "epoch": 1.433424722851353, "grad_norm": 2.792236328125, "learning_rate": 1.8879408069753878e-05, "loss": 1.2443, "step": 24050 }, { "epoch": 1.433543926570509, "grad_norm": 3.7323532104492188, "learning_rate": 1.8872006781508516e-05, "loss": 1.2005, "step": 24052 }, { "epoch": 1.433663130289665, "grad_norm": 3.213209629058838, "learning_rate": 1.8864606606817977e-05, "loss": 1.1295, "step": 24054 }, { "epoch": 1.4337823340088212, "grad_norm": 3.0776174068450928, "learning_rate": 1.8857207545946987e-05, "loss": 1.1303, "step": 24056 }, { "epoch": 1.4339015377279771, "grad_norm": 3.202483654022217, "learning_rate": 1.884980959916024e-05, "loss": 1.0472, "step": 24058 }, { "epoch": 1.434020741447133, "grad_norm": 3.394770860671997, "learning_rate": 1.884241276672238e-05, "loss": 1.2273, "step": 24060 }, { "epoch": 1.4341399451662893, "grad_norm": 3.60874342918396, "learning_rate": 1.8835017048898e-05, "loss": 1.1179, "step": 24062 }, { "epoch": 1.4342591488854453, "grad_norm": 3.3644955158233643, "learning_rate": 1.8827622445951733e-05, "loss": 1.1096, "step": 24064 }, { "epoch": 1.4343783526046012, "grad_norm": 3.009303092956543, "learning_rate": 1.882022895814803e-05, "loss": 1.1621, "step": 24066 }, { "epoch": 1.4344975563237572, "grad_norm": 3.0280284881591797, "learning_rate": 1.8812836585751424e-05, "loss": 1.156, "step": 24068 }, { "epoch": 1.4346167600429134, "grad_norm": 3.443507432937622, "learning_rate": 1.880544532902639e-05, "loss": 1.0065, "step": 24070 }, { "epoch": 1.4347359637620694, "grad_norm": 3.353501319885254, "learning_rate": 1.8798055188237263e-05, "loss": 1.2492, "step": 24072 }, { "epoch": 1.4348551674812255, "grad_norm": 3.2322487831115723, "learning_rate": 1.879066616364848e-05, "loss": 1.099, "step": 24074 }, { "epoch": 1.4349743712003815, "grad_norm": 3.19197940826416, "learning_rate": 1.878327825552435e-05, "loss": 1.1236, "step": 24076 }, { "epoch": 1.4350935749195375, "grad_norm": 3.3711159229278564, "learning_rate": 1.8775891464129165e-05, "loss": 1.1, "step": 24078 }, { "epoch": 1.4352127786386935, "grad_norm": 2.8660659790039062, "learning_rate": 1.876850578972717e-05, "loss": 1.1969, "step": 24080 }, { "epoch": 1.4353319823578496, "grad_norm": 2.8843016624450684, "learning_rate": 1.8761121232582586e-05, "loss": 1.1817, "step": 24082 }, { "epoch": 1.4354511860770056, "grad_norm": 3.2950961589813232, "learning_rate": 1.8753737792959574e-05, "loss": 1.228, "step": 24084 }, { "epoch": 1.4355703897961616, "grad_norm": 3.262620449066162, "learning_rate": 1.8746355471122267e-05, "loss": 1.1326, "step": 24086 }, { "epoch": 1.4356895935153178, "grad_norm": 2.942399740219116, "learning_rate": 1.8738974267334762e-05, "loss": 1.0577, "step": 24088 }, { "epoch": 1.4358087972344737, "grad_norm": 2.5409963130950928, "learning_rate": 1.873159418186109e-05, "loss": 0.9173, "step": 24090 }, { "epoch": 1.4359280009536297, "grad_norm": 3.0622286796569824, "learning_rate": 1.8724215214965307e-05, "loss": 1.1709, "step": 24092 }, { "epoch": 1.4360472046727857, "grad_norm": 3.4688546657562256, "learning_rate": 1.8716837366911326e-05, "loss": 1.1168, "step": 24094 }, { "epoch": 1.4361664083919419, "grad_norm": 3.084313154220581, "learning_rate": 1.8709460637963123e-05, "loss": 1.0581, "step": 24096 }, { "epoch": 1.4362856121110978, "grad_norm": 3.0095300674438477, "learning_rate": 1.870208502838457e-05, "loss": 1.0873, "step": 24098 }, { "epoch": 1.436404815830254, "grad_norm": 3.002765417098999, "learning_rate": 1.869471053843952e-05, "loss": 1.2445, "step": 24100 }, { "epoch": 1.43652401954941, "grad_norm": 3.0537045001983643, "learning_rate": 1.8687337168391784e-05, "loss": 1.0553, "step": 24102 }, { "epoch": 1.436643223268566, "grad_norm": 3.3499627113342285, "learning_rate": 1.8679964918505132e-05, "loss": 0.9709, "step": 24104 }, { "epoch": 1.436762426987722, "grad_norm": 3.1815059185028076, "learning_rate": 1.8672593789043295e-05, "loss": 1.3039, "step": 24106 }, { "epoch": 1.4368816307068781, "grad_norm": 3.6219093799591064, "learning_rate": 1.8665223780269948e-05, "loss": 1.1825, "step": 24108 }, { "epoch": 1.437000834426034, "grad_norm": 3.291942834854126, "learning_rate": 1.8657854892448794e-05, "loss": 1.1015, "step": 24110 }, { "epoch": 1.43712003814519, "grad_norm": 3.0885143280029297, "learning_rate": 1.8650487125843368e-05, "loss": 1.0073, "step": 24112 }, { "epoch": 1.4372392418643463, "grad_norm": 3.124603748321533, "learning_rate": 1.8643120480717295e-05, "loss": 0.9565, "step": 24114 }, { "epoch": 1.4373584455835022, "grad_norm": 3.175852060317993, "learning_rate": 1.8635754957334112e-05, "loss": 1.0812, "step": 24116 }, { "epoch": 1.4374776493026582, "grad_norm": 3.2734169960021973, "learning_rate": 1.862839055595725e-05, "loss": 1.0231, "step": 24118 }, { "epoch": 1.4375968530218142, "grad_norm": 3.3467884063720703, "learning_rate": 1.8621027276850205e-05, "loss": 1.195, "step": 24120 }, { "epoch": 1.4377160567409704, "grad_norm": 3.0323314666748047, "learning_rate": 1.8613665120276387e-05, "loss": 1.0281, "step": 24122 }, { "epoch": 1.4378352604601263, "grad_norm": 3.072089195251465, "learning_rate": 1.8606304086499143e-05, "loss": 1.2208, "step": 24124 }, { "epoch": 1.4379544641792825, "grad_norm": 3.2246875762939453, "learning_rate": 1.8598944175781806e-05, "loss": 1.0881, "step": 24126 }, { "epoch": 1.4380736678984385, "grad_norm": 3.313209295272827, "learning_rate": 1.8591585388387706e-05, "loss": 1.1605, "step": 24128 }, { "epoch": 1.4381928716175945, "grad_norm": 3.2086899280548096, "learning_rate": 1.8584227724580038e-05, "loss": 1.2571, "step": 24130 }, { "epoch": 1.4383120753367504, "grad_norm": 3.1635072231292725, "learning_rate": 1.8576871184622017e-05, "loss": 1.1603, "step": 24132 }, { "epoch": 1.4384312790559066, "grad_norm": 3.1373231410980225, "learning_rate": 1.8569515768776864e-05, "loss": 1.0987, "step": 24134 }, { "epoch": 1.4385504827750626, "grad_norm": 2.9053633213043213, "learning_rate": 1.8562161477307633e-05, "loss": 1.1121, "step": 24136 }, { "epoch": 1.4386696864942186, "grad_norm": 3.2145724296569824, "learning_rate": 1.8554808310477466e-05, "loss": 1.0311, "step": 24138 }, { "epoch": 1.4387888902133747, "grad_norm": 2.8795180320739746, "learning_rate": 1.8547456268549402e-05, "loss": 1.1902, "step": 24140 }, { "epoch": 1.4389080939325307, "grad_norm": 3.2130138874053955, "learning_rate": 1.854010535178643e-05, "loss": 1.0175, "step": 24142 }, { "epoch": 1.4390272976516867, "grad_norm": 3.2618024349212646, "learning_rate": 1.8532755560451537e-05, "loss": 1.1947, "step": 24144 }, { "epoch": 1.4391465013708427, "grad_norm": 3.0379133224487305, "learning_rate": 1.8525406894807635e-05, "loss": 1.0279, "step": 24146 }, { "epoch": 1.4392657050899988, "grad_norm": 3.613109827041626, "learning_rate": 1.8518059355117624e-05, "loss": 1.1173, "step": 24148 }, { "epoch": 1.4393849088091548, "grad_norm": 3.224430799484253, "learning_rate": 1.8510712941644337e-05, "loss": 1.0118, "step": 24150 }, { "epoch": 1.439504112528311, "grad_norm": 3.39615797996521, "learning_rate": 1.8503367654650595e-05, "loss": 1.1118, "step": 24152 }, { "epoch": 1.439623316247467, "grad_norm": 2.8666486740112305, "learning_rate": 1.8496023494399135e-05, "loss": 0.987, "step": 24154 }, { "epoch": 1.439742519966623, "grad_norm": 3.862882375717163, "learning_rate": 1.8488680461152736e-05, "loss": 1.1317, "step": 24156 }, { "epoch": 1.439861723685779, "grad_norm": 3.2484183311462402, "learning_rate": 1.8481338555174023e-05, "loss": 1.0963, "step": 24158 }, { "epoch": 1.439980927404935, "grad_norm": 3.2723445892333984, "learning_rate": 1.8473997776725684e-05, "loss": 1.213, "step": 24160 }, { "epoch": 1.440100131124091, "grad_norm": 2.8911139965057373, "learning_rate": 1.8466658126070306e-05, "loss": 1.11, "step": 24162 }, { "epoch": 1.440219334843247, "grad_norm": 3.6968798637390137, "learning_rate": 1.8459319603470466e-05, "loss": 1.1873, "step": 24164 }, { "epoch": 1.4403385385624032, "grad_norm": 2.9775829315185547, "learning_rate": 1.8451982209188674e-05, "loss": 1.3061, "step": 24166 }, { "epoch": 1.4404577422815592, "grad_norm": 3.5437092781066895, "learning_rate": 1.8444645943487414e-05, "loss": 1.1259, "step": 24168 }, { "epoch": 1.4405769460007152, "grad_norm": 3.3329172134399414, "learning_rate": 1.8437310806629145e-05, "loss": 1.139, "step": 24170 }, { "epoch": 1.4406961497198711, "grad_norm": 3.1199371814727783, "learning_rate": 1.842997679887623e-05, "loss": 1.1201, "step": 24172 }, { "epoch": 1.4408153534390273, "grad_norm": 2.7903029918670654, "learning_rate": 1.84226439204911e-05, "loss": 1.0819, "step": 24174 }, { "epoch": 1.4409345571581833, "grad_norm": 2.917393684387207, "learning_rate": 1.841531217173602e-05, "loss": 1.059, "step": 24176 }, { "epoch": 1.4410537608773395, "grad_norm": 3.2427616119384766, "learning_rate": 1.8407981552873277e-05, "loss": 1.0837, "step": 24178 }, { "epoch": 1.4411729645964955, "grad_norm": 3.263524055480957, "learning_rate": 1.840065206416516e-05, "loss": 1.0671, "step": 24180 }, { "epoch": 1.4412921683156514, "grad_norm": 2.8763763904571533, "learning_rate": 1.8393323705873794e-05, "loss": 1.1279, "step": 24182 }, { "epoch": 1.4414113720348074, "grad_norm": 3.1566357612609863, "learning_rate": 1.8385996478261398e-05, "loss": 1.1434, "step": 24184 }, { "epoch": 1.4415305757539636, "grad_norm": 3.1869781017303467, "learning_rate": 1.8378670381590073e-05, "loss": 1.1439, "step": 24186 }, { "epoch": 1.4416497794731196, "grad_norm": 3.1120049953460693, "learning_rate": 1.8371345416121904e-05, "loss": 1.0025, "step": 24188 }, { "epoch": 1.4417689831922755, "grad_norm": 2.7286908626556396, "learning_rate": 1.8364021582118924e-05, "loss": 1.0586, "step": 24190 }, { "epoch": 1.4418881869114317, "grad_norm": 3.323207139968872, "learning_rate": 1.8356698879843132e-05, "loss": 1.1255, "step": 24192 }, { "epoch": 1.4420073906305877, "grad_norm": 3.244798421859741, "learning_rate": 1.8349377309556486e-05, "loss": 1.1847, "step": 24194 }, { "epoch": 1.4421265943497437, "grad_norm": 3.069136142730713, "learning_rate": 1.8342056871520895e-05, "loss": 1.0407, "step": 24196 }, { "epoch": 1.4422457980688996, "grad_norm": 3.461958646774292, "learning_rate": 1.8334737565998278e-05, "loss": 1.2249, "step": 24198 }, { "epoch": 1.4423650017880558, "grad_norm": 3.147017002105713, "learning_rate": 1.83274193932504e-05, "loss": 1.2163, "step": 24200 }, { "epoch": 1.4424842055072118, "grad_norm": 2.9768683910369873, "learning_rate": 1.832010235353912e-05, "loss": 1.1108, "step": 24202 }, { "epoch": 1.442603409226368, "grad_norm": 3.4478838443756104, "learning_rate": 1.831278644712617e-05, "loss": 1.1409, "step": 24204 }, { "epoch": 1.442722612945524, "grad_norm": 3.3639791011810303, "learning_rate": 1.8305471674273263e-05, "loss": 1.135, "step": 24206 }, { "epoch": 1.44284181666468, "grad_norm": 3.581162691116333, "learning_rate": 1.8298158035242075e-05, "loss": 1.1902, "step": 24208 }, { "epoch": 1.442961020383836, "grad_norm": 2.7140960693359375, "learning_rate": 1.8290845530294244e-05, "loss": 1.0322, "step": 24210 }, { "epoch": 1.443080224102992, "grad_norm": 3.492886781692505, "learning_rate": 1.8283534159691357e-05, "loss": 1.1225, "step": 24212 }, { "epoch": 1.443199427822148, "grad_norm": 3.0629353523254395, "learning_rate": 1.8276223923694968e-05, "loss": 0.991, "step": 24214 }, { "epoch": 1.443318631541304, "grad_norm": 3.246518850326538, "learning_rate": 1.8268914822566596e-05, "loss": 1.2071, "step": 24216 }, { "epoch": 1.4434378352604602, "grad_norm": 3.3192646503448486, "learning_rate": 1.8261606856567687e-05, "loss": 1.2215, "step": 24218 }, { "epoch": 1.4435570389796162, "grad_norm": 2.8977878093719482, "learning_rate": 1.8254300025959726e-05, "loss": 1.1072, "step": 24220 }, { "epoch": 1.4436762426987721, "grad_norm": 3.422430992126465, "learning_rate": 1.8246994331004057e-05, "loss": 1.1539, "step": 24222 }, { "epoch": 1.4437954464179281, "grad_norm": 3.0731618404388428, "learning_rate": 1.823968977196202e-05, "loss": 1.1076, "step": 24224 }, { "epoch": 1.4439146501370843, "grad_norm": 3.057224988937378, "learning_rate": 1.8232386349094987e-05, "loss": 1.0554, "step": 24226 }, { "epoch": 1.4440338538562403, "grad_norm": 3.20666241645813, "learning_rate": 1.822508406266415e-05, "loss": 1.1163, "step": 24228 }, { "epoch": 1.4441530575753965, "grad_norm": 3.084721326828003, "learning_rate": 1.8217782912930787e-05, "loss": 1.0408, "step": 24230 }, { "epoch": 1.4442722612945524, "grad_norm": 3.0926594734191895, "learning_rate": 1.8210482900156074e-05, "loss": 1.101, "step": 24232 }, { "epoch": 1.4443914650137084, "grad_norm": 3.313342332839966, "learning_rate": 1.820318402460115e-05, "loss": 1.1164, "step": 24234 }, { "epoch": 1.4445106687328644, "grad_norm": 3.0669796466827393, "learning_rate": 1.819588628652713e-05, "loss": 1.1445, "step": 24236 }, { "epoch": 1.4446298724520206, "grad_norm": 2.962076425552368, "learning_rate": 1.8188589686195072e-05, "loss": 1.1987, "step": 24238 }, { "epoch": 1.4447490761711765, "grad_norm": 3.2803592681884766, "learning_rate": 1.8181294223866002e-05, "loss": 1.0761, "step": 24240 }, { "epoch": 1.4448682798903325, "grad_norm": 3.1160736083984375, "learning_rate": 1.8173999899800887e-05, "loss": 0.9348, "step": 24242 }, { "epoch": 1.4449874836094887, "grad_norm": 3.205176830291748, "learning_rate": 1.8166706714260722e-05, "loss": 1.1725, "step": 24244 }, { "epoch": 1.4451066873286447, "grad_norm": 3.0955188274383545, "learning_rate": 1.8159414667506343e-05, "loss": 1.3062, "step": 24246 }, { "epoch": 1.4452258910478006, "grad_norm": 3.381742238998413, "learning_rate": 1.8152123759798655e-05, "loss": 1.106, "step": 24248 }, { "epoch": 1.4453450947669566, "grad_norm": 3.316772222518921, "learning_rate": 1.8144833991398485e-05, "loss": 1.2064, "step": 24250 }, { "epoch": 1.4454642984861128, "grad_norm": 3.1580255031585693, "learning_rate": 1.813754536256656e-05, "loss": 1.0784, "step": 24252 }, { "epoch": 1.4455835022052688, "grad_norm": 3.162722110748291, "learning_rate": 1.8130257873563673e-05, "loss": 1.0865, "step": 24254 }, { "epoch": 1.445702705924425, "grad_norm": 2.8669369220733643, "learning_rate": 1.81229715246505e-05, "loss": 1.0735, "step": 24256 }, { "epoch": 1.445821909643581, "grad_norm": 3.3302135467529297, "learning_rate": 1.8115686316087703e-05, "loss": 1.3197, "step": 24258 }, { "epoch": 1.445941113362737, "grad_norm": 3.4396955966949463, "learning_rate": 1.810840224813588e-05, "loss": 1.1376, "step": 24260 }, { "epoch": 1.4460603170818929, "grad_norm": 2.9691054821014404, "learning_rate": 1.810111932105566e-05, "loss": 1.0763, "step": 24262 }, { "epoch": 1.446179520801049, "grad_norm": 3.297318935394287, "learning_rate": 1.8093837535107505e-05, "loss": 1.2759, "step": 24264 }, { "epoch": 1.446298724520205, "grad_norm": 3.59169602394104, "learning_rate": 1.808655689055197e-05, "loss": 1.2327, "step": 24266 }, { "epoch": 1.446417928239361, "grad_norm": 3.290012836456299, "learning_rate": 1.8079277387649502e-05, "loss": 1.169, "step": 24268 }, { "epoch": 1.4465371319585172, "grad_norm": 3.2728872299194336, "learning_rate": 1.8071999026660457e-05, "loss": 1.1393, "step": 24270 }, { "epoch": 1.4466563356776732, "grad_norm": 3.267399787902832, "learning_rate": 1.806472180784527e-05, "loss": 1.0646, "step": 24272 }, { "epoch": 1.4467755393968291, "grad_norm": 3.1893680095672607, "learning_rate": 1.8057445731464247e-05, "loss": 1.045, "step": 24274 }, { "epoch": 1.446894743115985, "grad_norm": 3.3485395908355713, "learning_rate": 1.8050170797777683e-05, "loss": 1.1047, "step": 24276 }, { "epoch": 1.4470139468351413, "grad_norm": 3.245088815689087, "learning_rate": 1.804289700704582e-05, "loss": 1.3394, "step": 24278 }, { "epoch": 1.4471331505542973, "grad_norm": 3.3891687393188477, "learning_rate": 1.8035624359528874e-05, "loss": 1.0649, "step": 24280 }, { "epoch": 1.4472523542734534, "grad_norm": 3.295623779296875, "learning_rate": 1.8028352855487007e-05, "loss": 1.0899, "step": 24282 }, { "epoch": 1.4473715579926094, "grad_norm": 3.3156979084014893, "learning_rate": 1.8021082495180353e-05, "loss": 1.2356, "step": 24284 }, { "epoch": 1.4474907617117654, "grad_norm": 2.944709539413452, "learning_rate": 1.8013813278868992e-05, "loss": 1.1077, "step": 24286 }, { "epoch": 1.4476099654309214, "grad_norm": 3.1864304542541504, "learning_rate": 1.8006545206812948e-05, "loss": 1.1681, "step": 24288 }, { "epoch": 1.4477291691500775, "grad_norm": 3.486711025238037, "learning_rate": 1.7999278279272287e-05, "loss": 1.1581, "step": 24290 }, { "epoch": 1.4478483728692335, "grad_norm": 3.1322267055511475, "learning_rate": 1.7992012496506894e-05, "loss": 1.1021, "step": 24292 }, { "epoch": 1.4479675765883895, "grad_norm": 3.0866308212280273, "learning_rate": 1.798474785877674e-05, "loss": 1.1031, "step": 24294 }, { "epoch": 1.4480867803075457, "grad_norm": 3.5462963581085205, "learning_rate": 1.79774843663417e-05, "loss": 1.1685, "step": 24296 }, { "epoch": 1.4482059840267016, "grad_norm": 3.182860851287842, "learning_rate": 1.7970222019461608e-05, "loss": 1.1897, "step": 24298 }, { "epoch": 1.4483251877458576, "grad_norm": 3.241446018218994, "learning_rate": 1.7962960818396262e-05, "loss": 1.0672, "step": 24300 }, { "epoch": 1.4484443914650136, "grad_norm": 3.0755670070648193, "learning_rate": 1.795570076340542e-05, "loss": 1.2463, "step": 24302 }, { "epoch": 1.4485635951841698, "grad_norm": 2.8470826148986816, "learning_rate": 1.7948441854748805e-05, "loss": 1.0324, "step": 24304 }, { "epoch": 1.4486827989033257, "grad_norm": 3.246656656265259, "learning_rate": 1.7941184092686065e-05, "loss": 1.1593, "step": 24306 }, { "epoch": 1.448802002622482, "grad_norm": 2.993858814239502, "learning_rate": 1.7933927477476892e-05, "loss": 1.1471, "step": 24308 }, { "epoch": 1.448921206341638, "grad_norm": 3.085089921951294, "learning_rate": 1.7926672009380835e-05, "loss": 1.0618, "step": 24310 }, { "epoch": 1.4490404100607939, "grad_norm": 3.247371196746826, "learning_rate": 1.7919417688657437e-05, "loss": 1.1378, "step": 24312 }, { "epoch": 1.4491596137799498, "grad_norm": 3.178075075149536, "learning_rate": 1.7912164515566264e-05, "loss": 1.1291, "step": 24314 }, { "epoch": 1.449278817499106, "grad_norm": 2.9830446243286133, "learning_rate": 1.790491249036672e-05, "loss": 1.2169, "step": 24316 }, { "epoch": 1.449398021218262, "grad_norm": 3.2259926795959473, "learning_rate": 1.7897661613318285e-05, "loss": 1.0446, "step": 24318 }, { "epoch": 1.449517224937418, "grad_norm": 3.248305559158325, "learning_rate": 1.7890411884680337e-05, "loss": 1.15, "step": 24320 }, { "epoch": 1.4496364286565742, "grad_norm": 3.113229751586914, "learning_rate": 1.788316330471221e-05, "loss": 1.1239, "step": 24322 }, { "epoch": 1.4497556323757301, "grad_norm": 3.546337127685547, "learning_rate": 1.7875915873673205e-05, "loss": 1.1602, "step": 24324 }, { "epoch": 1.449874836094886, "grad_norm": 2.8239829540252686, "learning_rate": 1.7868669591822634e-05, "loss": 0.9877, "step": 24326 }, { "epoch": 1.449994039814042, "grad_norm": 3.3905019760131836, "learning_rate": 1.7861424459419666e-05, "loss": 1.002, "step": 24328 }, { "epoch": 1.4501132435331983, "grad_norm": 3.333024024963379, "learning_rate": 1.7854180476723497e-05, "loss": 1.1529, "step": 24330 }, { "epoch": 1.4502324472523542, "grad_norm": 3.0039021968841553, "learning_rate": 1.7846937643993307e-05, "loss": 1.2004, "step": 24332 }, { "epoch": 1.4503516509715104, "grad_norm": 2.9089128971099854, "learning_rate": 1.7839695961488133e-05, "loss": 1.2583, "step": 24334 }, { "epoch": 1.4504708546906664, "grad_norm": 3.193652868270874, "learning_rate": 1.7832455429467093e-05, "loss": 1.0972, "step": 24336 }, { "epoch": 1.4505900584098224, "grad_norm": 3.412409543991089, "learning_rate": 1.7825216048189175e-05, "loss": 1.2157, "step": 24338 }, { "epoch": 1.4507092621289783, "grad_norm": 3.7324466705322266, "learning_rate": 1.7817977817913366e-05, "loss": 1.1069, "step": 24340 }, { "epoch": 1.4508284658481345, "grad_norm": 3.127171754837036, "learning_rate": 1.781074073889861e-05, "loss": 1.1031, "step": 24342 }, { "epoch": 1.4509476695672905, "grad_norm": 2.8985493183135986, "learning_rate": 1.780350481140378e-05, "loss": 1.2078, "step": 24344 }, { "epoch": 1.4510668732864465, "grad_norm": 3.284921407699585, "learning_rate": 1.7796270035687747e-05, "loss": 1.2275, "step": 24346 }, { "epoch": 1.4511860770056026, "grad_norm": 3.001136541366577, "learning_rate": 1.7789036412009315e-05, "loss": 1.0256, "step": 24348 }, { "epoch": 1.4513052807247586, "grad_norm": 2.6599819660186768, "learning_rate": 1.7781803940627264e-05, "loss": 1.0617, "step": 24350 }, { "epoch": 1.4514244844439146, "grad_norm": 3.224705934524536, "learning_rate": 1.7774572621800294e-05, "loss": 1.3074, "step": 24352 }, { "epoch": 1.4515436881630706, "grad_norm": 3.5058372020721436, "learning_rate": 1.7767342455787162e-05, "loss": 1.2397, "step": 24354 }, { "epoch": 1.4516628918822267, "grad_norm": 2.7092599868774414, "learning_rate": 1.7760113442846455e-05, "loss": 1.141, "step": 24356 }, { "epoch": 1.4517820956013827, "grad_norm": 3.293458938598633, "learning_rate": 1.7752885583236777e-05, "loss": 1.0467, "step": 24358 }, { "epoch": 1.451901299320539, "grad_norm": 3.123270034790039, "learning_rate": 1.7745658877216736e-05, "loss": 1.0738, "step": 24360 }, { "epoch": 1.4520205030396949, "grad_norm": 2.9821066856384277, "learning_rate": 1.7738433325044827e-05, "loss": 1.0375, "step": 24362 }, { "epoch": 1.4521397067588508, "grad_norm": 3.511132001876831, "learning_rate": 1.7731208926979548e-05, "loss": 1.1076, "step": 24364 }, { "epoch": 1.4522589104780068, "grad_norm": 2.982713222503662, "learning_rate": 1.7723985683279328e-05, "loss": 1.3284, "step": 24366 }, { "epoch": 1.452378114197163, "grad_norm": 3.1688451766967773, "learning_rate": 1.7716763594202572e-05, "loss": 1.2135, "step": 24368 }, { "epoch": 1.452497317916319, "grad_norm": 3.1818196773529053, "learning_rate": 1.770954266000762e-05, "loss": 1.1879, "step": 24370 }, { "epoch": 1.452616521635475, "grad_norm": 3.2863688468933105, "learning_rate": 1.7702322880952844e-05, "loss": 1.2001, "step": 24372 }, { "epoch": 1.4527357253546311, "grad_norm": 3.297144889831543, "learning_rate": 1.7695104257296463e-05, "loss": 1.0487, "step": 24374 }, { "epoch": 1.452854929073787, "grad_norm": 3.1881673336029053, "learning_rate": 1.768788678929672e-05, "loss": 1.184, "step": 24376 }, { "epoch": 1.452974132792943, "grad_norm": 3.315370559692383, "learning_rate": 1.7680670477211853e-05, "loss": 1.0803, "step": 24378 }, { "epoch": 1.453093336512099, "grad_norm": 3.341886520385742, "learning_rate": 1.767345532129995e-05, "loss": 1.1547, "step": 24380 }, { "epoch": 1.4532125402312552, "grad_norm": 3.1374154090881348, "learning_rate": 1.7666241321819166e-05, "loss": 1.2059, "step": 24382 }, { "epoch": 1.4533317439504112, "grad_norm": 2.958885669708252, "learning_rate": 1.7659028479027562e-05, "loss": 1.0871, "step": 24384 }, { "epoch": 1.4534509476695674, "grad_norm": 3.421250104904175, "learning_rate": 1.7651816793183163e-05, "loss": 1.1833, "step": 24386 }, { "epoch": 1.4535701513887234, "grad_norm": 3.4483466148376465, "learning_rate": 1.7644606264543955e-05, "loss": 1.1127, "step": 24388 }, { "epoch": 1.4536893551078793, "grad_norm": 3.1651065349578857, "learning_rate": 1.7637396893367886e-05, "loss": 1.1613, "step": 24390 }, { "epoch": 1.4538085588270353, "grad_norm": 3.1012213230133057, "learning_rate": 1.7630188679912855e-05, "loss": 1.0898, "step": 24392 }, { "epoch": 1.4539277625461915, "grad_norm": 3.147965431213379, "learning_rate": 1.7622981624436712e-05, "loss": 1.1238, "step": 24394 }, { "epoch": 1.4540469662653475, "grad_norm": 3.1064610481262207, "learning_rate": 1.7615775727197324e-05, "loss": 1.1121, "step": 24396 }, { "epoch": 1.4541661699845037, "grad_norm": 3.1148672103881836, "learning_rate": 1.760857098845241e-05, "loss": 1.0318, "step": 24398 }, { "epoch": 1.4542853737036596, "grad_norm": 3.440319538116455, "learning_rate": 1.7601367408459752e-05, "loss": 1.1661, "step": 24400 }, { "epoch": 1.4544045774228156, "grad_norm": 3.1516690254211426, "learning_rate": 1.759416498747705e-05, "loss": 1.0601, "step": 24402 }, { "epoch": 1.4545237811419716, "grad_norm": 3.16566801071167, "learning_rate": 1.7586963725761907e-05, "loss": 1.2002, "step": 24404 }, { "epoch": 1.4546429848611275, "grad_norm": 3.2622780799865723, "learning_rate": 1.7579763623571994e-05, "loss": 1.0994, "step": 24406 }, { "epoch": 1.4547621885802837, "grad_norm": 3.443448543548584, "learning_rate": 1.7572564681164855e-05, "loss": 1.1091, "step": 24408 }, { "epoch": 1.4548813922994397, "grad_norm": 3.1757805347442627, "learning_rate": 1.7565366898798024e-05, "loss": 0.9945, "step": 24410 }, { "epoch": 1.4550005960185959, "grad_norm": 3.2386021614074707, "learning_rate": 1.7558170276728996e-05, "loss": 1.2141, "step": 24412 }, { "epoch": 1.4551197997377519, "grad_norm": 3.0274817943573, "learning_rate": 1.7550974815215223e-05, "loss": 1.1078, "step": 24414 }, { "epoch": 1.4552390034569078, "grad_norm": 3.2490668296813965, "learning_rate": 1.7543780514514096e-05, "loss": 1.2093, "step": 24416 }, { "epoch": 1.4553582071760638, "grad_norm": 3.396136999130249, "learning_rate": 1.753658737488299e-05, "loss": 1.2637, "step": 24418 }, { "epoch": 1.45547741089522, "grad_norm": 3.4057106971740723, "learning_rate": 1.7529395396579225e-05, "loss": 1.1093, "step": 24420 }, { "epoch": 1.455596614614376, "grad_norm": 3.2529656887054443, "learning_rate": 1.7522204579860064e-05, "loss": 1.1791, "step": 24422 }, { "epoch": 1.4557158183335321, "grad_norm": 3.0765938758850098, "learning_rate": 1.7515014924982803e-05, "loss": 1.1494, "step": 24424 }, { "epoch": 1.455835022052688, "grad_norm": 3.2389867305755615, "learning_rate": 1.750782643220457e-05, "loss": 1.1809, "step": 24426 }, { "epoch": 1.455954225771844, "grad_norm": 3.3661696910858154, "learning_rate": 1.750063910178258e-05, "loss": 1.1886, "step": 24428 }, { "epoch": 1.456073429491, "grad_norm": 2.992709159851074, "learning_rate": 1.7493452933973913e-05, "loss": 1.11, "step": 24430 }, { "epoch": 1.4561926332101562, "grad_norm": 3.087707996368408, "learning_rate": 1.7486267929035666e-05, "loss": 1.136, "step": 24432 }, { "epoch": 1.4563118369293122, "grad_norm": 2.902512550354004, "learning_rate": 1.747908408722485e-05, "loss": 1.144, "step": 24434 }, { "epoch": 1.4564310406484682, "grad_norm": 2.934587001800537, "learning_rate": 1.747190140879847e-05, "loss": 1.0419, "step": 24436 }, { "epoch": 1.4565502443676244, "grad_norm": 3.3624794483184814, "learning_rate": 1.746471989401347e-05, "loss": 1.0903, "step": 24438 }, { "epoch": 1.4566694480867803, "grad_norm": 3.2774722576141357, "learning_rate": 1.7457539543126738e-05, "loss": 1.1479, "step": 24440 }, { "epoch": 1.4567886518059363, "grad_norm": 3.06127667427063, "learning_rate": 1.74503603563952e-05, "loss": 1.1679, "step": 24442 }, { "epoch": 1.4569078555250923, "grad_norm": 3.2018959522247314, "learning_rate": 1.7443182334075602e-05, "loss": 1.2074, "step": 24444 }, { "epoch": 1.4570270592442485, "grad_norm": 3.142228841781616, "learning_rate": 1.7436005476424778e-05, "loss": 1.1886, "step": 24446 }, { "epoch": 1.4571462629634044, "grad_norm": 2.9966390132904053, "learning_rate": 1.7428829783699475e-05, "loss": 1.1366, "step": 24448 }, { "epoch": 1.4572654666825606, "grad_norm": 3.7074410915374756, "learning_rate": 1.742165525615634e-05, "loss": 1.2389, "step": 24450 }, { "epoch": 1.4573846704017166, "grad_norm": 3.342557668685913, "learning_rate": 1.741448189405207e-05, "loss": 1.1652, "step": 24452 }, { "epoch": 1.4575038741208726, "grad_norm": 2.7165369987487793, "learning_rate": 1.740730969764328e-05, "loss": 0.9529, "step": 24454 }, { "epoch": 1.4576230778400285, "grad_norm": 3.6192309856414795, "learning_rate": 1.740013866718653e-05, "loss": 1.2482, "step": 24456 }, { "epoch": 1.4577422815591847, "grad_norm": 3.2763543128967285, "learning_rate": 1.7392968802938354e-05, "loss": 1.1927, "step": 24458 }, { "epoch": 1.4578614852783407, "grad_norm": 3.3645334243774414, "learning_rate": 1.7385800105155277e-05, "loss": 1.0677, "step": 24460 }, { "epoch": 1.4579806889974967, "grad_norm": 3.253970146179199, "learning_rate": 1.7378632574093694e-05, "loss": 1.171, "step": 24462 }, { "epoch": 1.4580998927166529, "grad_norm": 3.31583833694458, "learning_rate": 1.737146621001003e-05, "loss": 1.1126, "step": 24464 }, { "epoch": 1.4582190964358088, "grad_norm": 3.0061254501342773, "learning_rate": 1.7364301013160685e-05, "loss": 1.0866, "step": 24466 }, { "epoch": 1.4583383001549648, "grad_norm": 3.1553070545196533, "learning_rate": 1.735713698380192e-05, "loss": 1.0804, "step": 24468 }, { "epoch": 1.4584575038741208, "grad_norm": 2.9638025760650635, "learning_rate": 1.734997412219007e-05, "loss": 1.1876, "step": 24470 }, { "epoch": 1.458576707593277, "grad_norm": 3.345292091369629, "learning_rate": 1.7342812428581357e-05, "loss": 1.1997, "step": 24472 }, { "epoch": 1.458695911312433, "grad_norm": 3.0415608882904053, "learning_rate": 1.7335651903231974e-05, "loss": 1.2265, "step": 24474 }, { "epoch": 1.4588151150315891, "grad_norm": 2.9115188121795654, "learning_rate": 1.732849254639809e-05, "loss": 1.0576, "step": 24476 }, { "epoch": 1.458934318750745, "grad_norm": 3.595224618911743, "learning_rate": 1.732133435833581e-05, "loss": 1.1747, "step": 24478 }, { "epoch": 1.459053522469901, "grad_norm": 3.4745798110961914, "learning_rate": 1.7314177339301207e-05, "loss": 1.2603, "step": 24480 }, { "epoch": 1.459172726189057, "grad_norm": 2.962399482727051, "learning_rate": 1.730702148955031e-05, "loss": 1.1134, "step": 24482 }, { "epoch": 1.4592919299082132, "grad_norm": 2.968733310699463, "learning_rate": 1.729986680933912e-05, "loss": 1.0969, "step": 24484 }, { "epoch": 1.4594111336273692, "grad_norm": 2.8839609622955322, "learning_rate": 1.729271329892356e-05, "loss": 1.0917, "step": 24486 }, { "epoch": 1.4595303373465252, "grad_norm": 3.102890968322754, "learning_rate": 1.7285560958559576e-05, "loss": 1.1634, "step": 24488 }, { "epoch": 1.4596495410656813, "grad_norm": 3.335611343383789, "learning_rate": 1.727840978850298e-05, "loss": 1.0705, "step": 24490 }, { "epoch": 1.4597687447848373, "grad_norm": 3.526656150817871, "learning_rate": 1.727125978900964e-05, "loss": 1.2291, "step": 24492 }, { "epoch": 1.4598879485039933, "grad_norm": 3.3416152000427246, "learning_rate": 1.7264110960335317e-05, "loss": 1.0442, "step": 24494 }, { "epoch": 1.4600071522231493, "grad_norm": 3.1358203887939453, "learning_rate": 1.725696330273575e-05, "loss": 1.0454, "step": 24496 }, { "epoch": 1.4601263559423054, "grad_norm": 3.3659377098083496, "learning_rate": 1.7249816816466634e-05, "loss": 1.2559, "step": 24498 }, { "epoch": 1.4602455596614614, "grad_norm": 3.131089925765991, "learning_rate": 1.7242671501783627e-05, "loss": 1.3308, "step": 24500 }, { "epoch": 1.4603647633806176, "grad_norm": 3.6438379287719727, "learning_rate": 1.7235527358942333e-05, "loss": 1.1139, "step": 24502 }, { "epoch": 1.4604839670997736, "grad_norm": 3.719175338745117, "learning_rate": 1.722838438819831e-05, "loss": 1.2859, "step": 24504 }, { "epoch": 1.4606031708189295, "grad_norm": 3.2793285846710205, "learning_rate": 1.722124258980714e-05, "loss": 0.8959, "step": 24506 }, { "epoch": 1.4607223745380855, "grad_norm": 2.672532081604004, "learning_rate": 1.7214101964024258e-05, "loss": 0.9944, "step": 24508 }, { "epoch": 1.4608415782572417, "grad_norm": 3.3163058757781982, "learning_rate": 1.7206962511105107e-05, "loss": 1.2305, "step": 24510 }, { "epoch": 1.4609607819763977, "grad_norm": 3.2013800144195557, "learning_rate": 1.7199824231305145e-05, "loss": 1.1411, "step": 24512 }, { "epoch": 1.4610799856955536, "grad_norm": 3.3605730533599854, "learning_rate": 1.7192687124879653e-05, "loss": 1.1344, "step": 24514 }, { "epoch": 1.4611991894147098, "grad_norm": 3.316892385482788, "learning_rate": 1.7185551192084016e-05, "loss": 1.2652, "step": 24516 }, { "epoch": 1.4613183931338658, "grad_norm": 3.1227996349334717, "learning_rate": 1.7178416433173485e-05, "loss": 1.1128, "step": 24518 }, { "epoch": 1.4614375968530218, "grad_norm": 3.2976386547088623, "learning_rate": 1.717128284840329e-05, "loss": 1.1333, "step": 24520 }, { "epoch": 1.4615568005721777, "grad_norm": 3.2078793048858643, "learning_rate": 1.7164150438028636e-05, "loss": 1.0614, "step": 24522 }, { "epoch": 1.461676004291334, "grad_norm": 3.0269553661346436, "learning_rate": 1.7157019202304665e-05, "loss": 1.0578, "step": 24524 }, { "epoch": 1.46179520801049, "grad_norm": 3.5120794773101807, "learning_rate": 1.7149889141486486e-05, "loss": 1.1269, "step": 24526 }, { "epoch": 1.461914411729646, "grad_norm": 3.4164958000183105, "learning_rate": 1.7142760255829153e-05, "loss": 1.1411, "step": 24528 }, { "epoch": 1.462033615448802, "grad_norm": 3.28049898147583, "learning_rate": 1.713563254558774e-05, "loss": 1.225, "step": 24530 }, { "epoch": 1.462152819167958, "grad_norm": 3.4393982887268066, "learning_rate": 1.7128506011017165e-05, "loss": 1.0701, "step": 24532 }, { "epoch": 1.462272022887114, "grad_norm": 3.2617034912109375, "learning_rate": 1.712138065237241e-05, "loss": 1.1492, "step": 24534 }, { "epoch": 1.4623912266062702, "grad_norm": 3.1541810035705566, "learning_rate": 1.711425646990838e-05, "loss": 1.108, "step": 24536 }, { "epoch": 1.4625104303254262, "grad_norm": 3.000584363937378, "learning_rate": 1.7107133463879882e-05, "loss": 1.1576, "step": 24538 }, { "epoch": 1.4626296340445821, "grad_norm": 3.2780678272247314, "learning_rate": 1.7100011634541775e-05, "loss": 1.0748, "step": 24540 }, { "epoch": 1.4627488377637383, "grad_norm": 3.074033260345459, "learning_rate": 1.7092890982148817e-05, "loss": 0.9918, "step": 24542 }, { "epoch": 1.4628680414828943, "grad_norm": 2.9847025871276855, "learning_rate": 1.7085771506955743e-05, "loss": 1.0514, "step": 24544 }, { "epoch": 1.4629872452020503, "grad_norm": 3.0604093074798584, "learning_rate": 1.7078653209217237e-05, "loss": 1.089, "step": 24546 }, { "epoch": 1.4631064489212062, "grad_norm": 3.0653839111328125, "learning_rate": 1.7071536089187944e-05, "loss": 1.0888, "step": 24548 }, { "epoch": 1.4632256526403624, "grad_norm": 3.0580379962921143, "learning_rate": 1.706442014712245e-05, "loss": 1.1345, "step": 24550 }, { "epoch": 1.4633448563595184, "grad_norm": 3.205458164215088, "learning_rate": 1.7057305383275375e-05, "loss": 1.0819, "step": 24552 }, { "epoch": 1.4634640600786746, "grad_norm": 3.254105806350708, "learning_rate": 1.7050191797901177e-05, "loss": 1.1085, "step": 24554 }, { "epoch": 1.4635832637978305, "grad_norm": 3.144254446029663, "learning_rate": 1.7043079391254336e-05, "loss": 1.1751, "step": 24556 }, { "epoch": 1.4637024675169865, "grad_norm": 3.3069112300872803, "learning_rate": 1.7035968163589326e-05, "loss": 1.0434, "step": 24558 }, { "epoch": 1.4638216712361425, "grad_norm": 3.57327938079834, "learning_rate": 1.7028858115160516e-05, "loss": 1.205, "step": 24560 }, { "epoch": 1.4639408749552987, "grad_norm": 2.913797616958618, "learning_rate": 1.7021749246222267e-05, "loss": 1.2189, "step": 24562 }, { "epoch": 1.4640600786744546, "grad_norm": 3.384770631790161, "learning_rate": 1.7014641557028877e-05, "loss": 1.348, "step": 24564 }, { "epoch": 1.4641792823936106, "grad_norm": 3.4183411598205566, "learning_rate": 1.7007535047834615e-05, "loss": 1.1051, "step": 24566 }, { "epoch": 1.4642984861127668, "grad_norm": 3.286226511001587, "learning_rate": 1.700042971889371e-05, "loss": 1.1365, "step": 24568 }, { "epoch": 1.4644176898319228, "grad_norm": 2.684744358062744, "learning_rate": 1.6993325570460346e-05, "loss": 1.0879, "step": 24570 }, { "epoch": 1.4645368935510787, "grad_norm": 3.4617512226104736, "learning_rate": 1.698622260278865e-05, "loss": 1.1784, "step": 24572 }, { "epoch": 1.4646560972702347, "grad_norm": 3.638146162033081, "learning_rate": 1.697912081613271e-05, "loss": 1.2095, "step": 24574 }, { "epoch": 1.464775300989391, "grad_norm": 3.3006718158721924, "learning_rate": 1.6972020210746636e-05, "loss": 1.0539, "step": 24576 }, { "epoch": 1.4648945047085469, "grad_norm": 3.1180193424224854, "learning_rate": 1.696492078688437e-05, "loss": 1.1584, "step": 24578 }, { "epoch": 1.465013708427703, "grad_norm": 3.310835361480713, "learning_rate": 1.695782254479993e-05, "loss": 1.079, "step": 24580 }, { "epoch": 1.465132912146859, "grad_norm": 3.2466773986816406, "learning_rate": 1.6950725484747244e-05, "loss": 1.0469, "step": 24582 }, { "epoch": 1.465252115866015, "grad_norm": 3.361668109893799, "learning_rate": 1.6943629606980155e-05, "loss": 1.3023, "step": 24584 }, { "epoch": 1.465371319585171, "grad_norm": 3.289886474609375, "learning_rate": 1.693653491175255e-05, "loss": 1.0891, "step": 24586 }, { "epoch": 1.4654905233043272, "grad_norm": 3.3415145874023438, "learning_rate": 1.692944139931822e-05, "loss": 1.1037, "step": 24588 }, { "epoch": 1.4656097270234831, "grad_norm": 3.654315233230591, "learning_rate": 1.6922349069930914e-05, "loss": 1.2287, "step": 24590 }, { "epoch": 1.465728930742639, "grad_norm": 2.8551108837127686, "learning_rate": 1.6915257923844347e-05, "loss": 1.1234, "step": 24592 }, { "epoch": 1.4658481344617953, "grad_norm": 3.2842626571655273, "learning_rate": 1.6908167961312232e-05, "loss": 1.0566, "step": 24594 }, { "epoch": 1.4659673381809513, "grad_norm": 3.193775177001953, "learning_rate": 1.690107918258816e-05, "loss": 1.1523, "step": 24596 }, { "epoch": 1.4660865419001072, "grad_norm": 2.988945245742798, "learning_rate": 1.6893991587925708e-05, "loss": 1.1978, "step": 24598 }, { "epoch": 1.4662057456192632, "grad_norm": 3.4063642024993896, "learning_rate": 1.688690517757848e-05, "loss": 1.2467, "step": 24600 }, { "epoch": 1.4663249493384194, "grad_norm": 2.9949426651000977, "learning_rate": 1.6879819951799924e-05, "loss": 1.0639, "step": 24602 }, { "epoch": 1.4664441530575754, "grad_norm": 3.623444080352783, "learning_rate": 1.6872735910843542e-05, "loss": 1.3011, "step": 24604 }, { "epoch": 1.4665633567767316, "grad_norm": 3.0151398181915283, "learning_rate": 1.6865653054962736e-05, "loss": 1.1246, "step": 24606 }, { "epoch": 1.4666825604958875, "grad_norm": 3.3881633281707764, "learning_rate": 1.6858571384410893e-05, "loss": 1.0236, "step": 24608 }, { "epoch": 1.4668017642150435, "grad_norm": 3.361802101135254, "learning_rate": 1.685149089944134e-05, "loss": 1.3005, "step": 24610 }, { "epoch": 1.4669209679341995, "grad_norm": 3.3235836029052734, "learning_rate": 1.6844411600307375e-05, "loss": 1.1046, "step": 24612 }, { "epoch": 1.4670401716533557, "grad_norm": 3.10204815864563, "learning_rate": 1.683733348726225e-05, "loss": 1.1472, "step": 24614 }, { "epoch": 1.4671593753725116, "grad_norm": 3.1655092239379883, "learning_rate": 1.683025656055917e-05, "loss": 1.0994, "step": 24616 }, { "epoch": 1.4672785790916676, "grad_norm": 3.0088818073272705, "learning_rate": 1.6823180820451302e-05, "loss": 1.2339, "step": 24618 }, { "epoch": 1.4673977828108238, "grad_norm": 3.373913049697876, "learning_rate": 1.681610626719175e-05, "loss": 1.1096, "step": 24620 }, { "epoch": 1.4675169865299798, "grad_norm": 3.128727436065674, "learning_rate": 1.680903290103365e-05, "loss": 1.2242, "step": 24622 }, { "epoch": 1.4676361902491357, "grad_norm": 3.1732959747314453, "learning_rate": 1.680196072222997e-05, "loss": 1.157, "step": 24624 }, { "epoch": 1.4677553939682917, "grad_norm": 3.102588176727295, "learning_rate": 1.6794889731033758e-05, "loss": 1.0147, "step": 24626 }, { "epoch": 1.4678745976874479, "grad_norm": 3.391313314437866, "learning_rate": 1.6787819927697952e-05, "loss": 1.3309, "step": 24628 }, { "epoch": 1.4679938014066038, "grad_norm": 3.0091443061828613, "learning_rate": 1.678075131247546e-05, "loss": 1.1755, "step": 24630 }, { "epoch": 1.46811300512576, "grad_norm": 3.2300498485565186, "learning_rate": 1.6773683885619152e-05, "loss": 1.1636, "step": 24632 }, { "epoch": 1.468232208844916, "grad_norm": 3.0719826221466064, "learning_rate": 1.6766617647381853e-05, "loss": 1.1548, "step": 24634 }, { "epoch": 1.468351412564072, "grad_norm": 3.385754346847534, "learning_rate": 1.6759552598016353e-05, "loss": 1.1653, "step": 24636 }, { "epoch": 1.468470616283228, "grad_norm": 3.3298685550689697, "learning_rate": 1.6752488737775367e-05, "loss": 1.037, "step": 24638 }, { "epoch": 1.4685898200023841, "grad_norm": 3.353084087371826, "learning_rate": 1.6745426066911647e-05, "loss": 1.0328, "step": 24640 }, { "epoch": 1.46870902372154, "grad_norm": 3.180664300918579, "learning_rate": 1.6738364585677794e-05, "loss": 1.0941, "step": 24642 }, { "epoch": 1.468828227440696, "grad_norm": 3.3233532905578613, "learning_rate": 1.6731304294326434e-05, "loss": 1.2376, "step": 24644 }, { "epoch": 1.4689474311598523, "grad_norm": 2.7190043926239014, "learning_rate": 1.6724245193110178e-05, "loss": 1.0921, "step": 24646 }, { "epoch": 1.4690666348790082, "grad_norm": 3.3369243144989014, "learning_rate": 1.6717187282281493e-05, "loss": 1.0623, "step": 24648 }, { "epoch": 1.4691858385981642, "grad_norm": 3.2626445293426514, "learning_rate": 1.6710130562092907e-05, "loss": 1.1246, "step": 24650 }, { "epoch": 1.4693050423173202, "grad_norm": 3.478058099746704, "learning_rate": 1.6703075032796856e-05, "loss": 1.2071, "step": 24652 }, { "epoch": 1.4694242460364764, "grad_norm": 3.107576847076416, "learning_rate": 1.6696020694645732e-05, "loss": 1.2375, "step": 24654 }, { "epoch": 1.4695434497556323, "grad_norm": 3.3623852729797363, "learning_rate": 1.6688967547891876e-05, "loss": 1.2475, "step": 24656 }, { "epoch": 1.4696626534747885, "grad_norm": 3.2596654891967773, "learning_rate": 1.668191559278766e-05, "loss": 1.0721, "step": 24658 }, { "epoch": 1.4697818571939445, "grad_norm": 3.1583974361419678, "learning_rate": 1.6674864829585302e-05, "loss": 1.1624, "step": 24660 }, { "epoch": 1.4699010609131005, "grad_norm": 3.1289031505584717, "learning_rate": 1.6667815258537035e-05, "loss": 1.1013, "step": 24662 }, { "epoch": 1.4700202646322564, "grad_norm": 3.3609540462493896, "learning_rate": 1.6660766879895097e-05, "loss": 1.1561, "step": 24664 }, { "epoch": 1.4701394683514126, "grad_norm": 2.860196113586426, "learning_rate": 1.6653719693911557e-05, "loss": 1.0932, "step": 24666 }, { "epoch": 1.4702586720705686, "grad_norm": 2.8234243392944336, "learning_rate": 1.6646673700838578e-05, "loss": 1.1362, "step": 24668 }, { "epoch": 1.4703778757897246, "grad_norm": 2.953608751296997, "learning_rate": 1.6639628900928196e-05, "loss": 1.1099, "step": 24670 }, { "epoch": 1.4704970795088808, "grad_norm": 3.273021697998047, "learning_rate": 1.6632585294432423e-05, "loss": 1.1582, "step": 24672 }, { "epoch": 1.4706162832280367, "grad_norm": 3.0068657398223877, "learning_rate": 1.6625542881603252e-05, "loss": 1.1425, "step": 24674 }, { "epoch": 1.4707354869471927, "grad_norm": 3.5316498279571533, "learning_rate": 1.6618501662692594e-05, "loss": 1.3151, "step": 24676 }, { "epoch": 1.4708546906663487, "grad_norm": 3.093569040298462, "learning_rate": 1.6611461637952346e-05, "loss": 1.1245, "step": 24678 }, { "epoch": 1.4709738943855049, "grad_norm": 3.179098606109619, "learning_rate": 1.660442280763435e-05, "loss": 1.163, "step": 24680 }, { "epoch": 1.4710930981046608, "grad_norm": 3.0572562217712402, "learning_rate": 1.6597385171990416e-05, "loss": 1.14, "step": 24682 }, { "epoch": 1.471212301823817, "grad_norm": 3.214336633682251, "learning_rate": 1.6590348731272286e-05, "loss": 1.0739, "step": 24684 }, { "epoch": 1.471331505542973, "grad_norm": 2.9970760345458984, "learning_rate": 1.658331348573172e-05, "loss": 1.069, "step": 24686 }, { "epoch": 1.471450709262129, "grad_norm": 3.1303467750549316, "learning_rate": 1.6576279435620353e-05, "loss": 1.1952, "step": 24688 }, { "epoch": 1.471569912981285, "grad_norm": 3.1675150394439697, "learning_rate": 1.6569246581189807e-05, "loss": 1.2031, "step": 24690 }, { "epoch": 1.4716891167004411, "grad_norm": 2.8708431720733643, "learning_rate": 1.656221492269171e-05, "loss": 1.0606, "step": 24692 }, { "epoch": 1.471808320419597, "grad_norm": 3.364556312561035, "learning_rate": 1.6555184460377592e-05, "loss": 1.1934, "step": 24694 }, { "epoch": 1.471927524138753, "grad_norm": 3.1971397399902344, "learning_rate": 1.654815519449896e-05, "loss": 1.0671, "step": 24696 }, { "epoch": 1.4720467278579092, "grad_norm": 3.1130802631378174, "learning_rate": 1.6541127125307266e-05, "loss": 1.1961, "step": 24698 }, { "epoch": 1.4721659315770652, "grad_norm": 3.133693218231201, "learning_rate": 1.6534100253053937e-05, "loss": 1.0835, "step": 24700 }, { "epoch": 1.4722851352962212, "grad_norm": 3.5157060623168945, "learning_rate": 1.652707457799034e-05, "loss": 1.1309, "step": 24702 }, { "epoch": 1.4724043390153772, "grad_norm": 3.2665903568267822, "learning_rate": 1.652005010036781e-05, "loss": 1.2304, "step": 24704 }, { "epoch": 1.4725235427345333, "grad_norm": 3.0279746055603027, "learning_rate": 1.6513026820437645e-05, "loss": 1.0585, "step": 24706 }, { "epoch": 1.4726427464536893, "grad_norm": 3.32724928855896, "learning_rate": 1.6506004738451064e-05, "loss": 1.1355, "step": 24708 }, { "epoch": 1.4727619501728455, "grad_norm": 3.239192485809326, "learning_rate": 1.6498983854659326e-05, "loss": 1.1125, "step": 24710 }, { "epoch": 1.4728811538920015, "grad_norm": 3.270246744155884, "learning_rate": 1.649196416931353e-05, "loss": 1.1205, "step": 24712 }, { "epoch": 1.4730003576111574, "grad_norm": 3.2654287815093994, "learning_rate": 1.6484945682664833e-05, "loss": 1.0975, "step": 24714 }, { "epoch": 1.4731195613303134, "grad_norm": 3.3927714824676514, "learning_rate": 1.6477928394964298e-05, "loss": 0.9951, "step": 24716 }, { "epoch": 1.4732387650494696, "grad_norm": 3.462275981903076, "learning_rate": 1.6470912306462966e-05, "loss": 1.1401, "step": 24718 }, { "epoch": 1.4733579687686256, "grad_norm": 3.750941276550293, "learning_rate": 1.646389741741181e-05, "loss": 1.12, "step": 24720 }, { "epoch": 1.4734771724877815, "grad_norm": 3.5338001251220703, "learning_rate": 1.6456883728061795e-05, "loss": 1.1597, "step": 24722 }, { "epoch": 1.4735963762069377, "grad_norm": 3.1858580112457275, "learning_rate": 1.6449871238663812e-05, "loss": 1.1198, "step": 24724 }, { "epoch": 1.4737155799260937, "grad_norm": 2.9815833568573, "learning_rate": 1.644285994946871e-05, "loss": 1.1564, "step": 24726 }, { "epoch": 1.4738347836452497, "grad_norm": 3.599337100982666, "learning_rate": 1.643584986072735e-05, "loss": 1.1934, "step": 24728 }, { "epoch": 1.4739539873644056, "grad_norm": 3.530731678009033, "learning_rate": 1.642884097269045e-05, "loss": 1.0699, "step": 24730 }, { "epoch": 1.4740731910835618, "grad_norm": 2.7953875064849854, "learning_rate": 1.6421833285608786e-05, "loss": 1.1575, "step": 24732 }, { "epoch": 1.4741923948027178, "grad_norm": 3.7952139377593994, "learning_rate": 1.641482679973305e-05, "loss": 1.2173, "step": 24734 }, { "epoch": 1.474311598521874, "grad_norm": 3.2601540088653564, "learning_rate": 1.640782151531384e-05, "loss": 1.0933, "step": 24736 }, { "epoch": 1.47443080224103, "grad_norm": 3.3843772411346436, "learning_rate": 1.6400817432601793e-05, "loss": 1.1211, "step": 24738 }, { "epoch": 1.474550005960186, "grad_norm": 3.264902353286743, "learning_rate": 1.6393814551847475e-05, "loss": 0.9896, "step": 24740 }, { "epoch": 1.474669209679342, "grad_norm": 3.4179623126983643, "learning_rate": 1.6386812873301394e-05, "loss": 1.0818, "step": 24742 }, { "epoch": 1.474788413398498, "grad_norm": 3.333345890045166, "learning_rate": 1.637981239721402e-05, "loss": 0.9404, "step": 24744 }, { "epoch": 1.474907617117654, "grad_norm": 3.359894275665283, "learning_rate": 1.6372813123835786e-05, "loss": 1.2023, "step": 24746 }, { "epoch": 1.47502682083681, "grad_norm": 3.3898229598999023, "learning_rate": 1.6365815053417082e-05, "loss": 1.2305, "step": 24748 }, { "epoch": 1.4751460245559662, "grad_norm": 3.560046911239624, "learning_rate": 1.6358818186208246e-05, "loss": 1.3037, "step": 24750 }, { "epoch": 1.4752652282751222, "grad_norm": 3.148075819015503, "learning_rate": 1.6351822522459593e-05, "loss": 1.1552, "step": 24752 }, { "epoch": 1.4753844319942782, "grad_norm": 2.8653738498687744, "learning_rate": 1.6344828062421358e-05, "loss": 1.0459, "step": 24754 }, { "epoch": 1.4755036357134341, "grad_norm": 3.1593434810638428, "learning_rate": 1.6337834806343783e-05, "loss": 1.1239, "step": 24756 }, { "epoch": 1.4756228394325903, "grad_norm": 3.1392974853515625, "learning_rate": 1.6330842754477038e-05, "loss": 1.0831, "step": 24758 }, { "epoch": 1.4757420431517463, "grad_norm": 3.3319101333618164, "learning_rate": 1.6323851907071243e-05, "loss": 1.1372, "step": 24760 }, { "epoch": 1.4758612468709025, "grad_norm": 3.2209067344665527, "learning_rate": 1.6316862264376488e-05, "loss": 1.2306, "step": 24762 }, { "epoch": 1.4759804505900584, "grad_norm": 3.29634165763855, "learning_rate": 1.6309873826642808e-05, "loss": 1.1329, "step": 24764 }, { "epoch": 1.4760996543092144, "grad_norm": 3.4953548908233643, "learning_rate": 1.630288659412022e-05, "loss": 1.0543, "step": 24766 }, { "epoch": 1.4762188580283704, "grad_norm": 3.199498414993286, "learning_rate": 1.6295900567058662e-05, "loss": 1.1577, "step": 24768 }, { "epoch": 1.4763380617475266, "grad_norm": 3.2886834144592285, "learning_rate": 1.6288915745708068e-05, "loss": 0.983, "step": 24770 }, { "epoch": 1.4764572654666825, "grad_norm": 3.386744976043701, "learning_rate": 1.6281932130318277e-05, "loss": 1.0773, "step": 24772 }, { "epoch": 1.4765764691858387, "grad_norm": 3.1846776008605957, "learning_rate": 1.6274949721139175e-05, "loss": 1.1798, "step": 24774 }, { "epoch": 1.4766956729049947, "grad_norm": 3.450474500656128, "learning_rate": 1.6267968518420478e-05, "loss": 1.1438, "step": 24776 }, { "epoch": 1.4768148766241507, "grad_norm": 3.2902536392211914, "learning_rate": 1.6260988522411974e-05, "loss": 1.3271, "step": 24778 }, { "epoch": 1.4769340803433066, "grad_norm": 3.3039331436157227, "learning_rate": 1.6254009733363364e-05, "loss": 1.1478, "step": 24780 }, { "epoch": 1.4770532840624626, "grad_norm": 3.4735381603240967, "learning_rate": 1.6247032151524257e-05, "loss": 1.1527, "step": 24782 }, { "epoch": 1.4771724877816188, "grad_norm": 3.0946731567382812, "learning_rate": 1.6240055777144307e-05, "loss": 1.1733, "step": 24784 }, { "epoch": 1.4772916915007748, "grad_norm": 3.525228500366211, "learning_rate": 1.623308061047307e-05, "loss": 1.2523, "step": 24786 }, { "epoch": 1.477410895219931, "grad_norm": 3.5183420181274414, "learning_rate": 1.6226106651760076e-05, "loss": 1.2686, "step": 24788 }, { "epoch": 1.477530098939087, "grad_norm": 2.952183723449707, "learning_rate": 1.621913390125478e-05, "loss": 1.0893, "step": 24790 }, { "epoch": 1.477649302658243, "grad_norm": 3.3970894813537598, "learning_rate": 1.6212162359206683e-05, "loss": 1.2483, "step": 24792 }, { "epoch": 1.4777685063773989, "grad_norm": 3.3194024562835693, "learning_rate": 1.6205192025865124e-05, "loss": 1.1053, "step": 24794 }, { "epoch": 1.477887710096555, "grad_norm": 2.9674124717712402, "learning_rate": 1.6198222901479453e-05, "loss": 1.118, "step": 24796 }, { "epoch": 1.478006913815711, "grad_norm": 3.22308087348938, "learning_rate": 1.619125498629904e-05, "loss": 1.2653, "step": 24798 }, { "epoch": 1.4781261175348672, "grad_norm": 3.0872642993927, "learning_rate": 1.6184288280573078e-05, "loss": 1.0776, "step": 24800 }, { "epoch": 1.4782453212540232, "grad_norm": 3.1745824813842773, "learning_rate": 1.6177322784550835e-05, "loss": 1.1865, "step": 24802 }, { "epoch": 1.4783645249731792, "grad_norm": 3.378934860229492, "learning_rate": 1.6170358498481487e-05, "loss": 1.0986, "step": 24804 }, { "epoch": 1.4784837286923351, "grad_norm": 3.1455113887786865, "learning_rate": 1.6163395422614158e-05, "loss": 1.0119, "step": 24806 }, { "epoch": 1.478602932411491, "grad_norm": 2.478346347808838, "learning_rate": 1.615643355719794e-05, "loss": 1.0879, "step": 24808 }, { "epoch": 1.4787221361306473, "grad_norm": 2.9792373180389404, "learning_rate": 1.6149472902481894e-05, "loss": 1.0304, "step": 24810 }, { "epoch": 1.4788413398498033, "grad_norm": 3.572793483734131, "learning_rate": 1.6142513458715018e-05, "loss": 1.1584, "step": 24812 }, { "epoch": 1.4789605435689595, "grad_norm": 3.2746224403381348, "learning_rate": 1.6135555226146283e-05, "loss": 1.0543, "step": 24814 }, { "epoch": 1.4790797472881154, "grad_norm": 3.233834981918335, "learning_rate": 1.6128598205024597e-05, "loss": 1.1069, "step": 24816 }, { "epoch": 1.4791989510072714, "grad_norm": 3.249610662460327, "learning_rate": 1.6121642395598833e-05, "loss": 1.2719, "step": 24818 }, { "epoch": 1.4793181547264274, "grad_norm": 3.7139298915863037, "learning_rate": 1.611468779811786e-05, "loss": 1.2379, "step": 24820 }, { "epoch": 1.4794373584455836, "grad_norm": 3.103663444519043, "learning_rate": 1.6107734412830433e-05, "loss": 1.0698, "step": 24822 }, { "epoch": 1.4795565621647395, "grad_norm": 3.205341339111328, "learning_rate": 1.6100782239985285e-05, "loss": 1.1604, "step": 24824 }, { "epoch": 1.4796757658838957, "grad_norm": 2.7741217613220215, "learning_rate": 1.609383127983116e-05, "loss": 1.0549, "step": 24826 }, { "epoch": 1.4797949696030517, "grad_norm": 3.199512243270874, "learning_rate": 1.6086881532616697e-05, "loss": 1.1164, "step": 24828 }, { "epoch": 1.4799141733222076, "grad_norm": 3.2203726768493652, "learning_rate": 1.6079932998590512e-05, "loss": 1.2024, "step": 24830 }, { "epoch": 1.4800333770413636, "grad_norm": 3.682934522628784, "learning_rate": 1.6072985678001186e-05, "loss": 1.1747, "step": 24832 }, { "epoch": 1.4801525807605198, "grad_norm": 3.096900224685669, "learning_rate": 1.606603957109724e-05, "loss": 1.0747, "step": 24834 }, { "epoch": 1.4802717844796758, "grad_norm": 3.1701812744140625, "learning_rate": 1.6059094678127144e-05, "loss": 1.113, "step": 24836 }, { "epoch": 1.4803909881988317, "grad_norm": 2.916015386581421, "learning_rate": 1.6052150999339393e-05, "loss": 1.1482, "step": 24838 }, { "epoch": 1.480510191917988, "grad_norm": 3.137519121170044, "learning_rate": 1.6045208534982335e-05, "loss": 1.1412, "step": 24840 }, { "epoch": 1.480629395637144, "grad_norm": 2.9895031452178955, "learning_rate": 1.603826728530433e-05, "loss": 1.1042, "step": 24842 }, { "epoch": 1.4807485993562999, "grad_norm": 3.0715315341949463, "learning_rate": 1.603132725055373e-05, "loss": 0.9795, "step": 24844 }, { "epoch": 1.4808678030754558, "grad_norm": 2.793079137802124, "learning_rate": 1.6024388430978748e-05, "loss": 1.0255, "step": 24846 }, { "epoch": 1.480987006794612, "grad_norm": 2.7869043350219727, "learning_rate": 1.6017450826827658e-05, "loss": 1.0872, "step": 24848 }, { "epoch": 1.481106210513768, "grad_norm": 3.1526377201080322, "learning_rate": 1.601051443834861e-05, "loss": 1.0969, "step": 24850 }, { "epoch": 1.4812254142329242, "grad_norm": 3.10449481010437, "learning_rate": 1.6003579265789763e-05, "loss": 1.0812, "step": 24852 }, { "epoch": 1.4813446179520802, "grad_norm": 2.9868717193603516, "learning_rate": 1.5996645309399206e-05, "loss": 1.1259, "step": 24854 }, { "epoch": 1.4814638216712361, "grad_norm": 3.3476064205169678, "learning_rate": 1.598971256942498e-05, "loss": 1.1285, "step": 24856 }, { "epoch": 1.481583025390392, "grad_norm": 2.9506661891937256, "learning_rate": 1.5982781046115103e-05, "loss": 1.0973, "step": 24858 }, { "epoch": 1.4817022291095483, "grad_norm": 3.6165506839752197, "learning_rate": 1.5975850739717522e-05, "loss": 1.2042, "step": 24860 }, { "epoch": 1.4818214328287043, "grad_norm": 3.3212242126464844, "learning_rate": 1.5968921650480205e-05, "loss": 1.1876, "step": 24862 }, { "epoch": 1.4819406365478602, "grad_norm": 3.2192513942718506, "learning_rate": 1.5961993778650963e-05, "loss": 1.2507, "step": 24864 }, { "epoch": 1.4820598402670164, "grad_norm": 3.4133434295654297, "learning_rate": 1.595506712447768e-05, "loss": 1.1247, "step": 24866 }, { "epoch": 1.4821790439861724, "grad_norm": 3.2962019443511963, "learning_rate": 1.5948141688208147e-05, "loss": 1.09, "step": 24868 }, { "epoch": 1.4822982477053284, "grad_norm": 3.395543336868286, "learning_rate": 1.5941217470090057e-05, "loss": 1.1155, "step": 24870 }, { "epoch": 1.4824174514244843, "grad_norm": 3.2959210872650146, "learning_rate": 1.593429447037117e-05, "loss": 1.0685, "step": 24872 }, { "epoch": 1.4825366551436405, "grad_norm": 2.8138067722320557, "learning_rate": 1.592737268929912e-05, "loss": 0.9678, "step": 24874 }, { "epoch": 1.4826558588627965, "grad_norm": 2.952629804611206, "learning_rate": 1.5920452127121525e-05, "loss": 1.052, "step": 24876 }, { "epoch": 1.4827750625819527, "grad_norm": 3.482356309890747, "learning_rate": 1.5913532784085965e-05, "loss": 1.2632, "step": 24878 }, { "epoch": 1.4828942663011087, "grad_norm": 2.7613017559051514, "learning_rate": 1.590661466043996e-05, "loss": 1.2216, "step": 24880 }, { "epoch": 1.4830134700202646, "grad_norm": 3.2500834465026855, "learning_rate": 1.5899697756430988e-05, "loss": 1.2868, "step": 24882 }, { "epoch": 1.4831326737394206, "grad_norm": 3.337594509124756, "learning_rate": 1.5892782072306522e-05, "loss": 1.0906, "step": 24884 }, { "epoch": 1.4832518774585768, "grad_norm": 2.956756353378296, "learning_rate": 1.5885867608313926e-05, "loss": 1.0418, "step": 24886 }, { "epoch": 1.4833710811777328, "grad_norm": 3.3362042903900146, "learning_rate": 1.5878954364700555e-05, "loss": 1.136, "step": 24888 }, { "epoch": 1.4834902848968887, "grad_norm": 3.2661936283111572, "learning_rate": 1.587204234171374e-05, "loss": 1.0203, "step": 24890 }, { "epoch": 1.483609488616045, "grad_norm": 2.952348232269287, "learning_rate": 1.5865131539600743e-05, "loss": 1.2561, "step": 24892 }, { "epoch": 1.4837286923352009, "grad_norm": 3.0990161895751953, "learning_rate": 1.5858221958608776e-05, "loss": 1.1978, "step": 24894 }, { "epoch": 1.4838478960543569, "grad_norm": 3.2655141353607178, "learning_rate": 1.5851313598985028e-05, "loss": 1.1655, "step": 24896 }, { "epoch": 1.4839670997735128, "grad_norm": 3.165548324584961, "learning_rate": 1.584440646097663e-05, "loss": 1.123, "step": 24898 }, { "epoch": 1.484086303492669, "grad_norm": 2.700371265411377, "learning_rate": 1.5837500544830676e-05, "loss": 1.0083, "step": 24900 }, { "epoch": 1.484205507211825, "grad_norm": 2.9751758575439453, "learning_rate": 1.583059585079421e-05, "loss": 1.2588, "step": 24902 }, { "epoch": 1.4843247109309812, "grad_norm": 3.4719531536102295, "learning_rate": 1.582369237911424e-05, "loss": 1.1221, "step": 24904 }, { "epoch": 1.4844439146501371, "grad_norm": 3.285109758377075, "learning_rate": 1.5816790130037718e-05, "loss": 1.0457, "step": 24906 }, { "epoch": 1.484563118369293, "grad_norm": 3.306415319442749, "learning_rate": 1.580988910381159e-05, "loss": 1.2743, "step": 24908 }, { "epoch": 1.484682322088449, "grad_norm": 3.602668285369873, "learning_rate": 1.580298930068269e-05, "loss": 1.1066, "step": 24910 }, { "epoch": 1.4848015258076053, "grad_norm": 3.4040610790252686, "learning_rate": 1.5796090720897878e-05, "loss": 1.2067, "step": 24912 }, { "epoch": 1.4849207295267612, "grad_norm": 3.352262496948242, "learning_rate": 1.5789193364703942e-05, "loss": 0.929, "step": 24914 }, { "epoch": 1.4850399332459172, "grad_norm": 3.1359236240386963, "learning_rate": 1.5782297232347576e-05, "loss": 1.2397, "step": 24916 }, { "epoch": 1.4851591369650734, "grad_norm": 3.232006788253784, "learning_rate": 1.5775402324075534e-05, "loss": 1.1816, "step": 24918 }, { "epoch": 1.4852783406842294, "grad_norm": 3.4049253463745117, "learning_rate": 1.5768508640134445e-05, "loss": 1.0867, "step": 24920 }, { "epoch": 1.4853975444033853, "grad_norm": 3.1407432556152344, "learning_rate": 1.576161618077092e-05, "loss": 1.2026, "step": 24922 }, { "epoch": 1.4855167481225413, "grad_norm": 3.435264825820923, "learning_rate": 1.5754724946231526e-05, "loss": 1.2117, "step": 24924 }, { "epoch": 1.4856359518416975, "grad_norm": 3.055248260498047, "learning_rate": 1.574783493676281e-05, "loss": 1.1559, "step": 24926 }, { "epoch": 1.4857551555608535, "grad_norm": 3.076519012451172, "learning_rate": 1.574094615261122e-05, "loss": 1.1858, "step": 24928 }, { "epoch": 1.4858743592800097, "grad_norm": 3.1704351902008057, "learning_rate": 1.5734058594023187e-05, "loss": 1.187, "step": 24930 }, { "epoch": 1.4859935629991656, "grad_norm": 2.9642398357391357, "learning_rate": 1.5727172261245148e-05, "loss": 1.1139, "step": 24932 }, { "epoch": 1.4861127667183216, "grad_norm": 2.85117769241333, "learning_rate": 1.5720287154523387e-05, "loss": 1.1055, "step": 24934 }, { "epoch": 1.4862319704374776, "grad_norm": 2.8039727210998535, "learning_rate": 1.571340327410426e-05, "loss": 1.0661, "step": 24936 }, { "epoch": 1.4863511741566338, "grad_norm": 3.136503219604492, "learning_rate": 1.5706520620234005e-05, "loss": 1.1138, "step": 24938 }, { "epoch": 1.4864703778757897, "grad_norm": 3.1982645988464355, "learning_rate": 1.5699639193158845e-05, "loss": 1.2072, "step": 24940 }, { "epoch": 1.4865895815949457, "grad_norm": 2.999000310897827, "learning_rate": 1.5692758993124946e-05, "loss": 1.011, "step": 24942 }, { "epoch": 1.4867087853141019, "grad_norm": 3.200948476791382, "learning_rate": 1.568588002037844e-05, "loss": 1.3012, "step": 24944 }, { "epoch": 1.4868279890332579, "grad_norm": 3.453842878341675, "learning_rate": 1.567900227516541e-05, "loss": 1.1824, "step": 24946 }, { "epoch": 1.4869471927524138, "grad_norm": 3.4418601989746094, "learning_rate": 1.5672125757731897e-05, "loss": 1.2589, "step": 24948 }, { "epoch": 1.4870663964715698, "grad_norm": 3.107330799102783, "learning_rate": 1.5665250468323894e-05, "loss": 1.2384, "step": 24950 }, { "epoch": 1.487185600190726, "grad_norm": 3.0898191928863525, "learning_rate": 1.565837640718735e-05, "loss": 1.09, "step": 24952 }, { "epoch": 1.487304803909882, "grad_norm": 3.3638529777526855, "learning_rate": 1.5651503574568187e-05, "loss": 1.1375, "step": 24954 }, { "epoch": 1.4874240076290381, "grad_norm": 3.364363670349121, "learning_rate": 1.5644631970712264e-05, "loss": 1.0822, "step": 24956 }, { "epoch": 1.4875432113481941, "grad_norm": 2.7823026180267334, "learning_rate": 1.5637761595865403e-05, "loss": 1.0582, "step": 24958 }, { "epoch": 1.48766241506735, "grad_norm": 3.2932558059692383, "learning_rate": 1.563089245027338e-05, "loss": 1.174, "step": 24960 }, { "epoch": 1.487781618786506, "grad_norm": 3.440803289413452, "learning_rate": 1.5624024534181925e-05, "loss": 1.1069, "step": 24962 }, { "epoch": 1.4879008225056622, "grad_norm": 3.3052308559417725, "learning_rate": 1.5617157847836733e-05, "loss": 1.1703, "step": 24964 }, { "epoch": 1.4880200262248182, "grad_norm": 3.4731674194335938, "learning_rate": 1.561029239148344e-05, "loss": 1.2266, "step": 24966 }, { "epoch": 1.4881392299439742, "grad_norm": 2.8999550342559814, "learning_rate": 1.5603428165367652e-05, "loss": 1.0241, "step": 24968 }, { "epoch": 1.4882584336631304, "grad_norm": 3.4242870807647705, "learning_rate": 1.5596565169734906e-05, "loss": 1.112, "step": 24970 }, { "epoch": 1.4883776373822863, "grad_norm": 2.748892068862915, "learning_rate": 1.558970340483077e-05, "loss": 1.0607, "step": 24972 }, { "epoch": 1.4884968411014423, "grad_norm": 3.045928478240967, "learning_rate": 1.558284287090066e-05, "loss": 1.145, "step": 24974 }, { "epoch": 1.4886160448205983, "grad_norm": 3.139896869659424, "learning_rate": 1.557598356819e-05, "loss": 1.146, "step": 24976 }, { "epoch": 1.4887352485397545, "grad_norm": 3.1902246475219727, "learning_rate": 1.5569125496944226e-05, "loss": 1.1356, "step": 24978 }, { "epoch": 1.4888544522589104, "grad_norm": 3.073464870452881, "learning_rate": 1.55622686574086e-05, "loss": 1.2137, "step": 24980 }, { "epoch": 1.4889736559780666, "grad_norm": 3.141000986099243, "learning_rate": 1.5555413049828467e-05, "loss": 0.9625, "step": 24982 }, { "epoch": 1.4890928596972226, "grad_norm": 3.5760011672973633, "learning_rate": 1.554855867444906e-05, "loss": 1.0636, "step": 24984 }, { "epoch": 1.4892120634163786, "grad_norm": 3.1647720336914062, "learning_rate": 1.5541705531515588e-05, "loss": 1.0201, "step": 24986 }, { "epoch": 1.4893312671355345, "grad_norm": 3.4530348777770996, "learning_rate": 1.5534853621273206e-05, "loss": 1.1799, "step": 24988 }, { "epoch": 1.4894504708546907, "grad_norm": 3.197141170501709, "learning_rate": 1.5528002943967025e-05, "loss": 0.9962, "step": 24990 }, { "epoch": 1.4895696745738467, "grad_norm": 3.055429458618164, "learning_rate": 1.552115349984213e-05, "loss": 1.1492, "step": 24992 }, { "epoch": 1.4896888782930027, "grad_norm": 3.41019868850708, "learning_rate": 1.551430528914352e-05, "loss": 1.2557, "step": 24994 }, { "epoch": 1.4898080820121589, "grad_norm": 3.182501792907715, "learning_rate": 1.550745831211624e-05, "loss": 1.2918, "step": 24996 }, { "epoch": 1.4899272857313148, "grad_norm": 3.3065719604492188, "learning_rate": 1.5500612569005158e-05, "loss": 1.0841, "step": 24998 }, { "epoch": 1.4900464894504708, "grad_norm": 2.9295928478240967, "learning_rate": 1.5493768060055215e-05, "loss": 1.0573, "step": 25000 }, { "epoch": 1.4901656931696268, "grad_norm": 3.041499137878418, "learning_rate": 1.548692478551125e-05, "loss": 1.1728, "step": 25002 }, { "epoch": 1.490284896888783, "grad_norm": 3.139780044555664, "learning_rate": 1.5480082745618074e-05, "loss": 0.9997, "step": 25004 }, { "epoch": 1.490404100607939, "grad_norm": 2.992233991622925, "learning_rate": 1.5473241940620447e-05, "loss": 0.9745, "step": 25006 }, { "epoch": 1.4905233043270951, "grad_norm": 3.069983959197998, "learning_rate": 1.546640237076309e-05, "loss": 1.0841, "step": 25008 }, { "epoch": 1.490642508046251, "grad_norm": 3.101698160171509, "learning_rate": 1.5459564036290668e-05, "loss": 1.0797, "step": 25010 }, { "epoch": 1.490761711765407, "grad_norm": 3.441272020339966, "learning_rate": 1.5452726937447826e-05, "loss": 1.1061, "step": 25012 }, { "epoch": 1.490880915484563, "grad_norm": 3.4567348957061768, "learning_rate": 1.5445891074479142e-05, "loss": 1.2661, "step": 25014 }, { "epoch": 1.4910001192037192, "grad_norm": 3.275789976119995, "learning_rate": 1.5439056447629142e-05, "loss": 1.1015, "step": 25016 }, { "epoch": 1.4911193229228752, "grad_norm": 2.80531644821167, "learning_rate": 1.5432223057142375e-05, "loss": 1.0584, "step": 25018 }, { "epoch": 1.4912385266420312, "grad_norm": 3.213123083114624, "learning_rate": 1.5425390903263242e-05, "loss": 1.1166, "step": 25020 }, { "epoch": 1.4913577303611874, "grad_norm": 3.2090840339660645, "learning_rate": 1.5418559986236164e-05, "loss": 1.0527, "step": 25022 }, { "epoch": 1.4914769340803433, "grad_norm": 3.454220771789551, "learning_rate": 1.541173030630552e-05, "loss": 1.1677, "step": 25024 }, { "epoch": 1.4915961377994993, "grad_norm": 3.2994167804718018, "learning_rate": 1.540490186371564e-05, "loss": 1.1292, "step": 25026 }, { "epoch": 1.4917153415186553, "grad_norm": 3.3335018157958984, "learning_rate": 1.539807465871077e-05, "loss": 1.1259, "step": 25028 }, { "epoch": 1.4918345452378114, "grad_norm": 2.5234715938568115, "learning_rate": 1.539124869153517e-05, "loss": 0.9837, "step": 25030 }, { "epoch": 1.4919537489569674, "grad_norm": 3.5673060417175293, "learning_rate": 1.5384423962433015e-05, "loss": 1.2145, "step": 25032 }, { "epoch": 1.4920729526761236, "grad_norm": 3.0307750701904297, "learning_rate": 1.5377600471648447e-05, "loss": 1.0316, "step": 25034 }, { "epoch": 1.4921921563952796, "grad_norm": 3.428385019302368, "learning_rate": 1.5370778219425567e-05, "loss": 1.0318, "step": 25036 }, { "epoch": 1.4923113601144355, "grad_norm": 3.254192352294922, "learning_rate": 1.5363957206008438e-05, "loss": 1.142, "step": 25038 }, { "epoch": 1.4924305638335915, "grad_norm": 3.4259884357452393, "learning_rate": 1.535713743164105e-05, "loss": 1.0758, "step": 25040 }, { "epoch": 1.4925497675527477, "grad_norm": 3.119056463241577, "learning_rate": 1.5350318896567417e-05, "loss": 1.1031, "step": 25042 }, { "epoch": 1.4926689712719037, "grad_norm": 3.0084259510040283, "learning_rate": 1.5343501601031397e-05, "loss": 1.1228, "step": 25044 }, { "epoch": 1.4927881749910596, "grad_norm": 3.351156234741211, "learning_rate": 1.533668554527692e-05, "loss": 1.1287, "step": 25046 }, { "epoch": 1.4929073787102158, "grad_norm": 3.3159496784210205, "learning_rate": 1.5329870729547812e-05, "loss": 1.1303, "step": 25048 }, { "epoch": 1.4930265824293718, "grad_norm": 3.037456512451172, "learning_rate": 1.5323057154087817e-05, "loss": 1.1134, "step": 25050 }, { "epoch": 1.4931457861485278, "grad_norm": 3.1070947647094727, "learning_rate": 1.5316244819140736e-05, "loss": 1.0647, "step": 25052 }, { "epoch": 1.4932649898676837, "grad_norm": 3.2671825885772705, "learning_rate": 1.530943372495024e-05, "loss": 1.1175, "step": 25054 }, { "epoch": 1.49338419358684, "grad_norm": 2.96730637550354, "learning_rate": 1.530262387176e-05, "loss": 0.9733, "step": 25056 }, { "epoch": 1.493503397305996, "grad_norm": 2.9573121070861816, "learning_rate": 1.5295815259813595e-05, "loss": 1.0802, "step": 25058 }, { "epoch": 1.493622601025152, "grad_norm": 2.8603293895721436, "learning_rate": 1.528900788935465e-05, "loss": 1.0783, "step": 25060 }, { "epoch": 1.493741804744308, "grad_norm": 2.7928974628448486, "learning_rate": 1.528220176062663e-05, "loss": 1.1245, "step": 25062 }, { "epoch": 1.493861008463464, "grad_norm": 2.9842846393585205, "learning_rate": 1.5275396873873048e-05, "loss": 1.0465, "step": 25064 }, { "epoch": 1.49398021218262, "grad_norm": 2.816925048828125, "learning_rate": 1.526859322933735e-05, "loss": 1.0769, "step": 25066 }, { "epoch": 1.4940994159017762, "grad_norm": 3.529047727584839, "learning_rate": 1.5261790827262866e-05, "loss": 1.2418, "step": 25068 }, { "epoch": 1.4942186196209322, "grad_norm": 3.1515438556671143, "learning_rate": 1.5254989667893004e-05, "loss": 1.124, "step": 25070 }, { "epoch": 1.4943378233400881, "grad_norm": 2.9163529872894287, "learning_rate": 1.5248189751471036e-05, "loss": 1.1577, "step": 25072 }, { "epoch": 1.4944570270592443, "grad_norm": 3.688162088394165, "learning_rate": 1.5241391078240225e-05, "loss": 1.0718, "step": 25074 }, { "epoch": 1.4945762307784003, "grad_norm": 3.20843768119812, "learning_rate": 1.523459364844378e-05, "loss": 1.0185, "step": 25076 }, { "epoch": 1.4946954344975563, "grad_norm": 3.4555981159210205, "learning_rate": 1.522779746232486e-05, "loss": 1.2331, "step": 25078 }, { "epoch": 1.4948146382167122, "grad_norm": 3.315303087234497, "learning_rate": 1.522100252012661e-05, "loss": 1.0351, "step": 25080 }, { "epoch": 1.4949338419358684, "grad_norm": 3.1554086208343506, "learning_rate": 1.5214208822092085e-05, "loss": 1.1778, "step": 25082 }, { "epoch": 1.4950530456550244, "grad_norm": 3.4979681968688965, "learning_rate": 1.5207416368464333e-05, "loss": 1.2311, "step": 25084 }, { "epoch": 1.4951722493741806, "grad_norm": 3.1233465671539307, "learning_rate": 1.520062515948632e-05, "loss": 1.143, "step": 25086 }, { "epoch": 1.4952914530933366, "grad_norm": 2.947366714477539, "learning_rate": 1.5193835195401029e-05, "loss": 0.983, "step": 25088 }, { "epoch": 1.4954106568124925, "grad_norm": 3.37168288230896, "learning_rate": 1.5187046476451345e-05, "loss": 1.1302, "step": 25090 }, { "epoch": 1.4955298605316485, "grad_norm": 3.7627265453338623, "learning_rate": 1.5180259002880109e-05, "loss": 1.0722, "step": 25092 }, { "epoch": 1.4956490642508047, "grad_norm": 3.274928331375122, "learning_rate": 1.5173472774930153e-05, "loss": 1.118, "step": 25094 }, { "epoch": 1.4957682679699607, "grad_norm": 3.024886131286621, "learning_rate": 1.516668779284423e-05, "loss": 1.2992, "step": 25096 }, { "epoch": 1.4958874716891166, "grad_norm": 3.609032392501831, "learning_rate": 1.5159904056865065e-05, "loss": 1.0477, "step": 25098 }, { "epoch": 1.4960066754082728, "grad_norm": 3.5281920433044434, "learning_rate": 1.5153121567235335e-05, "loss": 1.2506, "step": 25100 }, { "epoch": 1.4961258791274288, "grad_norm": 3.1638333797454834, "learning_rate": 1.5146340324197672e-05, "loss": 1.1122, "step": 25102 }, { "epoch": 1.4962450828465848, "grad_norm": 3.007603168487549, "learning_rate": 1.5139560327994656e-05, "loss": 0.9404, "step": 25104 }, { "epoch": 1.4963642865657407, "grad_norm": 3.2590556144714355, "learning_rate": 1.513278157886887e-05, "loss": 1.1006, "step": 25106 }, { "epoch": 1.496483490284897, "grad_norm": 3.3675220012664795, "learning_rate": 1.5126004077062766e-05, "loss": 1.0117, "step": 25108 }, { "epoch": 1.4966026940040529, "grad_norm": 3.24790358543396, "learning_rate": 1.5119227822818804e-05, "loss": 1.1651, "step": 25110 }, { "epoch": 1.496721897723209, "grad_norm": 3.173696994781494, "learning_rate": 1.5112452816379435e-05, "loss": 1.1494, "step": 25112 }, { "epoch": 1.496841101442365, "grad_norm": 3.696402072906494, "learning_rate": 1.5105679057986965e-05, "loss": 1.129, "step": 25114 }, { "epoch": 1.496960305161521, "grad_norm": 3.412491798400879, "learning_rate": 1.5098906547883757e-05, "loss": 1.137, "step": 25116 }, { "epoch": 1.497079508880677, "grad_norm": 3.415573835372925, "learning_rate": 1.5092135286312081e-05, "loss": 1.1942, "step": 25118 }, { "epoch": 1.4971987125998332, "grad_norm": 3.3634238243103027, "learning_rate": 1.5085365273514158e-05, "loss": 1.0574, "step": 25120 }, { "epoch": 1.4973179163189891, "grad_norm": 2.8847594261169434, "learning_rate": 1.5078596509732162e-05, "loss": 1.1876, "step": 25122 }, { "epoch": 1.497437120038145, "grad_norm": 3.262314796447754, "learning_rate": 1.5071828995208282e-05, "loss": 1.0411, "step": 25124 }, { "epoch": 1.4975563237573013, "grad_norm": 3.0017411708831787, "learning_rate": 1.5065062730184571e-05, "loss": 1.1912, "step": 25126 }, { "epoch": 1.4976755274764573, "grad_norm": 3.171936511993408, "learning_rate": 1.5058297714903075e-05, "loss": 1.1266, "step": 25128 }, { "epoch": 1.4977947311956132, "grad_norm": 3.327296733856201, "learning_rate": 1.5051533949605856e-05, "loss": 1.1962, "step": 25130 }, { "epoch": 1.4979139349147692, "grad_norm": 3.3705766201019287, "learning_rate": 1.504477143453481e-05, "loss": 1.1146, "step": 25132 }, { "epoch": 1.4980331386339254, "grad_norm": 2.8188750743865967, "learning_rate": 1.503801016993191e-05, "loss": 1.0918, "step": 25134 }, { "epoch": 1.4981523423530814, "grad_norm": 2.8277227878570557, "learning_rate": 1.5031250156039001e-05, "loss": 1.0838, "step": 25136 }, { "epoch": 1.4982715460722376, "grad_norm": 3.1702094078063965, "learning_rate": 1.5024491393097916e-05, "loss": 1.2934, "step": 25138 }, { "epoch": 1.4983907497913935, "grad_norm": 3.4037437438964844, "learning_rate": 1.5017733881350444e-05, "loss": 1.107, "step": 25140 }, { "epoch": 1.4985099535105495, "grad_norm": 3.5222156047821045, "learning_rate": 1.5010977621038324e-05, "loss": 1.1907, "step": 25142 }, { "epoch": 1.4986291572297055, "grad_norm": 3.176651954650879, "learning_rate": 1.5004222612403247e-05, "loss": 1.1368, "step": 25144 }, { "epoch": 1.4987483609488617, "grad_norm": 3.120147466659546, "learning_rate": 1.4997468855686863e-05, "loss": 1.1321, "step": 25146 }, { "epoch": 1.4988675646680176, "grad_norm": 3.174283504486084, "learning_rate": 1.4990716351130779e-05, "loss": 1.084, "step": 25148 }, { "epoch": 1.4989867683871738, "grad_norm": 3.201765775680542, "learning_rate": 1.4983965098976532e-05, "loss": 1.1895, "step": 25150 }, { "epoch": 1.4991059721063298, "grad_norm": 3.270341634750366, "learning_rate": 1.4977215099465685e-05, "loss": 1.1634, "step": 25152 }, { "epoch": 1.4992251758254858, "grad_norm": 3.3164000511169434, "learning_rate": 1.4970466352839691e-05, "loss": 1.0247, "step": 25154 }, { "epoch": 1.4993443795446417, "grad_norm": 3.339994192123413, "learning_rate": 1.4963718859339943e-05, "loss": 1.2787, "step": 25156 }, { "epoch": 1.4994635832637977, "grad_norm": 3.3217005729675293, "learning_rate": 1.495697261920786e-05, "loss": 1.048, "step": 25158 }, { "epoch": 1.4995827869829539, "grad_norm": 3.0484941005706787, "learning_rate": 1.495022763268476e-05, "loss": 1.1208, "step": 25160 }, { "epoch": 1.4997019907021099, "grad_norm": 3.1474874019622803, "learning_rate": 1.4943483900011945e-05, "loss": 1.1113, "step": 25162 }, { "epoch": 1.499821194421266, "grad_norm": 3.123486042022705, "learning_rate": 1.4936741421430656e-05, "loss": 1.4159, "step": 25164 }, { "epoch": 1.499940398140422, "grad_norm": 3.2838339805603027, "learning_rate": 1.4930000197182087e-05, "loss": 1.1778, "step": 25166 }, { "epoch": 1.500059601859578, "grad_norm": 3.3661842346191406, "learning_rate": 1.492326022750739e-05, "loss": 1.1751, "step": 25168 }, { "epoch": 1.500178805578734, "grad_norm": 3.460517168045044, "learning_rate": 1.4916521512647724e-05, "loss": 1.0646, "step": 25170 }, { "epoch": 1.5002980092978901, "grad_norm": 2.9265737533569336, "learning_rate": 1.49097840528441e-05, "loss": 0.9927, "step": 25172 }, { "epoch": 1.5004172130170461, "grad_norm": 3.1967320442199707, "learning_rate": 1.4903047848337549e-05, "loss": 1.2175, "step": 25174 }, { "epoch": 1.5005364167362023, "grad_norm": 3.309396743774414, "learning_rate": 1.4896312899369086e-05, "loss": 1.0702, "step": 25176 }, { "epoch": 1.5006556204553583, "grad_norm": 3.0040483474731445, "learning_rate": 1.4889579206179583e-05, "loss": 1.072, "step": 25178 }, { "epoch": 1.5007748241745142, "grad_norm": 2.956601142883301, "learning_rate": 1.4882846769009985e-05, "loss": 1.0356, "step": 25180 }, { "epoch": 1.5008940278936702, "grad_norm": 3.264155387878418, "learning_rate": 1.4876115588101104e-05, "loss": 1.1817, "step": 25182 }, { "epoch": 1.5010132316128262, "grad_norm": 3.0232956409454346, "learning_rate": 1.486938566369374e-05, "loss": 1.172, "step": 25184 }, { "epoch": 1.5011324353319824, "grad_norm": 3.5280370712280273, "learning_rate": 1.4862656996028657e-05, "loss": 1.2073, "step": 25186 }, { "epoch": 1.5012516390511383, "grad_norm": 3.3806838989257812, "learning_rate": 1.485592958534655e-05, "loss": 1.1478, "step": 25188 }, { "epoch": 1.5013708427702945, "grad_norm": 3.405799388885498, "learning_rate": 1.4849203431888086e-05, "loss": 1.1549, "step": 25190 }, { "epoch": 1.5014900464894505, "grad_norm": 3.196789503097534, "learning_rate": 1.4842478535893867e-05, "loss": 1.0813, "step": 25192 }, { "epoch": 1.5016092502086065, "grad_norm": 3.3707332611083984, "learning_rate": 1.4835754897604514e-05, "loss": 1.1278, "step": 25194 }, { "epoch": 1.5017284539277624, "grad_norm": 3.1640350818634033, "learning_rate": 1.4829032517260489e-05, "loss": 1.1177, "step": 25196 }, { "epoch": 1.5018476576469186, "grad_norm": 3.6227359771728516, "learning_rate": 1.4822311395102324e-05, "loss": 1.1635, "step": 25198 }, { "epoch": 1.5019668613660746, "grad_norm": 3.102699041366577, "learning_rate": 1.4815591531370454e-05, "loss": 1.089, "step": 25200 }, { "epoch": 1.5020860650852308, "grad_norm": 3.0368268489837646, "learning_rate": 1.4808872926305222e-05, "loss": 1.0973, "step": 25202 }, { "epoch": 1.5022052688043868, "grad_norm": 3.201357364654541, "learning_rate": 1.4802155580147032e-05, "loss": 1.1322, "step": 25204 }, { "epoch": 1.5023244725235427, "grad_norm": 3.1163480281829834, "learning_rate": 1.4795439493136165e-05, "loss": 1.2091, "step": 25206 }, { "epoch": 1.5024436762426987, "grad_norm": 3.301044225692749, "learning_rate": 1.4788724665512877e-05, "loss": 1.1847, "step": 25208 }, { "epoch": 1.5025628799618547, "grad_norm": 3.0227789878845215, "learning_rate": 1.4782011097517379e-05, "loss": 1.1468, "step": 25210 }, { "epoch": 1.5026820836810109, "grad_norm": 3.04967999458313, "learning_rate": 1.4775298789389841e-05, "loss": 0.9418, "step": 25212 }, { "epoch": 1.5028012874001668, "grad_norm": 3.328016996383667, "learning_rate": 1.4768587741370387e-05, "loss": 1.1318, "step": 25214 }, { "epoch": 1.502920491119323, "grad_norm": 2.6959898471832275, "learning_rate": 1.4761877953699094e-05, "loss": 1.0513, "step": 25216 }, { "epoch": 1.503039694838479, "grad_norm": 3.1281163692474365, "learning_rate": 1.4755169426615995e-05, "loss": 1.2048, "step": 25218 }, { "epoch": 1.503158898557635, "grad_norm": 3.7643775939941406, "learning_rate": 1.4748462160361054e-05, "loss": 1.2836, "step": 25220 }, { "epoch": 1.503278102276791, "grad_norm": 3.0870046615600586, "learning_rate": 1.4741756155174252e-05, "loss": 1.0491, "step": 25222 }, { "epoch": 1.5033973059959471, "grad_norm": 3.204425573348999, "learning_rate": 1.4735051411295464e-05, "loss": 1.1487, "step": 25224 }, { "epoch": 1.503516509715103, "grad_norm": 3.3976898193359375, "learning_rate": 1.4728347928964548e-05, "loss": 1.2481, "step": 25226 }, { "epoch": 1.5036357134342593, "grad_norm": 3.3703811168670654, "learning_rate": 1.4721645708421306e-05, "loss": 1.1474, "step": 25228 }, { "epoch": 1.5037549171534152, "grad_norm": 3.414154291152954, "learning_rate": 1.4714944749905496e-05, "loss": 1.2085, "step": 25230 }, { "epoch": 1.5038741208725712, "grad_norm": 3.0020039081573486, "learning_rate": 1.4708245053656844e-05, "loss": 1.1321, "step": 25232 }, { "epoch": 1.5039933245917272, "grad_norm": 2.944896697998047, "learning_rate": 1.4701546619915014e-05, "loss": 1.1706, "step": 25234 }, { "epoch": 1.5041125283108832, "grad_norm": 3.3970086574554443, "learning_rate": 1.4694849448919635e-05, "loss": 1.2908, "step": 25236 }, { "epoch": 1.5042317320300393, "grad_norm": 3.31432843208313, "learning_rate": 1.468815354091027e-05, "loss": 1.124, "step": 25238 }, { "epoch": 1.5043509357491955, "grad_norm": 3.3598833084106445, "learning_rate": 1.4681458896126504e-05, "loss": 1.0653, "step": 25240 }, { "epoch": 1.5044701394683515, "grad_norm": 3.3945467472076416, "learning_rate": 1.4674765514807765e-05, "loss": 1.0667, "step": 25242 }, { "epoch": 1.5045893431875075, "grad_norm": 3.229104518890381, "learning_rate": 1.4668073397193538e-05, "loss": 1.1316, "step": 25244 }, { "epoch": 1.5047085469066634, "grad_norm": 3.1389901638031006, "learning_rate": 1.4661382543523228e-05, "loss": 1.1461, "step": 25246 }, { "epoch": 1.5048277506258194, "grad_norm": 3.765824556350708, "learning_rate": 1.4654692954036148e-05, "loss": 1.3137, "step": 25248 }, { "epoch": 1.5049469543449756, "grad_norm": 2.927008867263794, "learning_rate": 1.4648004628971646e-05, "loss": 1.0182, "step": 25250 }, { "epoch": 1.5050661580641316, "grad_norm": 3.0459558963775635, "learning_rate": 1.4641317568568973e-05, "loss": 1.1683, "step": 25252 }, { "epoch": 1.5051853617832878, "grad_norm": 3.124622106552124, "learning_rate": 1.4634631773067347e-05, "loss": 1.1751, "step": 25254 }, { "epoch": 1.5053045655024437, "grad_norm": 3.2878458499908447, "learning_rate": 1.4627947242705935e-05, "loss": 1.0738, "step": 25256 }, { "epoch": 1.5054237692215997, "grad_norm": 3.1900341510772705, "learning_rate": 1.4621263977723904e-05, "loss": 1.1867, "step": 25258 }, { "epoch": 1.5055429729407557, "grad_norm": 3.016573190689087, "learning_rate": 1.4614581978360287e-05, "loss": 1.2647, "step": 25260 }, { "epoch": 1.5056621766599116, "grad_norm": 3.212268829345703, "learning_rate": 1.4607901244854134e-05, "loss": 1.0876, "step": 25262 }, { "epoch": 1.5057813803790678, "grad_norm": 3.451965093612671, "learning_rate": 1.4601221777444468e-05, "loss": 1.1797, "step": 25264 }, { "epoch": 1.505900584098224, "grad_norm": 3.443263053894043, "learning_rate": 1.459454357637019e-05, "loss": 1.2648, "step": 25266 }, { "epoch": 1.50601978781738, "grad_norm": 3.3624088764190674, "learning_rate": 1.4587866641870241e-05, "loss": 1.1416, "step": 25268 }, { "epoch": 1.506138991536536, "grad_norm": 3.265077590942383, "learning_rate": 1.4581190974183468e-05, "loss": 1.1788, "step": 25270 }, { "epoch": 1.506258195255692, "grad_norm": 3.032994031906128, "learning_rate": 1.457451657354867e-05, "loss": 1.0112, "step": 25272 }, { "epoch": 1.506377398974848, "grad_norm": 3.714684247970581, "learning_rate": 1.4567843440204626e-05, "loss": 1.1595, "step": 25274 }, { "epoch": 1.506496602694004, "grad_norm": 3.0293984413146973, "learning_rate": 1.4561171574390048e-05, "loss": 1.1532, "step": 25276 }, { "epoch": 1.50661580641316, "grad_norm": 3.3989334106445312, "learning_rate": 1.4554500976343622e-05, "loss": 1.1899, "step": 25278 }, { "epoch": 1.5067350101323163, "grad_norm": 3.1584556102752686, "learning_rate": 1.4547831646303967e-05, "loss": 1.1478, "step": 25280 }, { "epoch": 1.5068542138514722, "grad_norm": 2.8887927532196045, "learning_rate": 1.454116358450967e-05, "loss": 1.1043, "step": 25282 }, { "epoch": 1.5069734175706282, "grad_norm": 3.128140687942505, "learning_rate": 1.4534496791199265e-05, "loss": 1.1333, "step": 25284 }, { "epoch": 1.5070926212897842, "grad_norm": 3.2554104328155518, "learning_rate": 1.4527831266611264e-05, "loss": 1.2734, "step": 25286 }, { "epoch": 1.5072118250089401, "grad_norm": 3.5379438400268555, "learning_rate": 1.4521167010984105e-05, "loss": 1.4024, "step": 25288 }, { "epoch": 1.5073310287280963, "grad_norm": 2.770690679550171, "learning_rate": 1.4514504024556192e-05, "loss": 1.1678, "step": 25290 }, { "epoch": 1.5074502324472525, "grad_norm": 3.144634962081909, "learning_rate": 1.4507842307565883e-05, "loss": 1.2131, "step": 25292 }, { "epoch": 1.5075694361664085, "grad_norm": 3.2043497562408447, "learning_rate": 1.450118186025149e-05, "loss": 1.1388, "step": 25294 }, { "epoch": 1.5076886398855645, "grad_norm": 3.546701669692993, "learning_rate": 1.4494522682851276e-05, "loss": 1.1869, "step": 25296 }, { "epoch": 1.5078078436047204, "grad_norm": 3.5219225883483887, "learning_rate": 1.448786477560347e-05, "loss": 1.1596, "step": 25298 }, { "epoch": 1.5079270473238764, "grad_norm": 3.4137790203094482, "learning_rate": 1.4481208138746245e-05, "loss": 1.1083, "step": 25300 }, { "epoch": 1.5080462510430326, "grad_norm": 3.4224777221679688, "learning_rate": 1.4474552772517713e-05, "loss": 1.1638, "step": 25302 }, { "epoch": 1.5081654547621886, "grad_norm": 3.3317646980285645, "learning_rate": 1.4467898677156011e-05, "loss": 1.1197, "step": 25304 }, { "epoch": 1.5082846584813447, "grad_norm": 3.462658405303955, "learning_rate": 1.446124585289913e-05, "loss": 1.3165, "step": 25306 }, { "epoch": 1.5084038622005007, "grad_norm": 3.513110876083374, "learning_rate": 1.4454594299985059e-05, "loss": 1.197, "step": 25308 }, { "epoch": 1.5085230659196567, "grad_norm": 3.5166165828704834, "learning_rate": 1.4447944018651804e-05, "loss": 1.051, "step": 25310 }, { "epoch": 1.5086422696388126, "grad_norm": 3.555187225341797, "learning_rate": 1.4441295009137195e-05, "loss": 1.2044, "step": 25312 }, { "epoch": 1.5087614733579686, "grad_norm": 3.025949239730835, "learning_rate": 1.4434647271679148e-05, "loss": 1.2749, "step": 25314 }, { "epoch": 1.5088806770771248, "grad_norm": 2.8002817630767822, "learning_rate": 1.4428000806515451e-05, "loss": 0.9908, "step": 25316 }, { "epoch": 1.508999880796281, "grad_norm": 3.1262097358703613, "learning_rate": 1.4421355613883875e-05, "loss": 1.084, "step": 25318 }, { "epoch": 1.509119084515437, "grad_norm": 3.65075945854187, "learning_rate": 1.4414711694022137e-05, "loss": 1.2033, "step": 25320 }, { "epoch": 1.509238288234593, "grad_norm": 3.211493492126465, "learning_rate": 1.4408069047167916e-05, "loss": 1.3471, "step": 25322 }, { "epoch": 1.509357491953749, "grad_norm": 3.2813708782196045, "learning_rate": 1.4401427673558837e-05, "loss": 1.3063, "step": 25324 }, { "epoch": 1.5094766956729049, "grad_norm": 3.271544933319092, "learning_rate": 1.4394787573432484e-05, "loss": 1.0161, "step": 25326 }, { "epoch": 1.509595899392061, "grad_norm": 3.1055288314819336, "learning_rate": 1.4388148747026425e-05, "loss": 1.0648, "step": 25328 }, { "epoch": 1.509715103111217, "grad_norm": 3.691068649291992, "learning_rate": 1.4381511194578101e-05, "loss": 1.1521, "step": 25330 }, { "epoch": 1.5098343068303732, "grad_norm": 3.2139806747436523, "learning_rate": 1.4374874916325003e-05, "loss": 1.2206, "step": 25332 }, { "epoch": 1.5099535105495292, "grad_norm": 3.1725757122039795, "learning_rate": 1.436823991250454e-05, "loss": 1.1698, "step": 25334 }, { "epoch": 1.5100727142686852, "grad_norm": 3.389417886734009, "learning_rate": 1.436160618335401e-05, "loss": 1.1419, "step": 25336 }, { "epoch": 1.5101919179878411, "grad_norm": 3.065652847290039, "learning_rate": 1.4354973729110782e-05, "loss": 1.0109, "step": 25338 }, { "epoch": 1.510311121706997, "grad_norm": 3.214400053024292, "learning_rate": 1.43483425500121e-05, "loss": 1.0332, "step": 25340 }, { "epoch": 1.5104303254261533, "grad_norm": 3.5844485759735107, "learning_rate": 1.4341712646295185e-05, "loss": 1.2135, "step": 25342 }, { "epoch": 1.5105495291453095, "grad_norm": 3.578932046890259, "learning_rate": 1.4335084018197214e-05, "loss": 1.1422, "step": 25344 }, { "epoch": 1.5106687328644655, "grad_norm": 3.0013749599456787, "learning_rate": 1.4328456665955314e-05, "loss": 1.1258, "step": 25346 }, { "epoch": 1.5107879365836214, "grad_norm": 2.8924951553344727, "learning_rate": 1.4321830589806556e-05, "loss": 0.9787, "step": 25348 }, { "epoch": 1.5109071403027774, "grad_norm": 3.3493611812591553, "learning_rate": 1.4315205789988013e-05, "loss": 1.0987, "step": 25350 }, { "epoch": 1.5110263440219334, "grad_norm": 3.4502363204956055, "learning_rate": 1.4308582266736641e-05, "loss": 1.121, "step": 25352 }, { "epoch": 1.5111455477410896, "grad_norm": 3.3048133850097656, "learning_rate": 1.4301960020289385e-05, "loss": 1.1665, "step": 25354 }, { "epoch": 1.5112647514602455, "grad_norm": 3.3923556804656982, "learning_rate": 1.4295339050883172e-05, "loss": 1.2131, "step": 25356 }, { "epoch": 1.5113839551794017, "grad_norm": 3.28005313873291, "learning_rate": 1.4288719358754843e-05, "loss": 1.0784, "step": 25358 }, { "epoch": 1.5115031588985577, "grad_norm": 3.365339517593384, "learning_rate": 1.4282100944141207e-05, "loss": 1.1901, "step": 25360 }, { "epoch": 1.5116223626177137, "grad_norm": 3.079671621322632, "learning_rate": 1.4275483807279027e-05, "loss": 0.9721, "step": 25362 }, { "epoch": 1.5117415663368696, "grad_norm": 2.8761215209960938, "learning_rate": 1.4268867948405023e-05, "loss": 1.1495, "step": 25364 }, { "epoch": 1.5118607700560256, "grad_norm": 3.3686208724975586, "learning_rate": 1.4262253367755868e-05, "loss": 1.1726, "step": 25366 }, { "epoch": 1.5119799737751818, "grad_norm": 3.1813011169433594, "learning_rate": 1.4255640065568182e-05, "loss": 1.2245, "step": 25368 }, { "epoch": 1.512099177494338, "grad_norm": 3.2142741680145264, "learning_rate": 1.4249028042078549e-05, "loss": 1.2852, "step": 25370 }, { "epoch": 1.512218381213494, "grad_norm": 3.135995626449585, "learning_rate": 1.4242417297523486e-05, "loss": 1.1938, "step": 25372 }, { "epoch": 1.51233758493265, "grad_norm": 3.447234869003296, "learning_rate": 1.4235807832139536e-05, "loss": 1.0414, "step": 25374 }, { "epoch": 1.5124567886518059, "grad_norm": 3.02333927154541, "learning_rate": 1.4229199646163071e-05, "loss": 1.1416, "step": 25376 }, { "epoch": 1.5125759923709619, "grad_norm": 3.551863431930542, "learning_rate": 1.4222592739830536e-05, "loss": 1.0497, "step": 25378 }, { "epoch": 1.512695196090118, "grad_norm": 3.396012306213379, "learning_rate": 1.4215987113378288e-05, "loss": 1.0941, "step": 25380 }, { "epoch": 1.512814399809274, "grad_norm": 3.48361873626709, "learning_rate": 1.4209382767042583e-05, "loss": 1.1047, "step": 25382 }, { "epoch": 1.5129336035284302, "grad_norm": 3.1761837005615234, "learning_rate": 1.4202779701059732e-05, "loss": 1.065, "step": 25384 }, { "epoch": 1.5130528072475862, "grad_norm": 3.0900399684906006, "learning_rate": 1.4196177915665926e-05, "loss": 1.0882, "step": 25386 }, { "epoch": 1.5131720109667421, "grad_norm": 3.1601407527923584, "learning_rate": 1.4189577411097338e-05, "loss": 1.0779, "step": 25388 }, { "epoch": 1.5132912146858981, "grad_norm": 3.508859157562256, "learning_rate": 1.4182978187590079e-05, "loss": 1.271, "step": 25390 }, { "epoch": 1.513410418405054, "grad_norm": 3.489370822906494, "learning_rate": 1.4176380245380262e-05, "loss": 1.1684, "step": 25392 }, { "epoch": 1.5135296221242103, "grad_norm": 3.1228291988372803, "learning_rate": 1.4169783584703867e-05, "loss": 1.1052, "step": 25394 }, { "epoch": 1.5136488258433665, "grad_norm": 3.253944158554077, "learning_rate": 1.4163188205796913e-05, "loss": 1.1046, "step": 25396 }, { "epoch": 1.5137680295625224, "grad_norm": 3.0706021785736084, "learning_rate": 1.4156594108895355e-05, "loss": 1.2119, "step": 25398 }, { "epoch": 1.5138872332816784, "grad_norm": 3.2432286739349365, "learning_rate": 1.4150001294235027e-05, "loss": 1.1125, "step": 25400 }, { "epoch": 1.5140064370008344, "grad_norm": 3.487389326095581, "learning_rate": 1.414340976205183e-05, "loss": 1.1719, "step": 25402 }, { "epoch": 1.5141256407199903, "grad_norm": 3.1745383739471436, "learning_rate": 1.4136819512581544e-05, "loss": 1.0259, "step": 25404 }, { "epoch": 1.5142448444391465, "grad_norm": 2.9485199451446533, "learning_rate": 1.4130230546059931e-05, "loss": 1.125, "step": 25406 }, { "epoch": 1.5143640481583025, "grad_norm": 3.43293833732605, "learning_rate": 1.4123642862722697e-05, "loss": 1.0958, "step": 25408 }, { "epoch": 1.5144832518774587, "grad_norm": 3.239436626434326, "learning_rate": 1.4117056462805506e-05, "loss": 1.0207, "step": 25410 }, { "epoch": 1.5146024555966147, "grad_norm": 3.031627893447876, "learning_rate": 1.411047134654398e-05, "loss": 1.1195, "step": 25412 }, { "epoch": 1.5147216593157706, "grad_norm": 3.351339101791382, "learning_rate": 1.4103887514173692e-05, "loss": 1.0921, "step": 25414 }, { "epoch": 1.5148408630349266, "grad_norm": 2.8338229656219482, "learning_rate": 1.4097304965930158e-05, "loss": 0.9821, "step": 25416 }, { "epoch": 1.5149600667540826, "grad_norm": 3.3243494033813477, "learning_rate": 1.4090723702048859e-05, "loss": 1.0606, "step": 25418 }, { "epoch": 1.5150792704732388, "grad_norm": 3.250852108001709, "learning_rate": 1.4084143722765242e-05, "loss": 1.1551, "step": 25420 }, { "epoch": 1.515198474192395, "grad_norm": 3.0178048610687256, "learning_rate": 1.4077565028314693e-05, "loss": 1.1464, "step": 25422 }, { "epoch": 1.515317677911551, "grad_norm": 3.489466667175293, "learning_rate": 1.4070987618932547e-05, "loss": 1.1838, "step": 25424 }, { "epoch": 1.515436881630707, "grad_norm": 3.137584924697876, "learning_rate": 1.4064411494854107e-05, "loss": 1.1482, "step": 25426 }, { "epoch": 1.5155560853498629, "grad_norm": 3.20530366897583, "learning_rate": 1.405783665631462e-05, "loss": 1.1236, "step": 25428 }, { "epoch": 1.5156752890690188, "grad_norm": 3.4886505603790283, "learning_rate": 1.4051263103549289e-05, "loss": 1.1651, "step": 25430 }, { "epoch": 1.515794492788175, "grad_norm": 2.9352190494537354, "learning_rate": 1.4044690836793273e-05, "loss": 1.0333, "step": 25432 }, { "epoch": 1.515913696507331, "grad_norm": 3.657196521759033, "learning_rate": 1.4038119856281689e-05, "loss": 1.2388, "step": 25434 }, { "epoch": 1.5160329002264872, "grad_norm": 3.186255931854248, "learning_rate": 1.4031550162249584e-05, "loss": 1.1147, "step": 25436 }, { "epoch": 1.5161521039456431, "grad_norm": 3.2196969985961914, "learning_rate": 1.4024981754932021e-05, "loss": 0.9694, "step": 25438 }, { "epoch": 1.5162713076647991, "grad_norm": 3.129378080368042, "learning_rate": 1.4018414634563937e-05, "loss": 1.0016, "step": 25440 }, { "epoch": 1.516390511383955, "grad_norm": 3.189028739929199, "learning_rate": 1.4011848801380257e-05, "loss": 1.1005, "step": 25442 }, { "epoch": 1.516509715103111, "grad_norm": 2.9697484970092773, "learning_rate": 1.4005284255615903e-05, "loss": 1.0532, "step": 25444 }, { "epoch": 1.5166289188222672, "grad_norm": 3.460522413253784, "learning_rate": 1.3998720997505655e-05, "loss": 1.1479, "step": 25446 }, { "epoch": 1.5167481225414234, "grad_norm": 3.516846179962158, "learning_rate": 1.3992159027284352e-05, "loss": 1.0766, "step": 25448 }, { "epoch": 1.5168673262605794, "grad_norm": 3.0585873126983643, "learning_rate": 1.3985598345186724e-05, "loss": 1.215, "step": 25450 }, { "epoch": 1.5169865299797354, "grad_norm": 3.0615344047546387, "learning_rate": 1.397903895144746e-05, "loss": 1.0732, "step": 25452 }, { "epoch": 1.5171057336988913, "grad_norm": 2.853498935699463, "learning_rate": 1.39724808463012e-05, "loss": 1.1934, "step": 25454 }, { "epoch": 1.5172249374180473, "grad_norm": 3.0539121627807617, "learning_rate": 1.3965924029982602e-05, "loss": 1.3071, "step": 25456 }, { "epoch": 1.5173441411372035, "grad_norm": 3.229527711868286, "learning_rate": 1.3959368502726172e-05, "loss": 1.2671, "step": 25458 }, { "epoch": 1.5174633448563595, "grad_norm": 2.931103229522705, "learning_rate": 1.3952814264766429e-05, "loss": 1.0237, "step": 25460 }, { "epoch": 1.5175825485755157, "grad_norm": 3.0202198028564453, "learning_rate": 1.3946261316337889e-05, "loss": 1.2094, "step": 25462 }, { "epoch": 1.5177017522946716, "grad_norm": 3.1289803981781006, "learning_rate": 1.39397096576749e-05, "loss": 1.112, "step": 25464 }, { "epoch": 1.5178209560138276, "grad_norm": 3.3839218616485596, "learning_rate": 1.3933159289011894e-05, "loss": 1.3674, "step": 25466 }, { "epoch": 1.5179401597329836, "grad_norm": 3.184107542037964, "learning_rate": 1.3926610210583179e-05, "loss": 1.134, "step": 25468 }, { "epoch": 1.5180593634521395, "grad_norm": 3.327824592590332, "learning_rate": 1.3920062422623049e-05, "loss": 1.1436, "step": 25470 }, { "epoch": 1.5181785671712957, "grad_norm": 3.1105988025665283, "learning_rate": 1.3913515925365722e-05, "loss": 1.2565, "step": 25472 }, { "epoch": 1.518297770890452, "grad_norm": 3.2804927825927734, "learning_rate": 1.3906970719045404e-05, "loss": 1.0885, "step": 25474 }, { "epoch": 1.518416974609608, "grad_norm": 3.5105648040771484, "learning_rate": 1.3900426803896232e-05, "loss": 1.1986, "step": 25476 }, { "epoch": 1.5185361783287639, "grad_norm": 3.1935267448425293, "learning_rate": 1.389388418015231e-05, "loss": 1.1862, "step": 25478 }, { "epoch": 1.5186553820479198, "grad_norm": 2.9596149921417236, "learning_rate": 1.3887342848047686e-05, "loss": 1.0354, "step": 25480 }, { "epoch": 1.5187745857670758, "grad_norm": 3.262111186981201, "learning_rate": 1.3880802807816345e-05, "loss": 1.0991, "step": 25482 }, { "epoch": 1.518893789486232, "grad_norm": 3.316340208053589, "learning_rate": 1.3874264059692294e-05, "loss": 1.0706, "step": 25484 }, { "epoch": 1.519012993205388, "grad_norm": 3.480957508087158, "learning_rate": 1.3867726603909425e-05, "loss": 1.196, "step": 25486 }, { "epoch": 1.5191321969245442, "grad_norm": 3.246227979660034, "learning_rate": 1.3861190440701577e-05, "loss": 1.1322, "step": 25488 }, { "epoch": 1.5192514006437001, "grad_norm": 2.9560961723327637, "learning_rate": 1.3854655570302611e-05, "loss": 1.3589, "step": 25490 }, { "epoch": 1.519370604362856, "grad_norm": 3.112598419189453, "learning_rate": 1.3848121992946278e-05, "loss": 1.3097, "step": 25492 }, { "epoch": 1.519489808082012, "grad_norm": 3.400883674621582, "learning_rate": 1.384158970886632e-05, "loss": 1.114, "step": 25494 }, { "epoch": 1.519609011801168, "grad_norm": 3.323751449584961, "learning_rate": 1.3835058718296412e-05, "loss": 1.177, "step": 25496 }, { "epoch": 1.5197282155203242, "grad_norm": 3.1213057041168213, "learning_rate": 1.3828529021470187e-05, "loss": 1.2306, "step": 25498 }, { "epoch": 1.5198474192394804, "grad_norm": 3.2889161109924316, "learning_rate": 1.3822000618621239e-05, "loss": 1.1018, "step": 25500 }, { "epoch": 1.5199666229586364, "grad_norm": 3.15734601020813, "learning_rate": 1.3815473509983113e-05, "loss": 1.1499, "step": 25502 }, { "epoch": 1.5200858266777924, "grad_norm": 3.299348831176758, "learning_rate": 1.3808947695789303e-05, "loss": 1.1816, "step": 25504 }, { "epoch": 1.5202050303969483, "grad_norm": 3.267521381378174, "learning_rate": 1.3802423176273243e-05, "loss": 1.0775, "step": 25506 }, { "epoch": 1.5203242341161043, "grad_norm": 3.481607675552368, "learning_rate": 1.379589995166839e-05, "loss": 1.1146, "step": 25508 }, { "epoch": 1.5204434378352605, "grad_norm": 3.2418038845062256, "learning_rate": 1.3789378022208032e-05, "loss": 1.1506, "step": 25510 }, { "epoch": 1.5205626415544164, "grad_norm": 3.361429452896118, "learning_rate": 1.378285738812553e-05, "loss": 1.3439, "step": 25512 }, { "epoch": 1.5206818452735726, "grad_norm": 3.183256149291992, "learning_rate": 1.3776338049654136e-05, "loss": 1.1249, "step": 25514 }, { "epoch": 1.5208010489927286, "grad_norm": 3.4562435150146484, "learning_rate": 1.3769820007027073e-05, "loss": 1.1442, "step": 25516 }, { "epoch": 1.5209202527118846, "grad_norm": 3.335770845413208, "learning_rate": 1.37633032604775e-05, "loss": 1.0445, "step": 25518 }, { "epoch": 1.5210394564310405, "grad_norm": 3.3431663513183594, "learning_rate": 1.3756787810238559e-05, "loss": 1.0429, "step": 25520 }, { "epoch": 1.5211586601501967, "grad_norm": 3.178598403930664, "learning_rate": 1.3750273656543317e-05, "loss": 1.177, "step": 25522 }, { "epoch": 1.5212778638693527, "grad_norm": 3.0659801959991455, "learning_rate": 1.3743760799624799e-05, "loss": 1.1455, "step": 25524 }, { "epoch": 1.521397067588509, "grad_norm": 3.129863977432251, "learning_rate": 1.3737249239716043e-05, "loss": 1.1253, "step": 25526 }, { "epoch": 1.5215162713076649, "grad_norm": 2.9020235538482666, "learning_rate": 1.3730738977049917e-05, "loss": 1.2432, "step": 25528 }, { "epoch": 1.5216354750268208, "grad_norm": 3.251079797744751, "learning_rate": 1.3724230011859368e-05, "loss": 1.1956, "step": 25530 }, { "epoch": 1.5217546787459768, "grad_norm": 2.8179121017456055, "learning_rate": 1.3717722344377248e-05, "loss": 1.0839, "step": 25532 }, { "epoch": 1.5218738824651328, "grad_norm": 3.19901704788208, "learning_rate": 1.37112159748363e-05, "loss": 1.0876, "step": 25534 }, { "epoch": 1.521993086184289, "grad_norm": 3.027822494506836, "learning_rate": 1.3704710903469336e-05, "loss": 1.089, "step": 25536 }, { "epoch": 1.522112289903445, "grad_norm": 3.2466721534729004, "learning_rate": 1.369820713050905e-05, "loss": 1.0293, "step": 25538 }, { "epoch": 1.5222314936226011, "grad_norm": 3.1334261894226074, "learning_rate": 1.3691704656188092e-05, "loss": 1.1732, "step": 25540 }, { "epoch": 1.522350697341757, "grad_norm": 3.42010498046875, "learning_rate": 1.368520348073909e-05, "loss": 1.083, "step": 25542 }, { "epoch": 1.522469901060913, "grad_norm": 2.9495294094085693, "learning_rate": 1.367870360439461e-05, "loss": 0.9419, "step": 25544 }, { "epoch": 1.522589104780069, "grad_norm": 3.3933908939361572, "learning_rate": 1.3672205027387175e-05, "loss": 1.0948, "step": 25546 }, { "epoch": 1.5227083084992252, "grad_norm": 3.16050124168396, "learning_rate": 1.3665707749949257e-05, "loss": 1.047, "step": 25548 }, { "epoch": 1.5228275122183812, "grad_norm": 3.0791845321655273, "learning_rate": 1.365921177231329e-05, "loss": 1.209, "step": 25550 }, { "epoch": 1.5229467159375374, "grad_norm": 3.6809303760528564, "learning_rate": 1.3652717094711648e-05, "loss": 1.2164, "step": 25552 }, { "epoch": 1.5230659196566934, "grad_norm": 3.1720128059387207, "learning_rate": 1.3646223717376683e-05, "loss": 1.2337, "step": 25554 }, { "epoch": 1.5231851233758493, "grad_norm": 3.0005335807800293, "learning_rate": 1.3639731640540681e-05, "loss": 0.9907, "step": 25556 }, { "epoch": 1.5233043270950053, "grad_norm": 3.1285157203674316, "learning_rate": 1.3633240864435886e-05, "loss": 1.0804, "step": 25558 }, { "epoch": 1.5234235308141613, "grad_norm": 2.5383691787719727, "learning_rate": 1.362675138929449e-05, "loss": 1.0051, "step": 25560 }, { "epoch": 1.5235427345333175, "grad_norm": 3.5028016567230225, "learning_rate": 1.362026321534865e-05, "loss": 1.1406, "step": 25562 }, { "epoch": 1.5236619382524734, "grad_norm": 3.516467809677124, "learning_rate": 1.3613776342830464e-05, "loss": 1.2104, "step": 25564 }, { "epoch": 1.5237811419716296, "grad_norm": 3.2833800315856934, "learning_rate": 1.3607290771971993e-05, "loss": 1.1839, "step": 25566 }, { "epoch": 1.5239003456907856, "grad_norm": 3.164324998855591, "learning_rate": 1.3600806503005247e-05, "loss": 1.1505, "step": 25568 }, { "epoch": 1.5240195494099416, "grad_norm": 2.9687726497650146, "learning_rate": 1.3594323536162173e-05, "loss": 1.1323, "step": 25570 }, { "epoch": 1.5241387531290975, "grad_norm": 3.646017551422119, "learning_rate": 1.3587841871674744e-05, "loss": 1.0516, "step": 25572 }, { "epoch": 1.5242579568482537, "grad_norm": 3.1320910453796387, "learning_rate": 1.358136150977476e-05, "loss": 1.0888, "step": 25574 }, { "epoch": 1.5243771605674097, "grad_norm": 4.082498073577881, "learning_rate": 1.3574882450694093e-05, "loss": 1.1894, "step": 25576 }, { "epoch": 1.5244963642865659, "grad_norm": 3.308401107788086, "learning_rate": 1.3568404694664527e-05, "loss": 1.039, "step": 25578 }, { "epoch": 1.5246155680057218, "grad_norm": 3.2533414363861084, "learning_rate": 1.3561928241917737e-05, "loss": 1.1119, "step": 25580 }, { "epoch": 1.5247347717248778, "grad_norm": 3.3166446685791016, "learning_rate": 1.3555453092685466e-05, "loss": 1.1393, "step": 25582 }, { "epoch": 1.5248539754440338, "grad_norm": 3.259465217590332, "learning_rate": 1.3548979247199333e-05, "loss": 1.271, "step": 25584 }, { "epoch": 1.5249731791631898, "grad_norm": 3.160430908203125, "learning_rate": 1.3542506705690927e-05, "loss": 1.1188, "step": 25586 }, { "epoch": 1.525092382882346, "grad_norm": 3.0704922676086426, "learning_rate": 1.3536035468391777e-05, "loss": 1.0308, "step": 25588 }, { "epoch": 1.525211586601502, "grad_norm": 3.1535065174102783, "learning_rate": 1.3529565535533428e-05, "loss": 1.081, "step": 25590 }, { "epoch": 1.525330790320658, "grad_norm": 3.5887610912323, "learning_rate": 1.3523096907347294e-05, "loss": 1.0686, "step": 25592 }, { "epoch": 1.525449994039814, "grad_norm": 3.11689829826355, "learning_rate": 1.3516629584064767e-05, "loss": 1.2601, "step": 25594 }, { "epoch": 1.52556919775897, "grad_norm": 3.5098979473114014, "learning_rate": 1.3510163565917256e-05, "loss": 1.2094, "step": 25596 }, { "epoch": 1.525688401478126, "grad_norm": 2.8814239501953125, "learning_rate": 1.350369885313601e-05, "loss": 1.0917, "step": 25598 }, { "epoch": 1.5258076051972822, "grad_norm": 3.131742000579834, "learning_rate": 1.349723544595235e-05, "loss": 1.0978, "step": 25600 }, { "epoch": 1.5259268089164382, "grad_norm": 3.104501724243164, "learning_rate": 1.3490773344597463e-05, "loss": 1.1125, "step": 25602 }, { "epoch": 1.5260460126355944, "grad_norm": 3.1553122997283936, "learning_rate": 1.3484312549302535e-05, "loss": 1.0094, "step": 25604 }, { "epoch": 1.5261652163547503, "grad_norm": 3.0438239574432373, "learning_rate": 1.3477853060298678e-05, "loss": 1.0192, "step": 25606 }, { "epoch": 1.5262844200739063, "grad_norm": 3.4986155033111572, "learning_rate": 1.3471394877816984e-05, "loss": 1.298, "step": 25608 }, { "epoch": 1.5264036237930623, "grad_norm": 3.1423308849334717, "learning_rate": 1.3464938002088468e-05, "loss": 1.0541, "step": 25610 }, { "epoch": 1.5265228275122182, "grad_norm": 3.4397897720336914, "learning_rate": 1.3458482433344127e-05, "loss": 1.0926, "step": 25612 }, { "epoch": 1.5266420312313744, "grad_norm": 3.4846575260162354, "learning_rate": 1.3452028171814895e-05, "loss": 1.1654, "step": 25614 }, { "epoch": 1.5267612349505306, "grad_norm": 3.319758892059326, "learning_rate": 1.3445575217731649e-05, "loss": 1.1179, "step": 25616 }, { "epoch": 1.5268804386696866, "grad_norm": 3.270108699798584, "learning_rate": 1.3439123571325262e-05, "loss": 1.0874, "step": 25618 }, { "epoch": 1.5269996423888426, "grad_norm": 2.9887092113494873, "learning_rate": 1.3432673232826532e-05, "loss": 1.1726, "step": 25620 }, { "epoch": 1.5271188461079985, "grad_norm": 3.0702197551727295, "learning_rate": 1.342622420246616e-05, "loss": 1.0421, "step": 25622 }, { "epoch": 1.5272380498271545, "grad_norm": 3.2832391262054443, "learning_rate": 1.34197764804749e-05, "loss": 1.0661, "step": 25624 }, { "epoch": 1.5273572535463107, "grad_norm": 3.0453929901123047, "learning_rate": 1.3413330067083396e-05, "loss": 1.3432, "step": 25626 }, { "epoch": 1.5274764572654667, "grad_norm": 3.2830021381378174, "learning_rate": 1.3406884962522254e-05, "loss": 1.1407, "step": 25628 }, { "epoch": 1.5275956609846228, "grad_norm": 3.1386799812316895, "learning_rate": 1.3400441167022038e-05, "loss": 1.1764, "step": 25630 }, { "epoch": 1.5277148647037788, "grad_norm": 3.212188482284546, "learning_rate": 1.3393998680813269e-05, "loss": 1.1768, "step": 25632 }, { "epoch": 1.5278340684229348, "grad_norm": 3.230572462081909, "learning_rate": 1.3387557504126396e-05, "loss": 1.208, "step": 25634 }, { "epoch": 1.5279532721420908, "grad_norm": 3.0736892223358154, "learning_rate": 1.3381117637191886e-05, "loss": 0.9947, "step": 25636 }, { "epoch": 1.5280724758612467, "grad_norm": 2.8319358825683594, "learning_rate": 1.3374679080240077e-05, "loss": 1.2538, "step": 25638 }, { "epoch": 1.528191679580403, "grad_norm": 3.4056665897369385, "learning_rate": 1.3368241833501293e-05, "loss": 1.0611, "step": 25640 }, { "epoch": 1.528310883299559, "grad_norm": 3.5199337005615234, "learning_rate": 1.3361805897205865e-05, "loss": 1.1597, "step": 25642 }, { "epoch": 1.528430087018715, "grad_norm": 3.248063325881958, "learning_rate": 1.3355371271583967e-05, "loss": 1.3412, "step": 25644 }, { "epoch": 1.528549290737871, "grad_norm": 3.2834489345550537, "learning_rate": 1.334893795686583e-05, "loss": 1.1707, "step": 25646 }, { "epoch": 1.528668494457027, "grad_norm": 3.6165876388549805, "learning_rate": 1.334250595328158e-05, "loss": 1.0927, "step": 25648 }, { "epoch": 1.528787698176183, "grad_norm": 3.143845319747925, "learning_rate": 1.333607526106132e-05, "loss": 1.0786, "step": 25650 }, { "epoch": 1.5289069018953392, "grad_norm": 3.3505187034606934, "learning_rate": 1.3329645880435088e-05, "loss": 1.0347, "step": 25652 }, { "epoch": 1.5290261056144951, "grad_norm": 2.864284038543701, "learning_rate": 1.3323217811632887e-05, "loss": 1.0592, "step": 25654 }, { "epoch": 1.5291453093336513, "grad_norm": 2.949936628341675, "learning_rate": 1.3316791054884675e-05, "loss": 1.0942, "step": 25656 }, { "epoch": 1.5292645130528073, "grad_norm": 3.3965842723846436, "learning_rate": 1.3310365610420344e-05, "loss": 0.9484, "step": 25658 }, { "epoch": 1.5293837167719633, "grad_norm": 2.9733479022979736, "learning_rate": 1.33039414784698e-05, "loss": 1.1747, "step": 25660 }, { "epoch": 1.5295029204911192, "grad_norm": 3.248598337173462, "learning_rate": 1.3297518659262792e-05, "loss": 1.2518, "step": 25662 }, { "epoch": 1.5296221242102752, "grad_norm": 3.1736159324645996, "learning_rate": 1.329109715302913e-05, "loss": 1.0863, "step": 25664 }, { "epoch": 1.5297413279294314, "grad_norm": 3.6117727756500244, "learning_rate": 1.3284676959998538e-05, "loss": 1.086, "step": 25666 }, { "epoch": 1.5298605316485876, "grad_norm": 3.0538346767425537, "learning_rate": 1.3278258080400635e-05, "loss": 1.2021, "step": 25668 }, { "epoch": 1.5299797353677436, "grad_norm": 3.181997537612915, "learning_rate": 1.3271840514465107e-05, "loss": 1.1132, "step": 25670 }, { "epoch": 1.5300989390868995, "grad_norm": 3.1040871143341064, "learning_rate": 1.3265424262421499e-05, "loss": 1.1091, "step": 25672 }, { "epoch": 1.5302181428060555, "grad_norm": 3.1932597160339355, "learning_rate": 1.3259009324499361e-05, "loss": 1.1238, "step": 25674 }, { "epoch": 1.5303373465252115, "grad_norm": 3.4893856048583984, "learning_rate": 1.325259570092816e-05, "loss": 1.0415, "step": 25676 }, { "epoch": 1.5304565502443677, "grad_norm": 2.7572944164276123, "learning_rate": 1.3246183391937344e-05, "loss": 1.1451, "step": 25678 }, { "epoch": 1.5305757539635236, "grad_norm": 3.4004712104797363, "learning_rate": 1.3239772397756284e-05, "loss": 1.232, "step": 25680 }, { "epoch": 1.5306949576826798, "grad_norm": 3.6752147674560547, "learning_rate": 1.323336271861435e-05, "loss": 1.1232, "step": 25682 }, { "epoch": 1.5308141614018358, "grad_norm": 3.2920351028442383, "learning_rate": 1.322695435474085e-05, "loss": 1.0693, "step": 25684 }, { "epoch": 1.5309333651209918, "grad_norm": 3.1850669384002686, "learning_rate": 1.3220547306364978e-05, "loss": 1.053, "step": 25686 }, { "epoch": 1.5310525688401477, "grad_norm": 3.1278083324432373, "learning_rate": 1.3214141573715978e-05, "loss": 1.1412, "step": 25688 }, { "epoch": 1.5311717725593037, "grad_norm": 3.643775701522827, "learning_rate": 1.3207737157022998e-05, "loss": 1.1771, "step": 25690 }, { "epoch": 1.53129097627846, "grad_norm": 3.246790647506714, "learning_rate": 1.3201334056515146e-05, "loss": 1.0906, "step": 25692 }, { "epoch": 1.531410179997616, "grad_norm": 3.498493194580078, "learning_rate": 1.3194932272421478e-05, "loss": 1.0292, "step": 25694 }, { "epoch": 1.531529383716772, "grad_norm": 3.1344122886657715, "learning_rate": 1.3188531804971011e-05, "loss": 1.0384, "step": 25696 }, { "epoch": 1.531648587435928, "grad_norm": 3.3323991298675537, "learning_rate": 1.318213265439271e-05, "loss": 1.0704, "step": 25698 }, { "epoch": 1.531767791155084, "grad_norm": 2.9951233863830566, "learning_rate": 1.3175734820915497e-05, "loss": 0.9399, "step": 25700 }, { "epoch": 1.53188699487424, "grad_norm": 3.148374319076538, "learning_rate": 1.3169338304768236e-05, "loss": 1.1267, "step": 25702 }, { "epoch": 1.5320061985933962, "grad_norm": 3.3086133003234863, "learning_rate": 1.3162943106179749e-05, "loss": 1.1035, "step": 25704 }, { "epoch": 1.5321254023125521, "grad_norm": 3.1735098361968994, "learning_rate": 1.3156549225378856e-05, "loss": 1.0126, "step": 25706 }, { "epoch": 1.5322446060317083, "grad_norm": 3.058103561401367, "learning_rate": 1.3150156662594221e-05, "loss": 1.0699, "step": 25708 }, { "epoch": 1.5323638097508643, "grad_norm": 3.0388448238372803, "learning_rate": 1.3143765418054577e-05, "loss": 1.0173, "step": 25710 }, { "epoch": 1.5324830134700202, "grad_norm": 3.054434061050415, "learning_rate": 1.3137375491988563e-05, "loss": 1.1504, "step": 25712 }, { "epoch": 1.5326022171891762, "grad_norm": 3.0651631355285645, "learning_rate": 1.3130986884624724e-05, "loss": 1.0012, "step": 25714 }, { "epoch": 1.5327214209083322, "grad_norm": 3.28074312210083, "learning_rate": 1.312459959619164e-05, "loss": 1.0474, "step": 25716 }, { "epoch": 1.5328406246274884, "grad_norm": 2.874155282974243, "learning_rate": 1.3118213626917797e-05, "loss": 1.1113, "step": 25718 }, { "epoch": 1.5329598283466446, "grad_norm": 3.0086560249328613, "learning_rate": 1.3111828977031648e-05, "loss": 1.1012, "step": 25720 }, { "epoch": 1.5330790320658005, "grad_norm": 3.054685354232788, "learning_rate": 1.3105445646761566e-05, "loss": 1.1894, "step": 25722 }, { "epoch": 1.5331982357849565, "grad_norm": 3.452120780944824, "learning_rate": 1.3099063636335962e-05, "loss": 1.0686, "step": 25724 }, { "epoch": 1.5333174395041125, "grad_norm": 3.2322471141815186, "learning_rate": 1.3092682945983092e-05, "loss": 1.1481, "step": 25726 }, { "epoch": 1.5334366432232684, "grad_norm": 3.227653741836548, "learning_rate": 1.3086303575931208e-05, "loss": 1.1812, "step": 25728 }, { "epoch": 1.5335558469424246, "grad_norm": 3.3772261142730713, "learning_rate": 1.3079925526408581e-05, "loss": 1.2712, "step": 25730 }, { "epoch": 1.5336750506615806, "grad_norm": 3.2881834506988525, "learning_rate": 1.3073548797643309e-05, "loss": 1.3075, "step": 25732 }, { "epoch": 1.5337942543807368, "grad_norm": 3.336751937866211, "learning_rate": 1.3067173389863552e-05, "loss": 1.1091, "step": 25734 }, { "epoch": 1.5339134580998928, "grad_norm": 3.4574790000915527, "learning_rate": 1.3060799303297366e-05, "loss": 1.1432, "step": 25736 }, { "epoch": 1.5340326618190487, "grad_norm": 3.561055898666382, "learning_rate": 1.3054426538172775e-05, "loss": 1.2169, "step": 25738 }, { "epoch": 1.5341518655382047, "grad_norm": 3.3962595462799072, "learning_rate": 1.3048055094717753e-05, "loss": 1.2737, "step": 25740 }, { "epoch": 1.5342710692573607, "grad_norm": 3.3404335975646973, "learning_rate": 1.3041684973160235e-05, "loss": 1.2241, "step": 25742 }, { "epoch": 1.5343902729765169, "grad_norm": 3.0337212085723877, "learning_rate": 1.303531617372809e-05, "loss": 1.1341, "step": 25744 }, { "epoch": 1.534509476695673, "grad_norm": 3.069124221801758, "learning_rate": 1.3028948696649162e-05, "loss": 1.1318, "step": 25746 }, { "epoch": 1.534628680414829, "grad_norm": 3.4388084411621094, "learning_rate": 1.3022582542151241e-05, "loss": 1.1145, "step": 25748 }, { "epoch": 1.534747884133985, "grad_norm": 3.455077886581421, "learning_rate": 1.3016217710462036e-05, "loss": 1.2537, "step": 25750 }, { "epoch": 1.534867087853141, "grad_norm": 2.909182071685791, "learning_rate": 1.3009854201809279e-05, "loss": 1.0574, "step": 25752 }, { "epoch": 1.534986291572297, "grad_norm": 3.4241902828216553, "learning_rate": 1.30034920164206e-05, "loss": 1.163, "step": 25754 }, { "epoch": 1.5351054952914531, "grad_norm": 3.051250457763672, "learning_rate": 1.299713115452359e-05, "loss": 1.0411, "step": 25756 }, { "epoch": 1.535224699010609, "grad_norm": 3.4805750846862793, "learning_rate": 1.2990771616345804e-05, "loss": 1.1894, "step": 25758 }, { "epoch": 1.5353439027297653, "grad_norm": 3.450615882873535, "learning_rate": 1.2984413402114747e-05, "loss": 1.1532, "step": 25760 }, { "epoch": 1.5354631064489213, "grad_norm": 4.033255100250244, "learning_rate": 1.2978056512057863e-05, "loss": 1.1902, "step": 25762 }, { "epoch": 1.5355823101680772, "grad_norm": 3.116804361343384, "learning_rate": 1.2971700946402572e-05, "loss": 1.1103, "step": 25764 }, { "epoch": 1.5357015138872332, "grad_norm": 2.884265661239624, "learning_rate": 1.296534670537623e-05, "loss": 0.9638, "step": 25766 }, { "epoch": 1.5358207176063892, "grad_norm": 2.91743540763855, "learning_rate": 1.2958993789206132e-05, "loss": 1.0078, "step": 25768 }, { "epoch": 1.5359399213255454, "grad_norm": 3.252131700515747, "learning_rate": 1.2952642198119585e-05, "loss": 1.1084, "step": 25770 }, { "epoch": 1.5360591250447015, "grad_norm": 3.333890914916992, "learning_rate": 1.2946291932343773e-05, "loss": 1.1103, "step": 25772 }, { "epoch": 1.5361783287638575, "grad_norm": 3.1145219802856445, "learning_rate": 1.2939942992105864e-05, "loss": 1.1177, "step": 25774 }, { "epoch": 1.5362975324830135, "grad_norm": 3.5702974796295166, "learning_rate": 1.2933595377633023e-05, "loss": 1.2458, "step": 25776 }, { "epoch": 1.5364167362021695, "grad_norm": 3.4901111125946045, "learning_rate": 1.2927249089152265e-05, "loss": 1.2961, "step": 25778 }, { "epoch": 1.5365359399213254, "grad_norm": 3.3423290252685547, "learning_rate": 1.2920904126890665e-05, "loss": 1.1363, "step": 25780 }, { "epoch": 1.5366551436404816, "grad_norm": 2.9405758380889893, "learning_rate": 1.291456049107519e-05, "loss": 1.1774, "step": 25782 }, { "epoch": 1.5367743473596376, "grad_norm": 3.3607418537139893, "learning_rate": 1.2908218181932769e-05, "loss": 1.1462, "step": 25784 }, { "epoch": 1.5368935510787938, "grad_norm": 3.181655168533325, "learning_rate": 1.2901877199690294e-05, "loss": 1.0575, "step": 25786 }, { "epoch": 1.5370127547979497, "grad_norm": 3.2986769676208496, "learning_rate": 1.2895537544574604e-05, "loss": 1.0975, "step": 25788 }, { "epoch": 1.5371319585171057, "grad_norm": 3.365506172180176, "learning_rate": 1.2889199216812487e-05, "loss": 1.2364, "step": 25790 }, { "epoch": 1.5372511622362617, "grad_norm": 3.065354824066162, "learning_rate": 1.288286221663067e-05, "loss": 1.1793, "step": 25792 }, { "epoch": 1.5373703659554177, "grad_norm": 3.046143054962158, "learning_rate": 1.2876526544255896e-05, "loss": 1.1011, "step": 25794 }, { "epoch": 1.5374895696745738, "grad_norm": 3.3606324195861816, "learning_rate": 1.2870192199914755e-05, "loss": 1.2491, "step": 25796 }, { "epoch": 1.53760877339373, "grad_norm": 3.3519339561462402, "learning_rate": 1.2863859183833893e-05, "loss": 1.0843, "step": 25798 }, { "epoch": 1.537727977112886, "grad_norm": 3.356372117996216, "learning_rate": 1.2857527496239852e-05, "loss": 1.2326, "step": 25800 }, { "epoch": 1.537847180832042, "grad_norm": 2.765186071395874, "learning_rate": 1.2851197137359128e-05, "loss": 1.1277, "step": 25802 }, { "epoch": 1.537966384551198, "grad_norm": 2.980673313140869, "learning_rate": 1.2844868107418184e-05, "loss": 1.0196, "step": 25804 }, { "epoch": 1.538085588270354, "grad_norm": 3.3828296661376953, "learning_rate": 1.283854040664344e-05, "loss": 1.194, "step": 25806 }, { "epoch": 1.53820479198951, "grad_norm": 3.6077980995178223, "learning_rate": 1.2832214035261247e-05, "loss": 1.1635, "step": 25808 }, { "epoch": 1.538323995708666, "grad_norm": 3.1859803199768066, "learning_rate": 1.2825888993497925e-05, "loss": 1.1153, "step": 25810 }, { "epoch": 1.5384431994278223, "grad_norm": 3.2660160064697266, "learning_rate": 1.2819565281579748e-05, "loss": 1.251, "step": 25812 }, { "epoch": 1.5385624031469782, "grad_norm": 3.3089916706085205, "learning_rate": 1.281324289973292e-05, "loss": 1.0971, "step": 25814 }, { "epoch": 1.5386816068661342, "grad_norm": 3.144789695739746, "learning_rate": 1.2806921848183633e-05, "loss": 1.0565, "step": 25816 }, { "epoch": 1.5388008105852902, "grad_norm": 2.9699442386627197, "learning_rate": 1.2800602127158024e-05, "loss": 1.024, "step": 25818 }, { "epoch": 1.5389200143044461, "grad_norm": 3.2361438274383545, "learning_rate": 1.2794283736882129e-05, "loss": 1.0346, "step": 25820 }, { "epoch": 1.5390392180236023, "grad_norm": 3.0027761459350586, "learning_rate": 1.2787966677582008e-05, "loss": 1.0888, "step": 25822 }, { "epoch": 1.5391584217427585, "grad_norm": 3.266616106033325, "learning_rate": 1.2781650949483647e-05, "loss": 1.221, "step": 25824 }, { "epoch": 1.5392776254619145, "grad_norm": 3.3730356693267822, "learning_rate": 1.2775336552812973e-05, "loss": 1.257, "step": 25826 }, { "epoch": 1.5393968291810705, "grad_norm": 2.948847770690918, "learning_rate": 1.2769023487795866e-05, "loss": 1.093, "step": 25828 }, { "epoch": 1.5395160329002264, "grad_norm": 2.769562244415283, "learning_rate": 1.276271175465818e-05, "loss": 1.0411, "step": 25830 }, { "epoch": 1.5396352366193824, "grad_norm": 3.4762561321258545, "learning_rate": 1.2756401353625696e-05, "loss": 1.4305, "step": 25832 }, { "epoch": 1.5397544403385386, "grad_norm": 3.433300256729126, "learning_rate": 1.2750092284924165e-05, "loss": 1.2148, "step": 25834 }, { "epoch": 1.5398736440576946, "grad_norm": 3.0528905391693115, "learning_rate": 1.2743784548779281e-05, "loss": 1.0921, "step": 25836 }, { "epoch": 1.5399928477768507, "grad_norm": 3.345569133758545, "learning_rate": 1.2737478145416681e-05, "loss": 1.1031, "step": 25838 }, { "epoch": 1.5401120514960067, "grad_norm": 3.4504659175872803, "learning_rate": 1.2731173075062009e-05, "loss": 1.107, "step": 25840 }, { "epoch": 1.5402312552151627, "grad_norm": 3.0525062084198, "learning_rate": 1.272486933794076e-05, "loss": 1.1462, "step": 25842 }, { "epoch": 1.5403504589343187, "grad_norm": 3.3085274696350098, "learning_rate": 1.2718566934278487e-05, "loss": 1.099, "step": 25844 }, { "epoch": 1.5404696626534746, "grad_norm": 3.377568244934082, "learning_rate": 1.2712265864300643e-05, "loss": 1.1944, "step": 25846 }, { "epoch": 1.5405888663726308, "grad_norm": 3.3130929470062256, "learning_rate": 1.2705966128232604e-05, "loss": 1.1116, "step": 25848 }, { "epoch": 1.540708070091787, "grad_norm": 3.3034250736236572, "learning_rate": 1.2699667726299769e-05, "loss": 1.2386, "step": 25850 }, { "epoch": 1.540827273810943, "grad_norm": 3.2670083045959473, "learning_rate": 1.2693370658727437e-05, "loss": 1.0789, "step": 25852 }, { "epoch": 1.540946477530099, "grad_norm": 2.985934257507324, "learning_rate": 1.2687074925740883e-05, "loss": 1.0582, "step": 25854 }, { "epoch": 1.541065681249255, "grad_norm": 3.2623090744018555, "learning_rate": 1.268078052756531e-05, "loss": 1.0351, "step": 25856 }, { "epoch": 1.5411848849684109, "grad_norm": 3.492398262023926, "learning_rate": 1.2674487464425932e-05, "loss": 1.2288, "step": 25858 }, { "epoch": 1.541304088687567, "grad_norm": 3.0283448696136475, "learning_rate": 1.2668195736547816e-05, "loss": 1.1164, "step": 25860 }, { "epoch": 1.541423292406723, "grad_norm": 3.442870855331421, "learning_rate": 1.2661905344156083e-05, "loss": 1.233, "step": 25862 }, { "epoch": 1.5415424961258792, "grad_norm": 3.42885160446167, "learning_rate": 1.2655616287475764e-05, "loss": 1.1163, "step": 25864 }, { "epoch": 1.5416616998450352, "grad_norm": 3.1551828384399414, "learning_rate": 1.2649328566731789e-05, "loss": 1.0818, "step": 25866 }, { "epoch": 1.5417809035641912, "grad_norm": 2.88966965675354, "learning_rate": 1.264304218214914e-05, "loss": 1.0862, "step": 25868 }, { "epoch": 1.5419001072833471, "grad_norm": 3.1881332397460938, "learning_rate": 1.2636757133952692e-05, "loss": 1.073, "step": 25870 }, { "epoch": 1.5420193110025031, "grad_norm": 3.3114500045776367, "learning_rate": 1.2630473422367273e-05, "loss": 1.2675, "step": 25872 }, { "epoch": 1.5421385147216593, "grad_norm": 3.327054262161255, "learning_rate": 1.2624191047617684e-05, "loss": 1.1288, "step": 25874 }, { "epoch": 1.5422577184408155, "grad_norm": 3.1885204315185547, "learning_rate": 1.2617910009928657e-05, "loss": 1.192, "step": 25876 }, { "epoch": 1.5423769221599715, "grad_norm": 3.1676173210144043, "learning_rate": 1.261163030952489e-05, "loss": 1.1485, "step": 25878 }, { "epoch": 1.5424961258791274, "grad_norm": 3.0867254734039307, "learning_rate": 1.2605351946631017e-05, "loss": 1.1393, "step": 25880 }, { "epoch": 1.5426153295982834, "grad_norm": 3.0399961471557617, "learning_rate": 1.2599074921471676e-05, "loss": 1.011, "step": 25882 }, { "epoch": 1.5427345333174394, "grad_norm": 3.038376569747925, "learning_rate": 1.2592799234271363e-05, "loss": 1.2298, "step": 25884 }, { "epoch": 1.5428537370365956, "grad_norm": 3.288614273071289, "learning_rate": 1.2586524885254624e-05, "loss": 1.1973, "step": 25886 }, { "epoch": 1.5429729407557515, "grad_norm": 3.0812036991119385, "learning_rate": 1.25802518746459e-05, "loss": 1.1511, "step": 25888 }, { "epoch": 1.5430921444749077, "grad_norm": 3.032600164413452, "learning_rate": 1.2573980202669594e-05, "loss": 1.0991, "step": 25890 }, { "epoch": 1.5432113481940637, "grad_norm": 3.623030662536621, "learning_rate": 1.2567709869550071e-05, "loss": 1.3157, "step": 25892 }, { "epoch": 1.5433305519132197, "grad_norm": 2.921103000640869, "learning_rate": 1.2561440875511637e-05, "loss": 1.1732, "step": 25894 }, { "epoch": 1.5434497556323756, "grad_norm": 3.1811394691467285, "learning_rate": 1.2555173220778554e-05, "loss": 1.177, "step": 25896 }, { "epoch": 1.5435689593515318, "grad_norm": 3.046416997909546, "learning_rate": 1.2548906905575047e-05, "loss": 1.1586, "step": 25898 }, { "epoch": 1.5436881630706878, "grad_norm": 3.448077440261841, "learning_rate": 1.2542641930125276e-05, "loss": 1.0696, "step": 25900 }, { "epoch": 1.543807366789844, "grad_norm": 3.365495443344116, "learning_rate": 1.2536378294653346e-05, "loss": 1.1428, "step": 25902 }, { "epoch": 1.543926570509, "grad_norm": 3.173206329345703, "learning_rate": 1.2530115999383375e-05, "loss": 1.1157, "step": 25904 }, { "epoch": 1.544045774228156, "grad_norm": 3.4210023880004883, "learning_rate": 1.252385504453933e-05, "loss": 1.1316, "step": 25906 }, { "epoch": 1.544164977947312, "grad_norm": 3.091475486755371, "learning_rate": 1.2517595430345224e-05, "loss": 1.0266, "step": 25908 }, { "epoch": 1.5442841816664679, "grad_norm": 3.424025297164917, "learning_rate": 1.2511337157024988e-05, "loss": 1.1312, "step": 25910 }, { "epoch": 1.544403385385624, "grad_norm": 3.316647529602051, "learning_rate": 1.2505080224802463e-05, "loss": 1.1106, "step": 25912 }, { "epoch": 1.54452258910478, "grad_norm": 2.943878650665283, "learning_rate": 1.2498824633901518e-05, "loss": 1.0475, "step": 25914 }, { "epoch": 1.5446417928239362, "grad_norm": 2.635579824447632, "learning_rate": 1.2492570384545927e-05, "loss": 1.1176, "step": 25916 }, { "epoch": 1.5447609965430922, "grad_norm": 3.3448894023895264, "learning_rate": 1.2486317476959419e-05, "loss": 1.0849, "step": 25918 }, { "epoch": 1.5448802002622481, "grad_norm": 3.5064573287963867, "learning_rate": 1.2480065911365674e-05, "loss": 1.1371, "step": 25920 }, { "epoch": 1.5449994039814041, "grad_norm": 3.2146923542022705, "learning_rate": 1.2473815687988371e-05, "loss": 1.1865, "step": 25922 }, { "epoch": 1.5451186077005603, "grad_norm": 3.1904430389404297, "learning_rate": 1.2467566807051067e-05, "loss": 1.2729, "step": 25924 }, { "epoch": 1.5452378114197163, "grad_norm": 2.8513216972351074, "learning_rate": 1.2461319268777294e-05, "loss": 1.1225, "step": 25926 }, { "epoch": 1.5453570151388725, "grad_norm": 3.436319351196289, "learning_rate": 1.24550730733906e-05, "loss": 1.1284, "step": 25928 }, { "epoch": 1.5454762188580284, "grad_norm": 3.3087637424468994, "learning_rate": 1.2448828221114373e-05, "loss": 1.2311, "step": 25930 }, { "epoch": 1.5455954225771844, "grad_norm": 3.2953100204467773, "learning_rate": 1.244258471217205e-05, "loss": 1.0956, "step": 25932 }, { "epoch": 1.5457146262963404, "grad_norm": 3.2602603435516357, "learning_rate": 1.2436342546786977e-05, "loss": 1.0816, "step": 25934 }, { "epoch": 1.5458338300154963, "grad_norm": 3.293607711791992, "learning_rate": 1.2430101725182453e-05, "loss": 1.0703, "step": 25936 }, { "epoch": 1.5459530337346525, "grad_norm": 3.4095139503479004, "learning_rate": 1.2423862247581736e-05, "loss": 1.1506, "step": 25938 }, { "epoch": 1.5460722374538085, "grad_norm": 3.1640231609344482, "learning_rate": 1.2417624114208032e-05, "loss": 1.1007, "step": 25940 }, { "epoch": 1.5461914411729647, "grad_norm": 3.087432622909546, "learning_rate": 1.2411387325284496e-05, "loss": 1.0308, "step": 25942 }, { "epoch": 1.5463106448921207, "grad_norm": 3.2839818000793457, "learning_rate": 1.2405151881034244e-05, "loss": 1.0967, "step": 25944 }, { "epoch": 1.5464298486112766, "grad_norm": 3.238943576812744, "learning_rate": 1.2398917781680341e-05, "loss": 1.0903, "step": 25946 }, { "epoch": 1.5465490523304326, "grad_norm": 3.061328172683716, "learning_rate": 1.2392685027445783e-05, "loss": 0.9766, "step": 25948 }, { "epoch": 1.5466682560495888, "grad_norm": 3.5694572925567627, "learning_rate": 1.2386453618553572e-05, "loss": 1.1688, "step": 25950 }, { "epoch": 1.5467874597687448, "grad_norm": 3.1368658542633057, "learning_rate": 1.2380223555226622e-05, "loss": 1.0481, "step": 25952 }, { "epoch": 1.546906663487901, "grad_norm": 3.3022561073303223, "learning_rate": 1.2373994837687757e-05, "loss": 1.0638, "step": 25954 }, { "epoch": 1.547025867207057, "grad_norm": 3.155972957611084, "learning_rate": 1.2367767466159851e-05, "loss": 1.2047, "step": 25956 }, { "epoch": 1.547145070926213, "grad_norm": 3.04946231842041, "learning_rate": 1.2361541440865659e-05, "loss": 1.0092, "step": 25958 }, { "epoch": 1.5472642746453689, "grad_norm": 3.5785934925079346, "learning_rate": 1.2355316762027908e-05, "loss": 1.2762, "step": 25960 }, { "epoch": 1.5473834783645248, "grad_norm": 3.2311055660247803, "learning_rate": 1.2349093429869274e-05, "loss": 1.1506, "step": 25962 }, { "epoch": 1.547502682083681, "grad_norm": 3.5229339599609375, "learning_rate": 1.2342871444612392e-05, "loss": 1.0381, "step": 25964 }, { "epoch": 1.547621885802837, "grad_norm": 3.3928956985473633, "learning_rate": 1.2336650806479827e-05, "loss": 1.1061, "step": 25966 }, { "epoch": 1.5477410895219932, "grad_norm": 3.0555837154388428, "learning_rate": 1.2330431515694158e-05, "loss": 0.9957, "step": 25968 }, { "epoch": 1.5478602932411492, "grad_norm": 2.80631947517395, "learning_rate": 1.2324213572477816e-05, "loss": 1.0224, "step": 25970 }, { "epoch": 1.5479794969603051, "grad_norm": 3.0663466453552246, "learning_rate": 1.2317996977053254e-05, "loss": 1.0292, "step": 25972 }, { "epoch": 1.548098700679461, "grad_norm": 3.5059990882873535, "learning_rate": 1.2311781729642891e-05, "loss": 1.2447, "step": 25974 }, { "epoch": 1.5482179043986173, "grad_norm": 3.2978122234344482, "learning_rate": 1.2305567830469017e-05, "loss": 1.0918, "step": 25976 }, { "epoch": 1.5483371081177733, "grad_norm": 3.1286566257476807, "learning_rate": 1.229935527975397e-05, "loss": 1.1134, "step": 25978 }, { "epoch": 1.5484563118369294, "grad_norm": 3.206808090209961, "learning_rate": 1.229314407771997e-05, "loss": 0.9444, "step": 25980 }, { "epoch": 1.5485755155560854, "grad_norm": 3.4592819213867188, "learning_rate": 1.2286934224589225e-05, "loss": 1.0223, "step": 25982 }, { "epoch": 1.5486947192752414, "grad_norm": 3.0476560592651367, "learning_rate": 1.2280725720583875e-05, "loss": 1.1088, "step": 25984 }, { "epoch": 1.5488139229943974, "grad_norm": 3.190657615661621, "learning_rate": 1.2274518565926019e-05, "loss": 1.1446, "step": 25986 }, { "epoch": 1.5489331267135533, "grad_norm": 3.1410131454467773, "learning_rate": 1.2268312760837714e-05, "loss": 1.0751, "step": 25988 }, { "epoch": 1.5490523304327095, "grad_norm": 3.31438946723938, "learning_rate": 1.2262108305540936e-05, "loss": 1.1861, "step": 25990 }, { "epoch": 1.5491715341518657, "grad_norm": 3.122164487838745, "learning_rate": 1.22559052002577e-05, "loss": 1.2068, "step": 25992 }, { "epoch": 1.5492907378710217, "grad_norm": 3.294966697692871, "learning_rate": 1.2249703445209842e-05, "loss": 1.047, "step": 25994 }, { "epoch": 1.5494099415901776, "grad_norm": 3.01145339012146, "learning_rate": 1.2243503040619269e-05, "loss": 1.0598, "step": 25996 }, { "epoch": 1.5495291453093336, "grad_norm": 3.2563159465789795, "learning_rate": 1.2237303986707793e-05, "loss": 1.0952, "step": 25998 }, { "epoch": 1.5496483490284896, "grad_norm": 3.0784852504730225, "learning_rate": 1.2231106283697125e-05, "loss": 1.1664, "step": 26000 }, { "epoch": 1.5497675527476458, "grad_norm": 3.0503997802734375, "learning_rate": 1.2224909931809026e-05, "loss": 1.2315, "step": 26002 }, { "epoch": 1.5498867564668017, "grad_norm": 3.2320847511291504, "learning_rate": 1.2218714931265141e-05, "loss": 1.1331, "step": 26004 }, { "epoch": 1.550005960185958, "grad_norm": 3.516521453857422, "learning_rate": 1.2212521282287092e-05, "loss": 1.2584, "step": 26006 }, { "epoch": 1.550125163905114, "grad_norm": 3.180448532104492, "learning_rate": 1.2206328985096443e-05, "loss": 1.1195, "step": 26008 }, { "epoch": 1.5502443676242699, "grad_norm": 3.2588837146759033, "learning_rate": 1.2200138039914721e-05, "loss": 1.0855, "step": 26010 }, { "epoch": 1.5503635713434258, "grad_norm": 2.9447107315063477, "learning_rate": 1.2193948446963387e-05, "loss": 1.0229, "step": 26012 }, { "epoch": 1.5504827750625818, "grad_norm": 3.4520277976989746, "learning_rate": 1.2187760206463855e-05, "loss": 1.1358, "step": 26014 }, { "epoch": 1.550601978781738, "grad_norm": 3.2447941303253174, "learning_rate": 1.2181573318637545e-05, "loss": 1.1579, "step": 26016 }, { "epoch": 1.5507211825008942, "grad_norm": 2.9353561401367188, "learning_rate": 1.2175387783705721e-05, "loss": 1.1348, "step": 26018 }, { "epoch": 1.5508403862200502, "grad_norm": 2.9930570125579834, "learning_rate": 1.2169203601889712e-05, "loss": 1.0947, "step": 26020 }, { "epoch": 1.5509595899392061, "grad_norm": 3.0724992752075195, "learning_rate": 1.2163020773410722e-05, "loss": 1.0534, "step": 26022 }, { "epoch": 1.551078793658362, "grad_norm": 2.912048578262329, "learning_rate": 1.2156839298489935e-05, "loss": 1.1242, "step": 26024 }, { "epoch": 1.551197997377518, "grad_norm": 3.114316463470459, "learning_rate": 1.2150659177348489e-05, "loss": 1.156, "step": 26026 }, { "epoch": 1.5513172010966743, "grad_norm": 3.483739137649536, "learning_rate": 1.2144480410207459e-05, "loss": 1.082, "step": 26028 }, { "epoch": 1.5514364048158302, "grad_norm": 2.9896950721740723, "learning_rate": 1.2138302997287892e-05, "loss": 1.2184, "step": 26030 }, { "epoch": 1.5515556085349864, "grad_norm": 3.322356939315796, "learning_rate": 1.2132126938810768e-05, "loss": 1.0733, "step": 26032 }, { "epoch": 1.5516748122541424, "grad_norm": 3.2440671920776367, "learning_rate": 1.2125952234997024e-05, "loss": 1.1579, "step": 26034 }, { "epoch": 1.5517940159732984, "grad_norm": 3.147747755050659, "learning_rate": 1.211977888606754e-05, "loss": 1.237, "step": 26036 }, { "epoch": 1.5519132196924543, "grad_norm": 3.0949480533599854, "learning_rate": 1.21136068922432e-05, "loss": 1.0434, "step": 26038 }, { "epoch": 1.5520324234116103, "grad_norm": 3.405043840408325, "learning_rate": 1.2107436253744742e-05, "loss": 1.1944, "step": 26040 }, { "epoch": 1.5521516271307665, "grad_norm": 3.4035861492156982, "learning_rate": 1.2101266970792946e-05, "loss": 1.1136, "step": 26042 }, { "epoch": 1.5522708308499227, "grad_norm": 3.1325559616088867, "learning_rate": 1.2095099043608515e-05, "loss": 1.131, "step": 26044 }, { "epoch": 1.5523900345690786, "grad_norm": 2.993539810180664, "learning_rate": 1.2088932472412057e-05, "loss": 1.2273, "step": 26046 }, { "epoch": 1.5525092382882346, "grad_norm": 3.200239658355713, "learning_rate": 1.2082767257424211e-05, "loss": 1.2232, "step": 26048 }, { "epoch": 1.5526284420073906, "grad_norm": 3.1620419025421143, "learning_rate": 1.207660339886551e-05, "loss": 1.0412, "step": 26050 }, { "epoch": 1.5527476457265466, "grad_norm": 3.4288032054901123, "learning_rate": 1.207044089695646e-05, "loss": 1.1094, "step": 26052 }, { "epoch": 1.5528668494457027, "grad_norm": 2.7727105617523193, "learning_rate": 1.2064279751917495e-05, "loss": 1.0289, "step": 26054 }, { "epoch": 1.5529860531648587, "grad_norm": 3.5674452781677246, "learning_rate": 1.2058119963969072e-05, "loss": 1.2422, "step": 26056 }, { "epoch": 1.553105256884015, "grad_norm": 3.2164204120635986, "learning_rate": 1.2051961533331496e-05, "loss": 1.1467, "step": 26058 }, { "epoch": 1.5532244606031709, "grad_norm": 3.3039140701293945, "learning_rate": 1.2045804460225086e-05, "loss": 1.1883, "step": 26060 }, { "epoch": 1.5533436643223268, "grad_norm": 3.159480333328247, "learning_rate": 1.203964874487013e-05, "loss": 1.1646, "step": 26062 }, { "epoch": 1.5534628680414828, "grad_norm": 3.1499431133270264, "learning_rate": 1.2033494387486793e-05, "loss": 1.0177, "step": 26064 }, { "epoch": 1.5535820717606388, "grad_norm": 3.3090803623199463, "learning_rate": 1.2027341388295283e-05, "loss": 1.2692, "step": 26066 }, { "epoch": 1.553701275479795, "grad_norm": 3.2883079051971436, "learning_rate": 1.2021189747515688e-05, "loss": 1.1194, "step": 26068 }, { "epoch": 1.5538204791989512, "grad_norm": 2.8749442100524902, "learning_rate": 1.2015039465368078e-05, "loss": 1.2092, "step": 26070 }, { "epoch": 1.5539396829181071, "grad_norm": 3.3167994022369385, "learning_rate": 1.200889054207247e-05, "loss": 1.2147, "step": 26072 }, { "epoch": 1.554058886637263, "grad_norm": 3.616611957550049, "learning_rate": 1.2002742977848835e-05, "loss": 1.2768, "step": 26074 }, { "epoch": 1.554178090356419, "grad_norm": 3.0317559242248535, "learning_rate": 1.199659677291709e-05, "loss": 1.203, "step": 26076 }, { "epoch": 1.554297294075575, "grad_norm": 3.330127716064453, "learning_rate": 1.1990451927497094e-05, "loss": 1.015, "step": 26078 }, { "epoch": 1.5544164977947312, "grad_norm": 3.3510336875915527, "learning_rate": 1.1984308441808706e-05, "loss": 1.0884, "step": 26080 }, { "epoch": 1.5545357015138872, "grad_norm": 3.426309823989868, "learning_rate": 1.1978166316071642e-05, "loss": 1.1005, "step": 26082 }, { "epoch": 1.5546549052330434, "grad_norm": 3.120514154434204, "learning_rate": 1.1972025550505677e-05, "loss": 1.1422, "step": 26084 }, { "epoch": 1.5547741089521994, "grad_norm": 3.6009414196014404, "learning_rate": 1.1965886145330474e-05, "loss": 1.2816, "step": 26086 }, { "epoch": 1.5548933126713553, "grad_norm": 3.306091547012329, "learning_rate": 1.195974810076565e-05, "loss": 1.026, "step": 26088 }, { "epoch": 1.5550125163905113, "grad_norm": 3.2650866508483887, "learning_rate": 1.1953611417030792e-05, "loss": 1.2166, "step": 26090 }, { "epoch": 1.5551317201096673, "grad_norm": 3.349120616912842, "learning_rate": 1.1947476094345428e-05, "loss": 1.1987, "step": 26092 }, { "epoch": 1.5552509238288235, "grad_norm": 2.8989853858947754, "learning_rate": 1.194134213292904e-05, "loss": 1.0512, "step": 26094 }, { "epoch": 1.5553701275479797, "grad_norm": 3.272601842880249, "learning_rate": 1.1935209533001057e-05, "loss": 1.1751, "step": 26096 }, { "epoch": 1.5554893312671356, "grad_norm": 2.505762815475464, "learning_rate": 1.1929078294780871e-05, "loss": 1.0457, "step": 26098 }, { "epoch": 1.5556085349862916, "grad_norm": 3.2700929641723633, "learning_rate": 1.1922948418487795e-05, "loss": 1.141, "step": 26100 }, { "epoch": 1.5557277387054476, "grad_norm": 3.629621982574463, "learning_rate": 1.191681990434116e-05, "loss": 1.0795, "step": 26102 }, { "epoch": 1.5558469424246035, "grad_norm": 3.2358734607696533, "learning_rate": 1.1910692752560165e-05, "loss": 1.0898, "step": 26104 }, { "epoch": 1.5559661461437597, "grad_norm": 3.188927412033081, "learning_rate": 1.1904566963364e-05, "loss": 1.0504, "step": 26106 }, { "epoch": 1.5560853498629157, "grad_norm": 3.036858558654785, "learning_rate": 1.1898442536971844e-05, "loss": 1.0797, "step": 26108 }, { "epoch": 1.5562045535820719, "grad_norm": 3.3759567737579346, "learning_rate": 1.1892319473602731e-05, "loss": 1.1119, "step": 26110 }, { "epoch": 1.5563237573012279, "grad_norm": 3.541221857070923, "learning_rate": 1.1886197773475749e-05, "loss": 1.1801, "step": 26112 }, { "epoch": 1.5564429610203838, "grad_norm": 3.1769204139709473, "learning_rate": 1.1880077436809879e-05, "loss": 1.1294, "step": 26114 }, { "epoch": 1.5565621647395398, "grad_norm": 3.340254306793213, "learning_rate": 1.1873958463824065e-05, "loss": 1.2008, "step": 26116 }, { "epoch": 1.5566813684586958, "grad_norm": 3.51385760307312, "learning_rate": 1.18678408547372e-05, "loss": 1.0661, "step": 26118 }, { "epoch": 1.556800572177852, "grad_norm": 2.9197168350219727, "learning_rate": 1.1861724609768137e-05, "loss": 1.0576, "step": 26120 }, { "epoch": 1.5569197758970081, "grad_norm": 3.368746757507324, "learning_rate": 1.185560972913568e-05, "loss": 1.1366, "step": 26122 }, { "epoch": 1.557038979616164, "grad_norm": 3.1473231315612793, "learning_rate": 1.1849496213058552e-05, "loss": 1.0212, "step": 26124 }, { "epoch": 1.55715818333532, "grad_norm": 3.2843339443206787, "learning_rate": 1.1843384061755502e-05, "loss": 1.2837, "step": 26126 }, { "epoch": 1.557277387054476, "grad_norm": 3.4983744621276855, "learning_rate": 1.1837273275445132e-05, "loss": 1.2041, "step": 26128 }, { "epoch": 1.557396590773632, "grad_norm": 3.1444056034088135, "learning_rate": 1.1831163854346084e-05, "loss": 1.0083, "step": 26130 }, { "epoch": 1.5575157944927882, "grad_norm": 3.0376670360565186, "learning_rate": 1.182505579867691e-05, "loss": 1.0942, "step": 26132 }, { "epoch": 1.5576349982119442, "grad_norm": 3.591078042984009, "learning_rate": 1.1818949108656074e-05, "loss": 1.3451, "step": 26134 }, { "epoch": 1.5577542019311004, "grad_norm": 3.1244754791259766, "learning_rate": 1.1812843784502076e-05, "loss": 1.0715, "step": 26136 }, { "epoch": 1.5578734056502563, "grad_norm": 3.4995546340942383, "learning_rate": 1.1806739826433316e-05, "loss": 1.2036, "step": 26138 }, { "epoch": 1.5579926093694123, "grad_norm": 3.4342427253723145, "learning_rate": 1.180063723466815e-05, "loss": 1.1138, "step": 26140 }, { "epoch": 1.5581118130885683, "grad_norm": 3.2217278480529785, "learning_rate": 1.1794536009424884e-05, "loss": 1.0581, "step": 26142 }, { "epoch": 1.5582310168077242, "grad_norm": 3.2420506477355957, "learning_rate": 1.1788436150921783e-05, "loss": 1.1449, "step": 26144 }, { "epoch": 1.5583502205268804, "grad_norm": 2.7999892234802246, "learning_rate": 1.1782337659377052e-05, "loss": 1.0853, "step": 26146 }, { "epoch": 1.5584694242460366, "grad_norm": 2.8489761352539062, "learning_rate": 1.1776240535008876e-05, "loss": 1.0667, "step": 26148 }, { "epoch": 1.5585886279651926, "grad_norm": 3.2704195976257324, "learning_rate": 1.177014477803537e-05, "loss": 1.215, "step": 26150 }, { "epoch": 1.5587078316843486, "grad_norm": 3.1556272506713867, "learning_rate": 1.1764050388674557e-05, "loss": 1.0687, "step": 26152 }, { "epoch": 1.5588270354035045, "grad_norm": 3.161771774291992, "learning_rate": 1.17579573671445e-05, "loss": 1.0305, "step": 26154 }, { "epoch": 1.5589462391226605, "grad_norm": 3.6947009563446045, "learning_rate": 1.1751865713663152e-05, "loss": 1.0515, "step": 26156 }, { "epoch": 1.5590654428418167, "grad_norm": 3.1184351444244385, "learning_rate": 1.174577542844843e-05, "loss": 1.064, "step": 26158 }, { "epoch": 1.5591846465609727, "grad_norm": 3.1200480461120605, "learning_rate": 1.1739686511718206e-05, "loss": 0.9664, "step": 26160 }, { "epoch": 1.5593038502801289, "grad_norm": 3.2249739170074463, "learning_rate": 1.1733598963690306e-05, "loss": 1.1658, "step": 26162 }, { "epoch": 1.5594230539992848, "grad_norm": 3.4399642944335938, "learning_rate": 1.1727512784582496e-05, "loss": 1.1541, "step": 26164 }, { "epoch": 1.5595422577184408, "grad_norm": 3.1143617630004883, "learning_rate": 1.1721427974612504e-05, "loss": 1.0935, "step": 26166 }, { "epoch": 1.5596614614375968, "grad_norm": 3.33227801322937, "learning_rate": 1.1715344533997996e-05, "loss": 1.1242, "step": 26168 }, { "epoch": 1.5597806651567527, "grad_norm": 3.409052610397339, "learning_rate": 1.1709262462956594e-05, "loss": 1.0841, "step": 26170 }, { "epoch": 1.559899868875909, "grad_norm": 3.4901444911956787, "learning_rate": 1.170318176170591e-05, "loss": 1.0892, "step": 26172 }, { "epoch": 1.5600190725950651, "grad_norm": 3.274177074432373, "learning_rate": 1.1697102430463425e-05, "loss": 1.1382, "step": 26174 }, { "epoch": 1.560138276314221, "grad_norm": 3.0738766193389893, "learning_rate": 1.1691024469446643e-05, "loss": 1.1693, "step": 26176 }, { "epoch": 1.560257480033377, "grad_norm": 2.883394956588745, "learning_rate": 1.1684947878873005e-05, "loss": 0.9045, "step": 26178 }, { "epoch": 1.560376683752533, "grad_norm": 2.7994892597198486, "learning_rate": 1.167887265895985e-05, "loss": 1.037, "step": 26180 }, { "epoch": 1.560495887471689, "grad_norm": 3.2405800819396973, "learning_rate": 1.1672798809924546e-05, "loss": 1.0984, "step": 26182 }, { "epoch": 1.5606150911908452, "grad_norm": 3.0651590824127197, "learning_rate": 1.1666726331984363e-05, "loss": 1.123, "step": 26184 }, { "epoch": 1.5607342949100012, "grad_norm": 3.2468554973602295, "learning_rate": 1.1660655225356531e-05, "loss": 1.1012, "step": 26186 }, { "epoch": 1.5608534986291573, "grad_norm": 3.1760990619659424, "learning_rate": 1.1654585490258229e-05, "loss": 1.0195, "step": 26188 }, { "epoch": 1.5609727023483133, "grad_norm": 3.416431427001953, "learning_rate": 1.1648517126906634e-05, "loss": 1.1602, "step": 26190 }, { "epoch": 1.5610919060674693, "grad_norm": 3.1971874237060547, "learning_rate": 1.1642450135518762e-05, "loss": 1.1618, "step": 26192 }, { "epoch": 1.5612111097866253, "grad_norm": 3.4602160453796387, "learning_rate": 1.163638451631171e-05, "loss": 1.0419, "step": 26194 }, { "epoch": 1.5613303135057812, "grad_norm": 3.123782157897949, "learning_rate": 1.1630320269502454e-05, "loss": 1.1239, "step": 26196 }, { "epoch": 1.5614495172249374, "grad_norm": 3.0870914459228516, "learning_rate": 1.1624257395307892e-05, "loss": 1.0233, "step": 26198 }, { "epoch": 1.5615687209440936, "grad_norm": 3.432844400405884, "learning_rate": 1.161819589394496e-05, "loss": 1.2071, "step": 26200 }, { "epoch": 1.5616879246632496, "grad_norm": 3.1014232635498047, "learning_rate": 1.1612135765630483e-05, "loss": 1.1861, "step": 26202 }, { "epoch": 1.5618071283824055, "grad_norm": 3.107095241546631, "learning_rate": 1.1606077010581252e-05, "loss": 1.1632, "step": 26204 }, { "epoch": 1.5619263321015615, "grad_norm": 3.403550148010254, "learning_rate": 1.1600019629014008e-05, "loss": 1.031, "step": 26206 }, { "epoch": 1.5620455358207175, "grad_norm": 3.490971565246582, "learning_rate": 1.1593963621145449e-05, "loss": 1.1656, "step": 26208 }, { "epoch": 1.5621647395398737, "grad_norm": 3.315680742263794, "learning_rate": 1.1587908987192219e-05, "loss": 1.141, "step": 26210 }, { "epoch": 1.5622839432590296, "grad_norm": 3.0893280506134033, "learning_rate": 1.1581855727370889e-05, "loss": 0.9394, "step": 26212 }, { "epoch": 1.5624031469781858, "grad_norm": 2.9022750854492188, "learning_rate": 1.1575803841898059e-05, "loss": 1.092, "step": 26214 }, { "epoch": 1.5625223506973418, "grad_norm": 3.1922569274902344, "learning_rate": 1.1569753330990163e-05, "loss": 1.2526, "step": 26216 }, { "epoch": 1.5626415544164978, "grad_norm": 3.038149833679199, "learning_rate": 1.1563704194863689e-05, "loss": 1.0623, "step": 26218 }, { "epoch": 1.5627607581356537, "grad_norm": 3.265047073364258, "learning_rate": 1.155765643373503e-05, "loss": 1.1846, "step": 26220 }, { "epoch": 1.5628799618548097, "grad_norm": 3.489130735397339, "learning_rate": 1.1551610047820526e-05, "loss": 1.1116, "step": 26222 }, { "epoch": 1.562999165573966, "grad_norm": 3.308063268661499, "learning_rate": 1.1545565037336481e-05, "loss": 1.1817, "step": 26224 }, { "epoch": 1.563118369293122, "grad_norm": 3.4762980937957764, "learning_rate": 1.1539521402499143e-05, "loss": 1.2341, "step": 26226 }, { "epoch": 1.563237573012278, "grad_norm": 3.584195613861084, "learning_rate": 1.1533479143524717e-05, "loss": 1.152, "step": 26228 }, { "epoch": 1.563356776731434, "grad_norm": 2.901155948638916, "learning_rate": 1.152743826062936e-05, "loss": 0.9876, "step": 26230 }, { "epoch": 1.56347598045059, "grad_norm": 3.484917402267456, "learning_rate": 1.152139875402916e-05, "loss": 1.1845, "step": 26232 }, { "epoch": 1.563595184169746, "grad_norm": 3.064145088195801, "learning_rate": 1.1515360623940168e-05, "loss": 1.0875, "step": 26234 }, { "epoch": 1.5637143878889022, "grad_norm": 3.5479912757873535, "learning_rate": 1.1509323870578432e-05, "loss": 1.221, "step": 26236 }, { "epoch": 1.5638335916080581, "grad_norm": 3.2017126083374023, "learning_rate": 1.1503288494159858e-05, "loss": 1.0893, "step": 26238 }, { "epoch": 1.5639527953272143, "grad_norm": 3.038050651550293, "learning_rate": 1.1497254494900357e-05, "loss": 1.1372, "step": 26240 }, { "epoch": 1.5640719990463703, "grad_norm": 3.0832037925720215, "learning_rate": 1.1491221873015822e-05, "loss": 1.0106, "step": 26242 }, { "epoch": 1.5641912027655263, "grad_norm": 3.568673610687256, "learning_rate": 1.1485190628722015e-05, "loss": 1.0655, "step": 26244 }, { "epoch": 1.5643104064846822, "grad_norm": 3.6481740474700928, "learning_rate": 1.1479160762234725e-05, "loss": 1.0561, "step": 26246 }, { "epoch": 1.5644296102038382, "grad_norm": 3.30784010887146, "learning_rate": 1.147313227376966e-05, "loss": 1.1629, "step": 26248 }, { "epoch": 1.5645488139229944, "grad_norm": 3.326716661453247, "learning_rate": 1.1467105163542463e-05, "loss": 1.1378, "step": 26250 }, { "epoch": 1.5646680176421506, "grad_norm": 3.4071688652038574, "learning_rate": 1.1461079431768745e-05, "loss": 1.2048, "step": 26252 }, { "epoch": 1.5647872213613065, "grad_norm": 2.9967947006225586, "learning_rate": 1.14550550786641e-05, "loss": 0.9688, "step": 26254 }, { "epoch": 1.5649064250804625, "grad_norm": 3.019824266433716, "learning_rate": 1.1449032104444002e-05, "loss": 1.1438, "step": 26256 }, { "epoch": 1.5650256287996185, "grad_norm": 3.3459255695343018, "learning_rate": 1.1443010509323914e-05, "loss": 1.1288, "step": 26258 }, { "epoch": 1.5651448325187745, "grad_norm": 3.4194719791412354, "learning_rate": 1.143699029351929e-05, "loss": 1.1119, "step": 26260 }, { "epoch": 1.5652640362379306, "grad_norm": 3.4344663619995117, "learning_rate": 1.1430971457245432e-05, "loss": 1.08, "step": 26262 }, { "epoch": 1.5653832399570866, "grad_norm": 3.3030097484588623, "learning_rate": 1.1424954000717709e-05, "loss": 1.1438, "step": 26264 }, { "epoch": 1.5655024436762428, "grad_norm": 3.1347196102142334, "learning_rate": 1.1418937924151358e-05, "loss": 1.0876, "step": 26266 }, { "epoch": 1.5656216473953988, "grad_norm": 3.434565544128418, "learning_rate": 1.1412923227761601e-05, "loss": 1.1473, "step": 26268 }, { "epoch": 1.5657408511145547, "grad_norm": 3.1765241622924805, "learning_rate": 1.1406909911763609e-05, "loss": 1.1, "step": 26270 }, { "epoch": 1.5658600548337107, "grad_norm": 3.1271491050720215, "learning_rate": 1.1400897976372493e-05, "loss": 1.3626, "step": 26272 }, { "epoch": 1.565979258552867, "grad_norm": 3.1035995483398438, "learning_rate": 1.1394887421803318e-05, "loss": 0.9998, "step": 26274 }, { "epoch": 1.5660984622720229, "grad_norm": 3.44962477684021, "learning_rate": 1.1388878248271095e-05, "loss": 1.1335, "step": 26276 }, { "epoch": 1.566217665991179, "grad_norm": 3.497579574584961, "learning_rate": 1.1382870455990835e-05, "loss": 1.2279, "step": 26278 }, { "epoch": 1.566336869710335, "grad_norm": 2.8923754692077637, "learning_rate": 1.1376864045177393e-05, "loss": 0.9985, "step": 26280 }, { "epoch": 1.566456073429491, "grad_norm": 3.041637897491455, "learning_rate": 1.1370859016045687e-05, "loss": 1.0062, "step": 26282 }, { "epoch": 1.566575277148647, "grad_norm": 3.49314022064209, "learning_rate": 1.1364855368810534e-05, "loss": 1.1034, "step": 26284 }, { "epoch": 1.566694480867803, "grad_norm": 2.972358465194702, "learning_rate": 1.1358853103686667e-05, "loss": 1.0988, "step": 26286 }, { "epoch": 1.5668136845869591, "grad_norm": 3.230741262435913, "learning_rate": 1.1352852220888849e-05, "loss": 1.0041, "step": 26288 }, { "epoch": 1.566932888306115, "grad_norm": 3.270381450653076, "learning_rate": 1.1346852720631735e-05, "loss": 1.0719, "step": 26290 }, { "epoch": 1.5670520920252713, "grad_norm": 3.4494051933288574, "learning_rate": 1.1340854603129952e-05, "loss": 1.2506, "step": 26292 }, { "epoch": 1.5671712957444273, "grad_norm": 3.1944994926452637, "learning_rate": 1.133485786859807e-05, "loss": 1.1604, "step": 26294 }, { "epoch": 1.5672904994635832, "grad_norm": 3.1002249717712402, "learning_rate": 1.132886251725061e-05, "loss": 1.1386, "step": 26296 }, { "epoch": 1.5674097031827392, "grad_norm": 3.2079715728759766, "learning_rate": 1.1322868549302051e-05, "loss": 1.1508, "step": 26298 }, { "epoch": 1.5675289069018954, "grad_norm": 3.0486795902252197, "learning_rate": 1.1316875964966822e-05, "loss": 1.1165, "step": 26300 }, { "epoch": 1.5676481106210514, "grad_norm": 3.3244097232818604, "learning_rate": 1.1310884764459285e-05, "loss": 1.0471, "step": 26302 }, { "epoch": 1.5677673143402076, "grad_norm": 3.0857200622558594, "learning_rate": 1.130489494799376e-05, "loss": 1.0075, "step": 26304 }, { "epoch": 1.5678865180593635, "grad_norm": 3.1871626377105713, "learning_rate": 1.1298906515784568e-05, "loss": 1.12, "step": 26306 }, { "epoch": 1.5680057217785195, "grad_norm": 3.302131175994873, "learning_rate": 1.1292919468045877e-05, "loss": 1.2308, "step": 26308 }, { "epoch": 1.5681249254976755, "grad_norm": 3.5487067699432373, "learning_rate": 1.1286933804991906e-05, "loss": 1.1621, "step": 26310 }, { "epoch": 1.5682441292168314, "grad_norm": 3.0845742225646973, "learning_rate": 1.1280949526836765e-05, "loss": 1.09, "step": 26312 }, { "epoch": 1.5683633329359876, "grad_norm": 2.8512606620788574, "learning_rate": 1.1274966633794537e-05, "loss": 1.1033, "step": 26314 }, { "epoch": 1.5684825366551436, "grad_norm": 3.077500581741333, "learning_rate": 1.1268985126079245e-05, "loss": 1.0781, "step": 26316 }, { "epoch": 1.5686017403742998, "grad_norm": 3.7340691089630127, "learning_rate": 1.1263005003904875e-05, "loss": 1.1328, "step": 26318 }, { "epoch": 1.5687209440934557, "grad_norm": 2.863600730895996, "learning_rate": 1.1257026267485355e-05, "loss": 0.9763, "step": 26320 }, { "epoch": 1.5688401478126117, "grad_norm": 3.16506290435791, "learning_rate": 1.1251048917034546e-05, "loss": 1.2329, "step": 26322 }, { "epoch": 1.5689593515317677, "grad_norm": 3.359858751296997, "learning_rate": 1.1245072952766322e-05, "loss": 1.0903, "step": 26324 }, { "epoch": 1.5690785552509239, "grad_norm": 3.6830153465270996, "learning_rate": 1.1239098374894414e-05, "loss": 1.2151, "step": 26326 }, { "epoch": 1.5691977589700798, "grad_norm": 3.3631389141082764, "learning_rate": 1.1233125183632587e-05, "loss": 1.1459, "step": 26328 }, { "epoch": 1.569316962689236, "grad_norm": 2.832545042037964, "learning_rate": 1.1227153379194527e-05, "loss": 0.8998, "step": 26330 }, { "epoch": 1.569436166408392, "grad_norm": 3.171351909637451, "learning_rate": 1.1221182961793819e-05, "loss": 1.0575, "step": 26332 }, { "epoch": 1.569555370127548, "grad_norm": 3.2718007564544678, "learning_rate": 1.1215213931644098e-05, "loss": 1.006, "step": 26334 }, { "epoch": 1.569674573846704, "grad_norm": 3.170605182647705, "learning_rate": 1.120924628895887e-05, "loss": 1.0833, "step": 26336 }, { "epoch": 1.56979377756586, "grad_norm": 3.3126649856567383, "learning_rate": 1.1203280033951624e-05, "loss": 1.0568, "step": 26338 }, { "epoch": 1.569912981285016, "grad_norm": 2.9399795532226562, "learning_rate": 1.1197315166835792e-05, "loss": 1.0683, "step": 26340 }, { "epoch": 1.570032185004172, "grad_norm": 3.562190294265747, "learning_rate": 1.1191351687824758e-05, "loss": 1.2642, "step": 26342 }, { "epoch": 1.5701513887233283, "grad_norm": 3.473073720932007, "learning_rate": 1.1185389597131858e-05, "loss": 1.0189, "step": 26344 }, { "epoch": 1.5702705924424842, "grad_norm": 3.3233327865600586, "learning_rate": 1.1179428894970362e-05, "loss": 1.0673, "step": 26346 }, { "epoch": 1.5703897961616402, "grad_norm": 3.471104383468628, "learning_rate": 1.1173469581553542e-05, "loss": 1.1338, "step": 26348 }, { "epoch": 1.5705089998807962, "grad_norm": 3.4172513484954834, "learning_rate": 1.1167511657094538e-05, "loss": 1.1605, "step": 26350 }, { "epoch": 1.5706282035999524, "grad_norm": 3.0854244232177734, "learning_rate": 1.1161555121806511e-05, "loss": 1.1357, "step": 26352 }, { "epoch": 1.5707474073191083, "grad_norm": 3.4159252643585205, "learning_rate": 1.1155599975902548e-05, "loss": 1.1012, "step": 26354 }, { "epoch": 1.5708666110382645, "grad_norm": 3.340742588043213, "learning_rate": 1.1149646219595672e-05, "loss": 1.0756, "step": 26356 }, { "epoch": 1.5709858147574205, "grad_norm": 2.9367270469665527, "learning_rate": 1.114369385309888e-05, "loss": 1.0691, "step": 26358 }, { "epoch": 1.5711050184765765, "grad_norm": 3.2661938667297363, "learning_rate": 1.1137742876625107e-05, "loss": 1.3306, "step": 26360 }, { "epoch": 1.5712242221957324, "grad_norm": 3.4159657955169678, "learning_rate": 1.1131793290387233e-05, "loss": 0.9789, "step": 26362 }, { "epoch": 1.5713434259148884, "grad_norm": 3.606843948364258, "learning_rate": 1.1125845094598097e-05, "loss": 1.2553, "step": 26364 }, { "epoch": 1.5714626296340446, "grad_norm": 4.2148661613464355, "learning_rate": 1.1119898289470492e-05, "loss": 0.9932, "step": 26366 }, { "epoch": 1.5715818333532008, "grad_norm": 3.2612791061401367, "learning_rate": 1.1113952875217143e-05, "loss": 1.1685, "step": 26368 }, { "epoch": 1.5717010370723568, "grad_norm": 3.2933571338653564, "learning_rate": 1.1108008852050767e-05, "loss": 1.1847, "step": 26370 }, { "epoch": 1.5718202407915127, "grad_norm": 3.1632399559020996, "learning_rate": 1.1102066220183955e-05, "loss": 1.109, "step": 26372 }, { "epoch": 1.5719394445106687, "grad_norm": 3.209733247756958, "learning_rate": 1.1096124979829342e-05, "loss": 1.164, "step": 26374 }, { "epoch": 1.5720586482298247, "grad_norm": 3.0987565517425537, "learning_rate": 1.1090185131199454e-05, "loss": 1.0702, "step": 26376 }, { "epoch": 1.5721778519489809, "grad_norm": 3.3180882930755615, "learning_rate": 1.1084246674506748e-05, "loss": 1.1471, "step": 26378 }, { "epoch": 1.5722970556681368, "grad_norm": 3.6057233810424805, "learning_rate": 1.1078309609963699e-05, "loss": 1.1535, "step": 26380 }, { "epoch": 1.572416259387293, "grad_norm": 3.5798702239990234, "learning_rate": 1.1072373937782687e-05, "loss": 1.1991, "step": 26382 }, { "epoch": 1.572535463106449, "grad_norm": 3.241712808609009, "learning_rate": 1.1066439658176042e-05, "loss": 1.2368, "step": 26384 }, { "epoch": 1.572654666825605, "grad_norm": 3.109395742416382, "learning_rate": 1.1060506771356056e-05, "loss": 1.2632, "step": 26386 }, { "epoch": 1.572773870544761, "grad_norm": 3.0732460021972656, "learning_rate": 1.1054575277534996e-05, "loss": 0.9716, "step": 26388 }, { "epoch": 1.572893074263917, "grad_norm": 3.0511014461517334, "learning_rate": 1.1048645176925009e-05, "loss": 0.9765, "step": 26390 }, { "epoch": 1.573012277983073, "grad_norm": 2.971367359161377, "learning_rate": 1.104271646973825e-05, "loss": 1.0462, "step": 26392 }, { "epoch": 1.5731314817022293, "grad_norm": 2.8524320125579834, "learning_rate": 1.1036789156186839e-05, "loss": 1.0269, "step": 26394 }, { "epoch": 1.5732506854213852, "grad_norm": 3.1965794563293457, "learning_rate": 1.1030863236482763e-05, "loss": 1.319, "step": 26396 }, { "epoch": 1.5733698891405412, "grad_norm": 3.306457996368408, "learning_rate": 1.1024938710838051e-05, "loss": 1.2384, "step": 26398 }, { "epoch": 1.5734890928596972, "grad_norm": 3.396672010421753, "learning_rate": 1.1019015579464636e-05, "loss": 1.2569, "step": 26400 }, { "epoch": 1.5736082965788531, "grad_norm": 3.5869126319885254, "learning_rate": 1.1013093842574407e-05, "loss": 1.0318, "step": 26402 }, { "epoch": 1.5737275002980093, "grad_norm": 3.2721407413482666, "learning_rate": 1.1007173500379203e-05, "loss": 1.1065, "step": 26404 }, { "epoch": 1.5738467040171653, "grad_norm": 3.224855661392212, "learning_rate": 1.1001254553090812e-05, "loss": 1.1815, "step": 26406 }, { "epoch": 1.5739659077363215, "grad_norm": 3.3888626098632812, "learning_rate": 1.099533700092098e-05, "loss": 1.2217, "step": 26408 }, { "epoch": 1.5740851114554775, "grad_norm": 3.0852701663970947, "learning_rate": 1.0989420844081383e-05, "loss": 1.0837, "step": 26410 }, { "epoch": 1.5742043151746334, "grad_norm": 3.239915132522583, "learning_rate": 1.0983506082783707e-05, "loss": 1.108, "step": 26412 }, { "epoch": 1.5743235188937894, "grad_norm": 3.082146406173706, "learning_rate": 1.0977592717239476e-05, "loss": 1.0284, "step": 26414 }, { "epoch": 1.5744427226129454, "grad_norm": 3.705838680267334, "learning_rate": 1.097168074766029e-05, "loss": 1.1433, "step": 26416 }, { "epoch": 1.5745619263321016, "grad_norm": 3.4032554626464844, "learning_rate": 1.0965770174257612e-05, "loss": 1.2264, "step": 26418 }, { "epoch": 1.5746811300512578, "grad_norm": 3.346916675567627, "learning_rate": 1.0959860997242894e-05, "loss": 1.099, "step": 26420 }, { "epoch": 1.5748003337704137, "grad_norm": 3.5647716522216797, "learning_rate": 1.0953953216827517e-05, "loss": 1.2779, "step": 26422 }, { "epoch": 1.5749195374895697, "grad_norm": 3.5690088272094727, "learning_rate": 1.094804683322283e-05, "loss": 1.2147, "step": 26424 }, { "epoch": 1.5750387412087257, "grad_norm": 3.062563419342041, "learning_rate": 1.0942141846640124e-05, "loss": 1.1145, "step": 26426 }, { "epoch": 1.5751579449278816, "grad_norm": 3.441864490509033, "learning_rate": 1.0936238257290638e-05, "loss": 1.0569, "step": 26428 }, { "epoch": 1.5752771486470378, "grad_norm": 2.935060501098633, "learning_rate": 1.0930336065385566e-05, "loss": 0.9834, "step": 26430 }, { "epoch": 1.5753963523661938, "grad_norm": 3.1877057552337646, "learning_rate": 1.092443527113603e-05, "loss": 1.1995, "step": 26432 }, { "epoch": 1.57551555608535, "grad_norm": 3.4518401622772217, "learning_rate": 1.0918535874753172e-05, "loss": 1.0919, "step": 26434 }, { "epoch": 1.575634759804506, "grad_norm": 3.263219118118286, "learning_rate": 1.091263787644799e-05, "loss": 1.0176, "step": 26436 }, { "epoch": 1.575753963523662, "grad_norm": 3.3283302783966064, "learning_rate": 1.0906741276431465e-05, "loss": 1.1096, "step": 26438 }, { "epoch": 1.575873167242818, "grad_norm": 3.126948118209839, "learning_rate": 1.0900846074914594e-05, "loss": 1.2256, "step": 26440 }, { "epoch": 1.5759923709619739, "grad_norm": 3.2136850357055664, "learning_rate": 1.0894952272108211e-05, "loss": 1.1927, "step": 26442 }, { "epoch": 1.57611157468113, "grad_norm": 3.3975682258605957, "learning_rate": 1.0889059868223189e-05, "loss": 1.0529, "step": 26444 }, { "epoch": 1.5762307784002862, "grad_norm": 3.419893503189087, "learning_rate": 1.0883168863470316e-05, "loss": 1.1889, "step": 26446 }, { "epoch": 1.5763499821194422, "grad_norm": 3.245668411254883, "learning_rate": 1.0877279258060325e-05, "loss": 1.0444, "step": 26448 }, { "epoch": 1.5764691858385982, "grad_norm": 3.223432779312134, "learning_rate": 1.0871391052203917e-05, "loss": 1.1713, "step": 26450 }, { "epoch": 1.5765883895577542, "grad_norm": 3.4933876991271973, "learning_rate": 1.0865504246111724e-05, "loss": 1.0834, "step": 26452 }, { "epoch": 1.5767075932769101, "grad_norm": 3.470909833908081, "learning_rate": 1.0859618839994345e-05, "loss": 1.0882, "step": 26454 }, { "epoch": 1.5768267969960663, "grad_norm": 3.1954174041748047, "learning_rate": 1.0853734834062302e-05, "loss": 1.0022, "step": 26456 }, { "epoch": 1.5769460007152223, "grad_norm": 3.5088140964508057, "learning_rate": 1.0847852228526128e-05, "loss": 1.0954, "step": 26458 }, { "epoch": 1.5770652044343785, "grad_norm": 3.597670316696167, "learning_rate": 1.0841971023596215e-05, "loss": 1.245, "step": 26460 }, { "epoch": 1.5771844081535344, "grad_norm": 3.1935513019561768, "learning_rate": 1.0836091219482986e-05, "loss": 1.1875, "step": 26462 }, { "epoch": 1.5773036118726904, "grad_norm": 3.3038556575775146, "learning_rate": 1.0830212816396785e-05, "loss": 1.2214, "step": 26464 }, { "epoch": 1.5774228155918464, "grad_norm": 3.2048499584198, "learning_rate": 1.0824335814547865e-05, "loss": 1.1122, "step": 26466 }, { "epoch": 1.5775420193110024, "grad_norm": 2.8458409309387207, "learning_rate": 1.0818460214146497e-05, "loss": 1.0323, "step": 26468 }, { "epoch": 1.5776612230301585, "grad_norm": 3.1745522022247314, "learning_rate": 1.0812586015402876e-05, "loss": 1.0759, "step": 26470 }, { "epoch": 1.5777804267493147, "grad_norm": 3.386705160140991, "learning_rate": 1.0806713218527126e-05, "loss": 1.2078, "step": 26472 }, { "epoch": 1.5778996304684707, "grad_norm": 3.1384224891662598, "learning_rate": 1.080084182372933e-05, "loss": 1.011, "step": 26474 }, { "epoch": 1.5780188341876267, "grad_norm": 3.3261821269989014, "learning_rate": 1.0794971831219564e-05, "loss": 1.1242, "step": 26476 }, { "epoch": 1.5781380379067826, "grad_norm": 3.379796266555786, "learning_rate": 1.0789103241207771e-05, "loss": 1.1749, "step": 26478 }, { "epoch": 1.5782572416259386, "grad_norm": 3.3180480003356934, "learning_rate": 1.0783236053903927e-05, "loss": 1.1291, "step": 26480 }, { "epoch": 1.5783764453450948, "grad_norm": 3.0151941776275635, "learning_rate": 1.0777370269517923e-05, "loss": 1.2432, "step": 26482 }, { "epoch": 1.5784956490642508, "grad_norm": 3.4437479972839355, "learning_rate": 1.0771505888259552e-05, "loss": 1.1118, "step": 26484 }, { "epoch": 1.578614852783407, "grad_norm": 3.4707741737365723, "learning_rate": 1.0765642910338647e-05, "loss": 1.2991, "step": 26486 }, { "epoch": 1.578734056502563, "grad_norm": 3.3059194087982178, "learning_rate": 1.0759781335964942e-05, "loss": 1.1048, "step": 26488 }, { "epoch": 1.578853260221719, "grad_norm": 3.3817193508148193, "learning_rate": 1.0753921165348107e-05, "loss": 1.1015, "step": 26490 }, { "epoch": 1.5789724639408749, "grad_norm": 2.776679277420044, "learning_rate": 1.07480623986978e-05, "loss": 0.9757, "step": 26492 }, { "epoch": 1.5790916676600308, "grad_norm": 3.4334163665771484, "learning_rate": 1.0742205036223597e-05, "loss": 1.2057, "step": 26494 }, { "epoch": 1.579210871379187, "grad_norm": 3.077698230743408, "learning_rate": 1.0736349078135039e-05, "loss": 1.1396, "step": 26496 }, { "epoch": 1.5793300750983432, "grad_norm": 2.829493522644043, "learning_rate": 1.073049452464161e-05, "loss": 1.0498, "step": 26498 }, { "epoch": 1.5794492788174992, "grad_norm": 3.490253210067749, "learning_rate": 1.0724641375952754e-05, "loss": 1.0425, "step": 26500 }, { "epoch": 1.5795684825366552, "grad_norm": 3.10024094581604, "learning_rate": 1.0718789632277843e-05, "loss": 0.9716, "step": 26502 }, { "epoch": 1.5796876862558111, "grad_norm": 3.0697686672210693, "learning_rate": 1.0712939293826252e-05, "loss": 1.116, "step": 26504 }, { "epoch": 1.579806889974967, "grad_norm": 3.424166202545166, "learning_rate": 1.0707090360807215e-05, "loss": 1.0716, "step": 26506 }, { "epoch": 1.5799260936941233, "grad_norm": 3.3212292194366455, "learning_rate": 1.0701242833430008e-05, "loss": 1.1172, "step": 26508 }, { "epoch": 1.5800452974132793, "grad_norm": 3.089886426925659, "learning_rate": 1.0695396711903816e-05, "loss": 1.1007, "step": 26510 }, { "epoch": 1.5801645011324355, "grad_norm": 3.409029006958008, "learning_rate": 1.068955199643774e-05, "loss": 1.1741, "step": 26512 }, { "epoch": 1.5802837048515914, "grad_norm": 3.1004016399383545, "learning_rate": 1.0683708687240901e-05, "loss": 1.044, "step": 26514 }, { "epoch": 1.5804029085707474, "grad_norm": 2.879270315170288, "learning_rate": 1.0677866784522317e-05, "loss": 1.0376, "step": 26516 }, { "epoch": 1.5805221122899034, "grad_norm": 3.442626476287842, "learning_rate": 1.0672026288490982e-05, "loss": 1.1709, "step": 26518 }, { "epoch": 1.5806413160090593, "grad_norm": 3.437389612197876, "learning_rate": 1.066618719935581e-05, "loss": 1.1705, "step": 26520 }, { "epoch": 1.5807605197282155, "grad_norm": 3.391031503677368, "learning_rate": 1.066034951732573e-05, "loss": 1.2459, "step": 26522 }, { "epoch": 1.5808797234473717, "grad_norm": 3.2815515995025635, "learning_rate": 1.0654513242609531e-05, "loss": 1.1512, "step": 26524 }, { "epoch": 1.5809989271665277, "grad_norm": 3.0172364711761475, "learning_rate": 1.0648678375415998e-05, "loss": 1.066, "step": 26526 }, { "epoch": 1.5811181308856836, "grad_norm": 3.455432176589966, "learning_rate": 1.0642844915953909e-05, "loss": 1.167, "step": 26528 }, { "epoch": 1.5812373346048396, "grad_norm": 3.4398064613342285, "learning_rate": 1.063701286443189e-05, "loss": 1.0346, "step": 26530 }, { "epoch": 1.5813565383239956, "grad_norm": 3.370725393295288, "learning_rate": 1.0631182221058611e-05, "loss": 1.0307, "step": 26532 }, { "epoch": 1.5814757420431518, "grad_norm": 2.252610445022583, "learning_rate": 1.0625352986042642e-05, "loss": 0.9324, "step": 26534 }, { "epoch": 1.5815949457623077, "grad_norm": 3.189262866973877, "learning_rate": 1.0619525159592514e-05, "loss": 1.1797, "step": 26536 }, { "epoch": 1.581714149481464, "grad_norm": 3.456679105758667, "learning_rate": 1.0613698741916717e-05, "loss": 1.236, "step": 26538 }, { "epoch": 1.58183335320062, "grad_norm": 3.4448981285095215, "learning_rate": 1.0607873733223666e-05, "loss": 1.1723, "step": 26540 }, { "epoch": 1.5819525569197759, "grad_norm": 3.535874128341675, "learning_rate": 1.0602050133721758e-05, "loss": 1.1651, "step": 26542 }, { "epoch": 1.5820717606389318, "grad_norm": 3.2902870178222656, "learning_rate": 1.0596227943619297e-05, "loss": 1.1423, "step": 26544 }, { "epoch": 1.5821909643580878, "grad_norm": 3.027114152908325, "learning_rate": 1.059040716312461e-05, "loss": 1.1318, "step": 26546 }, { "epoch": 1.582310168077244, "grad_norm": 3.1511898040771484, "learning_rate": 1.0584587792445871e-05, "loss": 1.2528, "step": 26548 }, { "epoch": 1.5824293717964002, "grad_norm": 2.9588000774383545, "learning_rate": 1.0578769831791297e-05, "loss": 1.1364, "step": 26550 }, { "epoch": 1.5825485755155562, "grad_norm": 3.0734002590179443, "learning_rate": 1.0572953281369002e-05, "loss": 1.0604, "step": 26552 }, { "epoch": 1.5826677792347121, "grad_norm": 3.185579538345337, "learning_rate": 1.0567138141387073e-05, "loss": 1.1091, "step": 26554 }, { "epoch": 1.582786982953868, "grad_norm": 3.3696415424346924, "learning_rate": 1.0561324412053525e-05, "loss": 1.1579, "step": 26556 }, { "epoch": 1.582906186673024, "grad_norm": 3.162259817123413, "learning_rate": 1.0555512093576341e-05, "loss": 1.1786, "step": 26558 }, { "epoch": 1.5830253903921803, "grad_norm": 3.5046563148498535, "learning_rate": 1.0549701186163452e-05, "loss": 1.0777, "step": 26560 }, { "epoch": 1.5831445941113362, "grad_norm": 3.145901679992676, "learning_rate": 1.0543891690022729e-05, "loss": 1.1115, "step": 26562 }, { "epoch": 1.5832637978304924, "grad_norm": 2.9955577850341797, "learning_rate": 1.0538083605361993e-05, "loss": 1.0548, "step": 26564 }, { "epoch": 1.5833830015496484, "grad_norm": 3.376091957092285, "learning_rate": 1.0532276932389013e-05, "loss": 1.1249, "step": 26566 }, { "epoch": 1.5835022052688044, "grad_norm": 3.097191333770752, "learning_rate": 1.052647167131155e-05, "loss": 1.1593, "step": 26568 }, { "epoch": 1.5836214089879603, "grad_norm": 3.0093038082122803, "learning_rate": 1.0520667822337243e-05, "loss": 1.1534, "step": 26570 }, { "epoch": 1.5837406127071163, "grad_norm": 3.0313467979431152, "learning_rate": 1.0514865385673705e-05, "loss": 1.0776, "step": 26572 }, { "epoch": 1.5838598164262725, "grad_norm": 3.393169403076172, "learning_rate": 1.0509064361528558e-05, "loss": 1.0832, "step": 26574 }, { "epoch": 1.5839790201454287, "grad_norm": 3.1377170085906982, "learning_rate": 1.0503264750109276e-05, "loss": 1.0591, "step": 26576 }, { "epoch": 1.5840982238645847, "grad_norm": 3.337104320526123, "learning_rate": 1.0497466551623359e-05, "loss": 1.068, "step": 26578 }, { "epoch": 1.5842174275837406, "grad_norm": 3.6087260246276855, "learning_rate": 1.0491669766278217e-05, "loss": 1.1882, "step": 26580 }, { "epoch": 1.5843366313028966, "grad_norm": 2.8387951850891113, "learning_rate": 1.0485874394281226e-05, "loss": 1.126, "step": 26582 }, { "epoch": 1.5844558350220526, "grad_norm": 3.0893399715423584, "learning_rate": 1.0480080435839707e-05, "loss": 1.1149, "step": 26584 }, { "epoch": 1.5845750387412088, "grad_norm": 3.3439371585845947, "learning_rate": 1.0474287891160923e-05, "loss": 1.2219, "step": 26586 }, { "epoch": 1.5846942424603647, "grad_norm": 2.8312108516693115, "learning_rate": 1.0468496760452096e-05, "loss": 1.015, "step": 26588 }, { "epoch": 1.584813446179521, "grad_norm": 3.5665132999420166, "learning_rate": 1.0462707043920383e-05, "loss": 1.1335, "step": 26590 }, { "epoch": 1.5849326498986769, "grad_norm": 3.1869990825653076, "learning_rate": 1.0456918741772941e-05, "loss": 1.0099, "step": 26592 }, { "epoch": 1.5850518536178329, "grad_norm": 3.5524206161499023, "learning_rate": 1.0451131854216788e-05, "loss": 1.2111, "step": 26594 }, { "epoch": 1.5851710573369888, "grad_norm": 3.337385416030884, "learning_rate": 1.0445346381458971e-05, "loss": 1.2922, "step": 26596 }, { "epoch": 1.5852902610561448, "grad_norm": 3.1915853023529053, "learning_rate": 1.0439562323706453e-05, "loss": 1.0474, "step": 26598 }, { "epoch": 1.585409464775301, "grad_norm": 3.0778188705444336, "learning_rate": 1.0433779681166145e-05, "loss": 1.1351, "step": 26600 }, { "epoch": 1.5855286684944572, "grad_norm": 3.145167827606201, "learning_rate": 1.0427998454044914e-05, "loss": 1.2642, "step": 26602 }, { "epoch": 1.5856478722136131, "grad_norm": 3.2871832847595215, "learning_rate": 1.0422218642549569e-05, "loss": 1.1356, "step": 26604 }, { "epoch": 1.585767075932769, "grad_norm": 3.433156728744507, "learning_rate": 1.0416440246886877e-05, "loss": 1.2226, "step": 26606 }, { "epoch": 1.585886279651925, "grad_norm": 3.081308364868164, "learning_rate": 1.0410663267263537e-05, "loss": 0.9845, "step": 26608 }, { "epoch": 1.586005483371081, "grad_norm": 3.245379686355591, "learning_rate": 1.0404887703886251e-05, "loss": 1.1914, "step": 26610 }, { "epoch": 1.5861246870902372, "grad_norm": 5.188622951507568, "learning_rate": 1.0399113556961582e-05, "loss": 1.2964, "step": 26612 }, { "epoch": 1.5862438908093932, "grad_norm": 3.163141965866089, "learning_rate": 1.039334082669613e-05, "loss": 1.2056, "step": 26614 }, { "epoch": 1.5863630945285494, "grad_norm": 3.221308469772339, "learning_rate": 1.0387569513296396e-05, "loss": 1.0161, "step": 26616 }, { "epoch": 1.5864822982477054, "grad_norm": 3.2372281551361084, "learning_rate": 1.0381799616968812e-05, "loss": 1.0461, "step": 26618 }, { "epoch": 1.5866015019668613, "grad_norm": 3.2950778007507324, "learning_rate": 1.0376031137919817e-05, "loss": 1.187, "step": 26620 }, { "epoch": 1.5867207056860173, "grad_norm": 2.8924620151519775, "learning_rate": 1.0370264076355762e-05, "loss": 1.0376, "step": 26622 }, { "epoch": 1.5868399094051733, "grad_norm": 3.6233022212982178, "learning_rate": 1.036449843248295e-05, "loss": 1.1284, "step": 26624 }, { "epoch": 1.5869591131243295, "grad_norm": 3.255908966064453, "learning_rate": 1.0358734206507641e-05, "loss": 0.9903, "step": 26626 }, { "epoch": 1.5870783168434857, "grad_norm": 3.026615619659424, "learning_rate": 1.0352971398636046e-05, "loss": 1.143, "step": 26628 }, { "epoch": 1.5871975205626416, "grad_norm": 3.5260426998138428, "learning_rate": 1.034721000907431e-05, "loss": 0.9672, "step": 26630 }, { "epoch": 1.5873167242817976, "grad_norm": 3.313392162322998, "learning_rate": 1.0341450038028544e-05, "loss": 1.2183, "step": 26632 }, { "epoch": 1.5874359280009536, "grad_norm": 3.229320764541626, "learning_rate": 1.0335691485704801e-05, "loss": 1.0911, "step": 26634 }, { "epoch": 1.5875551317201095, "grad_norm": 3.4404523372650146, "learning_rate": 1.0329934352309067e-05, "loss": 1.0848, "step": 26636 }, { "epoch": 1.5876743354392657, "grad_norm": 3.1185905933380127, "learning_rate": 1.0324178638047344e-05, "loss": 1.0162, "step": 26638 }, { "epoch": 1.5877935391584217, "grad_norm": 3.583254337310791, "learning_rate": 1.0318424343125471e-05, "loss": 1.1919, "step": 26640 }, { "epoch": 1.5879127428775779, "grad_norm": 3.4725120067596436, "learning_rate": 1.0312671467749342e-05, "loss": 1.1428, "step": 26642 }, { "epoch": 1.5880319465967339, "grad_norm": 3.093562364578247, "learning_rate": 1.0306920012124737e-05, "loss": 1.07, "step": 26644 }, { "epoch": 1.5881511503158898, "grad_norm": 3.5214593410491943, "learning_rate": 1.0301169976457419e-05, "loss": 1.2828, "step": 26646 }, { "epoch": 1.5882703540350458, "grad_norm": 3.001352310180664, "learning_rate": 1.0295421360953072e-05, "loss": 1.1572, "step": 26648 }, { "epoch": 1.588389557754202, "grad_norm": 3.2949728965759277, "learning_rate": 1.028967416581736e-05, "loss": 1.2115, "step": 26650 }, { "epoch": 1.588508761473358, "grad_norm": 2.904297351837158, "learning_rate": 1.0283928391255865e-05, "loss": 1.0719, "step": 26652 }, { "epoch": 1.5886279651925141, "grad_norm": 3.139390468597412, "learning_rate": 1.0278184037474126e-05, "loss": 1.0546, "step": 26654 }, { "epoch": 1.5887471689116701, "grad_norm": 3.0611672401428223, "learning_rate": 1.0272441104677677e-05, "loss": 1.1176, "step": 26656 }, { "epoch": 1.588866372630826, "grad_norm": 3.227956533432007, "learning_rate": 1.026669959307191e-05, "loss": 1.0265, "step": 26658 }, { "epoch": 1.588985576349982, "grad_norm": 2.907484292984009, "learning_rate": 1.0260959502862254e-05, "loss": 1.1201, "step": 26660 }, { "epoch": 1.589104780069138, "grad_norm": 3.2924723625183105, "learning_rate": 1.0255220834254059e-05, "loss": 1.1356, "step": 26662 }, { "epoch": 1.5892239837882942, "grad_norm": 3.0767581462860107, "learning_rate": 1.0249483587452575e-05, "loss": 1.0065, "step": 26664 }, { "epoch": 1.5893431875074502, "grad_norm": 2.994594097137451, "learning_rate": 1.024374776266308e-05, "loss": 1.2418, "step": 26666 }, { "epoch": 1.5894623912266064, "grad_norm": 3.1454105377197266, "learning_rate": 1.0238013360090753e-05, "loss": 1.0909, "step": 26668 }, { "epoch": 1.5895815949457623, "grad_norm": 3.1974940299987793, "learning_rate": 1.0232280379940729e-05, "loss": 1.1974, "step": 26670 }, { "epoch": 1.5897007986649183, "grad_norm": 3.0680787563323975, "learning_rate": 1.0226548822418091e-05, "loss": 1.1887, "step": 26672 }, { "epoch": 1.5898200023840743, "grad_norm": 3.171736001968384, "learning_rate": 1.0220818687727906e-05, "loss": 1.0941, "step": 26674 }, { "epoch": 1.5899392061032305, "grad_norm": 2.967665195465088, "learning_rate": 1.0215089976075132e-05, "loss": 1.0545, "step": 26676 }, { "epoch": 1.5900584098223864, "grad_norm": 3.175623655319214, "learning_rate": 1.0209362687664698e-05, "loss": 1.1117, "step": 26678 }, { "epoch": 1.5901776135415426, "grad_norm": 3.382368326187134, "learning_rate": 1.0203636822701523e-05, "loss": 0.9889, "step": 26680 }, { "epoch": 1.5902968172606986, "grad_norm": 3.2256927490234375, "learning_rate": 1.01979123813904e-05, "loss": 1.1593, "step": 26682 }, { "epoch": 1.5904160209798546, "grad_norm": 3.3054518699645996, "learning_rate": 1.0192189363936144e-05, "loss": 1.0947, "step": 26684 }, { "epoch": 1.5905352246990105, "grad_norm": 3.3506665229797363, "learning_rate": 1.0186467770543478e-05, "loss": 1.137, "step": 26686 }, { "epoch": 1.5906544284181665, "grad_norm": 3.512373685836792, "learning_rate": 1.0180747601417078e-05, "loss": 1.14, "step": 26688 }, { "epoch": 1.5907736321373227, "grad_norm": 3.461388111114502, "learning_rate": 1.0175028856761576e-05, "loss": 1.3298, "step": 26690 }, { "epoch": 1.5908928358564787, "grad_norm": 3.136566400527954, "learning_rate": 1.0169311536781551e-05, "loss": 1.1218, "step": 26692 }, { "epoch": 1.5910120395756349, "grad_norm": 3.2242684364318848, "learning_rate": 1.0163595641681534e-05, "loss": 1.086, "step": 26694 }, { "epoch": 1.5911312432947908, "grad_norm": 2.876797676086426, "learning_rate": 1.0157881171666e-05, "loss": 1.0105, "step": 26696 }, { "epoch": 1.5912504470139468, "grad_norm": 3.1730144023895264, "learning_rate": 1.0152168126939371e-05, "loss": 1.15, "step": 26698 }, { "epoch": 1.5913696507331028, "grad_norm": 3.0922508239746094, "learning_rate": 1.0146456507706016e-05, "loss": 1.2001, "step": 26700 }, { "epoch": 1.591488854452259, "grad_norm": 3.0207557678222656, "learning_rate": 1.0140746314170297e-05, "loss": 1.2221, "step": 26702 }, { "epoch": 1.591608058171415, "grad_norm": 3.4644041061401367, "learning_rate": 1.013503754653643e-05, "loss": 1.0761, "step": 26704 }, { "epoch": 1.5917272618905711, "grad_norm": 3.4709532260894775, "learning_rate": 1.0129330205008675e-05, "loss": 1.1123, "step": 26706 }, { "epoch": 1.591846465609727, "grad_norm": 2.895885467529297, "learning_rate": 1.0123624289791216e-05, "loss": 1.0011, "step": 26708 }, { "epoch": 1.591965669328883, "grad_norm": 3.4515888690948486, "learning_rate": 1.0117919801088122e-05, "loss": 1.1772, "step": 26710 }, { "epoch": 1.592084873048039, "grad_norm": 3.202476739883423, "learning_rate": 1.0112216739103503e-05, "loss": 1.0874, "step": 26712 }, { "epoch": 1.592204076767195, "grad_norm": 3.3220646381378174, "learning_rate": 1.010651510404137e-05, "loss": 1.2444, "step": 26714 }, { "epoch": 1.5923232804863512, "grad_norm": 3.036729335784912, "learning_rate": 1.010081489610568e-05, "loss": 1.0623, "step": 26716 }, { "epoch": 1.5924424842055072, "grad_norm": 3.0885534286499023, "learning_rate": 1.0095116115500347e-05, "loss": 1.1174, "step": 26718 }, { "epoch": 1.5925616879246633, "grad_norm": 3.307746171951294, "learning_rate": 1.008941876242927e-05, "loss": 1.2486, "step": 26720 }, { "epoch": 1.5926808916438193, "grad_norm": 2.8946337699890137, "learning_rate": 1.008372283709622e-05, "loss": 1.1109, "step": 26722 }, { "epoch": 1.5928000953629753, "grad_norm": 3.2341020107269287, "learning_rate": 1.0078028339704965e-05, "loss": 1.0985, "step": 26724 }, { "epoch": 1.5929192990821313, "grad_norm": 3.1640613079071045, "learning_rate": 1.0072335270459254e-05, "loss": 0.9868, "step": 26726 }, { "epoch": 1.5930385028012874, "grad_norm": 3.037560224533081, "learning_rate": 1.00666436295627e-05, "loss": 1.0007, "step": 26728 }, { "epoch": 1.5931577065204434, "grad_norm": 3.1448583602905273, "learning_rate": 1.0060953417218938e-05, "loss": 1.1249, "step": 26730 }, { "epoch": 1.5932769102395996, "grad_norm": 3.1824939250946045, "learning_rate": 1.0055264633631527e-05, "loss": 1.2023, "step": 26732 }, { "epoch": 1.5933961139587556, "grad_norm": 3.0531342029571533, "learning_rate": 1.0049577279003969e-05, "loss": 1.1354, "step": 26734 }, { "epoch": 1.5935153176779115, "grad_norm": 3.3191704750061035, "learning_rate": 1.004389135353972e-05, "loss": 0.914, "step": 26736 }, { "epoch": 1.5936345213970675, "grad_norm": 2.971750259399414, "learning_rate": 1.0038206857442189e-05, "loss": 1.1298, "step": 26738 }, { "epoch": 1.5937537251162235, "grad_norm": 2.856458902359009, "learning_rate": 1.0032523790914721e-05, "loss": 1.0175, "step": 26740 }, { "epoch": 1.5938729288353797, "grad_norm": 3.5464515686035156, "learning_rate": 1.0026842154160615e-05, "loss": 1.0565, "step": 26742 }, { "epoch": 1.5939921325545359, "grad_norm": 3.548076629638672, "learning_rate": 1.0021161947383156e-05, "loss": 1.2707, "step": 26744 }, { "epoch": 1.5941113362736918, "grad_norm": 3.2261481285095215, "learning_rate": 1.0015483170785494e-05, "loss": 1.151, "step": 26746 }, { "epoch": 1.5942305399928478, "grad_norm": 2.9336819648742676, "learning_rate": 1.0009805824570817e-05, "loss": 1.0935, "step": 26748 }, { "epoch": 1.5943497437120038, "grad_norm": 2.7816412448883057, "learning_rate": 1.0004129908942229e-05, "loss": 1.1256, "step": 26750 }, { "epoch": 1.5944689474311597, "grad_norm": 2.955294132232666, "learning_rate": 9.998455424102727e-06, "loss": 0.9737, "step": 26752 }, { "epoch": 1.594588151150316, "grad_norm": 3.170621156692505, "learning_rate": 9.992782370255355e-06, "loss": 1.0892, "step": 26754 }, { "epoch": 1.594707354869472, "grad_norm": 3.517224073410034, "learning_rate": 9.987110747603035e-06, "loss": 1.0849, "step": 26756 }, { "epoch": 1.594826558588628, "grad_norm": 3.004274606704712, "learning_rate": 9.98144055634867e-06, "loss": 1.1222, "step": 26758 }, { "epoch": 1.594945762307784, "grad_norm": 3.1956560611724854, "learning_rate": 9.9757717966951e-06, "loss": 0.9524, "step": 26760 }, { "epoch": 1.59506496602694, "grad_norm": 3.206812620162964, "learning_rate": 9.970104468845115e-06, "loss": 1.0718, "step": 26762 }, { "epoch": 1.595184169746096, "grad_norm": 3.3178939819335938, "learning_rate": 9.964438573001434e-06, "loss": 1.0781, "step": 26764 }, { "epoch": 1.595303373465252, "grad_norm": 3.123037815093994, "learning_rate": 9.958774109366798e-06, "loss": 1.2404, "step": 26766 }, { "epoch": 1.5954225771844082, "grad_norm": 3.2915844917297363, "learning_rate": 9.953111078143795e-06, "loss": 1.1693, "step": 26768 }, { "epoch": 1.5955417809035644, "grad_norm": 3.442178726196289, "learning_rate": 9.947449479535014e-06, "loss": 1.1829, "step": 26770 }, { "epoch": 1.5956609846227203, "grad_norm": 3.2418835163116455, "learning_rate": 9.941789313743033e-06, "loss": 1.096, "step": 26772 }, { "epoch": 1.5957801883418763, "grad_norm": 3.295205593109131, "learning_rate": 9.936130580970281e-06, "loss": 1.1358, "step": 26774 }, { "epoch": 1.5958993920610323, "grad_norm": 3.1575794219970703, "learning_rate": 9.930473281419228e-06, "loss": 1.1492, "step": 26776 }, { "epoch": 1.5960185957801882, "grad_norm": 3.2458384037017822, "learning_rate": 9.924817415292236e-06, "loss": 1.1318, "step": 26778 }, { "epoch": 1.5961377994993444, "grad_norm": 3.672621488571167, "learning_rate": 9.919162982791647e-06, "loss": 1.1154, "step": 26780 }, { "epoch": 1.5962570032185004, "grad_norm": 3.628387689590454, "learning_rate": 9.913509984119728e-06, "loss": 1.1193, "step": 26782 }, { "epoch": 1.5963762069376566, "grad_norm": 3.203033447265625, "learning_rate": 9.907858419478716e-06, "loss": 1.1488, "step": 26784 }, { "epoch": 1.5964954106568126, "grad_norm": 3.367279529571533, "learning_rate": 9.90220828907078e-06, "loss": 1.1335, "step": 26786 }, { "epoch": 1.5966146143759685, "grad_norm": 3.1736364364624023, "learning_rate": 9.896559593098037e-06, "loss": 1.1117, "step": 26788 }, { "epoch": 1.5967338180951245, "grad_norm": 2.7616639137268066, "learning_rate": 9.890912331762592e-06, "loss": 1.0547, "step": 26790 }, { "epoch": 1.5968530218142805, "grad_norm": 3.18442440032959, "learning_rate": 9.885266505266422e-06, "loss": 1.144, "step": 26792 }, { "epoch": 1.5969722255334367, "grad_norm": 3.2561941146850586, "learning_rate": 9.879622113811526e-06, "loss": 1.2124, "step": 26794 }, { "epoch": 1.5970914292525928, "grad_norm": 3.212498903274536, "learning_rate": 9.87397915759984e-06, "loss": 1.1111, "step": 26796 }, { "epoch": 1.5972106329717488, "grad_norm": 3.2227022647857666, "learning_rate": 9.868337636833174e-06, "loss": 1.3647, "step": 26798 }, { "epoch": 1.5973298366909048, "grad_norm": 2.8931972980499268, "learning_rate": 9.8626975517134e-06, "loss": 1.1082, "step": 26800 }, { "epoch": 1.5974490404100607, "grad_norm": 2.972266674041748, "learning_rate": 9.857058902442257e-06, "loss": 1.0935, "step": 26802 }, { "epoch": 1.5975682441292167, "grad_norm": 3.4502015113830566, "learning_rate": 9.851421689221469e-06, "loss": 1.2064, "step": 26804 }, { "epoch": 1.597687447848373, "grad_norm": 3.0413873195648193, "learning_rate": 9.84578591225268e-06, "loss": 1.155, "step": 26806 }, { "epoch": 1.5978066515675289, "grad_norm": 3.0519967079162598, "learning_rate": 9.840151571737543e-06, "loss": 1.0555, "step": 26808 }, { "epoch": 1.597925855286685, "grad_norm": 3.1460933685302734, "learning_rate": 9.834518667877574e-06, "loss": 1.1564, "step": 26810 }, { "epoch": 1.598045059005841, "grad_norm": 3.1683380603790283, "learning_rate": 9.828887200874281e-06, "loss": 1.0351, "step": 26812 }, { "epoch": 1.598164262724997, "grad_norm": 3.030805826187134, "learning_rate": 9.823257170929167e-06, "loss": 1.0414, "step": 26814 }, { "epoch": 1.598283466444153, "grad_norm": 3.313016891479492, "learning_rate": 9.817628578243581e-06, "loss": 1.0954, "step": 26816 }, { "epoch": 1.598402670163309, "grad_norm": 3.318162202835083, "learning_rate": 9.812001423018918e-06, "loss": 1.1342, "step": 26818 }, { "epoch": 1.5985218738824651, "grad_norm": 3.1111180782318115, "learning_rate": 9.80637570545646e-06, "loss": 1.0581, "step": 26820 }, { "epoch": 1.5986410776016213, "grad_norm": 2.779099941253662, "learning_rate": 9.80075142575747e-06, "loss": 1.0778, "step": 26822 }, { "epoch": 1.5987602813207773, "grad_norm": 3.304471254348755, "learning_rate": 9.795128584123137e-06, "loss": 1.0562, "step": 26824 }, { "epoch": 1.5988794850399333, "grad_norm": 3.102128267288208, "learning_rate": 9.78950718075462e-06, "loss": 1.1363, "step": 26826 }, { "epoch": 1.5989986887590892, "grad_norm": 3.2991678714752197, "learning_rate": 9.783887215853005e-06, "loss": 1.2075, "step": 26828 }, { "epoch": 1.5991178924782452, "grad_norm": 3.445132255554199, "learning_rate": 9.778268689619341e-06, "loss": 1.0984, "step": 26830 }, { "epoch": 1.5992370961974014, "grad_norm": 2.9683051109313965, "learning_rate": 9.772651602254628e-06, "loss": 1.1132, "step": 26832 }, { "epoch": 1.5993562999165574, "grad_norm": 3.160701274871826, "learning_rate": 9.767035953959792e-06, "loss": 1.2022, "step": 26834 }, { "epoch": 1.5994755036357136, "grad_norm": 3.211439847946167, "learning_rate": 9.761421744935757e-06, "loss": 1.0836, "step": 26836 }, { "epoch": 1.5995947073548695, "grad_norm": 3.132786512374878, "learning_rate": 9.755808975383312e-06, "loss": 1.1018, "step": 26838 }, { "epoch": 1.5997139110740255, "grad_norm": 3.1486759185791016, "learning_rate": 9.75019764550329e-06, "loss": 1.2483, "step": 26840 }, { "epoch": 1.5998331147931815, "grad_norm": 3.3947830200195312, "learning_rate": 9.744587755496415e-06, "loss": 0.9769, "step": 26842 }, { "epoch": 1.5999523185123374, "grad_norm": 3.375370740890503, "learning_rate": 9.738979305563367e-06, "loss": 1.1622, "step": 26844 }, { "epoch": 1.6000715222314936, "grad_norm": 3.3574774265289307, "learning_rate": 9.733372295904774e-06, "loss": 1.0704, "step": 26846 }, { "epoch": 1.6001907259506498, "grad_norm": 3.407968282699585, "learning_rate": 9.727766726721232e-06, "loss": 1.1354, "step": 26848 }, { "epoch": 1.6003099296698058, "grad_norm": 3.548659324645996, "learning_rate": 9.722162598213264e-06, "loss": 1.2313, "step": 26850 }, { "epoch": 1.6004291333889618, "grad_norm": 3.4897074699401855, "learning_rate": 9.716559910581336e-06, "loss": 1.101, "step": 26852 }, { "epoch": 1.6005483371081177, "grad_norm": 3.2262086868286133, "learning_rate": 9.71095866402591e-06, "loss": 1.1683, "step": 26854 }, { "epoch": 1.6006675408272737, "grad_norm": 3.278501510620117, "learning_rate": 9.705358858747332e-06, "loss": 1.0428, "step": 26856 }, { "epoch": 1.6007867445464299, "grad_norm": 3.2565221786499023, "learning_rate": 9.69976049494592e-06, "loss": 1.179, "step": 26858 }, { "epoch": 1.6009059482655859, "grad_norm": 3.3244011402130127, "learning_rate": 9.694163572821985e-06, "loss": 1.1322, "step": 26860 }, { "epoch": 1.601025151984742, "grad_norm": 3.1561264991760254, "learning_rate": 9.688568092575695e-06, "loss": 1.1397, "step": 26862 }, { "epoch": 1.601144355703898, "grad_norm": 3.2735579013824463, "learning_rate": 9.682974054407268e-06, "loss": 1.2492, "step": 26864 }, { "epoch": 1.601263559423054, "grad_norm": 2.902902603149414, "learning_rate": 9.6773814585168e-06, "loss": 0.9434, "step": 26866 }, { "epoch": 1.60138276314221, "grad_norm": 3.469776153564453, "learning_rate": 9.671790305104356e-06, "loss": 1.0932, "step": 26868 }, { "epoch": 1.601501966861366, "grad_norm": 3.301396131515503, "learning_rate": 9.666200594369962e-06, "loss": 1.0298, "step": 26870 }, { "epoch": 1.6016211705805221, "grad_norm": 2.9283952713012695, "learning_rate": 9.660612326513569e-06, "loss": 1.1009, "step": 26872 }, { "epoch": 1.6017403742996783, "grad_norm": 3.5727617740631104, "learning_rate": 9.655025501735087e-06, "loss": 1.0171, "step": 26874 }, { "epoch": 1.6018595780188343, "grad_norm": 3.4495222568511963, "learning_rate": 9.649440120234376e-06, "loss": 1.2751, "step": 26876 }, { "epoch": 1.6019787817379902, "grad_norm": 3.5988271236419678, "learning_rate": 9.643856182211275e-06, "loss": 1.2866, "step": 26878 }, { "epoch": 1.6020979854571462, "grad_norm": 3.4566216468811035, "learning_rate": 9.63827368786549e-06, "loss": 1.0614, "step": 26880 }, { "epoch": 1.6022171891763022, "grad_norm": 3.557060718536377, "learning_rate": 9.632692637396762e-06, "loss": 1.1363, "step": 26882 }, { "epoch": 1.6023363928954584, "grad_norm": 3.1139726638793945, "learning_rate": 9.627113031004737e-06, "loss": 1.1192, "step": 26884 }, { "epoch": 1.6024555966146143, "grad_norm": 3.2169554233551025, "learning_rate": 9.62153486888901e-06, "loss": 1.0967, "step": 26886 }, { "epoch": 1.6025748003337705, "grad_norm": 3.1708052158355713, "learning_rate": 9.615958151249133e-06, "loss": 1.0015, "step": 26888 }, { "epoch": 1.6026940040529265, "grad_norm": 2.991668701171875, "learning_rate": 9.610382878284607e-06, "loss": 0.9553, "step": 26890 }, { "epoch": 1.6028132077720825, "grad_norm": 3.4526498317718506, "learning_rate": 9.604809050194879e-06, "loss": 1.3556, "step": 26892 }, { "epoch": 1.6029324114912384, "grad_norm": 3.0930287837982178, "learning_rate": 9.59923666717934e-06, "loss": 1.0522, "step": 26894 }, { "epoch": 1.6030516152103944, "grad_norm": 3.041227102279663, "learning_rate": 9.593665729437334e-06, "loss": 1.1496, "step": 26896 }, { "epoch": 1.6031708189295506, "grad_norm": 2.916219472885132, "learning_rate": 9.588096237168142e-06, "loss": 1.0574, "step": 26898 }, { "epoch": 1.6032900226487068, "grad_norm": 3.1638267040252686, "learning_rate": 9.58252819057104e-06, "loss": 1.1457, "step": 26900 }, { "epoch": 1.6034092263678628, "grad_norm": 3.0948309898376465, "learning_rate": 9.576961589845179e-06, "loss": 1.1388, "step": 26902 }, { "epoch": 1.6035284300870187, "grad_norm": 3.4864063262939453, "learning_rate": 9.571396435189701e-06, "loss": 0.9971, "step": 26904 }, { "epoch": 1.6036476338061747, "grad_norm": 3.3799703121185303, "learning_rate": 9.565832726803713e-06, "loss": 1.0893, "step": 26906 }, { "epoch": 1.6037668375253307, "grad_norm": 3.2272250652313232, "learning_rate": 9.560270464886212e-06, "loss": 1.1859, "step": 26908 }, { "epoch": 1.6038860412444869, "grad_norm": 3.1207077503204346, "learning_rate": 9.554709649636211e-06, "loss": 1.3005, "step": 26910 }, { "epoch": 1.6040052449636428, "grad_norm": 3.231724500656128, "learning_rate": 9.549150281252633e-06, "loss": 1.1225, "step": 26912 }, { "epoch": 1.604124448682799, "grad_norm": 3.4232654571533203, "learning_rate": 9.543592359934345e-06, "loss": 1.0872, "step": 26914 }, { "epoch": 1.604243652401955, "grad_norm": 3.132141590118408, "learning_rate": 9.53803588588018e-06, "loss": 0.9949, "step": 26916 }, { "epoch": 1.604362856121111, "grad_norm": 2.9932808876037598, "learning_rate": 9.532480859288912e-06, "loss": 0.9813, "step": 26918 }, { "epoch": 1.604482059840267, "grad_norm": 3.1316919326782227, "learning_rate": 9.52692728035926e-06, "loss": 0.9649, "step": 26920 }, { "epoch": 1.604601263559423, "grad_norm": 3.5603086948394775, "learning_rate": 9.521375149289886e-06, "loss": 1.0638, "step": 26922 }, { "epoch": 1.604720467278579, "grad_norm": 3.2437760829925537, "learning_rate": 9.515824466279439e-06, "loss": 1.1703, "step": 26924 }, { "epoch": 1.6048396709977353, "grad_norm": 3.482333183288574, "learning_rate": 9.510275231526444e-06, "loss": 1.268, "step": 26926 }, { "epoch": 1.6049588747168912, "grad_norm": 3.441450357437134, "learning_rate": 9.50472744522945e-06, "loss": 1.1095, "step": 26928 }, { "epoch": 1.6050780784360472, "grad_norm": 3.2370119094848633, "learning_rate": 9.499181107586913e-06, "loss": 1.045, "step": 26930 }, { "epoch": 1.6051972821552032, "grad_norm": 3.0853466987609863, "learning_rate": 9.493636218797237e-06, "loss": 1.1207, "step": 26932 }, { "epoch": 1.6053164858743592, "grad_norm": 2.951730251312256, "learning_rate": 9.488092779058783e-06, "loss": 1.1835, "step": 26934 }, { "epoch": 1.6054356895935153, "grad_norm": 2.9957706928253174, "learning_rate": 9.482550788569855e-06, "loss": 1.2103, "step": 26936 }, { "epoch": 1.6055548933126713, "grad_norm": 3.4195549488067627, "learning_rate": 9.477010247528723e-06, "loss": 1.0845, "step": 26938 }, { "epoch": 1.6056740970318275, "grad_norm": 3.029952049255371, "learning_rate": 9.471471156133566e-06, "loss": 1.1146, "step": 26940 }, { "epoch": 1.6057933007509835, "grad_norm": 2.965590476989746, "learning_rate": 9.46593351458258e-06, "loss": 1.058, "step": 26942 }, { "epoch": 1.6059125044701394, "grad_norm": 3.2219693660736084, "learning_rate": 9.460397323073811e-06, "loss": 1.0062, "step": 26944 }, { "epoch": 1.6060317081892954, "grad_norm": 3.067687511444092, "learning_rate": 9.454862581805347e-06, "loss": 1.0839, "step": 26946 }, { "epoch": 1.6061509119084514, "grad_norm": 3.264047384262085, "learning_rate": 9.449329290975184e-06, "loss": 1.0037, "step": 26948 }, { "epoch": 1.6062701156276076, "grad_norm": 3.1132988929748535, "learning_rate": 9.443797450781234e-06, "loss": 1.0825, "step": 26950 }, { "epoch": 1.6063893193467638, "grad_norm": 3.154674530029297, "learning_rate": 9.438267061421419e-06, "loss": 1.1371, "step": 26952 }, { "epoch": 1.6065085230659197, "grad_norm": 3.228698492050171, "learning_rate": 9.432738123093572e-06, "loss": 1.178, "step": 26954 }, { "epoch": 1.6066277267850757, "grad_norm": 3.4698569774627686, "learning_rate": 9.427210635995482e-06, "loss": 1.2087, "step": 26956 }, { "epoch": 1.6067469305042317, "grad_norm": 3.2651240825653076, "learning_rate": 9.421684600324887e-06, "loss": 0.9991, "step": 26958 }, { "epoch": 1.6068661342233876, "grad_norm": 2.93420672416687, "learning_rate": 9.416160016279469e-06, "loss": 1.1436, "step": 26960 }, { "epoch": 1.6069853379425438, "grad_norm": 3.3233227729797363, "learning_rate": 9.410636884056867e-06, "loss": 1.0913, "step": 26962 }, { "epoch": 1.6071045416616998, "grad_norm": 2.9332308769226074, "learning_rate": 9.405115203854654e-06, "loss": 0.9646, "step": 26964 }, { "epoch": 1.607223745380856, "grad_norm": 3.3479256629943848, "learning_rate": 9.399594975870368e-06, "loss": 1.1557, "step": 26966 }, { "epoch": 1.607342949100012, "grad_norm": 2.9932820796966553, "learning_rate": 9.394076200301465e-06, "loss": 1.1354, "step": 26968 }, { "epoch": 1.607462152819168, "grad_norm": 2.8212175369262695, "learning_rate": 9.388558877345421e-06, "loss": 1.1546, "step": 26970 }, { "epoch": 1.607581356538324, "grad_norm": 3.369637966156006, "learning_rate": 9.383043007199544e-06, "loss": 1.1269, "step": 26972 }, { "epoch": 1.6077005602574799, "grad_norm": 3.166201114654541, "learning_rate": 9.377528590061208e-06, "loss": 1.23, "step": 26974 }, { "epoch": 1.607819763976636, "grad_norm": 3.059764862060547, "learning_rate": 9.372015626127656e-06, "loss": 1.2224, "step": 26976 }, { "epoch": 1.6079389676957923, "grad_norm": 3.201122760772705, "learning_rate": 9.366504115596114e-06, "loss": 1.0624, "step": 26978 }, { "epoch": 1.6080581714149482, "grad_norm": 2.89005970954895, "learning_rate": 9.360994058663746e-06, "loss": 1.0478, "step": 26980 }, { "epoch": 1.6081773751341042, "grad_norm": 3.2020657062530518, "learning_rate": 9.355485455527668e-06, "loss": 1.2403, "step": 26982 }, { "epoch": 1.6082965788532602, "grad_norm": 3.828568935394287, "learning_rate": 9.349978306384938e-06, "loss": 1.2581, "step": 26984 }, { "epoch": 1.6084157825724161, "grad_norm": 3.090001344680786, "learning_rate": 9.344472611432547e-06, "loss": 1.1797, "step": 26986 }, { "epoch": 1.6085349862915723, "grad_norm": 3.1118626594543457, "learning_rate": 9.338968370867507e-06, "loss": 1.0756, "step": 26988 }, { "epoch": 1.6086541900107283, "grad_norm": 3.263209342956543, "learning_rate": 9.333465584886657e-06, "loss": 1.1587, "step": 26990 }, { "epoch": 1.6087733937298845, "grad_norm": 3.5933165550231934, "learning_rate": 9.327964253686899e-06, "loss": 1.0971, "step": 26992 }, { "epoch": 1.6088925974490405, "grad_norm": 2.9912781715393066, "learning_rate": 9.32246437746503e-06, "loss": 0.9869, "step": 26994 }, { "epoch": 1.6090118011681964, "grad_norm": 3.3638386726379395, "learning_rate": 9.316965956417755e-06, "loss": 1.0845, "step": 26996 }, { "epoch": 1.6091310048873524, "grad_norm": 3.0007190704345703, "learning_rate": 9.311468990741822e-06, "loss": 1.0242, "step": 26998 }, { "epoch": 1.6092502086065084, "grad_norm": 2.945634126663208, "learning_rate": 9.305973480633857e-06, "loss": 1.0303, "step": 27000 }, { "epoch": 1.6093694123256646, "grad_norm": 3.4600753784179688, "learning_rate": 9.300479426290453e-06, "loss": 1.1446, "step": 27002 }, { "epoch": 1.6094886160448207, "grad_norm": 3.4470598697662354, "learning_rate": 9.294986827908136e-06, "loss": 1.1259, "step": 27004 }, { "epoch": 1.6096078197639767, "grad_norm": 3.4519224166870117, "learning_rate": 9.289495685683441e-06, "loss": 1.1848, "step": 27006 }, { "epoch": 1.6097270234831327, "grad_norm": 2.938908100128174, "learning_rate": 9.284005999812761e-06, "loss": 1.1594, "step": 27008 }, { "epoch": 1.6098462272022886, "grad_norm": 2.974803924560547, "learning_rate": 9.27851777049248e-06, "loss": 1.0955, "step": 27010 }, { "epoch": 1.6099654309214446, "grad_norm": 3.190448760986328, "learning_rate": 9.273030997918975e-06, "loss": 1.1281, "step": 27012 }, { "epoch": 1.6100846346406008, "grad_norm": 3.1032028198242188, "learning_rate": 9.267545682288465e-06, "loss": 0.9798, "step": 27014 }, { "epoch": 1.6102038383597568, "grad_norm": 3.3917179107666016, "learning_rate": 9.26206182379723e-06, "loss": 1.1406, "step": 27016 }, { "epoch": 1.610323042078913, "grad_norm": 2.9792017936706543, "learning_rate": 9.25657942264142e-06, "loss": 1.1006, "step": 27018 }, { "epoch": 1.610442245798069, "grad_norm": 3.137821912765503, "learning_rate": 9.251098479017172e-06, "loss": 1.0591, "step": 27020 }, { "epoch": 1.610561449517225, "grad_norm": 3.1591031551361084, "learning_rate": 9.245618993120553e-06, "loss": 1.1304, "step": 27022 }, { "epoch": 1.6106806532363809, "grad_norm": 3.5466203689575195, "learning_rate": 9.240140965147576e-06, "loss": 1.0787, "step": 27024 }, { "epoch": 1.610799856955537, "grad_norm": 2.9482297897338867, "learning_rate": 9.234664395294218e-06, "loss": 0.9891, "step": 27026 }, { "epoch": 1.610919060674693, "grad_norm": 3.3243420124053955, "learning_rate": 9.229189283756396e-06, "loss": 1.1514, "step": 27028 }, { "epoch": 1.6110382643938492, "grad_norm": 3.4483370780944824, "learning_rate": 9.223715630729962e-06, "loss": 1.1305, "step": 27030 }, { "epoch": 1.6111574681130052, "grad_norm": 3.313220262527466, "learning_rate": 9.218243436410728e-06, "loss": 1.027, "step": 27032 }, { "epoch": 1.6112766718321612, "grad_norm": 3.4902350902557373, "learning_rate": 9.212772700994482e-06, "loss": 1.0222, "step": 27034 }, { "epoch": 1.6113958755513171, "grad_norm": 3.4540979862213135, "learning_rate": 9.207303424676894e-06, "loss": 1.1962, "step": 27036 }, { "epoch": 1.611515079270473, "grad_norm": 3.6453328132629395, "learning_rate": 9.201835607653625e-06, "loss": 1.1152, "step": 27038 }, { "epoch": 1.6116342829896293, "grad_norm": 3.0725510120391846, "learning_rate": 9.196369250120295e-06, "loss": 1.0857, "step": 27040 }, { "epoch": 1.6117534867087853, "grad_norm": 3.270251989364624, "learning_rate": 9.190904352272444e-06, "loss": 1.3147, "step": 27042 }, { "epoch": 1.6118726904279415, "grad_norm": 3.1289925575256348, "learning_rate": 9.185440914305576e-06, "loss": 1.2939, "step": 27044 }, { "epoch": 1.6119918941470974, "grad_norm": 3.2991080284118652, "learning_rate": 9.179978936415128e-06, "loss": 1.1275, "step": 27046 }, { "epoch": 1.6121110978662534, "grad_norm": 2.905953884124756, "learning_rate": 9.174518418796495e-06, "loss": 0.9145, "step": 27048 }, { "epoch": 1.6122303015854094, "grad_norm": 3.1698687076568604, "learning_rate": 9.169059361645016e-06, "loss": 1.1746, "step": 27050 }, { "epoch": 1.6123495053045656, "grad_norm": 3.425712823867798, "learning_rate": 9.163601765156004e-06, "loss": 1.1146, "step": 27052 }, { "epoch": 1.6124687090237215, "grad_norm": 3.1298882961273193, "learning_rate": 9.158145629524667e-06, "loss": 1.1494, "step": 27054 }, { "epoch": 1.6125879127428777, "grad_norm": 3.2730228900909424, "learning_rate": 9.152690954946185e-06, "loss": 1.0886, "step": 27056 }, { "epoch": 1.6127071164620337, "grad_norm": 3.675438404083252, "learning_rate": 9.147237741615739e-06, "loss": 1.3036, "step": 27058 }, { "epoch": 1.6128263201811897, "grad_norm": 3.1035189628601074, "learning_rate": 9.141785989728341e-06, "loss": 1.1705, "step": 27060 }, { "epoch": 1.6129455239003456, "grad_norm": 3.4792351722717285, "learning_rate": 9.136335699479064e-06, "loss": 1.0613, "step": 27062 }, { "epoch": 1.6130647276195016, "grad_norm": 3.124753475189209, "learning_rate": 9.130886871062877e-06, "loss": 1.1515, "step": 27064 }, { "epoch": 1.6131839313386578, "grad_norm": 3.0361874103546143, "learning_rate": 9.125439504674699e-06, "loss": 1.0466, "step": 27066 }, { "epoch": 1.6133031350578138, "grad_norm": 3.2301158905029297, "learning_rate": 9.119993600509402e-06, "loss": 1.0516, "step": 27068 }, { "epoch": 1.61342233877697, "grad_norm": 3.5965895652770996, "learning_rate": 9.114549158761804e-06, "loss": 1.2504, "step": 27070 }, { "epoch": 1.613541542496126, "grad_norm": 3.271986246109009, "learning_rate": 9.109106179626664e-06, "loss": 1.2119, "step": 27072 }, { "epoch": 1.6136607462152819, "grad_norm": 3.3452930450439453, "learning_rate": 9.103664663298694e-06, "loss": 1.1889, "step": 27074 }, { "epoch": 1.6137799499344379, "grad_norm": 3.0466387271881104, "learning_rate": 9.098224609972594e-06, "loss": 1.0755, "step": 27076 }, { "epoch": 1.613899153653594, "grad_norm": 2.7498393058776855, "learning_rate": 9.092786019842914e-06, "loss": 1.1924, "step": 27078 }, { "epoch": 1.61401835737275, "grad_norm": 3.3625710010528564, "learning_rate": 9.087348893104253e-06, "loss": 1.3057, "step": 27080 }, { "epoch": 1.6141375610919062, "grad_norm": 3.25150203704834, "learning_rate": 9.081913229951116e-06, "loss": 1.1122, "step": 27082 }, { "epoch": 1.6142567648110622, "grad_norm": 3.0260961055755615, "learning_rate": 9.076479030577918e-06, "loss": 0.9481, "step": 27084 }, { "epoch": 1.6143759685302181, "grad_norm": 3.2949047088623047, "learning_rate": 9.071046295179092e-06, "loss": 1.1304, "step": 27086 }, { "epoch": 1.614495172249374, "grad_norm": 2.965031623840332, "learning_rate": 9.065615023948982e-06, "loss": 1.0253, "step": 27088 }, { "epoch": 1.61461437596853, "grad_norm": 3.007664918899536, "learning_rate": 9.060185217081874e-06, "loss": 1.1007, "step": 27090 }, { "epoch": 1.6147335796876863, "grad_norm": 3.0456736087799072, "learning_rate": 9.054756874772019e-06, "loss": 1.1391, "step": 27092 }, { "epoch": 1.6148527834068422, "grad_norm": 3.348942995071411, "learning_rate": 9.0493299972136e-06, "loss": 1.0379, "step": 27094 }, { "epoch": 1.6149719871259984, "grad_norm": 3.0681445598602295, "learning_rate": 9.04390458460076e-06, "loss": 1.1478, "step": 27096 }, { "epoch": 1.6150911908451544, "grad_norm": 3.2968761920928955, "learning_rate": 9.038480637127583e-06, "loss": 1.0677, "step": 27098 }, { "epoch": 1.6152103945643104, "grad_norm": 2.965305805206299, "learning_rate": 9.0330581549881e-06, "loss": 1.1287, "step": 27100 }, { "epoch": 1.6153295982834663, "grad_norm": 3.0761475563049316, "learning_rate": 9.027637138376282e-06, "loss": 1.0821, "step": 27102 }, { "epoch": 1.6154488020026225, "grad_norm": 3.464611530303955, "learning_rate": 9.022217587486099e-06, "loss": 1.1043, "step": 27104 }, { "epoch": 1.6155680057217785, "grad_norm": 3.3636996746063232, "learning_rate": 9.016799502511363e-06, "loss": 0.9642, "step": 27106 }, { "epoch": 1.6156872094409347, "grad_norm": 3.1040689945220947, "learning_rate": 9.011382883645952e-06, "loss": 1.1955, "step": 27108 }, { "epoch": 1.6158064131600907, "grad_norm": 2.978647470474243, "learning_rate": 9.005967731083615e-06, "loss": 1.0557, "step": 27110 }, { "epoch": 1.6159256168792466, "grad_norm": 2.8686916828155518, "learning_rate": 9.000554045018067e-06, "loss": 0.9708, "step": 27112 }, { "epoch": 1.6160448205984026, "grad_norm": 3.5280284881591797, "learning_rate": 8.995141825642984e-06, "loss": 1.13, "step": 27114 }, { "epoch": 1.6161640243175586, "grad_norm": 3.3712315559387207, "learning_rate": 8.989731073151969e-06, "loss": 1.1502, "step": 27116 }, { "epoch": 1.6162832280367148, "grad_norm": 3.447171211242676, "learning_rate": 8.984321787738592e-06, "loss": 1.0191, "step": 27118 }, { "epoch": 1.6164024317558707, "grad_norm": 3.5722978115081787, "learning_rate": 8.978913969596342e-06, "loss": 1.126, "step": 27120 }, { "epoch": 1.616521635475027, "grad_norm": 3.320903778076172, "learning_rate": 8.97350761891872e-06, "loss": 1.142, "step": 27122 }, { "epoch": 1.6166408391941829, "grad_norm": 3.073141574859619, "learning_rate": 8.968102735899076e-06, "loss": 1.067, "step": 27124 }, { "epoch": 1.6167600429133389, "grad_norm": 3.619798183441162, "learning_rate": 8.9626993207308e-06, "loss": 1.0855, "step": 27126 }, { "epoch": 1.6168792466324948, "grad_norm": 3.266054630279541, "learning_rate": 8.957297373607188e-06, "loss": 1.0032, "step": 27128 }, { "epoch": 1.616998450351651, "grad_norm": 3.010127544403076, "learning_rate": 8.951896894721452e-06, "loss": 1.1346, "step": 27130 }, { "epoch": 1.617117654070807, "grad_norm": 2.855992317199707, "learning_rate": 8.94649788426682e-06, "loss": 1.1571, "step": 27132 }, { "epoch": 1.6172368577899632, "grad_norm": 3.2179925441741943, "learning_rate": 8.94110034243642e-06, "loss": 1.105, "step": 27134 }, { "epoch": 1.6173560615091191, "grad_norm": 2.7480459213256836, "learning_rate": 8.93570426942334e-06, "loss": 1.004, "step": 27136 }, { "epoch": 1.6174752652282751, "grad_norm": 2.9007389545440674, "learning_rate": 8.93030966542061e-06, "loss": 0.9622, "step": 27138 }, { "epoch": 1.617594468947431, "grad_norm": 3.164968729019165, "learning_rate": 8.924916530621247e-06, "loss": 1.0221, "step": 27140 }, { "epoch": 1.617713672666587, "grad_norm": 3.4108970165252686, "learning_rate": 8.919524865218142e-06, "loss": 1.2324, "step": 27142 }, { "epoch": 1.6178328763857432, "grad_norm": 3.111952066421509, "learning_rate": 8.914134669404178e-06, "loss": 1.2481, "step": 27144 }, { "epoch": 1.6179520801048994, "grad_norm": 3.289841651916504, "learning_rate": 8.90874594337221e-06, "loss": 1.1162, "step": 27146 }, { "epoch": 1.6180712838240554, "grad_norm": 3.3558335304260254, "learning_rate": 8.903358687314972e-06, "loss": 1.2618, "step": 27148 }, { "epoch": 1.6181904875432114, "grad_norm": 3.1215314865112305, "learning_rate": 8.897972901425217e-06, "loss": 1.0933, "step": 27150 }, { "epoch": 1.6183096912623673, "grad_norm": 3.4090003967285156, "learning_rate": 8.892588585895596e-06, "loss": 1.1043, "step": 27152 }, { "epoch": 1.6184288949815233, "grad_norm": 3.318394184112549, "learning_rate": 8.88720574091873e-06, "loss": 1.0518, "step": 27154 }, { "epoch": 1.6185480987006795, "grad_norm": 3.6058080196380615, "learning_rate": 8.881824366687186e-06, "loss": 1.0822, "step": 27156 }, { "epoch": 1.6186673024198355, "grad_norm": 3.2518985271453857, "learning_rate": 8.876444463393462e-06, "loss": 1.0225, "step": 27158 }, { "epoch": 1.6187865061389917, "grad_norm": 3.349210262298584, "learning_rate": 8.87106603123003e-06, "loss": 1.249, "step": 27160 }, { "epoch": 1.6189057098581476, "grad_norm": 3.141211986541748, "learning_rate": 8.865689070389282e-06, "loss": 1.078, "step": 27162 }, { "epoch": 1.6190249135773036, "grad_norm": 3.0505740642547607, "learning_rate": 8.860313581063573e-06, "loss": 1.1786, "step": 27164 }, { "epoch": 1.6191441172964596, "grad_norm": 2.831014394760132, "learning_rate": 8.854939563445197e-06, "loss": 0.9947, "step": 27166 }, { "epoch": 1.6192633210156155, "grad_norm": 3.3425943851470947, "learning_rate": 8.849567017726435e-06, "loss": 1.1271, "step": 27168 }, { "epoch": 1.6193825247347717, "grad_norm": 3.550098419189453, "learning_rate": 8.844195944099425e-06, "loss": 1.1436, "step": 27170 }, { "epoch": 1.619501728453928, "grad_norm": 3.4542019367218018, "learning_rate": 8.838826342756356e-06, "loss": 1.2079, "step": 27172 }, { "epoch": 1.619620932173084, "grad_norm": 3.4847681522369385, "learning_rate": 8.833458213889295e-06, "loss": 1.1693, "step": 27174 }, { "epoch": 1.6197401358922399, "grad_norm": 3.3683998584747314, "learning_rate": 8.828091557690289e-06, "loss": 1.0331, "step": 27176 }, { "epoch": 1.6198593396113958, "grad_norm": 3.409756660461426, "learning_rate": 8.822726374351314e-06, "loss": 1.0581, "step": 27178 }, { "epoch": 1.6199785433305518, "grad_norm": 3.0938596725463867, "learning_rate": 8.817362664064306e-06, "loss": 1.1449, "step": 27180 }, { "epoch": 1.620097747049708, "grad_norm": 3.358964443206787, "learning_rate": 8.812000427021138e-06, "loss": 1.2083, "step": 27182 }, { "epoch": 1.620216950768864, "grad_norm": 3.150912046432495, "learning_rate": 8.806639663413624e-06, "loss": 1.1393, "step": 27184 }, { "epoch": 1.6203361544880202, "grad_norm": 3.082155227661133, "learning_rate": 8.801280373433579e-06, "loss": 1.0683, "step": 27186 }, { "epoch": 1.6204553582071761, "grad_norm": 3.3201916217803955, "learning_rate": 8.79592255727268e-06, "loss": 1.148, "step": 27188 }, { "epoch": 1.620574561926332, "grad_norm": 3.2923057079315186, "learning_rate": 8.790566215122598e-06, "loss": 1.1342, "step": 27190 }, { "epoch": 1.620693765645488, "grad_norm": 3.366858720779419, "learning_rate": 8.785211347174982e-06, "loss": 1.0891, "step": 27192 }, { "epoch": 1.620812969364644, "grad_norm": 3.23700213432312, "learning_rate": 8.77985795362135e-06, "loss": 1.2081, "step": 27194 }, { "epoch": 1.6209321730838002, "grad_norm": 3.401939630508423, "learning_rate": 8.774506034653241e-06, "loss": 1.023, "step": 27196 }, { "epoch": 1.6210513768029564, "grad_norm": 3.5093319416046143, "learning_rate": 8.769155590462103e-06, "loss": 1.2564, "step": 27198 }, { "epoch": 1.6211705805221124, "grad_norm": 3.685448408126831, "learning_rate": 8.76380662123934e-06, "loss": 1.0292, "step": 27200 }, { "epoch": 1.6212897842412684, "grad_norm": 3.0687906742095947, "learning_rate": 8.758459127176305e-06, "loss": 1.0276, "step": 27202 }, { "epoch": 1.6214089879604243, "grad_norm": 3.5528831481933594, "learning_rate": 8.75311310846429e-06, "loss": 1.0995, "step": 27204 }, { "epoch": 1.6215281916795803, "grad_norm": 3.4720656871795654, "learning_rate": 8.747768565294544e-06, "loss": 1.1312, "step": 27206 }, { "epoch": 1.6216473953987365, "grad_norm": 3.284332513809204, "learning_rate": 8.742425497858248e-06, "loss": 1.1049, "step": 27208 }, { "epoch": 1.6217665991178924, "grad_norm": 3.418546438217163, "learning_rate": 8.737083906346577e-06, "loss": 1.21, "step": 27210 }, { "epoch": 1.6218858028370486, "grad_norm": 3.2789559364318848, "learning_rate": 8.731743790950575e-06, "loss": 1.0542, "step": 27212 }, { "epoch": 1.6220050065562046, "grad_norm": 3.208416700363159, "learning_rate": 8.7264051518613e-06, "loss": 1.1154, "step": 27214 }, { "epoch": 1.6221242102753606, "grad_norm": 2.922891616821289, "learning_rate": 8.721067989269732e-06, "loss": 1.1541, "step": 27216 }, { "epoch": 1.6222434139945165, "grad_norm": 2.894792079925537, "learning_rate": 8.715732303366798e-06, "loss": 1.1306, "step": 27218 }, { "epoch": 1.6223626177136725, "grad_norm": 3.0995662212371826, "learning_rate": 8.71039809434337e-06, "loss": 1.0624, "step": 27220 }, { "epoch": 1.6224818214328287, "grad_norm": 3.372537136077881, "learning_rate": 8.705065362390275e-06, "loss": 1.2981, "step": 27222 }, { "epoch": 1.622601025151985, "grad_norm": 2.920560598373413, "learning_rate": 8.699734107698276e-06, "loss": 1.2078, "step": 27224 }, { "epoch": 1.6227202288711409, "grad_norm": 2.7490620613098145, "learning_rate": 8.6944043304581e-06, "loss": 1.0708, "step": 27226 }, { "epoch": 1.6228394325902968, "grad_norm": 3.3957831859588623, "learning_rate": 8.689076030860404e-06, "loss": 1.1462, "step": 27228 }, { "epoch": 1.6229586363094528, "grad_norm": 2.703831672668457, "learning_rate": 8.683749209095792e-06, "loss": 0.9892, "step": 27230 }, { "epoch": 1.6230778400286088, "grad_norm": 3.5958664417266846, "learning_rate": 8.678423865354857e-06, "loss": 1.1681, "step": 27232 }, { "epoch": 1.623197043747765, "grad_norm": 3.359666585922241, "learning_rate": 8.673099999828071e-06, "loss": 1.1886, "step": 27234 }, { "epoch": 1.623316247466921, "grad_norm": 3.2693660259246826, "learning_rate": 8.667777612705879e-06, "loss": 1.1259, "step": 27236 }, { "epoch": 1.6234354511860771, "grad_norm": 3.244658946990967, "learning_rate": 8.662456704178711e-06, "loss": 1.0791, "step": 27238 }, { "epoch": 1.623554654905233, "grad_norm": 3.010282516479492, "learning_rate": 8.657137274436905e-06, "loss": 0.9765, "step": 27240 }, { "epoch": 1.623673858624389, "grad_norm": 3.4762115478515625, "learning_rate": 8.651819323670752e-06, "loss": 1.3024, "step": 27242 }, { "epoch": 1.623793062343545, "grad_norm": 3.212722063064575, "learning_rate": 8.646502852070493e-06, "loss": 1.0696, "step": 27244 }, { "epoch": 1.623912266062701, "grad_norm": 2.929659128189087, "learning_rate": 8.64118785982631e-06, "loss": 1.1396, "step": 27246 }, { "epoch": 1.6240314697818572, "grad_norm": 3.0540895462036133, "learning_rate": 8.635874347128353e-06, "loss": 1.0955, "step": 27248 }, { "epoch": 1.6241506735010134, "grad_norm": 3.2327561378479004, "learning_rate": 8.630562314166696e-06, "loss": 1.0504, "step": 27250 }, { "epoch": 1.6242698772201694, "grad_norm": 3.651258945465088, "learning_rate": 8.625251761131365e-06, "loss": 1.062, "step": 27252 }, { "epoch": 1.6243890809393253, "grad_norm": 3.3091535568237305, "learning_rate": 8.619942688212328e-06, "loss": 1.0021, "step": 27254 }, { "epoch": 1.6245082846584813, "grad_norm": 3.4384074211120605, "learning_rate": 8.61463509559955e-06, "loss": 1.4073, "step": 27256 }, { "epoch": 1.6246274883776373, "grad_norm": 3.3427648544311523, "learning_rate": 8.609328983482846e-06, "loss": 1.1755, "step": 27258 }, { "epoch": 1.6247466920967935, "grad_norm": 3.1865408420562744, "learning_rate": 8.60402435205207e-06, "loss": 1.0928, "step": 27260 }, { "epoch": 1.6248658958159494, "grad_norm": 3.2699482440948486, "learning_rate": 8.598721201496995e-06, "loss": 1.1664, "step": 27262 }, { "epoch": 1.6249850995351056, "grad_norm": 3.5176258087158203, "learning_rate": 8.593419532007291e-06, "loss": 1.0998, "step": 27264 }, { "epoch": 1.6251043032542616, "grad_norm": 3.341928720474243, "learning_rate": 8.588119343772649e-06, "loss": 0.9795, "step": 27266 }, { "epoch": 1.6252235069734176, "grad_norm": 3.4113893508911133, "learning_rate": 8.582820636982674e-06, "loss": 1.2338, "step": 27268 }, { "epoch": 1.6253427106925735, "grad_norm": 3.4594321250915527, "learning_rate": 8.577523411826904e-06, "loss": 1.2488, "step": 27270 }, { "epoch": 1.6254619144117295, "grad_norm": 3.272482395172119, "learning_rate": 8.572227668494842e-06, "loss": 1.1724, "step": 27272 }, { "epoch": 1.6255811181308857, "grad_norm": 2.935577630996704, "learning_rate": 8.566933407175964e-06, "loss": 1.2124, "step": 27274 }, { "epoch": 1.6257003218500419, "grad_norm": 3.0001590251922607, "learning_rate": 8.561640628059608e-06, "loss": 1.0834, "step": 27276 }, { "epoch": 1.6258195255691978, "grad_norm": 3.1256701946258545, "learning_rate": 8.556349331335168e-06, "loss": 1.1978, "step": 27278 }, { "epoch": 1.6259387292883538, "grad_norm": 3.1503021717071533, "learning_rate": 8.551059517191917e-06, "loss": 1.1963, "step": 27280 }, { "epoch": 1.6260579330075098, "grad_norm": 3.2103524208068848, "learning_rate": 8.545771185819062e-06, "loss": 1.141, "step": 27282 }, { "epoch": 1.6261771367266658, "grad_norm": 3.293369770050049, "learning_rate": 8.540484337405814e-06, "loss": 1.0901, "step": 27284 }, { "epoch": 1.626296340445822, "grad_norm": 3.028435707092285, "learning_rate": 8.535198972141295e-06, "loss": 1.2104, "step": 27286 }, { "epoch": 1.626415544164978, "grad_norm": 3.108408212661743, "learning_rate": 8.52991509021458e-06, "loss": 1.147, "step": 27288 }, { "epoch": 1.626534747884134, "grad_norm": 3.162973403930664, "learning_rate": 8.524632691814689e-06, "loss": 1.1583, "step": 27290 }, { "epoch": 1.62665395160329, "grad_norm": 2.9261398315429688, "learning_rate": 8.519351777130597e-06, "loss": 0.9739, "step": 27292 }, { "epoch": 1.626773155322446, "grad_norm": 3.2170724868774414, "learning_rate": 8.514072346351215e-06, "loss": 1.0377, "step": 27294 }, { "epoch": 1.626892359041602, "grad_norm": 3.1551318168640137, "learning_rate": 8.508794399665404e-06, "loss": 1.0926, "step": 27296 }, { "epoch": 1.627011562760758, "grad_norm": 3.4304165840148926, "learning_rate": 8.503517937261979e-06, "loss": 1.1994, "step": 27298 }, { "epoch": 1.6271307664799142, "grad_norm": 3.0712168216705322, "learning_rate": 8.498242959329688e-06, "loss": 1.1388, "step": 27300 }, { "epoch": 1.6272499701990704, "grad_norm": 3.5131266117095947, "learning_rate": 8.492969466057265e-06, "loss": 1.1604, "step": 27302 }, { "epoch": 1.6273691739182263, "grad_norm": 3.4000935554504395, "learning_rate": 8.487697457633314e-06, "loss": 1.1451, "step": 27304 }, { "epoch": 1.6274883776373823, "grad_norm": 3.332249164581299, "learning_rate": 8.482426934246468e-06, "loss": 1.1551, "step": 27306 }, { "epoch": 1.6276075813565383, "grad_norm": 3.1333088874816895, "learning_rate": 8.477157896085264e-06, "loss": 1.0953, "step": 27308 }, { "epoch": 1.6277267850756942, "grad_norm": 2.9690968990325928, "learning_rate": 8.47189034333819e-06, "loss": 1.1871, "step": 27310 }, { "epoch": 1.6278459887948504, "grad_norm": 3.066882610321045, "learning_rate": 8.466624276193685e-06, "loss": 1.052, "step": 27312 }, { "epoch": 1.6279651925140064, "grad_norm": 2.9450387954711914, "learning_rate": 8.461359694840137e-06, "loss": 1.1166, "step": 27314 }, { "epoch": 1.6280843962331626, "grad_norm": 2.802791118621826, "learning_rate": 8.456096599465873e-06, "loss": 1.0126, "step": 27316 }, { "epoch": 1.6282035999523186, "grad_norm": 3.182882785797119, "learning_rate": 8.450834990259165e-06, "loss": 1.1239, "step": 27318 }, { "epoch": 1.6283228036714745, "grad_norm": 3.0383493900299072, "learning_rate": 8.445574867408274e-06, "loss": 1.1583, "step": 27320 }, { "epoch": 1.6284420073906305, "grad_norm": 3.136011838912964, "learning_rate": 8.44031623110133e-06, "loss": 1.1772, "step": 27322 }, { "epoch": 1.6285612111097865, "grad_norm": 3.6169278621673584, "learning_rate": 8.435059081526458e-06, "loss": 1.0674, "step": 27324 }, { "epoch": 1.6286804148289427, "grad_norm": 3.4861905574798584, "learning_rate": 8.429803418871762e-06, "loss": 1.0361, "step": 27326 }, { "epoch": 1.6287996185480988, "grad_norm": 3.2618134021759033, "learning_rate": 8.424549243325203e-06, "loss": 1.1122, "step": 27328 }, { "epoch": 1.6289188222672548, "grad_norm": 3.3667614459991455, "learning_rate": 8.41929655507478e-06, "loss": 1.2219, "step": 27330 }, { "epoch": 1.6290380259864108, "grad_norm": 3.4861555099487305, "learning_rate": 8.414045354308387e-06, "loss": 1.0689, "step": 27332 }, { "epoch": 1.6291572297055668, "grad_norm": 2.9731857776641846, "learning_rate": 8.408795641213873e-06, "loss": 1.1457, "step": 27334 }, { "epoch": 1.6292764334247227, "grad_norm": 2.9339754581451416, "learning_rate": 8.40354741597903e-06, "loss": 1.0618, "step": 27336 }, { "epoch": 1.629395637143879, "grad_norm": 3.0085790157318115, "learning_rate": 8.398300678791644e-06, "loss": 1.0517, "step": 27338 }, { "epoch": 1.6295148408630349, "grad_norm": 3.124783992767334, "learning_rate": 8.393055429839364e-06, "loss": 1.0761, "step": 27340 }, { "epoch": 1.629634044582191, "grad_norm": 2.934624671936035, "learning_rate": 8.387811669309842e-06, "loss": 1.1065, "step": 27342 }, { "epoch": 1.629753248301347, "grad_norm": 3.267655372619629, "learning_rate": 8.382569397390688e-06, "loss": 1.1183, "step": 27344 }, { "epoch": 1.629872452020503, "grad_norm": 2.661644697189331, "learning_rate": 8.377328614269403e-06, "loss": 0.9551, "step": 27346 }, { "epoch": 1.629991655739659, "grad_norm": 3.296757221221924, "learning_rate": 8.37208932013349e-06, "loss": 1.1847, "step": 27348 }, { "epoch": 1.630110859458815, "grad_norm": 3.7936758995056152, "learning_rate": 8.366851515170377e-06, "loss": 1.2181, "step": 27350 }, { "epoch": 1.6302300631779711, "grad_norm": 2.8827741146087646, "learning_rate": 8.361615199567424e-06, "loss": 0.9883, "step": 27352 }, { "epoch": 1.6303492668971273, "grad_norm": 2.9675769805908203, "learning_rate": 8.356380373511962e-06, "loss": 1.1702, "step": 27354 }, { "epoch": 1.6304684706162833, "grad_norm": 3.423964738845825, "learning_rate": 8.351147037191259e-06, "loss": 1.061, "step": 27356 }, { "epoch": 1.6305876743354393, "grad_norm": 3.323463201522827, "learning_rate": 8.345915190792525e-06, "loss": 1.1336, "step": 27358 }, { "epoch": 1.6307068780545952, "grad_norm": 3.39080810546875, "learning_rate": 8.34068483450292e-06, "loss": 1.1198, "step": 27360 }, { "epoch": 1.6308260817737512, "grad_norm": 3.152630567550659, "learning_rate": 8.33545596850956e-06, "loss": 1.0835, "step": 27362 }, { "epoch": 1.6309452854929074, "grad_norm": 3.3372793197631836, "learning_rate": 8.330228592999472e-06, "loss": 1.1726, "step": 27364 }, { "epoch": 1.6310644892120634, "grad_norm": 3.311386823654175, "learning_rate": 8.325002708159713e-06, "loss": 1.2521, "step": 27366 }, { "epoch": 1.6311836929312196, "grad_norm": 3.3340632915496826, "learning_rate": 8.31977831417718e-06, "loss": 1.1089, "step": 27368 }, { "epoch": 1.6313028966503755, "grad_norm": 3.3140833377838135, "learning_rate": 8.314555411238772e-06, "loss": 1.3553, "step": 27370 }, { "epoch": 1.6314221003695315, "grad_norm": 3.354091167449951, "learning_rate": 8.309333999531355e-06, "loss": 1.0831, "step": 27372 }, { "epoch": 1.6315413040886875, "grad_norm": 3.684467315673828, "learning_rate": 8.304114079241703e-06, "loss": 1.1893, "step": 27374 }, { "epoch": 1.6316605078078434, "grad_norm": 3.1576197147369385, "learning_rate": 8.298895650556554e-06, "loss": 1.2492, "step": 27376 }, { "epoch": 1.6317797115269996, "grad_norm": 3.485032558441162, "learning_rate": 8.293678713662583e-06, "loss": 1.136, "step": 27378 }, { "epoch": 1.6318989152461558, "grad_norm": 3.090646505355835, "learning_rate": 8.288463268746421e-06, "loss": 1.0428, "step": 27380 }, { "epoch": 1.6320181189653118, "grad_norm": 3.240091562271118, "learning_rate": 8.283249315994646e-06, "loss": 1.1977, "step": 27382 }, { "epoch": 1.6321373226844678, "grad_norm": 3.414463996887207, "learning_rate": 8.278036855593773e-06, "loss": 1.0943, "step": 27384 }, { "epoch": 1.6322565264036237, "grad_norm": 3.1773879528045654, "learning_rate": 8.272825887730268e-06, "loss": 1.1082, "step": 27386 }, { "epoch": 1.6323757301227797, "grad_norm": 3.2595386505126953, "learning_rate": 8.267616412590546e-06, "loss": 1.0972, "step": 27388 }, { "epoch": 1.632494933841936, "grad_norm": 3.062269449234009, "learning_rate": 8.262408430360985e-06, "loss": 0.9717, "step": 27390 }, { "epoch": 1.6326141375610919, "grad_norm": 3.2300360202789307, "learning_rate": 8.25720194122786e-06, "loss": 1.2274, "step": 27392 }, { "epoch": 1.632733341280248, "grad_norm": 2.891916275024414, "learning_rate": 8.251996945377454e-06, "loss": 1.0589, "step": 27394 }, { "epoch": 1.632852544999404, "grad_norm": 3.2708287239074707, "learning_rate": 8.246793442995954e-06, "loss": 1.0416, "step": 27396 }, { "epoch": 1.63297174871856, "grad_norm": 3.089916706085205, "learning_rate": 8.241591434269514e-06, "loss": 1.1301, "step": 27398 }, { "epoch": 1.633090952437716, "grad_norm": 3.1197457313537598, "learning_rate": 8.236390919384224e-06, "loss": 1.043, "step": 27400 }, { "epoch": 1.633210156156872, "grad_norm": 3.093008279800415, "learning_rate": 8.23119189852612e-06, "loss": 1.0475, "step": 27402 }, { "epoch": 1.6333293598760281, "grad_norm": 3.4104905128479004, "learning_rate": 8.22599437188119e-06, "loss": 1.1766, "step": 27404 }, { "epoch": 1.6334485635951843, "grad_norm": 3.0918209552764893, "learning_rate": 8.22079833963536e-06, "loss": 1.1735, "step": 27406 }, { "epoch": 1.6335677673143403, "grad_norm": 3.360164165496826, "learning_rate": 8.215603801974541e-06, "loss": 1.0871, "step": 27408 }, { "epoch": 1.6336869710334962, "grad_norm": 3.4452810287475586, "learning_rate": 8.21041075908452e-06, "loss": 1.186, "step": 27410 }, { "epoch": 1.6338061747526522, "grad_norm": 3.018322229385376, "learning_rate": 8.205219211151095e-06, "loss": 1.1908, "step": 27412 }, { "epoch": 1.6339253784718082, "grad_norm": 3.27156662940979, "learning_rate": 8.20002915835999e-06, "loss": 1.2998, "step": 27414 }, { "epoch": 1.6340445821909644, "grad_norm": 2.923379898071289, "learning_rate": 8.194840600896836e-06, "loss": 1.1075, "step": 27416 }, { "epoch": 1.6341637859101203, "grad_norm": 3.2589423656463623, "learning_rate": 8.189653538947285e-06, "loss": 1.0733, "step": 27418 }, { "epoch": 1.6342829896292765, "grad_norm": 3.68686580657959, "learning_rate": 8.184467972696874e-06, "loss": 1.1813, "step": 27420 }, { "epoch": 1.6344021933484325, "grad_norm": 3.327169418334961, "learning_rate": 8.179283902331114e-06, "loss": 1.1263, "step": 27422 }, { "epoch": 1.6345213970675885, "grad_norm": 3.13012957572937, "learning_rate": 8.17410132803546e-06, "loss": 1.0476, "step": 27424 }, { "epoch": 1.6346406007867444, "grad_norm": 3.3322536945343018, "learning_rate": 8.168920249995305e-06, "loss": 1.1529, "step": 27426 }, { "epoch": 1.6347598045059006, "grad_norm": 2.7231006622314453, "learning_rate": 8.163740668395996e-06, "loss": 0.9311, "step": 27428 }, { "epoch": 1.6348790082250566, "grad_norm": 3.020709991455078, "learning_rate": 8.158562583422824e-06, "loss": 1.1182, "step": 27430 }, { "epoch": 1.6349982119442128, "grad_norm": 3.3028438091278076, "learning_rate": 8.15338599526103e-06, "loss": 1.0195, "step": 27432 }, { "epoch": 1.6351174156633688, "grad_norm": 3.218627691268921, "learning_rate": 8.148210904095782e-06, "loss": 1.1073, "step": 27434 }, { "epoch": 1.6352366193825247, "grad_norm": 3.013453245162964, "learning_rate": 8.143037310112234e-06, "loss": 0.9851, "step": 27436 }, { "epoch": 1.6353558231016807, "grad_norm": 3.4383583068847656, "learning_rate": 8.137865213495455e-06, "loss": 1.0687, "step": 27438 }, { "epoch": 1.6354750268208367, "grad_norm": 2.7396597862243652, "learning_rate": 8.13269461443047e-06, "loss": 1.094, "step": 27440 }, { "epoch": 1.6355942305399929, "grad_norm": 3.001819610595703, "learning_rate": 8.12752551310224e-06, "loss": 1.1132, "step": 27442 }, { "epoch": 1.6357134342591488, "grad_norm": 3.5184645652770996, "learning_rate": 8.122357909695694e-06, "loss": 1.1339, "step": 27444 }, { "epoch": 1.635832637978305, "grad_norm": 3.2093348503112793, "learning_rate": 8.117191804395685e-06, "loss": 1.1074, "step": 27446 }, { "epoch": 1.635951841697461, "grad_norm": 3.195371389389038, "learning_rate": 8.112027197387029e-06, "loss": 1.1229, "step": 27448 }, { "epoch": 1.636071045416617, "grad_norm": 3.212191343307495, "learning_rate": 8.106864088854476e-06, "loss": 1.1263, "step": 27450 }, { "epoch": 1.636190249135773, "grad_norm": 3.3620493412017822, "learning_rate": 8.101702478982715e-06, "loss": 1.0331, "step": 27452 }, { "epoch": 1.6363094528549291, "grad_norm": 2.944796323776245, "learning_rate": 8.09654236795644e-06, "loss": 0.9885, "step": 27454 }, { "epoch": 1.636428656574085, "grad_norm": 3.439680576324463, "learning_rate": 8.091383755960185e-06, "loss": 1.1553, "step": 27456 }, { "epoch": 1.6365478602932413, "grad_norm": 3.041429281234741, "learning_rate": 8.086226643178541e-06, "loss": 0.9732, "step": 27458 }, { "epoch": 1.6366670640123973, "grad_norm": 3.3429102897644043, "learning_rate": 8.081071029795983e-06, "loss": 1.0493, "step": 27460 }, { "epoch": 1.6367862677315532, "grad_norm": 3.2164173126220703, "learning_rate": 8.075916915996917e-06, "loss": 1.0194, "step": 27462 }, { "epoch": 1.6369054714507092, "grad_norm": 3.251314163208008, "learning_rate": 8.070764301965755e-06, "loss": 1.0559, "step": 27464 }, { "epoch": 1.6370246751698652, "grad_norm": 3.0958456993103027, "learning_rate": 8.065613187886817e-06, "loss": 1.0326, "step": 27466 }, { "epoch": 1.6371438788890214, "grad_norm": 3.3373734951019287, "learning_rate": 8.060463573944366e-06, "loss": 1.0783, "step": 27468 }, { "epoch": 1.6372630826081773, "grad_norm": 3.4215633869171143, "learning_rate": 8.055315460322622e-06, "loss": 1.113, "step": 27470 }, { "epoch": 1.6373822863273335, "grad_norm": 3.4685168266296387, "learning_rate": 8.050168847205775e-06, "loss": 1.0195, "step": 27472 }, { "epoch": 1.6375014900464895, "grad_norm": 3.319086790084839, "learning_rate": 8.045023734777907e-06, "loss": 1.209, "step": 27474 }, { "epoch": 1.6376206937656455, "grad_norm": 3.5666043758392334, "learning_rate": 8.039880123223076e-06, "loss": 1.3454, "step": 27476 }, { "epoch": 1.6377398974848014, "grad_norm": 3.30065655708313, "learning_rate": 8.034738012725323e-06, "loss": 1.0856, "step": 27478 }, { "epoch": 1.6378591012039576, "grad_norm": 3.226680040359497, "learning_rate": 8.029597403468547e-06, "loss": 1.1012, "step": 27480 }, { "epoch": 1.6379783049231136, "grad_norm": 2.7890896797180176, "learning_rate": 8.024458295636688e-06, "loss": 1.2177, "step": 27482 }, { "epoch": 1.6380975086422698, "grad_norm": 3.403984785079956, "learning_rate": 8.019320689413573e-06, "loss": 1.1559, "step": 27484 }, { "epoch": 1.6382167123614257, "grad_norm": 3.3603641986846924, "learning_rate": 8.014184584982993e-06, "loss": 1.2197, "step": 27486 }, { "epoch": 1.6383359160805817, "grad_norm": 3.206228017807007, "learning_rate": 8.009049982528683e-06, "loss": 1.2507, "step": 27488 }, { "epoch": 1.6384551197997377, "grad_norm": 3.1777148246765137, "learning_rate": 8.003916882234319e-06, "loss": 1.1403, "step": 27490 }, { "epoch": 1.6385743235188936, "grad_norm": 3.0433857440948486, "learning_rate": 7.998785284283545e-06, "loss": 1.0124, "step": 27492 }, { "epoch": 1.6386935272380498, "grad_norm": 3.2409751415252686, "learning_rate": 7.993655188859922e-06, "loss": 1.1863, "step": 27494 }, { "epoch": 1.6388127309572058, "grad_norm": 2.8574979305267334, "learning_rate": 7.988526596146978e-06, "loss": 1.1435, "step": 27496 }, { "epoch": 1.638931934676362, "grad_norm": 3.3255090713500977, "learning_rate": 7.983399506328165e-06, "loss": 1.1006, "step": 27498 }, { "epoch": 1.639051138395518, "grad_norm": 3.5140416622161865, "learning_rate": 7.978273919586937e-06, "loss": 1.1104, "step": 27500 }, { "epoch": 1.639170342114674, "grad_norm": 3.173861265182495, "learning_rate": 7.973149836106597e-06, "loss": 1.0313, "step": 27502 }, { "epoch": 1.63928954583383, "grad_norm": 3.0947656631469727, "learning_rate": 7.968027256070499e-06, "loss": 1.091, "step": 27504 }, { "epoch": 1.639408749552986, "grad_norm": 2.9378232955932617, "learning_rate": 7.962906179661872e-06, "loss": 1.064, "step": 27506 }, { "epoch": 1.639527953272142, "grad_norm": 3.457536220550537, "learning_rate": 7.957786607063921e-06, "loss": 1.0595, "step": 27508 }, { "epoch": 1.6396471569912983, "grad_norm": 3.3273942470550537, "learning_rate": 7.95266853845979e-06, "loss": 1.1423, "step": 27510 }, { "epoch": 1.6397663607104542, "grad_norm": 3.1606194972991943, "learning_rate": 7.947551974032569e-06, "loss": 1.0382, "step": 27512 }, { "epoch": 1.6398855644296102, "grad_norm": 2.926785707473755, "learning_rate": 7.942436913965295e-06, "loss": 1.1681, "step": 27514 }, { "epoch": 1.6400047681487662, "grad_norm": 3.365356922149658, "learning_rate": 7.937323358440935e-06, "loss": 1.2017, "step": 27516 }, { "epoch": 1.6401239718679221, "grad_norm": 3.337646484375, "learning_rate": 7.932211307642461e-06, "loss": 1.0926, "step": 27518 }, { "epoch": 1.6402431755870783, "grad_norm": 3.168012857437134, "learning_rate": 7.927100761752709e-06, "loss": 1.2744, "step": 27520 }, { "epoch": 1.6403623793062345, "grad_norm": 3.1182496547698975, "learning_rate": 7.921991720954502e-06, "loss": 1.1821, "step": 27522 }, { "epoch": 1.6404815830253905, "grad_norm": 3.4879283905029297, "learning_rate": 7.916884185430645e-06, "loss": 1.0006, "step": 27524 }, { "epoch": 1.6406007867445465, "grad_norm": 3.6440978050231934, "learning_rate": 7.911778155363802e-06, "loss": 1.1354, "step": 27526 }, { "epoch": 1.6407199904637024, "grad_norm": 3.408639907836914, "learning_rate": 7.906673630936673e-06, "loss": 1.1376, "step": 27528 }, { "epoch": 1.6408391941828584, "grad_norm": 3.937059164047241, "learning_rate": 7.901570612331844e-06, "loss": 1.215, "step": 27530 }, { "epoch": 1.6409583979020146, "grad_norm": 3.257070541381836, "learning_rate": 7.896469099731873e-06, "loss": 1.2073, "step": 27532 }, { "epoch": 1.6410776016211706, "grad_norm": 3.2812554836273193, "learning_rate": 7.891369093319262e-06, "loss": 1.2697, "step": 27534 }, { "epoch": 1.6411968053403267, "grad_norm": 3.5256848335266113, "learning_rate": 7.886270593276446e-06, "loss": 1.1141, "step": 27536 }, { "epoch": 1.6413160090594827, "grad_norm": 3.0814905166625977, "learning_rate": 7.881173599785823e-06, "loss": 1.0403, "step": 27538 }, { "epoch": 1.6414352127786387, "grad_norm": 3.4997010231018066, "learning_rate": 7.876078113029717e-06, "loss": 1.2502, "step": 27540 }, { "epoch": 1.6415544164977947, "grad_norm": 3.462590217590332, "learning_rate": 7.870984133190445e-06, "loss": 1.1613, "step": 27542 }, { "epoch": 1.6416736202169506, "grad_norm": 3.3544695377349854, "learning_rate": 7.865891660450192e-06, "loss": 1.1454, "step": 27544 }, { "epoch": 1.6417928239361068, "grad_norm": 2.8749446868896484, "learning_rate": 7.860800694991166e-06, "loss": 1.131, "step": 27546 }, { "epoch": 1.641912027655263, "grad_norm": 3.122067451477051, "learning_rate": 7.855711236995489e-06, "loss": 1.1144, "step": 27548 }, { "epoch": 1.642031231374419, "grad_norm": 3.4013619422912598, "learning_rate": 7.850623286645192e-06, "loss": 1.1323, "step": 27550 }, { "epoch": 1.642150435093575, "grad_norm": 3.0506231784820557, "learning_rate": 7.845536844122325e-06, "loss": 1.1475, "step": 27552 }, { "epoch": 1.642269638812731, "grad_norm": 3.4627881050109863, "learning_rate": 7.840451909608838e-06, "loss": 1.1057, "step": 27554 }, { "epoch": 1.6423888425318869, "grad_norm": 3.3580946922302246, "learning_rate": 7.835368483286625e-06, "loss": 1.0684, "step": 27556 }, { "epoch": 1.642508046251043, "grad_norm": 2.7889437675476074, "learning_rate": 7.830286565337552e-06, "loss": 1.0531, "step": 27558 }, { "epoch": 1.642627249970199, "grad_norm": 3.1575536727905273, "learning_rate": 7.82520615594341e-06, "loss": 1.0256, "step": 27560 }, { "epoch": 1.6427464536893552, "grad_norm": 3.400130271911621, "learning_rate": 7.82012725528593e-06, "loss": 1.266, "step": 27562 }, { "epoch": 1.6428656574085112, "grad_norm": 3.090730667114258, "learning_rate": 7.815049863546842e-06, "loss": 1.1136, "step": 27564 }, { "epoch": 1.6429848611276672, "grad_norm": 3.422548770904541, "learning_rate": 7.809973980907742e-06, "loss": 1.143, "step": 27566 }, { "epoch": 1.6431040648468231, "grad_norm": 3.2296783924102783, "learning_rate": 7.804899607550208e-06, "loss": 1.2006, "step": 27568 }, { "epoch": 1.6432232685659791, "grad_norm": 3.8543026447296143, "learning_rate": 7.799826743655797e-06, "loss": 1.1433, "step": 27570 }, { "epoch": 1.6433424722851353, "grad_norm": 3.4588074684143066, "learning_rate": 7.794755389405973e-06, "loss": 1.192, "step": 27572 }, { "epoch": 1.6434616760042915, "grad_norm": 3.445040225982666, "learning_rate": 7.78968554498215e-06, "loss": 1.0681, "step": 27574 }, { "epoch": 1.6435808797234475, "grad_norm": 3.486020565032959, "learning_rate": 7.784617210565697e-06, "loss": 1.0795, "step": 27576 }, { "epoch": 1.6437000834426034, "grad_norm": 2.945007801055908, "learning_rate": 7.779550386337925e-06, "loss": 1.1845, "step": 27578 }, { "epoch": 1.6438192871617594, "grad_norm": 2.9629411697387695, "learning_rate": 7.774485072480091e-06, "loss": 1.2181, "step": 27580 }, { "epoch": 1.6439384908809154, "grad_norm": 3.2445805072784424, "learning_rate": 7.769421269173399e-06, "loss": 1.1747, "step": 27582 }, { "epoch": 1.6440576946000716, "grad_norm": 3.7253036499023438, "learning_rate": 7.764358976599001e-06, "loss": 1.2686, "step": 27584 }, { "epoch": 1.6441768983192275, "grad_norm": 3.094235897064209, "learning_rate": 7.759298194937975e-06, "loss": 1.1138, "step": 27586 }, { "epoch": 1.6442961020383837, "grad_norm": 3.132080316543579, "learning_rate": 7.754238924371404e-06, "loss": 1.0254, "step": 27588 }, { "epoch": 1.6444153057575397, "grad_norm": 3.162476062774658, "learning_rate": 7.749181165080221e-06, "loss": 1.1696, "step": 27590 }, { "epoch": 1.6445345094766957, "grad_norm": 2.7468760013580322, "learning_rate": 7.744124917245404e-06, "loss": 1.1197, "step": 27592 }, { "epoch": 1.6446537131958516, "grad_norm": 3.304935932159424, "learning_rate": 7.739070181047825e-06, "loss": 1.2942, "step": 27594 }, { "epoch": 1.6447729169150076, "grad_norm": 3.4696950912475586, "learning_rate": 7.734016956668277e-06, "loss": 0.9929, "step": 27596 }, { "epoch": 1.6448921206341638, "grad_norm": 3.1876583099365234, "learning_rate": 7.728965244287567e-06, "loss": 1.3251, "step": 27598 }, { "epoch": 1.64501132435332, "grad_norm": 2.8261399269104004, "learning_rate": 7.723915044086395e-06, "loss": 0.939, "step": 27600 }, { "epoch": 1.645130528072476, "grad_norm": 3.2700438499450684, "learning_rate": 7.71886635624543e-06, "loss": 1.1512, "step": 27602 }, { "epoch": 1.645249731791632, "grad_norm": 2.7816925048828125, "learning_rate": 7.71381918094527e-06, "loss": 1.0365, "step": 27604 }, { "epoch": 1.6453689355107879, "grad_norm": 2.8779354095458984, "learning_rate": 7.708773518366497e-06, "loss": 1.0294, "step": 27606 }, { "epoch": 1.6454881392299439, "grad_norm": 2.8936548233032227, "learning_rate": 7.703729368689582e-06, "loss": 1.0674, "step": 27608 }, { "epoch": 1.6456073429491, "grad_norm": 3.0986948013305664, "learning_rate": 7.698686732094972e-06, "loss": 1.2113, "step": 27610 }, { "epoch": 1.645726546668256, "grad_norm": 3.1422832012176514, "learning_rate": 7.693645608763094e-06, "loss": 0.9791, "step": 27612 }, { "epoch": 1.6458457503874122, "grad_norm": 2.9932620525360107, "learning_rate": 7.688605998874243e-06, "loss": 1.098, "step": 27614 }, { "epoch": 1.6459649541065682, "grad_norm": 3.5313453674316406, "learning_rate": 7.683567902608729e-06, "loss": 1.1732, "step": 27616 }, { "epoch": 1.6460841578257241, "grad_norm": 2.950038433074951, "learning_rate": 7.678531320146776e-06, "loss": 1.106, "step": 27618 }, { "epoch": 1.6462033615448801, "grad_norm": 2.9636144638061523, "learning_rate": 7.67349625166856e-06, "loss": 1.1737, "step": 27620 }, { "epoch": 1.646322565264036, "grad_norm": 3.1345536708831787, "learning_rate": 7.668462697354201e-06, "loss": 1.2058, "step": 27622 }, { "epoch": 1.6464417689831923, "grad_norm": 3.4462802410125732, "learning_rate": 7.663430657383763e-06, "loss": 1.0577, "step": 27624 }, { "epoch": 1.6465609727023485, "grad_norm": 3.4377143383026123, "learning_rate": 7.658400131937265e-06, "loss": 1.0577, "step": 27626 }, { "epoch": 1.6466801764215044, "grad_norm": 2.8585469722747803, "learning_rate": 7.65337112119467e-06, "loss": 1.0304, "step": 27628 }, { "epoch": 1.6467993801406604, "grad_norm": 3.039093017578125, "learning_rate": 7.648343625335873e-06, "loss": 1.0318, "step": 27630 }, { "epoch": 1.6469185838598164, "grad_norm": 3.2021596431732178, "learning_rate": 7.64331764454072e-06, "loss": 1.1654, "step": 27632 }, { "epoch": 1.6470377875789723, "grad_norm": 3.3588244915008545, "learning_rate": 7.638293178989026e-06, "loss": 1.0801, "step": 27634 }, { "epoch": 1.6471569912981285, "grad_norm": 3.406200408935547, "learning_rate": 7.633270228860523e-06, "loss": 1.1223, "step": 27636 }, { "epoch": 1.6472761950172845, "grad_norm": 3.0872435569763184, "learning_rate": 7.628248794334902e-06, "loss": 0.9756, "step": 27638 }, { "epoch": 1.6473953987364407, "grad_norm": 3.3710904121398926, "learning_rate": 7.623228875591798e-06, "loss": 1.1735, "step": 27640 }, { "epoch": 1.6475146024555967, "grad_norm": 3.8829004764556885, "learning_rate": 7.6182104728107825e-06, "loss": 1.1821, "step": 27642 }, { "epoch": 1.6476338061747526, "grad_norm": 3.0630340576171875, "learning_rate": 7.613193586171391e-06, "loss": 1.2086, "step": 27644 }, { "epoch": 1.6477530098939086, "grad_norm": 3.166369676589966, "learning_rate": 7.608178215853085e-06, "loss": 1.0891, "step": 27646 }, { "epoch": 1.6478722136130646, "grad_norm": 3.0137908458709717, "learning_rate": 7.603164362035292e-06, "loss": 1.0542, "step": 27648 }, { "epoch": 1.6479914173322208, "grad_norm": 3.6292173862457275, "learning_rate": 7.5981520248973535e-06, "loss": 1.2877, "step": 27650 }, { "epoch": 1.648110621051377, "grad_norm": 2.8739521503448486, "learning_rate": 7.593141204618615e-06, "loss": 1.0649, "step": 27652 }, { "epoch": 1.648229824770533, "grad_norm": 3.728121280670166, "learning_rate": 7.588131901378298e-06, "loss": 1.1398, "step": 27654 }, { "epoch": 1.648349028489689, "grad_norm": 3.2446818351745605, "learning_rate": 7.583124115355605e-06, "loss": 1.1048, "step": 27656 }, { "epoch": 1.6484682322088449, "grad_norm": 3.7796502113342285, "learning_rate": 7.57811784672971e-06, "loss": 1.0675, "step": 27658 }, { "epoch": 1.6485874359280008, "grad_norm": 3.3102219104766846, "learning_rate": 7.573113095679663e-06, "loss": 1.0211, "step": 27660 }, { "epoch": 1.648706639647157, "grad_norm": 3.1948139667510986, "learning_rate": 7.568109862384537e-06, "loss": 1.1149, "step": 27662 }, { "epoch": 1.648825843366313, "grad_norm": 3.3889455795288086, "learning_rate": 7.563108147023296e-06, "loss": 1.1728, "step": 27664 }, { "epoch": 1.6489450470854692, "grad_norm": 3.2830493450164795, "learning_rate": 7.558107949774873e-06, "loss": 1.1074, "step": 27666 }, { "epoch": 1.6490642508046252, "grad_norm": 3.0162408351898193, "learning_rate": 7.553109270818148e-06, "loss": 1.1891, "step": 27668 }, { "epoch": 1.6491834545237811, "grad_norm": 3.0539700984954834, "learning_rate": 7.548112110331928e-06, "loss": 1.0019, "step": 27670 }, { "epoch": 1.649302658242937, "grad_norm": 3.263641834259033, "learning_rate": 7.543116468494987e-06, "loss": 1.2932, "step": 27672 }, { "epoch": 1.649421861962093, "grad_norm": 3.1296682357788086, "learning_rate": 7.538122345486026e-06, "loss": 1.0448, "step": 27674 }, { "epoch": 1.6495410656812493, "grad_norm": 3.3592116832733154, "learning_rate": 7.53312974148373e-06, "loss": 1.1402, "step": 27676 }, { "epoch": 1.6496602694004054, "grad_norm": 2.9951138496398926, "learning_rate": 7.528138656666661e-06, "loss": 1.2749, "step": 27678 }, { "epoch": 1.6497794731195614, "grad_norm": 2.9642481803894043, "learning_rate": 7.523149091213399e-06, "loss": 1.055, "step": 27680 }, { "epoch": 1.6498986768387174, "grad_norm": 2.849443197250366, "learning_rate": 7.518161045302424e-06, "loss": 0.9304, "step": 27682 }, { "epoch": 1.6500178805578734, "grad_norm": 3.041722536087036, "learning_rate": 7.513174519112182e-06, "loss": 1.1363, "step": 27684 }, { "epoch": 1.6501370842770293, "grad_norm": 3.4757893085479736, "learning_rate": 7.508189512821057e-06, "loss": 1.1109, "step": 27686 }, { "epoch": 1.6502562879961855, "grad_norm": 3.03230881690979, "learning_rate": 7.5032060266073735e-06, "loss": 1.1174, "step": 27688 }, { "epoch": 1.6503754917153415, "grad_norm": 2.9769444465637207, "learning_rate": 7.498224060649412e-06, "loss": 1.087, "step": 27690 }, { "epoch": 1.6504946954344977, "grad_norm": 3.4563701152801514, "learning_rate": 7.493243615125395e-06, "loss": 1.0611, "step": 27692 }, { "epoch": 1.6506138991536536, "grad_norm": 3.3499486446380615, "learning_rate": 7.488264690213487e-06, "loss": 1.0168, "step": 27694 }, { "epoch": 1.6507331028728096, "grad_norm": 3.151890516281128, "learning_rate": 7.4832872860917945e-06, "loss": 1.1211, "step": 27696 }, { "epoch": 1.6508523065919656, "grad_norm": 3.228945016860962, "learning_rate": 7.47831140293841e-06, "loss": 1.1911, "step": 27698 }, { "epoch": 1.6509715103111215, "grad_norm": 3.246354103088379, "learning_rate": 7.473337040931294e-06, "loss": 1.204, "step": 27700 }, { "epoch": 1.6510907140302777, "grad_norm": 3.0762600898742676, "learning_rate": 7.468364200248412e-06, "loss": 1.1057, "step": 27702 }, { "epoch": 1.651209917749434, "grad_norm": 2.940394639968872, "learning_rate": 7.4633928810676715e-06, "loss": 1.0053, "step": 27704 }, { "epoch": 1.65132912146859, "grad_norm": 3.119460105895996, "learning_rate": 7.458423083566907e-06, "loss": 1.1953, "step": 27706 }, { "epoch": 1.6514483251877459, "grad_norm": 3.4190962314605713, "learning_rate": 7.453454807923899e-06, "loss": 1.0998, "step": 27708 }, { "epoch": 1.6515675289069018, "grad_norm": 3.215749979019165, "learning_rate": 7.448488054316383e-06, "loss": 0.975, "step": 27710 }, { "epoch": 1.6516867326260578, "grad_norm": 3.091970205307007, "learning_rate": 7.4435228229220444e-06, "loss": 1.26, "step": 27712 }, { "epoch": 1.651805936345214, "grad_norm": 3.0792267322540283, "learning_rate": 7.438559113918492e-06, "loss": 1.1979, "step": 27714 }, { "epoch": 1.65192514006437, "grad_norm": 3.1961567401885986, "learning_rate": 7.433596927483311e-06, "loss": 1.2504, "step": 27716 }, { "epoch": 1.6520443437835262, "grad_norm": 3.28566575050354, "learning_rate": 7.428636263794003e-06, "loss": 1.1418, "step": 27718 }, { "epoch": 1.6521635475026821, "grad_norm": 3.180136203765869, "learning_rate": 7.423677123028016e-06, "loss": 1.0409, "step": 27720 }, { "epoch": 1.652282751221838, "grad_norm": 2.8144004344940186, "learning_rate": 7.418719505362803e-06, "loss": 0.8741, "step": 27722 }, { "epoch": 1.652401954940994, "grad_norm": 3.0949597358703613, "learning_rate": 7.413763410975655e-06, "loss": 1.1168, "step": 27724 }, { "epoch": 1.65252115866015, "grad_norm": 3.16227650642395, "learning_rate": 7.408808840043913e-06, "loss": 1.0542, "step": 27726 }, { "epoch": 1.6526403623793062, "grad_norm": 3.2119221687316895, "learning_rate": 7.403855792744796e-06, "loss": 1.1022, "step": 27728 }, { "epoch": 1.6527595660984624, "grad_norm": 3.080352783203125, "learning_rate": 7.398904269255508e-06, "loss": 1.0361, "step": 27730 }, { "epoch": 1.6528787698176184, "grad_norm": 3.1560721397399902, "learning_rate": 7.393954269753161e-06, "loss": 1.0737, "step": 27732 }, { "epoch": 1.6529979735367744, "grad_norm": 3.3887033462524414, "learning_rate": 7.389005794414855e-06, "loss": 1.1474, "step": 27734 }, { "epoch": 1.6531171772559303, "grad_norm": 3.001035690307617, "learning_rate": 7.384058843417596e-06, "loss": 1.1938, "step": 27736 }, { "epoch": 1.6532363809750863, "grad_norm": 3.1602694988250732, "learning_rate": 7.379113416938355e-06, "loss": 1.0511, "step": 27738 }, { "epoch": 1.6533555846942425, "grad_norm": 3.195054292678833, "learning_rate": 7.374169515154072e-06, "loss": 1.1711, "step": 27740 }, { "epoch": 1.6534747884133985, "grad_norm": 3.1268999576568604, "learning_rate": 7.369227138241569e-06, "loss": 1.088, "step": 27742 }, { "epoch": 1.6535939921325546, "grad_norm": 3.1315243244171143, "learning_rate": 7.3642862863776805e-06, "loss": 1.1812, "step": 27744 }, { "epoch": 1.6537131958517106, "grad_norm": 3.1380693912506104, "learning_rate": 7.3593469597391605e-06, "loss": 1.0631, "step": 27746 }, { "epoch": 1.6538323995708666, "grad_norm": 3.3541364669799805, "learning_rate": 7.354409158502667e-06, "loss": 1.036, "step": 27748 }, { "epoch": 1.6539516032900226, "grad_norm": 3.4060730934143066, "learning_rate": 7.3494728828448825e-06, "loss": 1.0779, "step": 27750 }, { "epoch": 1.6540708070091785, "grad_norm": 3.150855302810669, "learning_rate": 7.3445381329423805e-06, "loss": 1.1409, "step": 27752 }, { "epoch": 1.6541900107283347, "grad_norm": 2.834819793701172, "learning_rate": 7.339604908971692e-06, "loss": 0.9825, "step": 27754 }, { "epoch": 1.654309214447491, "grad_norm": 3.0377838611602783, "learning_rate": 7.3346732111093e-06, "loss": 1.0875, "step": 27756 }, { "epoch": 1.6544284181666469, "grad_norm": 3.5029993057250977, "learning_rate": 7.329743039531623e-06, "loss": 1.143, "step": 27758 }, { "epoch": 1.6545476218858028, "grad_norm": 3.0275895595550537, "learning_rate": 7.32481439441503e-06, "loss": 1.256, "step": 27760 }, { "epoch": 1.6546668256049588, "grad_norm": 3.4632415771484375, "learning_rate": 7.319887275935844e-06, "loss": 1.2194, "step": 27762 }, { "epoch": 1.6547860293241148, "grad_norm": 3.2176358699798584, "learning_rate": 7.314961684270311e-06, "loss": 1.0147, "step": 27764 }, { "epoch": 1.654905233043271, "grad_norm": 3.356508731842041, "learning_rate": 7.310037619594634e-06, "loss": 0.9983, "step": 27766 }, { "epoch": 1.655024436762427, "grad_norm": 3.2402994632720947, "learning_rate": 7.3051150820849905e-06, "loss": 1.0648, "step": 27768 }, { "epoch": 1.6551436404815831, "grad_norm": 3.254437208175659, "learning_rate": 7.300194071917455e-06, "loss": 1.1753, "step": 27770 }, { "epoch": 1.655262844200739, "grad_norm": 3.471057653427124, "learning_rate": 7.295274589268075e-06, "loss": 1.1789, "step": 27772 }, { "epoch": 1.655382047919895, "grad_norm": 2.5676276683807373, "learning_rate": 7.2903566343128335e-06, "loss": 0.977, "step": 27774 }, { "epoch": 1.655501251639051, "grad_norm": 3.2155749797821045, "learning_rate": 7.285440207227662e-06, "loss": 1.1891, "step": 27776 }, { "epoch": 1.655620455358207, "grad_norm": 3.232487440109253, "learning_rate": 7.280525308188446e-06, "loss": 1.2543, "step": 27778 }, { "epoch": 1.6557396590773632, "grad_norm": 3.0795364379882812, "learning_rate": 7.275611937371002e-06, "loss": 1.0693, "step": 27780 }, { "epoch": 1.6558588627965194, "grad_norm": 3.009244203567505, "learning_rate": 7.2707000949511e-06, "loss": 1.0254, "step": 27782 }, { "epoch": 1.6559780665156754, "grad_norm": 2.7690272331237793, "learning_rate": 7.2657897811044375e-06, "loss": 0.9577, "step": 27784 }, { "epoch": 1.6560972702348313, "grad_norm": 2.7973766326904297, "learning_rate": 7.260880996006714e-06, "loss": 1.0225, "step": 27786 }, { "epoch": 1.6562164739539873, "grad_norm": 3.3126332759857178, "learning_rate": 7.255973739833482e-06, "loss": 1.0471, "step": 27788 }, { "epoch": 1.6563356776731433, "grad_norm": 3.356214761734009, "learning_rate": 7.251068012760326e-06, "loss": 1.2006, "step": 27790 }, { "epoch": 1.6564548813922995, "grad_norm": 3.229573965072632, "learning_rate": 7.246163814962747e-06, "loss": 1.1178, "step": 27792 }, { "epoch": 1.6565740851114554, "grad_norm": 2.9917478561401367, "learning_rate": 7.241261146616146e-06, "loss": 1.0081, "step": 27794 }, { "epoch": 1.6566932888306116, "grad_norm": 3.2796874046325684, "learning_rate": 7.236360007895937e-06, "loss": 1.1025, "step": 27796 }, { "epoch": 1.6568124925497676, "grad_norm": 3.362291097640991, "learning_rate": 7.231460398977452e-06, "loss": 1.0319, "step": 27798 }, { "epoch": 1.6569316962689236, "grad_norm": 3.37679386138916, "learning_rate": 7.226562320035957e-06, "loss": 1.0638, "step": 27800 }, { "epoch": 1.6570508999880795, "grad_norm": 3.464747190475464, "learning_rate": 7.2216657712466605e-06, "loss": 1.0348, "step": 27802 }, { "epoch": 1.6571701037072357, "grad_norm": 3.463759422302246, "learning_rate": 7.216770752784774e-06, "loss": 1.1069, "step": 27804 }, { "epoch": 1.6572893074263917, "grad_norm": 3.4039692878723145, "learning_rate": 7.211877264825362e-06, "loss": 1.2105, "step": 27806 }, { "epoch": 1.6574085111455479, "grad_norm": 3.7220232486724854, "learning_rate": 7.206985307543484e-06, "loss": 1.1426, "step": 27808 }, { "epoch": 1.6575277148647038, "grad_norm": 3.0955779552459717, "learning_rate": 7.2020948811141856e-06, "loss": 1.0227, "step": 27810 }, { "epoch": 1.6576469185838598, "grad_norm": 3.5207958221435547, "learning_rate": 7.197205985712363e-06, "loss": 0.9881, "step": 27812 }, { "epoch": 1.6577661223030158, "grad_norm": 3.2740867137908936, "learning_rate": 7.192318621512934e-06, "loss": 0.9894, "step": 27814 }, { "epoch": 1.6578853260221718, "grad_norm": 3.095412015914917, "learning_rate": 7.18743278869074e-06, "loss": 1.101, "step": 27816 }, { "epoch": 1.658004529741328, "grad_norm": 3.2366278171539307, "learning_rate": 7.182548487420554e-06, "loss": 0.9715, "step": 27818 }, { "epoch": 1.658123733460484, "grad_norm": 3.1271626949310303, "learning_rate": 7.177665717877108e-06, "loss": 1.2032, "step": 27820 }, { "epoch": 1.65824293717964, "grad_norm": 3.4157886505126953, "learning_rate": 7.172784480235073e-06, "loss": 1.288, "step": 27822 }, { "epoch": 1.658362140898796, "grad_norm": 3.3326473236083984, "learning_rate": 7.1679047746690656e-06, "loss": 1.0554, "step": 27824 }, { "epoch": 1.658481344617952, "grad_norm": 3.176920175552368, "learning_rate": 7.163026601353656e-06, "loss": 1.1804, "step": 27826 }, { "epoch": 1.658600548337108, "grad_norm": 3.2352819442749023, "learning_rate": 7.158149960463351e-06, "loss": 1.0434, "step": 27828 }, { "epoch": 1.6587197520562642, "grad_norm": 3.378994941711426, "learning_rate": 7.153274852172592e-06, "loss": 1.1463, "step": 27830 }, { "epoch": 1.6588389557754202, "grad_norm": 3.372429132461548, "learning_rate": 7.1484012766558136e-06, "loss": 1.1511, "step": 27832 }, { "epoch": 1.6589581594945764, "grad_norm": 3.4867687225341797, "learning_rate": 7.143529234087326e-06, "loss": 1.1548, "step": 27834 }, { "epoch": 1.6590773632137323, "grad_norm": 3.1557841300964355, "learning_rate": 7.138658724641417e-06, "loss": 0.9662, "step": 27836 }, { "epoch": 1.6591965669328883, "grad_norm": 3.11439847946167, "learning_rate": 7.133789748492342e-06, "loss": 1.2638, "step": 27838 }, { "epoch": 1.6593157706520443, "grad_norm": 3.325758457183838, "learning_rate": 7.1289223058142786e-06, "loss": 1.1618, "step": 27840 }, { "epoch": 1.6594349743712002, "grad_norm": 3.1272146701812744, "learning_rate": 7.124056396781342e-06, "loss": 1.1705, "step": 27842 }, { "epoch": 1.6595541780903564, "grad_norm": 3.413424015045166, "learning_rate": 7.119192021567605e-06, "loss": 1.1131, "step": 27844 }, { "epoch": 1.6596733818095124, "grad_norm": 3.147064447402954, "learning_rate": 7.114329180347091e-06, "loss": 1.0097, "step": 27846 }, { "epoch": 1.6597925855286686, "grad_norm": 3.2932381629943848, "learning_rate": 7.109467873293735e-06, "loss": 1.0072, "step": 27848 }, { "epoch": 1.6599117892478246, "grad_norm": 3.0191454887390137, "learning_rate": 7.104608100581489e-06, "loss": 0.9787, "step": 27850 }, { "epoch": 1.6600309929669805, "grad_norm": 3.3782835006713867, "learning_rate": 7.099749862384164e-06, "loss": 1.0098, "step": 27852 }, { "epoch": 1.6601501966861365, "grad_norm": 3.0861830711364746, "learning_rate": 7.09489315887556e-06, "loss": 1.0838, "step": 27854 }, { "epoch": 1.6602694004052927, "grad_norm": 3.1050472259521484, "learning_rate": 7.0900379902294424e-06, "loss": 1.0631, "step": 27856 }, { "epoch": 1.6603886041244487, "grad_norm": 2.8437888622283936, "learning_rate": 7.085184356619462e-06, "loss": 1.1736, "step": 27858 }, { "epoch": 1.6605078078436049, "grad_norm": 3.408670425415039, "learning_rate": 7.08033225821928e-06, "loss": 1.2612, "step": 27860 }, { "epoch": 1.6606270115627608, "grad_norm": 3.2955987453460693, "learning_rate": 7.0754816952024574e-06, "loss": 1.1397, "step": 27862 }, { "epoch": 1.6607462152819168, "grad_norm": 3.3871474266052246, "learning_rate": 7.0706326677425215e-06, "loss": 1.1362, "step": 27864 }, { "epoch": 1.6608654190010728, "grad_norm": 2.858217239379883, "learning_rate": 7.06578517601294e-06, "loss": 1.1406, "step": 27866 }, { "epoch": 1.6609846227202287, "grad_norm": 3.235542058944702, "learning_rate": 7.060939220187113e-06, "loss": 1.2026, "step": 27868 }, { "epoch": 1.661103826439385, "grad_norm": 3.3712682723999023, "learning_rate": 7.056094800438412e-06, "loss": 1.058, "step": 27870 }, { "epoch": 1.661223030158541, "grad_norm": 3.30920672416687, "learning_rate": 7.051251916940116e-06, "loss": 1.1475, "step": 27872 }, { "epoch": 1.661342233877697, "grad_norm": 3.4658446311950684, "learning_rate": 7.046410569865508e-06, "loss": 1.2299, "step": 27874 }, { "epoch": 1.661461437596853, "grad_norm": 3.198155164718628, "learning_rate": 7.041570759387739e-06, "loss": 1.1539, "step": 27876 }, { "epoch": 1.661580641316009, "grad_norm": 3.5106186866760254, "learning_rate": 7.036732485679975e-06, "loss": 1.1273, "step": 27878 }, { "epoch": 1.661699845035165, "grad_norm": 3.556154251098633, "learning_rate": 7.031895748915296e-06, "loss": 1.1811, "step": 27880 }, { "epoch": 1.6618190487543212, "grad_norm": 2.9819419384002686, "learning_rate": 7.027060549266701e-06, "loss": 1.1136, "step": 27882 }, { "epoch": 1.6619382524734772, "grad_norm": 2.430882215499878, "learning_rate": 7.022226886907185e-06, "loss": 1.0833, "step": 27884 }, { "epoch": 1.6620574561926333, "grad_norm": 3.221968173980713, "learning_rate": 7.017394762009666e-06, "loss": 1.0775, "step": 27886 }, { "epoch": 1.6621766599117893, "grad_norm": 3.3421339988708496, "learning_rate": 7.0125641747469995e-06, "loss": 1.1274, "step": 27888 }, { "epoch": 1.6622958636309453, "grad_norm": 3.4538748264312744, "learning_rate": 7.007735125291992e-06, "loss": 1.041, "step": 27890 }, { "epoch": 1.6624150673501012, "grad_norm": 3.334138870239258, "learning_rate": 7.002907613817394e-06, "loss": 1.1312, "step": 27892 }, { "epoch": 1.6625342710692572, "grad_norm": 3.042785882949829, "learning_rate": 6.998081640495902e-06, "loss": 1.1978, "step": 27894 }, { "epoch": 1.6626534747884134, "grad_norm": 2.9716708660125732, "learning_rate": 6.993257205500165e-06, "loss": 1.0982, "step": 27896 }, { "epoch": 1.6627726785075696, "grad_norm": 2.949033498764038, "learning_rate": 6.988434309002762e-06, "loss": 1.1056, "step": 27898 }, { "epoch": 1.6628918822267256, "grad_norm": 3.320861339569092, "learning_rate": 6.98361295117621e-06, "loss": 1.0337, "step": 27900 }, { "epoch": 1.6630110859458815, "grad_norm": 3.1480789184570312, "learning_rate": 6.978793132193018e-06, "loss": 1.0286, "step": 27902 }, { "epoch": 1.6631302896650375, "grad_norm": 2.9676525592803955, "learning_rate": 6.97397485222559e-06, "loss": 1.151, "step": 27904 }, { "epoch": 1.6632494933841935, "grad_norm": 3.26701021194458, "learning_rate": 6.969158111446289e-06, "loss": 1.1523, "step": 27906 }, { "epoch": 1.6633686971033497, "grad_norm": 3.0896823406219482, "learning_rate": 6.964342910027438e-06, "loss": 1.0542, "step": 27908 }, { "epoch": 1.6634879008225056, "grad_norm": 3.6199445724487305, "learning_rate": 6.959529248141284e-06, "loss": 1.085, "step": 27910 }, { "epoch": 1.6636071045416618, "grad_norm": 3.202230453491211, "learning_rate": 6.954717125960025e-06, "loss": 1.0627, "step": 27912 }, { "epoch": 1.6637263082608178, "grad_norm": 3.0101993083953857, "learning_rate": 6.949906543655821e-06, "loss": 1.1528, "step": 27914 }, { "epoch": 1.6638455119799738, "grad_norm": 3.4064996242523193, "learning_rate": 6.945097501400749e-06, "loss": 1.1995, "step": 27916 }, { "epoch": 1.6639647156991297, "grad_norm": 3.108566999435425, "learning_rate": 6.940289999366839e-06, "loss": 0.9647, "step": 27918 }, { "epoch": 1.6640839194182857, "grad_norm": 3.200329303741455, "learning_rate": 6.935484037726103e-06, "loss": 1.0471, "step": 27920 }, { "epoch": 1.664203123137442, "grad_norm": 3.357536554336548, "learning_rate": 6.930679616650427e-06, "loss": 1.1011, "step": 27922 }, { "epoch": 1.664322326856598, "grad_norm": 3.23634934425354, "learning_rate": 6.925876736311715e-06, "loss": 1.0641, "step": 27924 }, { "epoch": 1.664441530575754, "grad_norm": 3.2227466106414795, "learning_rate": 6.921075396881776e-06, "loss": 1.022, "step": 27926 }, { "epoch": 1.66456073429491, "grad_norm": 2.9193928241729736, "learning_rate": 6.91627559853234e-06, "loss": 1.1776, "step": 27928 }, { "epoch": 1.664679938014066, "grad_norm": 2.7371411323547363, "learning_rate": 6.911477341435152e-06, "loss": 1.2668, "step": 27930 }, { "epoch": 1.664799141733222, "grad_norm": 3.1131322383880615, "learning_rate": 6.906680625761841e-06, "loss": 1.2398, "step": 27932 }, { "epoch": 1.6649183454523782, "grad_norm": 3.070873737335205, "learning_rate": 6.9018854516840035e-06, "loss": 1.1172, "step": 27934 }, { "epoch": 1.6650375491715341, "grad_norm": 3.3139708042144775, "learning_rate": 6.897091819373174e-06, "loss": 1.1468, "step": 27936 }, { "epoch": 1.6651567528906903, "grad_norm": 3.2228779792785645, "learning_rate": 6.892299729000868e-06, "loss": 0.9982, "step": 27938 }, { "epoch": 1.6652759566098463, "grad_norm": 2.7578179836273193, "learning_rate": 6.887509180738483e-06, "loss": 1.0909, "step": 27940 }, { "epoch": 1.6653951603290023, "grad_norm": 2.9261627197265625, "learning_rate": 6.88272017475739e-06, "loss": 1.0902, "step": 27942 }, { "epoch": 1.6655143640481582, "grad_norm": 3.0260977745056152, "learning_rate": 6.877932711228941e-06, "loss": 1.1917, "step": 27944 }, { "epoch": 1.6656335677673142, "grad_norm": 2.952463150024414, "learning_rate": 6.8731467903243586e-06, "loss": 1.09, "step": 27946 }, { "epoch": 1.6657527714864704, "grad_norm": 3.139643669128418, "learning_rate": 6.868362412214879e-06, "loss": 0.9443, "step": 27948 }, { "epoch": 1.6658719752056266, "grad_norm": 3.3496999740600586, "learning_rate": 6.8635795770716545e-06, "loss": 1.2095, "step": 27950 }, { "epoch": 1.6659911789247825, "grad_norm": 2.7755539417266846, "learning_rate": 6.858798285065776e-06, "loss": 1.0501, "step": 27952 }, { "epoch": 1.6661103826439385, "grad_norm": 3.4171392917633057, "learning_rate": 6.854018536368289e-06, "loss": 1.2052, "step": 27954 }, { "epoch": 1.6662295863630945, "grad_norm": 3.4117298126220703, "learning_rate": 6.849240331150186e-06, "loss": 0.9788, "step": 27956 }, { "epoch": 1.6663487900822505, "grad_norm": 3.523181676864624, "learning_rate": 6.844463669582391e-06, "loss": 1.0685, "step": 27958 }, { "epoch": 1.6664679938014066, "grad_norm": 3.2150933742523193, "learning_rate": 6.839688551835782e-06, "loss": 1.0557, "step": 27960 }, { "epoch": 1.6665871975205626, "grad_norm": 3.4815049171447754, "learning_rate": 6.834914978081192e-06, "loss": 1.0459, "step": 27962 }, { "epoch": 1.6667064012397188, "grad_norm": 3.2478134632110596, "learning_rate": 6.830142948489365e-06, "loss": 0.8834, "step": 27964 }, { "epoch": 1.6668256049588748, "grad_norm": 3.55788516998291, "learning_rate": 6.825372463231039e-06, "loss": 1.2136, "step": 27966 }, { "epoch": 1.6669448086780307, "grad_norm": 2.93276047706604, "learning_rate": 6.8206035224768646e-06, "loss": 1.0455, "step": 27968 }, { "epoch": 1.6670640123971867, "grad_norm": 3.1995465755462646, "learning_rate": 6.8158361263974406e-06, "loss": 1.1505, "step": 27970 }, { "epoch": 1.6671832161163427, "grad_norm": 3.2377829551696777, "learning_rate": 6.811070275163306e-06, "loss": 1.0038, "step": 27972 }, { "epoch": 1.6673024198354989, "grad_norm": 3.2783212661743164, "learning_rate": 6.806305968944959e-06, "loss": 1.111, "step": 27974 }, { "epoch": 1.667421623554655, "grad_norm": 3.0726983547210693, "learning_rate": 6.80154320791283e-06, "loss": 1.2079, "step": 27976 }, { "epoch": 1.667540827273811, "grad_norm": 3.4843316078186035, "learning_rate": 6.796781992237305e-06, "loss": 1.0329, "step": 27978 }, { "epoch": 1.667660030992967, "grad_norm": 3.558866262435913, "learning_rate": 6.792022322088709e-06, "loss": 1.2213, "step": 27980 }, { "epoch": 1.667779234712123, "grad_norm": 3.159877061843872, "learning_rate": 6.787264197637294e-06, "loss": 1.073, "step": 27982 }, { "epoch": 1.667898438431279, "grad_norm": 3.5671815872192383, "learning_rate": 6.782507619053314e-06, "loss": 1.2043, "step": 27984 }, { "epoch": 1.6680176421504351, "grad_norm": 3.4688918590545654, "learning_rate": 6.777752586506891e-06, "loss": 1.0259, "step": 27986 }, { "epoch": 1.668136845869591, "grad_norm": 3.2020926475524902, "learning_rate": 6.772999100168132e-06, "loss": 1.1158, "step": 27988 }, { "epoch": 1.6682560495887473, "grad_norm": 2.991262674331665, "learning_rate": 6.768247160207114e-06, "loss": 1.0785, "step": 27990 }, { "epoch": 1.6683752533079033, "grad_norm": 2.975545644760132, "learning_rate": 6.763496766793792e-06, "loss": 1.2691, "step": 27992 }, { "epoch": 1.6684944570270592, "grad_norm": 3.6980412006378174, "learning_rate": 6.758747920098135e-06, "loss": 1.1744, "step": 27994 }, { "epoch": 1.6686136607462152, "grad_norm": 3.4868106842041016, "learning_rate": 6.754000620290007e-06, "loss": 1.2252, "step": 27996 }, { "epoch": 1.6687328644653712, "grad_norm": 3.55869197845459, "learning_rate": 6.7492548675392455e-06, "loss": 1.1105, "step": 27998 }, { "epoch": 1.6688520681845274, "grad_norm": 3.6085662841796875, "learning_rate": 6.744510662015619e-06, "loss": 1.1353, "step": 28000 }, { "epoch": 1.6689712719036836, "grad_norm": 3.0887694358825684, "learning_rate": 6.739768003888841e-06, "loss": 1.1703, "step": 28002 }, { "epoch": 1.6690904756228395, "grad_norm": 2.820408582687378, "learning_rate": 6.7350268933285776e-06, "loss": 0.9606, "step": 28004 }, { "epoch": 1.6692096793419955, "grad_norm": 3.4807894229888916, "learning_rate": 6.73028733050442e-06, "loss": 1.1609, "step": 28006 }, { "epoch": 1.6693288830611515, "grad_norm": 3.439115285873413, "learning_rate": 6.725549315585949e-06, "loss": 1.0589, "step": 28008 }, { "epoch": 1.6694480867803074, "grad_norm": 3.6035170555114746, "learning_rate": 6.720812848742624e-06, "loss": 1.258, "step": 28010 }, { "epoch": 1.6695672904994636, "grad_norm": 3.2850613594055176, "learning_rate": 6.71607793014391e-06, "loss": 1.0279, "step": 28012 }, { "epoch": 1.6696864942186196, "grad_norm": 3.4551448822021484, "learning_rate": 6.711344559959182e-06, "loss": 1.1496, "step": 28014 }, { "epoch": 1.6698056979377758, "grad_norm": 3.21384334564209, "learning_rate": 6.70661273835777e-06, "loss": 1.0306, "step": 28016 }, { "epoch": 1.6699249016569317, "grad_norm": 3.377439498901367, "learning_rate": 6.701882465508946e-06, "loss": 1.1399, "step": 28018 }, { "epoch": 1.6700441053760877, "grad_norm": 3.5354368686676025, "learning_rate": 6.697153741581929e-06, "loss": 1.2706, "step": 28020 }, { "epoch": 1.6701633090952437, "grad_norm": 3.3043079376220703, "learning_rate": 6.692426566745885e-06, "loss": 1.1599, "step": 28022 }, { "epoch": 1.6702825128143997, "grad_norm": 3.272529363632202, "learning_rate": 6.687700941169917e-06, "loss": 1.2293, "step": 28024 }, { "epoch": 1.6704017165335558, "grad_norm": 3.1792235374450684, "learning_rate": 6.682976865023077e-06, "loss": 1.1808, "step": 28026 }, { "epoch": 1.670520920252712, "grad_norm": 3.4935789108276367, "learning_rate": 6.678254338474349e-06, "loss": 1.1872, "step": 28028 }, { "epoch": 1.670640123971868, "grad_norm": 3.369053602218628, "learning_rate": 6.673533361692713e-06, "loss": 1.1959, "step": 28030 }, { "epoch": 1.670759327691024, "grad_norm": 3.012500524520874, "learning_rate": 6.668813934847018e-06, "loss": 1.082, "step": 28032 }, { "epoch": 1.67087853141018, "grad_norm": 3.1026060581207275, "learning_rate": 6.664096058106089e-06, "loss": 1.0496, "step": 28034 }, { "epoch": 1.670997735129336, "grad_norm": 3.479597330093384, "learning_rate": 6.659379731638732e-06, "loss": 1.0239, "step": 28036 }, { "epoch": 1.671116938848492, "grad_norm": 3.7203264236450195, "learning_rate": 6.654664955613643e-06, "loss": 1.1801, "step": 28038 }, { "epoch": 1.671236142567648, "grad_norm": 3.5110905170440674, "learning_rate": 6.6499517301995e-06, "loss": 1.1728, "step": 28040 }, { "epoch": 1.6713553462868043, "grad_norm": 3.466303825378418, "learning_rate": 6.645240055564905e-06, "loss": 1.0871, "step": 28042 }, { "epoch": 1.6714745500059602, "grad_norm": 3.013765573501587, "learning_rate": 6.640529931878403e-06, "loss": 1.0918, "step": 28044 }, { "epoch": 1.6715937537251162, "grad_norm": 3.000623941421509, "learning_rate": 6.635821359308503e-06, "loss": 1.0373, "step": 28046 }, { "epoch": 1.6717129574442722, "grad_norm": 3.112705707550049, "learning_rate": 6.631114338023642e-06, "loss": 0.9825, "step": 28048 }, { "epoch": 1.6718321611634281, "grad_norm": 3.4371871948242188, "learning_rate": 6.626408868192208e-06, "loss": 1.0916, "step": 28050 }, { "epoch": 1.6719513648825843, "grad_norm": 3.356935739517212, "learning_rate": 6.6217049499825156e-06, "loss": 1.0692, "step": 28052 }, { "epoch": 1.6720705686017405, "grad_norm": 3.673341989517212, "learning_rate": 6.6170025835628755e-06, "loss": 1.0769, "step": 28054 }, { "epoch": 1.6721897723208965, "grad_norm": 3.3224706649780273, "learning_rate": 6.6123017691014645e-06, "loss": 1.083, "step": 28056 }, { "epoch": 1.6723089760400525, "grad_norm": 3.154831647872925, "learning_rate": 6.607602506766481e-06, "loss": 1.1286, "step": 28058 }, { "epoch": 1.6724281797592084, "grad_norm": 3.501093626022339, "learning_rate": 6.602904796726033e-06, "loss": 1.1555, "step": 28060 }, { "epoch": 1.6725473834783644, "grad_norm": 3.6799099445343018, "learning_rate": 6.5982086391481404e-06, "loss": 1.1606, "step": 28062 }, { "epoch": 1.6726665871975206, "grad_norm": 3.1251296997070312, "learning_rate": 6.593514034200832e-06, "loss": 0.9591, "step": 28064 }, { "epoch": 1.6727857909166766, "grad_norm": 3.173851490020752, "learning_rate": 6.5888209820520404e-06, "loss": 1.3033, "step": 28066 }, { "epoch": 1.6729049946358328, "grad_norm": 2.877312183380127, "learning_rate": 6.58412948286965e-06, "loss": 1.0347, "step": 28068 }, { "epoch": 1.6730241983549887, "grad_norm": 3.2728357315063477, "learning_rate": 6.579439536821486e-06, "loss": 1.0192, "step": 28070 }, { "epoch": 1.6731434020741447, "grad_norm": 3.299314022064209, "learning_rate": 6.574751144075353e-06, "loss": 1.1015, "step": 28072 }, { "epoch": 1.6732626057933007, "grad_norm": 3.1529393196105957, "learning_rate": 6.570064304798928e-06, "loss": 1.1814, "step": 28074 }, { "epoch": 1.6733818095124566, "grad_norm": 2.861093282699585, "learning_rate": 6.565379019159906e-06, "loss": 0.9031, "step": 28076 }, { "epoch": 1.6735010132316128, "grad_norm": 3.2868635654449463, "learning_rate": 6.560695287325897e-06, "loss": 1.1749, "step": 28078 }, { "epoch": 1.673620216950769, "grad_norm": 2.9846088886260986, "learning_rate": 6.556013109464421e-06, "loss": 1.0279, "step": 28080 }, { "epoch": 1.673739420669925, "grad_norm": 4.192355632781982, "learning_rate": 6.551332485743012e-06, "loss": 1.1486, "step": 28082 }, { "epoch": 1.673858624389081, "grad_norm": 3.198962926864624, "learning_rate": 6.546653416329096e-06, "loss": 1.125, "step": 28084 }, { "epoch": 1.673977828108237, "grad_norm": 3.301626205444336, "learning_rate": 6.541975901390063e-06, "loss": 1.1315, "step": 28086 }, { "epoch": 1.674097031827393, "grad_norm": 3.265636444091797, "learning_rate": 6.5372999410932445e-06, "loss": 1.1038, "step": 28088 }, { "epoch": 1.674216235546549, "grad_norm": 3.3616812229156494, "learning_rate": 6.532625535605907e-06, "loss": 1.0913, "step": 28090 }, { "epoch": 1.674335439265705, "grad_norm": 3.2902579307556152, "learning_rate": 6.527952685095279e-06, "loss": 1.075, "step": 28092 }, { "epoch": 1.6744546429848612, "grad_norm": 3.152665138244629, "learning_rate": 6.523281389728525e-06, "loss": 1.249, "step": 28094 }, { "epoch": 1.6745738467040172, "grad_norm": 2.838515043258667, "learning_rate": 6.518611649672746e-06, "loss": 1.0481, "step": 28096 }, { "epoch": 1.6746930504231732, "grad_norm": 3.105095863342285, "learning_rate": 6.513943465094985e-06, "loss": 1.1642, "step": 28098 }, { "epoch": 1.6748122541423291, "grad_norm": 3.4530882835388184, "learning_rate": 6.509276836162264e-06, "loss": 1.2159, "step": 28100 }, { "epoch": 1.6749314578614851, "grad_norm": 3.28167986869812, "learning_rate": 6.504611763041513e-06, "loss": 1.0109, "step": 28102 }, { "epoch": 1.6750506615806413, "grad_norm": 3.095487594604492, "learning_rate": 6.499948245899623e-06, "loss": 1.1417, "step": 28104 }, { "epoch": 1.6751698652997975, "grad_norm": 3.2813565731048584, "learning_rate": 6.495286284903412e-06, "loss": 1.243, "step": 28106 }, { "epoch": 1.6752890690189535, "grad_norm": 3.2914845943450928, "learning_rate": 6.4906258802196585e-06, "loss": 1.1018, "step": 28108 }, { "epoch": 1.6754082727381094, "grad_norm": 3.076869010925293, "learning_rate": 6.485967032015089e-06, "loss": 1.1842, "step": 28110 }, { "epoch": 1.6755274764572654, "grad_norm": 2.973555564880371, "learning_rate": 6.481309740456354e-06, "loss": 1.1232, "step": 28112 }, { "epoch": 1.6756466801764214, "grad_norm": 3.2319130897521973, "learning_rate": 6.4766540057100754e-06, "loss": 1.0888, "step": 28114 }, { "epoch": 1.6757658838955776, "grad_norm": 2.9283034801483154, "learning_rate": 6.471999827942776e-06, "loss": 1.1137, "step": 28116 }, { "epoch": 1.6758850876147335, "grad_norm": 3.1784110069274902, "learning_rate": 6.467347207321001e-06, "loss": 1.1011, "step": 28118 }, { "epoch": 1.6760042913338897, "grad_norm": 3.087670087814331, "learning_rate": 6.462696144011149e-06, "loss": 1.0833, "step": 28120 }, { "epoch": 1.6761234950530457, "grad_norm": 2.75266432762146, "learning_rate": 6.458046638179605e-06, "loss": 1.0291, "step": 28122 }, { "epoch": 1.6762426987722017, "grad_norm": 3.025221586227417, "learning_rate": 6.453398689992734e-06, "loss": 1.076, "step": 28124 }, { "epoch": 1.6763619024913576, "grad_norm": 3.3001298904418945, "learning_rate": 6.448752299616762e-06, "loss": 1.0881, "step": 28126 }, { "epoch": 1.6764811062105136, "grad_norm": 2.9955592155456543, "learning_rate": 6.4441074672179415e-06, "loss": 1.0575, "step": 28128 }, { "epoch": 1.6766003099296698, "grad_norm": 3.1939995288848877, "learning_rate": 6.439464192962419e-06, "loss": 1.1319, "step": 28130 }, { "epoch": 1.676719513648826, "grad_norm": 3.6642417907714844, "learning_rate": 6.434822477016305e-06, "loss": 1.1405, "step": 28132 }, { "epoch": 1.676838717367982, "grad_norm": 3.312389612197876, "learning_rate": 6.4301823195456355e-06, "loss": 1.0414, "step": 28134 }, { "epoch": 1.676957921087138, "grad_norm": 3.010683298110962, "learning_rate": 6.425543720716442e-06, "loss": 1.1374, "step": 28136 }, { "epoch": 1.677077124806294, "grad_norm": 3.2080118656158447, "learning_rate": 6.4209066806946215e-06, "loss": 1.0791, "step": 28138 }, { "epoch": 1.6771963285254499, "grad_norm": 3.4414021968841553, "learning_rate": 6.416271199646068e-06, "loss": 1.1974, "step": 28140 }, { "epoch": 1.677315532244606, "grad_norm": 3.298224925994873, "learning_rate": 6.411637277736632e-06, "loss": 1.186, "step": 28142 }, { "epoch": 1.677434735963762, "grad_norm": 3.2687265872955322, "learning_rate": 6.407004915132047e-06, "loss": 1.0906, "step": 28144 }, { "epoch": 1.6775539396829182, "grad_norm": 3.056220531463623, "learning_rate": 6.402374111998055e-06, "loss": 1.0729, "step": 28146 }, { "epoch": 1.6776731434020742, "grad_norm": 3.2524681091308594, "learning_rate": 6.3977448685003105e-06, "loss": 1.012, "step": 28148 }, { "epoch": 1.6777923471212302, "grad_norm": 3.499119281768799, "learning_rate": 6.39311718480442e-06, "loss": 1.1431, "step": 28150 }, { "epoch": 1.6779115508403861, "grad_norm": 3.333313465118408, "learning_rate": 6.388491061075924e-06, "loss": 1.0702, "step": 28152 }, { "epoch": 1.678030754559542, "grad_norm": 3.1619749069213867, "learning_rate": 6.3838664974803176e-06, "loss": 1.1129, "step": 28154 }, { "epoch": 1.6781499582786983, "grad_norm": 3.0978922843933105, "learning_rate": 6.379243494183035e-06, "loss": 1.0697, "step": 28156 }, { "epoch": 1.6782691619978545, "grad_norm": 2.951766014099121, "learning_rate": 6.374622051349466e-06, "loss": 1.0169, "step": 28158 }, { "epoch": 1.6783883657170104, "grad_norm": 3.1750292778015137, "learning_rate": 6.370002169144923e-06, "loss": 1.0399, "step": 28160 }, { "epoch": 1.6785075694361664, "grad_norm": 3.3466851711273193, "learning_rate": 6.365383847734674e-06, "loss": 1.178, "step": 28162 }, { "epoch": 1.6786267731553224, "grad_norm": 3.213616132736206, "learning_rate": 6.360767087283947e-06, "loss": 1.149, "step": 28164 }, { "epoch": 1.6787459768744784, "grad_norm": 2.9340007305145264, "learning_rate": 6.356151887957901e-06, "loss": 1.1492, "step": 28166 }, { "epoch": 1.6788651805936345, "grad_norm": 3.046330213546753, "learning_rate": 6.351538249921607e-06, "loss": 1.0113, "step": 28168 }, { "epoch": 1.6789843843127905, "grad_norm": 2.9666576385498047, "learning_rate": 6.346926173340146e-06, "loss": 1.0367, "step": 28170 }, { "epoch": 1.6791035880319467, "grad_norm": 3.2843873500823975, "learning_rate": 6.342315658378489e-06, "loss": 1.0528, "step": 28172 }, { "epoch": 1.6792227917511027, "grad_norm": 3.0572309494018555, "learning_rate": 6.337706705201574e-06, "loss": 1.1078, "step": 28174 }, { "epoch": 1.6793419954702586, "grad_norm": 3.630185127258301, "learning_rate": 6.3330993139742835e-06, "loss": 1.1324, "step": 28176 }, { "epoch": 1.6794611991894146, "grad_norm": 3.3064420223236084, "learning_rate": 6.328493484861431e-06, "loss": 1.1672, "step": 28178 }, { "epoch": 1.6795804029085708, "grad_norm": 3.44988751411438, "learning_rate": 6.32388921802779e-06, "loss": 1.1564, "step": 28180 }, { "epoch": 1.6796996066277268, "grad_norm": 3.5721888542175293, "learning_rate": 6.319286513638062e-06, "loss": 1.1041, "step": 28182 }, { "epoch": 1.679818810346883, "grad_norm": 3.2233378887176514, "learning_rate": 6.3146853718569155e-06, "loss": 1.133, "step": 28184 }, { "epoch": 1.679938014066039, "grad_norm": 3.374795436859131, "learning_rate": 6.3100857928489244e-06, "loss": 1.0604, "step": 28186 }, { "epoch": 1.680057217785195, "grad_norm": 3.0866923332214355, "learning_rate": 6.305487776778668e-06, "loss": 1.1861, "step": 28188 }, { "epoch": 1.6801764215043509, "grad_norm": 3.217057466506958, "learning_rate": 6.300891323810598e-06, "loss": 1.1886, "step": 28190 }, { "epoch": 1.6802956252235068, "grad_norm": 3.379953145980835, "learning_rate": 6.296296434109167e-06, "loss": 1.1097, "step": 28192 }, { "epoch": 1.680414828942663, "grad_norm": 2.9720606803894043, "learning_rate": 6.291703107838742e-06, "loss": 1.0734, "step": 28194 }, { "epoch": 1.680534032661819, "grad_norm": 3.2509617805480957, "learning_rate": 6.287111345163649e-06, "loss": 1.1168, "step": 28196 }, { "epoch": 1.6806532363809752, "grad_norm": 3.15389084815979, "learning_rate": 6.282521146248138e-06, "loss": 1.099, "step": 28198 }, { "epoch": 1.6807724401001312, "grad_norm": 3.150956392288208, "learning_rate": 6.2779325112564294e-06, "loss": 1.0583, "step": 28200 }, { "epoch": 1.6808916438192871, "grad_norm": 3.1089913845062256, "learning_rate": 6.273345440352662e-06, "loss": 1.1235, "step": 28202 }, { "epoch": 1.681010847538443, "grad_norm": 3.0160415172576904, "learning_rate": 6.268759933700929e-06, "loss": 1.057, "step": 28204 }, { "epoch": 1.6811300512575993, "grad_norm": 2.9727299213409424, "learning_rate": 6.264175991465304e-06, "loss": 1.0459, "step": 28206 }, { "epoch": 1.6812492549767553, "grad_norm": 3.3472445011138916, "learning_rate": 6.259593613809717e-06, "loss": 1.0316, "step": 28208 }, { "epoch": 1.6813684586959114, "grad_norm": 3.4028728008270264, "learning_rate": 6.255012800898135e-06, "loss": 1.0528, "step": 28210 }, { "epoch": 1.6814876624150674, "grad_norm": 3.42303729057312, "learning_rate": 6.2504335528944304e-06, "loss": 1.0267, "step": 28212 }, { "epoch": 1.6816068661342234, "grad_norm": 3.5633883476257324, "learning_rate": 6.245855869962381e-06, "loss": 1.142, "step": 28214 }, { "epoch": 1.6817260698533794, "grad_norm": 3.549884796142578, "learning_rate": 6.241279752265783e-06, "loss": 1.1306, "step": 28216 }, { "epoch": 1.6818452735725353, "grad_norm": 3.515371561050415, "learning_rate": 6.236705199968324e-06, "loss": 1.2138, "step": 28218 }, { "epoch": 1.6819644772916915, "grad_norm": 3.3578503131866455, "learning_rate": 6.23213221323366e-06, "loss": 1.1906, "step": 28220 }, { "epoch": 1.6820836810108475, "grad_norm": 3.412027359008789, "learning_rate": 6.227560792225373e-06, "loss": 1.1384, "step": 28222 }, { "epoch": 1.6822028847300037, "grad_norm": 2.8963770866394043, "learning_rate": 6.222990937106998e-06, "loss": 0.9472, "step": 28224 }, { "epoch": 1.6823220884491596, "grad_norm": 3.1498568058013916, "learning_rate": 6.218422648042027e-06, "loss": 1.0721, "step": 28226 }, { "epoch": 1.6824412921683156, "grad_norm": 2.879117965698242, "learning_rate": 6.213855925193868e-06, "loss": 1.0249, "step": 28228 }, { "epoch": 1.6825604958874716, "grad_norm": 3.5986146926879883, "learning_rate": 6.209290768725901e-06, "loss": 1.1109, "step": 28230 }, { "epoch": 1.6826796996066278, "grad_norm": 2.9140920639038086, "learning_rate": 6.204727178801417e-06, "loss": 1.0832, "step": 28232 }, { "epoch": 1.6827989033257837, "grad_norm": 3.4214587211608887, "learning_rate": 6.200165155583698e-06, "loss": 1.2178, "step": 28234 }, { "epoch": 1.68291810704494, "grad_norm": 3.21989369392395, "learning_rate": 6.19560469923593e-06, "loss": 1.0984, "step": 28236 }, { "epoch": 1.683037310764096, "grad_norm": 3.385669469833374, "learning_rate": 6.191045809921259e-06, "loss": 1.2389, "step": 28238 }, { "epoch": 1.6831565144832519, "grad_norm": 3.1799423694610596, "learning_rate": 6.186488487802766e-06, "loss": 1.1003, "step": 28240 }, { "epoch": 1.6832757182024078, "grad_norm": 3.362257719039917, "learning_rate": 6.1819327330434885e-06, "loss": 1.2141, "step": 28242 }, { "epoch": 1.6833949219215638, "grad_norm": 3.5866239070892334, "learning_rate": 6.177378545806401e-06, "loss": 1.1471, "step": 28244 }, { "epoch": 1.68351412564072, "grad_norm": 3.2790141105651855, "learning_rate": 6.172825926254416e-06, "loss": 1.1071, "step": 28246 }, { "epoch": 1.683633329359876, "grad_norm": 2.9699466228485107, "learning_rate": 6.168274874550406e-06, "loss": 1.0706, "step": 28248 }, { "epoch": 1.6837525330790322, "grad_norm": 3.2990500926971436, "learning_rate": 6.163725390857161e-06, "loss": 1.1649, "step": 28250 }, { "epoch": 1.6838717367981881, "grad_norm": 3.2964730262756348, "learning_rate": 6.159177475337463e-06, "loss": 1.2105, "step": 28252 }, { "epoch": 1.683990940517344, "grad_norm": 3.626694440841675, "learning_rate": 6.154631128153959e-06, "loss": 1.1559, "step": 28254 }, { "epoch": 1.6841101442365, "grad_norm": 3.2563812732696533, "learning_rate": 6.150086349469336e-06, "loss": 1.1385, "step": 28256 }, { "epoch": 1.6842293479556563, "grad_norm": 2.8976306915283203, "learning_rate": 6.145543139446158e-06, "loss": 1.1102, "step": 28258 }, { "epoch": 1.6843485516748122, "grad_norm": 3.356719970703125, "learning_rate": 6.141001498246929e-06, "loss": 1.0148, "step": 28260 }, { "epoch": 1.6844677553939684, "grad_norm": 2.809983015060425, "learning_rate": 6.136461426034152e-06, "loss": 1.0669, "step": 28262 }, { "epoch": 1.6845869591131244, "grad_norm": 3.093348979949951, "learning_rate": 6.131922922970223e-06, "loss": 1.0839, "step": 28264 }, { "epoch": 1.6847061628322804, "grad_norm": 3.120568037033081, "learning_rate": 6.12738598921751e-06, "loss": 1.0166, "step": 28266 }, { "epoch": 1.6848253665514363, "grad_norm": 3.3146936893463135, "learning_rate": 6.122850624938292e-06, "loss": 1.0186, "step": 28268 }, { "epoch": 1.6849445702705923, "grad_norm": 2.995234251022339, "learning_rate": 6.118316830294857e-06, "loss": 1.0525, "step": 28270 }, { "epoch": 1.6850637739897485, "grad_norm": 3.421642780303955, "learning_rate": 6.113784605449352e-06, "loss": 1.1997, "step": 28272 }, { "epoch": 1.6851829777089047, "grad_norm": 3.5289461612701416, "learning_rate": 6.10925395056392e-06, "loss": 1.007, "step": 28274 }, { "epoch": 1.6853021814280607, "grad_norm": 2.925356388092041, "learning_rate": 6.104724865800665e-06, "loss": 1.1564, "step": 28276 }, { "epoch": 1.6854213851472166, "grad_norm": 3.062495470046997, "learning_rate": 6.100197351321568e-06, "loss": 1.2445, "step": 28278 }, { "epoch": 1.6855405888663726, "grad_norm": 3.4631972312927246, "learning_rate": 6.095671407288622e-06, "loss": 1.2093, "step": 28280 }, { "epoch": 1.6856597925855286, "grad_norm": 3.1803395748138428, "learning_rate": 6.091147033863731e-06, "loss": 1.0365, "step": 28282 }, { "epoch": 1.6857789963046848, "grad_norm": 3.188992977142334, "learning_rate": 6.0866242312087375e-06, "loss": 1.0703, "step": 28284 }, { "epoch": 1.6858982000238407, "grad_norm": 3.414238452911377, "learning_rate": 6.082102999485445e-06, "loss": 1.1981, "step": 28286 }, { "epoch": 1.686017403742997, "grad_norm": 3.0243163108825684, "learning_rate": 6.077583338855592e-06, "loss": 1.0741, "step": 28288 }, { "epoch": 1.6861366074621529, "grad_norm": 3.3262176513671875, "learning_rate": 6.073065249480864e-06, "loss": 1.3371, "step": 28290 }, { "epoch": 1.6862558111813089, "grad_norm": 3.725574493408203, "learning_rate": 6.068548731522883e-06, "loss": 1.0386, "step": 28292 }, { "epoch": 1.6863750149004648, "grad_norm": 2.73568058013916, "learning_rate": 6.064033785143225e-06, "loss": 1.1098, "step": 28294 }, { "epoch": 1.6864942186196208, "grad_norm": 3.4662933349609375, "learning_rate": 6.0595204105033945e-06, "loss": 1.1187, "step": 28296 }, { "epoch": 1.686613422338777, "grad_norm": 3.29789400100708, "learning_rate": 6.0550086077648725e-06, "loss": 1.024, "step": 28298 }, { "epoch": 1.6867326260579332, "grad_norm": 3.1482508182525635, "learning_rate": 6.050498377089042e-06, "loss": 1.1251, "step": 28300 }, { "epoch": 1.6868518297770891, "grad_norm": 3.1898529529571533, "learning_rate": 6.045989718637263e-06, "loss": 1.0241, "step": 28302 }, { "epoch": 1.686971033496245, "grad_norm": 3.293137788772583, "learning_rate": 6.0414826325708165e-06, "loss": 1.1558, "step": 28304 }, { "epoch": 1.687090237215401, "grad_norm": 3.490832805633545, "learning_rate": 6.036977119050935e-06, "loss": 1.1727, "step": 28306 }, { "epoch": 1.687209440934557, "grad_norm": 3.365476369857788, "learning_rate": 6.032473178238801e-06, "loss": 1.0782, "step": 28308 }, { "epoch": 1.6873286446537132, "grad_norm": 3.4125170707702637, "learning_rate": 6.027970810295536e-06, "loss": 1.0859, "step": 28310 }, { "epoch": 1.6874478483728692, "grad_norm": 3.171161651611328, "learning_rate": 6.023470015382204e-06, "loss": 1.2477, "step": 28312 }, { "epoch": 1.6875670520920254, "grad_norm": 3.435692548751831, "learning_rate": 6.018970793659806e-06, "loss": 1.3982, "step": 28314 }, { "epoch": 1.6876862558111814, "grad_norm": 2.9974911212921143, "learning_rate": 6.014473145289318e-06, "loss": 1.0431, "step": 28316 }, { "epoch": 1.6878054595303373, "grad_norm": 3.4208626747131348, "learning_rate": 6.009977070431616e-06, "loss": 1.0751, "step": 28318 }, { "epoch": 1.6879246632494933, "grad_norm": 3.008970260620117, "learning_rate": 6.005482569247528e-06, "loss": 1.1168, "step": 28320 }, { "epoch": 1.6880438669686493, "grad_norm": 3.1426029205322266, "learning_rate": 6.000989641897875e-06, "loss": 1.0344, "step": 28322 }, { "epoch": 1.6881630706878055, "grad_norm": 3.4204115867614746, "learning_rate": 5.996498288543345e-06, "loss": 1.0557, "step": 28324 }, { "epoch": 1.6882822744069617, "grad_norm": 2.989586353302002, "learning_rate": 5.992008509344638e-06, "loss": 1.1752, "step": 28326 }, { "epoch": 1.6884014781261176, "grad_norm": 3.310028553009033, "learning_rate": 5.987520304462363e-06, "loss": 1.142, "step": 28328 }, { "epoch": 1.6885206818452736, "grad_norm": 3.346337080001831, "learning_rate": 5.983033674057065e-06, "loss": 1.0353, "step": 28330 }, { "epoch": 1.6886398855644296, "grad_norm": 3.1852290630340576, "learning_rate": 5.978548618289259e-06, "loss": 0.9213, "step": 28332 }, { "epoch": 1.6887590892835855, "grad_norm": 3.2804503440856934, "learning_rate": 5.97406513731939e-06, "loss": 1.0714, "step": 28334 }, { "epoch": 1.6888782930027417, "grad_norm": 3.278881788253784, "learning_rate": 5.96958323130784e-06, "loss": 1.1646, "step": 28336 }, { "epoch": 1.6889974967218977, "grad_norm": 3.1775777339935303, "learning_rate": 5.9651029004149375e-06, "loss": 1.1738, "step": 28338 }, { "epoch": 1.6891167004410539, "grad_norm": 3.4527828693389893, "learning_rate": 5.960624144800986e-06, "loss": 1.2293, "step": 28340 }, { "epoch": 1.6892359041602099, "grad_norm": 3.207692861557007, "learning_rate": 5.956146964626169e-06, "loss": 1.16, "step": 28342 }, { "epoch": 1.6893551078793658, "grad_norm": 3.410485029220581, "learning_rate": 5.951671360050681e-06, "loss": 1.1626, "step": 28344 }, { "epoch": 1.6894743115985218, "grad_norm": 2.872162342071533, "learning_rate": 5.9471973312346274e-06, "loss": 1.1326, "step": 28346 }, { "epoch": 1.6895935153176778, "grad_norm": 3.3457236289978027, "learning_rate": 5.942724878338029e-06, "loss": 1.1005, "step": 28348 }, { "epoch": 1.689712719036834, "grad_norm": 3.346360683441162, "learning_rate": 5.938254001520915e-06, "loss": 1.0929, "step": 28350 }, { "epoch": 1.6898319227559901, "grad_norm": 3.022994041442871, "learning_rate": 5.933784700943207e-06, "loss": 1.1164, "step": 28352 }, { "epoch": 1.6899511264751461, "grad_norm": 3.5358283519744873, "learning_rate": 5.929316976764793e-06, "loss": 1.1438, "step": 28354 }, { "epoch": 1.690070330194302, "grad_norm": 2.845301628112793, "learning_rate": 5.924850829145501e-06, "loss": 1.1291, "step": 28356 }, { "epoch": 1.690189533913458, "grad_norm": 3.2579405307769775, "learning_rate": 5.920386258245092e-06, "loss": 1.002, "step": 28358 }, { "epoch": 1.690308737632614, "grad_norm": 3.5771090984344482, "learning_rate": 5.915923264223272e-06, "loss": 1.0587, "step": 28360 }, { "epoch": 1.6904279413517702, "grad_norm": 3.3551127910614014, "learning_rate": 5.911461847239719e-06, "loss": 0.984, "step": 28362 }, { "epoch": 1.6905471450709262, "grad_norm": 3.0120813846588135, "learning_rate": 5.907002007454038e-06, "loss": 1.1545, "step": 28364 }, { "epoch": 1.6906663487900824, "grad_norm": 2.9668843746185303, "learning_rate": 5.902543745025735e-06, "loss": 0.9676, "step": 28366 }, { "epoch": 1.6907855525092383, "grad_norm": 3.276416778564453, "learning_rate": 5.898087060114327e-06, "loss": 1.1151, "step": 28368 }, { "epoch": 1.6909047562283943, "grad_norm": 2.993116855621338, "learning_rate": 5.893631952879242e-06, "loss": 1.169, "step": 28370 }, { "epoch": 1.6910239599475503, "grad_norm": 3.6207759380340576, "learning_rate": 5.889178423479846e-06, "loss": 1.0898, "step": 28372 }, { "epoch": 1.6911431636667063, "grad_norm": 3.3039798736572266, "learning_rate": 5.884726472075469e-06, "loss": 0.9814, "step": 28374 }, { "epoch": 1.6912623673858624, "grad_norm": 3.4127466678619385, "learning_rate": 5.8802760988253605e-06, "loss": 1.1378, "step": 28376 }, { "epoch": 1.6913815711050186, "grad_norm": 3.4352426528930664, "learning_rate": 5.875827303888726e-06, "loss": 1.1201, "step": 28378 }, { "epoch": 1.6915007748241746, "grad_norm": 3.363372325897217, "learning_rate": 5.8713800874247215e-06, "loss": 1.0085, "step": 28380 }, { "epoch": 1.6916199785433306, "grad_norm": 2.861917495727539, "learning_rate": 5.866934449592437e-06, "loss": 1.0414, "step": 28382 }, { "epoch": 1.6917391822624865, "grad_norm": 3.199589967727661, "learning_rate": 5.862490390550895e-06, "loss": 1.1041, "step": 28384 }, { "epoch": 1.6918583859816425, "grad_norm": 3.0716114044189453, "learning_rate": 5.858047910459108e-06, "loss": 1.1105, "step": 28386 }, { "epoch": 1.6919775897007987, "grad_norm": 3.1435890197753906, "learning_rate": 5.8536070094759585e-06, "loss": 0.9867, "step": 28388 }, { "epoch": 1.6920967934199547, "grad_norm": 3.4176974296569824, "learning_rate": 5.8491676877603375e-06, "loss": 1.0463, "step": 28390 }, { "epoch": 1.6922159971391109, "grad_norm": 3.227842092514038, "learning_rate": 5.844729945471061e-06, "loss": 1.1573, "step": 28392 }, { "epoch": 1.6923352008582668, "grad_norm": 3.0766971111297607, "learning_rate": 5.840293782766854e-06, "loss": 1.0533, "step": 28394 }, { "epoch": 1.6924544045774228, "grad_norm": 3.508868455886841, "learning_rate": 5.8358591998064315e-06, "loss": 1.0525, "step": 28396 }, { "epoch": 1.6925736082965788, "grad_norm": 3.3317019939422607, "learning_rate": 5.831426196748435e-06, "loss": 1.0138, "step": 28398 }, { "epoch": 1.6926928120157347, "grad_norm": 3.4095396995544434, "learning_rate": 5.8269947737514485e-06, "loss": 1.2704, "step": 28400 }, { "epoch": 1.692812015734891, "grad_norm": 3.714782476425171, "learning_rate": 5.822564930973978e-06, "loss": 1.1287, "step": 28402 }, { "epoch": 1.6929312194540471, "grad_norm": 3.6865034103393555, "learning_rate": 5.818136668574536e-06, "loss": 1.1976, "step": 28404 }, { "epoch": 1.693050423173203, "grad_norm": 3.2442739009857178, "learning_rate": 5.813709986711502e-06, "loss": 1.027, "step": 28406 }, { "epoch": 1.693169626892359, "grad_norm": 3.459404468536377, "learning_rate": 5.809284885543232e-06, "loss": 1.1374, "step": 28408 }, { "epoch": 1.693288830611515, "grad_norm": 3.1485159397125244, "learning_rate": 5.804861365228059e-06, "loss": 1.1703, "step": 28410 }, { "epoch": 1.693408034330671, "grad_norm": 3.0119378566741943, "learning_rate": 5.800439425924186e-06, "loss": 0.9865, "step": 28412 }, { "epoch": 1.6935272380498272, "grad_norm": 3.0724329948425293, "learning_rate": 5.796019067789832e-06, "loss": 1.0035, "step": 28414 }, { "epoch": 1.6936464417689832, "grad_norm": 3.476321220397949, "learning_rate": 5.791600290983124e-06, "loss": 1.1318, "step": 28416 }, { "epoch": 1.6937656454881393, "grad_norm": 3.381281614303589, "learning_rate": 5.787183095662124e-06, "loss": 0.9536, "step": 28418 }, { "epoch": 1.6938848492072953, "grad_norm": 2.8803935050964355, "learning_rate": 5.782767481984858e-06, "loss": 1.0255, "step": 28420 }, { "epoch": 1.6940040529264513, "grad_norm": 3.1008639335632324, "learning_rate": 5.778353450109286e-06, "loss": 1.0125, "step": 28422 }, { "epoch": 1.6941232566456073, "grad_norm": 3.28293776512146, "learning_rate": 5.773941000193317e-06, "loss": 1.129, "step": 28424 }, { "epoch": 1.6942424603647632, "grad_norm": 3.4112606048583984, "learning_rate": 5.769530132394796e-06, "loss": 1.1998, "step": 28426 }, { "epoch": 1.6943616640839194, "grad_norm": 3.229219436645508, "learning_rate": 5.7651208468715136e-06, "loss": 1.0893, "step": 28428 }, { "epoch": 1.6944808678030756, "grad_norm": 3.591254472732544, "learning_rate": 5.760713143781199e-06, "loss": 1.1311, "step": 28430 }, { "epoch": 1.6946000715222316, "grad_norm": 2.8362014293670654, "learning_rate": 5.756307023281549e-06, "loss": 1.1484, "step": 28432 }, { "epoch": 1.6947192752413875, "grad_norm": 3.1741058826446533, "learning_rate": 5.751902485530169e-06, "loss": 1.0905, "step": 28434 }, { "epoch": 1.6948384789605435, "grad_norm": 3.3386802673339844, "learning_rate": 5.747499530684636e-06, "loss": 1.0581, "step": 28436 }, { "epoch": 1.6949576826796995, "grad_norm": 3.3139967918395996, "learning_rate": 5.743098158902449e-06, "loss": 1.2413, "step": 28438 }, { "epoch": 1.6950768863988557, "grad_norm": 2.952821969985962, "learning_rate": 5.738698370341067e-06, "loss": 1.2176, "step": 28440 }, { "epoch": 1.6951960901180116, "grad_norm": 3.4049885272979736, "learning_rate": 5.73430016515788e-06, "loss": 1.2065, "step": 28442 }, { "epoch": 1.6953152938371678, "grad_norm": 3.119018316268921, "learning_rate": 5.729903543510235e-06, "loss": 1.0513, "step": 28444 }, { "epoch": 1.6954344975563238, "grad_norm": 3.2102909088134766, "learning_rate": 5.725508505555405e-06, "loss": 1.092, "step": 28446 }, { "epoch": 1.6955537012754798, "grad_norm": 3.3893086910247803, "learning_rate": 5.7211150514506095e-06, "loss": 1.1754, "step": 28448 }, { "epoch": 1.6956729049946357, "grad_norm": 3.520494222640991, "learning_rate": 5.716723181353045e-06, "loss": 1.141, "step": 28450 }, { "epoch": 1.6957921087137917, "grad_norm": 3.2436275482177734, "learning_rate": 5.712332895419797e-06, "loss": 1.1386, "step": 28452 }, { "epoch": 1.695911312432948, "grad_norm": 3.3005266189575195, "learning_rate": 5.7079441938079235e-06, "loss": 1.1436, "step": 28454 }, { "epoch": 1.696030516152104, "grad_norm": 3.58610463142395, "learning_rate": 5.703557076674449e-06, "loss": 1.0761, "step": 28456 }, { "epoch": 1.69614971987126, "grad_norm": 3.3131282329559326, "learning_rate": 5.699171544176274e-06, "loss": 1.0578, "step": 28458 }, { "epoch": 1.696268923590416, "grad_norm": 3.04194712638855, "learning_rate": 5.694787596470314e-06, "loss": 1.0968, "step": 28460 }, { "epoch": 1.696388127309572, "grad_norm": 3.2401161193847656, "learning_rate": 5.690405233713397e-06, "loss": 1.0724, "step": 28462 }, { "epoch": 1.696507331028728, "grad_norm": 2.959043025970459, "learning_rate": 5.686024456062283e-06, "loss": 1.1467, "step": 28464 }, { "epoch": 1.6966265347478842, "grad_norm": 3.1586313247680664, "learning_rate": 5.681645263673685e-06, "loss": 1.1792, "step": 28466 }, { "epoch": 1.6967457384670401, "grad_norm": 3.135218620300293, "learning_rate": 5.677267656704288e-06, "loss": 1.1265, "step": 28468 }, { "epoch": 1.6968649421861963, "grad_norm": 3.335603952407837, "learning_rate": 5.672891635310668e-06, "loss": 1.1026, "step": 28470 }, { "epoch": 1.6969841459053523, "grad_norm": 3.164848804473877, "learning_rate": 5.6685171996493645e-06, "loss": 1.1697, "step": 28472 }, { "epoch": 1.6971033496245083, "grad_norm": 2.9487085342407227, "learning_rate": 5.664144349876904e-06, "loss": 1.0139, "step": 28474 }, { "epoch": 1.6972225533436642, "grad_norm": 3.1544573307037354, "learning_rate": 5.659773086149672e-06, "loss": 0.9812, "step": 28476 }, { "epoch": 1.6973417570628202, "grad_norm": 3.240812301635742, "learning_rate": 5.655403408624072e-06, "loss": 1.0288, "step": 28478 }, { "epoch": 1.6974609607819764, "grad_norm": 3.186051607131958, "learning_rate": 5.651035317456421e-06, "loss": 1.1723, "step": 28480 }, { "epoch": 1.6975801645011326, "grad_norm": 3.0190651416778564, "learning_rate": 5.646668812802969e-06, "loss": 0.9843, "step": 28482 }, { "epoch": 1.6976993682202886, "grad_norm": 2.8529224395751953, "learning_rate": 5.642303894819934e-06, "loss": 0.9941, "step": 28484 }, { "epoch": 1.6978185719394445, "grad_norm": 3.2104899883270264, "learning_rate": 5.637940563663452e-06, "loss": 1.1787, "step": 28486 }, { "epoch": 1.6979377756586005, "grad_norm": 2.9645016193389893, "learning_rate": 5.633578819489616e-06, "loss": 1.056, "step": 28488 }, { "epoch": 1.6980569793777565, "grad_norm": 3.523087739944458, "learning_rate": 5.629218662454472e-06, "loss": 1.1306, "step": 28490 }, { "epoch": 1.6981761830969127, "grad_norm": 2.908444881439209, "learning_rate": 5.624860092713985e-06, "loss": 1.1891, "step": 28492 }, { "epoch": 1.6982953868160686, "grad_norm": 3.2541513442993164, "learning_rate": 5.620503110424069e-06, "loss": 0.9744, "step": 28494 }, { "epoch": 1.6984145905352248, "grad_norm": 3.727250337600708, "learning_rate": 5.616147715740611e-06, "loss": 1.1073, "step": 28496 }, { "epoch": 1.6985337942543808, "grad_norm": 3.60917329788208, "learning_rate": 5.6117939088194185e-06, "loss": 1.3414, "step": 28498 }, { "epoch": 1.6986529979735367, "grad_norm": 3.3279905319213867, "learning_rate": 5.607441689816207e-06, "loss": 1.1701, "step": 28500 }, { "epoch": 1.6987722016926927, "grad_norm": 3.401937961578369, "learning_rate": 5.603091058886706e-06, "loss": 1.1273, "step": 28502 }, { "epoch": 1.6988914054118487, "grad_norm": 2.778597831726074, "learning_rate": 5.598742016186536e-06, "loss": 1.1415, "step": 28504 }, { "epoch": 1.6990106091310049, "grad_norm": 3.4297826290130615, "learning_rate": 5.5943945618712854e-06, "loss": 1.152, "step": 28506 }, { "epoch": 1.699129812850161, "grad_norm": 2.9202637672424316, "learning_rate": 5.590048696096472e-06, "loss": 1.1106, "step": 28508 }, { "epoch": 1.699249016569317, "grad_norm": 2.9527077674865723, "learning_rate": 5.585704419017568e-06, "loss": 1.055, "step": 28510 }, { "epoch": 1.699368220288473, "grad_norm": 3.3741629123687744, "learning_rate": 5.581361730789975e-06, "loss": 1.1104, "step": 28512 }, { "epoch": 1.699487424007629, "grad_norm": 3.447675943374634, "learning_rate": 5.577020631569052e-06, "loss": 1.1066, "step": 28514 }, { "epoch": 1.699606627726785, "grad_norm": 3.327958822250366, "learning_rate": 5.572681121510093e-06, "loss": 1.0849, "step": 28516 }, { "epoch": 1.6997258314459411, "grad_norm": 3.3301734924316406, "learning_rate": 5.568343200768322e-06, "loss": 1.1926, "step": 28518 }, { "epoch": 1.699845035165097, "grad_norm": 2.8860511779785156, "learning_rate": 5.5640068694989615e-06, "loss": 1.1558, "step": 28520 }, { "epoch": 1.6999642388842533, "grad_norm": 3.2479512691497803, "learning_rate": 5.559672127857085e-06, "loss": 1.0292, "step": 28522 }, { "epoch": 1.7000834426034093, "grad_norm": 2.996920108795166, "learning_rate": 5.5553389759978036e-06, "loss": 1.0406, "step": 28524 }, { "epoch": 1.7002026463225652, "grad_norm": 3.8250036239624023, "learning_rate": 5.551007414076109e-06, "loss": 1.2301, "step": 28526 }, { "epoch": 1.7003218500417212, "grad_norm": 2.9329802989959717, "learning_rate": 5.546677442246962e-06, "loss": 1.0048, "step": 28528 }, { "epoch": 1.7004410537608772, "grad_norm": 3.27238130569458, "learning_rate": 5.542349060665259e-06, "loss": 1.1213, "step": 28530 }, { "epoch": 1.7005602574800334, "grad_norm": 3.120102882385254, "learning_rate": 5.538022269485837e-06, "loss": 1.2276, "step": 28532 }, { "epoch": 1.7006794611991896, "grad_norm": 3.092087745666504, "learning_rate": 5.533697068863486e-06, "loss": 1.154, "step": 28534 }, { "epoch": 1.7007986649183455, "grad_norm": 3.1624720096588135, "learning_rate": 5.529373458952919e-06, "loss": 1.0304, "step": 28536 }, { "epoch": 1.7009178686375015, "grad_norm": 3.313244342803955, "learning_rate": 5.5250514399088346e-06, "loss": 1.0615, "step": 28538 }, { "epoch": 1.7010370723566575, "grad_norm": 2.926791191101074, "learning_rate": 5.5207310118858155e-06, "loss": 1.1209, "step": 28540 }, { "epoch": 1.7011562760758134, "grad_norm": 3.4287662506103516, "learning_rate": 5.5164121750384435e-06, "loss": 1.0834, "step": 28542 }, { "epoch": 1.7012754797949696, "grad_norm": 3.3696365356445312, "learning_rate": 5.512094929521211e-06, "loss": 0.9676, "step": 28544 }, { "epoch": 1.7013946835141256, "grad_norm": 3.264941692352295, "learning_rate": 5.507779275488539e-06, "loss": 1.2579, "step": 28546 }, { "epoch": 1.7015138872332818, "grad_norm": 3.4590866565704346, "learning_rate": 5.503465213094844e-06, "loss": 1.1429, "step": 28548 }, { "epoch": 1.7016330909524378, "grad_norm": 2.6439900398254395, "learning_rate": 5.49915274249444e-06, "loss": 1.1232, "step": 28550 }, { "epoch": 1.7017522946715937, "grad_norm": 3.352515697479248, "learning_rate": 5.494841863841599e-06, "loss": 1.1151, "step": 28552 }, { "epoch": 1.7018714983907497, "grad_norm": 3.137686252593994, "learning_rate": 5.490532577290536e-06, "loss": 0.9867, "step": 28554 }, { "epoch": 1.7019907021099059, "grad_norm": 3.1136622428894043, "learning_rate": 5.4862248829954114e-06, "loss": 1.1033, "step": 28556 }, { "epoch": 1.7021099058290619, "grad_norm": 3.666423797607422, "learning_rate": 5.481918781110329e-06, "loss": 1.2113, "step": 28558 }, { "epoch": 1.702229109548218, "grad_norm": 3.8504629135131836, "learning_rate": 5.477614271789311e-06, "loss": 1.1407, "step": 28560 }, { "epoch": 1.702348313267374, "grad_norm": 3.316232204437256, "learning_rate": 5.473311355186389e-06, "loss": 1.1302, "step": 28562 }, { "epoch": 1.70246751698653, "grad_norm": 3.4030256271362305, "learning_rate": 5.4690100314554404e-06, "loss": 1.2896, "step": 28564 }, { "epoch": 1.702586720705686, "grad_norm": 3.334754467010498, "learning_rate": 5.464710300750381e-06, "loss": 1.0549, "step": 28566 }, { "epoch": 1.702705924424842, "grad_norm": 3.0417656898498535, "learning_rate": 5.460412163225004e-06, "loss": 1.0592, "step": 28568 }, { "epoch": 1.7028251281439981, "grad_norm": 3.3564939498901367, "learning_rate": 5.456115619033081e-06, "loss": 1.1375, "step": 28570 }, { "epoch": 1.702944331863154, "grad_norm": 3.210617780685425, "learning_rate": 5.4518206683283e-06, "loss": 1.1597, "step": 28572 }, { "epoch": 1.7030635355823103, "grad_norm": 3.609790325164795, "learning_rate": 5.447527311264316e-06, "loss": 1.0449, "step": 28574 }, { "epoch": 1.7031827393014662, "grad_norm": 3.141559362411499, "learning_rate": 5.443235547994718e-06, "loss": 1.0378, "step": 28576 }, { "epoch": 1.7033019430206222, "grad_norm": 3.4070024490356445, "learning_rate": 5.438945378673033e-06, "loss": 1.1919, "step": 28578 }, { "epoch": 1.7034211467397782, "grad_norm": 3.323903799057007, "learning_rate": 5.434656803452731e-06, "loss": 1.1244, "step": 28580 }, { "epoch": 1.7035403504589344, "grad_norm": 2.542330026626587, "learning_rate": 5.43036982248723e-06, "loss": 0.9726, "step": 28582 }, { "epoch": 1.7036595541780903, "grad_norm": 2.9286344051361084, "learning_rate": 5.4260844359299126e-06, "loss": 0.9375, "step": 28584 }, { "epoch": 1.7037787578972465, "grad_norm": 3.2463412284851074, "learning_rate": 5.421800643934039e-06, "loss": 1.0717, "step": 28586 }, { "epoch": 1.7038979616164025, "grad_norm": 3.2084741592407227, "learning_rate": 5.417518446652892e-06, "loss": 1.1046, "step": 28588 }, { "epoch": 1.7040171653355585, "grad_norm": 2.822967767715454, "learning_rate": 5.413237844239655e-06, "loss": 1.0845, "step": 28590 }, { "epoch": 1.7041363690547144, "grad_norm": 3.442920207977295, "learning_rate": 5.408958836847433e-06, "loss": 1.2903, "step": 28592 }, { "epoch": 1.7042555727738704, "grad_norm": 3.0165417194366455, "learning_rate": 5.404681424629332e-06, "loss": 1.0932, "step": 28594 }, { "epoch": 1.7043747764930266, "grad_norm": 3.353851556777954, "learning_rate": 5.400405607738357e-06, "loss": 1.1939, "step": 28596 }, { "epoch": 1.7044939802121826, "grad_norm": 3.5478756427764893, "learning_rate": 5.396131386327469e-06, "loss": 1.1872, "step": 28598 }, { "epoch": 1.7046131839313388, "grad_norm": 2.7575340270996094, "learning_rate": 5.391858760549562e-06, "loss": 1.0692, "step": 28600 }, { "epoch": 1.7047323876504947, "grad_norm": 3.338369131088257, "learning_rate": 5.387587730557514e-06, "loss": 1.1927, "step": 28602 }, { "epoch": 1.7048515913696507, "grad_norm": 3.4489824771881104, "learning_rate": 5.38331829650408e-06, "loss": 1.2335, "step": 28604 }, { "epoch": 1.7049707950888067, "grad_norm": 3.465986967086792, "learning_rate": 5.379050458541996e-06, "loss": 1.0299, "step": 28606 }, { "epoch": 1.7050899988079629, "grad_norm": 3.116950035095215, "learning_rate": 5.374784216823969e-06, "loss": 1.0472, "step": 28608 }, { "epoch": 1.7052092025271188, "grad_norm": 3.206878662109375, "learning_rate": 5.370519571502569e-06, "loss": 1.1953, "step": 28610 }, { "epoch": 1.705328406246275, "grad_norm": 3.5176780223846436, "learning_rate": 5.366256522730395e-06, "loss": 1.128, "step": 28612 }, { "epoch": 1.705447609965431, "grad_norm": 3.551854372024536, "learning_rate": 5.361995070659937e-06, "loss": 1.2264, "step": 28614 }, { "epoch": 1.705566813684587, "grad_norm": 3.13775372505188, "learning_rate": 5.357735215443644e-06, "loss": 0.9345, "step": 28616 }, { "epoch": 1.705686017403743, "grad_norm": 3.377577304840088, "learning_rate": 5.3534769572338995e-06, "loss": 1.1306, "step": 28618 }, { "epoch": 1.705805221122899, "grad_norm": 3.541088342666626, "learning_rate": 5.349220296183044e-06, "loss": 1.0736, "step": 28620 }, { "epoch": 1.705924424842055, "grad_norm": 3.5147135257720947, "learning_rate": 5.3449652324433495e-06, "loss": 1.1797, "step": 28622 }, { "epoch": 1.706043628561211, "grad_norm": 3.3145294189453125, "learning_rate": 5.340711766167033e-06, "loss": 1.1315, "step": 28624 }, { "epoch": 1.7061628322803672, "grad_norm": 3.332644462585449, "learning_rate": 5.336459897506257e-06, "loss": 1.0127, "step": 28626 }, { "epoch": 1.7062820359995232, "grad_norm": 3.2707407474517822, "learning_rate": 5.332209626613116e-06, "loss": 1.2495, "step": 28628 }, { "epoch": 1.7064012397186792, "grad_norm": 3.0209429264068604, "learning_rate": 5.3279609536396765e-06, "loss": 1.0019, "step": 28630 }, { "epoch": 1.7065204434378352, "grad_norm": 3.3410048484802246, "learning_rate": 5.323713878737929e-06, "loss": 1.0802, "step": 28632 }, { "epoch": 1.7066396471569913, "grad_norm": 3.157959461212158, "learning_rate": 5.319468402059768e-06, "loss": 1.0361, "step": 28634 }, { "epoch": 1.7067588508761473, "grad_norm": 3.1632511615753174, "learning_rate": 5.315224523757112e-06, "loss": 1.0212, "step": 28636 }, { "epoch": 1.7068780545953035, "grad_norm": 3.2703304290771484, "learning_rate": 5.31098224398176e-06, "loss": 1.1042, "step": 28638 }, { "epoch": 1.7069972583144595, "grad_norm": 2.834261894226074, "learning_rate": 5.30674156288547e-06, "loss": 1.0159, "step": 28640 }, { "epoch": 1.7071164620336154, "grad_norm": 3.288928508758545, "learning_rate": 5.302502480619959e-06, "loss": 1.096, "step": 28642 }, { "epoch": 1.7072356657527714, "grad_norm": 3.2568259239196777, "learning_rate": 5.2982649973368605e-06, "loss": 1.2982, "step": 28644 }, { "epoch": 1.7073548694719274, "grad_norm": 3.099168300628662, "learning_rate": 5.294029113187754e-06, "loss": 1.1266, "step": 28646 }, { "epoch": 1.7074740731910836, "grad_norm": 3.48071551322937, "learning_rate": 5.289794828324213e-06, "loss": 1.1813, "step": 28648 }, { "epoch": 1.7075932769102398, "grad_norm": 3.1332285404205322, "learning_rate": 5.285562142897671e-06, "loss": 1.1132, "step": 28650 }, { "epoch": 1.7077124806293957, "grad_norm": 3.269355297088623, "learning_rate": 5.281331057059552e-06, "loss": 1.12, "step": 28652 }, { "epoch": 1.7078316843485517, "grad_norm": 3.246903657913208, "learning_rate": 5.277101570961246e-06, "loss": 1.031, "step": 28654 }, { "epoch": 1.7079508880677077, "grad_norm": 3.3421831130981445, "learning_rate": 5.272873684754015e-06, "loss": 0.9603, "step": 28656 }, { "epoch": 1.7080700917868636, "grad_norm": 3.5711941719055176, "learning_rate": 5.268647398589133e-06, "loss": 1.1483, "step": 28658 }, { "epoch": 1.7081892955060198, "grad_norm": 3.14247727394104, "learning_rate": 5.264422712617778e-06, "loss": 1.2081, "step": 28660 }, { "epoch": 1.7083084992251758, "grad_norm": 3.237276792526245, "learning_rate": 5.2601996269910855e-06, "loss": 1.221, "step": 28662 }, { "epoch": 1.708427702944332, "grad_norm": 3.2775018215179443, "learning_rate": 5.255978141860129e-06, "loss": 1.0383, "step": 28664 }, { "epoch": 1.708546906663488, "grad_norm": 3.3494601249694824, "learning_rate": 5.251758257375922e-06, "loss": 1.23, "step": 28666 }, { "epoch": 1.708666110382644, "grad_norm": 3.4669201374053955, "learning_rate": 5.247539973689425e-06, "loss": 1.0833, "step": 28668 }, { "epoch": 1.7087853141018, "grad_norm": 3.19282603263855, "learning_rate": 5.2433232909515355e-06, "loss": 1.0911, "step": 28670 }, { "epoch": 1.7089045178209559, "grad_norm": 3.2133920192718506, "learning_rate": 5.239108209313126e-06, "loss": 1.2153, "step": 28672 }, { "epoch": 1.709023721540112, "grad_norm": 2.85465931892395, "learning_rate": 5.234894728924944e-06, "loss": 1.0534, "step": 28674 }, { "epoch": 1.7091429252592683, "grad_norm": 3.5645246505737305, "learning_rate": 5.230682849937746e-06, "loss": 1.0809, "step": 28676 }, { "epoch": 1.7092621289784242, "grad_norm": 3.361682415008545, "learning_rate": 5.226472572502212e-06, "loss": 1.1628, "step": 28678 }, { "epoch": 1.7093813326975802, "grad_norm": 3.0629942417144775, "learning_rate": 5.22226389676892e-06, "loss": 1.2142, "step": 28680 }, { "epoch": 1.7095005364167362, "grad_norm": 3.097233295440674, "learning_rate": 5.218056822888468e-06, "loss": 1.166, "step": 28682 }, { "epoch": 1.7096197401358921, "grad_norm": 3.0727241039276123, "learning_rate": 5.213851351011345e-06, "loss": 0.8343, "step": 28684 }, { "epoch": 1.7097389438550483, "grad_norm": 3.2327795028686523, "learning_rate": 5.2096474812879885e-06, "loss": 1.1516, "step": 28686 }, { "epoch": 1.7098581475742043, "grad_norm": 2.975961446762085, "learning_rate": 5.205445213868793e-06, "loss": 1.2098, "step": 28688 }, { "epoch": 1.7099773512933605, "grad_norm": 3.3206632137298584, "learning_rate": 5.201244548904089e-06, "loss": 1.0659, "step": 28690 }, { "epoch": 1.7100965550125165, "grad_norm": 3.1228580474853516, "learning_rate": 5.197045486544139e-06, "loss": 1.0571, "step": 28692 }, { "epoch": 1.7102157587316724, "grad_norm": 3.5318427085876465, "learning_rate": 5.192848026939156e-06, "loss": 1.0506, "step": 28694 }, { "epoch": 1.7103349624508284, "grad_norm": 3.4906198978424072, "learning_rate": 5.188652170239322e-06, "loss": 1.1141, "step": 28696 }, { "epoch": 1.7104541661699844, "grad_norm": 3.0814268589019775, "learning_rate": 5.184457916594704e-06, "loss": 1.1472, "step": 28698 }, { "epoch": 1.7105733698891405, "grad_norm": 3.010158061981201, "learning_rate": 5.180265266155371e-06, "loss": 1.0362, "step": 28700 }, { "epoch": 1.7106925736082967, "grad_norm": 3.2150514125823975, "learning_rate": 5.176074219071297e-06, "loss": 1.2323, "step": 28702 }, { "epoch": 1.7108117773274527, "grad_norm": 3.4359378814697266, "learning_rate": 5.171884775492408e-06, "loss": 1.1607, "step": 28704 }, { "epoch": 1.7109309810466087, "grad_norm": 3.1536788940429688, "learning_rate": 5.167696935568584e-06, "loss": 1.0697, "step": 28706 }, { "epoch": 1.7110501847657646, "grad_norm": 3.149876594543457, "learning_rate": 5.163510699449631e-06, "loss": 1.1104, "step": 28708 }, { "epoch": 1.7111693884849206, "grad_norm": 3.4894704818725586, "learning_rate": 5.159326067285308e-06, "loss": 1.097, "step": 28710 }, { "epoch": 1.7112885922040768, "grad_norm": 3.2177319526672363, "learning_rate": 5.155143039225307e-06, "loss": 1.1674, "step": 28712 }, { "epoch": 1.7114077959232328, "grad_norm": 3.5396535396575928, "learning_rate": 5.150961615419281e-06, "loss": 1.0963, "step": 28714 }, { "epoch": 1.711526999642389, "grad_norm": 3.3798649311065674, "learning_rate": 5.1467817960167975e-06, "loss": 1.0131, "step": 28716 }, { "epoch": 1.711646203361545, "grad_norm": 2.9760847091674805, "learning_rate": 5.142603581167404e-06, "loss": 1.0435, "step": 28718 }, { "epoch": 1.711765407080701, "grad_norm": 3.2257726192474365, "learning_rate": 5.138426971020549e-06, "loss": 1.1789, "step": 28720 }, { "epoch": 1.7118846107998569, "grad_norm": 2.8499608039855957, "learning_rate": 5.134251965725656e-06, "loss": 1.0145, "step": 28722 }, { "epoch": 1.7120038145190128, "grad_norm": 3.5790224075317383, "learning_rate": 5.13007856543209e-06, "loss": 1.201, "step": 28724 }, { "epoch": 1.712123018238169, "grad_norm": 2.982802152633667, "learning_rate": 5.125906770289113e-06, "loss": 1.0711, "step": 28726 }, { "epoch": 1.7122422219573252, "grad_norm": 3.0450339317321777, "learning_rate": 5.121736580445996e-06, "loss": 1.1336, "step": 28728 }, { "epoch": 1.7123614256764812, "grad_norm": 3.063415765762329, "learning_rate": 5.117567996051914e-06, "loss": 1.0597, "step": 28730 }, { "epoch": 1.7124806293956372, "grad_norm": 3.093613624572754, "learning_rate": 5.113401017255987e-06, "loss": 1.0252, "step": 28732 }, { "epoch": 1.7125998331147931, "grad_norm": 3.2458832263946533, "learning_rate": 5.109235644207272e-06, "loss": 1.1116, "step": 28734 }, { "epoch": 1.712719036833949, "grad_norm": 3.3008005619049072, "learning_rate": 5.105071877054812e-06, "loss": 1.0864, "step": 28736 }, { "epoch": 1.7128382405531053, "grad_norm": 3.3274030685424805, "learning_rate": 5.100909715947522e-06, "loss": 1.0669, "step": 28738 }, { "epoch": 1.7129574442722613, "grad_norm": 3.528996706008911, "learning_rate": 5.096749161034309e-06, "loss": 1.1047, "step": 28740 }, { "epoch": 1.7130766479914175, "grad_norm": 3.0246496200561523, "learning_rate": 5.0925902124640265e-06, "loss": 0.9837, "step": 28742 }, { "epoch": 1.7131958517105734, "grad_norm": 3.374058246612549, "learning_rate": 5.088432870385429e-06, "loss": 1.1701, "step": 28744 }, { "epoch": 1.7133150554297294, "grad_norm": 3.091695785522461, "learning_rate": 5.084277134947257e-06, "loss": 1.0714, "step": 28746 }, { "epoch": 1.7134342591488854, "grad_norm": 3.0419278144836426, "learning_rate": 5.0801230062981705e-06, "loss": 0.9754, "step": 28748 }, { "epoch": 1.7135534628680413, "grad_norm": 3.542436361312866, "learning_rate": 5.075970484586773e-06, "loss": 1.1604, "step": 28750 }, { "epoch": 1.7136726665871975, "grad_norm": 3.184911012649536, "learning_rate": 5.071819569961617e-06, "loss": 1.0369, "step": 28752 }, { "epoch": 1.7137918703063537, "grad_norm": 3.49658465385437, "learning_rate": 5.067670262571201e-06, "loss": 1.1099, "step": 28754 }, { "epoch": 1.7139110740255097, "grad_norm": 3.417112112045288, "learning_rate": 5.06352256256395e-06, "loss": 1.0195, "step": 28756 }, { "epoch": 1.7140302777446657, "grad_norm": 2.993161916732788, "learning_rate": 5.059376470088234e-06, "loss": 1.0832, "step": 28758 }, { "epoch": 1.7141494814638216, "grad_norm": 3.091824531555176, "learning_rate": 5.055231985292402e-06, "loss": 1.1657, "step": 28760 }, { "epoch": 1.7142686851829776, "grad_norm": 3.128267765045166, "learning_rate": 5.051089108324686e-06, "loss": 1.0769, "step": 28762 }, { "epoch": 1.7143878889021338, "grad_norm": 3.3765478134155273, "learning_rate": 5.04694783933331e-06, "loss": 1.0958, "step": 28764 }, { "epoch": 1.7145070926212898, "grad_norm": 3.5465264320373535, "learning_rate": 5.042808178466413e-06, "loss": 1.1037, "step": 28766 }, { "epoch": 1.714626296340446, "grad_norm": 4.06850528717041, "learning_rate": 5.038670125872091e-06, "loss": 1.1694, "step": 28768 }, { "epoch": 1.714745500059602, "grad_norm": 3.0169191360473633, "learning_rate": 5.034533681698367e-06, "loss": 1.0713, "step": 28770 }, { "epoch": 1.7148647037787579, "grad_norm": 3.281675100326538, "learning_rate": 5.030398846093226e-06, "loss": 1.0723, "step": 28772 }, { "epoch": 1.7149839074979139, "grad_norm": 3.346611499786377, "learning_rate": 5.026265619204578e-06, "loss": 1.0511, "step": 28774 }, { "epoch": 1.7151031112170698, "grad_norm": 3.218966245651245, "learning_rate": 5.022134001180284e-06, "loss": 1.0349, "step": 28776 }, { "epoch": 1.715222314936226, "grad_norm": 3.389430046081543, "learning_rate": 5.018003992168146e-06, "loss": 1.1557, "step": 28778 }, { "epoch": 1.7153415186553822, "grad_norm": 3.279684066772461, "learning_rate": 5.0138755923159034e-06, "loss": 1.1216, "step": 28780 }, { "epoch": 1.7154607223745382, "grad_norm": 3.631063461303711, "learning_rate": 5.009748801771269e-06, "loss": 1.0321, "step": 28782 }, { "epoch": 1.7155799260936941, "grad_norm": 3.120905637741089, "learning_rate": 5.0056236206818375e-06, "loss": 1.1435, "step": 28784 }, { "epoch": 1.71569912981285, "grad_norm": 3.165342330932617, "learning_rate": 5.00150004919519e-06, "loss": 1.2246, "step": 28786 }, { "epoch": 1.715818333532006, "grad_norm": 3.2851016521453857, "learning_rate": 4.997378087458865e-06, "loss": 1.1465, "step": 28788 }, { "epoch": 1.7159375372511623, "grad_norm": 3.4917848110198975, "learning_rate": 4.993257735620283e-06, "loss": 1.1276, "step": 28790 }, { "epoch": 1.7160567409703182, "grad_norm": 3.302276134490967, "learning_rate": 4.989138993826864e-06, "loss": 1.1166, "step": 28792 }, { "epoch": 1.7161759446894744, "grad_norm": 3.2576916217803955, "learning_rate": 4.985021862225952e-06, "loss": 0.9529, "step": 28794 }, { "epoch": 1.7162951484086304, "grad_norm": 3.1047182083129883, "learning_rate": 4.9809063409648245e-06, "loss": 1.0717, "step": 28796 }, { "epoch": 1.7164143521277864, "grad_norm": 3.343045234680176, "learning_rate": 4.976792430190708e-06, "loss": 1.1852, "step": 28798 }, { "epoch": 1.7165335558469423, "grad_norm": 3.069963216781616, "learning_rate": 4.9726801300507715e-06, "loss": 1.0563, "step": 28800 }, { "epoch": 1.7166527595660983, "grad_norm": 3.5156424045562744, "learning_rate": 4.968569440692123e-06, "loss": 1.1299, "step": 28802 }, { "epoch": 1.7167719632852545, "grad_norm": 3.1385767459869385, "learning_rate": 4.964460362261813e-06, "loss": 1.0465, "step": 28804 }, { "epoch": 1.7168911670044107, "grad_norm": 3.3321726322174072, "learning_rate": 4.960352894906861e-06, "loss": 1.126, "step": 28806 }, { "epoch": 1.7170103707235667, "grad_norm": 3.1056010723114014, "learning_rate": 4.95624703877417e-06, "loss": 1.1036, "step": 28808 }, { "epoch": 1.7171295744427226, "grad_norm": 3.393381118774414, "learning_rate": 4.952142794010644e-06, "loss": 1.0735, "step": 28810 }, { "epoch": 1.7172487781618786, "grad_norm": 2.853957176208496, "learning_rate": 4.948040160763101e-06, "loss": 1.0525, "step": 28812 }, { "epoch": 1.7173679818810346, "grad_norm": 3.5474436283111572, "learning_rate": 4.943939139178311e-06, "loss": 1.2531, "step": 28814 }, { "epoch": 1.7174871856001908, "grad_norm": 3.0015602111816406, "learning_rate": 4.939839729402968e-06, "loss": 1.1975, "step": 28816 }, { "epoch": 1.7176063893193467, "grad_norm": 3.0941097736358643, "learning_rate": 4.935741931583732e-06, "loss": 0.9838, "step": 28818 }, { "epoch": 1.717725593038503, "grad_norm": 3.3722636699676514, "learning_rate": 4.931645745867191e-06, "loss": 1.1952, "step": 28820 }, { "epoch": 1.7178447967576589, "grad_norm": 3.39589786529541, "learning_rate": 4.9275511723998855e-06, "loss": 1.0937, "step": 28822 }, { "epoch": 1.7179640004768149, "grad_norm": 3.129124164581299, "learning_rate": 4.923458211328286e-06, "loss": 1.1656, "step": 28824 }, { "epoch": 1.7180832041959708, "grad_norm": 2.787508010864258, "learning_rate": 4.919366862798807e-06, "loss": 1.0838, "step": 28826 }, { "epoch": 1.7182024079151268, "grad_norm": 3.089366912841797, "learning_rate": 4.915277126957824e-06, "loss": 1.1243, "step": 28828 }, { "epoch": 1.718321611634283, "grad_norm": 3.4221627712249756, "learning_rate": 4.911189003951644e-06, "loss": 1.1666, "step": 28830 }, { "epoch": 1.7184408153534392, "grad_norm": 3.024292230606079, "learning_rate": 4.907102493926485e-06, "loss": 1.0796, "step": 28832 }, { "epoch": 1.7185600190725951, "grad_norm": 3.248333215713501, "learning_rate": 4.903017597028558e-06, "loss": 1.0136, "step": 28834 }, { "epoch": 1.7186792227917511, "grad_norm": 3.219064235687256, "learning_rate": 4.898934313403997e-06, "loss": 1.3232, "step": 28836 }, { "epoch": 1.718798426510907, "grad_norm": 3.6484014987945557, "learning_rate": 4.89485264319886e-06, "loss": 1.2877, "step": 28838 }, { "epoch": 1.718917630230063, "grad_norm": 3.278268575668335, "learning_rate": 4.890772586559172e-06, "loss": 1.1286, "step": 28840 }, { "epoch": 1.7190368339492192, "grad_norm": 3.5145111083984375, "learning_rate": 4.886694143630894e-06, "loss": 1.1817, "step": 28842 }, { "epoch": 1.7191560376683752, "grad_norm": 2.845064401626587, "learning_rate": 4.882617314559912e-06, "loss": 1.0518, "step": 28844 }, { "epoch": 1.7192752413875314, "grad_norm": 3.683868885040283, "learning_rate": 4.878542099492078e-06, "loss": 1.191, "step": 28846 }, { "epoch": 1.7193944451066874, "grad_norm": 3.654029369354248, "learning_rate": 4.874468498573176e-06, "loss": 1.2118, "step": 28848 }, { "epoch": 1.7195136488258433, "grad_norm": 2.9218814373016357, "learning_rate": 4.870396511948921e-06, "loss": 1.2449, "step": 28850 }, { "epoch": 1.7196328525449993, "grad_norm": 3.299201011657715, "learning_rate": 4.866326139765015e-06, "loss": 1.035, "step": 28852 }, { "epoch": 1.7197520562641553, "grad_norm": 2.869074583053589, "learning_rate": 4.862257382167024e-06, "loss": 1.0485, "step": 28854 }, { "epoch": 1.7198712599833115, "grad_norm": 2.9650490283966064, "learning_rate": 4.858190239300531e-06, "loss": 0.9518, "step": 28856 }, { "epoch": 1.7199904637024677, "grad_norm": 2.8869190216064453, "learning_rate": 4.854124711311037e-06, "loss": 1.1357, "step": 28858 }, { "epoch": 1.7201096674216236, "grad_norm": 3.323798656463623, "learning_rate": 4.850060798343942e-06, "loss": 1.105, "step": 28860 }, { "epoch": 1.7202288711407796, "grad_norm": 3.4597957134246826, "learning_rate": 4.845998500544668e-06, "loss": 1.0933, "step": 28862 }, { "epoch": 1.7203480748599356, "grad_norm": 2.9526405334472656, "learning_rate": 4.841937818058517e-06, "loss": 1.1329, "step": 28864 }, { "epoch": 1.7204672785790915, "grad_norm": 3.2787537574768066, "learning_rate": 4.837878751030755e-06, "loss": 0.9546, "step": 28866 }, { "epoch": 1.7205864822982477, "grad_norm": 3.2501769065856934, "learning_rate": 4.833821299606584e-06, "loss": 1.0436, "step": 28868 }, { "epoch": 1.7207056860174037, "grad_norm": 3.309664726257324, "learning_rate": 4.829765463931179e-06, "loss": 1.3396, "step": 28870 }, { "epoch": 1.72082488973656, "grad_norm": 3.0709447860717773, "learning_rate": 4.8257112441495885e-06, "loss": 1.0985, "step": 28872 }, { "epoch": 1.7209440934557159, "grad_norm": 3.363483190536499, "learning_rate": 4.821658640406884e-06, "loss": 1.0883, "step": 28874 }, { "epoch": 1.7210632971748718, "grad_norm": 3.217475175857544, "learning_rate": 4.817607652848033e-06, "loss": 1.2753, "step": 28876 }, { "epoch": 1.7211825008940278, "grad_norm": 3.5448267459869385, "learning_rate": 4.813558281617925e-06, "loss": 1.0605, "step": 28878 }, { "epoch": 1.7213017046131838, "grad_norm": 3.3044397830963135, "learning_rate": 4.809510526861455e-06, "loss": 1.0464, "step": 28880 }, { "epoch": 1.72142090833234, "grad_norm": 3.2026190757751465, "learning_rate": 4.805464388723408e-06, "loss": 1.2804, "step": 28882 }, { "epoch": 1.7215401120514962, "grad_norm": 3.2318551540374756, "learning_rate": 4.801419867348533e-06, "loss": 1.1034, "step": 28884 }, { "epoch": 1.7216593157706521, "grad_norm": 3.050084114074707, "learning_rate": 4.7973769628815115e-06, "loss": 1.1645, "step": 28886 }, { "epoch": 1.721778519489808, "grad_norm": 3.3511931896209717, "learning_rate": 4.7933356754669755e-06, "loss": 1.0143, "step": 28888 }, { "epoch": 1.721897723208964, "grad_norm": 2.95241117477417, "learning_rate": 4.789296005249499e-06, "loss": 0.9717, "step": 28890 }, { "epoch": 1.72201692692812, "grad_norm": 3.1694247722625732, "learning_rate": 4.7852579523735785e-06, "loss": 1.1999, "step": 28892 }, { "epoch": 1.7221361306472762, "grad_norm": 2.898775815963745, "learning_rate": 4.781221516983702e-06, "loss": 1.0827, "step": 28894 }, { "epoch": 1.7222553343664322, "grad_norm": 3.468658685684204, "learning_rate": 4.777186699224229e-06, "loss": 1.1478, "step": 28896 }, { "epoch": 1.7223745380855884, "grad_norm": 3.146355152130127, "learning_rate": 4.773153499239524e-06, "loss": 1.0155, "step": 28898 }, { "epoch": 1.7224937418047443, "grad_norm": 3.1146771907806396, "learning_rate": 4.769121917173857e-06, "loss": 1.0389, "step": 28900 }, { "epoch": 1.7226129455239003, "grad_norm": 3.3809969425201416, "learning_rate": 4.7650919531714575e-06, "loss": 1.1395, "step": 28902 }, { "epoch": 1.7227321492430563, "grad_norm": 3.44464111328125, "learning_rate": 4.761063607376487e-06, "loss": 1.1218, "step": 28904 }, { "epoch": 1.7228513529622123, "grad_norm": 2.9574265480041504, "learning_rate": 4.757036879933058e-06, "loss": 1.0186, "step": 28906 }, { "epoch": 1.7229705566813684, "grad_norm": 3.365407943725586, "learning_rate": 4.753011770985216e-06, "loss": 1.1099, "step": 28908 }, { "epoch": 1.7230897604005246, "grad_norm": 3.1811749935150146, "learning_rate": 4.7489882806769526e-06, "loss": 0.996, "step": 28910 }, { "epoch": 1.7232089641196806, "grad_norm": 3.240887403488159, "learning_rate": 4.7449664091522016e-06, "loss": 1.216, "step": 28912 }, { "epoch": 1.7233281678388366, "grad_norm": 3.3440074920654297, "learning_rate": 4.740946156554837e-06, "loss": 0.9491, "step": 28914 }, { "epoch": 1.7234473715579925, "grad_norm": 3.1494853496551514, "learning_rate": 4.736927523028694e-06, "loss": 1.0894, "step": 28916 }, { "epoch": 1.7235665752771485, "grad_norm": 3.3881256580352783, "learning_rate": 4.7329105087175085e-06, "loss": 1.0342, "step": 28918 }, { "epoch": 1.7236857789963047, "grad_norm": 3.2977077960968018, "learning_rate": 4.728895113764992e-06, "loss": 1.2409, "step": 28920 }, { "epoch": 1.7238049827154607, "grad_norm": 3.1497225761413574, "learning_rate": 4.724881338314802e-06, "loss": 1.1318, "step": 28922 }, { "epoch": 1.7239241864346169, "grad_norm": 3.012006998062134, "learning_rate": 4.720869182510496e-06, "loss": 1.1398, "step": 28924 }, { "epoch": 1.7240433901537728, "grad_norm": 3.3076183795928955, "learning_rate": 4.716858646495631e-06, "loss": 1.086, "step": 28926 }, { "epoch": 1.7241625938729288, "grad_norm": 3.3638060092926025, "learning_rate": 4.712849730413671e-06, "loss": 1.1951, "step": 28928 }, { "epoch": 1.7242817975920848, "grad_norm": 2.9530396461486816, "learning_rate": 4.7088424344080176e-06, "loss": 0.9725, "step": 28930 }, { "epoch": 1.7244010013112407, "grad_norm": 3.354891777038574, "learning_rate": 4.704836758622028e-06, "loss": 0.9844, "step": 28932 }, { "epoch": 1.724520205030397, "grad_norm": 2.9524500370025635, "learning_rate": 4.700832703199021e-06, "loss": 1.0393, "step": 28934 }, { "epoch": 1.7246394087495531, "grad_norm": 3.2987961769104004, "learning_rate": 4.696830268282204e-06, "loss": 1.3015, "step": 28936 }, { "epoch": 1.724758612468709, "grad_norm": 2.98647141456604, "learning_rate": 4.6928294540147635e-06, "loss": 1.0441, "step": 28938 }, { "epoch": 1.724877816187865, "grad_norm": 3.0698938369750977, "learning_rate": 4.6888302605398505e-06, "loss": 1.0097, "step": 28940 }, { "epoch": 1.724997019907021, "grad_norm": 3.3658804893493652, "learning_rate": 4.684832688000496e-06, "loss": 1.2254, "step": 28942 }, { "epoch": 1.725116223626177, "grad_norm": 3.327115058898926, "learning_rate": 4.680836736539723e-06, "loss": 1.1557, "step": 28944 }, { "epoch": 1.7252354273453332, "grad_norm": 3.304091453552246, "learning_rate": 4.6768424063004804e-06, "loss": 1.2758, "step": 28946 }, { "epoch": 1.7253546310644892, "grad_norm": 2.833071708679199, "learning_rate": 4.6728496974256516e-06, "loss": 0.9869, "step": 28948 }, { "epoch": 1.7254738347836454, "grad_norm": 3.1472935676574707, "learning_rate": 4.66885861005808e-06, "loss": 1.0877, "step": 28950 }, { "epoch": 1.7255930385028013, "grad_norm": 3.362006664276123, "learning_rate": 4.664869144340533e-06, "loss": 1.0752, "step": 28952 }, { "epoch": 1.7257122422219573, "grad_norm": 3.645946741104126, "learning_rate": 4.6608813004157315e-06, "loss": 1.2497, "step": 28954 }, { "epoch": 1.7258314459411133, "grad_norm": 3.273831844329834, "learning_rate": 4.656895078426321e-06, "loss": 1.0874, "step": 28956 }, { "epoch": 1.7259506496602695, "grad_norm": 3.260694980621338, "learning_rate": 4.652910478514927e-06, "loss": 0.9637, "step": 28958 }, { "epoch": 1.7260698533794254, "grad_norm": 3.3452117443084717, "learning_rate": 4.648927500824068e-06, "loss": 1.0446, "step": 28960 }, { "epoch": 1.7261890570985816, "grad_norm": 3.4932844638824463, "learning_rate": 4.644946145496243e-06, "loss": 1.1714, "step": 28962 }, { "epoch": 1.7263082608177376, "grad_norm": 3.3751375675201416, "learning_rate": 4.640966412673886e-06, "loss": 1.2506, "step": 28964 }, { "epoch": 1.7264274645368936, "grad_norm": 3.3038909435272217, "learning_rate": 4.63698830249934e-06, "loss": 1.2029, "step": 28966 }, { "epoch": 1.7265466682560495, "grad_norm": 3.230971097946167, "learning_rate": 4.6330118151149345e-06, "loss": 1.007, "step": 28968 }, { "epoch": 1.7266658719752055, "grad_norm": 3.0269148349761963, "learning_rate": 4.629036950662918e-06, "loss": 1.0553, "step": 28970 }, { "epoch": 1.7267850756943617, "grad_norm": 4.62629508972168, "learning_rate": 4.625063709285488e-06, "loss": 1.1622, "step": 28972 }, { "epoch": 1.7269042794135177, "grad_norm": 3.0319790840148926, "learning_rate": 4.621092091124774e-06, "loss": 0.9981, "step": 28974 }, { "epoch": 1.7270234831326738, "grad_norm": 3.570807933807373, "learning_rate": 4.617122096322863e-06, "loss": 1.1394, "step": 28976 }, { "epoch": 1.7271426868518298, "grad_norm": 3.4125595092773438, "learning_rate": 4.613153725021757e-06, "loss": 1.1639, "step": 28978 }, { "epoch": 1.7272618905709858, "grad_norm": 3.341230630874634, "learning_rate": 4.60918697736345e-06, "loss": 1.0684, "step": 28980 }, { "epoch": 1.7273810942901417, "grad_norm": 2.7420718669891357, "learning_rate": 4.605221853489821e-06, "loss": 1.1283, "step": 28982 }, { "epoch": 1.727500298009298, "grad_norm": 3.1853137016296387, "learning_rate": 4.601258353542715e-06, "loss": 1.0931, "step": 28984 }, { "epoch": 1.727619501728454, "grad_norm": 3.1277096271514893, "learning_rate": 4.597296477663942e-06, "loss": 1.1677, "step": 28986 }, { "epoch": 1.72773870544761, "grad_norm": 3.5153515338897705, "learning_rate": 4.593336225995199e-06, "loss": 1.1833, "step": 28988 }, { "epoch": 1.727857909166766, "grad_norm": 3.3159255981445312, "learning_rate": 4.58937759867819e-06, "loss": 1.0668, "step": 28990 }, { "epoch": 1.727977112885922, "grad_norm": 3.4851086139678955, "learning_rate": 4.585420595854512e-06, "loss": 1.0284, "step": 28992 }, { "epoch": 1.728096316605078, "grad_norm": 3.50565505027771, "learning_rate": 4.581465217665726e-06, "loss": 1.1472, "step": 28994 }, { "epoch": 1.728215520324234, "grad_norm": 3.350691080093384, "learning_rate": 4.577511464253325e-06, "loss": 1.006, "step": 28996 }, { "epoch": 1.7283347240433902, "grad_norm": 3.240633249282837, "learning_rate": 4.573559335758754e-06, "loss": 1.104, "step": 28998 }, { "epoch": 1.7284539277625461, "grad_norm": 3.2510781288146973, "learning_rate": 4.5696088323233854e-06, "loss": 1.1104, "step": 29000 }, { "epoch": 1.7285731314817023, "grad_norm": 3.3884117603302, "learning_rate": 4.565659954088542e-06, "loss": 1.0008, "step": 29002 }, { "epoch": 1.7286923352008583, "grad_norm": 2.8138952255249023, "learning_rate": 4.561712701195509e-06, "loss": 1.1152, "step": 29004 }, { "epoch": 1.7288115389200143, "grad_norm": 3.1718509197235107, "learning_rate": 4.557767073785457e-06, "loss": 1.1268, "step": 29006 }, { "epoch": 1.7289307426391702, "grad_norm": 3.24782133102417, "learning_rate": 4.553823071999569e-06, "loss": 1.0111, "step": 29008 }, { "epoch": 1.7290499463583264, "grad_norm": 2.9140279293060303, "learning_rate": 4.5498806959789365e-06, "loss": 0.9833, "step": 29010 }, { "epoch": 1.7291691500774824, "grad_norm": 3.123513698577881, "learning_rate": 4.5459399458645525e-06, "loss": 1.1241, "step": 29012 }, { "epoch": 1.7292883537966386, "grad_norm": 3.1377696990966797, "learning_rate": 4.542000821797427e-06, "loss": 1.1067, "step": 29014 }, { "epoch": 1.7294075575157946, "grad_norm": 3.2191030979156494, "learning_rate": 4.538063323918462e-06, "loss": 1.0225, "step": 29016 }, { "epoch": 1.7295267612349505, "grad_norm": 3.0748910903930664, "learning_rate": 4.53412745236852e-06, "loss": 1.0885, "step": 29018 }, { "epoch": 1.7296459649541065, "grad_norm": 3.1437244415283203, "learning_rate": 4.530193207288403e-06, "loss": 1.0653, "step": 29020 }, { "epoch": 1.7297651686732625, "grad_norm": 3.3653972148895264, "learning_rate": 4.526260588818843e-06, "loss": 1.0158, "step": 29022 }, { "epoch": 1.7298843723924187, "grad_norm": 3.2631211280822754, "learning_rate": 4.522329597100527e-06, "loss": 1.2613, "step": 29024 }, { "epoch": 1.7300035761115746, "grad_norm": 3.10551381111145, "learning_rate": 4.5184002322740785e-06, "loss": 1.1466, "step": 29026 }, { "epoch": 1.7301227798307308, "grad_norm": 2.9463846683502197, "learning_rate": 4.514472494480082e-06, "loss": 1.1282, "step": 29028 }, { "epoch": 1.7302419835498868, "grad_norm": 2.8787930011749268, "learning_rate": 4.5105463838590156e-06, "loss": 1.0159, "step": 29030 }, { "epoch": 1.7303611872690428, "grad_norm": 3.275317907333374, "learning_rate": 4.50662190055135e-06, "loss": 1.0602, "step": 29032 }, { "epoch": 1.7304803909881987, "grad_norm": 2.922710657119751, "learning_rate": 4.502699044697478e-06, "loss": 1.0064, "step": 29034 }, { "epoch": 1.730599594707355, "grad_norm": 3.1140339374542236, "learning_rate": 4.498777816437727e-06, "loss": 1.0767, "step": 29036 }, { "epoch": 1.7307187984265109, "grad_norm": 3.4470067024230957, "learning_rate": 4.494858215912373e-06, "loss": 1.2336, "step": 29038 }, { "epoch": 1.730838002145667, "grad_norm": 3.3712685108184814, "learning_rate": 4.4909402432616375e-06, "loss": 1.1035, "step": 29040 }, { "epoch": 1.730957205864823, "grad_norm": 3.407681465148926, "learning_rate": 4.487023898625675e-06, "loss": 0.9808, "step": 29042 }, { "epoch": 1.731076409583979, "grad_norm": 3.063141345977783, "learning_rate": 4.483109182144585e-06, "loss": 1.0024, "step": 29044 }, { "epoch": 1.731195613303135, "grad_norm": 3.3083066940307617, "learning_rate": 4.479196093958421e-06, "loss": 1.0303, "step": 29046 }, { "epoch": 1.731314817022291, "grad_norm": 3.43740177154541, "learning_rate": 4.4752846342071446e-06, "loss": 1.0154, "step": 29048 }, { "epoch": 1.7314340207414471, "grad_norm": 3.0575015544891357, "learning_rate": 4.4713748030307205e-06, "loss": 1.0188, "step": 29050 }, { "epoch": 1.7315532244606033, "grad_norm": 3.528651714324951, "learning_rate": 4.467466600568976e-06, "loss": 1.2141, "step": 29052 }, { "epoch": 1.7316724281797593, "grad_norm": 3.3857007026672363, "learning_rate": 4.4635600269617496e-06, "loss": 1.0081, "step": 29054 }, { "epoch": 1.7317916318989153, "grad_norm": 3.731346368789673, "learning_rate": 4.459655082348785e-06, "loss": 1.0719, "step": 29056 }, { "epoch": 1.7319108356180712, "grad_norm": 3.314699649810791, "learning_rate": 4.455751766869759e-06, "loss": 1.096, "step": 29058 }, { "epoch": 1.7320300393372272, "grad_norm": 3.47031831741333, "learning_rate": 4.451850080664333e-06, "loss": 1.1028, "step": 29060 }, { "epoch": 1.7321492430563834, "grad_norm": 3.6719112396240234, "learning_rate": 4.447950023872066e-06, "loss": 1.1824, "step": 29062 }, { "epoch": 1.7322684467755394, "grad_norm": 3.3285269737243652, "learning_rate": 4.444051596632482e-06, "loss": 1.1748, "step": 29064 }, { "epoch": 1.7323876504946956, "grad_norm": 3.2308058738708496, "learning_rate": 4.440154799085028e-06, "loss": 1.0237, "step": 29066 }, { "epoch": 1.7325068542138515, "grad_norm": 3.1027891635894775, "learning_rate": 4.436259631369144e-06, "loss": 1.1033, "step": 29068 }, { "epoch": 1.7326260579330075, "grad_norm": 3.0520541667938232, "learning_rate": 4.432366093624129e-06, "loss": 1.159, "step": 29070 }, { "epoch": 1.7327452616521635, "grad_norm": 3.4439127445220947, "learning_rate": 4.428474185989284e-06, "loss": 1.1994, "step": 29072 }, { "epoch": 1.7328644653713194, "grad_norm": 3.027148485183716, "learning_rate": 4.424583908603858e-06, "loss": 1.1215, "step": 29074 }, { "epoch": 1.7329836690904756, "grad_norm": 2.957051992416382, "learning_rate": 4.420695261606977e-06, "loss": 1.1215, "step": 29076 }, { "epoch": 1.7331028728096318, "grad_norm": 3.488109588623047, "learning_rate": 4.416808245137788e-06, "loss": 1.0184, "step": 29078 }, { "epoch": 1.7332220765287878, "grad_norm": 3.105302572250366, "learning_rate": 4.412922859335327e-06, "loss": 1.1495, "step": 29080 }, { "epoch": 1.7333412802479438, "grad_norm": 3.2961955070495605, "learning_rate": 4.409039104338591e-06, "loss": 1.116, "step": 29082 }, { "epoch": 1.7334604839670997, "grad_norm": 3.782794237136841, "learning_rate": 4.405156980286518e-06, "loss": 1.1285, "step": 29084 }, { "epoch": 1.7335796876862557, "grad_norm": 3.227095365524292, "learning_rate": 4.401276487317973e-06, "loss": 0.9874, "step": 29086 }, { "epoch": 1.733698891405412, "grad_norm": 3.46710467338562, "learning_rate": 4.397397625571786e-06, "loss": 1.1553, "step": 29088 }, { "epoch": 1.7338180951245679, "grad_norm": 3.256343126296997, "learning_rate": 4.393520395186706e-06, "loss": 1.0054, "step": 29090 }, { "epoch": 1.733937298843724, "grad_norm": 4.092469692230225, "learning_rate": 4.389644796301456e-06, "loss": 1.2296, "step": 29092 }, { "epoch": 1.73405650256288, "grad_norm": 2.6907079219818115, "learning_rate": 4.385770829054653e-06, "loss": 1.0678, "step": 29094 }, { "epoch": 1.734175706282036, "grad_norm": 2.9209511280059814, "learning_rate": 4.3818984935849015e-06, "loss": 1.116, "step": 29096 }, { "epoch": 1.734294910001192, "grad_norm": 3.3221542835235596, "learning_rate": 4.37802779003072e-06, "loss": 1.2105, "step": 29098 }, { "epoch": 1.734414113720348, "grad_norm": 3.0312438011169434, "learning_rate": 4.37415871853058e-06, "loss": 0.9713, "step": 29100 }, { "epoch": 1.7345333174395041, "grad_norm": 3.171252965927124, "learning_rate": 4.370291279222893e-06, "loss": 1.1346, "step": 29102 }, { "epoch": 1.7346525211586603, "grad_norm": 3.218902826309204, "learning_rate": 4.366425472246e-06, "loss": 1.1719, "step": 29104 }, { "epoch": 1.7347717248778163, "grad_norm": 3.101828098297119, "learning_rate": 4.3625612977382045e-06, "loss": 1.0112, "step": 29106 }, { "epoch": 1.7348909285969722, "grad_norm": 3.2019946575164795, "learning_rate": 4.358698755837742e-06, "loss": 1.1174, "step": 29108 }, { "epoch": 1.7350101323161282, "grad_norm": 3.4616916179656982, "learning_rate": 4.35483784668278e-06, "loss": 1.0024, "step": 29110 }, { "epoch": 1.7351293360352842, "grad_norm": 3.4444031715393066, "learning_rate": 4.350978570411435e-06, "loss": 1.081, "step": 29112 }, { "epoch": 1.7352485397544404, "grad_norm": 3.53102445602417, "learning_rate": 4.3471209271617906e-06, "loss": 1.0643, "step": 29114 }, { "epoch": 1.7353677434735963, "grad_norm": 3.6042871475219727, "learning_rate": 4.343264917071821e-06, "loss": 1.1411, "step": 29116 }, { "epoch": 1.7354869471927525, "grad_norm": 3.3068692684173584, "learning_rate": 4.33941054027947e-06, "loss": 1.1064, "step": 29118 }, { "epoch": 1.7356061509119085, "grad_norm": 3.2778732776641846, "learning_rate": 4.335557796922646e-06, "loss": 1.0678, "step": 29120 }, { "epoch": 1.7357253546310645, "grad_norm": 3.2067036628723145, "learning_rate": 4.331706687139142e-06, "loss": 1.1256, "step": 29122 }, { "epoch": 1.7358445583502204, "grad_norm": 3.4262328147888184, "learning_rate": 4.327857211066755e-06, "loss": 1.1845, "step": 29124 }, { "epoch": 1.7359637620693764, "grad_norm": 3.0771994590759277, "learning_rate": 4.3240093688431736e-06, "loss": 1.1494, "step": 29126 }, { "epoch": 1.7360829657885326, "grad_norm": 3.1724398136138916, "learning_rate": 4.320163160606061e-06, "loss": 1.0883, "step": 29128 }, { "epoch": 1.7362021695076888, "grad_norm": 3.0612518787384033, "learning_rate": 4.316318586493007e-06, "loss": 1.0656, "step": 29130 }, { "epoch": 1.7363213732268448, "grad_norm": 3.753793478012085, "learning_rate": 4.312475646641534e-06, "loss": 1.1204, "step": 29132 }, { "epoch": 1.7364405769460007, "grad_norm": 3.1459274291992188, "learning_rate": 4.308634341189133e-06, "loss": 1.1493, "step": 29134 }, { "epoch": 1.7365597806651567, "grad_norm": 3.08805513381958, "learning_rate": 4.304794670273199e-06, "loss": 1.0991, "step": 29136 }, { "epoch": 1.7366789843843127, "grad_norm": 3.2893948554992676, "learning_rate": 4.3009566340311225e-06, "loss": 1.1244, "step": 29138 }, { "epoch": 1.7367981881034689, "grad_norm": 3.2516579627990723, "learning_rate": 4.297120232600166e-06, "loss": 1.0514, "step": 29140 }, { "epoch": 1.7369173918226248, "grad_norm": 2.9487602710723877, "learning_rate": 4.2932854661176025e-06, "loss": 0.9868, "step": 29142 }, { "epoch": 1.737036595541781, "grad_norm": 3.1984190940856934, "learning_rate": 4.289452334720606e-06, "loss": 1.0739, "step": 29144 }, { "epoch": 1.737155799260937, "grad_norm": 3.4664711952209473, "learning_rate": 4.285620838546283e-06, "loss": 1.1373, "step": 29146 }, { "epoch": 1.737275002980093, "grad_norm": 2.9011356830596924, "learning_rate": 4.28179097773172e-06, "loss": 1.1483, "step": 29148 }, { "epoch": 1.737394206699249, "grad_norm": 3.347221612930298, "learning_rate": 4.277962752413916e-06, "loss": 1.1247, "step": 29150 }, { "epoch": 1.737513410418405, "grad_norm": 3.5301342010498047, "learning_rate": 4.27413616272982e-06, "loss": 1.0144, "step": 29152 }, { "epoch": 1.737632614137561, "grad_norm": 3.1508946418762207, "learning_rate": 4.270311208816313e-06, "loss": 1.1436, "step": 29154 }, { "epoch": 1.7377518178567173, "grad_norm": 2.9049172401428223, "learning_rate": 4.266487890810256e-06, "loss": 1.1794, "step": 29156 }, { "epoch": 1.7378710215758733, "grad_norm": 2.857713460922241, "learning_rate": 4.2626662088483875e-06, "loss": 1.0137, "step": 29158 }, { "epoch": 1.7379902252950292, "grad_norm": 3.0807924270629883, "learning_rate": 4.258846163067443e-06, "loss": 1.1181, "step": 29160 }, { "epoch": 1.7381094290141852, "grad_norm": 3.264263153076172, "learning_rate": 4.2550277536040794e-06, "loss": 1.1907, "step": 29162 }, { "epoch": 1.7382286327333412, "grad_norm": 2.8990323543548584, "learning_rate": 4.251210980594878e-06, "loss": 1.0671, "step": 29164 }, { "epoch": 1.7383478364524974, "grad_norm": 3.201913595199585, "learning_rate": 4.247395844176389e-06, "loss": 1.0427, "step": 29166 }, { "epoch": 1.7384670401716533, "grad_norm": 3.175276756286621, "learning_rate": 4.243582344485092e-06, "loss": 1.1247, "step": 29168 }, { "epoch": 1.7385862438908095, "grad_norm": 3.3610260486602783, "learning_rate": 4.239770481657412e-06, "loss": 1.1403, "step": 29170 }, { "epoch": 1.7387054476099655, "grad_norm": 3.270893096923828, "learning_rate": 4.235960255829707e-06, "loss": 1.0398, "step": 29172 }, { "epoch": 1.7388246513291215, "grad_norm": 3.1526031494140625, "learning_rate": 4.232151667138284e-06, "loss": 1.0471, "step": 29174 }, { "epoch": 1.7389438550482774, "grad_norm": 3.1763577461242676, "learning_rate": 4.22834471571939e-06, "loss": 1.1957, "step": 29176 }, { "epoch": 1.7390630587674334, "grad_norm": 3.4830875396728516, "learning_rate": 4.22453940170921e-06, "loss": 1.1134, "step": 29178 }, { "epoch": 1.7391822624865896, "grad_norm": 3.0792360305786133, "learning_rate": 4.220735725243874e-06, "loss": 1.1017, "step": 29180 }, { "epoch": 1.7393014662057458, "grad_norm": 3.161287307739258, "learning_rate": 4.216933686459446e-06, "loss": 1.2742, "step": 29182 }, { "epoch": 1.7394206699249017, "grad_norm": 2.9821832180023193, "learning_rate": 4.213133285491966e-06, "loss": 1.1491, "step": 29184 }, { "epoch": 1.7395398736440577, "grad_norm": 3.368262529373169, "learning_rate": 4.209334522477343e-06, "loss": 1.0514, "step": 29186 }, { "epoch": 1.7396590773632137, "grad_norm": 3.4433631896972656, "learning_rate": 4.205537397551506e-06, "loss": 1.1341, "step": 29188 }, { "epoch": 1.7397782810823696, "grad_norm": 3.6797680854797363, "learning_rate": 4.201741910850288e-06, "loss": 1.2035, "step": 29190 }, { "epoch": 1.7398974848015258, "grad_norm": 3.559917688369751, "learning_rate": 4.197948062509444e-06, "loss": 1.1688, "step": 29192 }, { "epoch": 1.7400166885206818, "grad_norm": 3.244555711746216, "learning_rate": 4.194155852664716e-06, "loss": 1.0673, "step": 29194 }, { "epoch": 1.740135892239838, "grad_norm": 2.832921028137207, "learning_rate": 4.190365281451753e-06, "loss": 0.9487, "step": 29196 }, { "epoch": 1.740255095958994, "grad_norm": 3.1905055046081543, "learning_rate": 4.186576349006161e-06, "loss": 1.1679, "step": 29198 }, { "epoch": 1.74037429967815, "grad_norm": 3.2847416400909424, "learning_rate": 4.182789055463477e-06, "loss": 1.1949, "step": 29200 }, { "epoch": 1.740493503397306, "grad_norm": 3.0440332889556885, "learning_rate": 4.179003400959203e-06, "loss": 1.3311, "step": 29202 }, { "epoch": 1.7406127071164619, "grad_norm": 3.169588327407837, "learning_rate": 4.175219385628748e-06, "loss": 1.0528, "step": 29204 }, { "epoch": 1.740731910835618, "grad_norm": 3.2386820316314697, "learning_rate": 4.17143700960747e-06, "loss": 1.1156, "step": 29206 }, { "epoch": 1.7408511145547743, "grad_norm": 2.8800549507141113, "learning_rate": 4.167656273030713e-06, "loss": 0.8868, "step": 29208 }, { "epoch": 1.7409703182739302, "grad_norm": 3.0806591510772705, "learning_rate": 4.163877176033687e-06, "loss": 1.3112, "step": 29210 }, { "epoch": 1.7410895219930862, "grad_norm": 3.5183236598968506, "learning_rate": 4.160099718751603e-06, "loss": 1.1109, "step": 29212 }, { "epoch": 1.7412087257122422, "grad_norm": 3.5724291801452637, "learning_rate": 4.156323901319598e-06, "loss": 1.1191, "step": 29214 }, { "epoch": 1.7413279294313981, "grad_norm": 3.4386017322540283, "learning_rate": 4.152549723872739e-06, "loss": 1.1532, "step": 29216 }, { "epoch": 1.7414471331505543, "grad_norm": 3.0734288692474365, "learning_rate": 4.148777186546038e-06, "loss": 0.9921, "step": 29218 }, { "epoch": 1.7415663368697103, "grad_norm": 3.163464069366455, "learning_rate": 4.1450062894744625e-06, "loss": 1.22, "step": 29220 }, { "epoch": 1.7416855405888665, "grad_norm": 3.438244342803955, "learning_rate": 4.141237032792894e-06, "loss": 1.1621, "step": 29222 }, { "epoch": 1.7418047443080225, "grad_norm": 3.331824779510498, "learning_rate": 4.137469416636181e-06, "loss": 1.2422, "step": 29224 }, { "epoch": 1.7419239480271784, "grad_norm": 3.1127090454101562, "learning_rate": 4.133703441139114e-06, "loss": 0.9664, "step": 29226 }, { "epoch": 1.7420431517463344, "grad_norm": 3.433469533920288, "learning_rate": 4.1299391064363925e-06, "loss": 1.1526, "step": 29228 }, { "epoch": 1.7421623554654904, "grad_norm": 3.047715187072754, "learning_rate": 4.1261764126626965e-06, "loss": 0.9689, "step": 29230 }, { "epoch": 1.7422815591846466, "grad_norm": 3.3320960998535156, "learning_rate": 4.122415359952631e-06, "loss": 1.026, "step": 29232 }, { "epoch": 1.7424007629038027, "grad_norm": 3.478093147277832, "learning_rate": 4.118655948440731e-06, "loss": 1.0378, "step": 29234 }, { "epoch": 1.7425199666229587, "grad_norm": 3.273744583129883, "learning_rate": 4.1148981782614945e-06, "loss": 1.1632, "step": 29236 }, { "epoch": 1.7426391703421147, "grad_norm": 3.498654365539551, "learning_rate": 4.111142049549338e-06, "loss": 1.1966, "step": 29238 }, { "epoch": 1.7427583740612707, "grad_norm": 2.958570718765259, "learning_rate": 4.107387562438641e-06, "loss": 0.9858, "step": 29240 }, { "epoch": 1.7428775777804266, "grad_norm": 3.560180425643921, "learning_rate": 4.103634717063714e-06, "loss": 1.1602, "step": 29242 }, { "epoch": 1.7429967814995828, "grad_norm": 3.500849723815918, "learning_rate": 4.0998835135588e-06, "loss": 1.2133, "step": 29244 }, { "epoch": 1.7431159852187388, "grad_norm": 2.974942207336426, "learning_rate": 4.09613395205809e-06, "loss": 1.1003, "step": 29246 }, { "epoch": 1.743235188937895, "grad_norm": 3.246697187423706, "learning_rate": 4.09238603269575e-06, "loss": 1.1276, "step": 29248 }, { "epoch": 1.743354392657051, "grad_norm": 3.1233115196228027, "learning_rate": 4.088639755605822e-06, "loss": 1.0353, "step": 29250 }, { "epoch": 1.743473596376207, "grad_norm": 2.77736759185791, "learning_rate": 4.084895120922322e-06, "loss": 1.0336, "step": 29252 }, { "epoch": 1.7435928000953629, "grad_norm": 3.5509536266326904, "learning_rate": 4.081152128779236e-06, "loss": 1.0195, "step": 29254 }, { "epoch": 1.7437120038145189, "grad_norm": 3.519972324371338, "learning_rate": 4.077410779310436e-06, "loss": 1.1445, "step": 29256 }, { "epoch": 1.743831207533675, "grad_norm": 3.1770687103271484, "learning_rate": 4.073671072649782e-06, "loss": 1.0941, "step": 29258 }, { "epoch": 1.7439504112528312, "grad_norm": 3.0233235359191895, "learning_rate": 4.0699330089310485e-06, "loss": 0.973, "step": 29260 }, { "epoch": 1.7440696149719872, "grad_norm": 3.0418753623962402, "learning_rate": 4.0661965882879624e-06, "loss": 1.0729, "step": 29262 }, { "epoch": 1.7441888186911432, "grad_norm": 2.766535520553589, "learning_rate": 4.062461810854173e-06, "loss": 1.092, "step": 29264 }, { "epoch": 1.7443080224102991, "grad_norm": 2.8724606037139893, "learning_rate": 4.058728676763312e-06, "loss": 1.0597, "step": 29266 }, { "epoch": 1.744427226129455, "grad_norm": 3.22538423538208, "learning_rate": 4.0549971861489125e-06, "loss": 1.1547, "step": 29268 }, { "epoch": 1.7445464298486113, "grad_norm": 3.728846788406372, "learning_rate": 4.051267339144449e-06, "loss": 1.1732, "step": 29270 }, { "epoch": 1.7446656335677673, "grad_norm": 3.282557964324951, "learning_rate": 4.0475391358833825e-06, "loss": 1.043, "step": 29272 }, { "epoch": 1.7447848372869235, "grad_norm": 3.0145885944366455, "learning_rate": 4.043812576499046e-06, "loss": 1.0217, "step": 29274 }, { "epoch": 1.7449040410060794, "grad_norm": 3.131617784500122, "learning_rate": 4.040087661124786e-06, "loss": 1.1571, "step": 29276 }, { "epoch": 1.7450232447252354, "grad_norm": 3.445244789123535, "learning_rate": 4.036364389893838e-06, "loss": 0.9681, "step": 29278 }, { "epoch": 1.7451424484443914, "grad_norm": 3.318415880203247, "learning_rate": 4.032642762939393e-06, "loss": 1.0433, "step": 29280 }, { "epoch": 1.7452616521635473, "grad_norm": 3.6273176670074463, "learning_rate": 4.0289227803946e-06, "loss": 1.1263, "step": 29282 }, { "epoch": 1.7453808558827035, "grad_norm": 2.670431613922119, "learning_rate": 4.025204442392522e-06, "loss": 1.0399, "step": 29284 }, { "epoch": 1.7455000596018597, "grad_norm": 3.065551996231079, "learning_rate": 4.021487749066183e-06, "loss": 1.1057, "step": 29286 }, { "epoch": 1.7456192633210157, "grad_norm": 3.2549264430999756, "learning_rate": 4.017772700548533e-06, "loss": 1.1455, "step": 29288 }, { "epoch": 1.7457384670401717, "grad_norm": 3.0229368209838867, "learning_rate": 4.0140592969725e-06, "loss": 1.0548, "step": 29290 }, { "epoch": 1.7458576707593276, "grad_norm": 3.715989828109741, "learning_rate": 4.0103475384708875e-06, "loss": 1.252, "step": 29292 }, { "epoch": 1.7459768744784836, "grad_norm": 2.857333183288574, "learning_rate": 4.006637425176502e-06, "loss": 0.9089, "step": 29294 }, { "epoch": 1.7460960781976398, "grad_norm": 3.3208909034729004, "learning_rate": 4.0029289572220685e-06, "loss": 1.1304, "step": 29296 }, { "epoch": 1.7462152819167958, "grad_norm": 3.478584051132202, "learning_rate": 3.999222134740227e-06, "loss": 0.9991, "step": 29298 }, { "epoch": 1.746334485635952, "grad_norm": 3.106675624847412, "learning_rate": 3.99551695786361e-06, "loss": 1.0104, "step": 29300 }, { "epoch": 1.746453689355108, "grad_norm": 3.0900042057037354, "learning_rate": 3.99181342672475e-06, "loss": 1.0888, "step": 29302 }, { "epoch": 1.7465728930742639, "grad_norm": 3.40057373046875, "learning_rate": 3.988111541456141e-06, "loss": 0.9902, "step": 29304 }, { "epoch": 1.7466920967934199, "grad_norm": 3.207904815673828, "learning_rate": 3.984411302190211e-06, "loss": 1.1181, "step": 29306 }, { "epoch": 1.7468113005125758, "grad_norm": 3.7129931449890137, "learning_rate": 3.980712709059331e-06, "loss": 1.1175, "step": 29308 }, { "epoch": 1.746930504231732, "grad_norm": 3.1676223278045654, "learning_rate": 3.977015762195807e-06, "loss": 1.0922, "step": 29310 }, { "epoch": 1.7470497079508882, "grad_norm": 3.2686421871185303, "learning_rate": 3.973320461731894e-06, "loss": 1.1397, "step": 29312 }, { "epoch": 1.7471689116700442, "grad_norm": 3.3585832118988037, "learning_rate": 3.969626807799792e-06, "loss": 1.1979, "step": 29314 }, { "epoch": 1.7472881153892001, "grad_norm": 3.324551820755005, "learning_rate": 3.965934800531618e-06, "loss": 1.1384, "step": 29316 }, { "epoch": 1.7474073191083561, "grad_norm": 3.079287528991699, "learning_rate": 3.962244440059481e-06, "loss": 1.1092, "step": 29318 }, { "epoch": 1.747526522827512, "grad_norm": 3.105652332305908, "learning_rate": 3.958555726515356e-06, "loss": 1.3123, "step": 29320 }, { "epoch": 1.7476457265466683, "grad_norm": 3.1930441856384277, "learning_rate": 3.95486866003123e-06, "loss": 1.0741, "step": 29322 }, { "epoch": 1.7477649302658242, "grad_norm": 2.697542905807495, "learning_rate": 3.951183240738998e-06, "loss": 1.0555, "step": 29324 }, { "epoch": 1.7478841339849804, "grad_norm": 3.1353371143341064, "learning_rate": 3.9474994687704934e-06, "loss": 1.0355, "step": 29326 }, { "epoch": 1.7480033377041364, "grad_norm": 3.065070152282715, "learning_rate": 3.9438173442575e-06, "loss": 1.0897, "step": 29328 }, { "epoch": 1.7481225414232924, "grad_norm": 3.3062422275543213, "learning_rate": 3.940136867331734e-06, "loss": 1.0413, "step": 29330 }, { "epoch": 1.7482417451424483, "grad_norm": 2.815192461013794, "learning_rate": 3.936458038124874e-06, "loss": 0.9674, "step": 29332 }, { "epoch": 1.7483609488616045, "grad_norm": 3.236520290374756, "learning_rate": 3.932780856768498e-06, "loss": 1.1208, "step": 29334 }, { "epoch": 1.7484801525807605, "grad_norm": 3.2783899307250977, "learning_rate": 3.929105323394189e-06, "loss": 1.1426, "step": 29336 }, { "epoch": 1.7485993562999167, "grad_norm": 3.0623831748962402, "learning_rate": 3.925431438133398e-06, "loss": 1.0973, "step": 29338 }, { "epoch": 1.7487185600190727, "grad_norm": 3.301025867462158, "learning_rate": 3.92175920111757e-06, "loss": 1.0956, "step": 29340 }, { "epoch": 1.7488377637382286, "grad_norm": 3.193819046020508, "learning_rate": 3.918088612478083e-06, "loss": 1.0821, "step": 29342 }, { "epoch": 1.7489569674573846, "grad_norm": 3.353327512741089, "learning_rate": 3.914419672346215e-06, "loss": 1.1383, "step": 29344 }, { "epoch": 1.7490761711765406, "grad_norm": 3.21109676361084, "learning_rate": 3.910752380853245e-06, "loss": 1.1369, "step": 29346 }, { "epoch": 1.7491953748956968, "grad_norm": 3.4281368255615234, "learning_rate": 3.907086738130355e-06, "loss": 1.318, "step": 29348 }, { "epoch": 1.7493145786148527, "grad_norm": 3.5702807903289795, "learning_rate": 3.903422744308677e-06, "loss": 1.2219, "step": 29350 }, { "epoch": 1.749433782334009, "grad_norm": 3.457481861114502, "learning_rate": 3.899760399519276e-06, "loss": 1.1485, "step": 29352 }, { "epoch": 1.749552986053165, "grad_norm": 3.348871946334839, "learning_rate": 3.896099703893191e-06, "loss": 1.1404, "step": 29354 }, { "epoch": 1.7496721897723209, "grad_norm": 2.9433844089508057, "learning_rate": 3.892440657561358e-06, "loss": 0.8853, "step": 29356 }, { "epoch": 1.7497913934914768, "grad_norm": 2.9288527965545654, "learning_rate": 3.888783260654666e-06, "loss": 0.9969, "step": 29358 }, { "epoch": 1.749910597210633, "grad_norm": 3.3478057384490967, "learning_rate": 3.885127513303982e-06, "loss": 1.1049, "step": 29360 }, { "epoch": 1.750029800929789, "grad_norm": 2.994595527648926, "learning_rate": 3.881473415640053e-06, "loss": 1.1209, "step": 29362 }, { "epoch": 1.7501490046489452, "grad_norm": 2.969672441482544, "learning_rate": 3.8778209677936215e-06, "loss": 1.0656, "step": 29364 }, { "epoch": 1.7502682083681012, "grad_norm": 2.6311283111572266, "learning_rate": 3.874170169895336e-06, "loss": 1.1284, "step": 29366 }, { "epoch": 1.7503874120872571, "grad_norm": 3.3070921897888184, "learning_rate": 3.870521022075802e-06, "loss": 1.1888, "step": 29368 }, { "epoch": 1.750506615806413, "grad_norm": 2.971782922744751, "learning_rate": 3.866873524465564e-06, "loss": 1.0851, "step": 29370 }, { "epoch": 1.750625819525569, "grad_norm": 3.5847010612487793, "learning_rate": 3.863227677195102e-06, "loss": 1.1335, "step": 29372 }, { "epoch": 1.7507450232447253, "grad_norm": 3.279266595840454, "learning_rate": 3.859583480394841e-06, "loss": 1.1948, "step": 29374 }, { "epoch": 1.7508642269638812, "grad_norm": 3.1529603004455566, "learning_rate": 3.855940934195146e-06, "loss": 1.1232, "step": 29376 }, { "epoch": 1.7509834306830374, "grad_norm": 3.0027830600738525, "learning_rate": 3.8523000387263255e-06, "loss": 1.0476, "step": 29378 }, { "epoch": 1.7511026344021934, "grad_norm": 2.72356915473938, "learning_rate": 3.848660794118613e-06, "loss": 0.9078, "step": 29380 }, { "epoch": 1.7512218381213494, "grad_norm": 3.5296318531036377, "learning_rate": 3.8450232005022295e-06, "loss": 1.2121, "step": 29382 }, { "epoch": 1.7513410418405053, "grad_norm": 3.4146833419799805, "learning_rate": 3.841387258007262e-06, "loss": 1.162, "step": 29384 }, { "epoch": 1.7514602455596615, "grad_norm": 3.463916778564453, "learning_rate": 3.837752966763813e-06, "loss": 1.0588, "step": 29386 }, { "epoch": 1.7515794492788175, "grad_norm": 3.46736478805542, "learning_rate": 3.834120326901897e-06, "loss": 1.0217, "step": 29388 }, { "epoch": 1.7516986529979737, "grad_norm": 3.0548007488250732, "learning_rate": 3.8304893385514295e-06, "loss": 1.2538, "step": 29390 }, { "epoch": 1.7518178567171296, "grad_norm": 3.258824586868286, "learning_rate": 3.826860001842336e-06, "loss": 1.232, "step": 29392 }, { "epoch": 1.7519370604362856, "grad_norm": 3.298910140991211, "learning_rate": 3.823232316904435e-06, "loss": 1.1128, "step": 29394 }, { "epoch": 1.7520562641554416, "grad_norm": 3.4642105102539062, "learning_rate": 3.819606283867511e-06, "loss": 1.1014, "step": 29396 }, { "epoch": 1.7521754678745975, "grad_norm": 3.218142509460449, "learning_rate": 3.815981902861265e-06, "loss": 0.9626, "step": 29398 }, { "epoch": 1.7522946715937537, "grad_norm": 3.3166356086730957, "learning_rate": 3.8123591740153843e-06, "loss": 1.1086, "step": 29400 }, { "epoch": 1.7524138753129097, "grad_norm": 3.4049556255340576, "learning_rate": 3.80873809745943e-06, "loss": 1.0774, "step": 29402 }, { "epoch": 1.752533079032066, "grad_norm": 3.5966036319732666, "learning_rate": 3.805118673322949e-06, "loss": 1.1972, "step": 29404 }, { "epoch": 1.7526522827512219, "grad_norm": 3.3315703868865967, "learning_rate": 3.801500901735444e-06, "loss": 1.1642, "step": 29406 }, { "epoch": 1.7527714864703778, "grad_norm": 3.239675760269165, "learning_rate": 3.7978847828263054e-06, "loss": 1.1467, "step": 29408 }, { "epoch": 1.7528906901895338, "grad_norm": 3.144672155380249, "learning_rate": 3.7942703167249125e-06, "loss": 1.0519, "step": 29410 }, { "epoch": 1.75300989390869, "grad_norm": 3.416473388671875, "learning_rate": 3.790657503560563e-06, "loss": 1.0944, "step": 29412 }, { "epoch": 1.753129097627846, "grad_norm": 3.1561129093170166, "learning_rate": 3.787046343462497e-06, "loss": 1.0088, "step": 29414 }, { "epoch": 1.7532483013470022, "grad_norm": 2.9287302494049072, "learning_rate": 3.783436836559895e-06, "loss": 1.0578, "step": 29416 }, { "epoch": 1.7533675050661581, "grad_norm": 3.523674249649048, "learning_rate": 3.779828982981892e-06, "loss": 1.0588, "step": 29418 }, { "epoch": 1.753486708785314, "grad_norm": 3.339907169342041, "learning_rate": 3.776222782857547e-06, "loss": 0.9984, "step": 29420 }, { "epoch": 1.75360591250447, "grad_norm": 3.546501874923706, "learning_rate": 3.7726182363158503e-06, "loss": 1.136, "step": 29422 }, { "epoch": 1.753725116223626, "grad_norm": 3.0282323360443115, "learning_rate": 3.769015343485788e-06, "loss": 0.9784, "step": 29424 }, { "epoch": 1.7538443199427822, "grad_norm": 3.3934543132781982, "learning_rate": 3.7654141044962066e-06, "loss": 0.9169, "step": 29426 }, { "epoch": 1.7539635236619384, "grad_norm": 2.897435188293457, "learning_rate": 3.7618145194759592e-06, "loss": 1.0483, "step": 29428 }, { "epoch": 1.7540827273810944, "grad_norm": 3.3762292861938477, "learning_rate": 3.75821658855382e-06, "loss": 1.0138, "step": 29430 }, { "epoch": 1.7542019311002504, "grad_norm": 3.3532485961914062, "learning_rate": 3.75462031185847e-06, "loss": 1.073, "step": 29432 }, { "epoch": 1.7543211348194063, "grad_norm": 3.699054718017578, "learning_rate": 3.7510256895185837e-06, "loss": 1.1485, "step": 29434 }, { "epoch": 1.7544403385385623, "grad_norm": 3.4954605102539062, "learning_rate": 3.7474327216627524e-06, "loss": 1.1502, "step": 29436 }, { "epoch": 1.7545595422577185, "grad_norm": 3.6274349689483643, "learning_rate": 3.743841408419502e-06, "loss": 1.2218, "step": 29438 }, { "epoch": 1.7546787459768745, "grad_norm": 3.237304925918579, "learning_rate": 3.7402517499173064e-06, "loss": 1.0207, "step": 29440 }, { "epoch": 1.7547979496960306, "grad_norm": 3.1783783435821533, "learning_rate": 3.73666374628458e-06, "loss": 1.0548, "step": 29442 }, { "epoch": 1.7549171534151866, "grad_norm": 3.437671422958374, "learning_rate": 3.733077397649676e-06, "loss": 1.008, "step": 29444 }, { "epoch": 1.7550363571343426, "grad_norm": 3.486060857772827, "learning_rate": 3.729492704140908e-06, "loss": 1.1811, "step": 29446 }, { "epoch": 1.7551555608534986, "grad_norm": 2.8194990158081055, "learning_rate": 3.7259096658864958e-06, "loss": 1.0725, "step": 29448 }, { "epoch": 1.7552747645726545, "grad_norm": 3.5806632041931152, "learning_rate": 3.7223282830146034e-06, "loss": 1.0804, "step": 29450 }, { "epoch": 1.7553939682918107, "grad_norm": 3.0263240337371826, "learning_rate": 3.7187485556533895e-06, "loss": 1.0978, "step": 29452 }, { "epoch": 1.755513172010967, "grad_norm": 3.1997809410095215, "learning_rate": 3.7151704839308687e-06, "loss": 1.1006, "step": 29454 }, { "epoch": 1.7556323757301229, "grad_norm": 3.45794939994812, "learning_rate": 3.7115940679750717e-06, "loss": 1.2278, "step": 29456 }, { "epoch": 1.7557515794492788, "grad_norm": 3.039351463317871, "learning_rate": 3.7080193079139292e-06, "loss": 1.1384, "step": 29458 }, { "epoch": 1.7558707831684348, "grad_norm": 3.5525922775268555, "learning_rate": 3.7044462038753226e-06, "loss": 1.1391, "step": 29460 }, { "epoch": 1.7559899868875908, "grad_norm": 3.3713228702545166, "learning_rate": 3.7008747559870716e-06, "loss": 1.0732, "step": 29462 }, { "epoch": 1.756109190606747, "grad_norm": 3.121140480041504, "learning_rate": 3.697304964376941e-06, "loss": 1.1615, "step": 29464 }, { "epoch": 1.756228394325903, "grad_norm": 2.944965124130249, "learning_rate": 3.6937368291726338e-06, "loss": 1.0969, "step": 29466 }, { "epoch": 1.7563475980450591, "grad_norm": 3.0210468769073486, "learning_rate": 3.6901703505017925e-06, "loss": 1.103, "step": 29468 }, { "epoch": 1.756466801764215, "grad_norm": 2.9078705310821533, "learning_rate": 3.6866055284920153e-06, "loss": 1.0713, "step": 29470 }, { "epoch": 1.756586005483371, "grad_norm": 2.7916126251220703, "learning_rate": 3.6830423632708055e-06, "loss": 1.0053, "step": 29472 }, { "epoch": 1.756705209202527, "grad_norm": 3.42573618888855, "learning_rate": 3.67948085496565e-06, "loss": 1.1417, "step": 29474 }, { "epoch": 1.756824412921683, "grad_norm": 3.1373093128204346, "learning_rate": 3.6759210037039584e-06, "loss": 1.0966, "step": 29476 }, { "epoch": 1.7569436166408392, "grad_norm": 3.4490702152252197, "learning_rate": 3.67236280961305e-06, "loss": 1.1922, "step": 29478 }, { "epoch": 1.7570628203599954, "grad_norm": 3.2157773971557617, "learning_rate": 3.6688062728202356e-06, "loss": 1.1352, "step": 29480 }, { "epoch": 1.7571820240791514, "grad_norm": 3.3608272075653076, "learning_rate": 3.665251393452751e-06, "loss": 1.0995, "step": 29482 }, { "epoch": 1.7573012277983073, "grad_norm": 3.040515661239624, "learning_rate": 3.661698171637751e-06, "loss": 1.1484, "step": 29484 }, { "epoch": 1.7574204315174633, "grad_norm": 3.0520830154418945, "learning_rate": 3.6581466075023443e-06, "loss": 1.0258, "step": 29486 }, { "epoch": 1.7575396352366193, "grad_norm": 3.078556537628174, "learning_rate": 3.654596701173607e-06, "loss": 1.0822, "step": 29488 }, { "epoch": 1.7576588389557755, "grad_norm": 3.4224562644958496, "learning_rate": 3.6510484527784994e-06, "loss": 1.3148, "step": 29490 }, { "epoch": 1.7577780426749314, "grad_norm": 3.3394241333007812, "learning_rate": 3.647501862443975e-06, "loss": 1.0714, "step": 29492 }, { "epoch": 1.7578972463940876, "grad_norm": 3.05645489692688, "learning_rate": 3.64395693029691e-06, "loss": 1.0645, "step": 29494 }, { "epoch": 1.7580164501132436, "grad_norm": 3.1637604236602783, "learning_rate": 3.6404136564640977e-06, "loss": 1.2392, "step": 29496 }, { "epoch": 1.7581356538323996, "grad_norm": 3.3084652423858643, "learning_rate": 3.6368720410723146e-06, "loss": 1.129, "step": 29498 }, { "epoch": 1.7582548575515555, "grad_norm": 3.1493661403656006, "learning_rate": 3.6333320842482475e-06, "loss": 0.922, "step": 29500 }, { "epoch": 1.7583740612707115, "grad_norm": 3.1073362827301025, "learning_rate": 3.629793786118535e-06, "loss": 1.0867, "step": 29502 }, { "epoch": 1.7584932649898677, "grad_norm": 3.3224337100982666, "learning_rate": 3.626257146809753e-06, "loss": 1.1577, "step": 29504 }, { "epoch": 1.7586124687090239, "grad_norm": 3.2221271991729736, "learning_rate": 3.622722166448417e-06, "loss": 1.0808, "step": 29506 }, { "epoch": 1.7587316724281798, "grad_norm": 3.009568452835083, "learning_rate": 3.6191888451609923e-06, "loss": 1.1722, "step": 29508 }, { "epoch": 1.7588508761473358, "grad_norm": 3.152587890625, "learning_rate": 3.6156571830738674e-06, "loss": 1.0803, "step": 29510 }, { "epoch": 1.7589700798664918, "grad_norm": 3.286783456802368, "learning_rate": 3.612127180313385e-06, "loss": 1.0727, "step": 29512 }, { "epoch": 1.7590892835856478, "grad_norm": 3.0353806018829346, "learning_rate": 3.6085988370058276e-06, "loss": 1.0505, "step": 29514 }, { "epoch": 1.759208487304804, "grad_norm": 2.6658055782318115, "learning_rate": 3.605072153277428e-06, "loss": 1.1888, "step": 29516 }, { "epoch": 1.75932769102396, "grad_norm": 3.389460325241089, "learning_rate": 3.6015471292543177e-06, "loss": 1.1834, "step": 29518 }, { "epoch": 1.759446894743116, "grad_norm": 2.97859525680542, "learning_rate": 3.5980237650626303e-06, "loss": 1.0506, "step": 29520 }, { "epoch": 1.759566098462272, "grad_norm": 3.108518123626709, "learning_rate": 3.5945020608283918e-06, "loss": 1.1016, "step": 29522 }, { "epoch": 1.759685302181428, "grad_norm": 3.2512357234954834, "learning_rate": 3.5909820166775908e-06, "loss": 1.0495, "step": 29524 }, { "epoch": 1.759804505900584, "grad_norm": 3.2683045864105225, "learning_rate": 3.587463632736149e-06, "loss": 1.0013, "step": 29526 }, { "epoch": 1.75992370961974, "grad_norm": 3.3942575454711914, "learning_rate": 3.5839469091299314e-06, "loss": 1.0655, "step": 29528 }, { "epoch": 1.7600429133388962, "grad_norm": 3.1965181827545166, "learning_rate": 3.5804318459847443e-06, "loss": 1.0935, "step": 29530 }, { "epoch": 1.7601621170580524, "grad_norm": 3.291255474090576, "learning_rate": 3.576918443426325e-06, "loss": 1.0085, "step": 29532 }, { "epoch": 1.7602813207772083, "grad_norm": 2.942734479904175, "learning_rate": 3.573406701580384e-06, "loss": 1.1534, "step": 29534 }, { "epoch": 1.7604005244963643, "grad_norm": 2.943049192428589, "learning_rate": 3.5698966205725214e-06, "loss": 1.2944, "step": 29536 }, { "epoch": 1.7605197282155203, "grad_norm": 2.9012115001678467, "learning_rate": 3.5663882005283146e-06, "loss": 1.0459, "step": 29538 }, { "epoch": 1.7606389319346762, "grad_norm": 3.044032335281372, "learning_rate": 3.5628814415732847e-06, "loss": 0.914, "step": 29540 }, { "epoch": 1.7607581356538324, "grad_norm": 3.4368479251861572, "learning_rate": 3.5593763438328485e-06, "loss": 1.0804, "step": 29542 }, { "epoch": 1.7608773393729884, "grad_norm": 3.349189043045044, "learning_rate": 3.5558729074324283e-06, "loss": 1.1481, "step": 29544 }, { "epoch": 1.7609965430921446, "grad_norm": 3.126959800720215, "learning_rate": 3.5523711324973396e-06, "loss": 1.0997, "step": 29546 }, { "epoch": 1.7611157468113006, "grad_norm": 3.116076707839966, "learning_rate": 3.548871019152855e-06, "loss": 0.9985, "step": 29548 }, { "epoch": 1.7612349505304565, "grad_norm": 3.4727389812469482, "learning_rate": 3.5453725675241856e-06, "loss": 1.1796, "step": 29550 }, { "epoch": 1.7613541542496125, "grad_norm": 3.1297109127044678, "learning_rate": 3.5418757777364808e-06, "loss": 0.9749, "step": 29552 }, { "epoch": 1.7614733579687685, "grad_norm": 2.7237396240234375, "learning_rate": 3.5383806499148353e-06, "loss": 1.1892, "step": 29554 }, { "epoch": 1.7615925616879247, "grad_norm": 3.2990076541900635, "learning_rate": 3.534887184184271e-06, "loss": 1.0669, "step": 29556 }, { "epoch": 1.7617117654070809, "grad_norm": 3.2377140522003174, "learning_rate": 3.5313953806697887e-06, "loss": 1.247, "step": 29558 }, { "epoch": 1.7618309691262368, "grad_norm": 2.943490982055664, "learning_rate": 3.5279052394962654e-06, "loss": 1.1396, "step": 29560 }, { "epoch": 1.7619501728453928, "grad_norm": 3.4423086643218994, "learning_rate": 3.5244167607885846e-06, "loss": 1.1731, "step": 29562 }, { "epoch": 1.7620693765645488, "grad_norm": 2.9016799926757812, "learning_rate": 3.5209299446715305e-06, "loss": 1.0498, "step": 29564 }, { "epoch": 1.7621885802837047, "grad_norm": 3.0795202255249023, "learning_rate": 3.5174447912698363e-06, "loss": 0.9871, "step": 29566 }, { "epoch": 1.762307784002861, "grad_norm": 3.099052667617798, "learning_rate": 3.513961300708185e-06, "loss": 1.115, "step": 29568 }, { "epoch": 1.762426987722017, "grad_norm": 3.113535165786743, "learning_rate": 3.5104794731111833e-06, "loss": 1.2273, "step": 29570 }, { "epoch": 1.762546191441173, "grad_norm": 3.0541133880615234, "learning_rate": 3.506999308603398e-06, "loss": 1.1181, "step": 29572 }, { "epoch": 1.762665395160329, "grad_norm": 3.54394793510437, "learning_rate": 3.5035208073093184e-06, "loss": 1.117, "step": 29574 }, { "epoch": 1.762784598879485, "grad_norm": 3.5509047508239746, "learning_rate": 3.500043969353384e-06, "loss": 1.2267, "step": 29576 }, { "epoch": 1.762903802598641, "grad_norm": 3.3546316623687744, "learning_rate": 3.496568794859967e-06, "loss": 1.1284, "step": 29578 }, { "epoch": 1.763023006317797, "grad_norm": 3.3421900272369385, "learning_rate": 3.4930952839534127e-06, "loss": 1.2289, "step": 29580 }, { "epoch": 1.7631422100369532, "grad_norm": 3.2497708797454834, "learning_rate": 3.489623436757955e-06, "loss": 1.0222, "step": 29582 }, { "epoch": 1.7632614137561093, "grad_norm": 3.025139093399048, "learning_rate": 3.486153253397789e-06, "loss": 1.0729, "step": 29584 }, { "epoch": 1.7633806174752653, "grad_norm": 2.8889060020446777, "learning_rate": 3.4826847339970824e-06, "loss": 1.0279, "step": 29586 }, { "epoch": 1.7634998211944213, "grad_norm": 3.2889010906219482, "learning_rate": 3.4792178786798857e-06, "loss": 1.0009, "step": 29588 }, { "epoch": 1.7636190249135772, "grad_norm": 3.488553047180176, "learning_rate": 3.475752687570244e-06, "loss": 1.1348, "step": 29590 }, { "epoch": 1.7637382286327332, "grad_norm": 3.480205774307251, "learning_rate": 3.472289160792108e-06, "loss": 1.0613, "step": 29592 }, { "epoch": 1.7638574323518894, "grad_norm": 3.255185127258301, "learning_rate": 3.4688272984693793e-06, "loss": 1.0856, "step": 29594 }, { "epoch": 1.7639766360710454, "grad_norm": 3.3057844638824463, "learning_rate": 3.465367100725908e-06, "loss": 1.1355, "step": 29596 }, { "epoch": 1.7640958397902016, "grad_norm": 3.5262434482574463, "learning_rate": 3.461908567685468e-06, "loss": 1.0715, "step": 29598 }, { "epoch": 1.7642150435093575, "grad_norm": 3.741595983505249, "learning_rate": 3.4584516994717874e-06, "loss": 1.1445, "step": 29600 }, { "epoch": 1.7643342472285135, "grad_norm": 3.1165127754211426, "learning_rate": 3.4549964962085234e-06, "loss": 1.0711, "step": 29602 }, { "epoch": 1.7644534509476695, "grad_norm": 2.9694106578826904, "learning_rate": 3.451542958019305e-06, "loss": 1.1308, "step": 29604 }, { "epoch": 1.7645726546668254, "grad_norm": 3.375673770904541, "learning_rate": 3.448091085027638e-06, "loss": 1.1128, "step": 29606 }, { "epoch": 1.7646918583859816, "grad_norm": 3.6063356399536133, "learning_rate": 3.444640877357036e-06, "loss": 1.0111, "step": 29608 }, { "epoch": 1.7648110621051378, "grad_norm": 3.1289913654327393, "learning_rate": 3.4411923351309217e-06, "loss": 1.0776, "step": 29610 }, { "epoch": 1.7649302658242938, "grad_norm": 3.085893154144287, "learning_rate": 3.437745458472652e-06, "loss": 1.2395, "step": 29612 }, { "epoch": 1.7650494695434498, "grad_norm": 3.0351903438568115, "learning_rate": 3.4343002475055453e-06, "loss": 0.8599, "step": 29614 }, { "epoch": 1.7651686732626057, "grad_norm": 3.299424171447754, "learning_rate": 3.430856702352836e-06, "loss": 1.1574, "step": 29616 }, { "epoch": 1.7652878769817617, "grad_norm": 3.6006948947906494, "learning_rate": 3.4274148231377145e-06, "loss": 1.214, "step": 29618 }, { "epoch": 1.765407080700918, "grad_norm": 3.0364761352539062, "learning_rate": 3.4239746099833102e-06, "loss": 0.9786, "step": 29620 }, { "epoch": 1.7655262844200739, "grad_norm": 3.014707326889038, "learning_rate": 3.4205360630127027e-06, "loss": 1.0189, "step": 29622 }, { "epoch": 1.76564548813923, "grad_norm": 2.8399674892425537, "learning_rate": 3.4170991823488764e-06, "loss": 1.1673, "step": 29624 }, { "epoch": 1.765764691858386, "grad_norm": 3.1710519790649414, "learning_rate": 3.4136639681148053e-06, "loss": 1.1178, "step": 29626 }, { "epoch": 1.765883895577542, "grad_norm": 3.4791600704193115, "learning_rate": 3.410230420433369e-06, "loss": 1.121, "step": 29628 }, { "epoch": 1.766003099296698, "grad_norm": 3.2055444717407227, "learning_rate": 3.406798539427386e-06, "loss": 1.0491, "step": 29630 }, { "epoch": 1.766122303015854, "grad_norm": 2.755749464035034, "learning_rate": 3.4033683252196412e-06, "loss": 0.9943, "step": 29632 }, { "epoch": 1.7662415067350101, "grad_norm": 3.2704033851623535, "learning_rate": 3.399939777932837e-06, "loss": 1.2657, "step": 29634 }, { "epoch": 1.7663607104541663, "grad_norm": 3.4902753829956055, "learning_rate": 3.3965128976896298e-06, "loss": 1.2581, "step": 29636 }, { "epoch": 1.7664799141733223, "grad_norm": 3.459101676940918, "learning_rate": 3.3930876846126115e-06, "loss": 1.1427, "step": 29638 }, { "epoch": 1.7665991178924783, "grad_norm": 3.495558023452759, "learning_rate": 3.3896641388243054e-06, "loss": 1.0604, "step": 29640 }, { "epoch": 1.7667183216116342, "grad_norm": 2.9604032039642334, "learning_rate": 3.386242260447192e-06, "loss": 1.0612, "step": 29642 }, { "epoch": 1.7668375253307902, "grad_norm": 3.3012197017669678, "learning_rate": 3.382822049603679e-06, "loss": 1.1626, "step": 29644 }, { "epoch": 1.7669567290499464, "grad_norm": 3.313023090362549, "learning_rate": 3.379403506416123e-06, "loss": 1.0257, "step": 29646 }, { "epoch": 1.7670759327691024, "grad_norm": 3.2842791080474854, "learning_rate": 3.3759866310068e-06, "loss": 1.1905, "step": 29648 }, { "epoch": 1.7671951364882585, "grad_norm": 3.084564685821533, "learning_rate": 3.3725714234979778e-06, "loss": 1.0795, "step": 29650 }, { "epoch": 1.7673143402074145, "grad_norm": 3.3371334075927734, "learning_rate": 3.3691578840117975e-06, "loss": 1.0679, "step": 29652 }, { "epoch": 1.7674335439265705, "grad_norm": 2.7234275341033936, "learning_rate": 3.3657460126703845e-06, "loss": 0.975, "step": 29654 }, { "epoch": 1.7675527476457265, "grad_norm": 2.898693084716797, "learning_rate": 3.3623358095958015e-06, "loss": 1.0809, "step": 29656 }, { "epoch": 1.7676719513648824, "grad_norm": 3.3770008087158203, "learning_rate": 3.358927274910034e-06, "loss": 0.9953, "step": 29658 }, { "epoch": 1.7677911550840386, "grad_norm": 2.802788019180298, "learning_rate": 3.355520408735019e-06, "loss": 0.9809, "step": 29660 }, { "epoch": 1.7679103588031948, "grad_norm": 3.18497371673584, "learning_rate": 3.3521152111926245e-06, "loss": 0.9989, "step": 29662 }, { "epoch": 1.7680295625223508, "grad_norm": 2.8339016437530518, "learning_rate": 3.3487116824046815e-06, "loss": 1.0901, "step": 29664 }, { "epoch": 1.7681487662415067, "grad_norm": 3.4126768112182617, "learning_rate": 3.345309822492926e-06, "loss": 1.1163, "step": 29666 }, { "epoch": 1.7682679699606627, "grad_norm": 3.071173906326294, "learning_rate": 3.341909631579082e-06, "loss": 1.1408, "step": 29668 }, { "epoch": 1.7683871736798187, "grad_norm": 3.1798388957977295, "learning_rate": 3.3385111097847534e-06, "loss": 1.1732, "step": 29670 }, { "epoch": 1.7685063773989749, "grad_norm": 2.868868112564087, "learning_rate": 3.3351142572315365e-06, "loss": 1.0014, "step": 29672 }, { "epoch": 1.7686255811181308, "grad_norm": 3.163586139678955, "learning_rate": 3.3317190740409623e-06, "loss": 1.0845, "step": 29674 }, { "epoch": 1.768744784837287, "grad_norm": 2.9268198013305664, "learning_rate": 3.328325560334444e-06, "loss": 1.1225, "step": 29676 }, { "epoch": 1.768863988556443, "grad_norm": 3.2456226348876953, "learning_rate": 3.3249337162334195e-06, "loss": 1.054, "step": 29678 }, { "epoch": 1.768983192275599, "grad_norm": 3.0112762451171875, "learning_rate": 3.321543541859212e-06, "loss": 1.0448, "step": 29680 }, { "epoch": 1.769102395994755, "grad_norm": 4.011288642883301, "learning_rate": 3.318155037333104e-06, "loss": 0.9892, "step": 29682 }, { "epoch": 1.769221599713911, "grad_norm": 3.021012306213379, "learning_rate": 3.3147682027762973e-06, "loss": 0.9535, "step": 29684 }, { "epoch": 1.769340803433067, "grad_norm": 3.190258741378784, "learning_rate": 3.3113830383099843e-06, "loss": 1.0464, "step": 29686 }, { "epoch": 1.7694600071522233, "grad_norm": 2.912043809890747, "learning_rate": 3.3079995440552346e-06, "loss": 1.0586, "step": 29688 }, { "epoch": 1.7695792108713793, "grad_norm": 2.9980034828186035, "learning_rate": 3.3046177201330854e-06, "loss": 1.0526, "step": 29690 }, { "epoch": 1.7696984145905352, "grad_norm": 3.5119965076446533, "learning_rate": 3.3012375666645447e-06, "loss": 1.0861, "step": 29692 }, { "epoch": 1.7698176183096912, "grad_norm": 3.081613540649414, "learning_rate": 3.2978590837704994e-06, "loss": 1.0901, "step": 29694 }, { "epoch": 1.7699368220288472, "grad_norm": 3.0500142574310303, "learning_rate": 3.2944822715718304e-06, "loss": 1.1448, "step": 29696 }, { "epoch": 1.7700560257480034, "grad_norm": 3.436091661453247, "learning_rate": 3.2911071301893305e-06, "loss": 1.0928, "step": 29698 }, { "epoch": 1.7701752294671593, "grad_norm": 3.4365034103393555, "learning_rate": 3.287733659743741e-06, "loss": 1.2189, "step": 29700 }, { "epoch": 1.7702944331863155, "grad_norm": 3.5057852268218994, "learning_rate": 3.2843618603557445e-06, "loss": 1.1227, "step": 29702 }, { "epoch": 1.7704136369054715, "grad_norm": 3.307354211807251, "learning_rate": 3.2809917321459604e-06, "loss": 1.1552, "step": 29704 }, { "epoch": 1.7705328406246275, "grad_norm": 3.3836722373962402, "learning_rate": 3.2776232752349534e-06, "loss": 1.0637, "step": 29706 }, { "epoch": 1.7706520443437834, "grad_norm": 3.2938473224639893, "learning_rate": 3.274256489743216e-06, "loss": 1.0013, "step": 29708 }, { "epoch": 1.7707712480629396, "grad_norm": 3.1807239055633545, "learning_rate": 3.270891375791196e-06, "loss": 1.2104, "step": 29710 }, { "epoch": 1.7708904517820956, "grad_norm": 3.5107786655426025, "learning_rate": 3.267527933499265e-06, "loss": 1.0915, "step": 29712 }, { "epoch": 1.7710096555012518, "grad_norm": 3.34989333152771, "learning_rate": 3.264166162987764e-06, "loss": 1.1553, "step": 29714 }, { "epoch": 1.7711288592204077, "grad_norm": 3.606712818145752, "learning_rate": 3.260806064376942e-06, "loss": 1.0571, "step": 29716 }, { "epoch": 1.7712480629395637, "grad_norm": 3.3214542865753174, "learning_rate": 3.2574476377869976e-06, "loss": 1.174, "step": 29718 }, { "epoch": 1.7713672666587197, "grad_norm": 3.5650408267974854, "learning_rate": 3.2540908833380847e-06, "loss": 1.2004, "step": 29720 }, { "epoch": 1.7714864703778757, "grad_norm": 3.565420389175415, "learning_rate": 3.2507358011502787e-06, "loss": 1.212, "step": 29722 }, { "epoch": 1.7716056740970318, "grad_norm": 3.4840939044952393, "learning_rate": 3.247382391343601e-06, "loss": 1.0156, "step": 29724 }, { "epoch": 1.7717248778161878, "grad_norm": 3.2839255332946777, "learning_rate": 3.244030654038022e-06, "loss": 1.1781, "step": 29726 }, { "epoch": 1.771844081535344, "grad_norm": 3.4038023948669434, "learning_rate": 3.2406805893534397e-06, "loss": 1.1017, "step": 29728 }, { "epoch": 1.7719632852545, "grad_norm": 3.043518543243408, "learning_rate": 3.2373321974096926e-06, "loss": 1.0678, "step": 29730 }, { "epoch": 1.772082488973656, "grad_norm": 2.9762558937072754, "learning_rate": 3.2339854783265845e-06, "loss": 1.0836, "step": 29732 }, { "epoch": 1.772201692692812, "grad_norm": 3.3193225860595703, "learning_rate": 3.230640432223814e-06, "loss": 1.193, "step": 29734 }, { "epoch": 1.772320896411968, "grad_norm": 3.3412106037139893, "learning_rate": 3.227297059221046e-06, "loss": 1.0713, "step": 29736 }, { "epoch": 1.772440100131124, "grad_norm": 3.154402017593384, "learning_rate": 3.2239553594379134e-06, "loss": 1.0801, "step": 29738 }, { "epoch": 1.7725593038502803, "grad_norm": 3.257814645767212, "learning_rate": 3.2206153329939206e-06, "loss": 1.1669, "step": 29740 }, { "epoch": 1.7726785075694362, "grad_norm": 3.0953423976898193, "learning_rate": 3.2172769800085767e-06, "loss": 0.9419, "step": 29742 }, { "epoch": 1.7727977112885922, "grad_norm": 3.4275014400482178, "learning_rate": 3.2139403006013035e-06, "loss": 1.234, "step": 29744 }, { "epoch": 1.7729169150077482, "grad_norm": 3.3177902698516846, "learning_rate": 3.2106052948914668e-06, "loss": 1.026, "step": 29746 }, { "epoch": 1.7730361187269041, "grad_norm": 3.1755850315093994, "learning_rate": 3.2072719629983595e-06, "loss": 1.2022, "step": 29748 }, { "epoch": 1.7731553224460603, "grad_norm": 3.425302743911743, "learning_rate": 3.203940305041242e-06, "loss": 1.186, "step": 29750 }, { "epoch": 1.7732745261652163, "grad_norm": 3.1267404556274414, "learning_rate": 3.2006103211392856e-06, "loss": 0.9938, "step": 29752 }, { "epoch": 1.7733937298843725, "grad_norm": 3.2703769207000732, "learning_rate": 3.1972820114116118e-06, "loss": 0.9618, "step": 29754 }, { "epoch": 1.7735129336035285, "grad_norm": 3.237518310546875, "learning_rate": 3.193955375977309e-06, "loss": 1.1049, "step": 29756 }, { "epoch": 1.7736321373226844, "grad_norm": 3.3657586574554443, "learning_rate": 3.190630414955353e-06, "loss": 1.1712, "step": 29758 }, { "epoch": 1.7737513410418404, "grad_norm": 3.4892849922180176, "learning_rate": 3.187307128464717e-06, "loss": 1.112, "step": 29760 }, { "epoch": 1.7738705447609966, "grad_norm": 2.9511570930480957, "learning_rate": 3.183985516624277e-06, "loss": 1.1286, "step": 29762 }, { "epoch": 1.7739897484801526, "grad_norm": 2.8112008571624756, "learning_rate": 3.1806655795528386e-06, "loss": 1.018, "step": 29764 }, { "epoch": 1.7741089521993088, "grad_norm": 3.0452661514282227, "learning_rate": 3.177347317369189e-06, "loss": 1.1136, "step": 29766 }, { "epoch": 1.7742281559184647, "grad_norm": 3.1684672832489014, "learning_rate": 3.174030730192029e-06, "loss": 1.1644, "step": 29768 }, { "epoch": 1.7743473596376207, "grad_norm": 3.052612781524658, "learning_rate": 3.1707158181400022e-06, "loss": 1.1024, "step": 29770 }, { "epoch": 1.7744665633567767, "grad_norm": 3.198197364807129, "learning_rate": 3.1674025813316967e-06, "loss": 0.9727, "step": 29772 }, { "epoch": 1.7745857670759326, "grad_norm": 3.1215426921844482, "learning_rate": 3.1640910198856344e-06, "loss": 1.0718, "step": 29774 }, { "epoch": 1.7747049707950888, "grad_norm": 3.491765022277832, "learning_rate": 3.1607811339202762e-06, "loss": 1.1113, "step": 29776 }, { "epoch": 1.7748241745142448, "grad_norm": 3.5061748027801514, "learning_rate": 3.1574729235540557e-06, "loss": 1.171, "step": 29778 }, { "epoch": 1.774943378233401, "grad_norm": 3.150937795639038, "learning_rate": 3.1541663889052888e-06, "loss": 1.0672, "step": 29780 }, { "epoch": 1.775062581952557, "grad_norm": 3.4260284900665283, "learning_rate": 3.150861530092264e-06, "loss": 1.1875, "step": 29782 }, { "epoch": 1.775181785671713, "grad_norm": 2.835578203201294, "learning_rate": 3.147558347233226e-06, "loss": 0.9855, "step": 29784 }, { "epoch": 1.7753009893908689, "grad_norm": 3.2358365058898926, "learning_rate": 3.1442568404463193e-06, "loss": 0.9422, "step": 29786 }, { "epoch": 1.775420193110025, "grad_norm": 3.673415184020996, "learning_rate": 3.1409570098496654e-06, "loss": 1.1974, "step": 29788 }, { "epoch": 1.775539396829181, "grad_norm": 3.0449070930480957, "learning_rate": 3.1376588555613097e-06, "loss": 1.0652, "step": 29790 }, { "epoch": 1.7756586005483372, "grad_norm": 2.9982147216796875, "learning_rate": 3.1343623776992293e-06, "loss": 1.1741, "step": 29792 }, { "epoch": 1.7757778042674932, "grad_norm": 3.4397225379943848, "learning_rate": 3.131067576381358e-06, "loss": 1.158, "step": 29794 }, { "epoch": 1.7758970079866492, "grad_norm": 3.3342578411102295, "learning_rate": 3.1277744517255626e-06, "loss": 1.1758, "step": 29796 }, { "epoch": 1.7760162117058051, "grad_norm": 3.069732904434204, "learning_rate": 3.1244830038496433e-06, "loss": 0.9451, "step": 29798 }, { "epoch": 1.7761354154249611, "grad_norm": 3.5101280212402344, "learning_rate": 3.1211932328713444e-06, "loss": 1.033, "step": 29800 }, { "epoch": 1.7762546191441173, "grad_norm": 3.4539506435394287, "learning_rate": 3.117905138908367e-06, "loss": 1.0546, "step": 29802 }, { "epoch": 1.7763738228632735, "grad_norm": 2.8248465061187744, "learning_rate": 3.114618722078322e-06, "loss": 1.1554, "step": 29804 }, { "epoch": 1.7764930265824295, "grad_norm": 3.4599649906158447, "learning_rate": 3.111333982498782e-06, "loss": 1.1, "step": 29806 }, { "epoch": 1.7766122303015854, "grad_norm": 3.206862449645996, "learning_rate": 3.1080509202872642e-06, "loss": 1.0588, "step": 29808 }, { "epoch": 1.7767314340207414, "grad_norm": 3.2852611541748047, "learning_rate": 3.104769535561186e-06, "loss": 1.0766, "step": 29810 }, { "epoch": 1.7768506377398974, "grad_norm": 3.255115270614624, "learning_rate": 3.1014898284379646e-06, "loss": 1.048, "step": 29812 }, { "epoch": 1.7769698414590536, "grad_norm": 3.312781810760498, "learning_rate": 3.0982117990349113e-06, "loss": 1.0714, "step": 29814 }, { "epoch": 1.7770890451782095, "grad_norm": 3.0359723567962646, "learning_rate": 3.0949354474692937e-06, "loss": 1.0841, "step": 29816 }, { "epoch": 1.7772082488973657, "grad_norm": 3.2428641319274902, "learning_rate": 3.0916607738583123e-06, "loss": 1.1284, "step": 29818 }, { "epoch": 1.7773274526165217, "grad_norm": 3.4158551692962646, "learning_rate": 3.088387778319135e-06, "loss": 1.2259, "step": 29820 }, { "epoch": 1.7774466563356777, "grad_norm": 3.775804042816162, "learning_rate": 3.085116460968823e-06, "loss": 1.1118, "step": 29822 }, { "epoch": 1.7775658600548336, "grad_norm": 3.1966941356658936, "learning_rate": 3.0818468219244113e-06, "loss": 1.231, "step": 29824 }, { "epoch": 1.7776850637739896, "grad_norm": 3.0838212966918945, "learning_rate": 3.0785788613028777e-06, "loss": 0.9735, "step": 29826 }, { "epoch": 1.7778042674931458, "grad_norm": 3.5169589519500732, "learning_rate": 3.075312579221107e-06, "loss": 1.0383, "step": 29828 }, { "epoch": 1.777923471212302, "grad_norm": 3.1367039680480957, "learning_rate": 3.0720479757959607e-06, "loss": 0.941, "step": 29830 }, { "epoch": 1.778042674931458, "grad_norm": 3.334423065185547, "learning_rate": 3.0687850511442186e-06, "loss": 1.0008, "step": 29832 }, { "epoch": 1.778161878650614, "grad_norm": 3.515861749649048, "learning_rate": 3.065523805382614e-06, "loss": 1.0452, "step": 29834 }, { "epoch": 1.77828108236977, "grad_norm": 3.679441213607788, "learning_rate": 3.0622642386278043e-06, "loss": 1.1528, "step": 29836 }, { "epoch": 1.7784002860889259, "grad_norm": 3.2092602252960205, "learning_rate": 3.059006350996402e-06, "loss": 1.1764, "step": 29838 }, { "epoch": 1.778519489808082, "grad_norm": 3.4149787425994873, "learning_rate": 3.055750142604946e-06, "loss": 1.0723, "step": 29840 }, { "epoch": 1.778638693527238, "grad_norm": 3.3954262733459473, "learning_rate": 3.0524956135699224e-06, "loss": 1.0449, "step": 29842 }, { "epoch": 1.7787578972463942, "grad_norm": 3.521121025085449, "learning_rate": 3.049242764007765e-06, "loss": 1.0486, "step": 29844 }, { "epoch": 1.7788771009655502, "grad_norm": 3.491706132888794, "learning_rate": 3.045991594034825e-06, "loss": 1.1365, "step": 29846 }, { "epoch": 1.7789963046847062, "grad_norm": 3.684702157974243, "learning_rate": 3.042742103767432e-06, "loss": 1.1382, "step": 29848 }, { "epoch": 1.7791155084038621, "grad_norm": 2.972785711288452, "learning_rate": 3.039494293321804e-06, "loss": 1.103, "step": 29850 }, { "epoch": 1.779234712123018, "grad_norm": 3.52500581741333, "learning_rate": 3.0362481628141425e-06, "loss": 1.1568, "step": 29852 }, { "epoch": 1.7793539158421743, "grad_norm": 3.4192302227020264, "learning_rate": 3.0330037123605713e-06, "loss": 1.1132, "step": 29854 }, { "epoch": 1.7794731195613305, "grad_norm": 3.2465708255767822, "learning_rate": 3.0297609420771534e-06, "loss": 1.0604, "step": 29856 }, { "epoch": 1.7795923232804864, "grad_norm": 3.521993637084961, "learning_rate": 3.0265198520798898e-06, "loss": 1.1483, "step": 29858 }, { "epoch": 1.7797115269996424, "grad_norm": 2.754331111907959, "learning_rate": 3.0232804424847327e-06, "loss": 1.1012, "step": 29860 }, { "epoch": 1.7798307307187984, "grad_norm": 3.0976521968841553, "learning_rate": 3.0200427134075615e-06, "loss": 1.0074, "step": 29862 }, { "epoch": 1.7799499344379544, "grad_norm": 2.8566009998321533, "learning_rate": 3.0168066649641946e-06, "loss": 1.2032, "step": 29864 }, { "epoch": 1.7800691381571105, "grad_norm": 3.114694356918335, "learning_rate": 3.013572297270423e-06, "loss": 1.1598, "step": 29866 }, { "epoch": 1.7801883418762665, "grad_norm": 3.20188307762146, "learning_rate": 3.01033961044192e-06, "loss": 1.1055, "step": 29868 }, { "epoch": 1.7803075455954227, "grad_norm": 3.3166277408599854, "learning_rate": 3.0071086045943387e-06, "loss": 1.014, "step": 29870 }, { "epoch": 1.7804267493145787, "grad_norm": 3.4873464107513428, "learning_rate": 3.0038792798432857e-06, "loss": 1.0546, "step": 29872 }, { "epoch": 1.7805459530337346, "grad_norm": 3.43416166305542, "learning_rate": 3.000651636304247e-06, "loss": 1.0992, "step": 29874 }, { "epoch": 1.7806651567528906, "grad_norm": 2.9106106758117676, "learning_rate": 2.9974256740927133e-06, "loss": 1.1148, "step": 29876 }, { "epoch": 1.7807843604720466, "grad_norm": 3.2512402534484863, "learning_rate": 2.994201393324081e-06, "loss": 1.118, "step": 29878 }, { "epoch": 1.7809035641912028, "grad_norm": 3.404264211654663, "learning_rate": 2.9909787941136915e-06, "loss": 1.2543, "step": 29880 }, { "epoch": 1.781022767910359, "grad_norm": 2.9526147842407227, "learning_rate": 2.9877578765768355e-06, "loss": 1.1625, "step": 29882 }, { "epoch": 1.781141971629515, "grad_norm": 3.4524054527282715, "learning_rate": 2.984538640828727e-06, "loss": 1.1136, "step": 29884 }, { "epoch": 1.781261175348671, "grad_norm": 3.6115963459014893, "learning_rate": 2.9813210869845286e-06, "loss": 1.0521, "step": 29886 }, { "epoch": 1.7813803790678269, "grad_norm": 3.091663360595703, "learning_rate": 2.9781052151593492e-06, "loss": 1.1252, "step": 29888 }, { "epoch": 1.7814995827869828, "grad_norm": 3.303701639175415, "learning_rate": 2.9748910254682407e-06, "loss": 1.0407, "step": 29890 }, { "epoch": 1.781618786506139, "grad_norm": 3.408094882965088, "learning_rate": 2.9716785180261607e-06, "loss": 1.0411, "step": 29892 }, { "epoch": 1.781737990225295, "grad_norm": 3.286072015762329, "learning_rate": 2.9684676929480515e-06, "loss": 1.053, "step": 29894 }, { "epoch": 1.7818571939444512, "grad_norm": 3.200241804122925, "learning_rate": 2.9652585503487705e-06, "loss": 1.1127, "step": 29896 }, { "epoch": 1.7819763976636072, "grad_norm": 3.467985153198242, "learning_rate": 2.962051090343121e-06, "loss": 1.1657, "step": 29898 }, { "epoch": 1.7820956013827631, "grad_norm": 3.2868239879608154, "learning_rate": 2.9588453130458383e-06, "loss": 1.1019, "step": 29900 }, { "epoch": 1.782214805101919, "grad_norm": 2.907303810119629, "learning_rate": 2.9556412185716087e-06, "loss": 1.1737, "step": 29902 }, { "epoch": 1.782334008821075, "grad_norm": 3.080015182495117, "learning_rate": 2.952438807035057e-06, "loss": 1.1471, "step": 29904 }, { "epoch": 1.7824532125402313, "grad_norm": 3.1581554412841797, "learning_rate": 2.949238078550737e-06, "loss": 1.144, "step": 29906 }, { "epoch": 1.7825724162593874, "grad_norm": 3.3811094760894775, "learning_rate": 2.9460390332331554e-06, "loss": 1.1015, "step": 29908 }, { "epoch": 1.7826916199785434, "grad_norm": 3.07493257522583, "learning_rate": 2.9428416711967442e-06, "loss": 1.098, "step": 29910 }, { "epoch": 1.7828108236976994, "grad_norm": 2.654339075088501, "learning_rate": 2.939645992555906e-06, "loss": 0.8932, "step": 29912 }, { "epoch": 1.7829300274168554, "grad_norm": 3.06768536567688, "learning_rate": 2.9364519974249382e-06, "loss": 1.2032, "step": 29914 }, { "epoch": 1.7830492311360113, "grad_norm": 3.5300920009613037, "learning_rate": 2.9332596859180996e-06, "loss": 1.3273, "step": 29916 }, { "epoch": 1.7831684348551675, "grad_norm": 3.2587716579437256, "learning_rate": 2.9300690581496037e-06, "loss": 1.1591, "step": 29918 }, { "epoch": 1.7832876385743235, "grad_norm": 3.3450629711151123, "learning_rate": 2.9268801142335934e-06, "loss": 1.0752, "step": 29920 }, { "epoch": 1.7834068422934797, "grad_norm": 3.3769690990448, "learning_rate": 2.9236928542841323e-06, "loss": 1.1097, "step": 29922 }, { "epoch": 1.7835260460126356, "grad_norm": 3.330554962158203, "learning_rate": 2.920507278415252e-06, "loss": 1.1191, "step": 29924 }, { "epoch": 1.7836452497317916, "grad_norm": 3.0939781665802, "learning_rate": 2.9173233867409056e-06, "loss": 1.2021, "step": 29926 }, { "epoch": 1.7837644534509476, "grad_norm": 3.5666377544403076, "learning_rate": 2.914141179374996e-06, "loss": 1.0506, "step": 29928 }, { "epoch": 1.7838836571701036, "grad_norm": 3.164682388305664, "learning_rate": 2.9109606564313606e-06, "loss": 1.0787, "step": 29930 }, { "epoch": 1.7840028608892597, "grad_norm": 3.3332197666168213, "learning_rate": 2.9077818180237693e-06, "loss": 1.0364, "step": 29932 }, { "epoch": 1.784122064608416, "grad_norm": 3.5230164527893066, "learning_rate": 2.9046046642659418e-06, "loss": 1.121, "step": 29934 }, { "epoch": 1.784241268327572, "grad_norm": 2.9676406383514404, "learning_rate": 2.90142919527156e-06, "loss": 1.0433, "step": 29936 }, { "epoch": 1.7843604720467279, "grad_norm": 3.037281036376953, "learning_rate": 2.8982554111541824e-06, "loss": 1.069, "step": 29938 }, { "epoch": 1.7844796757658838, "grad_norm": 3.4853172302246094, "learning_rate": 2.8950833120273745e-06, "loss": 1.1006, "step": 29940 }, { "epoch": 1.7845988794850398, "grad_norm": 2.7482662200927734, "learning_rate": 2.8919128980046064e-06, "loss": 1.0198, "step": 29942 }, { "epoch": 1.784718083204196, "grad_norm": 2.8424081802368164, "learning_rate": 2.8887441691992865e-06, "loss": 1.1209, "step": 29944 }, { "epoch": 1.784837286923352, "grad_norm": 3.2036426067352295, "learning_rate": 2.885577125724781e-06, "loss": 1.0557, "step": 29946 }, { "epoch": 1.7849564906425082, "grad_norm": 3.398904323577881, "learning_rate": 2.8824117676943817e-06, "loss": 1.0458, "step": 29948 }, { "epoch": 1.7850756943616641, "grad_norm": 3.186354160308838, "learning_rate": 2.879248095221321e-06, "loss": 1.0764, "step": 29950 }, { "epoch": 1.78519489808082, "grad_norm": 3.2595324516296387, "learning_rate": 2.876086108418774e-06, "loss": 1.0805, "step": 29952 }, { "epoch": 1.785314101799976, "grad_norm": 2.706217050552368, "learning_rate": 2.8729258073998734e-06, "loss": 1.0198, "step": 29954 }, { "epoch": 1.785433305519132, "grad_norm": 3.10550594329834, "learning_rate": 2.8697671922776447e-06, "loss": 1.0209, "step": 29956 }, { "epoch": 1.7855525092382882, "grad_norm": 2.8767874240875244, "learning_rate": 2.8666102631651095e-06, "loss": 1.1036, "step": 29958 }, { "epoch": 1.7856717129574444, "grad_norm": 3.2043282985687256, "learning_rate": 2.863455020175193e-06, "loss": 1.0496, "step": 29960 }, { "epoch": 1.7857909166766004, "grad_norm": 2.958627462387085, "learning_rate": 2.860301463420756e-06, "loss": 1.0953, "step": 29962 }, { "epoch": 1.7859101203957564, "grad_norm": 3.0486695766448975, "learning_rate": 2.857149593014624e-06, "loss": 1.0096, "step": 29964 }, { "epoch": 1.7860293241149123, "grad_norm": 3.2437639236450195, "learning_rate": 2.853999409069552e-06, "loss": 1.0805, "step": 29966 }, { "epoch": 1.7861485278340683, "grad_norm": 3.239105701446533, "learning_rate": 2.850850911698233e-06, "loss": 1.0659, "step": 29968 }, { "epoch": 1.7862677315532245, "grad_norm": 3.2426750659942627, "learning_rate": 2.847704101013293e-06, "loss": 0.9564, "step": 29970 }, { "epoch": 1.7863869352723805, "grad_norm": 3.1683692932128906, "learning_rate": 2.844558977127304e-06, "loss": 1.1263, "step": 29972 }, { "epoch": 1.7865061389915367, "grad_norm": 3.2985548973083496, "learning_rate": 2.8414155401527866e-06, "loss": 1.0525, "step": 29974 }, { "epoch": 1.7866253427106926, "grad_norm": 3.3818867206573486, "learning_rate": 2.838273790202184e-06, "loss": 1.3143, "step": 29976 }, { "epoch": 1.7867445464298486, "grad_norm": 3.4011449813842773, "learning_rate": 2.835133727387884e-06, "loss": 1.1067, "step": 29978 }, { "epoch": 1.7868637501490046, "grad_norm": 3.1912646293640137, "learning_rate": 2.8319953518222254e-06, "loss": 1.0572, "step": 29980 }, { "epoch": 1.7869829538681605, "grad_norm": 3.462148666381836, "learning_rate": 2.8288586636174843e-06, "loss": 1.1283, "step": 29982 }, { "epoch": 1.7871021575873167, "grad_norm": 3.5947303771972656, "learning_rate": 2.825723662885854e-06, "loss": 1.279, "step": 29984 }, { "epoch": 1.787221361306473, "grad_norm": 3.1259870529174805, "learning_rate": 2.822590349739501e-06, "loss": 1.3634, "step": 29986 }, { "epoch": 1.7873405650256289, "grad_norm": 3.6294827461242676, "learning_rate": 2.8194587242905023e-06, "loss": 1.0364, "step": 29988 }, { "epoch": 1.7874597687447848, "grad_norm": 3.4625558853149414, "learning_rate": 2.8163287866508958e-06, "loss": 1.1155, "step": 29990 }, { "epoch": 1.7875789724639408, "grad_norm": 3.46576189994812, "learning_rate": 2.813200536932642e-06, "loss": 1.0871, "step": 29992 }, { "epoch": 1.7876981761830968, "grad_norm": 2.8224806785583496, "learning_rate": 2.8100739752476514e-06, "loss": 0.9316, "step": 29994 }, { "epoch": 1.787817379902253, "grad_norm": 3.4518182277679443, "learning_rate": 2.8069491017077785e-06, "loss": 1.0798, "step": 29996 }, { "epoch": 1.787936583621409, "grad_norm": 3.34297251701355, "learning_rate": 2.803825916424796e-06, "loss": 1.1271, "step": 29998 }, { "epoch": 1.7880557873405651, "grad_norm": 3.239827871322632, "learning_rate": 2.8007044195104582e-06, "loss": 1.1101, "step": 30000 }, { "epoch": 1.788174991059721, "grad_norm": 3.20829176902771, "learning_rate": 2.7975846110763925e-06, "loss": 1.1727, "step": 30002 }, { "epoch": 1.788294194778877, "grad_norm": 3.3416075706481934, "learning_rate": 2.794466491234238e-06, "loss": 1.0187, "step": 30004 }, { "epoch": 1.788413398498033, "grad_norm": 2.9793920516967773, "learning_rate": 2.791350060095538e-06, "loss": 1.1077, "step": 30006 }, { "epoch": 1.788532602217189, "grad_norm": 2.6438941955566406, "learning_rate": 2.7882353177717537e-06, "loss": 1.0129, "step": 30008 }, { "epoch": 1.7886518059363452, "grad_norm": 3.24841570854187, "learning_rate": 2.785122264374329e-06, "loss": 0.8828, "step": 30010 }, { "epoch": 1.7887710096555014, "grad_norm": 3.142622470855713, "learning_rate": 2.7820109000146257e-06, "loss": 1.1466, "step": 30012 }, { "epoch": 1.7888902133746574, "grad_norm": 3.3158187866210938, "learning_rate": 2.7789012248039537e-06, "loss": 1.1142, "step": 30014 }, { "epoch": 1.7890094170938133, "grad_norm": 3.22571063041687, "learning_rate": 2.7757932388535356e-06, "loss": 1.3371, "step": 30016 }, { "epoch": 1.7891286208129693, "grad_norm": 3.1186695098876953, "learning_rate": 2.772686942274588e-06, "loss": 1.2262, "step": 30018 }, { "epoch": 1.7892478245321253, "grad_norm": 3.091562032699585, "learning_rate": 2.7695823351782057e-06, "loss": 1.0184, "step": 30020 }, { "epoch": 1.7893670282512815, "grad_norm": 3.171525478363037, "learning_rate": 2.7664794176754494e-06, "loss": 1.2301, "step": 30022 }, { "epoch": 1.7894862319704374, "grad_norm": 3.853041648864746, "learning_rate": 2.7633781898773526e-06, "loss": 1.0986, "step": 30024 }, { "epoch": 1.7896054356895936, "grad_norm": 3.1098148822784424, "learning_rate": 2.760278651894821e-06, "loss": 1.086, "step": 30026 }, { "epoch": 1.7897246394087496, "grad_norm": 2.9946858882904053, "learning_rate": 2.757180803838755e-06, "loss": 1.1453, "step": 30028 }, { "epoch": 1.7898438431279056, "grad_norm": 3.4076147079467773, "learning_rate": 2.7540846458199763e-06, "loss": 1.1788, "step": 30030 }, { "epoch": 1.7899630468470615, "grad_norm": 3.297123670578003, "learning_rate": 2.750990177949236e-06, "loss": 1.087, "step": 30032 }, { "epoch": 1.7900822505662175, "grad_norm": 3.291179895401001, "learning_rate": 2.747897400337235e-06, "loss": 1.1069, "step": 30034 }, { "epoch": 1.7902014542853737, "grad_norm": 3.255711793899536, "learning_rate": 2.7448063130946224e-06, "loss": 1.1381, "step": 30036 }, { "epoch": 1.7903206580045299, "grad_norm": 3.2249491214752197, "learning_rate": 2.7417169163319657e-06, "loss": 1.0941, "step": 30038 }, { "epoch": 1.7904398617236859, "grad_norm": 3.2638745307922363, "learning_rate": 2.738629210159788e-06, "loss": 1.1457, "step": 30040 }, { "epoch": 1.7905590654428418, "grad_norm": 3.399665117263794, "learning_rate": 2.7355431946885447e-06, "loss": 1.0345, "step": 30042 }, { "epoch": 1.7906782691619978, "grad_norm": 3.1689956188201904, "learning_rate": 2.732458870028631e-06, "loss": 1.2885, "step": 30044 }, { "epoch": 1.7907974728811538, "grad_norm": 3.4210360050201416, "learning_rate": 2.7293762362903983e-06, "loss": 1.1454, "step": 30046 }, { "epoch": 1.79091667660031, "grad_norm": 3.1201229095458984, "learning_rate": 2.726295293584108e-06, "loss": 1.1002, "step": 30048 }, { "epoch": 1.791035880319466, "grad_norm": 3.2736496925354004, "learning_rate": 2.723216042019977e-06, "loss": 1.1359, "step": 30050 }, { "epoch": 1.7911550840386221, "grad_norm": 3.4179489612579346, "learning_rate": 2.720138481708162e-06, "loss": 1.0291, "step": 30052 }, { "epoch": 1.791274287757778, "grad_norm": 2.8137292861938477, "learning_rate": 2.7170626127587697e-06, "loss": 0.9871, "step": 30054 }, { "epoch": 1.791393491476934, "grad_norm": 3.010908365249634, "learning_rate": 2.7139884352818166e-06, "loss": 1.0478, "step": 30056 }, { "epoch": 1.79151269519609, "grad_norm": 2.8495419025421143, "learning_rate": 2.7109159493872883e-06, "loss": 1.0003, "step": 30058 }, { "epoch": 1.791631898915246, "grad_norm": 3.203190326690674, "learning_rate": 2.7078451551850904e-06, "loss": 1.0168, "step": 30060 }, { "epoch": 1.7917511026344022, "grad_norm": 3.426880359649658, "learning_rate": 2.7047760527850796e-06, "loss": 1.0735, "step": 30062 }, { "epoch": 1.7918703063535584, "grad_norm": 3.229679584503174, "learning_rate": 2.701708642297057e-06, "loss": 1.1421, "step": 30064 }, { "epoch": 1.7919895100727143, "grad_norm": 3.3328356742858887, "learning_rate": 2.698642923830741e-06, "loss": 1.1733, "step": 30066 }, { "epoch": 1.7921087137918703, "grad_norm": 3.0495247840881348, "learning_rate": 2.6955788974957985e-06, "loss": 1.221, "step": 30068 }, { "epoch": 1.7922279175110263, "grad_norm": 3.914400339126587, "learning_rate": 2.6925165634018647e-06, "loss": 1.256, "step": 30070 }, { "epoch": 1.7923471212301822, "grad_norm": 2.6356630325317383, "learning_rate": 2.689455921658457e-06, "loss": 0.9806, "step": 30072 }, { "epoch": 1.7924663249493384, "grad_norm": 3.2805793285369873, "learning_rate": 2.6863969723750936e-06, "loss": 1.1224, "step": 30074 }, { "epoch": 1.7925855286684944, "grad_norm": 3.217742443084717, "learning_rate": 2.6833397156611926e-06, "loss": 1.0772, "step": 30076 }, { "epoch": 1.7927047323876506, "grad_norm": 3.1386406421661377, "learning_rate": 2.6802841516261168e-06, "loss": 1.1809, "step": 30078 }, { "epoch": 1.7928239361068066, "grad_norm": 3.4051034450531006, "learning_rate": 2.6772302803791837e-06, "loss": 1.0833, "step": 30080 }, { "epoch": 1.7929431398259625, "grad_norm": 3.616246223449707, "learning_rate": 2.674178102029634e-06, "loss": 1.1417, "step": 30082 }, { "epoch": 1.7930623435451185, "grad_norm": 3.458102226257324, "learning_rate": 2.6711276166866637e-06, "loss": 1.0857, "step": 30084 }, { "epoch": 1.7931815472642747, "grad_norm": 3.2142975330352783, "learning_rate": 2.66807882445938e-06, "loss": 1.1208, "step": 30086 }, { "epoch": 1.7933007509834307, "grad_norm": 3.0827741622924805, "learning_rate": 2.6650317254568736e-06, "loss": 1.0299, "step": 30088 }, { "epoch": 1.7934199547025869, "grad_norm": 3.287493944168091, "learning_rate": 2.661986319788129e-06, "loss": 1.214, "step": 30090 }, { "epoch": 1.7935391584217428, "grad_norm": 3.515048027038574, "learning_rate": 2.6589426075620984e-06, "loss": 1.1542, "step": 30092 }, { "epoch": 1.7936583621408988, "grad_norm": 2.8455452919006348, "learning_rate": 2.6559005888876777e-06, "loss": 0.9786, "step": 30094 }, { "epoch": 1.7937775658600548, "grad_norm": 3.3267667293548584, "learning_rate": 2.6528602638736687e-06, "loss": 1.0929, "step": 30096 }, { "epoch": 1.7938967695792107, "grad_norm": 3.5810132026672363, "learning_rate": 2.649821632628846e-06, "loss": 1.0701, "step": 30098 }, { "epoch": 1.794015973298367, "grad_norm": 3.4116878509521484, "learning_rate": 2.6467846952619113e-06, "loss": 1.1321, "step": 30100 }, { "epoch": 1.794135177017523, "grad_norm": 3.0340497493743896, "learning_rate": 2.643749451881505e-06, "loss": 1.0296, "step": 30102 }, { "epoch": 1.794254380736679, "grad_norm": 3.4803354740142822, "learning_rate": 2.6407159025962136e-06, "loss": 1.2543, "step": 30104 }, { "epoch": 1.794373584455835, "grad_norm": 3.611710786819458, "learning_rate": 2.6376840475145492e-06, "loss": 1.0163, "step": 30106 }, { "epoch": 1.794492788174991, "grad_norm": 3.1159980297088623, "learning_rate": 2.63465388674497e-06, "loss": 1.0038, "step": 30108 }, { "epoch": 1.794611991894147, "grad_norm": 3.162907838821411, "learning_rate": 2.631625420395889e-06, "loss": 1.0325, "step": 30110 }, { "epoch": 1.7947311956133032, "grad_norm": 3.530574321746826, "learning_rate": 2.6285986485756308e-06, "loss": 1.0387, "step": 30112 }, { "epoch": 1.7948503993324592, "grad_norm": 3.004822254180908, "learning_rate": 2.62557357139247e-06, "loss": 1.0939, "step": 30114 }, { "epoch": 1.7949696030516153, "grad_norm": 3.3390371799468994, "learning_rate": 2.6225501889546424e-06, "loss": 1.2694, "step": 30116 }, { "epoch": 1.7950888067707713, "grad_norm": 3.2761218547821045, "learning_rate": 2.619528501370294e-06, "loss": 1.064, "step": 30118 }, { "epoch": 1.7952080104899273, "grad_norm": 3.153442859649658, "learning_rate": 2.6165085087475174e-06, "loss": 1.2626, "step": 30120 }, { "epoch": 1.7953272142090833, "grad_norm": 2.8796143531799316, "learning_rate": 2.6134902111943583e-06, "loss": 0.9872, "step": 30122 }, { "epoch": 1.7954464179282392, "grad_norm": 3.298295021057129, "learning_rate": 2.610473608818781e-06, "loss": 1.0375, "step": 30124 }, { "epoch": 1.7955656216473954, "grad_norm": 3.1494574546813965, "learning_rate": 2.60745870172871e-06, "loss": 1.1565, "step": 30126 }, { "epoch": 1.7956848253665514, "grad_norm": 3.3288462162017822, "learning_rate": 2.604445490031987e-06, "loss": 1.0565, "step": 30128 }, { "epoch": 1.7958040290857076, "grad_norm": 3.388293981552124, "learning_rate": 2.6014339738364146e-06, "loss": 1.0461, "step": 30130 }, { "epoch": 1.7959232328048635, "grad_norm": 2.8427646160125732, "learning_rate": 2.5984241532497123e-06, "loss": 1.1434, "step": 30132 }, { "epoch": 1.7960424365240195, "grad_norm": 3.332610845565796, "learning_rate": 2.595416028379577e-06, "loss": 1.097, "step": 30134 }, { "epoch": 1.7961616402431755, "grad_norm": 3.1942007541656494, "learning_rate": 2.5924095993335897e-06, "loss": 1.188, "step": 30136 }, { "epoch": 1.7962808439623317, "grad_norm": 3.477285623550415, "learning_rate": 2.5894048662193195e-06, "loss": 1.1192, "step": 30138 }, { "epoch": 1.7964000476814876, "grad_norm": 3.136789321899414, "learning_rate": 2.5864018291442583e-06, "loss": 1.1372, "step": 30140 }, { "epoch": 1.7965192514006438, "grad_norm": 3.1090118885040283, "learning_rate": 2.583400488215815e-06, "loss": 1.0442, "step": 30142 }, { "epoch": 1.7966384551197998, "grad_norm": 3.2800166606903076, "learning_rate": 2.5804008435413753e-06, "loss": 1.0726, "step": 30144 }, { "epoch": 1.7967576588389558, "grad_norm": 3.3810625076293945, "learning_rate": 2.5774028952282425e-06, "loss": 1.1084, "step": 30146 }, { "epoch": 1.7968768625581117, "grad_norm": 3.1455671787261963, "learning_rate": 2.574406643383659e-06, "loss": 1.0734, "step": 30148 }, { "epoch": 1.7969960662772677, "grad_norm": 2.7930335998535156, "learning_rate": 2.5714120881148105e-06, "loss": 0.9591, "step": 30150 }, { "epoch": 1.797115269996424, "grad_norm": 3.3029890060424805, "learning_rate": 2.5684192295288455e-06, "loss": 1.0183, "step": 30152 }, { "epoch": 1.7972344737155799, "grad_norm": 3.3903133869171143, "learning_rate": 2.5654280677328003e-06, "loss": 1.1007, "step": 30154 }, { "epoch": 1.797353677434736, "grad_norm": 3.2705800533294678, "learning_rate": 2.5624386028336776e-06, "loss": 0.993, "step": 30156 }, { "epoch": 1.797472881153892, "grad_norm": 3.3069849014282227, "learning_rate": 2.559450834938448e-06, "loss": 1.0756, "step": 30158 }, { "epoch": 1.797592084873048, "grad_norm": 3.1456823348999023, "learning_rate": 2.5564647641539653e-06, "loss": 1.0034, "step": 30160 }, { "epoch": 1.797711288592204, "grad_norm": 2.97668194770813, "learning_rate": 2.5534803905870764e-06, "loss": 1.0252, "step": 30162 }, { "epoch": 1.7978304923113602, "grad_norm": 2.9100875854492188, "learning_rate": 2.5504977143445242e-06, "loss": 1.0596, "step": 30164 }, { "epoch": 1.7979496960305161, "grad_norm": 3.19327712059021, "learning_rate": 2.5475167355330175e-06, "loss": 1.1194, "step": 30166 }, { "epoch": 1.7980688997496723, "grad_norm": 3.0285184383392334, "learning_rate": 2.544537454259194e-06, "loss": 1.0896, "step": 30168 }, { "epoch": 1.7981881034688283, "grad_norm": 2.8512401580810547, "learning_rate": 2.5415598706296282e-06, "loss": 1.1031, "step": 30170 }, { "epoch": 1.7983073071879843, "grad_norm": 2.983283281326294, "learning_rate": 2.5385839847508473e-06, "loss": 0.9598, "step": 30172 }, { "epoch": 1.7984265109071402, "grad_norm": 3.3106460571289062, "learning_rate": 2.5356097967293046e-06, "loss": 1.1536, "step": 30174 }, { "epoch": 1.7985457146262962, "grad_norm": 3.378791332244873, "learning_rate": 2.5326373066713983e-06, "loss": 1.0342, "step": 30176 }, { "epoch": 1.7986649183454524, "grad_norm": 3.6647682189941406, "learning_rate": 2.5296665146834543e-06, "loss": 1.1536, "step": 30178 }, { "epoch": 1.7987841220646086, "grad_norm": 3.205148458480835, "learning_rate": 2.526697420871771e-06, "loss": 1.1075, "step": 30180 }, { "epoch": 1.7989033257837646, "grad_norm": 3.25516939163208, "learning_rate": 2.5237300253425355e-06, "loss": 1.1677, "step": 30182 }, { "epoch": 1.7990225295029205, "grad_norm": 3.4486887454986572, "learning_rate": 2.5207643282019245e-06, "loss": 1.1033, "step": 30184 }, { "epoch": 1.7991417332220765, "grad_norm": 3.4378836154937744, "learning_rate": 2.517800329556019e-06, "loss": 1.0539, "step": 30186 }, { "epoch": 1.7992609369412325, "grad_norm": 3.054377555847168, "learning_rate": 2.5148380295108577e-06, "loss": 1.0841, "step": 30188 }, { "epoch": 1.7993801406603886, "grad_norm": 3.4921634197235107, "learning_rate": 2.5118774281724046e-06, "loss": 0.9863, "step": 30190 }, { "epoch": 1.7994993443795446, "grad_norm": 3.395078182220459, "learning_rate": 2.508918525646581e-06, "loss": 1.139, "step": 30192 }, { "epoch": 1.7996185480987008, "grad_norm": 3.4443328380584717, "learning_rate": 2.5059613220392243e-06, "loss": 1.1706, "step": 30194 }, { "epoch": 1.7997377518178568, "grad_norm": 3.271578311920166, "learning_rate": 2.503005817456128e-06, "loss": 1.09, "step": 30196 }, { "epoch": 1.7998569555370127, "grad_norm": 3.0624940395355225, "learning_rate": 2.500052012003029e-06, "loss": 1.0014, "step": 30198 }, { "epoch": 1.7999761592561687, "grad_norm": 2.8711931705474854, "learning_rate": 2.497099905785588e-06, "loss": 1.0177, "step": 30200 }, { "epoch": 1.8000953629753247, "grad_norm": 3.0158441066741943, "learning_rate": 2.494149498909404e-06, "loss": 0.9314, "step": 30202 }, { "epoch": 1.8002145666944809, "grad_norm": 3.492725133895874, "learning_rate": 2.4912007914800474e-06, "loss": 1.2358, "step": 30204 }, { "epoch": 1.800333770413637, "grad_norm": 3.3400650024414062, "learning_rate": 2.488253783602973e-06, "loss": 0.9876, "step": 30206 }, { "epoch": 1.800452974132793, "grad_norm": 2.9679341316223145, "learning_rate": 2.4853084753836298e-06, "loss": 0.9638, "step": 30208 }, { "epoch": 1.800572177851949, "grad_norm": 3.2732114791870117, "learning_rate": 2.4823648669273723e-06, "loss": 1.2128, "step": 30210 }, { "epoch": 1.800691381571105, "grad_norm": 3.3412866592407227, "learning_rate": 2.4794229583395e-06, "loss": 0.9942, "step": 30212 }, { "epoch": 1.800810585290261, "grad_norm": 3.34869647026062, "learning_rate": 2.4764827497252617e-06, "loss": 1.24, "step": 30214 }, { "epoch": 1.8009297890094171, "grad_norm": 3.3390443325042725, "learning_rate": 2.4735442411898345e-06, "loss": 1.0528, "step": 30216 }, { "epoch": 1.801048992728573, "grad_norm": 3.42939829826355, "learning_rate": 2.4706074328383343e-06, "loss": 1.0466, "step": 30218 }, { "epoch": 1.8011681964477293, "grad_norm": 3.1528689861297607, "learning_rate": 2.467672324775827e-06, "loss": 1.0376, "step": 30220 }, { "epoch": 1.8012874001668853, "grad_norm": 3.623518705368042, "learning_rate": 2.4647389171073232e-06, "loss": 1.2222, "step": 30222 }, { "epoch": 1.8014066038860412, "grad_norm": 3.4213521480560303, "learning_rate": 2.4618072099377333e-06, "loss": 1.0492, "step": 30224 }, { "epoch": 1.8015258076051972, "grad_norm": 2.831494092941284, "learning_rate": 2.4588772033719566e-06, "loss": 1.1662, "step": 30226 }, { "epoch": 1.8016450113243532, "grad_norm": 2.9775354862213135, "learning_rate": 2.4559488975148036e-06, "loss": 1.1238, "step": 30228 }, { "epoch": 1.8017642150435094, "grad_norm": 2.975236654281616, "learning_rate": 2.4530222924710245e-06, "loss": 1.2084, "step": 30230 }, { "epoch": 1.8018834187626656, "grad_norm": 3.144845485687256, "learning_rate": 2.4500973883453183e-06, "loss": 1.1798, "step": 30232 }, { "epoch": 1.8020026224818215, "grad_norm": 3.227125883102417, "learning_rate": 2.4471741852423237e-06, "loss": 1.2465, "step": 30234 }, { "epoch": 1.8021218262009775, "grad_norm": 3.462101459503174, "learning_rate": 2.444252683266607e-06, "loss": 1.157, "step": 30236 }, { "epoch": 1.8022410299201335, "grad_norm": 2.8952443599700928, "learning_rate": 2.441332882522679e-06, "loss": 1.0519, "step": 30238 }, { "epoch": 1.8023602336392894, "grad_norm": 3.3770103454589844, "learning_rate": 2.4384147831150006e-06, "loss": 1.0835, "step": 30240 }, { "epoch": 1.8024794373584456, "grad_norm": 3.2072768211364746, "learning_rate": 2.4354983851479442e-06, "loss": 1.1511, "step": 30242 }, { "epoch": 1.8025986410776016, "grad_norm": 3.030134677886963, "learning_rate": 2.432583688725865e-06, "loss": 1.0716, "step": 30244 }, { "epoch": 1.8027178447967578, "grad_norm": 3.0887999534606934, "learning_rate": 2.429670693953012e-06, "loss": 1.0858, "step": 30246 }, { "epoch": 1.8028370485159138, "grad_norm": 3.018550395965576, "learning_rate": 2.426759400933587e-06, "loss": 1.0591, "step": 30248 }, { "epoch": 1.8029562522350697, "grad_norm": 3.390542507171631, "learning_rate": 2.423849809771761e-06, "loss": 1.1525, "step": 30250 }, { "epoch": 1.8030754559542257, "grad_norm": 3.156989574432373, "learning_rate": 2.420941920571601e-06, "loss": 1.098, "step": 30252 }, { "epoch": 1.8031946596733817, "grad_norm": 3.208514451980591, "learning_rate": 2.418035733437146e-06, "loss": 1.069, "step": 30254 }, { "epoch": 1.8033138633925379, "grad_norm": 2.451589584350586, "learning_rate": 2.4151312484723465e-06, "loss": 1.0857, "step": 30256 }, { "epoch": 1.803433067111694, "grad_norm": 3.1377320289611816, "learning_rate": 2.4122284657811132e-06, "loss": 0.9751, "step": 30258 }, { "epoch": 1.80355227083085, "grad_norm": 3.440253496170044, "learning_rate": 2.4093273854672916e-06, "loss": 1.0177, "step": 30260 }, { "epoch": 1.803671474550006, "grad_norm": 3.5425021648406982, "learning_rate": 2.4064280076346534e-06, "loss": 1.1294, "step": 30262 }, { "epoch": 1.803790678269162, "grad_norm": 3.3994200229644775, "learning_rate": 2.403530332386933e-06, "loss": 1.1787, "step": 30264 }, { "epoch": 1.803909881988318, "grad_norm": 3.0552752017974854, "learning_rate": 2.400634359827769e-06, "loss": 1.0741, "step": 30266 }, { "epoch": 1.8040290857074741, "grad_norm": 3.5664806365966797, "learning_rate": 2.3977400900607906e-06, "loss": 1.1589, "step": 30268 }, { "epoch": 1.80414828942663, "grad_norm": 3.193145275115967, "learning_rate": 2.394847523189503e-06, "loss": 1.2481, "step": 30270 }, { "epoch": 1.8042674931457863, "grad_norm": 3.60481333732605, "learning_rate": 2.3919566593174016e-06, "loss": 1.3364, "step": 30272 }, { "epoch": 1.8043866968649422, "grad_norm": 2.7826592922210693, "learning_rate": 2.389067498547909e-06, "loss": 0.8941, "step": 30274 }, { "epoch": 1.8045059005840982, "grad_norm": 3.515939474105835, "learning_rate": 2.386180040984359e-06, "loss": 1.1257, "step": 30276 }, { "epoch": 1.8046251043032542, "grad_norm": 3.485947847366333, "learning_rate": 2.3832942867300644e-06, "loss": 1.0926, "step": 30278 }, { "epoch": 1.8047443080224101, "grad_norm": 3.014463186264038, "learning_rate": 2.380410235888253e-06, "loss": 1.0642, "step": 30280 }, { "epoch": 1.8048635117415663, "grad_norm": 3.221797466278076, "learning_rate": 2.377527888562098e-06, "loss": 1.0005, "step": 30282 }, { "epoch": 1.8049827154607225, "grad_norm": 3.1144821643829346, "learning_rate": 2.3746472448547e-06, "loss": 1.1229, "step": 30284 }, { "epoch": 1.8051019191798785, "grad_norm": 3.165787696838379, "learning_rate": 2.3717683048691262e-06, "loss": 1.0687, "step": 30286 }, { "epoch": 1.8052211228990345, "grad_norm": 3.6967546939849854, "learning_rate": 2.368891068708351e-06, "loss": 1.1563, "step": 30288 }, { "epoch": 1.8053403266181904, "grad_norm": 3.006476640701294, "learning_rate": 2.366015536475319e-06, "loss": 1.0762, "step": 30290 }, { "epoch": 1.8054595303373464, "grad_norm": 3.474865674972534, "learning_rate": 2.3631417082728924e-06, "loss": 1.0452, "step": 30292 }, { "epoch": 1.8055787340565026, "grad_norm": 3.0852510929107666, "learning_rate": 2.3602695842038612e-06, "loss": 1.0695, "step": 30294 }, { "epoch": 1.8056979377756586, "grad_norm": 2.959796190261841, "learning_rate": 2.3573991643709937e-06, "loss": 0.9793, "step": 30296 }, { "epoch": 1.8058171414948148, "grad_norm": 3.12339186668396, "learning_rate": 2.3545304488769624e-06, "loss": 1.0525, "step": 30298 }, { "epoch": 1.8059363452139707, "grad_norm": 3.1948180198669434, "learning_rate": 2.3516634378243974e-06, "loss": 1.0743, "step": 30300 }, { "epoch": 1.8060555489331267, "grad_norm": 3.551778554916382, "learning_rate": 2.3487981313158547e-06, "loss": 1.1158, "step": 30302 }, { "epoch": 1.8061747526522827, "grad_norm": 3.1149699687957764, "learning_rate": 2.3459345294538416e-06, "loss": 1.134, "step": 30304 }, { "epoch": 1.8062939563714386, "grad_norm": 3.1536636352539062, "learning_rate": 2.343072632340798e-06, "loss": 1.063, "step": 30306 }, { "epoch": 1.8064131600905948, "grad_norm": 2.959618091583252, "learning_rate": 2.340212440079098e-06, "loss": 1.1272, "step": 30308 }, { "epoch": 1.806532363809751, "grad_norm": 3.5170507431030273, "learning_rate": 2.3373539527710707e-06, "loss": 1.0871, "step": 30310 }, { "epoch": 1.806651567528907, "grad_norm": 2.9476001262664795, "learning_rate": 2.3344971705189567e-06, "loss": 1.2142, "step": 30312 }, { "epoch": 1.806770771248063, "grad_norm": 3.3524951934814453, "learning_rate": 2.3316420934249848e-06, "loss": 1.14, "step": 30314 }, { "epoch": 1.806889974967219, "grad_norm": 3.2261462211608887, "learning_rate": 2.328788721591252e-06, "loss": 1.0696, "step": 30316 }, { "epoch": 1.807009178686375, "grad_norm": 2.5651607513427734, "learning_rate": 2.325937055119859e-06, "loss": 1.0051, "step": 30318 }, { "epoch": 1.807128382405531, "grad_norm": 3.6517691612243652, "learning_rate": 2.3230870941128136e-06, "loss": 1.1054, "step": 30320 }, { "epoch": 1.807247586124687, "grad_norm": 3.5362281799316406, "learning_rate": 2.3202388386720676e-06, "loss": 1.2443, "step": 30322 }, { "epoch": 1.8073667898438432, "grad_norm": 3.5266292095184326, "learning_rate": 2.317392288899517e-06, "loss": 0.9774, "step": 30324 }, { "epoch": 1.8074859935629992, "grad_norm": 3.3076729774475098, "learning_rate": 2.3145474448969806e-06, "loss": 1.0437, "step": 30326 }, { "epoch": 1.8076051972821552, "grad_norm": 3.4484682083129883, "learning_rate": 2.3117043067662438e-06, "loss": 1.0078, "step": 30328 }, { "epoch": 1.8077244010013112, "grad_norm": 3.1592013835906982, "learning_rate": 2.308862874608997e-06, "loss": 1.0099, "step": 30330 }, { "epoch": 1.8078436047204671, "grad_norm": 3.6716389656066895, "learning_rate": 2.306023148526909e-06, "loss": 1.2272, "step": 30332 }, { "epoch": 1.8079628084396233, "grad_norm": 2.9564998149871826, "learning_rate": 2.3031851286215546e-06, "loss": 1.1855, "step": 30334 }, { "epoch": 1.8080820121587795, "grad_norm": 3.059377908706665, "learning_rate": 2.300348814994452e-06, "loss": 1.0846, "step": 30336 }, { "epoch": 1.8082012158779355, "grad_norm": 3.4448156356811523, "learning_rate": 2.2975142077470924e-06, "loss": 1.2541, "step": 30338 }, { "epoch": 1.8083204195970914, "grad_norm": 3.268049478530884, "learning_rate": 2.2946813069808446e-06, "loss": 0.9871, "step": 30340 }, { "epoch": 1.8084396233162474, "grad_norm": 3.0242319107055664, "learning_rate": 2.2918501127970727e-06, "loss": 0.9914, "step": 30342 }, { "epoch": 1.8085588270354034, "grad_norm": 3.2595181465148926, "learning_rate": 2.289020625297056e-06, "loss": 0.9913, "step": 30344 }, { "epoch": 1.8086780307545596, "grad_norm": 3.3137738704681396, "learning_rate": 2.286192844582019e-06, "loss": 1.0693, "step": 30346 }, { "epoch": 1.8087972344737155, "grad_norm": 3.2089972496032715, "learning_rate": 2.2833667707530982e-06, "loss": 1.0129, "step": 30348 }, { "epoch": 1.8089164381928717, "grad_norm": 3.4102494716644287, "learning_rate": 2.280542403911429e-06, "loss": 1.0635, "step": 30350 }, { "epoch": 1.8090356419120277, "grad_norm": 3.0492866039276123, "learning_rate": 2.277719744158019e-06, "loss": 1.1301, "step": 30352 }, { "epoch": 1.8091548456311837, "grad_norm": 3.641920328140259, "learning_rate": 2.2748987915938493e-06, "loss": 1.2006, "step": 30354 }, { "epoch": 1.8092740493503396, "grad_norm": 3.328389883041382, "learning_rate": 2.2720795463198497e-06, "loss": 1.3142, "step": 30356 }, { "epoch": 1.8093932530694956, "grad_norm": 2.888146162033081, "learning_rate": 2.269262008436851e-06, "loss": 1.1179, "step": 30358 }, { "epoch": 1.8095124567886518, "grad_norm": 3.1931917667388916, "learning_rate": 2.266446178045667e-06, "loss": 1.0724, "step": 30360 }, { "epoch": 1.809631660507808, "grad_norm": 2.846010446548462, "learning_rate": 2.2636320552470225e-06, "loss": 1.0958, "step": 30362 }, { "epoch": 1.809750864226964, "grad_norm": 3.3727338314056396, "learning_rate": 2.260819640141587e-06, "loss": 1.0866, "step": 30364 }, { "epoch": 1.80987006794612, "grad_norm": 3.533841848373413, "learning_rate": 2.2580089328299746e-06, "loss": 1.1527, "step": 30366 }, { "epoch": 1.809989271665276, "grad_norm": 3.261990547180176, "learning_rate": 2.255199933412727e-06, "loss": 1.1817, "step": 30368 }, { "epoch": 1.8101084753844319, "grad_norm": 3.145002603530884, "learning_rate": 2.2523926419903307e-06, "loss": 1.2298, "step": 30370 }, { "epoch": 1.810227679103588, "grad_norm": 3.2540228366851807, "learning_rate": 2.2495870586632216e-06, "loss": 1.0972, "step": 30372 }, { "epoch": 1.810346882822744, "grad_norm": 3.368366241455078, "learning_rate": 2.2467831835317587e-06, "loss": 1.2037, "step": 30374 }, { "epoch": 1.8104660865419002, "grad_norm": 3.4656074047088623, "learning_rate": 2.2439810166962393e-06, "loss": 1.0376, "step": 30376 }, { "epoch": 1.8105852902610562, "grad_norm": 3.389256238937378, "learning_rate": 2.2411805582569278e-06, "loss": 1.0061, "step": 30378 }, { "epoch": 1.8107044939802122, "grad_norm": 3.2252542972564697, "learning_rate": 2.2383818083139884e-06, "loss": 1.135, "step": 30380 }, { "epoch": 1.8108236976993681, "grad_norm": 3.7674455642700195, "learning_rate": 2.2355847669675355e-06, "loss": 1.1509, "step": 30382 }, { "epoch": 1.810942901418524, "grad_norm": 3.543225049972534, "learning_rate": 2.2327894343176505e-06, "loss": 1.0511, "step": 30384 }, { "epoch": 1.8110621051376803, "grad_norm": 3.627373218536377, "learning_rate": 2.2299958104643194e-06, "loss": 1.255, "step": 30386 }, { "epoch": 1.8111813088568365, "grad_norm": 3.3636362552642822, "learning_rate": 2.2272038955074793e-06, "loss": 1.1767, "step": 30388 }, { "epoch": 1.8113005125759924, "grad_norm": 3.3183422088623047, "learning_rate": 2.224413689547011e-06, "loss": 1.1974, "step": 30390 }, { "epoch": 1.8114197162951484, "grad_norm": 3.2670745849609375, "learning_rate": 2.22162519268273e-06, "loss": 0.9613, "step": 30392 }, { "epoch": 1.8115389200143044, "grad_norm": 2.7436435222625732, "learning_rate": 2.218838405014384e-06, "loss": 1.1145, "step": 30394 }, { "epoch": 1.8116581237334604, "grad_norm": 3.4369821548461914, "learning_rate": 2.2160533266416704e-06, "loss": 1.0782, "step": 30396 }, { "epoch": 1.8117773274526165, "grad_norm": 3.192328929901123, "learning_rate": 2.2132699576642214e-06, "loss": 1.1954, "step": 30398 }, { "epoch": 1.8118965311717725, "grad_norm": 3.1137678623199463, "learning_rate": 2.2104882981816013e-06, "loss": 1.0279, "step": 30400 }, { "epoch": 1.8120157348909287, "grad_norm": 3.0957536697387695, "learning_rate": 2.2077083482933303e-06, "loss": 0.986, "step": 30402 }, { "epoch": 1.8121349386100847, "grad_norm": 3.512547731399536, "learning_rate": 2.204930108098846e-06, "loss": 1.0941, "step": 30404 }, { "epoch": 1.8122541423292406, "grad_norm": 3.3994762897491455, "learning_rate": 2.2021535776975468e-06, "loss": 1.0482, "step": 30406 }, { "epoch": 1.8123733460483966, "grad_norm": 3.1700284481048584, "learning_rate": 2.1993787571887524e-06, "loss": 1.0485, "step": 30408 }, { "epoch": 1.8124925497675526, "grad_norm": 3.974829912185669, "learning_rate": 2.196605646671723e-06, "loss": 1.1359, "step": 30410 }, { "epoch": 1.8126117534867088, "grad_norm": 3.502025604248047, "learning_rate": 2.193834246245674e-06, "loss": 1.0731, "step": 30412 }, { "epoch": 1.812730957205865, "grad_norm": 3.2776620388031006, "learning_rate": 2.1910645560097365e-06, "loss": 1.1568, "step": 30414 }, { "epoch": 1.812850160925021, "grad_norm": 3.3686914443969727, "learning_rate": 2.1882965760629982e-06, "loss": 1.0339, "step": 30416 }, { "epoch": 1.812969364644177, "grad_norm": 3.052297353744507, "learning_rate": 2.1855303065044684e-06, "loss": 1.0731, "step": 30418 }, { "epoch": 1.8130885683633329, "grad_norm": 3.600067377090454, "learning_rate": 2.18276574743313e-06, "loss": 1.189, "step": 30420 }, { "epoch": 1.8132077720824888, "grad_norm": 3.400770425796509, "learning_rate": 2.1800028989478528e-06, "loss": 1.0778, "step": 30422 }, { "epoch": 1.813326975801645, "grad_norm": 3.182170867919922, "learning_rate": 2.1772417611474916e-06, "loss": 1.2172, "step": 30424 }, { "epoch": 1.813446179520801, "grad_norm": 3.344724655151367, "learning_rate": 2.1744823341308227e-06, "loss": 1.0801, "step": 30426 }, { "epoch": 1.8135653832399572, "grad_norm": 2.9738941192626953, "learning_rate": 2.1717246179965454e-06, "loss": 1.1507, "step": 30428 }, { "epoch": 1.8136845869591132, "grad_norm": 3.1832473278045654, "learning_rate": 2.168968612843325e-06, "loss": 1.0944, "step": 30430 }, { "epoch": 1.8138037906782691, "grad_norm": 2.8239681720733643, "learning_rate": 2.1662143187697494e-06, "loss": 1.1007, "step": 30432 }, { "epoch": 1.813922994397425, "grad_norm": 3.1560919284820557, "learning_rate": 2.1634617358743446e-06, "loss": 1.0722, "step": 30434 }, { "epoch": 1.814042198116581, "grad_norm": 3.0980608463287354, "learning_rate": 2.1607108642555883e-06, "loss": 1.0642, "step": 30436 }, { "epoch": 1.8141614018357373, "grad_norm": 3.1790988445281982, "learning_rate": 2.1579617040118906e-06, "loss": 1.1204, "step": 30438 }, { "epoch": 1.8142806055548935, "grad_norm": 2.8034510612487793, "learning_rate": 2.1552142552415834e-06, "loss": 1.0424, "step": 30440 }, { "epoch": 1.8143998092740494, "grad_norm": 3.436933994293213, "learning_rate": 2.152468518042966e-06, "loss": 1.1431, "step": 30442 }, { "epoch": 1.8145190129932054, "grad_norm": 3.550560235977173, "learning_rate": 2.1497244925142547e-06, "loss": 0.9743, "step": 30444 }, { "epoch": 1.8146382167123614, "grad_norm": 3.6323490142822266, "learning_rate": 2.146982178753615e-06, "loss": 1.0737, "step": 30446 }, { "epoch": 1.8147574204315173, "grad_norm": 3.309713840484619, "learning_rate": 2.1442415768591516e-06, "loss": 1.1641, "step": 30448 }, { "epoch": 1.8148766241506735, "grad_norm": 3.1224687099456787, "learning_rate": 2.141502686928909e-06, "loss": 0.9857, "step": 30450 }, { "epoch": 1.8149958278698295, "grad_norm": 3.3397128582000732, "learning_rate": 2.1387655090608584e-06, "loss": 1.1106, "step": 30452 }, { "epoch": 1.8151150315889857, "grad_norm": 3.5073578357696533, "learning_rate": 2.136030043352921e-06, "loss": 1.1015, "step": 30454 }, { "epoch": 1.8152342353081417, "grad_norm": 3.536623001098633, "learning_rate": 2.1332962899029527e-06, "loss": 0.9856, "step": 30456 }, { "epoch": 1.8153534390272976, "grad_norm": 3.413557767868042, "learning_rate": 2.130564248808753e-06, "loss": 1.1238, "step": 30458 }, { "epoch": 1.8154726427464536, "grad_norm": 3.1199209690093994, "learning_rate": 2.127833920168049e-06, "loss": 0.9656, "step": 30460 }, { "epoch": 1.8155918464656098, "grad_norm": 3.225198268890381, "learning_rate": 2.125105304078523e-06, "loss": 1.099, "step": 30462 }, { "epoch": 1.8157110501847658, "grad_norm": 3.356865167617798, "learning_rate": 2.1223784006377758e-06, "loss": 1.0757, "step": 30464 }, { "epoch": 1.815830253903922, "grad_norm": 2.9067277908325195, "learning_rate": 2.119653209943373e-06, "loss": 1.1451, "step": 30466 }, { "epoch": 1.815949457623078, "grad_norm": 2.797076463699341, "learning_rate": 2.1169297320927872e-06, "loss": 1.0055, "step": 30468 }, { "epoch": 1.8160686613422339, "grad_norm": 3.204118013381958, "learning_rate": 2.1142079671834625e-06, "loss": 1.047, "step": 30470 }, { "epoch": 1.8161878650613899, "grad_norm": 3.6556832790374756, "learning_rate": 2.1114879153127596e-06, "loss": 1.1978, "step": 30472 }, { "epoch": 1.8163070687805458, "grad_norm": 2.660024404525757, "learning_rate": 2.1087695765779736e-06, "loss": 1.0183, "step": 30474 }, { "epoch": 1.816426272499702, "grad_norm": 2.8855626583099365, "learning_rate": 2.106052951076365e-06, "loss": 1.0396, "step": 30476 }, { "epoch": 1.816545476218858, "grad_norm": 3.2286391258239746, "learning_rate": 2.103338038905106e-06, "loss": 1.0961, "step": 30478 }, { "epoch": 1.8166646799380142, "grad_norm": 3.3410072326660156, "learning_rate": 2.100624840161325e-06, "loss": 1.0883, "step": 30480 }, { "epoch": 1.8167838836571701, "grad_norm": 3.269394636154175, "learning_rate": 2.0979133549420715e-06, "loss": 0.9785, "step": 30482 }, { "epoch": 1.816903087376326, "grad_norm": 3.0230886936187744, "learning_rate": 2.0952035833443683e-06, "loss": 1.1483, "step": 30484 }, { "epoch": 1.817022291095482, "grad_norm": 3.279449224472046, "learning_rate": 2.092495525465127e-06, "loss": 1.1022, "step": 30486 }, { "epoch": 1.8171414948146383, "grad_norm": 3.2569730281829834, "learning_rate": 2.0897891814012305e-06, "loss": 1.0002, "step": 30488 }, { "epoch": 1.8172606985337942, "grad_norm": 3.369471549987793, "learning_rate": 2.087084551249513e-06, "loss": 1.0594, "step": 30490 }, { "epoch": 1.8173799022529504, "grad_norm": 3.699157238006592, "learning_rate": 2.084381635106697e-06, "loss": 1.3414, "step": 30492 }, { "epoch": 1.8174991059721064, "grad_norm": 3.036330223083496, "learning_rate": 2.0816804330694994e-06, "loss": 1.0621, "step": 30494 }, { "epoch": 1.8176183096912624, "grad_norm": 3.0046703815460205, "learning_rate": 2.078980945234543e-06, "loss": 1.1201, "step": 30496 }, { "epoch": 1.8177375134104183, "grad_norm": 3.238126277923584, "learning_rate": 2.0762831716983956e-06, "loss": 1.2136, "step": 30498 }, { "epoch": 1.8178567171295743, "grad_norm": 2.9718751907348633, "learning_rate": 2.0735871125575736e-06, "loss": 1.0149, "step": 30500 }, { "epoch": 1.8179759208487305, "grad_norm": 3.3916847705841064, "learning_rate": 2.0708927679085166e-06, "loss": 1.1568, "step": 30502 }, { "epoch": 1.8180951245678865, "grad_norm": 3.2703025341033936, "learning_rate": 2.0682001378476148e-06, "loss": 1.1281, "step": 30504 }, { "epoch": 1.8182143282870427, "grad_norm": 3.454312801361084, "learning_rate": 2.0655092224711847e-06, "loss": 1.1494, "step": 30506 }, { "epoch": 1.8183335320061986, "grad_norm": 2.7070322036743164, "learning_rate": 2.0628200218755e-06, "loss": 1.0526, "step": 30508 }, { "epoch": 1.8184527357253546, "grad_norm": 3.323805570602417, "learning_rate": 2.060132536156756e-06, "loss": 1.2107, "step": 30510 }, { "epoch": 1.8185719394445106, "grad_norm": 3.293752908706665, "learning_rate": 2.057446765411103e-06, "loss": 0.9665, "step": 30512 }, { "epoch": 1.8186911431636668, "grad_norm": 3.299159526824951, "learning_rate": 2.0547627097345977e-06, "loss": 1.0691, "step": 30514 }, { "epoch": 1.8188103468828227, "grad_norm": 3.099191665649414, "learning_rate": 2.052080369223286e-06, "loss": 1.0991, "step": 30516 }, { "epoch": 1.818929550601979, "grad_norm": 3.3359107971191406, "learning_rate": 2.0493997439731073e-06, "loss": 1.261, "step": 30518 }, { "epoch": 1.8190487543211349, "grad_norm": 3.2262380123138428, "learning_rate": 2.0467208340799627e-06, "loss": 1.1117, "step": 30520 }, { "epoch": 1.8191679580402909, "grad_norm": 2.927673578262329, "learning_rate": 2.0440436396396867e-06, "loss": 1.1254, "step": 30522 }, { "epoch": 1.8192871617594468, "grad_norm": 2.8402247428894043, "learning_rate": 2.041368160748047e-06, "loss": 1.0988, "step": 30524 }, { "epoch": 1.8194063654786028, "grad_norm": 3.2102463245391846, "learning_rate": 2.0386943975007565e-06, "loss": 1.1074, "step": 30526 }, { "epoch": 1.819525569197759, "grad_norm": 3.0594687461853027, "learning_rate": 2.036022349993455e-06, "loss": 1.215, "step": 30528 }, { "epoch": 1.819644772916915, "grad_norm": 3.3781278133392334, "learning_rate": 2.0333520183217604e-06, "loss": 1.1725, "step": 30530 }, { "epoch": 1.8197639766360711, "grad_norm": 3.5961201190948486, "learning_rate": 2.0306834025811684e-06, "loss": 1.0309, "step": 30532 }, { "epoch": 1.8198831803552271, "grad_norm": 3.167078971862793, "learning_rate": 2.0280165028671528e-06, "loss": 1.0726, "step": 30534 }, { "epoch": 1.820002384074383, "grad_norm": 3.604214668273926, "learning_rate": 2.0253513192751373e-06, "loss": 1.1196, "step": 30536 }, { "epoch": 1.820121587793539, "grad_norm": 3.272822380065918, "learning_rate": 2.022687851900429e-06, "loss": 1.0762, "step": 30538 }, { "epoch": 1.8202407915126952, "grad_norm": 3.1379077434539795, "learning_rate": 2.0200261008383402e-06, "loss": 1.0283, "step": 30540 }, { "epoch": 1.8203599952318512, "grad_norm": 3.0583250522613525, "learning_rate": 2.0173660661840788e-06, "loss": 1.2583, "step": 30542 }, { "epoch": 1.8204791989510074, "grad_norm": 3.338041305541992, "learning_rate": 2.0147077480328015e-06, "loss": 1.1464, "step": 30544 }, { "epoch": 1.8205984026701634, "grad_norm": 3.1801531314849854, "learning_rate": 2.0120511464796097e-06, "loss": 1.1282, "step": 30546 }, { "epoch": 1.8207176063893193, "grad_norm": 3.136509656906128, "learning_rate": 2.0093962616195395e-06, "loss": 1.203, "step": 30548 }, { "epoch": 1.8208368101084753, "grad_norm": 3.4464614391326904, "learning_rate": 2.0067430935475582e-06, "loss": 1.0081, "step": 30550 }, { "epoch": 1.8209560138276313, "grad_norm": 3.389859676361084, "learning_rate": 2.0040916423585854e-06, "loss": 0.9883, "step": 30552 }, { "epoch": 1.8210752175467875, "grad_norm": 3.15929913520813, "learning_rate": 2.0014419081474777e-06, "loss": 0.9968, "step": 30554 }, { "epoch": 1.8211944212659437, "grad_norm": 3.5463578701019287, "learning_rate": 1.998793891009004e-06, "loss": 0.9855, "step": 30556 }, { "epoch": 1.8213136249850996, "grad_norm": 3.2895548343658447, "learning_rate": 1.9961475910379168e-06, "loss": 1.1783, "step": 30558 }, { "epoch": 1.8214328287042556, "grad_norm": 3.330129861831665, "learning_rate": 1.9935030083288787e-06, "loss": 1.0937, "step": 30560 }, { "epoch": 1.8215520324234116, "grad_norm": 3.157926559448242, "learning_rate": 1.990860142976475e-06, "loss": 1.2042, "step": 30562 }, { "epoch": 1.8216712361425675, "grad_norm": 3.242760419845581, "learning_rate": 1.988218995075275e-06, "loss": 1.2164, "step": 30564 }, { "epoch": 1.8217904398617237, "grad_norm": 3.1946310997009277, "learning_rate": 1.9855795647197528e-06, "loss": 0.9736, "step": 30566 }, { "epoch": 1.8219096435808797, "grad_norm": 3.1861369609832764, "learning_rate": 1.9829418520043274e-06, "loss": 1.113, "step": 30568 }, { "epoch": 1.822028847300036, "grad_norm": 3.3818044662475586, "learning_rate": 1.9803058570233623e-06, "loss": 1.0702, "step": 30570 }, { "epoch": 1.8221480510191919, "grad_norm": 3.4180784225463867, "learning_rate": 1.9776715798711544e-06, "loss": 1.072, "step": 30572 }, { "epoch": 1.8222672547383478, "grad_norm": 3.0826966762542725, "learning_rate": 1.9750390206419335e-06, "loss": 1.0725, "step": 30574 }, { "epoch": 1.8223864584575038, "grad_norm": 3.384996175765991, "learning_rate": 1.972408179429891e-06, "loss": 1.2341, "step": 30576 }, { "epoch": 1.8225056621766598, "grad_norm": 3.2668216228485107, "learning_rate": 1.9697790563291295e-06, "loss": 1.2065, "step": 30578 }, { "epoch": 1.822624865895816, "grad_norm": 3.222163677215576, "learning_rate": 1.9671516514337017e-06, "loss": 1.1185, "step": 30580 }, { "epoch": 1.8227440696149722, "grad_norm": 2.9968202114105225, "learning_rate": 1.9645259648376048e-06, "loss": 1.0855, "step": 30582 }, { "epoch": 1.8228632733341281, "grad_norm": 3.398249626159668, "learning_rate": 1.9619019966347684e-06, "loss": 1.08, "step": 30584 }, { "epoch": 1.822982477053284, "grad_norm": 3.425715923309326, "learning_rate": 1.959279746919057e-06, "loss": 1.1317, "step": 30586 }, { "epoch": 1.82310168077244, "grad_norm": 3.2137534618377686, "learning_rate": 1.9566592157842735e-06, "loss": 1.1445, "step": 30588 }, { "epoch": 1.823220884491596, "grad_norm": 2.98447847366333, "learning_rate": 1.9540404033241756e-06, "loss": 1.0827, "step": 30590 }, { "epoch": 1.8233400882107522, "grad_norm": 2.7587060928344727, "learning_rate": 1.9514233096324387e-06, "loss": 1.0559, "step": 30592 }, { "epoch": 1.8234592919299082, "grad_norm": 2.8091623783111572, "learning_rate": 1.9488079348026824e-06, "loss": 0.9277, "step": 30594 }, { "epoch": 1.8235784956490644, "grad_norm": 3.069903612136841, "learning_rate": 1.946194278928476e-06, "loss": 1.111, "step": 30596 }, { "epoch": 1.8236976993682203, "grad_norm": 2.9586029052734375, "learning_rate": 1.9435823421033063e-06, "loss": 1.0889, "step": 30598 }, { "epoch": 1.8238169030873763, "grad_norm": 3.1937854290008545, "learning_rate": 1.9409721244206313e-06, "loss": 1.0638, "step": 30600 }, { "epoch": 1.8239361068065323, "grad_norm": 3.5418906211853027, "learning_rate": 1.9383636259738046e-06, "loss": 1.1149, "step": 30602 }, { "epoch": 1.8240553105256883, "grad_norm": 3.0500965118408203, "learning_rate": 1.9357568468561573e-06, "loss": 1.0542, "step": 30604 }, { "epoch": 1.8241745142448444, "grad_norm": 3.6358861923217773, "learning_rate": 1.933151787160942e-06, "loss": 1.1223, "step": 30606 }, { "epoch": 1.8242937179640006, "grad_norm": 3.2625792026519775, "learning_rate": 1.9305484469813395e-06, "loss": 1.1682, "step": 30608 }, { "epoch": 1.8244129216831566, "grad_norm": 3.3134477138519287, "learning_rate": 1.927946826410487e-06, "loss": 1.1631, "step": 30610 }, { "epoch": 1.8245321254023126, "grad_norm": 3.2037353515625, "learning_rate": 1.9253469255414547e-06, "loss": 1.123, "step": 30612 }, { "epoch": 1.8246513291214685, "grad_norm": 3.030825138092041, "learning_rate": 1.922748744467251e-06, "loss": 1.0159, "step": 30614 }, { "epoch": 1.8247705328406245, "grad_norm": 3.6052005290985107, "learning_rate": 1.9201522832808127e-06, "loss": 1.315, "step": 30616 }, { "epoch": 1.8248897365597807, "grad_norm": 2.8346316814422607, "learning_rate": 1.9175575420750437e-06, "loss": 0.8954, "step": 30618 }, { "epoch": 1.8250089402789367, "grad_norm": 3.388902187347412, "learning_rate": 1.9149645209427467e-06, "loss": 1.2118, "step": 30620 }, { "epoch": 1.8251281439980929, "grad_norm": 3.52970552444458, "learning_rate": 1.912373219976682e-06, "loss": 1.1334, "step": 30622 }, { "epoch": 1.8252473477172488, "grad_norm": 3.2243902683258057, "learning_rate": 1.909783639269569e-06, "loss": 1.0301, "step": 30624 }, { "epoch": 1.8253665514364048, "grad_norm": 2.830822467803955, "learning_rate": 1.9071957789140227e-06, "loss": 0.9581, "step": 30626 }, { "epoch": 1.8254857551555608, "grad_norm": 3.560945987701416, "learning_rate": 1.9046096390026413e-06, "loss": 1.1576, "step": 30628 }, { "epoch": 1.8256049588747167, "grad_norm": 3.458634376525879, "learning_rate": 1.9020252196279232e-06, "loss": 1.1421, "step": 30630 }, { "epoch": 1.825724162593873, "grad_norm": 3.0280590057373047, "learning_rate": 1.8994425208823329e-06, "loss": 1.0351, "step": 30632 }, { "epoch": 1.8258433663130291, "grad_norm": 2.8042895793914795, "learning_rate": 1.896861542858258e-06, "loss": 1.0145, "step": 30634 }, { "epoch": 1.825962570032185, "grad_norm": 3.384761333465576, "learning_rate": 1.8942822856480247e-06, "loss": 1.1745, "step": 30636 }, { "epoch": 1.826081773751341, "grad_norm": 3.006490707397461, "learning_rate": 1.891704749343909e-06, "loss": 1.1659, "step": 30638 }, { "epoch": 1.826200977470497, "grad_norm": 2.967219829559326, "learning_rate": 1.8891289340381203e-06, "loss": 1.0454, "step": 30640 }, { "epoch": 1.826320181189653, "grad_norm": 3.0979695320129395, "learning_rate": 1.8865548398227905e-06, "loss": 1.0575, "step": 30642 }, { "epoch": 1.8264393849088092, "grad_norm": 3.083127975463867, "learning_rate": 1.883982466790013e-06, "loss": 1.1986, "step": 30644 }, { "epoch": 1.8265585886279652, "grad_norm": 3.4104843139648438, "learning_rate": 1.8814118150318083e-06, "loss": 1.0838, "step": 30646 }, { "epoch": 1.8266777923471214, "grad_norm": 3.428171157836914, "learning_rate": 1.8788428846401473e-06, "loss": 1.217, "step": 30648 }, { "epoch": 1.8267969960662773, "grad_norm": 3.5251004695892334, "learning_rate": 1.8762756757069177e-06, "loss": 1.2077, "step": 30650 }, { "epoch": 1.8269161997854333, "grad_norm": 4.207733631134033, "learning_rate": 1.8737101883239571e-06, "loss": 1.1309, "step": 30652 }, { "epoch": 1.8270354035045893, "grad_norm": 3.05718994140625, "learning_rate": 1.8711464225830477e-06, "loss": 0.9798, "step": 30654 }, { "epoch": 1.8271546072237452, "grad_norm": 3.841843843460083, "learning_rate": 1.868584378575905e-06, "loss": 1.1208, "step": 30656 }, { "epoch": 1.8272738109429014, "grad_norm": 3.152480363845825, "learning_rate": 1.8660240563941778e-06, "loss": 1.1458, "step": 30658 }, { "epoch": 1.8273930146620576, "grad_norm": 3.3605241775512695, "learning_rate": 1.863465456129454e-06, "loss": 1.2205, "step": 30660 }, { "epoch": 1.8275122183812136, "grad_norm": 3.041682004928589, "learning_rate": 1.860908577873266e-06, "loss": 1.1573, "step": 30662 }, { "epoch": 1.8276314221003696, "grad_norm": 3.3090901374816895, "learning_rate": 1.8583534217170962e-06, "loss": 1.2627, "step": 30664 }, { "epoch": 1.8277506258195255, "grad_norm": 3.190742015838623, "learning_rate": 1.855799987752338e-06, "loss": 1.0572, "step": 30666 }, { "epoch": 1.8278698295386815, "grad_norm": 3.0516912937164307, "learning_rate": 1.853248276070324e-06, "loss": 1.0298, "step": 30668 }, { "epoch": 1.8279890332578377, "grad_norm": 3.313023090362549, "learning_rate": 1.8506982867623646e-06, "loss": 0.988, "step": 30670 }, { "epoch": 1.8281082369769937, "grad_norm": 3.3244221210479736, "learning_rate": 1.8481500199196588e-06, "loss": 1.1497, "step": 30672 }, { "epoch": 1.8282274406961498, "grad_norm": 3.410146474838257, "learning_rate": 1.8456034756333839e-06, "loss": 1.0855, "step": 30674 }, { "epoch": 1.8283466444153058, "grad_norm": 3.5128681659698486, "learning_rate": 1.8430586539946283e-06, "loss": 1.1185, "step": 30676 }, { "epoch": 1.8284658481344618, "grad_norm": 3.193692922592163, "learning_rate": 1.8405155550944353e-06, "loss": 1.1292, "step": 30678 }, { "epoch": 1.8285850518536177, "grad_norm": 3.2822444438934326, "learning_rate": 1.837974179023777e-06, "loss": 1.0624, "step": 30680 }, { "epoch": 1.8287042555727737, "grad_norm": 3.139228105545044, "learning_rate": 1.835434525873564e-06, "loss": 1.0825, "step": 30682 }, { "epoch": 1.82882345929193, "grad_norm": 3.3556289672851562, "learning_rate": 1.832896595734651e-06, "loss": 1.0982, "step": 30684 }, { "epoch": 1.828942663011086, "grad_norm": 3.1402294635772705, "learning_rate": 1.8303603886978215e-06, "loss": 1.1605, "step": 30686 }, { "epoch": 1.829061866730242, "grad_norm": 3.126262903213501, "learning_rate": 1.827825904853825e-06, "loss": 1.066, "step": 30688 }, { "epoch": 1.829181070449398, "grad_norm": 3.153657913208008, "learning_rate": 1.8252931442933053e-06, "loss": 1.0375, "step": 30690 }, { "epoch": 1.829300274168554, "grad_norm": 3.4814956188201904, "learning_rate": 1.8227621071068846e-06, "loss": 1.1846, "step": 30692 }, { "epoch": 1.82941947788771, "grad_norm": 3.4921727180480957, "learning_rate": 1.820232793385096e-06, "loss": 1.206, "step": 30694 }, { "epoch": 1.8295386816068662, "grad_norm": 3.4094502925872803, "learning_rate": 1.8177052032184283e-06, "loss": 1.0718, "step": 30696 }, { "epoch": 1.8296578853260221, "grad_norm": 3.2997961044311523, "learning_rate": 1.815179336697298e-06, "loss": 1.1923, "step": 30698 }, { "epoch": 1.8297770890451783, "grad_norm": 3.1105618476867676, "learning_rate": 1.8126551939120662e-06, "loss": 0.9539, "step": 30700 }, { "epoch": 1.8298962927643343, "grad_norm": 3.1976125240325928, "learning_rate": 1.8101327749530272e-06, "loss": 1.1218, "step": 30702 }, { "epoch": 1.8300154964834903, "grad_norm": 3.1831588745117188, "learning_rate": 1.8076120799104202e-06, "loss": 0.9583, "step": 30704 }, { "epoch": 1.8301347002026462, "grad_norm": 3.1413023471832275, "learning_rate": 1.8050931088744227e-06, "loss": 1.145, "step": 30706 }, { "epoch": 1.8302539039218022, "grad_norm": 3.04156494140625, "learning_rate": 1.8025758619351296e-06, "loss": 1.1715, "step": 30708 }, { "epoch": 1.8303731076409584, "grad_norm": 3.190486192703247, "learning_rate": 1.8000603391826188e-06, "loss": 1.0488, "step": 30710 }, { "epoch": 1.8304923113601146, "grad_norm": 3.1525189876556396, "learning_rate": 1.7975465407068515e-06, "loss": 1.0658, "step": 30712 }, { "epoch": 1.8306115150792706, "grad_norm": 3.315286159515381, "learning_rate": 1.7950344665977669e-06, "loss": 1.1993, "step": 30714 }, { "epoch": 1.8307307187984265, "grad_norm": 3.437535285949707, "learning_rate": 1.7925241169452378e-06, "loss": 1.1459, "step": 30716 }, { "epoch": 1.8308499225175825, "grad_norm": 2.905043601989746, "learning_rate": 1.7900154918390588e-06, "loss": 1.0139, "step": 30718 }, { "epoch": 1.8309691262367385, "grad_norm": 3.1224634647369385, "learning_rate": 1.787508591368975e-06, "loss": 1.1386, "step": 30720 }, { "epoch": 1.8310883299558947, "grad_norm": 2.9690940380096436, "learning_rate": 1.7850034156246697e-06, "loss": 0.9239, "step": 30722 }, { "epoch": 1.8312075336750506, "grad_norm": 3.661078929901123, "learning_rate": 1.7824999646957551e-06, "loss": 1.054, "step": 30724 }, { "epoch": 1.8313267373942068, "grad_norm": 3.27929425239563, "learning_rate": 1.7799982386717872e-06, "loss": 1.1959, "step": 30726 }, { "epoch": 1.8314459411133628, "grad_norm": 3.3135986328125, "learning_rate": 1.7774982376422722e-06, "loss": 1.2525, "step": 30728 }, { "epoch": 1.8315651448325188, "grad_norm": 3.515341281890869, "learning_rate": 1.7749999616966328e-06, "loss": 1.0431, "step": 30730 }, { "epoch": 1.8316843485516747, "grad_norm": 2.8912413120269775, "learning_rate": 1.7725034109242367e-06, "loss": 1.0303, "step": 30732 }, { "epoch": 1.8318035522708307, "grad_norm": 3.2888474464416504, "learning_rate": 1.7700085854144178e-06, "loss": 1.1331, "step": 30734 }, { "epoch": 1.8319227559899869, "grad_norm": 3.2692155838012695, "learning_rate": 1.7675154852563935e-06, "loss": 1.1373, "step": 30736 }, { "epoch": 1.832041959709143, "grad_norm": 3.039330244064331, "learning_rate": 1.765024110539376e-06, "loss": 1.1263, "step": 30738 }, { "epoch": 1.832161163428299, "grad_norm": 3.0237348079681396, "learning_rate": 1.762534461352483e-06, "loss": 1.059, "step": 30740 }, { "epoch": 1.832280367147455, "grad_norm": 2.906575918197632, "learning_rate": 1.7600465377847653e-06, "loss": 1.1337, "step": 30742 }, { "epoch": 1.832399570866611, "grad_norm": 2.851196050643921, "learning_rate": 1.7575603399252405e-06, "loss": 1.0366, "step": 30744 }, { "epoch": 1.832518774585767, "grad_norm": 3.810161828994751, "learning_rate": 1.7550758678628432e-06, "loss": 1.1485, "step": 30746 }, { "epoch": 1.8326379783049231, "grad_norm": 3.131582736968994, "learning_rate": 1.7525931216864466e-06, "loss": 1.0095, "step": 30748 }, { "epoch": 1.8327571820240791, "grad_norm": 3.1698622703552246, "learning_rate": 1.7501121014848687e-06, "loss": 1.1994, "step": 30750 }, { "epoch": 1.8328763857432353, "grad_norm": 3.1764302253723145, "learning_rate": 1.7476328073468773e-06, "loss": 1.123, "step": 30752 }, { "epoch": 1.8329955894623913, "grad_norm": 3.382209062576294, "learning_rate": 1.7451552393611403e-06, "loss": 1.1624, "step": 30754 }, { "epoch": 1.8331147931815472, "grad_norm": 3.485867738723755, "learning_rate": 1.7426793976163092e-06, "loss": 1.1129, "step": 30756 }, { "epoch": 1.8332339969007032, "grad_norm": 3.177948474884033, "learning_rate": 1.7402052822009518e-06, "loss": 0.9692, "step": 30758 }, { "epoch": 1.8333532006198592, "grad_norm": 3.036342144012451, "learning_rate": 1.7377328932035586e-06, "loss": 1.1757, "step": 30760 }, { "epoch": 1.8334724043390154, "grad_norm": 3.357985496520996, "learning_rate": 1.735262230712592e-06, "loss": 1.0833, "step": 30762 }, { "epoch": 1.8335916080581716, "grad_norm": 3.1517152786254883, "learning_rate": 1.7327932948164372e-06, "loss": 1.1815, "step": 30764 }, { "epoch": 1.8337108117773275, "grad_norm": 2.7763121128082275, "learning_rate": 1.7303260856034064e-06, "loss": 1.0596, "step": 30766 }, { "epoch": 1.8338300154964835, "grad_norm": 2.994889974594116, "learning_rate": 1.7278606031617629e-06, "loss": 1.0255, "step": 30768 }, { "epoch": 1.8339492192156395, "grad_norm": 3.1701548099517822, "learning_rate": 1.7253968475797078e-06, "loss": 0.9723, "step": 30770 }, { "epoch": 1.8340684229347954, "grad_norm": 3.0175676345825195, "learning_rate": 1.722934818945382e-06, "loss": 1.2603, "step": 30772 }, { "epoch": 1.8341876266539516, "grad_norm": 3.612595796585083, "learning_rate": 1.7204745173468484e-06, "loss": 1.1701, "step": 30774 }, { "epoch": 1.8343068303731076, "grad_norm": 3.358936309814453, "learning_rate": 1.7180159428721366e-06, "loss": 1.0968, "step": 30776 }, { "epoch": 1.8344260340922638, "grad_norm": 3.5082104206085205, "learning_rate": 1.715559095609176e-06, "loss": 1.0211, "step": 30778 }, { "epoch": 1.8345452378114198, "grad_norm": 3.4868452548980713, "learning_rate": 1.71310397564588e-06, "loss": 1.1321, "step": 30780 }, { "epoch": 1.8346644415305757, "grad_norm": 3.464876174926758, "learning_rate": 1.710650583070067e-06, "loss": 1.1363, "step": 30782 }, { "epoch": 1.8347836452497317, "grad_norm": 2.9106218814849854, "learning_rate": 1.7081989179695001e-06, "loss": 0.9755, "step": 30784 }, { "epoch": 1.8349028489688877, "grad_norm": 3.413092851638794, "learning_rate": 1.7057489804318871e-06, "loss": 1.2597, "step": 30786 }, { "epoch": 1.8350220526880439, "grad_norm": 3.148622512817383, "learning_rate": 1.7033007705448689e-06, "loss": 1.0934, "step": 30788 }, { "epoch": 1.8351412564072, "grad_norm": 3.214289665222168, "learning_rate": 1.700854288396031e-06, "loss": 1.1496, "step": 30790 }, { "epoch": 1.835260460126356, "grad_norm": 3.5262255668640137, "learning_rate": 1.6984095340728867e-06, "loss": 1.1676, "step": 30792 }, { "epoch": 1.835379663845512, "grad_norm": 2.9656896591186523, "learning_rate": 1.6959665076628995e-06, "loss": 1.0912, "step": 30794 }, { "epoch": 1.835498867564668, "grad_norm": 3.0819480419158936, "learning_rate": 1.6935252092534492e-06, "loss": 1.0237, "step": 30796 }, { "epoch": 1.835618071283824, "grad_norm": 3.2839207649230957, "learning_rate": 1.691085638931894e-06, "loss": 1.121, "step": 30798 }, { "epoch": 1.8357372750029801, "grad_norm": 3.417145252227783, "learning_rate": 1.6886477967854864e-06, "loss": 1.1412, "step": 30800 }, { "epoch": 1.835856478722136, "grad_norm": 3.312962055206299, "learning_rate": 1.6862116829014396e-06, "loss": 1.0726, "step": 30802 }, { "epoch": 1.8359756824412923, "grad_norm": 3.396754264831543, "learning_rate": 1.6837772973669175e-06, "loss": 1.1642, "step": 30804 }, { "epoch": 1.8360948861604482, "grad_norm": 2.9514269828796387, "learning_rate": 1.6813446402689781e-06, "loss": 0.9744, "step": 30806 }, { "epoch": 1.8362140898796042, "grad_norm": 3.5969574451446533, "learning_rate": 1.6789137116946685e-06, "loss": 1.1934, "step": 30808 }, { "epoch": 1.8363332935987602, "grad_norm": 3.2162156105041504, "learning_rate": 1.6764845117309469e-06, "loss": 1.0955, "step": 30810 }, { "epoch": 1.8364524973179162, "grad_norm": 3.3557162284851074, "learning_rate": 1.6740570404647104e-06, "loss": 1.1892, "step": 30812 }, { "epoch": 1.8365717010370723, "grad_norm": 3.2712104320526123, "learning_rate": 1.6716312979827898e-06, "loss": 1.1358, "step": 30814 }, { "epoch": 1.8366909047562285, "grad_norm": 3.3066160678863525, "learning_rate": 1.6692072843719874e-06, "loss": 1.0779, "step": 30816 }, { "epoch": 1.8368101084753845, "grad_norm": 2.987438201904297, "learning_rate": 1.66678499971899e-06, "loss": 1.0929, "step": 30818 }, { "epoch": 1.8369293121945405, "grad_norm": 3.317312717437744, "learning_rate": 1.664364444110461e-06, "loss": 1.2156, "step": 30820 }, { "epoch": 1.8370485159136964, "grad_norm": 3.5530619621276855, "learning_rate": 1.6619456176330094e-06, "loss": 1.0897, "step": 30822 }, { "epoch": 1.8371677196328524, "grad_norm": 3.559861183166504, "learning_rate": 1.6595285203731382e-06, "loss": 1.232, "step": 30824 }, { "epoch": 1.8372869233520086, "grad_norm": 3.1550326347351074, "learning_rate": 1.6571131524173279e-06, "loss": 1.047, "step": 30826 }, { "epoch": 1.8374061270711646, "grad_norm": 2.9908335208892822, "learning_rate": 1.654699513851987e-06, "loss": 1.0169, "step": 30828 }, { "epoch": 1.8375253307903208, "grad_norm": 3.5726208686828613, "learning_rate": 1.6522876047634528e-06, "loss": 1.0992, "step": 30830 }, { "epoch": 1.8376445345094767, "grad_norm": 3.2803919315338135, "learning_rate": 1.6498774252380167e-06, "loss": 1.1008, "step": 30832 }, { "epoch": 1.8377637382286327, "grad_norm": 3.028167963027954, "learning_rate": 1.6474689753618877e-06, "loss": 0.9627, "step": 30834 }, { "epoch": 1.8378829419477887, "grad_norm": 3.6559362411499023, "learning_rate": 1.6450622552212358e-06, "loss": 1.2105, "step": 30836 }, { "epoch": 1.8380021456669446, "grad_norm": 4.09576940536499, "learning_rate": 1.6426572649021476e-06, "loss": 1.1033, "step": 30838 }, { "epoch": 1.8381213493861008, "grad_norm": 3.152233600616455, "learning_rate": 1.64025400449066e-06, "loss": 1.1352, "step": 30840 }, { "epoch": 1.838240553105257, "grad_norm": 3.1391258239746094, "learning_rate": 1.637852474072743e-06, "loss": 1.0073, "step": 30842 }, { "epoch": 1.838359756824413, "grad_norm": 3.5315873622894287, "learning_rate": 1.6354526737343223e-06, "loss": 1.043, "step": 30844 }, { "epoch": 1.838478960543569, "grad_norm": 3.50374174118042, "learning_rate": 1.6330546035612405e-06, "loss": 0.9808, "step": 30846 }, { "epoch": 1.838598164262725, "grad_norm": 3.1520097255706787, "learning_rate": 1.6306582636392676e-06, "loss": 1.022, "step": 30848 }, { "epoch": 1.838717367981881, "grad_norm": 3.415382146835327, "learning_rate": 1.6282636540541519e-06, "loss": 1.0285, "step": 30850 }, { "epoch": 1.838836571701037, "grad_norm": 2.9840011596679688, "learning_rate": 1.6258707748915414e-06, "loss": 0.9601, "step": 30852 }, { "epoch": 1.838955775420193, "grad_norm": 2.987515449523926, "learning_rate": 1.623479626237051e-06, "loss": 1.0641, "step": 30854 }, { "epoch": 1.8390749791393493, "grad_norm": 3.237790107727051, "learning_rate": 1.6210902081762069e-06, "loss": 0.9976, "step": 30856 }, { "epoch": 1.8391941828585052, "grad_norm": 3.050715684890747, "learning_rate": 1.618702520794496e-06, "loss": 1.1226, "step": 30858 }, { "epoch": 1.8393133865776612, "grad_norm": 2.782038450241089, "learning_rate": 1.6163165641773226e-06, "loss": 1.1197, "step": 30860 }, { "epoch": 1.8394325902968172, "grad_norm": 3.541588068008423, "learning_rate": 1.6139323384100623e-06, "loss": 1.1317, "step": 30862 }, { "epoch": 1.8395517940159734, "grad_norm": 3.67915678024292, "learning_rate": 1.6115498435779863e-06, "loss": 0.9868, "step": 30864 }, { "epoch": 1.8396709977351293, "grad_norm": 3.0251946449279785, "learning_rate": 1.6091690797663261e-06, "loss": 1.2483, "step": 30866 }, { "epoch": 1.8397902014542855, "grad_norm": 2.94297194480896, "learning_rate": 1.6067900470602692e-06, "loss": 1.0445, "step": 30868 }, { "epoch": 1.8399094051734415, "grad_norm": 2.983419895172119, "learning_rate": 1.6044127455448976e-06, "loss": 1.2141, "step": 30870 }, { "epoch": 1.8400286088925975, "grad_norm": 3.2830023765563965, "learning_rate": 1.6020371753052655e-06, "loss": 1.2368, "step": 30872 }, { "epoch": 1.8401478126117534, "grad_norm": 3.2900888919830322, "learning_rate": 1.5996633364263603e-06, "loss": 1.1554, "step": 30874 }, { "epoch": 1.8402670163309094, "grad_norm": 3.1655004024505615, "learning_rate": 1.5972912289930975e-06, "loss": 1.0284, "step": 30876 }, { "epoch": 1.8403862200500656, "grad_norm": 3.021272897720337, "learning_rate": 1.5949208530903315e-06, "loss": 1.0811, "step": 30878 }, { "epoch": 1.8405054237692215, "grad_norm": 2.9278488159179688, "learning_rate": 1.5925522088028666e-06, "loss": 0.9262, "step": 30880 }, { "epoch": 1.8406246274883777, "grad_norm": 3.3966026306152344, "learning_rate": 1.5901852962154296e-06, "loss": 1.2264, "step": 30882 }, { "epoch": 1.8407438312075337, "grad_norm": 3.2456233501434326, "learning_rate": 1.587820115412697e-06, "loss": 0.9768, "step": 30884 }, { "epoch": 1.8408630349266897, "grad_norm": 3.2994821071624756, "learning_rate": 1.5854566664792847e-06, "loss": 1.1364, "step": 30886 }, { "epoch": 1.8409822386458456, "grad_norm": 3.2414917945861816, "learning_rate": 1.5830949494997305e-06, "loss": 1.1226, "step": 30888 }, { "epoch": 1.8411014423650018, "grad_norm": 2.6174063682556152, "learning_rate": 1.580734964558528e-06, "loss": 1.1824, "step": 30890 }, { "epoch": 1.8412206460841578, "grad_norm": 3.567875385284424, "learning_rate": 1.5783767117401038e-06, "loss": 1.1257, "step": 30892 }, { "epoch": 1.841339849803314, "grad_norm": 3.188408136367798, "learning_rate": 1.576020191128813e-06, "loss": 1.0245, "step": 30894 }, { "epoch": 1.84145905352247, "grad_norm": 3.660691976547241, "learning_rate": 1.5736654028089604e-06, "loss": 1.2054, "step": 30896 }, { "epoch": 1.841578257241626, "grad_norm": 3.1646478176116943, "learning_rate": 1.5713123468647895e-06, "loss": 0.9338, "step": 30898 }, { "epoch": 1.841697460960782, "grad_norm": 3.645772933959961, "learning_rate": 1.5689610233804718e-06, "loss": 1.1977, "step": 30900 }, { "epoch": 1.8418166646799379, "grad_norm": 3.3664767742156982, "learning_rate": 1.5666114324401237e-06, "loss": 1.1329, "step": 30902 }, { "epoch": 1.841935868399094, "grad_norm": 3.1595747470855713, "learning_rate": 1.5642635741277944e-06, "loss": 1.0701, "step": 30904 }, { "epoch": 1.84205507211825, "grad_norm": 3.118234157562256, "learning_rate": 1.5619174485274834e-06, "loss": 1.1348, "step": 30906 }, { "epoch": 1.8421742758374062, "grad_norm": 3.3430721759796143, "learning_rate": 1.5595730557231125e-06, "loss": 1.093, "step": 30908 }, { "epoch": 1.8422934795565622, "grad_norm": 3.2271628379821777, "learning_rate": 1.5572303957985479e-06, "loss": 1.1801, "step": 30910 }, { "epoch": 1.8424126832757182, "grad_norm": 2.7933406829833984, "learning_rate": 1.5548894688375947e-06, "loss": 1.1078, "step": 30912 }, { "epoch": 1.8425318869948741, "grad_norm": 3.0413818359375, "learning_rate": 1.5525502749240028e-06, "loss": 1.0682, "step": 30914 }, { "epoch": 1.8426510907140303, "grad_norm": 3.423224925994873, "learning_rate": 1.5502128141414495e-06, "loss": 1.0362, "step": 30916 }, { "epoch": 1.8427702944331863, "grad_norm": 3.1646971702575684, "learning_rate": 1.5478770865735516e-06, "loss": 1.1248, "step": 30918 }, { "epoch": 1.8428894981523425, "grad_norm": 3.419290542602539, "learning_rate": 1.5455430923038694e-06, "loss": 1.123, "step": 30920 }, { "epoch": 1.8430087018714985, "grad_norm": 3.099538803100586, "learning_rate": 1.5432108314158922e-06, "loss": 1.0281, "step": 30922 }, { "epoch": 1.8431279055906544, "grad_norm": 3.506497383117676, "learning_rate": 1.5408803039930586e-06, "loss": 1.1439, "step": 30924 }, { "epoch": 1.8432471093098104, "grad_norm": 3.2706069946289062, "learning_rate": 1.538551510118741e-06, "loss": 1.0213, "step": 30926 }, { "epoch": 1.8433663130289664, "grad_norm": 3.5797619819641113, "learning_rate": 1.5362244498762391e-06, "loss": 1.2059, "step": 30928 }, { "epoch": 1.8434855167481226, "grad_norm": 3.218050718307495, "learning_rate": 1.5338991233488032e-06, "loss": 1.0704, "step": 30930 }, { "epoch": 1.8436047204672785, "grad_norm": 3.3667187690734863, "learning_rate": 1.5315755306196332e-06, "loss": 1.0997, "step": 30932 }, { "epoch": 1.8437239241864347, "grad_norm": 3.245468854904175, "learning_rate": 1.5292536717718241e-06, "loss": 1.0661, "step": 30934 }, { "epoch": 1.8438431279055907, "grad_norm": 2.997710943222046, "learning_rate": 1.5269335468884593e-06, "loss": 1.1143, "step": 30936 }, { "epoch": 1.8439623316247467, "grad_norm": 3.2999441623687744, "learning_rate": 1.524615156052539e-06, "loss": 1.0589, "step": 30938 }, { "epoch": 1.8440815353439026, "grad_norm": 3.1581814289093018, "learning_rate": 1.5222984993469802e-06, "loss": 1.1401, "step": 30940 }, { "epoch": 1.8442007390630588, "grad_norm": 2.9799296855926514, "learning_rate": 1.5199835768546721e-06, "loss": 0.9586, "step": 30942 }, { "epoch": 1.8443199427822148, "grad_norm": 3.3972885608673096, "learning_rate": 1.517670388658421e-06, "loss": 1.1131, "step": 30944 }, { "epoch": 1.844439146501371, "grad_norm": 3.2584331035614014, "learning_rate": 1.5153589348409824e-06, "loss": 1.1436, "step": 30946 }, { "epoch": 1.844558350220527, "grad_norm": 3.2448782920837402, "learning_rate": 1.5130492154850406e-06, "loss": 1.1912, "step": 30948 }, { "epoch": 1.844677553939683, "grad_norm": 3.419316530227661, "learning_rate": 1.510741230673235e-06, "loss": 1.0531, "step": 30950 }, { "epoch": 1.8447967576588389, "grad_norm": 2.72820782661438, "learning_rate": 1.5084349804881104e-06, "loss": 0.9972, "step": 30952 }, { "epoch": 1.8449159613779949, "grad_norm": 3.37880539894104, "learning_rate": 1.5061304650121788e-06, "loss": 1.0778, "step": 30954 }, { "epoch": 1.845035165097151, "grad_norm": 3.7556843757629395, "learning_rate": 1.5038276843278853e-06, "loss": 1.1407, "step": 30956 }, { "epoch": 1.8451543688163072, "grad_norm": 3.315457582473755, "learning_rate": 1.5015266385175975e-06, "loss": 1.0886, "step": 30958 }, { "epoch": 1.8452735725354632, "grad_norm": 3.2655136585235596, "learning_rate": 1.4992273276636436e-06, "loss": 1.128, "step": 30960 }, { "epoch": 1.8453927762546192, "grad_norm": 3.5083060264587402, "learning_rate": 1.4969297518482695e-06, "loss": 1.197, "step": 30962 }, { "epoch": 1.8455119799737751, "grad_norm": 2.9913272857666016, "learning_rate": 1.49463391115367e-06, "loss": 1.0291, "step": 30964 }, { "epoch": 1.845631183692931, "grad_norm": 3.0024354457855225, "learning_rate": 1.4923398056619742e-06, "loss": 1.1251, "step": 30966 }, { "epoch": 1.8457503874120873, "grad_norm": 2.9672040939331055, "learning_rate": 1.4900474354552497e-06, "loss": 1.2014, "step": 30968 }, { "epoch": 1.8458695911312433, "grad_norm": 2.8230090141296387, "learning_rate": 1.487756800615503e-06, "loss": 1.0698, "step": 30970 }, { "epoch": 1.8459887948503995, "grad_norm": 3.3409059047698975, "learning_rate": 1.4854679012246797e-06, "loss": 1.0863, "step": 30972 }, { "epoch": 1.8461079985695554, "grad_norm": 3.090435266494751, "learning_rate": 1.4831807373646644e-06, "loss": 1.0373, "step": 30974 }, { "epoch": 1.8462272022887114, "grad_norm": 3.103353977203369, "learning_rate": 1.480895309117264e-06, "loss": 1.0711, "step": 30976 }, { "epoch": 1.8463464060078674, "grad_norm": 3.14726185798645, "learning_rate": 1.478611616564246e-06, "loss": 1.1642, "step": 30978 }, { "epoch": 1.8464656097270233, "grad_norm": 3.2478621006011963, "learning_rate": 1.4763296597873121e-06, "loss": 0.9753, "step": 30980 }, { "epoch": 1.8465848134461795, "grad_norm": 2.7765560150146484, "learning_rate": 1.4740494388680804e-06, "loss": 1.0169, "step": 30982 }, { "epoch": 1.8467040171653357, "grad_norm": 3.4534780979156494, "learning_rate": 1.4717709538881352e-06, "loss": 1.298, "step": 30984 }, { "epoch": 1.8468232208844917, "grad_norm": 3.362494707107544, "learning_rate": 1.4694942049289783e-06, "loss": 1.1224, "step": 30986 }, { "epoch": 1.8469424246036477, "grad_norm": 3.324506998062134, "learning_rate": 1.4672191920720612e-06, "loss": 1.0972, "step": 30988 }, { "epoch": 1.8470616283228036, "grad_norm": 3.3011300563812256, "learning_rate": 1.4649459153987632e-06, "loss": 1.2635, "step": 30990 }, { "epoch": 1.8471808320419596, "grad_norm": 2.7602481842041016, "learning_rate": 1.4626743749904138e-06, "loss": 1.0847, "step": 30992 }, { "epoch": 1.8473000357611158, "grad_norm": 3.060096025466919, "learning_rate": 1.4604045709282643e-06, "loss": 1.1605, "step": 30994 }, { "epoch": 1.8474192394802718, "grad_norm": 3.6426315307617188, "learning_rate": 1.4581365032935334e-06, "loss": 1.1207, "step": 30996 }, { "epoch": 1.847538443199428, "grad_norm": 3.0713303089141846, "learning_rate": 1.455870172167334e-06, "loss": 1.082, "step": 30998 }, { "epoch": 1.847657646918584, "grad_norm": 3.3012547492980957, "learning_rate": 1.453605577630751e-06, "loss": 1.1288, "step": 31000 }, { "epoch": 1.8477768506377399, "grad_norm": 2.975377082824707, "learning_rate": 1.4513427197647977e-06, "loss": 0.9812, "step": 31002 }, { "epoch": 1.8478960543568959, "grad_norm": 3.2235372066497803, "learning_rate": 1.4490815986504203e-06, "loss": 1.1034, "step": 31004 }, { "epoch": 1.8480152580760518, "grad_norm": 3.3793137073516846, "learning_rate": 1.4468222143685095e-06, "loss": 1.0876, "step": 31006 }, { "epoch": 1.848134461795208, "grad_norm": 3.24931263923645, "learning_rate": 1.4445645669998952e-06, "loss": 1.0996, "step": 31008 }, { "epoch": 1.8482536655143642, "grad_norm": 3.645613193511963, "learning_rate": 1.4423086566253352e-06, "loss": 1.3106, "step": 31010 }, { "epoch": 1.8483728692335202, "grad_norm": 3.3213984966278076, "learning_rate": 1.4400544833255314e-06, "loss": 1.107, "step": 31012 }, { "epoch": 1.8484920729526761, "grad_norm": 3.669754981994629, "learning_rate": 1.4378020471811248e-06, "loss": 1.0895, "step": 31014 }, { "epoch": 1.8486112766718321, "grad_norm": 2.9374382495880127, "learning_rate": 1.4355513482726957e-06, "loss": 1.1865, "step": 31016 }, { "epoch": 1.848730480390988, "grad_norm": 3.2422611713409424, "learning_rate": 1.4333023866807516e-06, "loss": 1.0051, "step": 31018 }, { "epoch": 1.8488496841101443, "grad_norm": 3.455455780029297, "learning_rate": 1.431055162485756e-06, "loss": 1.0307, "step": 31020 }, { "epoch": 1.8489688878293002, "grad_norm": 3.386056423187256, "learning_rate": 1.428809675768089e-06, "loss": 1.1736, "step": 31022 }, { "epoch": 1.8490880915484564, "grad_norm": 3.2586679458618164, "learning_rate": 1.4265659266080865e-06, "loss": 1.1316, "step": 31024 }, { "epoch": 1.8492072952676124, "grad_norm": 2.8557820320129395, "learning_rate": 1.424323915086012e-06, "loss": 0.9882, "step": 31026 }, { "epoch": 1.8493264989867684, "grad_norm": 3.3664588928222656, "learning_rate": 1.4220836412820738e-06, "loss": 1.1031, "step": 31028 }, { "epoch": 1.8494457027059243, "grad_norm": 3.175264358520508, "learning_rate": 1.4198451052764072e-06, "loss": 1.0294, "step": 31030 }, { "epoch": 1.8495649064250803, "grad_norm": 3.4853932857513428, "learning_rate": 1.4176083071490987e-06, "loss": 1.1227, "step": 31032 }, { "epoch": 1.8496841101442365, "grad_norm": 3.6456406116485596, "learning_rate": 1.4153732469801672e-06, "loss": 1.0977, "step": 31034 }, { "epoch": 1.8498033138633927, "grad_norm": 3.230441093444824, "learning_rate": 1.4131399248495603e-06, "loss": 1.1266, "step": 31036 }, { "epoch": 1.8499225175825487, "grad_norm": 3.3849782943725586, "learning_rate": 1.4109083408371804e-06, "loss": 1.2035, "step": 31038 }, { "epoch": 1.8500417213017046, "grad_norm": 3.1857035160064697, "learning_rate": 1.408678495022847e-06, "loss": 1.0386, "step": 31040 }, { "epoch": 1.8501609250208606, "grad_norm": 3.2559053897857666, "learning_rate": 1.406450387486341e-06, "loss": 1.1823, "step": 31042 }, { "epoch": 1.8502801287400166, "grad_norm": 3.2608227729797363, "learning_rate": 1.4042240183073763e-06, "loss": 1.1602, "step": 31044 }, { "epoch": 1.8503993324591728, "grad_norm": 3.4921627044677734, "learning_rate": 1.4019993875655725e-06, "loss": 1.094, "step": 31046 }, { "epoch": 1.8505185361783287, "grad_norm": 3.407489061355591, "learning_rate": 1.3997764953405323e-06, "loss": 1.0696, "step": 31048 }, { "epoch": 1.850637739897485, "grad_norm": 2.891038179397583, "learning_rate": 1.3975553417117703e-06, "loss": 0.9334, "step": 31050 }, { "epoch": 1.850756943616641, "grad_norm": 3.357327938079834, "learning_rate": 1.3953359267587506e-06, "loss": 1.1013, "step": 31052 }, { "epoch": 1.8508761473357969, "grad_norm": 2.998520851135254, "learning_rate": 1.3931182505608597e-06, "loss": 1.0136, "step": 31054 }, { "epoch": 1.8509953510549528, "grad_norm": 3.1239013671875, "learning_rate": 1.3909023131974342e-06, "loss": 1.1657, "step": 31056 }, { "epoch": 1.8511145547741088, "grad_norm": 3.2280311584472656, "learning_rate": 1.3886881147477493e-06, "loss": 1.0614, "step": 31058 }, { "epoch": 1.851233758493265, "grad_norm": 3.7256405353546143, "learning_rate": 1.3864756552910086e-06, "loss": 1.2108, "step": 31060 }, { "epoch": 1.8513529622124212, "grad_norm": 3.3226749897003174, "learning_rate": 1.3842649349063652e-06, "loss": 0.9302, "step": 31062 }, { "epoch": 1.8514721659315772, "grad_norm": 3.2109861373901367, "learning_rate": 1.3820559536729006e-06, "loss": 1.1936, "step": 31064 }, { "epoch": 1.8515913696507331, "grad_norm": 3.5828309059143066, "learning_rate": 1.3798487116696457e-06, "loss": 1.2208, "step": 31066 }, { "epoch": 1.851710573369889, "grad_norm": 3.4850709438323975, "learning_rate": 1.377643208975543e-06, "loss": 1.3165, "step": 31068 }, { "epoch": 1.851829777089045, "grad_norm": 3.326967716217041, "learning_rate": 1.375439445669513e-06, "loss": 0.9779, "step": 31070 }, { "epoch": 1.8519489808082013, "grad_norm": 2.7784829139709473, "learning_rate": 1.3732374218303813e-06, "loss": 0.9416, "step": 31072 }, { "epoch": 1.8520681845273572, "grad_norm": 3.41977858543396, "learning_rate": 1.3710371375369124e-06, "loss": 1.1807, "step": 31074 }, { "epoch": 1.8521873882465134, "grad_norm": 3.0251686573028564, "learning_rate": 1.3688385928678326e-06, "loss": 0.8921, "step": 31076 }, { "epoch": 1.8523065919656694, "grad_norm": 3.1835875511169434, "learning_rate": 1.36664178790179e-06, "loss": 1.0139, "step": 31078 }, { "epoch": 1.8524257956848253, "grad_norm": 3.162719964981079, "learning_rate": 1.3644467227173608e-06, "loss": 0.9422, "step": 31080 }, { "epoch": 1.8525449994039813, "grad_norm": 2.973017692565918, "learning_rate": 1.3622533973930761e-06, "loss": 1.1603, "step": 31082 }, { "epoch": 1.8526642031231373, "grad_norm": 3.1545069217681885, "learning_rate": 1.3600618120074127e-06, "loss": 1.0519, "step": 31084 }, { "epoch": 1.8527834068422935, "grad_norm": 3.453723907470703, "learning_rate": 1.3578719666387407e-06, "loss": 1.1736, "step": 31086 }, { "epoch": 1.8529026105614497, "grad_norm": 3.239896297454834, "learning_rate": 1.3556838613654255e-06, "loss": 1.1866, "step": 31088 }, { "epoch": 1.8530218142806056, "grad_norm": 3.00589656829834, "learning_rate": 1.3534974962657376e-06, "loss": 1.1047, "step": 31090 }, { "epoch": 1.8531410179997616, "grad_norm": 2.981964111328125, "learning_rate": 1.3513128714178757e-06, "loss": 1.0475, "step": 31092 }, { "epoch": 1.8532602217189176, "grad_norm": 3.294761896133423, "learning_rate": 1.3491299869000107e-06, "loss": 1.175, "step": 31094 }, { "epoch": 1.8533794254380735, "grad_norm": 3.032426595687866, "learning_rate": 1.3469488427902188e-06, "loss": 1.2622, "step": 31096 }, { "epoch": 1.8534986291572297, "grad_norm": 3.3737411499023438, "learning_rate": 1.3447694391665322e-06, "loss": 1.244, "step": 31098 }, { "epoch": 1.8536178328763857, "grad_norm": 3.1174185276031494, "learning_rate": 1.3425917761069162e-06, "loss": 1.1388, "step": 31100 }, { "epoch": 1.853737036595542, "grad_norm": 3.5397469997406006, "learning_rate": 1.3404158536892696e-06, "loss": 1.1352, "step": 31102 }, { "epoch": 1.8538562403146979, "grad_norm": 3.049128293991089, "learning_rate": 1.338241671991436e-06, "loss": 1.0457, "step": 31104 }, { "epoch": 1.8539754440338538, "grad_norm": 3.2790653705596924, "learning_rate": 1.3360692310911914e-06, "loss": 1.2152, "step": 31106 }, { "epoch": 1.8540946477530098, "grad_norm": 2.6175146102905273, "learning_rate": 1.333898531066252e-06, "loss": 1.042, "step": 31108 }, { "epoch": 1.8542138514721658, "grad_norm": 3.1513328552246094, "learning_rate": 1.3317295719942724e-06, "loss": 1.1385, "step": 31110 }, { "epoch": 1.854333055191322, "grad_norm": 3.516704559326172, "learning_rate": 1.3295623539528401e-06, "loss": 1.0381, "step": 31112 }, { "epoch": 1.8544522589104782, "grad_norm": 3.5830841064453125, "learning_rate": 1.3273968770194879e-06, "loss": 1.1972, "step": 31114 }, { "epoch": 1.8545714626296341, "grad_norm": 3.230450391769409, "learning_rate": 1.3252331412716867e-06, "loss": 1.0604, "step": 31116 }, { "epoch": 1.85469066634879, "grad_norm": 3.5992212295532227, "learning_rate": 1.3230711467868307e-06, "loss": 1.145, "step": 31118 }, { "epoch": 1.854809870067946, "grad_norm": 3.2671000957489014, "learning_rate": 1.320910893642263e-06, "loss": 1.1239, "step": 31120 }, { "epoch": 1.854929073787102, "grad_norm": 3.2573442459106445, "learning_rate": 1.3187523819152726e-06, "loss": 1.0803, "step": 31122 }, { "epoch": 1.8550482775062582, "grad_norm": 3.370041847229004, "learning_rate": 1.316595611683069e-06, "loss": 1.1098, "step": 31124 }, { "epoch": 1.8551674812254142, "grad_norm": 3.6252927780151367, "learning_rate": 1.3144405830228023e-06, "loss": 1.0674, "step": 31126 }, { "epoch": 1.8552866849445704, "grad_norm": 3.3822567462921143, "learning_rate": 1.3122872960115717e-06, "loss": 1.1001, "step": 31128 }, { "epoch": 1.8554058886637264, "grad_norm": 3.4117276668548584, "learning_rate": 1.3101357507264212e-06, "loss": 1.2223, "step": 31130 }, { "epoch": 1.8555250923828823, "grad_norm": 3.0681793689727783, "learning_rate": 1.3079859472442946e-06, "loss": 1.1737, "step": 31132 }, { "epoch": 1.8556442961020383, "grad_norm": 3.310417413711548, "learning_rate": 1.3058378856421083e-06, "loss": 1.0684, "step": 31134 }, { "epoch": 1.8557634998211943, "grad_norm": 3.451164484024048, "learning_rate": 1.303691565996712e-06, "loss": 1.0965, "step": 31136 }, { "epoch": 1.8558827035403505, "grad_norm": 3.36110520362854, "learning_rate": 1.3015469883848775e-06, "loss": 1.1206, "step": 31138 }, { "epoch": 1.8560019072595066, "grad_norm": 3.2430453300476074, "learning_rate": 1.2994041528833266e-06, "loss": 1.1394, "step": 31140 }, { "epoch": 1.8561211109786626, "grad_norm": 2.9412550926208496, "learning_rate": 1.2972630595687153e-06, "loss": 1.0781, "step": 31142 }, { "epoch": 1.8562403146978186, "grad_norm": 3.2414755821228027, "learning_rate": 1.2951237085176427e-06, "loss": 1.0586, "step": 31144 }, { "epoch": 1.8563595184169746, "grad_norm": 3.6001651287078857, "learning_rate": 1.2929860998066312e-06, "loss": 1.0203, "step": 31146 }, { "epoch": 1.8564787221361305, "grad_norm": 3.341686487197876, "learning_rate": 1.2908502335121642e-06, "loss": 1.0116, "step": 31148 }, { "epoch": 1.8565979258552867, "grad_norm": 3.3791210651397705, "learning_rate": 1.288716109710636e-06, "loss": 1.0713, "step": 31150 }, { "epoch": 1.8567171295744427, "grad_norm": 2.9041976928710938, "learning_rate": 1.2865837284783965e-06, "loss": 0.968, "step": 31152 }, { "epoch": 1.8568363332935989, "grad_norm": 2.983319044113159, "learning_rate": 1.2844530898917295e-06, "loss": 1.2771, "step": 31154 }, { "epoch": 1.8569555370127548, "grad_norm": 2.8853952884674072, "learning_rate": 1.2823241940268515e-06, "loss": 1.1308, "step": 31156 }, { "epoch": 1.8570747407319108, "grad_norm": 2.9742486476898193, "learning_rate": 1.280197040959924e-06, "loss": 1.0372, "step": 31158 }, { "epoch": 1.8571939444510668, "grad_norm": 3.040057420730591, "learning_rate": 1.2780716307670416e-06, "loss": 1.0044, "step": 31160 }, { "epoch": 1.8573131481702227, "grad_norm": 3.216309070587158, "learning_rate": 1.2759479635242378e-06, "loss": 1.1659, "step": 31162 }, { "epoch": 1.857432351889379, "grad_norm": 2.9847259521484375, "learning_rate": 1.2738260393074851e-06, "loss": 1.012, "step": 31164 }, { "epoch": 1.8575515556085351, "grad_norm": 3.5061025619506836, "learning_rate": 1.27170585819269e-06, "loss": 0.9785, "step": 31166 }, { "epoch": 1.857670759327691, "grad_norm": 3.1390693187713623, "learning_rate": 1.2695874202556968e-06, "loss": 1.0168, "step": 31168 }, { "epoch": 1.857789963046847, "grad_norm": 2.9396746158599854, "learning_rate": 1.2674707255722895e-06, "loss": 1.0676, "step": 31170 }, { "epoch": 1.857909166766003, "grad_norm": 2.9703855514526367, "learning_rate": 1.2653557742181909e-06, "loss": 1.233, "step": 31172 }, { "epoch": 1.858028370485159, "grad_norm": 3.5918853282928467, "learning_rate": 1.2632425662690573e-06, "loss": 1.1084, "step": 31174 }, { "epoch": 1.8581475742043152, "grad_norm": 3.119250535964966, "learning_rate": 1.261131101800489e-06, "loss": 1.0242, "step": 31176 }, { "epoch": 1.8582667779234712, "grad_norm": 3.107954978942871, "learning_rate": 1.2590213808880257e-06, "loss": 1.0653, "step": 31178 }, { "epoch": 1.8583859816426274, "grad_norm": 2.925022840499878, "learning_rate": 1.256913403607124e-06, "loss": 1.0958, "step": 31180 }, { "epoch": 1.8585051853617833, "grad_norm": 3.532900094985962, "learning_rate": 1.254807170033212e-06, "loss": 1.1009, "step": 31182 }, { "epoch": 1.8586243890809393, "grad_norm": 3.40368390083313, "learning_rate": 1.2527026802416243e-06, "loss": 1.0758, "step": 31184 }, { "epoch": 1.8587435928000953, "grad_norm": 3.408592462539673, "learning_rate": 1.2505999343076446e-06, "loss": 1.049, "step": 31186 }, { "epoch": 1.8588627965192512, "grad_norm": 3.241436719894409, "learning_rate": 1.2484989323065022e-06, "loss": 0.9279, "step": 31188 }, { "epoch": 1.8589820002384074, "grad_norm": 3.2667815685272217, "learning_rate": 1.2463996743133587e-06, "loss": 0.9899, "step": 31190 }, { "epoch": 1.8591012039575636, "grad_norm": 3.1156232357025146, "learning_rate": 1.2443021604033045e-06, "loss": 1.1247, "step": 31192 }, { "epoch": 1.8592204076767196, "grad_norm": 3.8825247287750244, "learning_rate": 1.2422063906513737e-06, "loss": 1.1359, "step": 31194 }, { "epoch": 1.8593396113958756, "grad_norm": 3.0611305236816406, "learning_rate": 1.2401123651325508e-06, "loss": 0.9705, "step": 31196 }, { "epoch": 1.8594588151150315, "grad_norm": 3.365601062774658, "learning_rate": 1.238020083921726e-06, "loss": 1.1475, "step": 31198 }, { "epoch": 1.8595780188341875, "grad_norm": 3.0593998432159424, "learning_rate": 1.2359295470937782e-06, "loss": 1.251, "step": 31200 }, { "epoch": 1.8596972225533437, "grad_norm": 3.10274076461792, "learning_rate": 1.2338407547234586e-06, "loss": 0.982, "step": 31202 }, { "epoch": 1.8598164262724997, "grad_norm": 3.4468281269073486, "learning_rate": 1.2317537068855134e-06, "loss": 1.2587, "step": 31204 }, { "epoch": 1.8599356299916558, "grad_norm": 3.0037336349487305, "learning_rate": 1.229668403654599e-06, "loss": 0.9905, "step": 31206 }, { "epoch": 1.8600548337108118, "grad_norm": 3.2146658897399902, "learning_rate": 1.2275848451053062e-06, "loss": 1.014, "step": 31208 }, { "epoch": 1.8601740374299678, "grad_norm": 3.2789065837860107, "learning_rate": 1.2255030313121806e-06, "loss": 1.0732, "step": 31210 }, { "epoch": 1.8602932411491238, "grad_norm": 3.636204719543457, "learning_rate": 1.2234229623496962e-06, "loss": 1.0165, "step": 31212 }, { "epoch": 1.8604124448682797, "grad_norm": 3.2575347423553467, "learning_rate": 1.2213446382922545e-06, "loss": 1.1602, "step": 31214 }, { "epoch": 1.860531648587436, "grad_norm": 3.3369593620300293, "learning_rate": 1.2192680592142015e-06, "loss": 1.2286, "step": 31216 }, { "epoch": 1.860650852306592, "grad_norm": 3.41694974899292, "learning_rate": 1.21719322518985e-06, "loss": 1.224, "step": 31218 }, { "epoch": 1.860770056025748, "grad_norm": 3.1895041465759277, "learning_rate": 1.2151201362933851e-06, "loss": 1.097, "step": 31220 }, { "epoch": 1.860889259744904, "grad_norm": 3.070702075958252, "learning_rate": 1.213048792599003e-06, "loss": 1.019, "step": 31222 }, { "epoch": 1.86100846346406, "grad_norm": 4.103283405303955, "learning_rate": 1.210979194180789e-06, "loss": 0.972, "step": 31224 }, { "epoch": 1.861127667183216, "grad_norm": 3.3275842666625977, "learning_rate": 1.2089113411127673e-06, "loss": 1.1422, "step": 31226 }, { "epoch": 1.8612468709023722, "grad_norm": 3.648873805999756, "learning_rate": 1.206845233468934e-06, "loss": 1.1766, "step": 31228 }, { "epoch": 1.8613660746215281, "grad_norm": 3.340385675430298, "learning_rate": 1.2047808713231857e-06, "loss": 1.1231, "step": 31230 }, { "epoch": 1.8614852783406843, "grad_norm": 3.2464041709899902, "learning_rate": 1.2027182547493799e-06, "loss": 1.1086, "step": 31232 }, { "epoch": 1.8616044820598403, "grad_norm": 3.1548898220062256, "learning_rate": 1.2006573838212964e-06, "loss": 1.2104, "step": 31234 }, { "epoch": 1.8617236857789963, "grad_norm": 3.195425033569336, "learning_rate": 1.1985982586126654e-06, "loss": 1.0433, "step": 31236 }, { "epoch": 1.8618428894981522, "grad_norm": 3.0072929859161377, "learning_rate": 1.19654087919715e-06, "loss": 1.0366, "step": 31238 }, { "epoch": 1.8619620932173084, "grad_norm": 3.4077694416046143, "learning_rate": 1.1944852456483356e-06, "loss": 1.2659, "step": 31240 }, { "epoch": 1.8620812969364644, "grad_norm": 3.177023410797119, "learning_rate": 1.192431358039786e-06, "loss": 1.1657, "step": 31242 }, { "epoch": 1.8622005006556206, "grad_norm": 3.192065477371216, "learning_rate": 1.190379216444948e-06, "loss": 1.0028, "step": 31244 }, { "epoch": 1.8623197043747766, "grad_norm": 3.6850616931915283, "learning_rate": 1.1883288209372511e-06, "loss": 1.0193, "step": 31246 }, { "epoch": 1.8624389080939325, "grad_norm": 3.277998447418213, "learning_rate": 1.1862801715900374e-06, "loss": 1.0218, "step": 31248 }, { "epoch": 1.8625581118130885, "grad_norm": 3.1978044509887695, "learning_rate": 1.1842332684765977e-06, "loss": 1.1188, "step": 31250 }, { "epoch": 1.8626773155322445, "grad_norm": 3.4022953510284424, "learning_rate": 1.1821881116701573e-06, "loss": 1.1303, "step": 31252 }, { "epoch": 1.8627965192514007, "grad_norm": 3.5427956581115723, "learning_rate": 1.1801447012438737e-06, "loss": 1.16, "step": 31254 }, { "epoch": 1.8629157229705566, "grad_norm": 3.2944180965423584, "learning_rate": 1.1781030372708501e-06, "loss": 1.0905, "step": 31256 }, { "epoch": 1.8630349266897128, "grad_norm": 3.5129482746124268, "learning_rate": 1.1760631198241224e-06, "loss": 1.2594, "step": 31258 }, { "epoch": 1.8631541304088688, "grad_norm": 3.2404723167419434, "learning_rate": 1.1740249489766709e-06, "loss": 1.1267, "step": 31260 }, { "epoch": 1.8632733341280248, "grad_norm": 3.2420060634613037, "learning_rate": 1.171988524801393e-06, "loss": 1.0433, "step": 31262 }, { "epoch": 1.8633925378471807, "grad_norm": 2.987035036087036, "learning_rate": 1.169953847371158e-06, "loss": 1.1894, "step": 31264 }, { "epoch": 1.863511741566337, "grad_norm": 3.222050189971924, "learning_rate": 1.1679209167587413e-06, "loss": 1.1192, "step": 31266 }, { "epoch": 1.863630945285493, "grad_norm": 2.9096643924713135, "learning_rate": 1.1658897330368678e-06, "loss": 1.0694, "step": 31268 }, { "epoch": 1.863750149004649, "grad_norm": 3.368305206298828, "learning_rate": 1.163860296278213e-06, "loss": 1.0935, "step": 31270 }, { "epoch": 1.863869352723805, "grad_norm": 3.0150625705718994, "learning_rate": 1.161832606555352e-06, "loss": 1.2052, "step": 31272 }, { "epoch": 1.863988556442961, "grad_norm": 3.5292651653289795, "learning_rate": 1.1598066639408434e-06, "loss": 1.1759, "step": 31274 }, { "epoch": 1.864107760162117, "grad_norm": 3.228315591812134, "learning_rate": 1.157782468507157e-06, "loss": 1.1895, "step": 31276 }, { "epoch": 1.864226963881273, "grad_norm": 3.5981063842773438, "learning_rate": 1.1557600203267017e-06, "loss": 1.1448, "step": 31278 }, { "epoch": 1.8643461676004291, "grad_norm": 3.2298707962036133, "learning_rate": 1.153739319471825e-06, "loss": 1.1763, "step": 31280 }, { "epoch": 1.8644653713195851, "grad_norm": 3.1816046237945557, "learning_rate": 1.1517203660148302e-06, "loss": 1.1756, "step": 31282 }, { "epoch": 1.8645845750387413, "grad_norm": 3.33711576461792, "learning_rate": 1.1497031600279208e-06, "loss": 1.0291, "step": 31284 }, { "epoch": 1.8647037787578973, "grad_norm": 3.195446491241455, "learning_rate": 1.1476877015832665e-06, "loss": 1.0946, "step": 31286 }, { "epoch": 1.8648229824770532, "grad_norm": 3.245793104171753, "learning_rate": 1.1456739907529824e-06, "loss": 1.0763, "step": 31288 }, { "epoch": 1.8649421861962092, "grad_norm": 3.3015832901000977, "learning_rate": 1.1436620276090825e-06, "loss": 1.1684, "step": 31290 }, { "epoch": 1.8650613899153654, "grad_norm": 3.2675161361694336, "learning_rate": 1.1416518122235541e-06, "loss": 1.1204, "step": 31292 }, { "epoch": 1.8651805936345214, "grad_norm": 3.250866413116455, "learning_rate": 1.1396433446683064e-06, "loss": 1.2031, "step": 31294 }, { "epoch": 1.8652997973536776, "grad_norm": 3.2163875102996826, "learning_rate": 1.1376366250151927e-06, "loss": 1.1063, "step": 31296 }, { "epoch": 1.8654190010728335, "grad_norm": 3.005748748779297, "learning_rate": 1.1356316533359946e-06, "loss": 0.9258, "step": 31298 }, { "epoch": 1.8655382047919895, "grad_norm": 3.1395654678344727, "learning_rate": 1.1336284297024435e-06, "loss": 1.0729, "step": 31300 }, { "epoch": 1.8656574085111455, "grad_norm": 3.5047860145568848, "learning_rate": 1.131626954186199e-06, "loss": 1.2261, "step": 31302 }, { "epoch": 1.8657766122303014, "grad_norm": 3.3824663162231445, "learning_rate": 1.1296272268588537e-06, "loss": 1.0474, "step": 31304 }, { "epoch": 1.8658958159494576, "grad_norm": 3.6133761405944824, "learning_rate": 1.1276292477919558e-06, "loss": 1.2408, "step": 31306 }, { "epoch": 1.8660150196686136, "grad_norm": 3.3232204914093018, "learning_rate": 1.1256330170569707e-06, "loss": 1.089, "step": 31308 }, { "epoch": 1.8661342233877698, "grad_norm": 3.3675920963287354, "learning_rate": 1.1236385347253132e-06, "loss": 1.1162, "step": 31310 }, { "epoch": 1.8662534271069258, "grad_norm": 2.829177141189575, "learning_rate": 1.1216458008683372e-06, "loss": 1.084, "step": 31312 }, { "epoch": 1.8663726308260817, "grad_norm": 3.4304845333099365, "learning_rate": 1.1196548155573305e-06, "loss": 1.0928, "step": 31314 }, { "epoch": 1.8664918345452377, "grad_norm": 3.0313074588775635, "learning_rate": 1.1176655788635081e-06, "loss": 1.1036, "step": 31316 }, { "epoch": 1.866611038264394, "grad_norm": 3.182469367980957, "learning_rate": 1.115678090858041e-06, "loss": 1.401, "step": 31318 }, { "epoch": 1.8667302419835499, "grad_norm": 3.2533328533172607, "learning_rate": 1.1136923516120223e-06, "loss": 1.1854, "step": 31320 }, { "epoch": 1.866849445702706, "grad_norm": 3.1534457206726074, "learning_rate": 1.1117083611964897e-06, "loss": 1.0195, "step": 31322 }, { "epoch": 1.866968649421862, "grad_norm": 2.8729889392852783, "learning_rate": 1.1097261196824194e-06, "loss": 1.0088, "step": 31324 }, { "epoch": 1.867087853141018, "grad_norm": 3.247190475463867, "learning_rate": 1.1077456271407161e-06, "loss": 1.0977, "step": 31326 }, { "epoch": 1.867207056860174, "grad_norm": 3.3527493476867676, "learning_rate": 1.1057668836422453e-06, "loss": 1.0675, "step": 31328 }, { "epoch": 1.86732626057933, "grad_norm": 3.630462408065796, "learning_rate": 1.103789889257778e-06, "loss": 1.1626, "step": 31330 }, { "epoch": 1.8674454642984861, "grad_norm": 2.9594383239746094, "learning_rate": 1.1018146440580412e-06, "loss": 1.1102, "step": 31332 }, { "epoch": 1.8675646680176423, "grad_norm": 3.4089596271514893, "learning_rate": 1.0998411481137061e-06, "loss": 1.0587, "step": 31334 }, { "epoch": 1.8676838717367983, "grad_norm": 3.1483705043792725, "learning_rate": 1.0978694014953495e-06, "loss": 1.1161, "step": 31336 }, { "epoch": 1.8678030754559543, "grad_norm": 3.1991119384765625, "learning_rate": 1.0958994042735316e-06, "loss": 1.1312, "step": 31338 }, { "epoch": 1.8679222791751102, "grad_norm": 3.48170804977417, "learning_rate": 1.0939311565187128e-06, "loss": 0.9895, "step": 31340 }, { "epoch": 1.8680414828942662, "grad_norm": 2.955901861190796, "learning_rate": 1.0919646583013089e-06, "loss": 0.9335, "step": 31342 }, { "epoch": 1.8681606866134224, "grad_norm": 3.7665345668792725, "learning_rate": 1.0899999096916635e-06, "loss": 1.2524, "step": 31344 }, { "epoch": 1.8682798903325784, "grad_norm": 3.341121196746826, "learning_rate": 1.0880369107600652e-06, "loss": 1.1899, "step": 31346 }, { "epoch": 1.8683990940517345, "grad_norm": 2.817964553833008, "learning_rate": 1.0860756615767354e-06, "loss": 1.0782, "step": 31348 }, { "epoch": 1.8685182977708905, "grad_norm": 3.297353506088257, "learning_rate": 1.0841161622118346e-06, "loss": 1.0841, "step": 31350 }, { "epoch": 1.8686375014900465, "grad_norm": 3.2191946506500244, "learning_rate": 1.0821584127354733e-06, "loss": 1.1775, "step": 31352 }, { "epoch": 1.8687567052092025, "grad_norm": 3.4056313037872314, "learning_rate": 1.080202413217668e-06, "loss": 1.1133, "step": 31354 }, { "epoch": 1.8688759089283584, "grad_norm": 3.6422290802001953, "learning_rate": 1.0782481637284013e-06, "loss": 1.0884, "step": 31356 }, { "epoch": 1.8689951126475146, "grad_norm": 2.985151767730713, "learning_rate": 1.0762956643375843e-06, "loss": 0.8668, "step": 31358 }, { "epoch": 1.8691143163666708, "grad_norm": 3.5055222511291504, "learning_rate": 1.0743449151150553e-06, "loss": 1.0306, "step": 31360 }, { "epoch": 1.8692335200858268, "grad_norm": 3.5385501384735107, "learning_rate": 1.0723959161306086e-06, "loss": 1.1268, "step": 31362 }, { "epoch": 1.8693527238049827, "grad_norm": 2.9063827991485596, "learning_rate": 1.070448667453966e-06, "loss": 1.1498, "step": 31364 }, { "epoch": 1.8694719275241387, "grad_norm": 3.316134452819824, "learning_rate": 1.0685031691547887e-06, "loss": 1.1859, "step": 31366 }, { "epoch": 1.8695911312432947, "grad_norm": 3.4156885147094727, "learning_rate": 1.0665594213026654e-06, "loss": 0.969, "step": 31368 }, { "epoch": 1.8697103349624509, "grad_norm": 3.033097743988037, "learning_rate": 1.0646174239671347e-06, "loss": 1.1733, "step": 31370 }, { "epoch": 1.8698295386816068, "grad_norm": 2.9408421516418457, "learning_rate": 1.0626771772176692e-06, "loss": 0.9704, "step": 31372 }, { "epoch": 1.869948742400763, "grad_norm": 2.8858463764190674, "learning_rate": 1.0607386811236796e-06, "loss": 1.109, "step": 31374 }, { "epoch": 1.870067946119919, "grad_norm": 3.161778211593628, "learning_rate": 1.0588019357545165e-06, "loss": 1.063, "step": 31376 }, { "epoch": 1.870187149839075, "grad_norm": 3.052924394607544, "learning_rate": 1.0568669411794518e-06, "loss": 1.0548, "step": 31378 }, { "epoch": 1.870306353558231, "grad_norm": 3.4135122299194336, "learning_rate": 1.0549336974677138e-06, "loss": 1.03, "step": 31380 }, { "epoch": 1.870425557277387, "grad_norm": 3.303069591522217, "learning_rate": 1.0530022046884636e-06, "loss": 0.9778, "step": 31382 }, { "epoch": 1.870544760996543, "grad_norm": 3.6570286750793457, "learning_rate": 1.051072462910796e-06, "loss": 1.0472, "step": 31384 }, { "epoch": 1.8706639647156993, "grad_norm": 2.7831389904022217, "learning_rate": 1.0491444722037447e-06, "loss": 0.8682, "step": 31386 }, { "epoch": 1.8707831684348553, "grad_norm": 3.2836687564849854, "learning_rate": 1.0472182326362767e-06, "loss": 0.9138, "step": 31388 }, { "epoch": 1.8709023721540112, "grad_norm": 2.911268711090088, "learning_rate": 1.0452937442772982e-06, "loss": 0.9987, "step": 31390 }, { "epoch": 1.8710215758731672, "grad_norm": 3.351510763168335, "learning_rate": 1.0433710071956648e-06, "loss": 1.1027, "step": 31392 }, { "epoch": 1.8711407795923232, "grad_norm": 3.465242862701416, "learning_rate": 1.0414500214601553e-06, "loss": 1.0806, "step": 31394 }, { "epoch": 1.8712599833114794, "grad_norm": 3.5107882022857666, "learning_rate": 1.039530787139481e-06, "loss": 1.0792, "step": 31396 }, { "epoch": 1.8713791870306353, "grad_norm": 3.443122386932373, "learning_rate": 1.0376133043023152e-06, "loss": 1.204, "step": 31398 }, { "epoch": 1.8714983907497915, "grad_norm": 2.8821234703063965, "learning_rate": 1.0356975730172357e-06, "loss": 1.0586, "step": 31400 }, { "epoch": 1.8716175944689475, "grad_norm": 3.3072669506073, "learning_rate": 1.0337835933527884e-06, "loss": 1.1245, "step": 31402 }, { "epoch": 1.8717367981881035, "grad_norm": 3.4479048252105713, "learning_rate": 1.031871365377446e-06, "loss": 1.0643, "step": 31404 }, { "epoch": 1.8718560019072594, "grad_norm": 2.8245835304260254, "learning_rate": 1.0299608891595924e-06, "loss": 1.1212, "step": 31406 }, { "epoch": 1.8719752056264154, "grad_norm": 3.078965663909912, "learning_rate": 1.0280521647675955e-06, "loss": 1.0785, "step": 31408 }, { "epoch": 1.8720944093455716, "grad_norm": 3.190742015838623, "learning_rate": 1.0261451922697286e-06, "loss": 1.1854, "step": 31410 }, { "epoch": 1.8722136130647278, "grad_norm": 3.2012343406677246, "learning_rate": 1.024239971734209e-06, "loss": 1.098, "step": 31412 }, { "epoch": 1.8723328167838837, "grad_norm": 3.162083625793457, "learning_rate": 1.0223365032291932e-06, "loss": 1.0941, "step": 31414 }, { "epoch": 1.8724520205030397, "grad_norm": 3.4081063270568848, "learning_rate": 1.020434786822777e-06, "loss": 1.097, "step": 31416 }, { "epoch": 1.8725712242221957, "grad_norm": 3.405686378479004, "learning_rate": 1.018534822582995e-06, "loss": 1.2537, "step": 31418 }, { "epoch": 1.8726904279413517, "grad_norm": 3.2636029720306396, "learning_rate": 1.016636610577798e-06, "loss": 1.1056, "step": 31420 }, { "epoch": 1.8728096316605078, "grad_norm": 3.1634817123413086, "learning_rate": 1.014740150875121e-06, "loss": 0.9993, "step": 31422 }, { "epoch": 1.8729288353796638, "grad_norm": 3.4610507488250732, "learning_rate": 1.0128454435427758e-06, "loss": 1.2326, "step": 31424 }, { "epoch": 1.87304803909882, "grad_norm": 3.1302504539489746, "learning_rate": 1.0109524886485644e-06, "loss": 1.1901, "step": 31426 }, { "epoch": 1.873167242817976, "grad_norm": 3.170551300048828, "learning_rate": 1.0090612862601935e-06, "loss": 0.9274, "step": 31428 }, { "epoch": 1.873286446537132, "grad_norm": 3.4758217334747314, "learning_rate": 1.0071718364453197e-06, "loss": 1.1428, "step": 31430 }, { "epoch": 1.873405650256288, "grad_norm": 2.9563043117523193, "learning_rate": 1.0052841392715396e-06, "loss": 1.1835, "step": 31432 }, { "epoch": 1.8735248539754439, "grad_norm": 3.5418601036071777, "learning_rate": 1.003398194806382e-06, "loss": 1.2742, "step": 31434 }, { "epoch": 1.8736440576946, "grad_norm": 3.497983455657959, "learning_rate": 1.0015140031173098e-06, "loss": 1.0809, "step": 31436 }, { "epoch": 1.8737632614137563, "grad_norm": 3.2238893508911133, "learning_rate": 9.996315642717191e-07, "loss": 1.1458, "step": 31438 }, { "epoch": 1.8738824651329122, "grad_norm": 3.052635908126831, "learning_rate": 9.977508783369727e-07, "loss": 1.0937, "step": 31440 }, { "epoch": 1.8740016688520682, "grad_norm": 3.2191035747528076, "learning_rate": 9.958719453803278e-07, "loss": 1.188, "step": 31442 }, { "epoch": 1.8741208725712242, "grad_norm": 3.2113704681396484, "learning_rate": 9.93994765469014e-07, "loss": 1.0195, "step": 31444 }, { "epoch": 1.8742400762903801, "grad_norm": 3.7109780311584473, "learning_rate": 9.921193386701776e-07, "loss": 1.2488, "step": 31446 }, { "epoch": 1.8743592800095363, "grad_norm": 2.846330165863037, "learning_rate": 9.902456650509152e-07, "loss": 1.0906, "step": 31448 }, { "epoch": 1.8744784837286923, "grad_norm": 2.7957165241241455, "learning_rate": 9.883737446782449e-07, "loss": 1.0883, "step": 31450 }, { "epoch": 1.8745976874478485, "grad_norm": 3.413909912109375, "learning_rate": 9.865035776191412e-07, "loss": 1.117, "step": 31452 }, { "epoch": 1.8747168911670045, "grad_norm": 3.2500624656677246, "learning_rate": 9.846351639405005e-07, "loss": 1.1505, "step": 31454 }, { "epoch": 1.8748360948861604, "grad_norm": 3.0675010681152344, "learning_rate": 9.827685037091694e-07, "loss": 1.0946, "step": 31456 }, { "epoch": 1.8749552986053164, "grad_norm": 3.227116584777832, "learning_rate": 9.80903596991911e-07, "loss": 1.0937, "step": 31458 }, { "epoch": 1.8750745023244724, "grad_norm": 2.7505972385406494, "learning_rate": 9.7904044385545e-07, "loss": 1.0894, "step": 31460 }, { "epoch": 1.8751937060436286, "grad_norm": 3.1450133323669434, "learning_rate": 9.771790443664442e-07, "loss": 1.0583, "step": 31462 }, { "epoch": 1.8753129097627848, "grad_norm": 3.377103328704834, "learning_rate": 9.753193985914677e-07, "loss": 1.1813, "step": 31464 }, { "epoch": 1.8754321134819407, "grad_norm": 3.1628499031066895, "learning_rate": 9.734615065970454e-07, "loss": 1.1104, "step": 31466 }, { "epoch": 1.8755513172010967, "grad_norm": 3.339925765991211, "learning_rate": 9.716053684496573e-07, "loss": 1.0286, "step": 31468 }, { "epoch": 1.8756705209202527, "grad_norm": 3.3097517490386963, "learning_rate": 9.697509842156894e-07, "loss": 1.2257, "step": 31470 }, { "epoch": 1.8757897246394086, "grad_norm": 3.1315758228302, "learning_rate": 9.678983539614884e-07, "loss": 1.016, "step": 31472 }, { "epoch": 1.8759089283585648, "grad_norm": 2.9626684188842773, "learning_rate": 9.66047477753318e-07, "loss": 1.0887, "step": 31474 }, { "epoch": 1.8760281320777208, "grad_norm": 3.0341100692749023, "learning_rate": 9.641983556574031e-07, "loss": 0.9538, "step": 31476 }, { "epoch": 1.876147335796877, "grad_norm": 3.0381295680999756, "learning_rate": 9.62350987739885e-07, "loss": 1.0032, "step": 31478 }, { "epoch": 1.876266539516033, "grad_norm": 3.3724920749664307, "learning_rate": 9.605053740668556e-07, "loss": 1.1146, "step": 31480 }, { "epoch": 1.876385743235189, "grad_norm": 3.5011000633239746, "learning_rate": 9.586615147043288e-07, "loss": 1.0632, "step": 31482 }, { "epoch": 1.8765049469543449, "grad_norm": 3.168562889099121, "learning_rate": 9.568194097182737e-07, "loss": 1.0647, "step": 31484 }, { "epoch": 1.8766241506735009, "grad_norm": 3.78163743019104, "learning_rate": 9.549790591745988e-07, "loss": 1.2283, "step": 31486 }, { "epoch": 1.876743354392657, "grad_norm": 3.471644163131714, "learning_rate": 9.531404631391127e-07, "loss": 1.0572, "step": 31488 }, { "epoch": 1.8768625581118132, "grad_norm": 3.15085506439209, "learning_rate": 9.513036216776128e-07, "loss": 1.0912, "step": 31490 }, { "epoch": 1.8769817618309692, "grad_norm": 2.911578893661499, "learning_rate": 9.494685348558019e-07, "loss": 0.9315, "step": 31492 }, { "epoch": 1.8771009655501252, "grad_norm": 3.146868944168091, "learning_rate": 9.476352027393276e-07, "loss": 0.9621, "step": 31494 }, { "epoch": 1.8772201692692811, "grad_norm": 3.4461464881896973, "learning_rate": 9.458036253937707e-07, "loss": 1.0708, "step": 31496 }, { "epoch": 1.8773393729884371, "grad_norm": 3.190058946609497, "learning_rate": 9.439738028846512e-07, "loss": 1.1581, "step": 31498 }, { "epoch": 1.8774585767075933, "grad_norm": 3.608947992324829, "learning_rate": 9.421457352774388e-07, "loss": 1.1525, "step": 31500 }, { "epoch": 1.8775777804267493, "grad_norm": 3.520531177520752, "learning_rate": 9.4031942263752e-07, "loss": 1.0769, "step": 31502 }, { "epoch": 1.8776969841459055, "grad_norm": 3.0944082736968994, "learning_rate": 9.384948650302317e-07, "loss": 1.0324, "step": 31504 }, { "epoch": 1.8778161878650614, "grad_norm": 3.4668068885803223, "learning_rate": 9.366720625208381e-07, "loss": 1.0229, "step": 31506 }, { "epoch": 1.8779353915842174, "grad_norm": 3.0704410076141357, "learning_rate": 9.348510151745593e-07, "loss": 0.9877, "step": 31508 }, { "epoch": 1.8780545953033734, "grad_norm": 3.2918283939361572, "learning_rate": 9.330317230565377e-07, "loss": 1.0003, "step": 31510 }, { "epoch": 1.8781737990225293, "grad_norm": 3.6074378490448, "learning_rate": 9.312141862318435e-07, "loss": 1.0931, "step": 31512 }, { "epoch": 1.8782930027416855, "grad_norm": 3.384223461151123, "learning_rate": 9.29398404765508e-07, "loss": 1.0229, "step": 31514 }, { "epoch": 1.8784122064608417, "grad_norm": 2.759894609451294, "learning_rate": 9.275843787224847e-07, "loss": 0.9621, "step": 31516 }, { "epoch": 1.8785314101799977, "grad_norm": 3.021787166595459, "learning_rate": 9.257721081676718e-07, "loss": 0.9322, "step": 31518 }, { "epoch": 1.8786506138991537, "grad_norm": 3.0485286712646484, "learning_rate": 9.239615931658952e-07, "loss": 1.244, "step": 31520 }, { "epoch": 1.8787698176183096, "grad_norm": 3.349304676055908, "learning_rate": 9.221528337819197e-07, "loss": 1.1204, "step": 31522 }, { "epoch": 1.8788890213374656, "grad_norm": 3.0469419956207275, "learning_rate": 9.203458300804602e-07, "loss": 1.1149, "step": 31524 }, { "epoch": 1.8790082250566218, "grad_norm": 3.1919355392456055, "learning_rate": 9.185405821261539e-07, "loss": 1.1402, "step": 31526 }, { "epoch": 1.8791274287757778, "grad_norm": 3.3552260398864746, "learning_rate": 9.167370899835825e-07, "loss": 1.103, "step": 31528 }, { "epoch": 1.879246632494934, "grad_norm": 3.124340295791626, "learning_rate": 9.149353537172556e-07, "loss": 1.0821, "step": 31530 }, { "epoch": 1.87936583621409, "grad_norm": 3.339524269104004, "learning_rate": 9.131353733916437e-07, "loss": 1.0418, "step": 31532 }, { "epoch": 1.879485039933246, "grad_norm": 3.1927902698516846, "learning_rate": 9.113371490711176e-07, "loss": 0.9981, "step": 31534 }, { "epoch": 1.8796042436524019, "grad_norm": 3.1473443508148193, "learning_rate": 9.095406808200202e-07, "loss": 1.1432, "step": 31536 }, { "epoch": 1.8797234473715578, "grad_norm": 3.0929317474365234, "learning_rate": 9.077459687026169e-07, "loss": 1.1917, "step": 31538 }, { "epoch": 1.879842651090714, "grad_norm": 3.375387668609619, "learning_rate": 9.059530127831062e-07, "loss": 1.1417, "step": 31540 }, { "epoch": 1.8799618548098702, "grad_norm": 3.55482816696167, "learning_rate": 9.041618131256313e-07, "loss": 1.2836, "step": 31542 }, { "epoch": 1.8800810585290262, "grad_norm": 3.2152259349823, "learning_rate": 9.023723697942632e-07, "loss": 0.9917, "step": 31544 }, { "epoch": 1.8802002622481822, "grad_norm": 3.1359715461730957, "learning_rate": 9.005846828530285e-07, "loss": 1.0234, "step": 31546 }, { "epoch": 1.8803194659673381, "grad_norm": 3.303271532058716, "learning_rate": 8.987987523658592e-07, "loss": 1.11, "step": 31548 }, { "epoch": 1.880438669686494, "grad_norm": 3.526360034942627, "learning_rate": 8.97014578396671e-07, "loss": 1.2567, "step": 31550 }, { "epoch": 1.8805578734056503, "grad_norm": 3.178875207901001, "learning_rate": 8.952321610092628e-07, "loss": 1.2114, "step": 31552 }, { "epoch": 1.8806770771248063, "grad_norm": 3.3082306385040283, "learning_rate": 8.934515002674171e-07, "loss": 1.0119, "step": 31554 }, { "epoch": 1.8807962808439624, "grad_norm": 3.23431134223938, "learning_rate": 8.916725962348327e-07, "loss": 1.076, "step": 31556 }, { "epoch": 1.8809154845631184, "grad_norm": 3.125969886779785, "learning_rate": 8.89895448975131e-07, "loss": 0.9005, "step": 31558 }, { "epoch": 1.8810346882822744, "grad_norm": 3.3449597358703613, "learning_rate": 8.881200585519056e-07, "loss": 1.0178, "step": 31560 }, { "epoch": 1.8811538920014303, "grad_norm": 3.1137969493865967, "learning_rate": 8.863464250286557e-07, "loss": 1.1497, "step": 31562 }, { "epoch": 1.8812730957205863, "grad_norm": 3.2119693756103516, "learning_rate": 8.845745484688361e-07, "loss": 1.0764, "step": 31564 }, { "epoch": 1.8813922994397425, "grad_norm": 3.619048595428467, "learning_rate": 8.82804428935835e-07, "loss": 1.1698, "step": 31566 }, { "epoch": 1.8815115031588987, "grad_norm": 3.7680270671844482, "learning_rate": 8.810360664929684e-07, "loss": 1.2055, "step": 31568 }, { "epoch": 1.8816307068780547, "grad_norm": 3.5766375064849854, "learning_rate": 8.792694612035024e-07, "loss": 1.1005, "step": 31570 }, { "epoch": 1.8817499105972106, "grad_norm": 3.0006678104400635, "learning_rate": 8.775046131306253e-07, "loss": 0.9178, "step": 31572 }, { "epoch": 1.8818691143163666, "grad_norm": 3.338822364807129, "learning_rate": 8.757415223374921e-07, "loss": 1.1747, "step": 31574 }, { "epoch": 1.8819883180355226, "grad_norm": 3.1773488521575928, "learning_rate": 8.739801888871469e-07, "loss": 1.0669, "step": 31576 }, { "epoch": 1.8821075217546788, "grad_norm": 3.4817073345184326, "learning_rate": 8.722206128426225e-07, "loss": 1.1509, "step": 31578 }, { "epoch": 1.8822267254738347, "grad_norm": 3.100529432296753, "learning_rate": 8.704627942668519e-07, "loss": 1.1297, "step": 31580 }, { "epoch": 1.882345929192991, "grad_norm": 3.5767910480499268, "learning_rate": 8.687067332227239e-07, "loss": 1.2733, "step": 31582 }, { "epoch": 1.882465132912147, "grad_norm": 3.5151281356811523, "learning_rate": 8.669524297730602e-07, "loss": 1.2434, "step": 31584 }, { "epoch": 1.8825843366313029, "grad_norm": 3.1908059120178223, "learning_rate": 8.651998839806108e-07, "loss": 1.0123, "step": 31586 }, { "epoch": 1.8827035403504588, "grad_norm": 3.2875733375549316, "learning_rate": 8.634490959080754e-07, "loss": 1.1369, "step": 31588 }, { "epoch": 1.8828227440696148, "grad_norm": 3.2739083766937256, "learning_rate": 8.617000656180818e-07, "loss": 0.9863, "step": 31590 }, { "epoch": 1.882941947788771, "grad_norm": 2.924964666366577, "learning_rate": 8.599527931732076e-07, "loss": 1.1166, "step": 31592 }, { "epoch": 1.8830611515079272, "grad_norm": 3.377063751220703, "learning_rate": 8.582072786359474e-07, "loss": 1.2109, "step": 31594 }, { "epoch": 1.8831803552270832, "grad_norm": 3.27443265914917, "learning_rate": 8.564635220687567e-07, "loss": 0.9276, "step": 31596 }, { "epoch": 1.8832995589462391, "grad_norm": 3.126642942428589, "learning_rate": 8.547215235339967e-07, "loss": 1.1474, "step": 31598 }, { "epoch": 1.883418762665395, "grad_norm": 3.113649606704712, "learning_rate": 8.529812830940065e-07, "loss": 1.1668, "step": 31600 }, { "epoch": 1.883537966384551, "grad_norm": 3.5523884296417236, "learning_rate": 8.512428008110307e-07, "loss": 1.2038, "step": 31602 }, { "epoch": 1.8836571701037073, "grad_norm": 3.382634401321411, "learning_rate": 8.495060767472529e-07, "loss": 1.1263, "step": 31604 }, { "epoch": 1.8837763738228632, "grad_norm": 3.1210672855377197, "learning_rate": 8.477711109648178e-07, "loss": 1.0809, "step": 31606 }, { "epoch": 1.8838955775420194, "grad_norm": 3.4039368629455566, "learning_rate": 8.46037903525776e-07, "loss": 1.1418, "step": 31608 }, { "epoch": 1.8840147812611754, "grad_norm": 3.234694480895996, "learning_rate": 8.443064544921442e-07, "loss": 1.0766, "step": 31610 }, { "epoch": 1.8841339849803314, "grad_norm": 3.2660272121429443, "learning_rate": 8.425767639258453e-07, "loss": 1.0586, "step": 31612 }, { "epoch": 1.8842531886994873, "grad_norm": 3.3692514896392822, "learning_rate": 8.408488318887742e-07, "loss": 1.0213, "step": 31614 }, { "epoch": 1.8843723924186435, "grad_norm": 3.17111873626709, "learning_rate": 8.391226584427314e-07, "loss": 1.1587, "step": 31616 }, { "epoch": 1.8844915961377995, "grad_norm": 3.251183271408081, "learning_rate": 8.373982436494732e-07, "loss": 1.1142, "step": 31618 }, { "epoch": 1.8846107998569557, "grad_norm": 3.442657947540283, "learning_rate": 8.356755875706945e-07, "loss": 1.1075, "step": 31620 }, { "epoch": 1.8847300035761116, "grad_norm": 3.025477409362793, "learning_rate": 8.339546902680073e-07, "loss": 1.0428, "step": 31622 }, { "epoch": 1.8848492072952676, "grad_norm": 3.296117067337036, "learning_rate": 8.322355518029845e-07, "loss": 1.0019, "step": 31624 }, { "epoch": 1.8849684110144236, "grad_norm": 3.382571220397949, "learning_rate": 8.305181722371159e-07, "loss": 1.0656, "step": 31626 }, { "epoch": 1.8850876147335796, "grad_norm": 2.7988526821136475, "learning_rate": 8.28802551631852e-07, "loss": 0.9948, "step": 31628 }, { "epoch": 1.8852068184527357, "grad_norm": 3.480590343475342, "learning_rate": 8.270886900485553e-07, "loss": 1.1745, "step": 31630 }, { "epoch": 1.8853260221718917, "grad_norm": 3.409158229827881, "learning_rate": 8.253765875485375e-07, "loss": 1.1151, "step": 31632 }, { "epoch": 1.885445225891048, "grad_norm": 3.104917049407959, "learning_rate": 8.236662441930498e-07, "loss": 1.1495, "step": 31634 }, { "epoch": 1.8855644296102039, "grad_norm": 3.4110729694366455, "learning_rate": 8.219576600432711e-07, "loss": 1.1649, "step": 31636 }, { "epoch": 1.8856836333293598, "grad_norm": 3.138303518295288, "learning_rate": 8.202508351603356e-07, "loss": 0.9733, "step": 31638 }, { "epoch": 1.8858028370485158, "grad_norm": 3.2861311435699463, "learning_rate": 8.18545769605289e-07, "loss": 1.1306, "step": 31640 }, { "epoch": 1.885922040767672, "grad_norm": 3.529890775680542, "learning_rate": 8.168424634391325e-07, "loss": 1.1105, "step": 31642 }, { "epoch": 1.886041244486828, "grad_norm": 3.6622846126556396, "learning_rate": 8.151409167228064e-07, "loss": 1.0714, "step": 31644 }, { "epoch": 1.8861604482059842, "grad_norm": 2.9666147232055664, "learning_rate": 8.134411295171618e-07, "loss": 0.9416, "step": 31646 }, { "epoch": 1.8862796519251401, "grad_norm": 3.3934614658355713, "learning_rate": 8.117431018830279e-07, "loss": 1.1375, "step": 31648 }, { "epoch": 1.886398855644296, "grad_norm": 3.1259889602661133, "learning_rate": 8.10046833881134e-07, "loss": 1.0533, "step": 31650 }, { "epoch": 1.886518059363452, "grad_norm": 2.854548454284668, "learning_rate": 8.083523255721704e-07, "loss": 0.9741, "step": 31652 }, { "epoch": 1.886637263082608, "grad_norm": 3.3767001628875732, "learning_rate": 8.066595770167495e-07, "loss": 1.2542, "step": 31654 }, { "epoch": 1.8867564668017642, "grad_norm": 3.3122293949127197, "learning_rate": 8.049685882754288e-07, "loss": 1.1741, "step": 31656 }, { "epoch": 1.8868756705209202, "grad_norm": 3.5922369956970215, "learning_rate": 8.032793594086985e-07, "loss": 1.1515, "step": 31658 }, { "epoch": 1.8869948742400764, "grad_norm": 3.2215185165405273, "learning_rate": 8.015918904769937e-07, "loss": 1.362, "step": 31660 }, { "epoch": 1.8871140779592324, "grad_norm": 3.0981178283691406, "learning_rate": 7.999061815406772e-07, "loss": 1.0489, "step": 31662 }, { "epoch": 1.8872332816783883, "grad_norm": 3.0167689323425293, "learning_rate": 7.982222326600509e-07, "loss": 1.0673, "step": 31664 }, { "epoch": 1.8873524853975443, "grad_norm": 3.1073713302612305, "learning_rate": 7.965400438953607e-07, "loss": 1.172, "step": 31666 }, { "epoch": 1.8874716891167005, "grad_norm": 3.311565399169922, "learning_rate": 7.948596153067756e-07, "loss": 1.0484, "step": 31668 }, { "epoch": 1.8875908928358565, "grad_norm": 3.265190362930298, "learning_rate": 7.931809469544194e-07, "loss": 1.0294, "step": 31670 }, { "epoch": 1.8877100965550127, "grad_norm": 3.273965358734131, "learning_rate": 7.915040388983386e-07, "loss": 1.1708, "step": 31672 }, { "epoch": 1.8878293002741686, "grad_norm": 3.080357313156128, "learning_rate": 7.898288911985296e-07, "loss": 1.132, "step": 31674 }, { "epoch": 1.8879485039933246, "grad_norm": 3.349762439727783, "learning_rate": 7.881555039149058e-07, "loss": 1.0594, "step": 31676 }, { "epoch": 1.8880677077124806, "grad_norm": 3.6044275760650635, "learning_rate": 7.864838771073358e-07, "loss": 1.0961, "step": 31678 }, { "epoch": 1.8881869114316365, "grad_norm": 3.1106998920440674, "learning_rate": 7.848140108356217e-07, "loss": 1.1647, "step": 31680 }, { "epoch": 1.8883061151507927, "grad_norm": 3.5387017726898193, "learning_rate": 7.831459051594936e-07, "loss": 1.1052, "step": 31682 }, { "epoch": 1.8884253188699487, "grad_norm": 3.260859727859497, "learning_rate": 7.814795601386371e-07, "loss": 1.1415, "step": 31684 }, { "epoch": 1.8885445225891049, "grad_norm": 3.2639777660369873, "learning_rate": 7.79814975832649e-07, "loss": 1.1449, "step": 31686 }, { "epoch": 1.8886637263082608, "grad_norm": 2.8179259300231934, "learning_rate": 7.781521523010926e-07, "loss": 1.143, "step": 31688 }, { "epoch": 1.8887829300274168, "grad_norm": 2.8955092430114746, "learning_rate": 7.764910896034428e-07, "loss": 1.0987, "step": 31690 }, { "epoch": 1.8889021337465728, "grad_norm": 3.1187713146209717, "learning_rate": 7.748317877991185e-07, "loss": 1.0178, "step": 31692 }, { "epoch": 1.889021337465729, "grad_norm": 3.272491455078125, "learning_rate": 7.731742469474835e-07, "loss": 1.2266, "step": 31694 }, { "epoch": 1.889140541184885, "grad_norm": 3.1562039852142334, "learning_rate": 7.715184671078346e-07, "loss": 1.1742, "step": 31696 }, { "epoch": 1.8892597449040411, "grad_norm": 3.1183409690856934, "learning_rate": 7.698644483394079e-07, "loss": 1.0024, "step": 31698 }, { "epoch": 1.889378948623197, "grad_norm": 3.1045637130737305, "learning_rate": 7.682121907013618e-07, "loss": 1.0536, "step": 31700 }, { "epoch": 1.889498152342353, "grad_norm": 3.656560182571411, "learning_rate": 7.665616942528154e-07, "loss": 1.1483, "step": 31702 }, { "epoch": 1.889617356061509, "grad_norm": 3.45235538482666, "learning_rate": 7.649129590528049e-07, "loss": 1.0479, "step": 31704 }, { "epoch": 1.889736559780665, "grad_norm": 3.2408149242401123, "learning_rate": 7.632659851603164e-07, "loss": 1.0607, "step": 31706 }, { "epoch": 1.8898557634998212, "grad_norm": 3.489716053009033, "learning_rate": 7.616207726342639e-07, "loss": 1.0334, "step": 31708 }, { "epoch": 1.8899749672189774, "grad_norm": 3.6036550998687744, "learning_rate": 7.599773215335004e-07, "loss": 1.1081, "step": 31710 }, { "epoch": 1.8900941709381334, "grad_norm": 3.5457701683044434, "learning_rate": 7.583356319168289e-07, "loss": 1.2029, "step": 31712 }, { "epoch": 1.8902133746572893, "grad_norm": 3.1528375148773193, "learning_rate": 7.566957038429633e-07, "loss": 1.0794, "step": 31714 }, { "epoch": 1.8903325783764453, "grad_norm": 3.0762128829956055, "learning_rate": 7.550575373705792e-07, "loss": 0.963, "step": 31716 }, { "epoch": 1.8904517820956013, "grad_norm": 3.3560914993286133, "learning_rate": 7.534211325582796e-07, "loss": 1.078, "step": 31718 }, { "epoch": 1.8905709858147575, "grad_norm": 3.4593727588653564, "learning_rate": 7.517864894645954e-07, "loss": 1.0265, "step": 31720 }, { "epoch": 1.8906901895339134, "grad_norm": 3.2593212127685547, "learning_rate": 7.501536081480132e-07, "loss": 1.1303, "step": 31722 }, { "epoch": 1.8908093932530696, "grad_norm": 3.0202696323394775, "learning_rate": 7.485224886669417e-07, "loss": 0.9698, "step": 31724 }, { "epoch": 1.8909285969722256, "grad_norm": 3.289259910583496, "learning_rate": 7.468931310797345e-07, "loss": 1.0105, "step": 31726 }, { "epoch": 1.8910478006913816, "grad_norm": 3.762256383895874, "learning_rate": 7.452655354446725e-07, "loss": 1.1083, "step": 31728 }, { "epoch": 1.8911670044105375, "grad_norm": 3.227783679962158, "learning_rate": 7.436397018199981e-07, "loss": 1.1822, "step": 31730 }, { "epoch": 1.8912862081296935, "grad_norm": 3.2329049110412598, "learning_rate": 7.420156302638481e-07, "loss": 1.0493, "step": 31732 }, { "epoch": 1.8914054118488497, "grad_norm": 3.322741985321045, "learning_rate": 7.403933208343372e-07, "loss": 1.1007, "step": 31734 }, { "epoch": 1.8915246155680059, "grad_norm": 3.205995559692383, "learning_rate": 7.387727735895023e-07, "loss": 0.9528, "step": 31736 }, { "epoch": 1.8916438192871619, "grad_norm": 3.207308769226074, "learning_rate": 7.371539885873024e-07, "loss": 1.0574, "step": 31738 }, { "epoch": 1.8917630230063178, "grad_norm": 2.98299503326416, "learning_rate": 7.355369658856581e-07, "loss": 1.039, "step": 31740 }, { "epoch": 1.8918822267254738, "grad_norm": 3.0443897247314453, "learning_rate": 7.339217055424119e-07, "loss": 1.213, "step": 31742 }, { "epoch": 1.8920014304446298, "grad_norm": 3.2791049480438232, "learning_rate": 7.323082076153509e-07, "loss": 1.0519, "step": 31744 }, { "epoch": 1.892120634163786, "grad_norm": 3.559692859649658, "learning_rate": 7.3069647216219e-07, "loss": 1.0223, "step": 31746 }, { "epoch": 1.892239837882942, "grad_norm": 3.434227228164673, "learning_rate": 7.290864992405944e-07, "loss": 1.1305, "step": 31748 }, { "epoch": 1.8923590416020981, "grad_norm": 3.3111095428466797, "learning_rate": 7.274782889081511e-07, "loss": 1.1193, "step": 31750 }, { "epoch": 1.892478245321254, "grad_norm": 3.3506743907928467, "learning_rate": 7.258718412223919e-07, "loss": 1.0412, "step": 31752 }, { "epoch": 1.89259744904041, "grad_norm": 2.9716310501098633, "learning_rate": 7.242671562407877e-07, "loss": 1.2182, "step": 31754 }, { "epoch": 1.892716652759566, "grad_norm": 3.3683488368988037, "learning_rate": 7.226642340207423e-07, "loss": 1.3614, "step": 31756 }, { "epoch": 1.892835856478722, "grad_norm": 3.1750073432922363, "learning_rate": 7.210630746195934e-07, "loss": 1.1641, "step": 31758 }, { "epoch": 1.8929550601978782, "grad_norm": 3.560718059539795, "learning_rate": 7.19463678094634e-07, "loss": 1.1826, "step": 31760 }, { "epoch": 1.8930742639170344, "grad_norm": 3.4829115867614746, "learning_rate": 7.178660445030627e-07, "loss": 0.9825, "step": 31762 }, { "epoch": 1.8931934676361903, "grad_norm": 3.2541980743408203, "learning_rate": 7.16270173902045e-07, "loss": 1.0867, "step": 31764 }, { "epoch": 1.8933126713553463, "grad_norm": 3.1470420360565186, "learning_rate": 7.146760663486629e-07, "loss": 1.122, "step": 31766 }, { "epoch": 1.8934318750745023, "grad_norm": 2.9975075721740723, "learning_rate": 7.130837218999431e-07, "loss": 1.194, "step": 31768 }, { "epoch": 1.8935510787936582, "grad_norm": 3.1620888710021973, "learning_rate": 7.114931406128511e-07, "loss": 1.2752, "step": 31770 }, { "epoch": 1.8936702825128144, "grad_norm": 2.893411874771118, "learning_rate": 7.09904322544297e-07, "loss": 1.0628, "step": 31772 }, { "epoch": 1.8937894862319704, "grad_norm": 3.697932481765747, "learning_rate": 7.083172677511018e-07, "loss": 1.1935, "step": 31774 }, { "epoch": 1.8939086899511266, "grad_norm": 3.0826048851013184, "learning_rate": 7.067319762900537e-07, "loss": 1.159, "step": 31776 }, { "epoch": 1.8940278936702826, "grad_norm": 3.20097017288208, "learning_rate": 7.051484482178516e-07, "loss": 1.0592, "step": 31778 }, { "epoch": 1.8941470973894385, "grad_norm": 3.4674105644226074, "learning_rate": 7.035666835911558e-07, "loss": 1.1923, "step": 31780 }, { "epoch": 1.8942663011085945, "grad_norm": 3.231198787689209, "learning_rate": 7.019866824665433e-07, "loss": 1.1764, "step": 31782 }, { "epoch": 1.8943855048277505, "grad_norm": 2.8942689895629883, "learning_rate": 7.004084449005355e-07, "loss": 1.0026, "step": 31784 }, { "epoch": 1.8945047085469067, "grad_norm": 3.0959157943725586, "learning_rate": 6.988319709495983e-07, "loss": 1.0532, "step": 31786 }, { "epoch": 1.8946239122660629, "grad_norm": 3.006640911102295, "learning_rate": 6.972572606701255e-07, "loss": 1.1943, "step": 31788 }, { "epoch": 1.8947431159852188, "grad_norm": 3.1560096740722656, "learning_rate": 6.956843141184443e-07, "loss": 1.0826, "step": 31790 }, { "epoch": 1.8948623197043748, "grad_norm": 3.693211078643799, "learning_rate": 6.941131313508265e-07, "loss": 1.2505, "step": 31792 }, { "epoch": 1.8949815234235308, "grad_norm": 3.6040923595428467, "learning_rate": 6.925437124234824e-07, "loss": 1.2691, "step": 31794 }, { "epoch": 1.8951007271426867, "grad_norm": 3.66867733001709, "learning_rate": 6.909760573925561e-07, "loss": 1.1372, "step": 31796 }, { "epoch": 1.895219930861843, "grad_norm": 3.5200259685516357, "learning_rate": 6.894101663141195e-07, "loss": 1.1243, "step": 31798 }, { "epoch": 1.895339134580999, "grad_norm": 3.950819492340088, "learning_rate": 6.878460392441998e-07, "loss": 1.1917, "step": 31800 }, { "epoch": 1.895458338300155, "grad_norm": 2.9227821826934814, "learning_rate": 6.862836762387415e-07, "loss": 1.2479, "step": 31802 }, { "epoch": 1.895577542019311, "grad_norm": 3.551149606704712, "learning_rate": 6.847230773536495e-07, "loss": 1.0499, "step": 31804 }, { "epoch": 1.895696745738467, "grad_norm": 3.1923537254333496, "learning_rate": 6.831642426447405e-07, "loss": 1.0384, "step": 31806 }, { "epoch": 1.895815949457623, "grad_norm": 3.2361831665039062, "learning_rate": 6.816071721677809e-07, "loss": 1.1159, "step": 31808 }, { "epoch": 1.895935153176779, "grad_norm": 2.99045991897583, "learning_rate": 6.800518659784705e-07, "loss": 1.0753, "step": 31810 }, { "epoch": 1.8960543568959352, "grad_norm": 3.389791250228882, "learning_rate": 6.784983241324594e-07, "loss": 1.0393, "step": 31812 }, { "epoch": 1.8961735606150913, "grad_norm": 3.2819325923919678, "learning_rate": 6.769465466853086e-07, "loss": 1.0686, "step": 31814 }, { "epoch": 1.8962927643342473, "grad_norm": 3.26001238822937, "learning_rate": 6.753965336925349e-07, "loss": 1.0346, "step": 31816 }, { "epoch": 1.8964119680534033, "grad_norm": 2.9291141033172607, "learning_rate": 6.738482852095995e-07, "loss": 0.9476, "step": 31818 }, { "epoch": 1.8965311717725593, "grad_norm": 3.46773362159729, "learning_rate": 6.723018012918691e-07, "loss": 1.0781, "step": 31820 }, { "epoch": 1.8966503754917152, "grad_norm": 3.203091859817505, "learning_rate": 6.707570819946773e-07, "loss": 1.1849, "step": 31822 }, { "epoch": 1.8967695792108714, "grad_norm": 3.2445108890533447, "learning_rate": 6.692141273732855e-07, "loss": 1.2772, "step": 31824 }, { "epoch": 1.8968887829300274, "grad_norm": 3.2373175621032715, "learning_rate": 6.676729374828883e-07, "loss": 1.1631, "step": 31826 }, { "epoch": 1.8970079866491836, "grad_norm": 3.2958366870880127, "learning_rate": 6.661335123786195e-07, "loss": 1.09, "step": 31828 }, { "epoch": 1.8971271903683395, "grad_norm": 3.209850788116455, "learning_rate": 6.645958521155459e-07, "loss": 1.0506, "step": 31830 }, { "epoch": 1.8972463940874955, "grad_norm": 3.267825126647949, "learning_rate": 6.630599567486795e-07, "loss": 1.0498, "step": 31832 }, { "epoch": 1.8973655978066515, "grad_norm": 3.839041233062744, "learning_rate": 6.615258263329594e-07, "loss": 1.0743, "step": 31834 }, { "epoch": 1.8974848015258075, "grad_norm": 3.3111276626586914, "learning_rate": 6.599934609232749e-07, "loss": 1.109, "step": 31836 }, { "epoch": 1.8976040052449636, "grad_norm": 3.00014328956604, "learning_rate": 6.58462860574438e-07, "loss": 1.0978, "step": 31838 }, { "epoch": 1.8977232089641198, "grad_norm": 3.0750203132629395, "learning_rate": 6.569340253412049e-07, "loss": 1.1433, "step": 31840 }, { "epoch": 1.8978424126832758, "grad_norm": 2.7233614921569824, "learning_rate": 6.554069552782705e-07, "loss": 1.016, "step": 31842 }, { "epoch": 1.8979616164024318, "grad_norm": 3.260866165161133, "learning_rate": 6.538816504402523e-07, "loss": 1.0356, "step": 31844 }, { "epoch": 1.8980808201215877, "grad_norm": 3.9205784797668457, "learning_rate": 6.52358110881729e-07, "loss": 1.081, "step": 31846 }, { "epoch": 1.8982000238407437, "grad_norm": 3.1703600883483887, "learning_rate": 6.508363366571957e-07, "loss": 1.0937, "step": 31848 }, { "epoch": 1.8983192275599, "grad_norm": 3.5683462619781494, "learning_rate": 6.493163278210923e-07, "loss": 1.0811, "step": 31850 }, { "epoch": 1.8984384312790559, "grad_norm": 3.0822036266326904, "learning_rate": 6.477980844277976e-07, "loss": 1.2256, "step": 31852 }, { "epoch": 1.898557634998212, "grad_norm": 3.27093505859375, "learning_rate": 6.462816065316235e-07, "loss": 1.0628, "step": 31854 }, { "epoch": 1.898676838717368, "grad_norm": 3.3282978534698486, "learning_rate": 6.447668941868157e-07, "loss": 0.9575, "step": 31856 }, { "epoch": 1.898796042436524, "grad_norm": 2.9633121490478516, "learning_rate": 6.43253947447564e-07, "loss": 0.9873, "step": 31858 }, { "epoch": 1.89891524615568, "grad_norm": 3.2088754177093506, "learning_rate": 6.41742766367992e-07, "loss": 1.2163, "step": 31860 }, { "epoch": 1.899034449874836, "grad_norm": 3.402890205383301, "learning_rate": 6.402333510021507e-07, "loss": 1.0114, "step": 31862 }, { "epoch": 1.8991536535939921, "grad_norm": 3.2478485107421875, "learning_rate": 6.387257014040526e-07, "loss": 1.1207, "step": 31864 }, { "epoch": 1.8992728573131483, "grad_norm": 3.579327344894409, "learning_rate": 6.372198176276211e-07, "loss": 1.2149, "step": 31866 }, { "epoch": 1.8993920610323043, "grad_norm": 3.2866013050079346, "learning_rate": 6.357156997267299e-07, "loss": 1.0919, "step": 31868 }, { "epoch": 1.8995112647514603, "grad_norm": 2.8655474185943604, "learning_rate": 6.342133477551915e-07, "loss": 1.0752, "step": 31870 }, { "epoch": 1.8996304684706162, "grad_norm": 3.3138811588287354, "learning_rate": 6.327127617667406e-07, "loss": 1.1606, "step": 31872 }, { "epoch": 1.8997496721897722, "grad_norm": 3.41351580619812, "learning_rate": 6.31213941815062e-07, "loss": 1.1277, "step": 31874 }, { "epoch": 1.8998688759089284, "grad_norm": 3.3026843070983887, "learning_rate": 6.297168879537741e-07, "loss": 1.1324, "step": 31876 }, { "epoch": 1.8999880796280844, "grad_norm": 2.9003312587738037, "learning_rate": 6.282216002364339e-07, "loss": 1.0782, "step": 31878 }, { "epoch": 1.9001072833472405, "grad_norm": 2.7587294578552246, "learning_rate": 6.267280787165264e-07, "loss": 1.0315, "step": 31880 }, { "epoch": 1.9002264870663965, "grad_norm": 3.453592538833618, "learning_rate": 6.252363234474923e-07, "loss": 1.2343, "step": 31882 }, { "epoch": 1.9003456907855525, "grad_norm": 3.397670030593872, "learning_rate": 6.237463344826777e-07, "loss": 1.0426, "step": 31884 }, { "epoch": 1.9004648945047085, "grad_norm": 2.794174909591675, "learning_rate": 6.222581118754067e-07, "loss": 0.9145, "step": 31886 }, { "epoch": 1.9005840982238644, "grad_norm": 3.2388181686401367, "learning_rate": 6.207716556789034e-07, "loss": 1.1214, "step": 31888 }, { "epoch": 1.9007033019430206, "grad_norm": 3.1659998893737793, "learning_rate": 6.192869659463418e-07, "loss": 1.0072, "step": 31890 }, { "epoch": 1.9008225056621768, "grad_norm": 3.0422654151916504, "learning_rate": 6.178040427308463e-07, "loss": 0.9461, "step": 31892 }, { "epoch": 1.9009417093813328, "grad_norm": 3.1913931369781494, "learning_rate": 6.163228860854575e-07, "loss": 0.9974, "step": 31894 }, { "epoch": 1.9010609131004887, "grad_norm": 3.47979998588562, "learning_rate": 6.148434960631611e-07, "loss": 1.0788, "step": 31896 }, { "epoch": 1.9011801168196447, "grad_norm": 2.8971328735351562, "learning_rate": 6.133658727168868e-07, "loss": 1.1047, "step": 31898 }, { "epoch": 1.9012993205388007, "grad_norm": 3.300344705581665, "learning_rate": 6.118900160994867e-07, "loss": 1.1523, "step": 31900 }, { "epoch": 1.9014185242579569, "grad_norm": 3.143801689147949, "learning_rate": 6.104159262637632e-07, "loss": 1.1027, "step": 31902 }, { "epoch": 1.9015377279771128, "grad_norm": 3.2935054302215576, "learning_rate": 6.089436032624407e-07, "loss": 1.2863, "step": 31904 }, { "epoch": 1.901656931696269, "grad_norm": 3.372969150543213, "learning_rate": 6.074730471482049e-07, "loss": 1.0912, "step": 31906 }, { "epoch": 1.901776135415425, "grad_norm": 3.2616078853607178, "learning_rate": 6.060042579736414e-07, "loss": 1.1444, "step": 31908 }, { "epoch": 1.901895339134581, "grad_norm": 2.8687424659729004, "learning_rate": 6.045372357913137e-07, "loss": 1.0177, "step": 31910 }, { "epoch": 1.902014542853737, "grad_norm": 3.3810839653015137, "learning_rate": 6.030719806536911e-07, "loss": 1.0435, "step": 31912 }, { "epoch": 1.902133746572893, "grad_norm": 3.3125176429748535, "learning_rate": 6.016084926131926e-07, "loss": 1.1323, "step": 31914 }, { "epoch": 1.902252950292049, "grad_norm": 3.150222063064575, "learning_rate": 6.001467717221765e-07, "loss": 1.0619, "step": 31916 }, { "epoch": 1.9023721540112053, "grad_norm": 3.528815507888794, "learning_rate": 5.986868180329286e-07, "loss": 1.0883, "step": 31918 }, { "epoch": 1.9024913577303613, "grad_norm": 3.3410351276397705, "learning_rate": 5.972286315976794e-07, "loss": 1.0866, "step": 31920 }, { "epoch": 1.9026105614495172, "grad_norm": 3.5168464183807373, "learning_rate": 5.957722124685872e-07, "loss": 1.2043, "step": 31922 }, { "epoch": 1.9027297651686732, "grad_norm": 3.854686737060547, "learning_rate": 5.943175606977603e-07, "loss": 1.1782, "step": 31924 }, { "epoch": 1.9028489688878292, "grad_norm": 3.031916379928589, "learning_rate": 5.928646763372292e-07, "loss": 1.0304, "step": 31926 }, { "epoch": 1.9029681726069854, "grad_norm": 3.150527238845825, "learning_rate": 5.914135594389857e-07, "loss": 1.1365, "step": 31928 }, { "epoch": 1.9030873763261413, "grad_norm": 3.4618141651153564, "learning_rate": 5.899642100549219e-07, "loss": 1.187, "step": 31930 }, { "epoch": 1.9032065800452975, "grad_norm": 3.105452537536621, "learning_rate": 5.885166282368848e-07, "loss": 1.0474, "step": 31932 }, { "epoch": 1.9033257837644535, "grad_norm": 2.698058605194092, "learning_rate": 5.870708140366777e-07, "loss": 1.0499, "step": 31934 }, { "epoch": 1.9034449874836095, "grad_norm": 3.236546516418457, "learning_rate": 5.85626767506009e-07, "loss": 1.1087, "step": 31936 }, { "epoch": 1.9035641912027654, "grad_norm": 3.4546263217926025, "learning_rate": 5.84184488696543e-07, "loss": 1.2187, "step": 31938 }, { "epoch": 1.9036833949219214, "grad_norm": 3.572378158569336, "learning_rate": 5.827439776598664e-07, "loss": 1.1462, "step": 31940 }, { "epoch": 1.9038025986410776, "grad_norm": 3.296402931213379, "learning_rate": 5.813052344475212e-07, "loss": 1.137, "step": 31942 }, { "epoch": 1.9039218023602338, "grad_norm": 3.124133586883545, "learning_rate": 5.798682591109717e-07, "loss": 1.0496, "step": 31944 }, { "epoch": 1.9040410060793898, "grad_norm": 3.3514339923858643, "learning_rate": 5.784330517016268e-07, "loss": 1.1926, "step": 31946 }, { "epoch": 1.9041602097985457, "grad_norm": 3.103257179260254, "learning_rate": 5.769996122708176e-07, "loss": 1.083, "step": 31948 }, { "epoch": 1.9042794135177017, "grad_norm": 2.981252908706665, "learning_rate": 5.755679408698367e-07, "loss": 0.9957, "step": 31950 }, { "epoch": 1.9043986172368577, "grad_norm": 3.1371567249298096, "learning_rate": 5.741380375498928e-07, "loss": 1.2544, "step": 31952 }, { "epoch": 1.9045178209560139, "grad_norm": 3.0768160820007324, "learning_rate": 5.727099023621396e-07, "loss": 1.0363, "step": 31954 }, { "epoch": 1.9046370246751698, "grad_norm": 3.566875696182251, "learning_rate": 5.712835353576695e-07, "loss": 1.0678, "step": 31956 }, { "epoch": 1.904756228394326, "grad_norm": 3.096975803375244, "learning_rate": 5.698589365875028e-07, "loss": 0.9733, "step": 31958 }, { "epoch": 1.904875432113482, "grad_norm": 3.171973466873169, "learning_rate": 5.6843610610261e-07, "loss": 1.2305, "step": 31960 }, { "epoch": 1.904994635832638, "grad_norm": 3.278357744216919, "learning_rate": 5.670150439538835e-07, "loss": 1.2404, "step": 31962 }, { "epoch": 1.905113839551794, "grad_norm": 2.690228223800659, "learning_rate": 5.655957501921605e-07, "loss": 1.0567, "step": 31964 }, { "epoch": 1.9052330432709499, "grad_norm": 3.398894786834717, "learning_rate": 5.641782248682171e-07, "loss": 0.9561, "step": 31966 }, { "epoch": 1.905352246990106, "grad_norm": 3.2973835468292236, "learning_rate": 5.62762468032757e-07, "loss": 1.0873, "step": 31968 }, { "epoch": 1.9054714507092623, "grad_norm": 3.4371325969696045, "learning_rate": 5.613484797364399e-07, "loss": 1.0317, "step": 31970 }, { "epoch": 1.9055906544284182, "grad_norm": 3.1002357006073, "learning_rate": 5.599362600298308e-07, "loss": 1.0193, "step": 31972 }, { "epoch": 1.9057098581475742, "grad_norm": 3.642354726791382, "learning_rate": 5.585258089634615e-07, "loss": 0.9992, "step": 31974 }, { "epoch": 1.9058290618667302, "grad_norm": 3.7063777446746826, "learning_rate": 5.571171265877917e-07, "loss": 1.1939, "step": 31976 }, { "epoch": 1.9059482655858861, "grad_norm": 3.3248813152313232, "learning_rate": 5.557102129532033e-07, "loss": 1.0028, "step": 31978 }, { "epoch": 1.9060674693050423, "grad_norm": 3.2117528915405273, "learning_rate": 5.543050681100392e-07, "loss": 0.9744, "step": 31980 }, { "epoch": 1.9061866730241983, "grad_norm": 3.428903579711914, "learning_rate": 5.529016921085539e-07, "loss": 1.0806, "step": 31982 }, { "epoch": 1.9063058767433545, "grad_norm": 3.161682367324829, "learning_rate": 5.515000849989571e-07, "loss": 1.0493, "step": 31984 }, { "epoch": 1.9064250804625105, "grad_norm": 3.2599740028381348, "learning_rate": 5.501002468313865e-07, "loss": 1.0119, "step": 31986 }, { "epoch": 1.9065442841816664, "grad_norm": 3.409283399581909, "learning_rate": 5.487021776559242e-07, "loss": 1.1371, "step": 31988 }, { "epoch": 1.9066634879008224, "grad_norm": 3.036663055419922, "learning_rate": 5.473058775225803e-07, "loss": 1.0682, "step": 31990 }, { "epoch": 1.9067826916199786, "grad_norm": 3.2229843139648438, "learning_rate": 5.459113464813092e-07, "loss": 1.3398, "step": 31992 }, { "epoch": 1.9069018953391346, "grad_norm": 3.395806312561035, "learning_rate": 5.445185845819934e-07, "loss": 1.2818, "step": 31994 }, { "epoch": 1.9070210990582908, "grad_norm": 3.430772304534912, "learning_rate": 5.431275918744483e-07, "loss": 1.1825, "step": 31996 }, { "epoch": 1.9071403027774467, "grad_norm": 3.0080103874206543, "learning_rate": 5.417383684084565e-07, "loss": 1.243, "step": 31998 }, { "epoch": 1.9072595064966027, "grad_norm": 3.3378407955169678, "learning_rate": 5.403509142336949e-07, "loss": 1.0961, "step": 32000 }, { "epoch": 1.9073787102157587, "grad_norm": 3.0052037239074707, "learning_rate": 5.389652293998071e-07, "loss": 0.993, "step": 32002 }, { "epoch": 1.9074979139349146, "grad_norm": 3.2431650161743164, "learning_rate": 5.375813139563646e-07, "loss": 1.0391, "step": 32004 }, { "epoch": 1.9076171176540708, "grad_norm": 3.252603530883789, "learning_rate": 5.361991679528721e-07, "loss": 1.1534, "step": 32006 }, { "epoch": 1.9077363213732268, "grad_norm": 2.925398111343384, "learning_rate": 5.348187914387737e-07, "loss": 1.0755, "step": 32008 }, { "epoch": 1.907855525092383, "grad_norm": 3.32736873626709, "learning_rate": 5.334401844634518e-07, "loss": 1.2083, "step": 32010 }, { "epoch": 1.907974728811539, "grad_norm": 3.6510634422302246, "learning_rate": 5.320633470762171e-07, "loss": 1.092, "step": 32012 }, { "epoch": 1.908093932530695, "grad_norm": 2.916010856628418, "learning_rate": 5.306882793263301e-07, "loss": 1.0861, "step": 32014 }, { "epoch": 1.908213136249851, "grad_norm": 2.913877487182617, "learning_rate": 5.29314981262985e-07, "loss": 1.0228, "step": 32016 }, { "epoch": 1.908332339969007, "grad_norm": 3.3426496982574463, "learning_rate": 5.279434529353033e-07, "loss": 1.0155, "step": 32018 }, { "epoch": 1.908451543688163, "grad_norm": 3.634211301803589, "learning_rate": 5.265736943923516e-07, "loss": 1.3493, "step": 32020 }, { "epoch": 1.9085707474073192, "grad_norm": 3.280458927154541, "learning_rate": 5.252057056831349e-07, "loss": 1.0796, "step": 32022 }, { "epoch": 1.9086899511264752, "grad_norm": 2.9695096015930176, "learning_rate": 5.238394868565755e-07, "loss": 1.1639, "step": 32024 }, { "epoch": 1.9088091548456312, "grad_norm": 2.986830949783325, "learning_rate": 5.224750379615673e-07, "loss": 1.1836, "step": 32026 }, { "epoch": 1.9089283585647872, "grad_norm": 3.2917606830596924, "learning_rate": 5.211123590469103e-07, "loss": 1.2313, "step": 32028 }, { "epoch": 1.9090475622839431, "grad_norm": 3.27870774269104, "learning_rate": 5.197514501613543e-07, "loss": 1.043, "step": 32030 }, { "epoch": 1.9091667660030993, "grad_norm": 3.2783143520355225, "learning_rate": 5.183923113535827e-07, "loss": 1.0535, "step": 32032 }, { "epoch": 1.9092859697222553, "grad_norm": 3.5560379028320312, "learning_rate": 5.170349426722176e-07, "loss": 1.1723, "step": 32034 }, { "epoch": 1.9094051734414115, "grad_norm": 2.8616232872009277, "learning_rate": 5.156793441658148e-07, "loss": 0.9468, "step": 32036 }, { "epoch": 1.9095243771605674, "grad_norm": 3.350792169570923, "learning_rate": 5.143255158828741e-07, "loss": 0.9797, "step": 32038 }, { "epoch": 1.9096435808797234, "grad_norm": 3.4416005611419678, "learning_rate": 5.129734578718238e-07, "loss": 1.0698, "step": 32040 }, { "epoch": 1.9097627845988794, "grad_norm": 3.2738420963287354, "learning_rate": 5.116231701810248e-07, "loss": 1.1774, "step": 32042 }, { "epoch": 1.9098819883180356, "grad_norm": 3.1931543350219727, "learning_rate": 5.102746528587943e-07, "loss": 1.0556, "step": 32044 }, { "epoch": 1.9100011920371915, "grad_norm": 3.199666738510132, "learning_rate": 5.089279059533658e-07, "loss": 1.045, "step": 32046 }, { "epoch": 1.9101203957563477, "grad_norm": 3.2813358306884766, "learning_rate": 5.075829295129176e-07, "loss": 1.1186, "step": 32048 }, { "epoch": 1.9102395994755037, "grad_norm": 2.9822235107421875, "learning_rate": 5.06239723585561e-07, "loss": 0.9364, "step": 32050 }, { "epoch": 1.9103588031946597, "grad_norm": 3.4453601837158203, "learning_rate": 5.048982882193521e-07, "loss": 1.1597, "step": 32052 }, { "epoch": 1.9104780069138156, "grad_norm": 3.4966468811035156, "learning_rate": 5.035586234622803e-07, "loss": 0.999, "step": 32054 }, { "epoch": 1.9105972106329716, "grad_norm": 3.3876826763153076, "learning_rate": 5.022207293622627e-07, "loss": 1.0848, "step": 32056 }, { "epoch": 1.9107164143521278, "grad_norm": 3.1869473457336426, "learning_rate": 5.008846059671668e-07, "loss": 1.1217, "step": 32058 }, { "epoch": 1.9108356180712838, "grad_norm": 2.954578399658203, "learning_rate": 4.995502533247876e-07, "loss": 1.067, "step": 32060 }, { "epoch": 1.91095482179044, "grad_norm": 3.1697850227355957, "learning_rate": 4.982176714828646e-07, "loss": 1.09, "step": 32062 }, { "epoch": 1.911074025509596, "grad_norm": 3.198136806488037, "learning_rate": 4.968868604890542e-07, "loss": 1.1755, "step": 32064 }, { "epoch": 1.911193229228752, "grad_norm": 3.5418388843536377, "learning_rate": 4.955578203909795e-07, "loss": 1.1203, "step": 32066 }, { "epoch": 1.9113124329479079, "grad_norm": 3.457245349884033, "learning_rate": 4.9423055123618e-07, "loss": 1.1304, "step": 32068 }, { "epoch": 1.911431636667064, "grad_norm": 3.036144971847534, "learning_rate": 4.929050530721347e-07, "loss": 1.009, "step": 32070 }, { "epoch": 1.91155084038622, "grad_norm": 3.0083906650543213, "learning_rate": 4.91581325946261e-07, "loss": 1.2287, "step": 32072 }, { "epoch": 1.9116700441053762, "grad_norm": 3.388843297958374, "learning_rate": 4.902593699059155e-07, "loss": 1.0329, "step": 32074 }, { "epoch": 1.9117892478245322, "grad_norm": 3.4214320182800293, "learning_rate": 4.889391849983882e-07, "loss": 1.3202, "step": 32076 }, { "epoch": 1.9119084515436882, "grad_norm": 3.2408926486968994, "learning_rate": 4.876207712709025e-07, "loss": 1.0689, "step": 32078 }, { "epoch": 1.9120276552628441, "grad_norm": 3.0552785396575928, "learning_rate": 4.863041287706371e-07, "loss": 1.2015, "step": 32080 }, { "epoch": 1.912146858982, "grad_norm": 3.1733808517456055, "learning_rate": 4.849892575446713e-07, "loss": 1.102, "step": 32082 }, { "epoch": 1.9122660627011563, "grad_norm": 3.223867893218994, "learning_rate": 4.836761576400561e-07, "loss": 1.0865, "step": 32084 }, { "epoch": 1.9123852664203125, "grad_norm": 3.2716472148895264, "learning_rate": 4.823648291037708e-07, "loss": 0.9149, "step": 32086 }, { "epoch": 1.9125044701394684, "grad_norm": 2.88572359085083, "learning_rate": 4.810552719827111e-07, "loss": 1.005, "step": 32088 }, { "epoch": 1.9126236738586244, "grad_norm": 3.248811960220337, "learning_rate": 4.797474863237283e-07, "loss": 1.0068, "step": 32090 }, { "epoch": 1.9127428775777804, "grad_norm": 2.9745519161224365, "learning_rate": 4.784414721736186e-07, "loss": 1.0069, "step": 32092 }, { "epoch": 1.9128620812969364, "grad_norm": 3.3220863342285156, "learning_rate": 4.771372295790888e-07, "loss": 1.1036, "step": 32094 }, { "epoch": 1.9129812850160925, "grad_norm": 3.0035240650177, "learning_rate": 4.758347585868017e-07, "loss": 1.1306, "step": 32096 }, { "epoch": 1.9131004887352485, "grad_norm": 3.4087741374969482, "learning_rate": 4.745340592433478e-07, "loss": 1.0103, "step": 32098 }, { "epoch": 1.9132196924544047, "grad_norm": 3.086939811706543, "learning_rate": 4.7323513159526213e-07, "loss": 1.02, "step": 32100 }, { "epoch": 1.9133388961735607, "grad_norm": 2.8229994773864746, "learning_rate": 4.7193797568900745e-07, "loss": 1.1015, "step": 32102 }, { "epoch": 1.9134580998927166, "grad_norm": 3.1351773738861084, "learning_rate": 4.706425915709967e-07, "loss": 1.0072, "step": 32104 }, { "epoch": 1.9135773036118726, "grad_norm": 3.070167303085327, "learning_rate": 4.693489792875594e-07, "loss": 1.1477, "step": 32106 }, { "epoch": 1.9136965073310286, "grad_norm": 3.573866844177246, "learning_rate": 4.680571388849753e-07, "loss": 1.1027, "step": 32108 }, { "epoch": 1.9138157110501848, "grad_norm": 3.1243109703063965, "learning_rate": 4.667670704094573e-07, "loss": 0.9282, "step": 32110 }, { "epoch": 1.913934914769341, "grad_norm": 3.7333033084869385, "learning_rate": 4.654787739071631e-07, "loss": 1.1194, "step": 32112 }, { "epoch": 1.914054118488497, "grad_norm": 2.997077226638794, "learning_rate": 4.641922494241724e-07, "loss": 0.9892, "step": 32114 }, { "epoch": 1.914173322207653, "grad_norm": 3.4657435417175293, "learning_rate": 4.629074970065039e-07, "loss": 1.0738, "step": 32116 }, { "epoch": 1.9142925259268089, "grad_norm": 3.687413215637207, "learning_rate": 4.6162451670013207e-07, "loss": 1.1224, "step": 32118 }, { "epoch": 1.9144117296459648, "grad_norm": 3.196845293045044, "learning_rate": 4.603433085509368e-07, "loss": 1.1668, "step": 32120 }, { "epoch": 1.914530933365121, "grad_norm": 3.1920785903930664, "learning_rate": 4.590638726047647e-07, "loss": 0.9737, "step": 32122 }, { "epoch": 1.914650137084277, "grad_norm": 3.0165629386901855, "learning_rate": 4.577862089073792e-07, "loss": 0.9934, "step": 32124 }, { "epoch": 1.9147693408034332, "grad_norm": 3.2532858848571777, "learning_rate": 4.5651031750448826e-07, "loss": 1.0833, "step": 32126 }, { "epoch": 1.9148885445225892, "grad_norm": 3.1890790462493896, "learning_rate": 4.5523619844173305e-07, "loss": 1.2629, "step": 32128 }, { "epoch": 1.9150077482417451, "grad_norm": 3.275428295135498, "learning_rate": 4.539638517646938e-07, "loss": 1.1588, "step": 32130 }, { "epoch": 1.915126951960901, "grad_norm": 3.44053053855896, "learning_rate": 4.5269327751889523e-07, "loss": 1.2528, "step": 32132 }, { "epoch": 1.915246155680057, "grad_norm": 3.519258975982666, "learning_rate": 4.514244757497732e-07, "loss": 0.9569, "step": 32134 }, { "epoch": 1.9153653593992133, "grad_norm": 3.024315118789673, "learning_rate": 4.501574465027303e-07, "loss": 1.0423, "step": 32136 }, { "epoch": 1.9154845631183695, "grad_norm": 2.9294991493225098, "learning_rate": 4.488921898230858e-07, "loss": 1.0099, "step": 32138 }, { "epoch": 1.9156037668375254, "grad_norm": 3.180032730102539, "learning_rate": 4.4762870575610905e-07, "loss": 1.2913, "step": 32140 }, { "epoch": 1.9157229705566814, "grad_norm": 2.1137101650238037, "learning_rate": 4.463669943469917e-07, "loss": 1.038, "step": 32142 }, { "epoch": 1.9158421742758374, "grad_norm": 3.161482572555542, "learning_rate": 4.451070556408754e-07, "loss": 1.1091, "step": 32144 }, { "epoch": 1.9159613779949933, "grad_norm": 3.0857036113739014, "learning_rate": 4.4384888968282967e-07, "loss": 1.037, "step": 32146 }, { "epoch": 1.9160805817141495, "grad_norm": 3.223472833633423, "learning_rate": 4.425924965178574e-07, "loss": 1.0675, "step": 32148 }, { "epoch": 1.9161997854333055, "grad_norm": 2.853945016860962, "learning_rate": 4.41337876190917e-07, "loss": 1.2311, "step": 32150 }, { "epoch": 1.9163189891524617, "grad_norm": 3.1601736545562744, "learning_rate": 4.4008502874688383e-07, "loss": 1.0314, "step": 32152 }, { "epoch": 1.9164381928716177, "grad_norm": 3.6526763439178467, "learning_rate": 4.3883395423057197e-07, "loss": 0.945, "step": 32154 }, { "epoch": 1.9165573965907736, "grad_norm": 2.976168394088745, "learning_rate": 4.375846526867511e-07, "loss": 0.9803, "step": 32156 }, { "epoch": 1.9166766003099296, "grad_norm": 3.2268691062927246, "learning_rate": 4.363371241600911e-07, "loss": 1.0988, "step": 32158 }, { "epoch": 1.9167958040290856, "grad_norm": 3.3702802658081055, "learning_rate": 4.350913686952396e-07, "loss": 1.0088, "step": 32160 }, { "epoch": 1.9169150077482418, "grad_norm": 3.4109697341918945, "learning_rate": 4.3384738633675534e-07, "loss": 1.0299, "step": 32162 }, { "epoch": 1.917034211467398, "grad_norm": 3.2015607357025146, "learning_rate": 4.326051771291362e-07, "loss": 1.1448, "step": 32164 }, { "epoch": 1.917153415186554, "grad_norm": 3.553440809249878, "learning_rate": 4.3136474111681867e-07, "loss": 1.2877, "step": 32166 }, { "epoch": 1.9172726189057099, "grad_norm": 3.3376195430755615, "learning_rate": 4.3012607834418963e-07, "loss": 1.0931, "step": 32168 }, { "epoch": 1.9173918226248658, "grad_norm": 3.1582748889923096, "learning_rate": 4.2888918885554684e-07, "loss": 1.1798, "step": 32170 }, { "epoch": 1.9175110263440218, "grad_norm": 3.4349682331085205, "learning_rate": 4.276540726951439e-07, "loss": 1.1525, "step": 32172 }, { "epoch": 1.917630230063178, "grad_norm": 3.352344512939453, "learning_rate": 4.2642072990716765e-07, "loss": 1.1822, "step": 32174 }, { "epoch": 1.917749433782334, "grad_norm": 2.8278372287750244, "learning_rate": 4.2518916053573276e-07, "loss": 1.043, "step": 32176 }, { "epoch": 1.9178686375014902, "grad_norm": 3.3133466243743896, "learning_rate": 4.23959364624904e-07, "loss": 1.2984, "step": 32178 }, { "epoch": 1.9179878412206461, "grad_norm": 3.3783936500549316, "learning_rate": 4.227313422186685e-07, "loss": 0.9326, "step": 32180 }, { "epoch": 1.918107044939802, "grad_norm": 3.1303935050964355, "learning_rate": 4.2150509336095765e-07, "loss": 1.0974, "step": 32182 }, { "epoch": 1.918226248658958, "grad_norm": 3.1260313987731934, "learning_rate": 4.2028061809564756e-07, "loss": 1.0351, "step": 32184 }, { "epoch": 1.918345452378114, "grad_norm": 3.2832252979278564, "learning_rate": 4.1905791646653093e-07, "loss": 0.9993, "step": 32186 }, { "epoch": 1.9184646560972702, "grad_norm": 3.4086272716522217, "learning_rate": 4.1783698851735054e-07, "loss": 1.1248, "step": 32188 }, { "epoch": 1.9185838598164264, "grad_norm": 3.085627317428589, "learning_rate": 4.1661783429178815e-07, "loss": 1.0419, "step": 32190 }, { "epoch": 1.9187030635355824, "grad_norm": 3.345696210861206, "learning_rate": 4.1540045383344773e-07, "loss": 1.0203, "step": 32192 }, { "epoch": 1.9188222672547384, "grad_norm": 3.076815366744995, "learning_rate": 4.141848471858889e-07, "loss": 0.997, "step": 32194 }, { "epoch": 1.9189414709738943, "grad_norm": 3.265316963195801, "learning_rate": 4.129710143925936e-07, "loss": 0.9472, "step": 32196 }, { "epoch": 1.9190606746930503, "grad_norm": 3.466783046722412, "learning_rate": 4.117589554969825e-07, "loss": 1.0728, "step": 32198 }, { "epoch": 1.9191798784122065, "grad_norm": 2.999373197555542, "learning_rate": 4.105486705424211e-07, "loss": 0.9905, "step": 32200 }, { "epoch": 1.9192990821313625, "grad_norm": 3.4764719009399414, "learning_rate": 4.093401595722024e-07, "loss": 1.1143, "step": 32202 }, { "epoch": 1.9194182858505187, "grad_norm": 3.631701946258545, "learning_rate": 4.081334226295586e-07, "loss": 1.0475, "step": 32204 }, { "epoch": 1.9195374895696746, "grad_norm": 3.1192712783813477, "learning_rate": 4.0692845975766057e-07, "loss": 1.128, "step": 32206 }, { "epoch": 1.9196566932888306, "grad_norm": 3.151927947998047, "learning_rate": 4.057252709996073e-07, "loss": 1.0944, "step": 32208 }, { "epoch": 1.9197758970079866, "grad_norm": 3.7772350311279297, "learning_rate": 4.0452385639845323e-07, "loss": 1.1369, "step": 32210 }, { "epoch": 1.9198951007271425, "grad_norm": 3.2534892559051514, "learning_rate": 4.0332421599716396e-07, "loss": 1.0532, "step": 32212 }, { "epoch": 1.9200143044462987, "grad_norm": 3.5330889225006104, "learning_rate": 4.021263498386663e-07, "loss": 1.0642, "step": 32214 }, { "epoch": 1.920133508165455, "grad_norm": 3.3986194133758545, "learning_rate": 4.009302579657981e-07, "loss": 1.0112, "step": 32216 }, { "epoch": 1.9202527118846109, "grad_norm": 3.372455596923828, "learning_rate": 3.9973594042135855e-07, "loss": 1.0878, "step": 32218 }, { "epoch": 1.9203719156037669, "grad_norm": 3.1019980907440186, "learning_rate": 3.9854339724807457e-07, "loss": 0.9916, "step": 32220 }, { "epoch": 1.9204911193229228, "grad_norm": 3.7104074954986572, "learning_rate": 3.973526284886009e-07, "loss": 1.1786, "step": 32222 }, { "epoch": 1.9206103230420788, "grad_norm": 3.1231534481048584, "learning_rate": 3.961636341855368e-07, "loss": 1.1582, "step": 32224 }, { "epoch": 1.920729526761235, "grad_norm": 2.949448585510254, "learning_rate": 3.94976414381415e-07, "loss": 1.0737, "step": 32226 }, { "epoch": 1.920848730480391, "grad_norm": 3.650926351547241, "learning_rate": 3.937909691187125e-07, "loss": 1.1519, "step": 32228 }, { "epoch": 1.9209679341995471, "grad_norm": 3.1720895767211914, "learning_rate": 3.9260729843983436e-07, "loss": 1.1173, "step": 32230 }, { "epoch": 1.9210871379187031, "grad_norm": 3.1768808364868164, "learning_rate": 3.914254023871189e-07, "loss": 1.2797, "step": 32232 }, { "epoch": 1.921206341637859, "grad_norm": 3.3376173973083496, "learning_rate": 3.9024528100284897e-07, "loss": 1.0753, "step": 32234 }, { "epoch": 1.921325545357015, "grad_norm": 3.3967297077178955, "learning_rate": 3.8906693432924634e-07, "loss": 1.0802, "step": 32236 }, { "epoch": 1.921444749076171, "grad_norm": 3.2478439807891846, "learning_rate": 3.878903624084607e-07, "loss": 1.0421, "step": 32238 }, { "epoch": 1.9215639527953272, "grad_norm": 3.1627628803253174, "learning_rate": 3.867155652825805e-07, "loss": 1.0906, "step": 32240 }, { "epoch": 1.9216831565144834, "grad_norm": 3.136852264404297, "learning_rate": 3.855425429936388e-07, "loss": 1.1509, "step": 32242 }, { "epoch": 1.9218023602336394, "grad_norm": 2.8869543075561523, "learning_rate": 3.8437129558359096e-07, "loss": 1.0892, "step": 32244 }, { "epoch": 1.9219215639527953, "grad_norm": 2.8974697589874268, "learning_rate": 3.8320182309434236e-07, "loss": 1.0245, "step": 32246 }, { "epoch": 1.9220407676719513, "grad_norm": 3.0424749851226807, "learning_rate": 3.8203412556772624e-07, "loss": 1.0371, "step": 32248 }, { "epoch": 1.9221599713911073, "grad_norm": 3.178156852722168, "learning_rate": 3.808682030455146e-07, "loss": 1.1097, "step": 32250 }, { "epoch": 1.9222791751102635, "grad_norm": 3.1948466300964355, "learning_rate": 3.797040555694187e-07, "loss": 1.0204, "step": 32252 }, { "epoch": 1.9223983788294194, "grad_norm": 3.1447372436523438, "learning_rate": 3.7854168318108285e-07, "loss": 1.0558, "step": 32254 }, { "epoch": 1.9225175825485756, "grad_norm": 3.3389627933502197, "learning_rate": 3.773810859220905e-07, "loss": 1.2504, "step": 32256 }, { "epoch": 1.9226367862677316, "grad_norm": 3.371530055999756, "learning_rate": 3.76222263833953e-07, "loss": 1.1166, "step": 32258 }, { "epoch": 1.9227559899868876, "grad_norm": 2.830815315246582, "learning_rate": 3.7506521695813703e-07, "loss": 1.1414, "step": 32260 }, { "epoch": 1.9228751937060435, "grad_norm": 3.277069568634033, "learning_rate": 3.739099453360262e-07, "loss": 1.1833, "step": 32262 }, { "epoch": 1.9229943974251995, "grad_norm": 3.19606614112854, "learning_rate": 3.727564490089486e-07, "loss": 1.0145, "step": 32264 }, { "epoch": 1.9231136011443557, "grad_norm": 3.6154611110687256, "learning_rate": 3.7160472801817115e-07, "loss": 1.2458, "step": 32266 }, { "epoch": 1.923232804863512, "grad_norm": 3.2338836193084717, "learning_rate": 3.7045478240489426e-07, "loss": 1.0925, "step": 32268 }, { "epoch": 1.9233520085826679, "grad_norm": 3.2071144580841064, "learning_rate": 3.693066122102573e-07, "loss": 1.0261, "step": 32270 }, { "epoch": 1.9234712123018238, "grad_norm": 2.8585684299468994, "learning_rate": 3.681602174753329e-07, "loss": 1.0412, "step": 32272 }, { "epoch": 1.9235904160209798, "grad_norm": 3.367428779602051, "learning_rate": 3.670155982411272e-07, "loss": 1.2867, "step": 32274 }, { "epoch": 1.9237096197401358, "grad_norm": 3.0728089809417725, "learning_rate": 3.658727545485907e-07, "loss": 0.9933, "step": 32276 }, { "epoch": 1.923828823459292, "grad_norm": 3.471205234527588, "learning_rate": 3.64731686438613e-07, "loss": 1.0597, "step": 32278 }, { "epoch": 1.923948027178448, "grad_norm": 3.6774520874023438, "learning_rate": 3.6359239395200583e-07, "loss": 1.1559, "step": 32280 }, { "epoch": 1.9240672308976041, "grad_norm": 2.7787163257598877, "learning_rate": 3.6245487712952e-07, "loss": 1.1917, "step": 32282 }, { "epoch": 1.92418643461676, "grad_norm": 3.1563353538513184, "learning_rate": 3.613191360118673e-07, "loss": 1.0338, "step": 32284 }, { "epoch": 1.924305638335916, "grad_norm": 2.944795846939087, "learning_rate": 3.6018517063965975e-07, "loss": 1.0718, "step": 32286 }, { "epoch": 1.924424842055072, "grad_norm": 3.4916999340057373, "learning_rate": 3.5905298105347043e-07, "loss": 1.0608, "step": 32288 }, { "epoch": 1.924544045774228, "grad_norm": 3.3830230236053467, "learning_rate": 3.579225672938058e-07, "loss": 1.167, "step": 32290 }, { "epoch": 1.9246632494933842, "grad_norm": 3.4877307415008545, "learning_rate": 3.567939294010947e-07, "loss": 1.0568, "step": 32292 }, { "epoch": 1.9247824532125404, "grad_norm": 3.356032609939575, "learning_rate": 3.556670674157159e-07, "loss": 0.9627, "step": 32294 }, { "epoch": 1.9249016569316963, "grad_norm": 2.9761383533477783, "learning_rate": 3.545419813779871e-07, "loss": 1.1431, "step": 32296 }, { "epoch": 1.9250208606508523, "grad_norm": 3.1182055473327637, "learning_rate": 3.5341867132814845e-07, "loss": 1.0665, "step": 32298 }, { "epoch": 1.9251400643700083, "grad_norm": 3.376039505004883, "learning_rate": 3.5229713730638435e-07, "loss": 1.1283, "step": 32300 }, { "epoch": 1.9252592680891643, "grad_norm": 3.5361239910125732, "learning_rate": 3.511773793528239e-07, "loss": 1.0909, "step": 32302 }, { "epoch": 1.9253784718083204, "grad_norm": 3.2390432357788086, "learning_rate": 3.5005939750751835e-07, "loss": 1.0857, "step": 32304 }, { "epoch": 1.9254976755274764, "grad_norm": 3.0111708641052246, "learning_rate": 3.4894319181046355e-07, "loss": 1.0998, "step": 32306 }, { "epoch": 1.9256168792466326, "grad_norm": 3.468827962875366, "learning_rate": 3.478287623015941e-07, "loss": 1.001, "step": 32308 }, { "epoch": 1.9257360829657886, "grad_norm": 3.1048507690429688, "learning_rate": 3.467161090207727e-07, "loss": 1.085, "step": 32310 }, { "epoch": 1.9258552866849445, "grad_norm": 2.9496119022369385, "learning_rate": 3.456052320078007e-07, "loss": 1.0114, "step": 32312 }, { "epoch": 1.9259744904041005, "grad_norm": 3.462209939956665, "learning_rate": 3.4449613130241865e-07, "loss": 1.0971, "step": 32314 }, { "epoch": 1.9260936941232565, "grad_norm": 2.9996845722198486, "learning_rate": 3.4338880694430586e-07, "loss": 1.0938, "step": 32316 }, { "epoch": 1.9262128978424127, "grad_norm": 3.302964210510254, "learning_rate": 3.4228325897307513e-07, "loss": 1.1134, "step": 32318 }, { "epoch": 1.9263321015615689, "grad_norm": 3.4120900630950928, "learning_rate": 3.4117948742827254e-07, "loss": 1.0255, "step": 32320 }, { "epoch": 1.9264513052807248, "grad_norm": 2.7956902980804443, "learning_rate": 3.400774923493832e-07, "loss": 0.9324, "step": 32322 }, { "epoch": 1.9265705089998808, "grad_norm": 3.421165943145752, "learning_rate": 3.3897727377583675e-07, "loss": 1.0397, "step": 32324 }, { "epoch": 1.9266897127190368, "grad_norm": 3.6290886402130127, "learning_rate": 3.3787883174698496e-07, "loss": 1.1903, "step": 32326 }, { "epoch": 1.9268089164381927, "grad_norm": 2.6469242572784424, "learning_rate": 3.3678216630211865e-07, "loss": 0.9146, "step": 32328 }, { "epoch": 1.926928120157349, "grad_norm": 3.220008134841919, "learning_rate": 3.3568727748048424e-07, "loss": 1.1201, "step": 32330 }, { "epoch": 1.927047323876505, "grad_norm": 3.3292720317840576, "learning_rate": 3.3459416532123365e-07, "loss": 1.2191, "step": 32332 }, { "epoch": 1.927166527595661, "grad_norm": 3.1480000019073486, "learning_rate": 3.3350282986348016e-07, "loss": 1.0572, "step": 32334 }, { "epoch": 1.927285731314817, "grad_norm": 3.1826834678649902, "learning_rate": 3.3241327114626465e-07, "loss": 0.993, "step": 32336 }, { "epoch": 1.927404935033973, "grad_norm": 3.1359450817108154, "learning_rate": 3.3132548920856157e-07, "loss": 1.0943, "step": 32338 }, { "epoch": 1.927524138753129, "grad_norm": 2.996443510055542, "learning_rate": 3.3023948408928416e-07, "loss": 1.1535, "step": 32340 }, { "epoch": 1.927643342472285, "grad_norm": 2.919093608856201, "learning_rate": 3.291552558272848e-07, "loss": 1.0451, "step": 32342 }, { "epoch": 1.9277625461914412, "grad_norm": 3.292464256286621, "learning_rate": 3.280728044613435e-07, "loss": 1.0717, "step": 32344 }, { "epoch": 1.9278817499105974, "grad_norm": 3.296431064605713, "learning_rate": 3.269921300301959e-07, "loss": 1.075, "step": 32346 }, { "epoch": 1.9280009536297533, "grad_norm": 2.83939528465271, "learning_rate": 3.2591323257248893e-07, "loss": 0.9442, "step": 32348 }, { "epoch": 1.9281201573489093, "grad_norm": 3.2097370624542236, "learning_rate": 3.248361121268251e-07, "loss": 1.126, "step": 32350 }, { "epoch": 1.9282393610680653, "grad_norm": 3.651250123977661, "learning_rate": 3.237607687317401e-07, "loss": 1.149, "step": 32352 }, { "epoch": 1.9283585647872212, "grad_norm": 3.0442302227020264, "learning_rate": 3.226872024256922e-07, "loss": 1.1148, "step": 32354 }, { "epoch": 1.9284777685063774, "grad_norm": 4.104615688323975, "learning_rate": 3.21615413247095e-07, "loss": 1.4327, "step": 32356 }, { "epoch": 1.9285969722255334, "grad_norm": 3.5790255069732666, "learning_rate": 3.2054540123428454e-07, "loss": 1.0871, "step": 32358 }, { "epoch": 1.9287161759446896, "grad_norm": 3.2518539428710938, "learning_rate": 3.194771664255469e-07, "loss": 1.1404, "step": 32360 }, { "epoch": 1.9288353796638456, "grad_norm": 3.270293951034546, "learning_rate": 3.184107088590849e-07, "loss": 1.2348, "step": 32362 }, { "epoch": 1.9289545833830015, "grad_norm": 3.196533203125, "learning_rate": 3.173460285730567e-07, "loss": 1.1348, "step": 32364 }, { "epoch": 1.9290737871021575, "grad_norm": 3.4499566555023193, "learning_rate": 3.1628312560555426e-07, "loss": 1.1756, "step": 32366 }, { "epoch": 1.9291929908213137, "grad_norm": 3.241719961166382, "learning_rate": 3.152219999945916e-07, "loss": 1.2458, "step": 32368 }, { "epoch": 1.9293121945404696, "grad_norm": 3.73256778717041, "learning_rate": 3.141626517781271e-07, "loss": 1.2408, "step": 32370 }, { "epoch": 1.9294313982596258, "grad_norm": 3.034212112426758, "learning_rate": 3.131050809940694e-07, "loss": 0.9853, "step": 32372 }, { "epoch": 1.9295506019787818, "grad_norm": 3.031419038772583, "learning_rate": 3.120492876802439e-07, "loss": 0.9503, "step": 32374 }, { "epoch": 1.9296698056979378, "grad_norm": 3.368541717529297, "learning_rate": 3.1099527187442024e-07, "loss": 1.1626, "step": 32376 }, { "epoch": 1.9297890094170937, "grad_norm": 3.224355697631836, "learning_rate": 3.0994303361430167e-07, "loss": 0.9856, "step": 32378 }, { "epoch": 1.9299082131362497, "grad_norm": 3.61964750289917, "learning_rate": 3.088925729375358e-07, "loss": 1.159, "step": 32380 }, { "epoch": 1.930027416855406, "grad_norm": 3.245891571044922, "learning_rate": 3.0784388988170376e-07, "loss": 1.0259, "step": 32382 }, { "epoch": 1.9301466205745619, "grad_norm": 3.181931495666504, "learning_rate": 3.067969844843088e-07, "loss": 1.0655, "step": 32384 }, { "epoch": 1.930265824293718, "grad_norm": 3.154099702835083, "learning_rate": 3.0575185678281547e-07, "loss": 1.0097, "step": 32386 }, { "epoch": 1.930385028012874, "grad_norm": 3.3602914810180664, "learning_rate": 3.047085068145994e-07, "loss": 1.1314, "step": 32388 }, { "epoch": 1.93050423173203, "grad_norm": 3.1351003646850586, "learning_rate": 3.036669346169918e-07, "loss": 1.1492, "step": 32390 }, { "epoch": 1.930623435451186, "grad_norm": 3.291422128677368, "learning_rate": 3.026271402272518e-07, "loss": 1.1179, "step": 32392 }, { "epoch": 1.9307426391703422, "grad_norm": 3.251849412918091, "learning_rate": 3.0158912368258297e-07, "loss": 1.1696, "step": 32394 }, { "epoch": 1.9308618428894981, "grad_norm": 3.4072189331054688, "learning_rate": 3.005528850201056e-07, "loss": 1.1746, "step": 32396 }, { "epoch": 1.9309810466086543, "grad_norm": 2.8322207927703857, "learning_rate": 2.9951842427689557e-07, "loss": 1.0248, "step": 32398 }, { "epoch": 1.9311002503278103, "grad_norm": 3.4068729877471924, "learning_rate": 2.984857414899678e-07, "loss": 1.0931, "step": 32400 }, { "epoch": 1.9312194540469663, "grad_norm": 3.1874330043792725, "learning_rate": 2.974548366962482e-07, "loss": 1.059, "step": 32402 }, { "epoch": 1.9313386577661222, "grad_norm": 3.217743158340454, "learning_rate": 2.964257099326295e-07, "loss": 1.0438, "step": 32404 }, { "epoch": 1.9314578614852782, "grad_norm": 3.609832525253296, "learning_rate": 2.953983612359212e-07, "loss": 1.2151, "step": 32406 }, { "epoch": 1.9315770652044344, "grad_norm": 3.1750669479370117, "learning_rate": 2.9437279064287727e-07, "loss": 1.109, "step": 32408 }, { "epoch": 1.9316962689235904, "grad_norm": 3.4721720218658447, "learning_rate": 2.9334899819017937e-07, "loss": 1.0739, "step": 32410 }, { "epoch": 1.9318154726427466, "grad_norm": 3.4264721870422363, "learning_rate": 2.923269839144649e-07, "loss": 1.1465, "step": 32412 }, { "epoch": 1.9319346763619025, "grad_norm": 2.9616615772247314, "learning_rate": 2.913067478522824e-07, "loss": 1.0697, "step": 32414 }, { "epoch": 1.9320538800810585, "grad_norm": 3.153043746948242, "learning_rate": 2.902882900401305e-07, "loss": 1.1403, "step": 32416 }, { "epoch": 1.9321730838002145, "grad_norm": 2.9505438804626465, "learning_rate": 2.892716105144577e-07, "loss": 0.9818, "step": 32418 }, { "epoch": 1.9322922875193707, "grad_norm": 3.2991435527801514, "learning_rate": 2.882567093116129e-07, "loss": 0.9836, "step": 32420 }, { "epoch": 1.9324114912385266, "grad_norm": 3.3351359367370605, "learning_rate": 2.8724358646791684e-07, "loss": 1.0357, "step": 32422 }, { "epoch": 1.9325306949576828, "grad_norm": 3.4933457374572754, "learning_rate": 2.8623224201960174e-07, "loss": 1.1462, "step": 32424 }, { "epoch": 1.9326498986768388, "grad_norm": 2.904761791229248, "learning_rate": 2.852226760028609e-07, "loss": 1.092, "step": 32426 }, { "epoch": 1.9327691023959948, "grad_norm": 2.824267625808716, "learning_rate": 2.842148884537987e-07, "loss": 1.1515, "step": 32428 }, { "epoch": 1.9328883061151507, "grad_norm": 2.9715864658355713, "learning_rate": 2.8320887940846974e-07, "loss": 1.0458, "step": 32430 }, { "epoch": 1.9330075098343067, "grad_norm": 3.0685088634490967, "learning_rate": 2.8220464890286735e-07, "loss": 0.9894, "step": 32432 }, { "epoch": 1.9331267135534629, "grad_norm": 3.502612590789795, "learning_rate": 2.8120219697290726e-07, "loss": 1.1694, "step": 32434 }, { "epoch": 1.9332459172726189, "grad_norm": 3.5068347454071045, "learning_rate": 2.8020152365446085e-07, "loss": 1.0164, "step": 32436 }, { "epoch": 1.933365120991775, "grad_norm": 2.831620216369629, "learning_rate": 2.7920262898331606e-07, "loss": 1.1211, "step": 32438 }, { "epoch": 1.933484324710931, "grad_norm": 3.4299721717834473, "learning_rate": 2.78205512995211e-07, "loss": 1.1259, "step": 32440 }, { "epoch": 1.933603528430087, "grad_norm": 2.8628108501434326, "learning_rate": 2.772101757258228e-07, "loss": 1.0669, "step": 32442 }, { "epoch": 1.933722732149243, "grad_norm": 3.6041884422302246, "learning_rate": 2.76216617210745e-07, "loss": 1.0402, "step": 32444 }, { "epoch": 1.9338419358683991, "grad_norm": 3.2900984287261963, "learning_rate": 2.752248374855271e-07, "loss": 1.139, "step": 32446 }, { "epoch": 1.9339611395875551, "grad_norm": 3.3661930561065674, "learning_rate": 2.7423483658565177e-07, "loss": 1.0357, "step": 32448 }, { "epoch": 1.9340803433067113, "grad_norm": 3.315892219543457, "learning_rate": 2.732466145465296e-07, "loss": 1.06, "step": 32450 }, { "epoch": 1.9341995470258673, "grad_norm": 2.881757974624634, "learning_rate": 2.7226017140351e-07, "loss": 1.0147, "step": 32452 }, { "epoch": 1.9343187507450232, "grad_norm": 2.9682812690734863, "learning_rate": 2.7127550719189265e-07, "loss": 1.0974, "step": 32454 }, { "epoch": 1.9344379544641792, "grad_norm": 3.460754156112671, "learning_rate": 2.702926219468882e-07, "loss": 1.1004, "step": 32456 }, { "epoch": 1.9345571581833352, "grad_norm": 3.455594062805176, "learning_rate": 2.6931151570367406e-07, "loss": 1.0883, "step": 32458 }, { "epoch": 1.9346763619024914, "grad_norm": 3.2683165073394775, "learning_rate": 2.6833218849733335e-07, "loss": 1.0001, "step": 32460 }, { "epoch": 1.9347955656216476, "grad_norm": 3.0807156562805176, "learning_rate": 2.6735464036289906e-07, "loss": 1.108, "step": 32462 }, { "epoch": 1.9349147693408035, "grad_norm": 3.2062158584594727, "learning_rate": 2.663788713353599e-07, "loss": 1.0956, "step": 32464 }, { "epoch": 1.9350339730599595, "grad_norm": 3.0886518955230713, "learning_rate": 2.6540488144959906e-07, "loss": 1.0488, "step": 32466 }, { "epoch": 1.9351531767791155, "grad_norm": 3.0903701782226562, "learning_rate": 2.644326707404776e-07, "loss": 1.1955, "step": 32468 }, { "epoch": 1.9352723804982714, "grad_norm": 3.119201421737671, "learning_rate": 2.634622392427677e-07, "loss": 1.2317, "step": 32470 }, { "epoch": 1.9353915842174276, "grad_norm": 3.3285751342773438, "learning_rate": 2.62493586991186e-07, "loss": 1.139, "step": 32472 }, { "epoch": 1.9355107879365836, "grad_norm": 3.5103113651275635, "learning_rate": 2.61526714020377e-07, "loss": 0.9865, "step": 32474 }, { "epoch": 1.9356299916557398, "grad_norm": 3.2847979068756104, "learning_rate": 2.605616203649408e-07, "loss": 1.1942, "step": 32476 }, { "epoch": 1.9357491953748958, "grad_norm": 3.0778236389160156, "learning_rate": 2.5959830605939984e-07, "loss": 1.1284, "step": 32478 }, { "epoch": 1.9358683990940517, "grad_norm": 3.2668347358703613, "learning_rate": 2.586367711382098e-07, "loss": 1.1726, "step": 32480 }, { "epoch": 1.9359876028132077, "grad_norm": 3.4557342529296875, "learning_rate": 2.5767701563577106e-07, "loss": 1.0966, "step": 32482 }, { "epoch": 1.9361068065323637, "grad_norm": 3.088499069213867, "learning_rate": 2.5671903958641165e-07, "loss": 1.2497, "step": 32484 }, { "epoch": 1.9362260102515199, "grad_norm": 3.2239158153533936, "learning_rate": 2.5576284302441525e-07, "loss": 1.1872, "step": 32486 }, { "epoch": 1.936345213970676, "grad_norm": 3.446680784225464, "learning_rate": 2.548084259839767e-07, "loss": 1.0922, "step": 32488 }, { "epoch": 1.936464417689832, "grad_norm": 3.3825364112854004, "learning_rate": 2.5385578849924095e-07, "loss": 1.2137, "step": 32490 }, { "epoch": 1.936583621408988, "grad_norm": 3.100055694580078, "learning_rate": 2.529049306042863e-07, "loss": 1.0643, "step": 32492 }, { "epoch": 1.936702825128144, "grad_norm": 3.2895021438598633, "learning_rate": 2.5195585233313555e-07, "loss": 1.1272, "step": 32494 }, { "epoch": 1.9368220288473, "grad_norm": 3.149005889892578, "learning_rate": 2.5100855371973376e-07, "loss": 1.0228, "step": 32496 }, { "epoch": 1.9369412325664561, "grad_norm": 3.042412042617798, "learning_rate": 2.500630347979649e-07, "loss": 1.1262, "step": 32498 }, { "epoch": 1.937060436285612, "grad_norm": 3.087634563446045, "learning_rate": 2.4911929560166305e-07, "loss": 1.1512, "step": 32500 }, { "epoch": 1.9371796400047683, "grad_norm": 3.343106269836426, "learning_rate": 2.4817733616459006e-07, "loss": 1.101, "step": 32502 }, { "epoch": 1.9372988437239242, "grad_norm": 3.236830472946167, "learning_rate": 2.472371565204301e-07, "loss": 1.1143, "step": 32504 }, { "epoch": 1.9374180474430802, "grad_norm": 3.3341150283813477, "learning_rate": 2.462987567028285e-07, "loss": 1.0807, "step": 32506 }, { "epoch": 1.9375372511622362, "grad_norm": 3.3328800201416016, "learning_rate": 2.4536213674534737e-07, "loss": 1.0529, "step": 32508 }, { "epoch": 1.9376564548813922, "grad_norm": 3.454404830932617, "learning_rate": 2.444272966814987e-07, "loss": 1.0885, "step": 32510 }, { "epoch": 1.9377756586005483, "grad_norm": 3.2097270488739014, "learning_rate": 2.434942365447168e-07, "loss": 1.1728, "step": 32512 }, { "epoch": 1.9378948623197045, "grad_norm": 3.1151010990142822, "learning_rate": 2.425629563683918e-07, "loss": 1.0316, "step": 32514 }, { "epoch": 1.9380140660388605, "grad_norm": 3.217262029647827, "learning_rate": 2.4163345618583576e-07, "loss": 1.1629, "step": 32516 }, { "epoch": 1.9381332697580165, "grad_norm": 3.27398419380188, "learning_rate": 2.4070573603029443e-07, "loss": 1.0401, "step": 32518 }, { "epoch": 1.9382524734771724, "grad_norm": 2.799239158630371, "learning_rate": 2.397797959349579e-07, "loss": 1.0368, "step": 32520 }, { "epoch": 1.9383716771963284, "grad_norm": 3.439167022705078, "learning_rate": 2.388556359329497e-07, "loss": 1.0633, "step": 32522 }, { "epoch": 1.9384908809154846, "grad_norm": 3.050471067428589, "learning_rate": 2.3793325605733219e-07, "loss": 1.0031, "step": 32524 }, { "epoch": 1.9386100846346406, "grad_norm": 2.8834540843963623, "learning_rate": 2.3701265634110126e-07, "loss": 0.9711, "step": 32526 }, { "epoch": 1.9387292883537968, "grad_norm": 3.0317494869232178, "learning_rate": 2.3609383681719167e-07, "loss": 1.1286, "step": 32528 }, { "epoch": 1.9388484920729527, "grad_norm": 3.1558210849761963, "learning_rate": 2.35176797518466e-07, "loss": 1.1053, "step": 32530 }, { "epoch": 1.9389676957921087, "grad_norm": 3.3125205039978027, "learning_rate": 2.342615384777369e-07, "loss": 1.1214, "step": 32532 }, { "epoch": 1.9390868995112647, "grad_norm": 2.6748838424682617, "learning_rate": 2.3334805972775041e-07, "loss": 1.0, "step": 32534 }, { "epoch": 1.9392061032304206, "grad_norm": 3.495070457458496, "learning_rate": 2.324363613011693e-07, "loss": 1.0569, "step": 32536 }, { "epoch": 1.9393253069495768, "grad_norm": 3.4191646575927734, "learning_rate": 2.31526443230623e-07, "loss": 1.0697, "step": 32538 }, { "epoch": 1.939444510668733, "grad_norm": 3.0489325523376465, "learning_rate": 2.306183055486577e-07, "loss": 1.1592, "step": 32540 }, { "epoch": 1.939563714387889, "grad_norm": 2.9060635566711426, "learning_rate": 2.2971194828775854e-07, "loss": 1.0454, "step": 32542 }, { "epoch": 1.939682918107045, "grad_norm": 3.412055492401123, "learning_rate": 2.2880737148034403e-07, "loss": 1.067, "step": 32544 }, { "epoch": 1.939802121826201, "grad_norm": 3.458225727081299, "learning_rate": 2.2790457515878826e-07, "loss": 1.0763, "step": 32546 }, { "epoch": 1.939921325545357, "grad_norm": 3.5312819480895996, "learning_rate": 2.2700355935537654e-07, "loss": 1.1449, "step": 32548 }, { "epoch": 1.940040529264513, "grad_norm": 3.0841405391693115, "learning_rate": 2.2610432410234417e-07, "loss": 1.0583, "step": 32550 }, { "epoch": 1.940159732983669, "grad_norm": 2.9329261779785156, "learning_rate": 2.2520686943185987e-07, "loss": 1.0198, "step": 32552 }, { "epoch": 1.9402789367028253, "grad_norm": 3.402508497238159, "learning_rate": 2.2431119537602574e-07, "loss": 1.0357, "step": 32554 }, { "epoch": 1.9403981404219812, "grad_norm": 2.8263614177703857, "learning_rate": 2.2341730196688838e-07, "loss": 1.0209, "step": 32556 }, { "epoch": 1.9405173441411372, "grad_norm": 3.284705638885498, "learning_rate": 2.2252518923642218e-07, "loss": 1.0327, "step": 32558 }, { "epoch": 1.9406365478602932, "grad_norm": 3.0783252716064453, "learning_rate": 2.216348572165461e-07, "loss": 1.0283, "step": 32560 }, { "epoch": 1.9407557515794491, "grad_norm": 2.959703207015991, "learning_rate": 2.2074630593909575e-07, "loss": 1.0558, "step": 32562 }, { "epoch": 1.9408749552986053, "grad_norm": 3.7429511547088623, "learning_rate": 2.1985953543587902e-07, "loss": 1.0712, "step": 32564 }, { "epoch": 1.9409941590177615, "grad_norm": 3.208878755569458, "learning_rate": 2.1897454573860387e-07, "loss": 1.0789, "step": 32566 }, { "epoch": 1.9411133627369175, "grad_norm": 3.1224236488342285, "learning_rate": 2.1809133687892836e-07, "loss": 1.0647, "step": 32568 }, { "epoch": 1.9412325664560734, "grad_norm": 3.625223159790039, "learning_rate": 2.172099088884605e-07, "loss": 1.1481, "step": 32570 }, { "epoch": 1.9413517701752294, "grad_norm": 3.2228264808654785, "learning_rate": 2.163302617987195e-07, "loss": 1.078, "step": 32572 }, { "epoch": 1.9414709738943854, "grad_norm": 3.499915599822998, "learning_rate": 2.154523956411747e-07, "loss": 1.0484, "step": 32574 }, { "epoch": 1.9415901776135416, "grad_norm": 3.3536736965179443, "learning_rate": 2.1457631044723982e-07, "loss": 1.1059, "step": 32576 }, { "epoch": 1.9417093813326975, "grad_norm": 3.22743821144104, "learning_rate": 2.1370200624824533e-07, "loss": 0.9687, "step": 32578 }, { "epoch": 1.9418285850518537, "grad_norm": 3.726823329925537, "learning_rate": 2.1282948307547734e-07, "loss": 1.1878, "step": 32580 }, { "epoch": 1.9419477887710097, "grad_norm": 3.180133819580078, "learning_rate": 2.1195874096013867e-07, "loss": 1.0644, "step": 32582 }, { "epoch": 1.9420669924901657, "grad_norm": 3.1572301387786865, "learning_rate": 2.1108977993338776e-07, "loss": 1.0688, "step": 32584 }, { "epoch": 1.9421861962093216, "grad_norm": 3.050173282623291, "learning_rate": 2.1022260002629968e-07, "loss": 1.0208, "step": 32586 }, { "epoch": 1.9423053999284776, "grad_norm": 3.310687780380249, "learning_rate": 2.0935720126991076e-07, "loss": 1.0553, "step": 32588 }, { "epoch": 1.9424246036476338, "grad_norm": 3.0660061836242676, "learning_rate": 2.0849358369516291e-07, "loss": 1.0998, "step": 32590 }, { "epoch": 1.94254380736679, "grad_norm": 3.2942354679107666, "learning_rate": 2.0763174733297032e-07, "loss": 1.146, "step": 32592 }, { "epoch": 1.942663011085946, "grad_norm": 3.5335443019866943, "learning_rate": 2.0677169221414715e-07, "loss": 1.211, "step": 32594 }, { "epoch": 1.942782214805102, "grad_norm": 4.001776695251465, "learning_rate": 2.059134183694633e-07, "loss": 1.1803, "step": 32596 }, { "epoch": 1.942901418524258, "grad_norm": 3.050769567489624, "learning_rate": 2.0505692582962753e-07, "loss": 1.0815, "step": 32598 }, { "epoch": 1.9430206222434139, "grad_norm": 2.9932363033294678, "learning_rate": 2.04202214625282e-07, "loss": 0.9391, "step": 32600 }, { "epoch": 1.94313982596257, "grad_norm": 3.4036660194396973, "learning_rate": 2.0334928478699112e-07, "loss": 1.0539, "step": 32602 }, { "epoch": 1.943259029681726, "grad_norm": 3.344080686569214, "learning_rate": 2.0249813634527494e-07, "loss": 1.1233, "step": 32604 }, { "epoch": 1.9433782334008822, "grad_norm": 3.005654811859131, "learning_rate": 2.016487693305813e-07, "loss": 1.0459, "step": 32606 }, { "epoch": 1.9434974371200382, "grad_norm": 3.563403606414795, "learning_rate": 2.0080118377329704e-07, "loss": 1.0652, "step": 32608 }, { "epoch": 1.9436166408391942, "grad_norm": 2.7651782035827637, "learning_rate": 1.999553797037368e-07, "loss": 0.9064, "step": 32610 }, { "epoch": 1.9437358445583501, "grad_norm": 3.238243818283081, "learning_rate": 1.9911135715216522e-07, "loss": 1.0929, "step": 32612 }, { "epoch": 1.943855048277506, "grad_norm": 3.4609224796295166, "learning_rate": 1.982691161487693e-07, "loss": 1.0537, "step": 32614 }, { "epoch": 1.9439742519966623, "grad_norm": 2.908379077911377, "learning_rate": 1.9742865672368603e-07, "loss": 1.1467, "step": 32616 }, { "epoch": 1.9440934557158185, "grad_norm": 3.210803270339966, "learning_rate": 1.9658997890696917e-07, "loss": 1.0882, "step": 32618 }, { "epoch": 1.9442126594349745, "grad_norm": 3.1712453365325928, "learning_rate": 1.9575308272863913e-07, "loss": 1.1329, "step": 32620 }, { "epoch": 1.9443318631541304, "grad_norm": 3.4408042430877686, "learning_rate": 1.949179682186164e-07, "loss": 1.2025, "step": 32622 }, { "epoch": 1.9444510668732864, "grad_norm": 2.763432025909424, "learning_rate": 1.9408463540678824e-07, "loss": 1.0718, "step": 32624 }, { "epoch": 1.9445702705924424, "grad_norm": 3.252230405807495, "learning_rate": 1.9325308432296408e-07, "loss": 1.1581, "step": 32626 }, { "epoch": 1.9446894743115986, "grad_norm": 3.3877315521240234, "learning_rate": 1.9242331499688682e-07, "loss": 1.1976, "step": 32628 }, { "epoch": 1.9448086780307545, "grad_norm": 2.8213744163513184, "learning_rate": 1.9159532745823827e-07, "loss": 1.1003, "step": 32630 }, { "epoch": 1.9449278817499107, "grad_norm": 3.347571849822998, "learning_rate": 1.9076912173664475e-07, "loss": 1.0997, "step": 32632 }, { "epoch": 1.9450470854690667, "grad_norm": 3.494234323501587, "learning_rate": 1.899446978616659e-07, "loss": 0.9886, "step": 32634 }, { "epoch": 1.9451662891882227, "grad_norm": 3.183499574661255, "learning_rate": 1.8912205586278374e-07, "loss": 1.1203, "step": 32636 }, { "epoch": 1.9452854929073786, "grad_norm": 3.327788829803467, "learning_rate": 1.8830119576943028e-07, "loss": 1.255, "step": 32638 }, { "epoch": 1.9454046966265346, "grad_norm": 3.5008039474487305, "learning_rate": 1.8748211761097644e-07, "loss": 1.2599, "step": 32640 }, { "epoch": 1.9455239003456908, "grad_norm": 3.3493785858154297, "learning_rate": 1.8666482141671548e-07, "loss": 1.0726, "step": 32642 }, { "epoch": 1.945643104064847, "grad_norm": 3.301597833633423, "learning_rate": 1.8584930721589066e-07, "loss": 1.1207, "step": 32644 }, { "epoch": 1.945762307784003, "grad_norm": 3.3045854568481445, "learning_rate": 1.8503557503766757e-07, "loss": 0.9983, "step": 32646 }, { "epoch": 1.945881511503159, "grad_norm": 3.214827537536621, "learning_rate": 1.842236249111673e-07, "loss": 1.1176, "step": 32648 }, { "epoch": 1.9460007152223149, "grad_norm": 3.2179126739501953, "learning_rate": 1.8341345686543332e-07, "loss": 1.1869, "step": 32650 }, { "epoch": 1.9461199189414708, "grad_norm": 3.235483169555664, "learning_rate": 1.8260507092944245e-07, "loss": 1.0266, "step": 32652 }, { "epoch": 1.946239122660627, "grad_norm": 3.330876350402832, "learning_rate": 1.8179846713211601e-07, "loss": 1.0788, "step": 32654 }, { "epoch": 1.946358326379783, "grad_norm": 3.543820858001709, "learning_rate": 1.809936455023087e-07, "loss": 1.225, "step": 32656 }, { "epoch": 1.9464775300989392, "grad_norm": 3.2251789569854736, "learning_rate": 1.801906060688141e-07, "loss": 1.0252, "step": 32658 }, { "epoch": 1.9465967338180952, "grad_norm": 3.092649221420288, "learning_rate": 1.7938934886035374e-07, "loss": 1.1612, "step": 32660 }, { "epoch": 1.9467159375372511, "grad_norm": 3.5002658367156982, "learning_rate": 1.7858987390560467e-07, "loss": 1.138, "step": 32662 }, { "epoch": 1.946835141256407, "grad_norm": 3.3939106464385986, "learning_rate": 1.7779218123314955e-07, "loss": 1.0646, "step": 32664 }, { "epoch": 1.946954344975563, "grad_norm": 3.3895061016082764, "learning_rate": 1.769962708715378e-07, "loss": 1.2758, "step": 32666 }, { "epoch": 1.9470735486947193, "grad_norm": 3.369001626968384, "learning_rate": 1.7620214284923554e-07, "loss": 1.0396, "step": 32668 }, { "epoch": 1.9471927524138755, "grad_norm": 3.6200129985809326, "learning_rate": 1.7540979719465334e-07, "loss": 1.0794, "step": 32670 }, { "epoch": 1.9473119561330314, "grad_norm": 3.380560874938965, "learning_rate": 1.746192339361352e-07, "loss": 1.0964, "step": 32672 }, { "epoch": 1.9474311598521874, "grad_norm": 3.3004250526428223, "learning_rate": 1.738304531019641e-07, "loss": 0.9831, "step": 32674 }, { "epoch": 1.9475503635713434, "grad_norm": 3.578566312789917, "learning_rate": 1.7304345472035631e-07, "loss": 1.0037, "step": 32676 }, { "epoch": 1.9476695672904993, "grad_norm": 3.3570377826690674, "learning_rate": 1.7225823881946157e-07, "loss": 0.9921, "step": 32678 }, { "epoch": 1.9477887710096555, "grad_norm": 3.244022846221924, "learning_rate": 1.7147480542737405e-07, "loss": 1.1199, "step": 32680 }, { "epoch": 1.9479079747288115, "grad_norm": 3.2494497299194336, "learning_rate": 1.7069315457212132e-07, "loss": 1.1392, "step": 32682 }, { "epoch": 1.9480271784479677, "grad_norm": 3.4196763038635254, "learning_rate": 1.6991328628166437e-07, "loss": 1.2711, "step": 32684 }, { "epoch": 1.9481463821671237, "grad_norm": 3.520155429840088, "learning_rate": 1.6913520058390309e-07, "loss": 1.1179, "step": 32686 }, { "epoch": 1.9482655858862796, "grad_norm": 3.5452611446380615, "learning_rate": 1.6835889750666523e-07, "loss": 1.2613, "step": 32688 }, { "epoch": 1.9483847896054356, "grad_norm": 2.8537545204162598, "learning_rate": 1.6758437707772855e-07, "loss": 1.0328, "step": 32690 }, { "epoch": 1.9485039933245916, "grad_norm": 3.4542975425720215, "learning_rate": 1.6681163932479871e-07, "loss": 1.1072, "step": 32692 }, { "epoch": 1.9486231970437478, "grad_norm": 3.285979747772217, "learning_rate": 1.6604068427551466e-07, "loss": 1.017, "step": 32694 }, { "epoch": 1.948742400762904, "grad_norm": 3.0805933475494385, "learning_rate": 1.6527151195746548e-07, "loss": 1.0808, "step": 32696 }, { "epoch": 1.94886160448206, "grad_norm": 3.0390634536743164, "learning_rate": 1.645041223981625e-07, "loss": 1.2713, "step": 32698 }, { "epoch": 1.9489808082012159, "grad_norm": 3.3408122062683105, "learning_rate": 1.6373851562505594e-07, "loss": 1.1071, "step": 32700 }, { "epoch": 1.9491000119203719, "grad_norm": 3.114037275314331, "learning_rate": 1.6297469166553502e-07, "loss": 1.1571, "step": 32702 }, { "epoch": 1.9492192156395278, "grad_norm": 3.0593323707580566, "learning_rate": 1.622126505469279e-07, "loss": 1.0202, "step": 32704 }, { "epoch": 1.949338419358684, "grad_norm": 2.9226715564727783, "learning_rate": 1.61452392296485e-07, "loss": 1.1049, "step": 32706 }, { "epoch": 1.94945762307784, "grad_norm": 2.9436609745025635, "learning_rate": 1.606939169414179e-07, "loss": 1.0942, "step": 32708 }, { "epoch": 1.9495768267969962, "grad_norm": 3.5883729457855225, "learning_rate": 1.599372245088493e-07, "loss": 1.1252, "step": 32710 }, { "epoch": 1.9496960305161521, "grad_norm": 3.178168296813965, "learning_rate": 1.59182315025852e-07, "loss": 1.0928, "step": 32712 }, { "epoch": 1.9498152342353081, "grad_norm": 2.904949188232422, "learning_rate": 1.5842918851943222e-07, "loss": 1.027, "step": 32714 }, { "epoch": 1.949934437954464, "grad_norm": 2.782017946243286, "learning_rate": 1.5767784501653505e-07, "loss": 1.0939, "step": 32716 }, { "epoch": 1.95005364167362, "grad_norm": 3.5357561111450195, "learning_rate": 1.5692828454403342e-07, "loss": 1.0985, "step": 32718 }, { "epoch": 1.9501728453927762, "grad_norm": 3.3962841033935547, "learning_rate": 1.5618050712873921e-07, "loss": 1.0354, "step": 32720 }, { "epoch": 1.9502920491119324, "grad_norm": 3.4962456226348877, "learning_rate": 1.5543451279741438e-07, "loss": 1.124, "step": 32722 }, { "epoch": 1.9504112528310884, "grad_norm": 3.4610908031463623, "learning_rate": 1.5469030157673203e-07, "loss": 1.026, "step": 32724 }, { "epoch": 1.9505304565502444, "grad_norm": 3.6080856323242188, "learning_rate": 1.5394787349332084e-07, "loss": 1.1956, "step": 32726 }, { "epoch": 1.9506496602694003, "grad_norm": 3.1940605640411377, "learning_rate": 1.5320722857374847e-07, "loss": 0.9424, "step": 32728 }, { "epoch": 1.9507688639885563, "grad_norm": 3.2970309257507324, "learning_rate": 1.5246836684449372e-07, "loss": 1.0318, "step": 32730 }, { "epoch": 1.9508880677077125, "grad_norm": 3.244748592376709, "learning_rate": 1.5173128833200212e-07, "loss": 0.9424, "step": 32732 }, { "epoch": 1.9510072714268685, "grad_norm": 3.052830696105957, "learning_rate": 1.5099599306263034e-07, "loss": 1.0569, "step": 32734 }, { "epoch": 1.9511264751460247, "grad_norm": 3.089712619781494, "learning_rate": 1.5026248106269624e-07, "loss": 1.1068, "step": 32736 }, { "epoch": 1.9512456788651806, "grad_norm": 3.220571279525757, "learning_rate": 1.4953075235842884e-07, "loss": 1.0934, "step": 32738 }, { "epoch": 1.9513648825843366, "grad_norm": 3.0417604446411133, "learning_rate": 1.4880080697600718e-07, "loss": 0.9895, "step": 32740 }, { "epoch": 1.9514840863034926, "grad_norm": 3.1473798751831055, "learning_rate": 1.480726449415437e-07, "loss": 1.1083, "step": 32742 }, { "epoch": 1.9516032900226485, "grad_norm": 4.164779186248779, "learning_rate": 1.4734626628108983e-07, "loss": 1.1316, "step": 32744 }, { "epoch": 1.9517224937418047, "grad_norm": 3.3329813480377197, "learning_rate": 1.4662167102063028e-07, "loss": 1.2154, "step": 32746 }, { "epoch": 1.951841697460961, "grad_norm": 3.634823799133301, "learning_rate": 1.4589885918608326e-07, "loss": 1.0776, "step": 32748 }, { "epoch": 1.951960901180117, "grad_norm": 3.042658805847168, "learning_rate": 1.4517783080331138e-07, "loss": 1.052, "step": 32750 }, { "epoch": 1.9520801048992729, "grad_norm": 3.0913469791412354, "learning_rate": 1.4445858589809957e-07, "loss": 1.0685, "step": 32752 }, { "epoch": 1.9521993086184288, "grad_norm": 3.442610740661621, "learning_rate": 1.4374112449618837e-07, "loss": 1.0126, "step": 32754 }, { "epoch": 1.9523185123375848, "grad_norm": 2.9858336448669434, "learning_rate": 1.4302544662323504e-07, "loss": 0.9298, "step": 32756 }, { "epoch": 1.952437716056741, "grad_norm": 3.261981964111328, "learning_rate": 1.4231155230484684e-07, "loss": 1.0175, "step": 32758 }, { "epoch": 1.952556919775897, "grad_norm": 3.744978666305542, "learning_rate": 1.415994415665589e-07, "loss": 1.1386, "step": 32760 }, { "epoch": 1.9526761234950532, "grad_norm": 3.213740587234497, "learning_rate": 1.4088911443385089e-07, "loss": 1.1641, "step": 32762 }, { "epoch": 1.9527953272142091, "grad_norm": 2.8137354850769043, "learning_rate": 1.4018057093212466e-07, "loss": 1.0819, "step": 32764 }, { "epoch": 1.952914530933365, "grad_norm": 3.47566556930542, "learning_rate": 1.3947381108673773e-07, "loss": 1.1014, "step": 32766 }, { "epoch": 1.953033734652521, "grad_norm": 3.520340919494629, "learning_rate": 1.3876883492296987e-07, "loss": 1.1366, "step": 32768 }, { "epoch": 1.9531529383716772, "grad_norm": 3.4683852195739746, "learning_rate": 1.3806564246603426e-07, "loss": 1.0823, "step": 32770 }, { "epoch": 1.9532721420908332, "grad_norm": 3.1979851722717285, "learning_rate": 1.373642337410941e-07, "loss": 1.147, "step": 32772 }, { "epoch": 1.9533913458099894, "grad_norm": 3.0382280349731445, "learning_rate": 1.3666460877324038e-07, "loss": 1.0508, "step": 32774 }, { "epoch": 1.9535105495291454, "grad_norm": 3.107515573501587, "learning_rate": 1.3596676758749204e-07, "loss": 1.1556, "step": 32776 }, { "epoch": 1.9536297532483013, "grad_norm": 3.137742519378662, "learning_rate": 1.352707102088291e-07, "loss": 1.106, "step": 32778 }, { "epoch": 1.9537489569674573, "grad_norm": 3.464341878890991, "learning_rate": 1.3457643666213716e-07, "loss": 1.1274, "step": 32780 }, { "epoch": 1.9538681606866133, "grad_norm": 3.2287118434906006, "learning_rate": 1.3388394697226304e-07, "loss": 1.1522, "step": 32782 }, { "epoch": 1.9539873644057695, "grad_norm": 3.4149091243743896, "learning_rate": 1.3319324116397025e-07, "loss": 1.0958, "step": 32784 }, { "epoch": 1.9541065681249254, "grad_norm": 3.032987356185913, "learning_rate": 1.3250431926197792e-07, "loss": 1.1043, "step": 32786 }, { "epoch": 1.9542257718440816, "grad_norm": 3.4506351947784424, "learning_rate": 1.318171812909219e-07, "loss": 1.1361, "step": 32788 }, { "epoch": 1.9543449755632376, "grad_norm": 3.136404275894165, "learning_rate": 1.3113182727539365e-07, "loss": 1.0675, "step": 32790 }, { "epoch": 1.9544641792823936, "grad_norm": 3.445446252822876, "learning_rate": 1.3044825723989573e-07, "loss": 1.0306, "step": 32792 }, { "epoch": 1.9545833830015495, "grad_norm": 3.1313157081604004, "learning_rate": 1.2976647120889197e-07, "loss": 0.9998, "step": 32794 }, { "epoch": 1.9547025867207057, "grad_norm": 3.295137405395508, "learning_rate": 1.2908646920677946e-07, "loss": 1.0072, "step": 32796 }, { "epoch": 1.9548217904398617, "grad_norm": 3.0505030155181885, "learning_rate": 1.2840825125786659e-07, "loss": 1.0484, "step": 32798 }, { "epoch": 1.954940994159018, "grad_norm": 3.5554940700531006, "learning_rate": 1.2773181738642836e-07, "loss": 1.0832, "step": 32800 }, { "epoch": 1.9550601978781739, "grad_norm": 3.569399118423462, "learning_rate": 1.2705716761665652e-07, "loss": 1.2381, "step": 32802 }, { "epoch": 1.9551794015973298, "grad_norm": 3.03488826751709, "learning_rate": 1.2638430197268736e-07, "loss": 1.1063, "step": 32804 }, { "epoch": 1.9552986053164858, "grad_norm": 3.3691296577453613, "learning_rate": 1.2571322047859602e-07, "loss": 1.1481, "step": 32806 }, { "epoch": 1.9554178090356418, "grad_norm": 3.4641599655151367, "learning_rate": 1.2504392315838553e-07, "loss": 1.1203, "step": 32808 }, { "epoch": 1.955537012754798, "grad_norm": 3.6445729732513428, "learning_rate": 1.2437641003599787e-07, "loss": 1.1269, "step": 32810 }, { "epoch": 1.955656216473954, "grad_norm": 3.2879488468170166, "learning_rate": 1.2371068113531393e-07, "loss": 1.2245, "step": 32812 }, { "epoch": 1.9557754201931101, "grad_norm": 3.305694103240967, "learning_rate": 1.2304673648014798e-07, "loss": 1.245, "step": 32814 }, { "epoch": 1.955894623912266, "grad_norm": 3.407719850540161, "learning_rate": 1.2238457609425324e-07, "loss": 1.042, "step": 32816 }, { "epoch": 1.956013827631422, "grad_norm": 3.300248622894287, "learning_rate": 1.2172420000131634e-07, "loss": 1.0477, "step": 32818 }, { "epoch": 1.956133031350578, "grad_norm": 3.393115997314453, "learning_rate": 1.2106560822496283e-07, "loss": 1.1686, "step": 32820 }, { "epoch": 1.9562522350697342, "grad_norm": 3.3950889110565186, "learning_rate": 1.2040880078875162e-07, "loss": 1.1389, "step": 32822 }, { "epoch": 1.9563714387888902, "grad_norm": 3.170259952545166, "learning_rate": 1.1975377771617503e-07, "loss": 1.2256, "step": 32824 }, { "epoch": 1.9564906425080464, "grad_norm": 3.2437777519226074, "learning_rate": 1.1910053903067541e-07, "loss": 1.0062, "step": 32826 }, { "epoch": 1.9566098462272024, "grad_norm": 3.0555078983306885, "learning_rate": 1.1844908475561189e-07, "loss": 1.1973, "step": 32828 }, { "epoch": 1.9567290499463583, "grad_norm": 2.6120221614837646, "learning_rate": 1.1779941491428803e-07, "loss": 1.0246, "step": 32830 }, { "epoch": 1.9568482536655143, "grad_norm": 3.211573839187622, "learning_rate": 1.1715152952995745e-07, "loss": 1.1187, "step": 32832 }, { "epoch": 1.9569674573846703, "grad_norm": 3.1661107540130615, "learning_rate": 1.1650542862578495e-07, "loss": 1.1719, "step": 32834 }, { "epoch": 1.9570866611038265, "grad_norm": 3.355921983718872, "learning_rate": 1.1586111222488538e-07, "loss": 1.1358, "step": 32836 }, { "epoch": 1.9572058648229824, "grad_norm": 3.5056264400482178, "learning_rate": 1.1521858035031252e-07, "loss": 1.0377, "step": 32838 }, { "epoch": 1.9573250685421386, "grad_norm": 3.409789562225342, "learning_rate": 1.1457783302505354e-07, "loss": 0.9972, "step": 32840 }, { "epoch": 1.9574442722612946, "grad_norm": 3.1276257038116455, "learning_rate": 1.1393887027202343e-07, "loss": 1.0298, "step": 32842 }, { "epoch": 1.9575634759804506, "grad_norm": 3.0509555339813232, "learning_rate": 1.133016921140817e-07, "loss": 1.0671, "step": 32844 }, { "epoch": 1.9576826796996065, "grad_norm": 3.4245054721832275, "learning_rate": 1.1266629857402677e-07, "loss": 1.0587, "step": 32846 }, { "epoch": 1.9578018834187627, "grad_norm": 3.2232065200805664, "learning_rate": 1.120326896745849e-07, "loss": 1.1727, "step": 32848 }, { "epoch": 1.9579210871379187, "grad_norm": 3.1957263946533203, "learning_rate": 1.1140086543842132e-07, "loss": 1.1322, "step": 32850 }, { "epoch": 1.9580402908570749, "grad_norm": 3.2009453773498535, "learning_rate": 1.107708258881457e-07, "loss": 1.1178, "step": 32852 }, { "epoch": 1.9581594945762308, "grad_norm": 3.0571794509887695, "learning_rate": 1.1014257104629e-07, "loss": 1.0774, "step": 32854 }, { "epoch": 1.9582786982953868, "grad_norm": 3.581608772277832, "learning_rate": 1.0951610093533071e-07, "loss": 1.051, "step": 32856 }, { "epoch": 1.9583979020145428, "grad_norm": 3.3526828289031982, "learning_rate": 1.0889141557767213e-07, "loss": 1.1398, "step": 32858 }, { "epoch": 1.9585171057336987, "grad_norm": 3.0092501640319824, "learning_rate": 1.082685149956797e-07, "loss": 1.0507, "step": 32860 }, { "epoch": 1.958636309452855, "grad_norm": 3.193246603012085, "learning_rate": 1.0764739921161893e-07, "loss": 1.0362, "step": 32862 }, { "epoch": 1.9587555131720111, "grad_norm": 3.442564010620117, "learning_rate": 1.070280682477165e-07, "loss": 1.2095, "step": 32864 }, { "epoch": 1.958874716891167, "grad_norm": 3.4544718265533447, "learning_rate": 1.064105221261269e-07, "loss": 1.1643, "step": 32866 }, { "epoch": 1.958993920610323, "grad_norm": 3.430934190750122, "learning_rate": 1.0579476086893803e-07, "loss": 1.0958, "step": 32868 }, { "epoch": 1.959113124329479, "grad_norm": 3.1565306186676025, "learning_rate": 1.051807844981878e-07, "loss": 1.1141, "step": 32870 }, { "epoch": 1.959232328048635, "grad_norm": 3.3490917682647705, "learning_rate": 1.0456859303583089e-07, "loss": 1.1008, "step": 32872 }, { "epoch": 1.9593515317677912, "grad_norm": 2.896399974822998, "learning_rate": 1.0395818650377198e-07, "loss": 1.1175, "step": 32874 }, { "epoch": 1.9594707354869472, "grad_norm": 3.4315836429595947, "learning_rate": 1.0334956492384918e-07, "loss": 1.0079, "step": 32876 }, { "epoch": 1.9595899392061034, "grad_norm": 3.132883071899414, "learning_rate": 1.0274272831782839e-07, "loss": 1.0286, "step": 32878 }, { "epoch": 1.9597091429252593, "grad_norm": 3.446661949157715, "learning_rate": 1.0213767670742003e-07, "loss": 1.0848, "step": 32880 }, { "epoch": 1.9598283466444153, "grad_norm": 3.369624137878418, "learning_rate": 1.0153441011427345e-07, "loss": 1.1652, "step": 32882 }, { "epoch": 1.9599475503635713, "grad_norm": 3.270531177520752, "learning_rate": 1.0093292855997139e-07, "loss": 1.1767, "step": 32884 }, { "epoch": 1.9600667540827272, "grad_norm": 2.911874771118164, "learning_rate": 1.0033323206601886e-07, "loss": 1.0177, "step": 32886 }, { "epoch": 1.9601859578018834, "grad_norm": 3.155703544616699, "learning_rate": 9.973532065387648e-08, "loss": 1.1507, "step": 32888 }, { "epoch": 1.9603051615210396, "grad_norm": 3.2359402179718018, "learning_rate": 9.913919434493823e-08, "loss": 1.0442, "step": 32890 }, { "epoch": 1.9604243652401956, "grad_norm": 3.6877782344818115, "learning_rate": 9.854485316052043e-08, "loss": 1.0743, "step": 32892 }, { "epoch": 1.9605435689593516, "grad_norm": 3.0128896236419678, "learning_rate": 9.79522971218949e-08, "loss": 1.0954, "step": 32894 }, { "epoch": 1.9606627726785075, "grad_norm": 3.291261672973633, "learning_rate": 9.73615262502503e-08, "loss": 1.0036, "step": 32896 }, { "epoch": 1.9607819763976635, "grad_norm": 3.231896162033081, "learning_rate": 9.677254056672525e-08, "loss": 1.2135, "step": 32898 }, { "epoch": 1.9609011801168197, "grad_norm": 3.682321786880493, "learning_rate": 9.618534009238623e-08, "loss": 1.1624, "step": 32900 }, { "epoch": 1.9610203838359757, "grad_norm": 3.3744759559631348, "learning_rate": 9.55999248482442e-08, "loss": 1.0526, "step": 32902 }, { "epoch": 1.9611395875551318, "grad_norm": 3.4626123905181885, "learning_rate": 9.501629485523244e-08, "loss": 1.1681, "step": 32904 }, { "epoch": 1.9612587912742878, "grad_norm": 3.3184545040130615, "learning_rate": 9.443445013424534e-08, "loss": 1.0541, "step": 32906 }, { "epoch": 1.9613779949934438, "grad_norm": 2.9467287063598633, "learning_rate": 9.385439070608293e-08, "loss": 1.2141, "step": 32908 }, { "epoch": 1.9614971987125998, "grad_norm": 3.279736280441284, "learning_rate": 9.327611659150082e-08, "loss": 1.052, "step": 32910 }, { "epoch": 1.9616164024317557, "grad_norm": 3.408257007598877, "learning_rate": 9.269962781118802e-08, "loss": 1.1548, "step": 32912 }, { "epoch": 1.961735606150912, "grad_norm": 3.191147565841675, "learning_rate": 9.212492438576137e-08, "loss": 1.1009, "step": 32914 }, { "epoch": 1.961854809870068, "grad_norm": 3.204913377761841, "learning_rate": 9.155200633578776e-08, "loss": 1.0501, "step": 32916 }, { "epoch": 1.961974013589224, "grad_norm": 3.887456178665161, "learning_rate": 9.098087368176189e-08, "loss": 1.2416, "step": 32918 }, { "epoch": 1.96209321730838, "grad_norm": 3.6887013912200928, "learning_rate": 9.041152644410633e-08, "loss": 1.2223, "step": 32920 }, { "epoch": 1.962212421027536, "grad_norm": 3.406318426132202, "learning_rate": 8.984396464319367e-08, "loss": 1.1125, "step": 32922 }, { "epoch": 1.962331624746692, "grad_norm": 2.9839797019958496, "learning_rate": 8.927818829933543e-08, "loss": 1.1088, "step": 32924 }, { "epoch": 1.9624508284658482, "grad_norm": 3.0993382930755615, "learning_rate": 8.871419743275989e-08, "loss": 1.081, "step": 32926 }, { "epoch": 1.9625700321850041, "grad_norm": 3.394439935684204, "learning_rate": 8.815199206364533e-08, "loss": 1.1873, "step": 32928 }, { "epoch": 1.9626892359041603, "grad_norm": 3.2881295680999756, "learning_rate": 8.759157221210346e-08, "loss": 1.1836, "step": 32930 }, { "epoch": 1.9628084396233163, "grad_norm": 3.150216579437256, "learning_rate": 8.703293789819045e-08, "loss": 1.1844, "step": 32932 }, { "epoch": 1.9629276433424723, "grad_norm": 3.571183204650879, "learning_rate": 8.647608914188476e-08, "loss": 1.0623, "step": 32934 }, { "epoch": 1.9630468470616282, "grad_norm": 3.2300872802734375, "learning_rate": 8.592102596310381e-08, "loss": 1.1089, "step": 32936 }, { "epoch": 1.9631660507807842, "grad_norm": 3.2794926166534424, "learning_rate": 8.536774838170391e-08, "loss": 0.9933, "step": 32938 }, { "epoch": 1.9632852544999404, "grad_norm": 3.171208381652832, "learning_rate": 8.481625641748592e-08, "loss": 1.0957, "step": 32940 }, { "epoch": 1.9634044582190966, "grad_norm": 2.638995409011841, "learning_rate": 8.426655009017292e-08, "loss": 0.9576, "step": 32942 }, { "epoch": 1.9635236619382526, "grad_norm": 2.9038243293762207, "learning_rate": 8.371862941942699e-08, "loss": 0.9246, "step": 32944 }, { "epoch": 1.9636428656574085, "grad_norm": 3.205735683441162, "learning_rate": 8.317249442485464e-08, "loss": 1.1231, "step": 32946 }, { "epoch": 1.9637620693765645, "grad_norm": 3.0459494590759277, "learning_rate": 8.262814512599026e-08, "loss": 0.9845, "step": 32948 }, { "epoch": 1.9638812730957205, "grad_norm": 3.111745595932007, "learning_rate": 8.208558154230717e-08, "loss": 1.1367, "step": 32950 }, { "epoch": 1.9640004768148767, "grad_norm": 2.9911508560180664, "learning_rate": 8.15448036932176e-08, "loss": 0.9927, "step": 32952 }, { "epoch": 1.9641196805340326, "grad_norm": 3.3371763229370117, "learning_rate": 8.100581159806165e-08, "loss": 1.1053, "step": 32954 }, { "epoch": 1.9642388842531888, "grad_norm": 2.9044315814971924, "learning_rate": 8.046860527612387e-08, "loss": 1.051, "step": 32956 }, { "epoch": 1.9643580879723448, "grad_norm": 3.5138180255889893, "learning_rate": 7.993318474662226e-08, "loss": 1.194, "step": 32958 }, { "epoch": 1.9644772916915008, "grad_norm": 3.111534595489502, "learning_rate": 7.939955002871368e-08, "loss": 1.1298, "step": 32960 }, { "epoch": 1.9645964954106567, "grad_norm": 3.3758842945098877, "learning_rate": 7.88677011414829e-08, "loss": 1.0226, "step": 32962 }, { "epoch": 1.9647156991298127, "grad_norm": 3.414482355117798, "learning_rate": 7.833763810395355e-08, "loss": 1.1436, "step": 32964 }, { "epoch": 1.964834902848969, "grad_norm": 2.9565651416778564, "learning_rate": 7.780936093509939e-08, "loss": 1.0636, "step": 32966 }, { "epoch": 1.964954106568125, "grad_norm": 3.3408584594726562, "learning_rate": 7.728286965380527e-08, "loss": 1.0303, "step": 32968 }, { "epoch": 1.965073310287281, "grad_norm": 3.314429759979248, "learning_rate": 7.675816427891169e-08, "loss": 1.0983, "step": 32970 }, { "epoch": 1.965192514006437, "grad_norm": 3.684166669845581, "learning_rate": 7.623524482919253e-08, "loss": 1.0447, "step": 32972 }, { "epoch": 1.965311717725593, "grad_norm": 3.0570156574249268, "learning_rate": 7.571411132334949e-08, "loss": 1.0645, "step": 32974 }, { "epoch": 1.965430921444749, "grad_norm": 2.952948570251465, "learning_rate": 7.519476378002321e-08, "loss": 1.1217, "step": 32976 }, { "epoch": 1.9655501251639051, "grad_norm": 3.23142409324646, "learning_rate": 7.467720221780439e-08, "loss": 1.2332, "step": 32978 }, { "epoch": 1.9656693288830611, "grad_norm": 3.058206796646118, "learning_rate": 7.416142665519488e-08, "loss": 1.0406, "step": 32980 }, { "epoch": 1.9657885326022173, "grad_norm": 3.2561326026916504, "learning_rate": 7.364743711064659e-08, "loss": 1.0612, "step": 32982 }, { "epoch": 1.9659077363213733, "grad_norm": 2.743530035018921, "learning_rate": 7.313523360255592e-08, "loss": 1.112, "step": 32984 }, { "epoch": 1.9660269400405292, "grad_norm": 3.171283483505249, "learning_rate": 7.262481614924155e-08, "loss": 1.122, "step": 32986 }, { "epoch": 1.9661461437596852, "grad_norm": 3.1463136672973633, "learning_rate": 7.211618476896109e-08, "loss": 1.019, "step": 32988 }, { "epoch": 1.9662653474788412, "grad_norm": 3.4169459342956543, "learning_rate": 7.160933947991111e-08, "loss": 1.1004, "step": 32990 }, { "epoch": 1.9663845511979974, "grad_norm": 3.0179359912872314, "learning_rate": 7.110428030022153e-08, "loss": 1.2217, "step": 32992 }, { "epoch": 1.9665037549171536, "grad_norm": 3.264394760131836, "learning_rate": 7.060100724796126e-08, "loss": 1.0731, "step": 32994 }, { "epoch": 1.9666229586363095, "grad_norm": 3.453557252883911, "learning_rate": 7.009952034113809e-08, "loss": 1.04, "step": 32996 }, { "epoch": 1.9667421623554655, "grad_norm": 3.3857662677764893, "learning_rate": 6.959981959768214e-08, "loss": 1.0514, "step": 32998 }, { "epoch": 1.9668613660746215, "grad_norm": 2.9821012020111084, "learning_rate": 6.910190503547909e-08, "loss": 1.1016, "step": 33000 }, { "epoch": 1.9669805697937774, "grad_norm": 3.051848888397217, "learning_rate": 6.860577667234247e-08, "loss": 1.1099, "step": 33002 }, { "epoch": 1.9670997735129336, "grad_norm": 3.1339974403381348, "learning_rate": 6.81114345260081e-08, "loss": 0.9724, "step": 33004 }, { "epoch": 1.9672189772320896, "grad_norm": 3.327211856842041, "learning_rate": 6.761887861417293e-08, "loss": 1.0405, "step": 33006 }, { "epoch": 1.9673381809512458, "grad_norm": 3.3342602252960205, "learning_rate": 6.712810895445065e-08, "loss": 1.114, "step": 33008 }, { "epoch": 1.9674573846704018, "grad_norm": 3.2723097801208496, "learning_rate": 6.663912556439944e-08, "loss": 1.0654, "step": 33010 }, { "epoch": 1.9675765883895577, "grad_norm": 3.422367811203003, "learning_rate": 6.615192846151086e-08, "loss": 1.0536, "step": 33012 }, { "epoch": 1.9676957921087137, "grad_norm": 3.257884979248047, "learning_rate": 6.566651766321541e-08, "loss": 1.0968, "step": 33014 }, { "epoch": 1.9678149958278697, "grad_norm": 3.543151617050171, "learning_rate": 6.518289318687698e-08, "loss": 1.1032, "step": 33016 }, { "epoch": 1.9679341995470259, "grad_norm": 3.1370513439178467, "learning_rate": 6.47010550497984e-08, "loss": 0.9586, "step": 33018 }, { "epoch": 1.968053403266182, "grad_norm": 3.1372835636138916, "learning_rate": 6.422100326921033e-08, "loss": 1.0654, "step": 33020 }, { "epoch": 1.968172606985338, "grad_norm": 3.456167221069336, "learning_rate": 6.374273786229345e-08, "loss": 1.1401, "step": 33022 }, { "epoch": 1.968291810704494, "grad_norm": 3.4318225383758545, "learning_rate": 6.326625884615634e-08, "loss": 1.0139, "step": 33024 }, { "epoch": 1.96841101442365, "grad_norm": 3.5280184745788574, "learning_rate": 6.279156623784088e-08, "loss": 1.207, "step": 33026 }, { "epoch": 1.968530218142806, "grad_norm": 3.3307247161865234, "learning_rate": 6.231866005432241e-08, "loss": 1.1589, "step": 33028 }, { "epoch": 1.9686494218619621, "grad_norm": 3.4092705249786377, "learning_rate": 6.184754031253737e-08, "loss": 1.2306, "step": 33030 }, { "epoch": 1.968768625581118, "grad_norm": 3.286372184753418, "learning_rate": 6.137820702931674e-08, "loss": 1.1644, "step": 33032 }, { "epoch": 1.9688878293002743, "grad_norm": 3.5472421646118164, "learning_rate": 6.091066022146374e-08, "loss": 1.0402, "step": 33034 }, { "epoch": 1.9690070330194303, "grad_norm": 3.637617588043213, "learning_rate": 6.04448999057039e-08, "loss": 1.1208, "step": 33036 }, { "epoch": 1.9691262367385862, "grad_norm": 2.8368453979492188, "learning_rate": 5.99809260986961e-08, "loss": 1.0566, "step": 33038 }, { "epoch": 1.9692454404577422, "grad_norm": 2.8991568088531494, "learning_rate": 5.9518738817038175e-08, "loss": 1.0905, "step": 33040 }, { "epoch": 1.9693646441768982, "grad_norm": 3.475952625274658, "learning_rate": 5.9058338077261356e-08, "loss": 1.1, "step": 33042 }, { "epoch": 1.9694838478960544, "grad_norm": 4.4206624031066895, "learning_rate": 5.859972389584134e-08, "loss": 1.1866, "step": 33044 }, { "epoch": 1.9696030516152105, "grad_norm": 3.2633864879608154, "learning_rate": 5.814289628918168e-08, "loss": 1.1822, "step": 33046 }, { "epoch": 1.9697222553343665, "grad_norm": 3.1601696014404297, "learning_rate": 5.768785527362486e-08, "loss": 1.1382, "step": 33048 }, { "epoch": 1.9698414590535225, "grad_norm": 3.1898601055145264, "learning_rate": 5.723460086544674e-08, "loss": 1.0885, "step": 33050 }, { "epoch": 1.9699606627726785, "grad_norm": 3.216582775115967, "learning_rate": 5.678313308086769e-08, "loss": 1.204, "step": 33052 }, { "epoch": 1.9700798664918344, "grad_norm": 3.630056858062744, "learning_rate": 5.6333451936035894e-08, "loss": 1.1415, "step": 33054 }, { "epoch": 1.9701990702109906, "grad_norm": 3.2669665813446045, "learning_rate": 5.588555744703294e-08, "loss": 1.0372, "step": 33056 }, { "epoch": 1.9703182739301466, "grad_norm": 3.663661479949951, "learning_rate": 5.543944962989045e-08, "loss": 1.3433, "step": 33058 }, { "epoch": 1.9704374776493028, "grad_norm": 3.454970121383667, "learning_rate": 5.499512850055677e-08, "loss": 1.0862, "step": 33060 }, { "epoch": 1.9705566813684587, "grad_norm": 3.329177141189575, "learning_rate": 5.455259407493585e-08, "loss": 1.0187, "step": 33062 }, { "epoch": 1.9706758850876147, "grad_norm": 3.389712333679199, "learning_rate": 5.4111846368853916e-08, "loss": 1.0366, "step": 33064 }, { "epoch": 1.9707950888067707, "grad_norm": 3.076923370361328, "learning_rate": 5.367288539808168e-08, "loss": 1.1405, "step": 33066 }, { "epoch": 1.9709142925259266, "grad_norm": 2.851682186126709, "learning_rate": 5.323571117831772e-08, "loss": 1.1884, "step": 33068 }, { "epoch": 1.9710334962450828, "grad_norm": 3.099775552749634, "learning_rate": 5.28003237251995e-08, "loss": 1.0629, "step": 33070 }, { "epoch": 1.971152699964239, "grad_norm": 3.1139307022094727, "learning_rate": 5.236672305431456e-08, "loss": 1.0377, "step": 33072 }, { "epoch": 1.971271903683395, "grad_norm": 3.159510612487793, "learning_rate": 5.1934909181156064e-08, "loss": 1.2104, "step": 33074 }, { "epoch": 1.971391107402551, "grad_norm": 3.1905434131622314, "learning_rate": 5.150488212118942e-08, "loss": 1.0662, "step": 33076 }, { "epoch": 1.971510311121707, "grad_norm": 3.369523048400879, "learning_rate": 5.107664188978567e-08, "loss": 1.0623, "step": 33078 }, { "epoch": 1.971629514840863, "grad_norm": 3.2627546787261963, "learning_rate": 5.0650188502271434e-08, "loss": 0.9933, "step": 33080 }, { "epoch": 1.971748718560019, "grad_norm": 3.368212938308716, "learning_rate": 5.022552197390118e-08, "loss": 1.0242, "step": 33082 }, { "epoch": 1.971867922279175, "grad_norm": 2.99804949760437, "learning_rate": 4.980264231986276e-08, "loss": 1.0443, "step": 33084 }, { "epoch": 1.9719871259983313, "grad_norm": 3.121476173400879, "learning_rate": 4.938154955528296e-08, "loss": 1.0297, "step": 33086 }, { "epoch": 1.9721063297174872, "grad_norm": 3.161787509918213, "learning_rate": 4.896224369523861e-08, "loss": 1.0613, "step": 33088 }, { "epoch": 1.9722255334366432, "grad_norm": 3.296156406402588, "learning_rate": 4.8544724754712166e-08, "loss": 1.15, "step": 33090 }, { "epoch": 1.9723447371557992, "grad_norm": 3.2294042110443115, "learning_rate": 4.812899274865279e-08, "loss": 1.0822, "step": 33092 }, { "epoch": 1.9724639408749551, "grad_norm": 3.162163257598877, "learning_rate": 4.7715047691926364e-08, "loss": 1.0027, "step": 33094 }, { "epoch": 1.9725831445941113, "grad_norm": 2.957268714904785, "learning_rate": 4.730288959934326e-08, "loss": 1.1078, "step": 33096 }, { "epoch": 1.9727023483132675, "grad_norm": 2.948993444442749, "learning_rate": 4.689251848564169e-08, "loss": 0.9849, "step": 33098 }, { "epoch": 1.9728215520324235, "grad_norm": 2.9031450748443604, "learning_rate": 4.6483934365515456e-08, "loss": 1.0123, "step": 33100 }, { "epoch": 1.9729407557515795, "grad_norm": 3.02059268951416, "learning_rate": 4.6077137253563995e-08, "loss": 0.9624, "step": 33102 }, { "epoch": 1.9730599594707354, "grad_norm": 3.489265203475952, "learning_rate": 4.567212716435898e-08, "loss": 1.2132, "step": 33104 }, { "epoch": 1.9731791631898914, "grad_norm": 3.5187153816223145, "learning_rate": 4.526890411237217e-08, "loss": 1.1467, "step": 33106 }, { "epoch": 1.9732983669090476, "grad_norm": 3.3033154010772705, "learning_rate": 4.4867468112036457e-08, "loss": 1.1009, "step": 33108 }, { "epoch": 1.9734175706282036, "grad_norm": 3.1010637283325195, "learning_rate": 4.446781917771259e-08, "loss": 1.1441, "step": 33110 }, { "epoch": 1.9735367743473597, "grad_norm": 3.143075704574585, "learning_rate": 4.406995732369468e-08, "loss": 1.0446, "step": 33112 }, { "epoch": 1.9736559780665157, "grad_norm": 3.4138736724853516, "learning_rate": 4.3673882564215786e-08, "loss": 1.0258, "step": 33114 }, { "epoch": 1.9737751817856717, "grad_norm": 3.322550058364868, "learning_rate": 4.3279594913447905e-08, "loss": 1.2599, "step": 33116 }, { "epoch": 1.9738943855048277, "grad_norm": 3.0701026916503906, "learning_rate": 4.288709438549088e-08, "loss": 1.1661, "step": 33118 }, { "epoch": 1.9740135892239836, "grad_norm": 3.460918664932251, "learning_rate": 4.249638099439457e-08, "loss": 1.16, "step": 33120 }, { "epoch": 1.9741327929431398, "grad_norm": 3.3548717498779297, "learning_rate": 4.210745475412558e-08, "loss": 1.1224, "step": 33122 }, { "epoch": 1.974251996662296, "grad_norm": 3.4825527667999268, "learning_rate": 4.1720315678600575e-08, "loss": 1.1653, "step": 33124 }, { "epoch": 1.974371200381452, "grad_norm": 3.7178592681884766, "learning_rate": 4.1334963781675115e-08, "loss": 1.0499, "step": 33126 }, { "epoch": 1.974490404100608, "grad_norm": 2.97310471534729, "learning_rate": 4.0951399077127087e-08, "loss": 1.0308, "step": 33128 }, { "epoch": 1.974609607819764, "grad_norm": 3.205070972442627, "learning_rate": 4.056962157867883e-08, "loss": 1.0368, "step": 33130 }, { "epoch": 1.9747288115389199, "grad_norm": 3.348068952560425, "learning_rate": 4.01896312999972e-08, "loss": 1.1604, "step": 33132 }, { "epoch": 1.974848015258076, "grad_norm": 2.9610910415649414, "learning_rate": 3.981142825466022e-08, "loss": 1.0455, "step": 33134 }, { "epoch": 1.974967218977232, "grad_norm": 3.395373582839966, "learning_rate": 3.943501245620707e-08, "loss": 1.2152, "step": 33136 }, { "epoch": 1.9750864226963882, "grad_norm": 3.279578685760498, "learning_rate": 3.9060383918104736e-08, "loss": 1.0854, "step": 33138 }, { "epoch": 1.9752056264155442, "grad_norm": 3.735917329788208, "learning_rate": 3.868754265374808e-08, "loss": 1.253, "step": 33140 }, { "epoch": 1.9753248301347002, "grad_norm": 3.1905200481414795, "learning_rate": 3.8316488676481964e-08, "loss": 1.0857, "step": 33142 }, { "epoch": 1.9754440338538561, "grad_norm": 2.9804184436798096, "learning_rate": 3.7947221999573565e-08, "loss": 1.0819, "step": 33144 }, { "epoch": 1.9755632375730123, "grad_norm": 3.673020839691162, "learning_rate": 3.757974263624009e-08, "loss": 1.1607, "step": 33146 }, { "epoch": 1.9756824412921683, "grad_norm": 3.352562427520752, "learning_rate": 3.721405059962102e-08, "loss": 1.0731, "step": 33148 }, { "epoch": 1.9758016450113245, "grad_norm": 3.49894380569458, "learning_rate": 3.685014590280589e-08, "loss": 1.2884, "step": 33150 }, { "epoch": 1.9759208487304805, "grad_norm": 3.0588877201080322, "learning_rate": 3.6488028558800954e-08, "loss": 1.0132, "step": 33152 }, { "epoch": 1.9760400524496364, "grad_norm": 3.179774284362793, "learning_rate": 3.6127698580573634e-08, "loss": 1.0528, "step": 33154 }, { "epoch": 1.9761592561687924, "grad_norm": 3.1715502738952637, "learning_rate": 3.57691559810025e-08, "loss": 1.047, "step": 33156 }, { "epoch": 1.9762784598879484, "grad_norm": 3.379631757736206, "learning_rate": 3.541240077292729e-08, "loss": 1.1247, "step": 33158 }, { "epoch": 1.9763976636071046, "grad_norm": 3.2651569843292236, "learning_rate": 3.505743296909336e-08, "loss": 1.2281, "step": 33160 }, { "epoch": 1.9765168673262605, "grad_norm": 3.3244190216064453, "learning_rate": 3.4704252582212773e-08, "loss": 1.1567, "step": 33162 }, { "epoch": 1.9766360710454167, "grad_norm": 3.0423166751861572, "learning_rate": 3.435285962491985e-08, "loss": 1.0258, "step": 33164 }, { "epoch": 1.9767552747645727, "grad_norm": 3.0331051349639893, "learning_rate": 3.4003254109776785e-08, "loss": 0.9879, "step": 33166 }, { "epoch": 1.9768744784837287, "grad_norm": 2.9130735397338867, "learning_rate": 3.365543604929022e-08, "loss": 1.003, "step": 33168 }, { "epoch": 1.9769936822028846, "grad_norm": 3.226714611053467, "learning_rate": 3.3309405455911326e-08, "loss": 1.2833, "step": 33170 }, { "epoch": 1.9771128859220408, "grad_norm": 3.502668619155884, "learning_rate": 3.2965162342013524e-08, "loss": 1.1573, "step": 33172 }, { "epoch": 1.9772320896411968, "grad_norm": 3.171980619430542, "learning_rate": 3.262270671990919e-08, "loss": 1.1427, "step": 33174 }, { "epoch": 1.977351293360353, "grad_norm": 3.1509664058685303, "learning_rate": 3.2282038601855194e-08, "loss": 1.0248, "step": 33176 }, { "epoch": 1.977470497079509, "grad_norm": 3.35906720161438, "learning_rate": 3.1943158000036225e-08, "loss": 1.0422, "step": 33178 }, { "epoch": 1.977589700798665, "grad_norm": 2.8650081157684326, "learning_rate": 3.160606492657592e-08, "loss": 1.1091, "step": 33180 }, { "epoch": 1.9777089045178209, "grad_norm": 3.4242660999298096, "learning_rate": 3.127075939352575e-08, "loss": 1.1934, "step": 33182 }, { "epoch": 1.9778281082369769, "grad_norm": 2.961358070373535, "learning_rate": 3.093724141289278e-08, "loss": 0.9798, "step": 33184 }, { "epoch": 1.977947311956133, "grad_norm": 3.1658804416656494, "learning_rate": 3.0605510996595254e-08, "loss": 1.0344, "step": 33186 }, { "epoch": 1.978066515675289, "grad_norm": 2.921738862991333, "learning_rate": 3.027556815651256e-08, "loss": 1.0294, "step": 33188 }, { "epoch": 1.9781857193944452, "grad_norm": 3.091027021408081, "learning_rate": 2.9947412904440806e-08, "loss": 1.0857, "step": 33190 }, { "epoch": 1.9783049231136012, "grad_norm": 3.340045928955078, "learning_rate": 2.9621045252120616e-08, "loss": 1.2426, "step": 33192 }, { "epoch": 1.9784241268327571, "grad_norm": 3.4231386184692383, "learning_rate": 2.9296465211225975e-08, "loss": 1.2475, "step": 33194 }, { "epoch": 1.9785433305519131, "grad_norm": 3.29671049118042, "learning_rate": 2.8973672793375372e-08, "loss": 1.1923, "step": 33196 }, { "epoch": 1.9786625342710693, "grad_norm": 3.320927381515503, "learning_rate": 2.8652668010104023e-08, "loss": 1.0864, "step": 33198 }, { "epoch": 1.9787817379902253, "grad_norm": 3.077676296234131, "learning_rate": 2.8333450872902734e-08, "loss": 1.0175, "step": 33200 }, { "epoch": 1.9789009417093815, "grad_norm": 3.0297274589538574, "learning_rate": 2.8016021393195703e-08, "loss": 1.15, "step": 33202 }, { "epoch": 1.9790201454285374, "grad_norm": 3.160088300704956, "learning_rate": 2.7700379582323854e-08, "loss": 1.0916, "step": 33204 }, { "epoch": 1.9791393491476934, "grad_norm": 3.393005609512329, "learning_rate": 2.7386525451594815e-08, "loss": 1.1635, "step": 33206 }, { "epoch": 1.9792585528668494, "grad_norm": 3.4484856128692627, "learning_rate": 2.7074459012227383e-08, "loss": 1.1185, "step": 33208 }, { "epoch": 1.9793777565860053, "grad_norm": 3.4296703338623047, "learning_rate": 2.6764180275384852e-08, "loss": 1.1751, "step": 33210 }, { "epoch": 1.9794969603051615, "grad_norm": 3.1452372074127197, "learning_rate": 2.645568925217501e-08, "loss": 1.0534, "step": 33212 }, { "epoch": 1.9796161640243175, "grad_norm": 3.4737706184387207, "learning_rate": 2.6148985953622362e-08, "loss": 1.1834, "step": 33214 }, { "epoch": 1.9797353677434737, "grad_norm": 3.529782772064209, "learning_rate": 2.5844070390707022e-08, "loss": 1.1674, "step": 33216 }, { "epoch": 1.9798545714626297, "grad_norm": 3.3426365852355957, "learning_rate": 2.5540942574331373e-08, "loss": 1.2121, "step": 33218 }, { "epoch": 1.9799737751817856, "grad_norm": 3.0931363105773926, "learning_rate": 2.5239602515342298e-08, "loss": 0.9457, "step": 33220 }, { "epoch": 1.9800929789009416, "grad_norm": 3.286435604095459, "learning_rate": 2.4940050224520062e-08, "loss": 1.2259, "step": 33222 }, { "epoch": 1.9802121826200978, "grad_norm": 2.953619956970215, "learning_rate": 2.464228571258387e-08, "loss": 1.2624, "step": 33224 }, { "epoch": 1.9803313863392538, "grad_norm": 3.181711435317993, "learning_rate": 2.4346308990175203e-08, "loss": 1.0633, "step": 33226 }, { "epoch": 1.98045059005841, "grad_norm": 3.169461727142334, "learning_rate": 2.40521200678967e-08, "loss": 1.0263, "step": 33228 }, { "epoch": 1.980569793777566, "grad_norm": 3.831531047821045, "learning_rate": 2.3759718956256617e-08, "loss": 1.1463, "step": 33230 }, { "epoch": 1.980688997496722, "grad_norm": 3.657383441925049, "learning_rate": 2.3469105665729906e-08, "loss": 1.1193, "step": 33232 }, { "epoch": 1.9808082012158779, "grad_norm": 3.6998038291931152, "learning_rate": 2.318028020670271e-08, "loss": 1.1385, "step": 33234 }, { "epoch": 1.9809274049350338, "grad_norm": 3.1135165691375732, "learning_rate": 2.2893242589511197e-08, "loss": 1.0564, "step": 33236 }, { "epoch": 1.98104660865419, "grad_norm": 3.1503281593322754, "learning_rate": 2.2607992824419388e-08, "loss": 1.0731, "step": 33238 }, { "epoch": 1.9811658123733462, "grad_norm": 3.455592155456543, "learning_rate": 2.232453092164133e-08, "loss": 1.0383, "step": 33240 }, { "epoch": 1.9812850160925022, "grad_norm": 2.9619803428649902, "learning_rate": 2.2042856891307807e-08, "loss": 1.1799, "step": 33242 }, { "epoch": 1.9814042198116582, "grad_norm": 2.919606924057007, "learning_rate": 2.1762970743505197e-08, "loss": 1.0193, "step": 33244 }, { "epoch": 1.9815234235308141, "grad_norm": 2.9298627376556396, "learning_rate": 2.1484872488236606e-08, "loss": 1.0916, "step": 33246 }, { "epoch": 1.98164262724997, "grad_norm": 3.24993896484375, "learning_rate": 2.1208562135449638e-08, "loss": 1.0568, "step": 33248 }, { "epoch": 1.9817618309691263, "grad_norm": 3.2522714138031006, "learning_rate": 2.093403969503638e-08, "loss": 1.0574, "step": 33250 }, { "epoch": 1.9818810346882823, "grad_norm": 3.002091646194458, "learning_rate": 2.0661305176811196e-08, "loss": 1.0429, "step": 33252 }, { "epoch": 1.9820002384074384, "grad_norm": 3.413533926010132, "learning_rate": 2.0390358590538504e-08, "loss": 1.061, "step": 33254 }, { "epoch": 1.9821194421265944, "grad_norm": 3.3902711868286133, "learning_rate": 2.0121199945905e-08, "loss": 1.1292, "step": 33256 }, { "epoch": 1.9822386458457504, "grad_norm": 3.429619312286377, "learning_rate": 1.9853829252536314e-08, "loss": 1.053, "step": 33258 }, { "epoch": 1.9823578495649063, "grad_norm": 3.488384962081909, "learning_rate": 1.9588246520008123e-08, "loss": 1.0428, "step": 33260 }, { "epoch": 1.9824770532840623, "grad_norm": 3.3566908836364746, "learning_rate": 1.9324451757807284e-08, "loss": 1.1272, "step": 33262 }, { "epoch": 1.9825962570032185, "grad_norm": 2.666114330291748, "learning_rate": 1.9062444975381787e-08, "loss": 0.9869, "step": 33264 }, { "epoch": 1.9827154607223747, "grad_norm": 3.467219829559326, "learning_rate": 1.8802226182101924e-08, "loss": 1.2494, "step": 33266 }, { "epoch": 1.9828346644415307, "grad_norm": 3.256053924560547, "learning_rate": 1.8543795387271356e-08, "loss": 1.0028, "step": 33268 }, { "epoch": 1.9829538681606866, "grad_norm": 3.072017192840576, "learning_rate": 1.8287152600143796e-08, "loss": 1.2556, "step": 33270 }, { "epoch": 1.9830730718798426, "grad_norm": 2.76526141166687, "learning_rate": 1.803229782988969e-08, "loss": 1.1168, "step": 33272 }, { "epoch": 1.9831922755989986, "grad_norm": 3.0905449390411377, "learning_rate": 1.777923108563506e-08, "loss": 0.9681, "step": 33274 }, { "epoch": 1.9833114793181548, "grad_norm": 3.230457305908203, "learning_rate": 1.7527952376428236e-08, "loss": 1.0869, "step": 33276 }, { "epoch": 1.9834306830373107, "grad_norm": 3.102653980255127, "learning_rate": 1.727846171126202e-08, "loss": 1.0485, "step": 33278 }, { "epoch": 1.983549886756467, "grad_norm": 3.0400943756103516, "learning_rate": 1.7030759099057047e-08, "loss": 0.9419, "step": 33280 }, { "epoch": 1.983669090475623, "grad_norm": 3.116875648498535, "learning_rate": 1.6784844548678457e-08, "loss": 0.9933, "step": 33282 }, { "epoch": 1.9837882941947789, "grad_norm": 3.6567296981811523, "learning_rate": 1.6540718068919215e-08, "loss": 1.1521, "step": 33284 }, { "epoch": 1.9839074979139348, "grad_norm": 3.378648042678833, "learning_rate": 1.6298379668516773e-08, "loss": 1.1754, "step": 33286 }, { "epoch": 1.9840267016330908, "grad_norm": 3.0435755252838135, "learning_rate": 1.6057829356141973e-08, "loss": 1.0554, "step": 33288 }, { "epoch": 1.984145905352247, "grad_norm": 2.887589931488037, "learning_rate": 1.5819067140393497e-08, "loss": 1.0476, "step": 33290 }, { "epoch": 1.9842651090714032, "grad_norm": 3.235412359237671, "learning_rate": 1.5582093029814504e-08, "loss": 1.2388, "step": 33292 }, { "epoch": 1.9843843127905592, "grad_norm": 3.3768060207366943, "learning_rate": 1.5346907032887105e-08, "loss": 1.1154, "step": 33294 }, { "epoch": 1.9845035165097151, "grad_norm": 3.3116540908813477, "learning_rate": 1.511350915801568e-08, "loss": 1.1892, "step": 33296 }, { "epoch": 1.984622720228871, "grad_norm": 2.904735803604126, "learning_rate": 1.4881899413560218e-08, "loss": 1.0517, "step": 33298 }, { "epoch": 1.984741923948027, "grad_norm": 3.078476905822754, "learning_rate": 1.4652077807802978e-08, "loss": 1.0607, "step": 33300 }, { "epoch": 1.9848611276671833, "grad_norm": 3.076713800430298, "learning_rate": 1.442404434895961e-08, "loss": 1.1681, "step": 33302 }, { "epoch": 1.9849803313863392, "grad_norm": 3.2036168575286865, "learning_rate": 1.4197799045195803e-08, "loss": 1.1282, "step": 33304 }, { "epoch": 1.9850995351054954, "grad_norm": 3.1678264141082764, "learning_rate": 1.3973341904599535e-08, "loss": 1.1085, "step": 33306 }, { "epoch": 1.9852187388246514, "grad_norm": 3.242858648300171, "learning_rate": 1.3750672935203268e-08, "loss": 1.1891, "step": 33308 }, { "epoch": 1.9853379425438074, "grad_norm": 3.2958834171295166, "learning_rate": 1.3529792144967301e-08, "loss": 0.9947, "step": 33310 }, { "epoch": 1.9854571462629633, "grad_norm": 3.226848840713501, "learning_rate": 1.3310699541801974e-08, "loss": 1.1534, "step": 33312 }, { "epoch": 1.9855763499821193, "grad_norm": 2.983067512512207, "learning_rate": 1.309339513353991e-08, "loss": 1.1079, "step": 33314 }, { "epoch": 1.9856955537012755, "grad_norm": 3.0586960315704346, "learning_rate": 1.2877878927952669e-08, "loss": 1.1964, "step": 33316 }, { "epoch": 1.9858147574204317, "grad_norm": 2.991132974624634, "learning_rate": 1.2664150932756303e-08, "loss": 1.2385, "step": 33318 }, { "epoch": 1.9859339611395876, "grad_norm": 3.30749773979187, "learning_rate": 1.2452211155594696e-08, "loss": 1.0772, "step": 33320 }, { "epoch": 1.9860531648587436, "grad_norm": 3.39383602142334, "learning_rate": 1.2242059604045119e-08, "loss": 1.0842, "step": 33322 }, { "epoch": 1.9861723685778996, "grad_norm": 3.229369878768921, "learning_rate": 1.2033696285623786e-08, "loss": 1.0541, "step": 33324 }, { "epoch": 1.9862915722970556, "grad_norm": 3.6036665439605713, "learning_rate": 1.1827121207796943e-08, "loss": 1.1565, "step": 33326 }, { "epoch": 1.9864107760162117, "grad_norm": 3.416165351867676, "learning_rate": 1.1622334377942023e-08, "loss": 1.1038, "step": 33328 }, { "epoch": 1.9865299797353677, "grad_norm": 3.666783571243286, "learning_rate": 1.141933580339205e-08, "loss": 1.0978, "step": 33330 }, { "epoch": 1.986649183454524, "grad_norm": 3.3510429859161377, "learning_rate": 1.1218125491402331e-08, "loss": 1.1124, "step": 33332 }, { "epoch": 1.9867683871736799, "grad_norm": 3.3366317749023438, "learning_rate": 1.1018703449178214e-08, "loss": 1.1012, "step": 33334 }, { "epoch": 1.9868875908928358, "grad_norm": 3.5265533924102783, "learning_rate": 1.082106968385288e-08, "loss": 1.1541, "step": 33336 }, { "epoch": 1.9870067946119918, "grad_norm": 3.2021613121032715, "learning_rate": 1.06252242024929e-08, "loss": 1.0548, "step": 33338 }, { "epoch": 1.9871259983311478, "grad_norm": 3.279256582260132, "learning_rate": 1.0431167012103783e-08, "loss": 1.0969, "step": 33340 }, { "epoch": 1.987245202050304, "grad_norm": 2.779806137084961, "learning_rate": 1.023889811962997e-08, "loss": 0.9703, "step": 33342 }, { "epoch": 1.9873644057694602, "grad_norm": 3.1295523643493652, "learning_rate": 1.0048417531949294e-08, "loss": 1.0309, "step": 33344 }, { "epoch": 1.9874836094886161, "grad_norm": 3.190216302871704, "learning_rate": 9.859725255872975e-09, "loss": 1.0637, "step": 33346 }, { "epoch": 1.987602813207772, "grad_norm": 3.2611520290374756, "learning_rate": 9.67282129815672e-09, "loss": 1.097, "step": 33348 }, { "epoch": 1.987722016926928, "grad_norm": 3.2161648273468018, "learning_rate": 9.48770566548407e-09, "loss": 0.9598, "step": 33350 }, { "epoch": 1.987841220646084, "grad_norm": 3.2746469974517822, "learning_rate": 9.304378364471956e-09, "loss": 1.2203, "step": 33352 }, { "epoch": 1.9879604243652402, "grad_norm": 3.3110454082489014, "learning_rate": 9.122839401687344e-09, "loss": 1.0551, "step": 33354 }, { "epoch": 1.9880796280843962, "grad_norm": 3.585075855255127, "learning_rate": 8.94308878361949e-09, "loss": 1.1088, "step": 33356 }, { "epoch": 1.9881988318035524, "grad_norm": 2.8538239002227783, "learning_rate": 8.765126516702138e-09, "loss": 1.1081, "step": 33358 }, { "epoch": 1.9883180355227084, "grad_norm": 2.97515869140625, "learning_rate": 8.588952607296863e-09, "loss": 1.0628, "step": 33360 }, { "epoch": 1.9884372392418643, "grad_norm": 3.0959911346435547, "learning_rate": 8.414567061709732e-09, "loss": 1.0138, "step": 33362 }, { "epoch": 1.9885564429610203, "grad_norm": 3.067014455795288, "learning_rate": 8.241969886180201e-09, "loss": 0.9261, "step": 33364 }, { "epoch": 1.9886756466801763, "grad_norm": 3.068730115890503, "learning_rate": 8.071161086875555e-09, "loss": 1.0326, "step": 33366 }, { "epoch": 1.9887948503993325, "grad_norm": 3.2657065391540527, "learning_rate": 7.902140669913127e-09, "loss": 1.1151, "step": 33368 }, { "epoch": 1.9889140541184887, "grad_norm": 3.2415771484375, "learning_rate": 7.734908641338078e-09, "loss": 1.1526, "step": 33370 }, { "epoch": 1.9890332578376446, "grad_norm": 3.0784528255462646, "learning_rate": 7.569465007134513e-09, "loss": 1.0198, "step": 33372 }, { "epoch": 1.9891524615568006, "grad_norm": 3.2165677547454834, "learning_rate": 7.405809773214367e-09, "loss": 0.9147, "step": 33374 }, { "epoch": 1.9892716652759566, "grad_norm": 3.1126725673675537, "learning_rate": 7.243942945439619e-09, "loss": 1.1364, "step": 33376 }, { "epoch": 1.9893908689951125, "grad_norm": 3.0297603607177734, "learning_rate": 7.08386452959453e-09, "loss": 1.0026, "step": 33378 }, { "epoch": 1.9895100727142687, "grad_norm": 3.3757336139678955, "learning_rate": 6.925574531407852e-09, "loss": 1.1194, "step": 33380 }, { "epoch": 1.9896292764334247, "grad_norm": 3.1938042640686035, "learning_rate": 6.7690729565417225e-09, "loss": 1.0614, "step": 33382 }, { "epoch": 1.9897484801525809, "grad_norm": 3.7223317623138428, "learning_rate": 6.614359810602766e-09, "loss": 1.1695, "step": 33384 }, { "epoch": 1.9898676838717368, "grad_norm": 3.488818645477295, "learning_rate": 6.461435099114343e-09, "loss": 1.0722, "step": 33386 }, { "epoch": 1.9899868875908928, "grad_norm": 3.3810200691223145, "learning_rate": 6.310298827549854e-09, "loss": 1.0845, "step": 33388 }, { "epoch": 1.9901060913100488, "grad_norm": 3.557391881942749, "learning_rate": 6.160951001316084e-09, "loss": 1.0533, "step": 33390 }, { "epoch": 1.9902252950292048, "grad_norm": 3.244570016860962, "learning_rate": 6.0133916257643084e-09, "loss": 1.0856, "step": 33392 }, { "epoch": 1.990344498748361, "grad_norm": 3.033026933670044, "learning_rate": 5.867620706156985e-09, "loss": 1.1704, "step": 33394 }, { "epoch": 1.9904637024675171, "grad_norm": 3.3465611934661865, "learning_rate": 5.723638247723262e-09, "loss": 1.1766, "step": 33396 }, { "epoch": 1.990582906186673, "grad_norm": 3.431887626647949, "learning_rate": 5.5814442556090255e-09, "loss": 1.002, "step": 33398 }, { "epoch": 1.990702109905829, "grad_norm": 3.1193654537200928, "learning_rate": 5.4410387348990955e-09, "loss": 1.0464, "step": 33400 }, { "epoch": 1.990821313624985, "grad_norm": 3.3847126960754395, "learning_rate": 5.302421690622783e-09, "loss": 1.0233, "step": 33402 }, { "epoch": 1.990940517344141, "grad_norm": 3.855999708175659, "learning_rate": 5.165593127726131e-09, "loss": 1.0576, "step": 33404 }, { "epoch": 1.9910597210632972, "grad_norm": 3.1850926876068115, "learning_rate": 5.030553051116327e-09, "loss": 1.0877, "step": 33406 }, { "epoch": 1.9911789247824532, "grad_norm": 3.0416808128356934, "learning_rate": 4.8973014656172875e-09, "loss": 1.0084, "step": 33408 }, { "epoch": 1.9912981285016094, "grad_norm": 2.9888384342193604, "learning_rate": 4.765838375997422e-09, "loss": 0.9976, "step": 33410 }, { "epoch": 1.9914173322207653, "grad_norm": 3.210613965988159, "learning_rate": 4.636163786964076e-09, "loss": 0.997, "step": 33412 }, { "epoch": 1.9915365359399213, "grad_norm": 3.1668782234191895, "learning_rate": 4.5082777031524306e-09, "loss": 1.143, "step": 33414 }, { "epoch": 1.9916557396590773, "grad_norm": 3.46696400642395, "learning_rate": 4.3821801291366036e-09, "loss": 1.044, "step": 33416 }, { "epoch": 1.9917749433782332, "grad_norm": 3.25184965133667, "learning_rate": 4.257871069424102e-09, "loss": 1.1257, "step": 33418 }, { "epoch": 1.9918941470973894, "grad_norm": 3.4468472003936768, "learning_rate": 4.13535052847247e-09, "loss": 1.0469, "step": 33420 }, { "epoch": 1.9920133508165456, "grad_norm": 2.9694442749023438, "learning_rate": 4.014618510650437e-09, "loss": 0.9165, "step": 33422 }, { "epoch": 1.9921325545357016, "grad_norm": 3.0937280654907227, "learning_rate": 3.895675020293421e-09, "loss": 1.377, "step": 33424 }, { "epoch": 1.9922517582548576, "grad_norm": 3.8001530170440674, "learning_rate": 3.7785200616424765e-09, "loss": 1.1051, "step": 33426 }, { "epoch": 1.9923709619740135, "grad_norm": 3.2935452461242676, "learning_rate": 3.6631536388942454e-09, "loss": 1.1174, "step": 33428 }, { "epoch": 1.9924901656931695, "grad_norm": 3.1897189617156982, "learning_rate": 3.549575756178758e-09, "loss": 1.2551, "step": 33430 }, { "epoch": 1.9926093694123257, "grad_norm": 3.150840997695923, "learning_rate": 3.4377864175538786e-09, "loss": 1.1042, "step": 33432 }, { "epoch": 1.9927285731314817, "grad_norm": 3.4618451595306396, "learning_rate": 3.327785627021962e-09, "loss": 1.0286, "step": 33434 }, { "epoch": 1.9928477768506379, "grad_norm": 2.9764490127563477, "learning_rate": 3.2195733885187485e-09, "loss": 1.0131, "step": 33436 }, { "epoch": 1.9929669805697938, "grad_norm": 3.433881998062134, "learning_rate": 3.113149705907814e-09, "loss": 0.972, "step": 33438 }, { "epoch": 1.9930861842889498, "grad_norm": 3.7665185928344727, "learning_rate": 3.008514583008326e-09, "loss": 1.0787, "step": 33440 }, { "epoch": 1.9932053880081058, "grad_norm": 3.0750789642333984, "learning_rate": 2.905668023550634e-09, "loss": 1.0519, "step": 33442 }, { "epoch": 1.9933245917272617, "grad_norm": 3.2034478187561035, "learning_rate": 2.8046100312262293e-09, "loss": 0.9868, "step": 33444 }, { "epoch": 1.993443795446418, "grad_norm": 3.268205404281616, "learning_rate": 2.7053406096433364e-09, "loss": 1.3155, "step": 33446 }, { "epoch": 1.9935629991655741, "grad_norm": 3.101346254348755, "learning_rate": 2.6078597623491184e-09, "loss": 1.0263, "step": 33448 }, { "epoch": 1.99368220288473, "grad_norm": 3.1947340965270996, "learning_rate": 2.512167492840778e-09, "loss": 1.213, "step": 33450 }, { "epoch": 1.993801406603886, "grad_norm": 3.3542380332946777, "learning_rate": 2.4182638045378014e-09, "loss": 1.0261, "step": 33452 }, { "epoch": 1.993920610323042, "grad_norm": 3.4645721912384033, "learning_rate": 2.326148700798614e-09, "loss": 1.1091, "step": 33454 }, { "epoch": 1.994039814042198, "grad_norm": 3.4015772342681885, "learning_rate": 2.235822184915026e-09, "loss": 1.0312, "step": 33456 }, { "epoch": 1.9941590177613542, "grad_norm": 3.607508420944214, "learning_rate": 2.147284260123339e-09, "loss": 1.1467, "step": 33458 }, { "epoch": 1.9942782214805101, "grad_norm": 3.2358462810516357, "learning_rate": 2.0605349295876876e-09, "loss": 1.0567, "step": 33460 }, { "epoch": 1.9943974251996663, "grad_norm": 3.0539026260375977, "learning_rate": 1.975574196411145e-09, "loss": 1.0228, "step": 33462 }, { "epoch": 1.9945166289188223, "grad_norm": 3.196610450744629, "learning_rate": 1.892402063641274e-09, "loss": 1.1051, "step": 33464 }, { "epoch": 1.9946358326379783, "grad_norm": 3.5231237411499023, "learning_rate": 1.8110185342423702e-09, "loss": 1.2533, "step": 33466 }, { "epoch": 1.9947550363571342, "grad_norm": 3.1381421089172363, "learning_rate": 1.7314236111287686e-09, "loss": 0.9697, "step": 33468 }, { "epoch": 1.9948742400762902, "grad_norm": 3.196343183517456, "learning_rate": 1.6536172971537423e-09, "loss": 1.0802, "step": 33470 }, { "epoch": 1.9949934437954464, "grad_norm": 2.9529104232788086, "learning_rate": 1.5775995950928491e-09, "loss": 0.9819, "step": 33472 }, { "epoch": 1.9951126475146026, "grad_norm": 3.0857672691345215, "learning_rate": 1.5033705076716865e-09, "loss": 1.1153, "step": 33474 }, { "epoch": 1.9952318512337586, "grad_norm": 3.521771192550659, "learning_rate": 1.4309300375381363e-09, "loss": 1.0544, "step": 33476 }, { "epoch": 1.9953510549529145, "grad_norm": 3.291020154953003, "learning_rate": 1.3602781872901205e-09, "loss": 1.0652, "step": 33478 }, { "epoch": 1.9954702586720705, "grad_norm": 3.1129956245422363, "learning_rate": 1.2914149594589475e-09, "loss": 1.0699, "step": 33480 }, { "epoch": 1.9955894623912265, "grad_norm": 3.312941312789917, "learning_rate": 1.2243403564982104e-09, "loss": 1.0823, "step": 33482 }, { "epoch": 1.9957086661103827, "grad_norm": 3.3343656063079834, "learning_rate": 1.1590543808115416e-09, "loss": 1.1264, "step": 33484 }, { "epoch": 1.9958278698295386, "grad_norm": 3.1595842838287354, "learning_rate": 1.095557034735961e-09, "loss": 1.1265, "step": 33486 }, { "epoch": 1.9959470735486948, "grad_norm": 3.205173969268799, "learning_rate": 1.0338483205418747e-09, "loss": 1.0036, "step": 33488 }, { "epoch": 1.9960662772678508, "grad_norm": 3.398576498031616, "learning_rate": 9.739282404330751e-10, "loss": 1.0921, "step": 33490 }, { "epoch": 1.9961854809870068, "grad_norm": 3.4905827045440674, "learning_rate": 9.157967965633951e-10, "loss": 1.1087, "step": 33492 }, { "epoch": 1.9963046847061627, "grad_norm": 3.370443105697632, "learning_rate": 8.594539909978494e-10, "loss": 1.1429, "step": 33494 }, { "epoch": 1.9964238884253187, "grad_norm": 2.916287899017334, "learning_rate": 8.048998257681461e-10, "loss": 1.1644, "step": 33496 }, { "epoch": 1.996543092144475, "grad_norm": 3.0166738033294678, "learning_rate": 7.521343028116245e-10, "loss": 0.9467, "step": 33498 }, { "epoch": 1.996662295863631, "grad_norm": 3.167728900909424, "learning_rate": 7.011574240212149e-10, "loss": 1.009, "step": 33500 }, { "epoch": 1.996781499582787, "grad_norm": 3.4697067737579346, "learning_rate": 6.519691912287851e-10, "loss": 1.0055, "step": 33502 }, { "epoch": 1.996900703301943, "grad_norm": 3.0659499168395996, "learning_rate": 6.045696061773853e-10, "loss": 1.0517, "step": 33504 }, { "epoch": 1.997019907021099, "grad_norm": 3.3626325130462646, "learning_rate": 5.589586705767591e-10, "loss": 1.0009, "step": 33506 }, { "epoch": 1.997139110740255, "grad_norm": 3.3394200801849365, "learning_rate": 5.151363860533831e-10, "loss": 1.0416, "step": 33508 }, { "epoch": 1.9972583144594112, "grad_norm": 3.429811477661133, "learning_rate": 4.731027541726718e-10, "loss": 1.042, "step": 33510 }, { "epoch": 1.9973775181785671, "grad_norm": 3.0817956924438477, "learning_rate": 4.328577764389774e-10, "loss": 1.133, "step": 33512 }, { "epoch": 1.9974967218977233, "grad_norm": 3.54951810836792, "learning_rate": 3.944014542955898e-10, "loss": 1.2235, "step": 33514 }, { "epoch": 1.9976159256168793, "grad_norm": 3.2529137134552, "learning_rate": 3.5773378911918566e-10, "loss": 1.101, "step": 33516 }, { "epoch": 1.9977351293360353, "grad_norm": 3.161158800125122, "learning_rate": 3.228547822142769e-10, "loss": 1.0175, "step": 33518 }, { "epoch": 1.9978543330551912, "grad_norm": 3.6019067764282227, "learning_rate": 2.897644348354156e-10, "loss": 1.096, "step": 33520 }, { "epoch": 1.9979735367743474, "grad_norm": 3.268402576446533, "learning_rate": 2.5846274816498925e-10, "loss": 1.1818, "step": 33522 }, { "epoch": 1.9980927404935034, "grad_norm": 2.7597599029541016, "learning_rate": 2.2894972331877206e-10, "loss": 1.0839, "step": 33524 }, { "epoch": 1.9982119442126596, "grad_norm": 3.2396955490112305, "learning_rate": 2.0122536135702696e-10, "loss": 1.015, "step": 33526 }, { "epoch": 1.9983311479318155, "grad_norm": 3.5365490913391113, "learning_rate": 1.7528966326785245e-10, "loss": 1.0449, "step": 33528 }, { "epoch": 1.9984503516509715, "grad_norm": 3.5264947414398193, "learning_rate": 1.5114262998383587e-10, "loss": 1.2036, "step": 33530 }, { "epoch": 1.9985695553701275, "grad_norm": 3.470327854156494, "learning_rate": 1.287842623654001e-10, "loss": 1.0678, "step": 33532 }, { "epoch": 1.9986887590892835, "grad_norm": 3.0380895137786865, "learning_rate": 1.0821456121190565e-10, "loss": 1.0132, "step": 33534 }, { "epoch": 1.9988079628084396, "grad_norm": 3.1850030422210693, "learning_rate": 8.943352726165088e-11, "loss": 1.1317, "step": 33536 }, { "epoch": 1.9989271665275956, "grad_norm": 3.472034215927124, "learning_rate": 7.244116118632072e-11, "loss": 1.0672, "step": 33538 }, { "epoch": 1.9990463702467518, "grad_norm": 2.9411885738372803, "learning_rate": 5.7237463590986695e-11, "loss": 1.2171, "step": 33540 }, { "epoch": 1.9991655739659078, "grad_norm": 3.18446683883667, "learning_rate": 4.38224350196581e-11, "loss": 1.1626, "step": 33542 }, { "epoch": 1.9992847776850637, "grad_norm": 3.4653241634368896, "learning_rate": 3.2196075955281956e-11, "loss": 1.157, "step": 33544 }, { "epoch": 1.9994039814042197, "grad_norm": 2.6432883739471436, "learning_rate": 2.235838681419189e-11, "loss": 0.9167, "step": 33546 }, { "epoch": 1.999523185123376, "grad_norm": 3.3995237350463867, "learning_rate": 1.4309367946108154e-11, "loss": 1.2138, "step": 33548 }, { "epoch": 1.9996423888425319, "grad_norm": 3.425281524658203, "learning_rate": 8.049019639688737e-12, "loss": 1.1424, "step": 33550 }, { "epoch": 1.999761592561688, "grad_norm": 3.3032853603363037, "learning_rate": 3.577342116978244e-12, "loss": 1.0233, "step": 33552 }, { "epoch": 1.999880796280844, "grad_norm": 3.4304404258728027, "learning_rate": 8.943355389590125e-13, "loss": 1.1441, "step": 33554 }, { "epoch": 2.0, "grad_norm": 3.348916530609131, "learning_rate": 0.0, "loss": 1.0292, "step": 33556 } ], "logging_steps": 2, "max_steps": 33556, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.390872339382272e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }