{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9884486035970173, "eval_steps": 500, "global_step": 6800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00029244041526538966, "grad_norm": 1.376689135449382, "learning_rate": 0.0, "loss": 1.2599382400512695, "step": 1 }, { "epoch": 0.0005848808305307793, "grad_norm": 1.234681838317607, "learning_rate": 5.847953216374269e-08, "loss": 0.9314937591552734, "step": 2 }, { "epoch": 0.000877321245796169, "grad_norm": 1.3874138849382744, "learning_rate": 1.1695906432748539e-07, "loss": 1.1433629989624023, "step": 3 }, { "epoch": 0.0011697616610615586, "grad_norm": 1.4618979511530414, "learning_rate": 1.7543859649122808e-07, "loss": 1.2224640846252441, "step": 4 }, { "epoch": 0.0014622020763269484, "grad_norm": 1.236340065064986, "learning_rate": 2.3391812865497077e-07, "loss": 1.0468370914459229, "step": 5 }, { "epoch": 0.001754642491592338, "grad_norm": 1.358651453520776, "learning_rate": 2.9239766081871344e-07, "loss": 1.1314436197280884, "step": 6 }, { "epoch": 0.0020470829068577278, "grad_norm": 1.3850033876300505, "learning_rate": 3.5087719298245616e-07, "loss": 0.9903597831726074, "step": 7 }, { "epoch": 0.0023395233221231173, "grad_norm": 1.326993456005612, "learning_rate": 4.093567251461988e-07, "loss": 1.1988611221313477, "step": 8 }, { "epoch": 0.002631963737388507, "grad_norm": 1.3313234883955534, "learning_rate": 4.6783625730994155e-07, "loss": 1.1209533214569092, "step": 9 }, { "epoch": 0.0029244041526538967, "grad_norm": 1.3471142230235869, "learning_rate": 5.263157894736843e-07, "loss": 1.1582586765289307, "step": 10 }, { "epoch": 0.0032168445679192866, "grad_norm": 1.3073172655293792, "learning_rate": 5.847953216374269e-07, "loss": 1.2469007968902588, "step": 11 }, { "epoch": 0.003509284983184676, "grad_norm": 1.500493931988472, "learning_rate": 6.432748538011696e-07, "loss": 1.115494728088379, "step": 12 }, { "epoch": 0.0038017253984500656, "grad_norm": 1.4157975190751417, "learning_rate": 7.017543859649123e-07, "loss": 1.1927871704101562, "step": 13 }, { "epoch": 0.0040941658137154556, "grad_norm": 1.4273551735693608, "learning_rate": 7.60233918128655e-07, "loss": 1.1014869213104248, "step": 14 }, { "epoch": 0.004386606228980845, "grad_norm": 1.214320734942881, "learning_rate": 8.187134502923977e-07, "loss": 1.1055865287780762, "step": 15 }, { "epoch": 0.0046790466442462346, "grad_norm": 1.2962699407775686, "learning_rate": 8.771929824561404e-07, "loss": 1.1071349382400513, "step": 16 }, { "epoch": 0.004971487059511625, "grad_norm": 1.2885224717964352, "learning_rate": 9.356725146198831e-07, "loss": 1.1737473011016846, "step": 17 }, { "epoch": 0.005263927474777014, "grad_norm": 1.407390623938155, "learning_rate": 9.941520467836258e-07, "loss": 1.283717155456543, "step": 18 }, { "epoch": 0.005556367890042404, "grad_norm": 1.4470139877184414, "learning_rate": 1.0526315789473685e-06, "loss": 1.2509160041809082, "step": 19 }, { "epoch": 0.005848808305307793, "grad_norm": 1.3242663031296102, "learning_rate": 1.111111111111111e-06, "loss": 0.9722317457199097, "step": 20 }, { "epoch": 0.006141248720573183, "grad_norm": 1.7221218211796423, "learning_rate": 1.1695906432748538e-06, "loss": 1.1927049160003662, "step": 21 }, { "epoch": 0.006433689135838573, "grad_norm": 1.4346324267765085, "learning_rate": 1.2280701754385965e-06, "loss": 1.2133033275604248, "step": 22 }, { "epoch": 0.006726129551103963, "grad_norm": 1.449278395489955, "learning_rate": 1.2865497076023392e-06, "loss": 1.2373273372650146, "step": 23 }, { "epoch": 0.007018569966369352, "grad_norm": 1.6650860096596214, "learning_rate": 1.345029239766082e-06, "loss": 0.9476668834686279, "step": 24 }, { "epoch": 0.007311010381634742, "grad_norm": 1.2748998150534738, "learning_rate": 1.4035087719298246e-06, "loss": 1.1171324253082275, "step": 25 }, { "epoch": 0.007603450796900131, "grad_norm": 1.4396688825039674, "learning_rate": 1.4619883040935674e-06, "loss": 1.1276075839996338, "step": 26 }, { "epoch": 0.007895891212165522, "grad_norm": 1.4009443443291978, "learning_rate": 1.52046783625731e-06, "loss": 1.190751314163208, "step": 27 }, { "epoch": 0.008188331627430911, "grad_norm": 1.3912141798418658, "learning_rate": 1.5789473684210526e-06, "loss": 1.2171813249588013, "step": 28 }, { "epoch": 0.0084807720426963, "grad_norm": 1.3073224250652524, "learning_rate": 1.6374269005847953e-06, "loss": 0.8595987558364868, "step": 29 }, { "epoch": 0.00877321245796169, "grad_norm": 1.2671914308960317, "learning_rate": 1.695906432748538e-06, "loss": 1.0270106792449951, "step": 30 }, { "epoch": 0.00906565287322708, "grad_norm": 1.5005896829818803, "learning_rate": 1.7543859649122807e-06, "loss": 1.068537712097168, "step": 31 }, { "epoch": 0.009358093288492469, "grad_norm": 1.2766478202995049, "learning_rate": 1.8128654970760235e-06, "loss": 1.1307867765426636, "step": 32 }, { "epoch": 0.009650533703757859, "grad_norm": 1.5582616996952416, "learning_rate": 1.8713450292397662e-06, "loss": 1.0837950706481934, "step": 33 }, { "epoch": 0.00994297411902325, "grad_norm": 1.4304945053464713, "learning_rate": 1.929824561403509e-06, "loss": 1.1506178379058838, "step": 34 }, { "epoch": 0.01023541453428864, "grad_norm": 1.4722243618391941, "learning_rate": 1.9883040935672516e-06, "loss": 0.9450151324272156, "step": 35 }, { "epoch": 0.010527854949554029, "grad_norm": 1.4847744449229108, "learning_rate": 2.0467836257309943e-06, "loss": 1.2040901184082031, "step": 36 }, { "epoch": 0.010820295364819418, "grad_norm": 1.4600954284408973, "learning_rate": 2.105263157894737e-06, "loss": 1.2316429615020752, "step": 37 }, { "epoch": 0.011112735780084808, "grad_norm": 1.479845514016971, "learning_rate": 2.1637426900584798e-06, "loss": 1.2119100093841553, "step": 38 }, { "epoch": 0.011405176195350197, "grad_norm": 1.353351745720387, "learning_rate": 2.222222222222222e-06, "loss": 1.276926875114441, "step": 39 }, { "epoch": 0.011697616610615587, "grad_norm": 1.256680621146734, "learning_rate": 2.280701754385965e-06, "loss": 0.9357824921607971, "step": 40 }, { "epoch": 0.011990057025880976, "grad_norm": 1.3348703609284243, "learning_rate": 2.3391812865497075e-06, "loss": 1.1861131191253662, "step": 41 }, { "epoch": 0.012282497441146366, "grad_norm": 1.3287978940598948, "learning_rate": 2.3976608187134502e-06, "loss": 1.1745539903640747, "step": 42 }, { "epoch": 0.012574937856411755, "grad_norm": 1.1561631937443322, "learning_rate": 2.456140350877193e-06, "loss": 1.0291770696640015, "step": 43 }, { "epoch": 0.012867378271677147, "grad_norm": 1.2176771446345134, "learning_rate": 2.5146198830409357e-06, "loss": 1.2361294031143188, "step": 44 }, { "epoch": 0.013159818686942536, "grad_norm": 1.3295063710563702, "learning_rate": 2.5730994152046784e-06, "loss": 1.1909143924713135, "step": 45 }, { "epoch": 0.013452259102207926, "grad_norm": 1.2650643173778968, "learning_rate": 2.631578947368421e-06, "loss": 1.1998133659362793, "step": 46 }, { "epoch": 0.013744699517473315, "grad_norm": 1.1278701463292995, "learning_rate": 2.690058479532164e-06, "loss": 1.0011268854141235, "step": 47 }, { "epoch": 0.014037139932738705, "grad_norm": 1.4726969666937608, "learning_rate": 2.7485380116959066e-06, "loss": 1.0552136898040771, "step": 48 }, { "epoch": 0.014329580348004094, "grad_norm": 1.0797124442917296, "learning_rate": 2.8070175438596493e-06, "loss": 0.9727921485900879, "step": 49 }, { "epoch": 0.014622020763269484, "grad_norm": 1.1798592697113668, "learning_rate": 2.865497076023392e-06, "loss": 0.9361351728439331, "step": 50 }, { "epoch": 0.014914461178534873, "grad_norm": 1.1254749584923542, "learning_rate": 2.9239766081871347e-06, "loss": 1.140329360961914, "step": 51 }, { "epoch": 0.015206901593800263, "grad_norm": 1.1050662639156084, "learning_rate": 2.9824561403508774e-06, "loss": 0.991325855255127, "step": 52 }, { "epoch": 0.015499342009065652, "grad_norm": 1.364923415701691, "learning_rate": 3.04093567251462e-06, "loss": 1.3082914352416992, "step": 53 }, { "epoch": 0.015791782424331043, "grad_norm": 1.1357483626397489, "learning_rate": 3.0994152046783624e-06, "loss": 0.9767723083496094, "step": 54 }, { "epoch": 0.016084222839596433, "grad_norm": 1.1338887919712684, "learning_rate": 3.157894736842105e-06, "loss": 1.193568229675293, "step": 55 }, { "epoch": 0.016376663254861822, "grad_norm": 1.176328275981774, "learning_rate": 3.216374269005848e-06, "loss": 0.9767440557479858, "step": 56 }, { "epoch": 0.016669103670127212, "grad_norm": 1.0263265896491178, "learning_rate": 3.2748538011695906e-06, "loss": 0.8888605833053589, "step": 57 }, { "epoch": 0.0169615440853926, "grad_norm": 1.0668435517314094, "learning_rate": 3.3333333333333333e-06, "loss": 1.087357997894287, "step": 58 }, { "epoch": 0.01725398450065799, "grad_norm": 1.1952584851106463, "learning_rate": 3.391812865497076e-06, "loss": 1.0217459201812744, "step": 59 }, { "epoch": 0.01754642491592338, "grad_norm": 1.1279843674972485, "learning_rate": 3.4502923976608188e-06, "loss": 1.0783777236938477, "step": 60 }, { "epoch": 0.01783886533118877, "grad_norm": 0.9080265579264722, "learning_rate": 3.5087719298245615e-06, "loss": 0.85099196434021, "step": 61 }, { "epoch": 0.01813130574645416, "grad_norm": 1.0228765689803359, "learning_rate": 3.567251461988304e-06, "loss": 0.9322569966316223, "step": 62 }, { "epoch": 0.01842374616171955, "grad_norm": 0.991842254830473, "learning_rate": 3.625730994152047e-06, "loss": 0.8749685287475586, "step": 63 }, { "epoch": 0.018716186576984938, "grad_norm": 0.9789077968505817, "learning_rate": 3.6842105263157896e-06, "loss": 0.857900857925415, "step": 64 }, { "epoch": 0.019008626992250328, "grad_norm": 0.8092242526335478, "learning_rate": 3.7426900584795324e-06, "loss": 0.8891770243644714, "step": 65 }, { "epoch": 0.019301067407515717, "grad_norm": 1.0526332302181824, "learning_rate": 3.801169590643275e-06, "loss": 1.0730159282684326, "step": 66 }, { "epoch": 0.019593507822781107, "grad_norm": 1.124329301516788, "learning_rate": 3.859649122807018e-06, "loss": 1.108138084411621, "step": 67 }, { "epoch": 0.0198859482380465, "grad_norm": 1.3581659451048562, "learning_rate": 3.9181286549707605e-06, "loss": 1.2126305103302002, "step": 68 }, { "epoch": 0.02017838865331189, "grad_norm": 1.1108109420327934, "learning_rate": 3.976608187134503e-06, "loss": 0.9527193307876587, "step": 69 }, { "epoch": 0.02047082906857728, "grad_norm": 0.9965971604796123, "learning_rate": 4.035087719298246e-06, "loss": 1.0454832315444946, "step": 70 }, { "epoch": 0.020763269483842668, "grad_norm": 0.821178202034714, "learning_rate": 4.093567251461989e-06, "loss": 0.7075237035751343, "step": 71 }, { "epoch": 0.021055709899108058, "grad_norm": 1.2413273222740282, "learning_rate": 4.152046783625731e-06, "loss": 1.0972111225128174, "step": 72 }, { "epoch": 0.021348150314373447, "grad_norm": 0.9838475362870381, "learning_rate": 4.210526315789474e-06, "loss": 1.0400984287261963, "step": 73 }, { "epoch": 0.021640590729638837, "grad_norm": 0.8577987626348056, "learning_rate": 4.269005847953217e-06, "loss": 0.7712557315826416, "step": 74 }, { "epoch": 0.021933031144904226, "grad_norm": 1.0937426764383058, "learning_rate": 4.3274853801169596e-06, "loss": 1.1733636856079102, "step": 75 }, { "epoch": 0.022225471560169616, "grad_norm": 0.9896291906902066, "learning_rate": 4.385964912280702e-06, "loss": 0.8653621673583984, "step": 76 }, { "epoch": 0.022517911975435005, "grad_norm": 0.9059062097735997, "learning_rate": 4.444444444444444e-06, "loss": 0.8797299861907959, "step": 77 }, { "epoch": 0.022810352390700395, "grad_norm": 1.0128235878781693, "learning_rate": 4.502923976608187e-06, "loss": 0.8357750177383423, "step": 78 }, { "epoch": 0.023102792805965784, "grad_norm": 1.241636412088512, "learning_rate": 4.56140350877193e-06, "loss": 1.1249456405639648, "step": 79 }, { "epoch": 0.023395233221231174, "grad_norm": 1.2743547410748093, "learning_rate": 4.619883040935672e-06, "loss": 0.9920758008956909, "step": 80 }, { "epoch": 0.023687673636496563, "grad_norm": 1.0290847197991744, "learning_rate": 4.678362573099415e-06, "loss": 0.8115094900131226, "step": 81 }, { "epoch": 0.023980114051761953, "grad_norm": 0.9339898981913745, "learning_rate": 4.736842105263158e-06, "loss": 1.060575246810913, "step": 82 }, { "epoch": 0.024272554467027342, "grad_norm": 1.1898301512766587, "learning_rate": 4.7953216374269005e-06, "loss": 1.028218150138855, "step": 83 }, { "epoch": 0.02456499488229273, "grad_norm": 0.9840324243241313, "learning_rate": 4.853801169590643e-06, "loss": 1.090872049331665, "step": 84 }, { "epoch": 0.02485743529755812, "grad_norm": 1.110956193223445, "learning_rate": 4.912280701754386e-06, "loss": 1.0069574117660522, "step": 85 }, { "epoch": 0.02514987571282351, "grad_norm": 1.0134868000559825, "learning_rate": 4.970760233918129e-06, "loss": 0.9391698837280273, "step": 86 }, { "epoch": 0.025442316128088904, "grad_norm": 1.0912235029106665, "learning_rate": 5.029239766081871e-06, "loss": 0.881995677947998, "step": 87 }, { "epoch": 0.025734756543354293, "grad_norm": 1.0399116507679627, "learning_rate": 5.087719298245615e-06, "loss": 0.87871253490448, "step": 88 }, { "epoch": 0.026027196958619683, "grad_norm": 1.0265015868708693, "learning_rate": 5.146198830409357e-06, "loss": 1.005904197692871, "step": 89 }, { "epoch": 0.026319637373885072, "grad_norm": 1.0161210383553128, "learning_rate": 5.2046783625731e-06, "loss": 0.8624223470687866, "step": 90 }, { "epoch": 0.02661207778915046, "grad_norm": 1.0154040401745301, "learning_rate": 5.263157894736842e-06, "loss": 0.9976427555084229, "step": 91 }, { "epoch": 0.02690451820441585, "grad_norm": 1.157266795240935, "learning_rate": 5.321637426900586e-06, "loss": 0.7743148803710938, "step": 92 }, { "epoch": 0.02719695861968124, "grad_norm": 1.0027983307117943, "learning_rate": 5.380116959064328e-06, "loss": 0.8541792631149292, "step": 93 }, { "epoch": 0.02748939903494663, "grad_norm": 1.0195872536359372, "learning_rate": 5.438596491228071e-06, "loss": 0.9141846895217896, "step": 94 }, { "epoch": 0.02778183945021202, "grad_norm": 0.9964676811589505, "learning_rate": 5.497076023391813e-06, "loss": 0.9762974977493286, "step": 95 }, { "epoch": 0.02807427986547741, "grad_norm": 1.086834377136063, "learning_rate": 5.555555555555557e-06, "loss": 0.8039775490760803, "step": 96 }, { "epoch": 0.0283667202807428, "grad_norm": 1.0288673358640383, "learning_rate": 5.6140350877192985e-06, "loss": 0.9464477300643921, "step": 97 }, { "epoch": 0.028659160696008188, "grad_norm": 0.9989091266376411, "learning_rate": 5.672514619883041e-06, "loss": 0.8264896869659424, "step": 98 }, { "epoch": 0.028951601111273578, "grad_norm": 1.239452647422259, "learning_rate": 5.730994152046784e-06, "loss": 0.8347363471984863, "step": 99 }, { "epoch": 0.029244041526538967, "grad_norm": 1.1482101557047766, "learning_rate": 5.789473684210527e-06, "loss": 0.7974327802658081, "step": 100 }, { "epoch": 0.029536481941804357, "grad_norm": 1.040746567320999, "learning_rate": 5.847953216374269e-06, "loss": 0.7953752875328064, "step": 101 }, { "epoch": 0.029828922357069746, "grad_norm": 1.0186289029859024, "learning_rate": 5.906432748538012e-06, "loss": 0.8652607798576355, "step": 102 }, { "epoch": 0.030121362772335136, "grad_norm": 1.0719829766550855, "learning_rate": 5.964912280701755e-06, "loss": 0.973792552947998, "step": 103 }, { "epoch": 0.030413803187600525, "grad_norm": 0.9226382056883017, "learning_rate": 6.023391812865498e-06, "loss": 0.8093612194061279, "step": 104 }, { "epoch": 0.030706243602865915, "grad_norm": 0.9154711374479992, "learning_rate": 6.08187134502924e-06, "loss": 0.8463394045829773, "step": 105 }, { "epoch": 0.030998684018131304, "grad_norm": 1.2769916053670627, "learning_rate": 6.140350877192983e-06, "loss": 0.7898350358009338, "step": 106 }, { "epoch": 0.0312911244333967, "grad_norm": 1.298220618549192, "learning_rate": 6.198830409356725e-06, "loss": 0.9750698804855347, "step": 107 }, { "epoch": 0.031583564848662087, "grad_norm": 1.000315516155276, "learning_rate": 6.2573099415204685e-06, "loss": 0.8137387633323669, "step": 108 }, { "epoch": 0.031876005263927476, "grad_norm": 1.082436003075408, "learning_rate": 6.31578947368421e-06, "loss": 1.0641593933105469, "step": 109 }, { "epoch": 0.032168445679192866, "grad_norm": 1.0363310086535433, "learning_rate": 6.374269005847954e-06, "loss": 0.9647193551063538, "step": 110 }, { "epoch": 0.032460886094458255, "grad_norm": 1.1062097211432278, "learning_rate": 6.432748538011696e-06, "loss": 0.9693200588226318, "step": 111 }, { "epoch": 0.032753326509723645, "grad_norm": 1.145031857661525, "learning_rate": 6.491228070175439e-06, "loss": 0.9600590467453003, "step": 112 }, { "epoch": 0.033045766924989034, "grad_norm": 1.0203404188427831, "learning_rate": 6.549707602339181e-06, "loss": 0.8908880949020386, "step": 113 }, { "epoch": 0.033338207340254424, "grad_norm": 1.2162435709165451, "learning_rate": 6.608187134502925e-06, "loss": 0.9803124666213989, "step": 114 }, { "epoch": 0.03363064775551981, "grad_norm": 1.1738875143751093, "learning_rate": 6.666666666666667e-06, "loss": 0.8288271427154541, "step": 115 }, { "epoch": 0.0339230881707852, "grad_norm": 0.9490473067752526, "learning_rate": 6.72514619883041e-06, "loss": 0.7203798890113831, "step": 116 }, { "epoch": 0.03421552858605059, "grad_norm": 1.0046253156347025, "learning_rate": 6.783625730994152e-06, "loss": 0.7670629024505615, "step": 117 }, { "epoch": 0.03450796900131598, "grad_norm": 1.0563125407630551, "learning_rate": 6.842105263157896e-06, "loss": 0.8487929105758667, "step": 118 }, { "epoch": 0.03480040941658137, "grad_norm": 1.1292147521599132, "learning_rate": 6.9005847953216375e-06, "loss": 0.8332704305648804, "step": 119 }, { "epoch": 0.03509284983184676, "grad_norm": 1.2138847310663696, "learning_rate": 6.959064327485381e-06, "loss": 0.9984017610549927, "step": 120 }, { "epoch": 0.03538529024711215, "grad_norm": 1.126543099330432, "learning_rate": 7.017543859649123e-06, "loss": 0.788459062576294, "step": 121 }, { "epoch": 0.03567773066237754, "grad_norm": 1.5166585395762038, "learning_rate": 7.0760233918128665e-06, "loss": 1.0288443565368652, "step": 122 }, { "epoch": 0.03597017107764293, "grad_norm": 1.0086777607738802, "learning_rate": 7.134502923976608e-06, "loss": 0.7939552664756775, "step": 123 }, { "epoch": 0.03626261149290832, "grad_norm": 1.0254521267017753, "learning_rate": 7.192982456140352e-06, "loss": 0.8816506862640381, "step": 124 }, { "epoch": 0.03655505190817371, "grad_norm": 1.0223917066157164, "learning_rate": 7.251461988304094e-06, "loss": 0.8864353895187378, "step": 125 }, { "epoch": 0.0368474923234391, "grad_norm": 1.2363556273996017, "learning_rate": 7.309941520467837e-06, "loss": 0.9817954897880554, "step": 126 }, { "epoch": 0.03713993273870449, "grad_norm": 1.0757650534793346, "learning_rate": 7.368421052631579e-06, "loss": 0.8423842787742615, "step": 127 }, { "epoch": 0.037432373153969876, "grad_norm": 1.1636915661730252, "learning_rate": 7.426900584795322e-06, "loss": 0.8375135660171509, "step": 128 }, { "epoch": 0.037724813569235266, "grad_norm": 1.2215328884976426, "learning_rate": 7.485380116959065e-06, "loss": 0.9105685949325562, "step": 129 }, { "epoch": 0.038017253984500655, "grad_norm": 1.1346801425180852, "learning_rate": 7.5438596491228074e-06, "loss": 0.8784557580947876, "step": 130 }, { "epoch": 0.038309694399766045, "grad_norm": 1.0071578019284073, "learning_rate": 7.60233918128655e-06, "loss": 0.7557879686355591, "step": 131 }, { "epoch": 0.038602134815031434, "grad_norm": 1.228942961434803, "learning_rate": 7.660818713450294e-06, "loss": 0.8966819047927856, "step": 132 }, { "epoch": 0.038894575230296824, "grad_norm": 1.0961114842309465, "learning_rate": 7.719298245614036e-06, "loss": 0.7642185091972351, "step": 133 }, { "epoch": 0.03918701564556221, "grad_norm": 1.062961529950125, "learning_rate": 7.77777777777778e-06, "loss": 0.8313230276107788, "step": 134 }, { "epoch": 0.0394794560608276, "grad_norm": 1.3350623914867434, "learning_rate": 7.836257309941521e-06, "loss": 0.8388677835464478, "step": 135 }, { "epoch": 0.039771896476093, "grad_norm": 1.2027686314521255, "learning_rate": 7.894736842105265e-06, "loss": 0.9065952301025391, "step": 136 }, { "epoch": 0.04006433689135839, "grad_norm": 1.123144368922916, "learning_rate": 7.953216374269006e-06, "loss": 0.8153767585754395, "step": 137 }, { "epoch": 0.04035677730662378, "grad_norm": 1.163761684167935, "learning_rate": 8.01169590643275e-06, "loss": 0.8976421356201172, "step": 138 }, { "epoch": 0.04064921772188917, "grad_norm": 1.1354333989669174, "learning_rate": 8.070175438596492e-06, "loss": 0.7360264658927917, "step": 139 }, { "epoch": 0.04094165813715456, "grad_norm": 1.1009203930924998, "learning_rate": 8.128654970760235e-06, "loss": 0.8442148566246033, "step": 140 }, { "epoch": 0.04123409855241995, "grad_norm": 1.0872796831159965, "learning_rate": 8.187134502923977e-06, "loss": 0.6541435718536377, "step": 141 }, { "epoch": 0.041526538967685336, "grad_norm": 1.2792221696979318, "learning_rate": 8.24561403508772e-06, "loss": 0.7492353916168213, "step": 142 }, { "epoch": 0.041818979382950726, "grad_norm": 1.0406728730985955, "learning_rate": 8.304093567251463e-06, "loss": 0.6681893467903137, "step": 143 }, { "epoch": 0.042111419798216115, "grad_norm": 1.2507905783247102, "learning_rate": 8.362573099415205e-06, "loss": 0.8384866714477539, "step": 144 }, { "epoch": 0.042403860213481505, "grad_norm": 1.125680624680095, "learning_rate": 8.421052631578948e-06, "loss": 0.8338214159011841, "step": 145 }, { "epoch": 0.042696300628746894, "grad_norm": 1.3441065562284606, "learning_rate": 8.47953216374269e-06, "loss": 0.8549021482467651, "step": 146 }, { "epoch": 0.042988741044012284, "grad_norm": 1.0226139512096055, "learning_rate": 8.538011695906434e-06, "loss": 0.8324464559555054, "step": 147 }, { "epoch": 0.04328118145927767, "grad_norm": 1.3742681865566602, "learning_rate": 8.596491228070176e-06, "loss": 0.9247474670410156, "step": 148 }, { "epoch": 0.04357362187454306, "grad_norm": 1.3295257009133983, "learning_rate": 8.654970760233919e-06, "loss": 0.8488880395889282, "step": 149 }, { "epoch": 0.04386606228980845, "grad_norm": 1.244174459745273, "learning_rate": 8.713450292397661e-06, "loss": 0.7844473123550415, "step": 150 }, { "epoch": 0.04415850270507384, "grad_norm": 1.3605735346558072, "learning_rate": 8.771929824561405e-06, "loss": 1.0540976524353027, "step": 151 }, { "epoch": 0.04445094312033923, "grad_norm": 1.096092225329518, "learning_rate": 8.830409356725146e-06, "loss": 0.7919446229934692, "step": 152 }, { "epoch": 0.04474338353560462, "grad_norm": 1.1577837223865697, "learning_rate": 8.888888888888888e-06, "loss": 0.818670928478241, "step": 153 }, { "epoch": 0.04503582395087001, "grad_norm": 1.4320201209257988, "learning_rate": 8.947368421052632e-06, "loss": 0.8491114377975464, "step": 154 }, { "epoch": 0.0453282643661354, "grad_norm": 1.8326606844764444, "learning_rate": 9.005847953216374e-06, "loss": 0.660563588142395, "step": 155 }, { "epoch": 0.04562070478140079, "grad_norm": 1.1838649114458772, "learning_rate": 9.064327485380117e-06, "loss": 0.8559159636497498, "step": 156 }, { "epoch": 0.04591314519666618, "grad_norm": 1.0968958293675206, "learning_rate": 9.12280701754386e-06, "loss": 0.8478386402130127, "step": 157 }, { "epoch": 0.04620558561193157, "grad_norm": 1.1272218094040445, "learning_rate": 9.181286549707603e-06, "loss": 0.758915901184082, "step": 158 }, { "epoch": 0.04649802602719696, "grad_norm": 1.3159367769875163, "learning_rate": 9.239766081871345e-06, "loss": 0.773307204246521, "step": 159 }, { "epoch": 0.04679046644246235, "grad_norm": 1.29739510285095, "learning_rate": 9.298245614035088e-06, "loss": 0.8948490023612976, "step": 160 }, { "epoch": 0.04708290685772774, "grad_norm": 1.2170406448830853, "learning_rate": 9.35672514619883e-06, "loss": 0.83086097240448, "step": 161 }, { "epoch": 0.047375347272993126, "grad_norm": 1.474814122834776, "learning_rate": 9.415204678362574e-06, "loss": 0.7683168649673462, "step": 162 }, { "epoch": 0.047667787688258516, "grad_norm": 1.2546637555360107, "learning_rate": 9.473684210526315e-06, "loss": 0.9267748594284058, "step": 163 }, { "epoch": 0.047960228103523905, "grad_norm": 1.1945733924353639, "learning_rate": 9.532163742690059e-06, "loss": 0.9243365526199341, "step": 164 }, { "epoch": 0.048252668518789295, "grad_norm": 1.1508961292698372, "learning_rate": 9.590643274853801e-06, "loss": 0.7841176986694336, "step": 165 }, { "epoch": 0.048545108934054684, "grad_norm": 1.1853174404309834, "learning_rate": 9.649122807017545e-06, "loss": 0.8318643569946289, "step": 166 }, { "epoch": 0.048837549349320074, "grad_norm": 1.3089312801161905, "learning_rate": 9.707602339181286e-06, "loss": 0.866286039352417, "step": 167 }, { "epoch": 0.04912998976458546, "grad_norm": 1.32215003396801, "learning_rate": 9.76608187134503e-06, "loss": 0.8232241868972778, "step": 168 }, { "epoch": 0.04942243017985085, "grad_norm": 1.4759162272800292, "learning_rate": 9.824561403508772e-06, "loss": 0.874968945980072, "step": 169 }, { "epoch": 0.04971487059511624, "grad_norm": 1.3247540509223557, "learning_rate": 9.883040935672515e-06, "loss": 0.9048999547958374, "step": 170 }, { "epoch": 0.05000731101038163, "grad_norm": 1.4647995646715117, "learning_rate": 9.941520467836257e-06, "loss": 0.9220215082168579, "step": 171 }, { "epoch": 0.05029975142564702, "grad_norm": 1.3290504006044366, "learning_rate": 1e-05, "loss": 0.8326996564865112, "step": 172 }, { "epoch": 0.05059219184091241, "grad_norm": 1.0687285940591045, "learning_rate": 1.0058479532163743e-05, "loss": 0.8023662567138672, "step": 173 }, { "epoch": 0.05088463225617781, "grad_norm": 1.4370267362244613, "learning_rate": 1.0116959064327488e-05, "loss": 0.9172271490097046, "step": 174 }, { "epoch": 0.0511770726714432, "grad_norm": 1.2538172153184461, "learning_rate": 1.017543859649123e-05, "loss": 0.8016377687454224, "step": 175 }, { "epoch": 0.051469513086708586, "grad_norm": 1.1436252675754246, "learning_rate": 1.0233918128654972e-05, "loss": 0.7656369805335999, "step": 176 }, { "epoch": 0.051761953501973976, "grad_norm": 1.1951944941269466, "learning_rate": 1.0292397660818714e-05, "loss": 0.7769640684127808, "step": 177 }, { "epoch": 0.052054393917239365, "grad_norm": 1.3791114600068226, "learning_rate": 1.0350877192982459e-05, "loss": 0.9830589294433594, "step": 178 }, { "epoch": 0.052346834332504755, "grad_norm": 1.1501081025808126, "learning_rate": 1.04093567251462e-05, "loss": 0.8002523183822632, "step": 179 }, { "epoch": 0.052639274747770144, "grad_norm": 1.3726838653365003, "learning_rate": 1.0467836257309943e-05, "loss": 0.879243016242981, "step": 180 }, { "epoch": 0.052931715163035534, "grad_norm": 1.2863425151805854, "learning_rate": 1.0526315789473684e-05, "loss": 0.7266525030136108, "step": 181 }, { "epoch": 0.05322415557830092, "grad_norm": 1.350994010752117, "learning_rate": 1.0584795321637428e-05, "loss": 0.784702479839325, "step": 182 }, { "epoch": 0.05351659599356631, "grad_norm": 1.415897619399055, "learning_rate": 1.0643274853801172e-05, "loss": 0.8419734239578247, "step": 183 }, { "epoch": 0.0538090364088317, "grad_norm": 1.201782404599289, "learning_rate": 1.0701754385964913e-05, "loss": 0.8462855815887451, "step": 184 }, { "epoch": 0.05410147682409709, "grad_norm": 1.361501494219251, "learning_rate": 1.0760233918128655e-05, "loss": 0.8888737559318542, "step": 185 }, { "epoch": 0.05439391723936248, "grad_norm": 1.3305576553150047, "learning_rate": 1.0818713450292399e-05, "loss": 0.8063781261444092, "step": 186 }, { "epoch": 0.05468635765462787, "grad_norm": 1.2109684966022718, "learning_rate": 1.0877192982456142e-05, "loss": 0.7981499433517456, "step": 187 }, { "epoch": 0.05497879806989326, "grad_norm": 1.5415785509759563, "learning_rate": 1.0935672514619884e-05, "loss": 0.8474490642547607, "step": 188 }, { "epoch": 0.05527123848515865, "grad_norm": 1.300197838887535, "learning_rate": 1.0994152046783626e-05, "loss": 0.818732500076294, "step": 189 }, { "epoch": 0.05556367890042404, "grad_norm": 1.3192619521811115, "learning_rate": 1.105263157894737e-05, "loss": 0.7660291194915771, "step": 190 }, { "epoch": 0.05585611931568943, "grad_norm": 1.2626389127660034, "learning_rate": 1.1111111111111113e-05, "loss": 0.8240147233009338, "step": 191 }, { "epoch": 0.05614855973095482, "grad_norm": 1.340830231936402, "learning_rate": 1.1169590643274855e-05, "loss": 0.9377203583717346, "step": 192 }, { "epoch": 0.05644100014622021, "grad_norm": 1.416661564809907, "learning_rate": 1.1228070175438597e-05, "loss": 0.8662704229354858, "step": 193 }, { "epoch": 0.0567334405614856, "grad_norm": 1.3274611257173192, "learning_rate": 1.128654970760234e-05, "loss": 0.717308759689331, "step": 194 }, { "epoch": 0.05702588097675099, "grad_norm": 1.1942152308113003, "learning_rate": 1.1345029239766083e-05, "loss": 0.8538037538528442, "step": 195 }, { "epoch": 0.057318321392016376, "grad_norm": 1.4411136610170212, "learning_rate": 1.1403508771929826e-05, "loss": 0.9016960859298706, "step": 196 }, { "epoch": 0.057610761807281766, "grad_norm": 1.4664426354083508, "learning_rate": 1.1461988304093568e-05, "loss": 0.9313502311706543, "step": 197 }, { "epoch": 0.057903202222547155, "grad_norm": 1.2885330427126278, "learning_rate": 1.1520467836257312e-05, "loss": 0.7330124974250793, "step": 198 }, { "epoch": 0.058195642637812545, "grad_norm": 1.272277327326545, "learning_rate": 1.1578947368421053e-05, "loss": 0.8904056549072266, "step": 199 }, { "epoch": 0.058488083053077934, "grad_norm": 1.4761275028472136, "learning_rate": 1.1637426900584797e-05, "loss": 0.7816377878189087, "step": 200 }, { "epoch": 0.058780523468343324, "grad_norm": 1.3244130760300052, "learning_rate": 1.1695906432748539e-05, "loss": 0.7109910249710083, "step": 201 }, { "epoch": 0.05907296388360871, "grad_norm": 1.499082853070359, "learning_rate": 1.1754385964912282e-05, "loss": 0.7657924890518188, "step": 202 }, { "epoch": 0.0593654042988741, "grad_norm": 1.5632309821036996, "learning_rate": 1.1812865497076024e-05, "loss": 0.8521978259086609, "step": 203 }, { "epoch": 0.05965784471413949, "grad_norm": 1.3625729366507646, "learning_rate": 1.1871345029239766e-05, "loss": 0.7558364868164062, "step": 204 }, { "epoch": 0.05995028512940488, "grad_norm": 1.3362044158661328, "learning_rate": 1.192982456140351e-05, "loss": 0.8488497734069824, "step": 205 }, { "epoch": 0.06024272554467027, "grad_norm": 1.5823695803446844, "learning_rate": 1.1988304093567253e-05, "loss": 0.7905591726303101, "step": 206 }, { "epoch": 0.06053516595993566, "grad_norm": 1.324069880941127, "learning_rate": 1.2046783625730995e-05, "loss": 0.747936487197876, "step": 207 }, { "epoch": 0.06082760637520105, "grad_norm": 1.3370127883002023, "learning_rate": 1.2105263157894737e-05, "loss": 0.8653486967086792, "step": 208 }, { "epoch": 0.06112004679046644, "grad_norm": 1.295171295812896, "learning_rate": 1.216374269005848e-05, "loss": 0.8662437200546265, "step": 209 }, { "epoch": 0.06141248720573183, "grad_norm": 1.6369328366726996, "learning_rate": 1.2222222222222224e-05, "loss": 0.9567133188247681, "step": 210 }, { "epoch": 0.06170492762099722, "grad_norm": 1.4011109813275144, "learning_rate": 1.2280701754385966e-05, "loss": 0.8994660377502441, "step": 211 }, { "epoch": 0.06199736803626261, "grad_norm": 1.2989562892904951, "learning_rate": 1.2339181286549708e-05, "loss": 0.7889316082000732, "step": 212 }, { "epoch": 0.062289808451528005, "grad_norm": 1.2266327731037636, "learning_rate": 1.239766081871345e-05, "loss": 0.883985161781311, "step": 213 }, { "epoch": 0.0625822488667934, "grad_norm": 1.2190679056716556, "learning_rate": 1.2456140350877195e-05, "loss": 0.7780495882034302, "step": 214 }, { "epoch": 0.06287468928205878, "grad_norm": 1.3596314866008754, "learning_rate": 1.2514619883040937e-05, "loss": 0.6514906883239746, "step": 215 }, { "epoch": 0.06316712969732417, "grad_norm": 1.3008367711622892, "learning_rate": 1.2573099415204679e-05, "loss": 0.750559389591217, "step": 216 }, { "epoch": 0.06345957011258956, "grad_norm": 1.4761536100726258, "learning_rate": 1.263157894736842e-05, "loss": 0.8330573439598083, "step": 217 }, { "epoch": 0.06375201052785495, "grad_norm": 1.4144186396910836, "learning_rate": 1.2690058479532166e-05, "loss": 0.8075361847877502, "step": 218 }, { "epoch": 0.06404445094312033, "grad_norm": 1.2867265784947997, "learning_rate": 1.2748538011695908e-05, "loss": 0.7636772394180298, "step": 219 }, { "epoch": 0.06433689135838573, "grad_norm": 1.1905704140813884, "learning_rate": 1.280701754385965e-05, "loss": 0.8241903185844421, "step": 220 }, { "epoch": 0.06462933177365111, "grad_norm": 1.261461662230418, "learning_rate": 1.2865497076023392e-05, "loss": 0.6582514047622681, "step": 221 }, { "epoch": 0.06492177218891651, "grad_norm": 1.461492259499335, "learning_rate": 1.2923976608187137e-05, "loss": 0.6363992691040039, "step": 222 }, { "epoch": 0.06521421260418189, "grad_norm": 1.5776709499534403, "learning_rate": 1.2982456140350879e-05, "loss": 0.8093860149383545, "step": 223 }, { "epoch": 0.06550665301944729, "grad_norm": 1.5281675606912017, "learning_rate": 1.304093567251462e-05, "loss": 0.7719511985778809, "step": 224 }, { "epoch": 0.06579909343471267, "grad_norm": 1.4484434101459598, "learning_rate": 1.3099415204678362e-05, "loss": 0.8314809799194336, "step": 225 }, { "epoch": 0.06609153384997807, "grad_norm": 1.3751378156667435, "learning_rate": 1.3157894736842108e-05, "loss": 0.8752902746200562, "step": 226 }, { "epoch": 0.06638397426524345, "grad_norm": 1.4660956062146326, "learning_rate": 1.321637426900585e-05, "loss": 0.7564839124679565, "step": 227 }, { "epoch": 0.06667641468050885, "grad_norm": 1.6744274403459947, "learning_rate": 1.3274853801169591e-05, "loss": 0.7377971410751343, "step": 228 }, { "epoch": 0.06696885509577423, "grad_norm": 1.3046915227989528, "learning_rate": 1.3333333333333333e-05, "loss": 0.7298087477684021, "step": 229 }, { "epoch": 0.06726129551103963, "grad_norm": 1.4026797729918719, "learning_rate": 1.3391812865497079e-05, "loss": 0.7291176915168762, "step": 230 }, { "epoch": 0.06755373592630501, "grad_norm": 1.3421785664914363, "learning_rate": 1.345029239766082e-05, "loss": 0.8226944208145142, "step": 231 }, { "epoch": 0.0678461763415704, "grad_norm": 1.4277073905518047, "learning_rate": 1.3508771929824562e-05, "loss": 0.7185185551643372, "step": 232 }, { "epoch": 0.0681386167568358, "grad_norm": 1.2950151686673683, "learning_rate": 1.3567251461988304e-05, "loss": 0.7028212547302246, "step": 233 }, { "epoch": 0.06843105717210118, "grad_norm": 1.6157016450339874, "learning_rate": 1.362573099415205e-05, "loss": 0.8809897899627686, "step": 234 }, { "epoch": 0.06872349758736658, "grad_norm": 1.388536739112073, "learning_rate": 1.3684210526315791e-05, "loss": 0.7779085040092468, "step": 235 }, { "epoch": 0.06901593800263196, "grad_norm": 1.5070530641919806, "learning_rate": 1.3742690058479533e-05, "loss": 0.731019139289856, "step": 236 }, { "epoch": 0.06930837841789736, "grad_norm": 1.4005389899518954, "learning_rate": 1.3801169590643275e-05, "loss": 0.7495850920677185, "step": 237 }, { "epoch": 0.06960081883316274, "grad_norm": 1.2241508662035476, "learning_rate": 1.385964912280702e-05, "loss": 0.7018189430236816, "step": 238 }, { "epoch": 0.06989325924842814, "grad_norm": 1.2596692368793962, "learning_rate": 1.3918128654970762e-05, "loss": 0.7072417736053467, "step": 239 }, { "epoch": 0.07018569966369352, "grad_norm": 1.3606864903220994, "learning_rate": 1.3976608187134504e-05, "loss": 0.8125720620155334, "step": 240 }, { "epoch": 0.07047814007895892, "grad_norm": 1.442924901417446, "learning_rate": 1.4035087719298246e-05, "loss": 0.6101655960083008, "step": 241 }, { "epoch": 0.0707705804942243, "grad_norm": 1.3725413795436465, "learning_rate": 1.409356725146199e-05, "loss": 0.9005568623542786, "step": 242 }, { "epoch": 0.0710630209094897, "grad_norm": 1.4215646059439664, "learning_rate": 1.4152046783625733e-05, "loss": 0.7678338289260864, "step": 243 }, { "epoch": 0.07135546132475508, "grad_norm": 1.4745728838056915, "learning_rate": 1.4210526315789475e-05, "loss": 0.7563410997390747, "step": 244 }, { "epoch": 0.07164790174002048, "grad_norm": 1.3043448641122064, "learning_rate": 1.4269005847953217e-05, "loss": 0.7497583627700806, "step": 245 }, { "epoch": 0.07194034215528586, "grad_norm": 1.8237088246729396, "learning_rate": 1.432748538011696e-05, "loss": 0.8913442492485046, "step": 246 }, { "epoch": 0.07223278257055125, "grad_norm": 1.446976759622428, "learning_rate": 1.4385964912280704e-05, "loss": 0.7714704871177673, "step": 247 }, { "epoch": 0.07252522298581664, "grad_norm": 1.4721214924941617, "learning_rate": 1.4444444444444446e-05, "loss": 0.6752789616584778, "step": 248 }, { "epoch": 0.07281766340108203, "grad_norm": 1.4015875441769006, "learning_rate": 1.4502923976608188e-05, "loss": 0.6092795133590698, "step": 249 }, { "epoch": 0.07311010381634742, "grad_norm": 1.4602535650914903, "learning_rate": 1.4561403508771931e-05, "loss": 0.9300343990325928, "step": 250 }, { "epoch": 0.07340254423161281, "grad_norm": 1.3884630911660603, "learning_rate": 1.4619883040935675e-05, "loss": 0.8005613088607788, "step": 251 }, { "epoch": 0.0736949846468782, "grad_norm": 1.2918508056771596, "learning_rate": 1.4678362573099417e-05, "loss": 0.7188931703567505, "step": 252 }, { "epoch": 0.07398742506214359, "grad_norm": 1.3258314938186555, "learning_rate": 1.4736842105263159e-05, "loss": 0.6967242956161499, "step": 253 }, { "epoch": 0.07427986547740897, "grad_norm": 1.300875000270566, "learning_rate": 1.4795321637426902e-05, "loss": 0.6921653747558594, "step": 254 }, { "epoch": 0.07457230589267437, "grad_norm": 1.4258732788152875, "learning_rate": 1.4853801169590644e-05, "loss": 0.8498743772506714, "step": 255 }, { "epoch": 0.07486474630793975, "grad_norm": 1.4311730434285577, "learning_rate": 1.4912280701754388e-05, "loss": 0.6420027017593384, "step": 256 }, { "epoch": 0.07515718672320515, "grad_norm": 1.3747073212413874, "learning_rate": 1.497076023391813e-05, "loss": 0.7101434469223022, "step": 257 }, { "epoch": 0.07544962713847053, "grad_norm": 1.562801712624193, "learning_rate": 1.5029239766081873e-05, "loss": 0.740740180015564, "step": 258 }, { "epoch": 0.07574206755373593, "grad_norm": 1.726645998674187, "learning_rate": 1.5087719298245615e-05, "loss": 0.891905665397644, "step": 259 }, { "epoch": 0.07603450796900131, "grad_norm": 1.5486677390214905, "learning_rate": 1.5146198830409358e-05, "loss": 0.867740273475647, "step": 260 }, { "epoch": 0.07632694838426671, "grad_norm": 1.5072500165891534, "learning_rate": 1.52046783625731e-05, "loss": 0.7895220518112183, "step": 261 }, { "epoch": 0.07661938879953209, "grad_norm": 1.5579945503860015, "learning_rate": 1.5263157894736846e-05, "loss": 0.7987008094787598, "step": 262 }, { "epoch": 0.07691182921479749, "grad_norm": 1.4014455476427317, "learning_rate": 1.5321637426900587e-05, "loss": 0.7780282497406006, "step": 263 }, { "epoch": 0.07720426963006287, "grad_norm": 1.2290290646079385, "learning_rate": 1.538011695906433e-05, "loss": 0.6265891194343567, "step": 264 }, { "epoch": 0.07749671004532827, "grad_norm": 1.4917276843875658, "learning_rate": 1.543859649122807e-05, "loss": 0.6559646129608154, "step": 265 }, { "epoch": 0.07778915046059365, "grad_norm": 1.4406503206723986, "learning_rate": 1.5497076023391816e-05, "loss": 0.8362047672271729, "step": 266 }, { "epoch": 0.07808159087585904, "grad_norm": 1.481487764499426, "learning_rate": 1.555555555555556e-05, "loss": 0.707663357257843, "step": 267 }, { "epoch": 0.07837403129112443, "grad_norm": 1.398507930714671, "learning_rate": 1.56140350877193e-05, "loss": 0.67903071641922, "step": 268 }, { "epoch": 0.07866647170638982, "grad_norm": 1.3187056037490035, "learning_rate": 1.5672514619883042e-05, "loss": 0.7634894251823425, "step": 269 }, { "epoch": 0.0789589121216552, "grad_norm": 1.3791372975152867, "learning_rate": 1.5730994152046787e-05, "loss": 0.6395117044448853, "step": 270 }, { "epoch": 0.0792513525369206, "grad_norm": 1.4273746235266698, "learning_rate": 1.578947368421053e-05, "loss": 0.6948165893554688, "step": 271 }, { "epoch": 0.079543792952186, "grad_norm": 1.342718294320327, "learning_rate": 1.584795321637427e-05, "loss": 0.9288383722305298, "step": 272 }, { "epoch": 0.07983623336745138, "grad_norm": 1.4727633207578312, "learning_rate": 1.5906432748538013e-05, "loss": 0.9291346073150635, "step": 273 }, { "epoch": 0.08012867378271678, "grad_norm": 1.3613936763496384, "learning_rate": 1.5964912280701755e-05, "loss": 0.7399512529373169, "step": 274 }, { "epoch": 0.08042111419798216, "grad_norm": 1.5856072060707183, "learning_rate": 1.60233918128655e-05, "loss": 0.6890764236450195, "step": 275 }, { "epoch": 0.08071355461324756, "grad_norm": 1.1844012071470522, "learning_rate": 1.6081871345029242e-05, "loss": 0.6520324349403381, "step": 276 }, { "epoch": 0.08100599502851294, "grad_norm": 1.4161353486782806, "learning_rate": 1.6140350877192984e-05, "loss": 0.6726658344268799, "step": 277 }, { "epoch": 0.08129843544377834, "grad_norm": 1.5076627116667636, "learning_rate": 1.6198830409356726e-05, "loss": 0.7453294992446899, "step": 278 }, { "epoch": 0.08159087585904372, "grad_norm": 1.6796077609043067, "learning_rate": 1.625730994152047e-05, "loss": 0.755578875541687, "step": 279 }, { "epoch": 0.08188331627430911, "grad_norm": 1.576837195920435, "learning_rate": 1.6315789473684213e-05, "loss": 0.713086724281311, "step": 280 }, { "epoch": 0.0821757566895745, "grad_norm": 1.5223162841340931, "learning_rate": 1.6374269005847955e-05, "loss": 0.8714310526847839, "step": 281 }, { "epoch": 0.0824681971048399, "grad_norm": 1.4999918578300349, "learning_rate": 1.6432748538011697e-05, "loss": 0.6827348470687866, "step": 282 }, { "epoch": 0.08276063752010528, "grad_norm": 1.5263417760460645, "learning_rate": 1.649122807017544e-05, "loss": 0.8613482713699341, "step": 283 }, { "epoch": 0.08305307793537067, "grad_norm": 1.3847261162959308, "learning_rate": 1.6549707602339184e-05, "loss": 0.7442763447761536, "step": 284 }, { "epoch": 0.08334551835063606, "grad_norm": 1.3784508201309091, "learning_rate": 1.6608187134502926e-05, "loss": 0.7505494356155396, "step": 285 }, { "epoch": 0.08363795876590145, "grad_norm": 1.3042392110114591, "learning_rate": 1.6666666666666667e-05, "loss": 0.7720779776573181, "step": 286 }, { "epoch": 0.08393039918116683, "grad_norm": 1.5516828033558783, "learning_rate": 1.672514619883041e-05, "loss": 0.7746216654777527, "step": 287 }, { "epoch": 0.08422283959643223, "grad_norm": 1.4429865955911445, "learning_rate": 1.6783625730994155e-05, "loss": 0.8471436500549316, "step": 288 }, { "epoch": 0.08451528001169761, "grad_norm": 1.4116704654777366, "learning_rate": 1.6842105263157896e-05, "loss": 0.7117248773574829, "step": 289 }, { "epoch": 0.08480772042696301, "grad_norm": 1.4428575448924124, "learning_rate": 1.690058479532164e-05, "loss": 0.758680522441864, "step": 290 }, { "epoch": 0.08510016084222839, "grad_norm": 1.4632326474117294, "learning_rate": 1.695906432748538e-05, "loss": 0.9083560705184937, "step": 291 }, { "epoch": 0.08539260125749379, "grad_norm": 1.3444847997489586, "learning_rate": 1.7017543859649125e-05, "loss": 0.7457551956176758, "step": 292 }, { "epoch": 0.08568504167275917, "grad_norm": 1.423532632485526, "learning_rate": 1.7076023391812867e-05, "loss": 0.7463638782501221, "step": 293 }, { "epoch": 0.08597748208802457, "grad_norm": 1.4584931442713187, "learning_rate": 1.713450292397661e-05, "loss": 0.6983559131622314, "step": 294 }, { "epoch": 0.08626992250328995, "grad_norm": 1.3612667828489424, "learning_rate": 1.719298245614035e-05, "loss": 0.8043842911720276, "step": 295 }, { "epoch": 0.08656236291855535, "grad_norm": 1.5042924331122234, "learning_rate": 1.7251461988304093e-05, "loss": 0.7150747776031494, "step": 296 }, { "epoch": 0.08685480333382073, "grad_norm": 2.0308017082996326, "learning_rate": 1.7309941520467838e-05, "loss": 0.7805558443069458, "step": 297 }, { "epoch": 0.08714724374908613, "grad_norm": 1.4326584270734728, "learning_rate": 1.736842105263158e-05, "loss": 0.7158486843109131, "step": 298 }, { "epoch": 0.08743968416435151, "grad_norm": 1.2329719748746066, "learning_rate": 1.7426900584795322e-05, "loss": 0.6496458053588867, "step": 299 }, { "epoch": 0.0877321245796169, "grad_norm": 1.3255444740397837, "learning_rate": 1.7485380116959064e-05, "loss": 0.7488506436347961, "step": 300 }, { "epoch": 0.08802456499488229, "grad_norm": 1.5658056782887144, "learning_rate": 1.754385964912281e-05, "loss": 0.8370999097824097, "step": 301 }, { "epoch": 0.08831700541014768, "grad_norm": 1.3342670844496862, "learning_rate": 1.760233918128655e-05, "loss": 0.6624353528022766, "step": 302 }, { "epoch": 0.08860944582541307, "grad_norm": 1.4627534576360353, "learning_rate": 1.7660818713450293e-05, "loss": 0.6861047148704529, "step": 303 }, { "epoch": 0.08890188624067846, "grad_norm": 1.6532053166188327, "learning_rate": 1.7719298245614035e-05, "loss": 0.746711015701294, "step": 304 }, { "epoch": 0.08919432665594385, "grad_norm": 1.554160121250669, "learning_rate": 1.7777777777777777e-05, "loss": 0.7794955968856812, "step": 305 }, { "epoch": 0.08948676707120924, "grad_norm": 1.7649976265227958, "learning_rate": 1.7836257309941522e-05, "loss": 0.7202489972114563, "step": 306 }, { "epoch": 0.08977920748647462, "grad_norm": 1.6262384567896693, "learning_rate": 1.7894736842105264e-05, "loss": 0.7252119183540344, "step": 307 }, { "epoch": 0.09007164790174002, "grad_norm": 1.5452508352574224, "learning_rate": 1.7953216374269006e-05, "loss": 0.9168737530708313, "step": 308 }, { "epoch": 0.0903640883170054, "grad_norm": 1.487069935429652, "learning_rate": 1.8011695906432747e-05, "loss": 0.7647944688796997, "step": 309 }, { "epoch": 0.0906565287322708, "grad_norm": 1.7447386842901849, "learning_rate": 1.8070175438596493e-05, "loss": 0.7836136817932129, "step": 310 }, { "epoch": 0.0909489691475362, "grad_norm": 1.2604562921756688, "learning_rate": 1.8128654970760235e-05, "loss": 0.6495587825775146, "step": 311 }, { "epoch": 0.09124140956280158, "grad_norm": 1.5613577023920442, "learning_rate": 1.8187134502923976e-05, "loss": 0.7266290187835693, "step": 312 }, { "epoch": 0.09153384997806698, "grad_norm": 1.9984801625992445, "learning_rate": 1.824561403508772e-05, "loss": 0.8417587876319885, "step": 313 }, { "epoch": 0.09182629039333236, "grad_norm": 1.5767499272635297, "learning_rate": 1.8304093567251464e-05, "loss": 0.8431564569473267, "step": 314 }, { "epoch": 0.09211873080859775, "grad_norm": 1.4390326104450535, "learning_rate": 1.8362573099415205e-05, "loss": 0.7724050283432007, "step": 315 }, { "epoch": 0.09241117122386314, "grad_norm": 1.4145032164176374, "learning_rate": 1.8421052631578947e-05, "loss": 0.6687352657318115, "step": 316 }, { "epoch": 0.09270361163912853, "grad_norm": 1.3696816256616517, "learning_rate": 1.847953216374269e-05, "loss": 0.7465454339981079, "step": 317 }, { "epoch": 0.09299605205439392, "grad_norm": 1.507661205433782, "learning_rate": 1.8538011695906434e-05, "loss": 0.6944088935852051, "step": 318 }, { "epoch": 0.09328849246965931, "grad_norm": 1.2922205760098913, "learning_rate": 1.8596491228070176e-05, "loss": 0.6692598462104797, "step": 319 }, { "epoch": 0.0935809328849247, "grad_norm": 1.4345621362788812, "learning_rate": 1.8654970760233918e-05, "loss": 0.7287981510162354, "step": 320 }, { "epoch": 0.09387337330019009, "grad_norm": 1.426362426046858, "learning_rate": 1.871345029239766e-05, "loss": 0.704437255859375, "step": 321 }, { "epoch": 0.09416581371545547, "grad_norm": 1.2757141813139592, "learning_rate": 1.8771929824561405e-05, "loss": 0.6425009965896606, "step": 322 }, { "epoch": 0.09445825413072087, "grad_norm": 1.4929466314279891, "learning_rate": 1.8830409356725147e-05, "loss": 0.765799880027771, "step": 323 }, { "epoch": 0.09475069454598625, "grad_norm": 1.482293870539422, "learning_rate": 1.888888888888889e-05, "loss": 0.9151520133018494, "step": 324 }, { "epoch": 0.09504313496125165, "grad_norm": 1.5087468194478204, "learning_rate": 1.894736842105263e-05, "loss": 0.8753486275672913, "step": 325 }, { "epoch": 0.09533557537651703, "grad_norm": 1.649363404228967, "learning_rate": 1.9005847953216376e-05, "loss": 0.7652826309204102, "step": 326 }, { "epoch": 0.09562801579178243, "grad_norm": 1.405975419146797, "learning_rate": 1.9064327485380118e-05, "loss": 0.7309015393257141, "step": 327 }, { "epoch": 0.09592045620704781, "grad_norm": 1.6766609888433524, "learning_rate": 1.912280701754386e-05, "loss": 0.7656553983688354, "step": 328 }, { "epoch": 0.09621289662231321, "grad_norm": 1.4942542074310006, "learning_rate": 1.9181286549707602e-05, "loss": 0.7400631904602051, "step": 329 }, { "epoch": 0.09650533703757859, "grad_norm": 1.4740815055784118, "learning_rate": 1.9239766081871347e-05, "loss": 0.6812465190887451, "step": 330 }, { "epoch": 0.09679777745284399, "grad_norm": 1.4394939888427052, "learning_rate": 1.929824561403509e-05, "loss": 0.6820628046989441, "step": 331 }, { "epoch": 0.09709021786810937, "grad_norm": 1.9824484648298863, "learning_rate": 1.935672514619883e-05, "loss": 0.7437758445739746, "step": 332 }, { "epoch": 0.09738265828337477, "grad_norm": 1.4755288186056683, "learning_rate": 1.9415204678362573e-05, "loss": 0.8011504411697388, "step": 333 }, { "epoch": 0.09767509869864015, "grad_norm": 1.3829561395962537, "learning_rate": 1.9473684210526318e-05, "loss": 0.7437810301780701, "step": 334 }, { "epoch": 0.09796753911390554, "grad_norm": 1.328838303483977, "learning_rate": 1.953216374269006e-05, "loss": 0.7419568300247192, "step": 335 }, { "epoch": 0.09825997952917093, "grad_norm": 1.4291436246188844, "learning_rate": 1.9590643274853802e-05, "loss": 0.7805042266845703, "step": 336 }, { "epoch": 0.09855241994443632, "grad_norm": 1.3104711543583085, "learning_rate": 1.9649122807017544e-05, "loss": 0.6952530145645142, "step": 337 }, { "epoch": 0.0988448603597017, "grad_norm": 1.313224719465845, "learning_rate": 1.970760233918129e-05, "loss": 0.7669289112091064, "step": 338 }, { "epoch": 0.0991373007749671, "grad_norm": 1.4101609769639065, "learning_rate": 1.976608187134503e-05, "loss": 0.8033919930458069, "step": 339 }, { "epoch": 0.09942974119023248, "grad_norm": 1.2883543538345825, "learning_rate": 1.9824561403508773e-05, "loss": 0.6523177623748779, "step": 340 }, { "epoch": 0.09972218160549788, "grad_norm": 1.3960808628411998, "learning_rate": 1.9883040935672515e-05, "loss": 0.7221896648406982, "step": 341 }, { "epoch": 0.10001462202076326, "grad_norm": 1.2255647850534943, "learning_rate": 1.994152046783626e-05, "loss": 0.6054700016975403, "step": 342 }, { "epoch": 0.10030706243602866, "grad_norm": 1.6303566611100393, "learning_rate": 2e-05, "loss": 0.8368290662765503, "step": 343 }, { "epoch": 0.10059950285129404, "grad_norm": 1.4276425594743465, "learning_rate": 1.99999988312804e-05, "loss": 0.9075677990913391, "step": 344 }, { "epoch": 0.10089194326655944, "grad_norm": 1.4517524210925274, "learning_rate": 1.999999532512188e-05, "loss": 0.7202495336532593, "step": 345 }, { "epoch": 0.10118438368182482, "grad_norm": 1.5340311782896001, "learning_rate": 1.9999989481525245e-05, "loss": 0.7373536229133606, "step": 346 }, { "epoch": 0.10147682409709022, "grad_norm": 1.3128585037330316, "learning_rate": 1.9999981300491873e-05, "loss": 0.7292035222053528, "step": 347 }, { "epoch": 0.10176926451235561, "grad_norm": 1.2681362139682877, "learning_rate": 1.9999970782023673e-05, "loss": 0.8970675468444824, "step": 348 }, { "epoch": 0.102061704927621, "grad_norm": 1.384714606589521, "learning_rate": 1.9999957926123104e-05, "loss": 0.7909846305847168, "step": 349 }, { "epoch": 0.1023541453428864, "grad_norm": 1.3537270396362884, "learning_rate": 1.999994273279317e-05, "loss": 0.7784097790718079, "step": 350 }, { "epoch": 0.10264658575815178, "grad_norm": 1.4008631296209513, "learning_rate": 1.9999925202037422e-05, "loss": 0.7129874229431152, "step": 351 }, { "epoch": 0.10293902617341717, "grad_norm": 1.3322666039831734, "learning_rate": 1.999990533385996e-05, "loss": 0.7185519337654114, "step": 352 }, { "epoch": 0.10323146658868255, "grad_norm": 1.379111892126872, "learning_rate": 1.9999883128265428e-05, "loss": 0.812228798866272, "step": 353 }, { "epoch": 0.10352390700394795, "grad_norm": 1.2831139743741589, "learning_rate": 1.999985858525901e-05, "loss": 0.7187886238098145, "step": 354 }, { "epoch": 0.10381634741921333, "grad_norm": 1.133776070922858, "learning_rate": 1.9999831704846452e-05, "loss": 0.6618789434432983, "step": 355 }, { "epoch": 0.10410878783447873, "grad_norm": 1.5601168208020613, "learning_rate": 1.999980248703403e-05, "loss": 0.9226458072662354, "step": 356 }, { "epoch": 0.10440122824974411, "grad_norm": 1.3702611517072447, "learning_rate": 1.9999770931828578e-05, "loss": 0.7326352596282959, "step": 357 }, { "epoch": 0.10469366866500951, "grad_norm": 1.4755549813416367, "learning_rate": 1.9999737039237472e-05, "loss": 0.719240128993988, "step": 358 }, { "epoch": 0.10498610908027489, "grad_norm": 1.2914576093532248, "learning_rate": 1.999970080926863e-05, "loss": 0.7380290031433105, "step": 359 }, { "epoch": 0.10527854949554029, "grad_norm": 1.6255135036531254, "learning_rate": 1.9999662241930523e-05, "loss": 0.736219048500061, "step": 360 }, { "epoch": 0.10557098991080567, "grad_norm": 1.381933387611508, "learning_rate": 1.999962133723217e-05, "loss": 0.8160735368728638, "step": 361 }, { "epoch": 0.10586343032607107, "grad_norm": 1.4607575491849774, "learning_rate": 1.9999578095183126e-05, "loss": 0.6679781675338745, "step": 362 }, { "epoch": 0.10615587074133645, "grad_norm": 1.551414308388604, "learning_rate": 1.9999532515793498e-05, "loss": 0.7670542001724243, "step": 363 }, { "epoch": 0.10644831115660185, "grad_norm": 1.2802491712211252, "learning_rate": 1.9999484599073945e-05, "loss": 0.6395057439804077, "step": 364 }, { "epoch": 0.10674075157186723, "grad_norm": 1.571289013739176, "learning_rate": 1.9999434345035666e-05, "loss": 0.7226368188858032, "step": 365 }, { "epoch": 0.10703319198713263, "grad_norm": 1.4755023089198305, "learning_rate": 1.9999381753690403e-05, "loss": 0.6236128211021423, "step": 366 }, { "epoch": 0.10732563240239801, "grad_norm": 1.2507526885979663, "learning_rate": 1.9999326825050455e-05, "loss": 0.5937299132347107, "step": 367 }, { "epoch": 0.1076180728176634, "grad_norm": 1.294239826855842, "learning_rate": 1.999926955912866e-05, "loss": 0.6014857292175293, "step": 368 }, { "epoch": 0.10791051323292879, "grad_norm": 1.1031323946933334, "learning_rate": 1.9999209955938394e-05, "loss": 0.5898704528808594, "step": 369 }, { "epoch": 0.10820295364819418, "grad_norm": 1.475520460275832, "learning_rate": 1.9999148015493602e-05, "loss": 0.6879048943519592, "step": 370 }, { "epoch": 0.10849539406345957, "grad_norm": 1.5235484717330832, "learning_rate": 1.999908373780876e-05, "loss": 0.781298041343689, "step": 371 }, { "epoch": 0.10878783447872496, "grad_norm": 1.2913472995661532, "learning_rate": 1.9999017122898886e-05, "loss": 0.6997531652450562, "step": 372 }, { "epoch": 0.10908027489399034, "grad_norm": 1.2104967688689228, "learning_rate": 1.9998948170779556e-05, "loss": 0.6979694366455078, "step": 373 }, { "epoch": 0.10937271530925574, "grad_norm": 1.6154905149339498, "learning_rate": 1.999887688146689e-05, "loss": 0.8069214820861816, "step": 374 }, { "epoch": 0.10966515572452112, "grad_norm": 1.4534879205249425, "learning_rate": 1.9998803254977538e-05, "loss": 0.875137448310852, "step": 375 }, { "epoch": 0.10995759613978652, "grad_norm": 1.4252221781216903, "learning_rate": 1.9998727291328725e-05, "loss": 0.8267173767089844, "step": 376 }, { "epoch": 0.1102500365550519, "grad_norm": 1.3704709368430794, "learning_rate": 1.99986489905382e-05, "loss": 0.7589337825775146, "step": 377 }, { "epoch": 0.1105424769703173, "grad_norm": 1.7248131297126135, "learning_rate": 1.999856835262427e-05, "loss": 0.7479992508888245, "step": 378 }, { "epoch": 0.11083491738558268, "grad_norm": 1.2827951417341936, "learning_rate": 1.999848537760577e-05, "loss": 0.7315084934234619, "step": 379 }, { "epoch": 0.11112735780084808, "grad_norm": 1.2954297558049002, "learning_rate": 1.9998400065502113e-05, "loss": 0.6256793737411499, "step": 380 }, { "epoch": 0.11141979821611346, "grad_norm": 1.3569633064170001, "learning_rate": 1.999831241633323e-05, "loss": 0.7521710395812988, "step": 381 }, { "epoch": 0.11171223863137886, "grad_norm": 1.0851029845548303, "learning_rate": 1.999822243011961e-05, "loss": 0.6824651956558228, "step": 382 }, { "epoch": 0.11200467904664424, "grad_norm": 1.4206429861314096, "learning_rate": 1.9998130106882286e-05, "loss": 0.7254977226257324, "step": 383 }, { "epoch": 0.11229711946190964, "grad_norm": 1.4795080730717471, "learning_rate": 1.999803544664284e-05, "loss": 0.8263741731643677, "step": 384 }, { "epoch": 0.11258955987717502, "grad_norm": 1.3096519492267191, "learning_rate": 1.9997938449423397e-05, "loss": 0.6829507350921631, "step": 385 }, { "epoch": 0.11288200029244042, "grad_norm": 1.2970935037264724, "learning_rate": 1.9997839115246632e-05, "loss": 0.7452428340911865, "step": 386 }, { "epoch": 0.11317444070770581, "grad_norm": 1.322513824449788, "learning_rate": 1.999773744413576e-05, "loss": 0.7900702953338623, "step": 387 }, { "epoch": 0.1134668811229712, "grad_norm": 1.288312120065537, "learning_rate": 1.9997633436114547e-05, "loss": 0.6215303540229797, "step": 388 }, { "epoch": 0.11375932153823659, "grad_norm": 1.3132613017546322, "learning_rate": 1.999752709120731e-05, "loss": 0.798041820526123, "step": 389 }, { "epoch": 0.11405176195350197, "grad_norm": 1.1590478323977431, "learning_rate": 1.9997418409438893e-05, "loss": 0.6033064126968384, "step": 390 }, { "epoch": 0.11434420236876737, "grad_norm": 1.0686988063553795, "learning_rate": 1.9997307390834712e-05, "loss": 0.6358453631401062, "step": 391 }, { "epoch": 0.11463664278403275, "grad_norm": 1.2775095189945147, "learning_rate": 1.999719403542071e-05, "loss": 0.6544308662414551, "step": 392 }, { "epoch": 0.11492908319929815, "grad_norm": 1.3305771925144483, "learning_rate": 1.9997078343223393e-05, "loss": 0.73077392578125, "step": 393 }, { "epoch": 0.11522152361456353, "grad_norm": 1.1914838503287841, "learning_rate": 1.9996960314269792e-05, "loss": 0.5874192118644714, "step": 394 }, { "epoch": 0.11551396402982893, "grad_norm": 1.420658082184349, "learning_rate": 1.9996839948587503e-05, "loss": 0.8242438435554504, "step": 395 }, { "epoch": 0.11580640444509431, "grad_norm": 1.705790457884444, "learning_rate": 1.9996717246204655e-05, "loss": 0.9496668577194214, "step": 396 }, { "epoch": 0.1160988448603597, "grad_norm": 1.2258839048083405, "learning_rate": 1.9996592207149933e-05, "loss": 0.6940287351608276, "step": 397 }, { "epoch": 0.11639128527562509, "grad_norm": 1.4226760671412086, "learning_rate": 1.999646483145256e-05, "loss": 0.7403827905654907, "step": 398 }, { "epoch": 0.11668372569089049, "grad_norm": 1.441557495225195, "learning_rate": 1.9996335119142315e-05, "loss": 0.7493172287940979, "step": 399 }, { "epoch": 0.11697616610615587, "grad_norm": 1.1233068749163333, "learning_rate": 1.9996203070249516e-05, "loss": 0.6048015356063843, "step": 400 }, { "epoch": 0.11726860652142126, "grad_norm": 1.218449987518831, "learning_rate": 1.9996068684805025e-05, "loss": 0.7220426797866821, "step": 401 }, { "epoch": 0.11756104693668665, "grad_norm": 1.4820269559236292, "learning_rate": 1.9995931962840255e-05, "loss": 0.7294620275497437, "step": 402 }, { "epoch": 0.11785348735195204, "grad_norm": 1.2693334480850886, "learning_rate": 1.999579290438717e-05, "loss": 0.7075647115707397, "step": 403 }, { "epoch": 0.11814592776721743, "grad_norm": 1.4353448940274405, "learning_rate": 1.9995651509478264e-05, "loss": 0.7396657466888428, "step": 404 }, { "epoch": 0.11843836818248282, "grad_norm": 1.5214596029668779, "learning_rate": 1.999550777814659e-05, "loss": 0.8240506649017334, "step": 405 }, { "epoch": 0.1187308085977482, "grad_norm": 1.3463253886040645, "learning_rate": 1.9995361710425752e-05, "loss": 0.7518147826194763, "step": 406 }, { "epoch": 0.1190232490130136, "grad_norm": 1.3938258800517485, "learning_rate": 1.9995213306349886e-05, "loss": 0.6998933553695679, "step": 407 }, { "epoch": 0.11931568942827898, "grad_norm": 2.8811625928277134, "learning_rate": 1.999506256595368e-05, "loss": 0.659205973148346, "step": 408 }, { "epoch": 0.11960812984354438, "grad_norm": 1.6815673603725616, "learning_rate": 1.9994909489272372e-05, "loss": 0.7826964259147644, "step": 409 }, { "epoch": 0.11990057025880976, "grad_norm": 1.4225942370637599, "learning_rate": 1.999475407634174e-05, "loss": 0.770768404006958, "step": 410 }, { "epoch": 0.12019301067407516, "grad_norm": 1.4031411556955713, "learning_rate": 1.9994596327198113e-05, "loss": 0.7390692234039307, "step": 411 }, { "epoch": 0.12048545108934054, "grad_norm": 1.238945633280151, "learning_rate": 1.999443624187836e-05, "loss": 0.7092628479003906, "step": 412 }, { "epoch": 0.12077789150460594, "grad_norm": 1.2795019723948553, "learning_rate": 1.9994273820419903e-05, "loss": 0.5252765417098999, "step": 413 }, { "epoch": 0.12107033191987132, "grad_norm": 1.389583747663469, "learning_rate": 1.9994109062860707e-05, "loss": 0.8131704330444336, "step": 414 }, { "epoch": 0.12136277233513672, "grad_norm": 1.490804798338551, "learning_rate": 1.9993941969239284e-05, "loss": 0.8257562518119812, "step": 415 }, { "epoch": 0.1216552127504021, "grad_norm": 1.5541597255876767, "learning_rate": 1.999377253959469e-05, "loss": 0.7163048982620239, "step": 416 }, { "epoch": 0.1219476531656675, "grad_norm": 1.590877283394053, "learning_rate": 1.9993600773966528e-05, "loss": 0.7216504812240601, "step": 417 }, { "epoch": 0.12224009358093288, "grad_norm": 1.6748981575800963, "learning_rate": 1.9993426672394945e-05, "loss": 0.7831340432167053, "step": 418 }, { "epoch": 0.12253253399619828, "grad_norm": 1.3976993960000088, "learning_rate": 1.9993250234920638e-05, "loss": 0.7675709128379822, "step": 419 }, { "epoch": 0.12282497441146366, "grad_norm": 1.454911379398845, "learning_rate": 1.999307146158485e-05, "loss": 0.8085238337516785, "step": 420 }, { "epoch": 0.12311741482672905, "grad_norm": 1.2979608734451222, "learning_rate": 1.9992890352429368e-05, "loss": 0.735150933265686, "step": 421 }, { "epoch": 0.12340985524199444, "grad_norm": 1.2046206432187132, "learning_rate": 1.9992706907496523e-05, "loss": 0.612186074256897, "step": 422 }, { "epoch": 0.12370229565725983, "grad_norm": 1.364838486847665, "learning_rate": 1.9992521126829194e-05, "loss": 0.6636590957641602, "step": 423 }, { "epoch": 0.12399473607252522, "grad_norm": 1.4068215451581474, "learning_rate": 1.9992333010470806e-05, "loss": 0.6814526319503784, "step": 424 }, { "epoch": 0.12428717648779061, "grad_norm": 1.3620595505436823, "learning_rate": 1.9992142558465335e-05, "loss": 0.6940894722938538, "step": 425 }, { "epoch": 0.12457961690305601, "grad_norm": 1.3427645949787534, "learning_rate": 1.9991949770857294e-05, "loss": 0.7485121488571167, "step": 426 }, { "epoch": 0.12487205731832139, "grad_norm": 1.266832638558228, "learning_rate": 1.9991754647691744e-05, "loss": 0.5315885543823242, "step": 427 }, { "epoch": 0.1251644977335868, "grad_norm": 1.2511757429133081, "learning_rate": 1.9991557189014297e-05, "loss": 0.7416529655456543, "step": 428 }, { "epoch": 0.12545693814885217, "grad_norm": 1.4031357379707678, "learning_rate": 1.9991357394871106e-05, "loss": 0.7937026023864746, "step": 429 }, { "epoch": 0.12574937856411755, "grad_norm": 1.3448962462478107, "learning_rate": 1.9991155265308872e-05, "loss": 0.7009662389755249, "step": 430 }, { "epoch": 0.12604181897938296, "grad_norm": 1.3042132277590721, "learning_rate": 1.999095080037484e-05, "loss": 0.6577681303024292, "step": 431 }, { "epoch": 0.12633425939464835, "grad_norm": 1.4036627734956777, "learning_rate": 1.9990744000116808e-05, "loss": 0.7372399568557739, "step": 432 }, { "epoch": 0.12662669980991373, "grad_norm": 1.3819832545517663, "learning_rate": 1.999053486458311e-05, "loss": 0.5959814190864563, "step": 433 }, { "epoch": 0.1269191402251791, "grad_norm": 1.424207998116027, "learning_rate": 1.999032339382263e-05, "loss": 0.6684107780456543, "step": 434 }, { "epoch": 0.12721158064044452, "grad_norm": 1.7048493578408517, "learning_rate": 1.99901095878848e-05, "loss": 0.8837687373161316, "step": 435 }, { "epoch": 0.1275040210557099, "grad_norm": 3.7468635382669717, "learning_rate": 1.9989893446819594e-05, "loss": 0.7128579616546631, "step": 436 }, { "epoch": 0.1277964614709753, "grad_norm": 1.2617709714670788, "learning_rate": 1.9989674970677533e-05, "loss": 0.6634687185287476, "step": 437 }, { "epoch": 0.12808890188624067, "grad_norm": 1.626814629507008, "learning_rate": 1.998945415950969e-05, "loss": 0.7866299152374268, "step": 438 }, { "epoch": 0.12838134230150608, "grad_norm": 1.6912246432889755, "learning_rate": 1.998923101336767e-05, "loss": 0.8104820251464844, "step": 439 }, { "epoch": 0.12867378271677146, "grad_norm": 1.3163679319076276, "learning_rate": 1.9989005532303637e-05, "loss": 0.6643097400665283, "step": 440 }, { "epoch": 0.12896622313203684, "grad_norm": 1.304280975921877, "learning_rate": 1.9988777716370293e-05, "loss": 0.7663843631744385, "step": 441 }, { "epoch": 0.12925866354730223, "grad_norm": 1.4275530439491644, "learning_rate": 1.9988547565620896e-05, "loss": 0.8831629753112793, "step": 442 }, { "epoch": 0.12955110396256764, "grad_norm": 1.2581390355141424, "learning_rate": 1.9988315080109233e-05, "loss": 0.6889798045158386, "step": 443 }, { "epoch": 0.12984354437783302, "grad_norm": 1.2589816711321935, "learning_rate": 1.9988080259889652e-05, "loss": 0.8173589706420898, "step": 444 }, { "epoch": 0.1301359847930984, "grad_norm": 1.437216407920067, "learning_rate": 1.998784310501704e-05, "loss": 0.7444369196891785, "step": 445 }, { "epoch": 0.13042842520836379, "grad_norm": 1.2527388287385341, "learning_rate": 1.998760361554682e-05, "loss": 0.6728573441505432, "step": 446 }, { "epoch": 0.1307208656236292, "grad_norm": 1.4620149588082576, "learning_rate": 1.998736179153499e-05, "loss": 0.6398168802261353, "step": 447 }, { "epoch": 0.13101330603889458, "grad_norm": 1.3925962417611275, "learning_rate": 1.9987117633038063e-05, "loss": 0.7367146015167236, "step": 448 }, { "epoch": 0.13130574645415996, "grad_norm": 1.3497781950543108, "learning_rate": 1.998687114011311e-05, "loss": 0.7072159051895142, "step": 449 }, { "epoch": 0.13159818686942534, "grad_norm": 1.402234544131691, "learning_rate": 1.998662231281775e-05, "loss": 0.7899993062019348, "step": 450 }, { "epoch": 0.13189062728469075, "grad_norm": 1.4376114251018388, "learning_rate": 1.9986371151210146e-05, "loss": 0.7668592929840088, "step": 451 }, { "epoch": 0.13218306769995614, "grad_norm": 1.3943197925338484, "learning_rate": 1.9986117655349003e-05, "loss": 0.7222825288772583, "step": 452 }, { "epoch": 0.13247550811522152, "grad_norm": 1.2939952744587226, "learning_rate": 1.9985861825293577e-05, "loss": 0.7301540374755859, "step": 453 }, { "epoch": 0.1327679485304869, "grad_norm": 1.174339392511722, "learning_rate": 1.998560366110366e-05, "loss": 0.6517907381057739, "step": 454 }, { "epoch": 0.1330603889457523, "grad_norm": 1.5763167634786863, "learning_rate": 1.99853431628396e-05, "loss": 0.6889342069625854, "step": 455 }, { "epoch": 0.1333528293610177, "grad_norm": 1.525770213874127, "learning_rate": 1.9985080330562293e-05, "loss": 0.6804303526878357, "step": 456 }, { "epoch": 0.13364526977628308, "grad_norm": 1.3944930335298842, "learning_rate": 1.9984815164333163e-05, "loss": 0.7699184417724609, "step": 457 }, { "epoch": 0.13393771019154846, "grad_norm": 1.4886205672815649, "learning_rate": 1.99845476642142e-05, "loss": 0.7470533847808838, "step": 458 }, { "epoch": 0.13423015060681387, "grad_norm": 1.251305257809984, "learning_rate": 1.9984277830267927e-05, "loss": 0.6689419746398926, "step": 459 }, { "epoch": 0.13452259102207925, "grad_norm": 1.5088252817247363, "learning_rate": 1.998400566255742e-05, "loss": 0.6395387649536133, "step": 460 }, { "epoch": 0.13481503143734463, "grad_norm": 1.3414013526988133, "learning_rate": 1.9983731161146288e-05, "loss": 0.7785208225250244, "step": 461 }, { "epoch": 0.13510747185261002, "grad_norm": 1.2995640327613904, "learning_rate": 1.9983454326098703e-05, "loss": 0.6864018440246582, "step": 462 }, { "epoch": 0.13539991226787543, "grad_norm": 1.424075352019454, "learning_rate": 1.9983175157479366e-05, "loss": 0.7201317548751831, "step": 463 }, { "epoch": 0.1356923526831408, "grad_norm": 1.4977322356937255, "learning_rate": 1.9982893655353534e-05, "loss": 0.7128555774688721, "step": 464 }, { "epoch": 0.1359847930984062, "grad_norm": 1.2421635772982216, "learning_rate": 1.998260981978701e-05, "loss": 0.7252457141876221, "step": 465 }, { "epoch": 0.1362772335136716, "grad_norm": 1.472555101507684, "learning_rate": 1.9982323650846137e-05, "loss": 0.7453348636627197, "step": 466 }, { "epoch": 0.13656967392893699, "grad_norm": 1.153602031844393, "learning_rate": 1.9982035148597804e-05, "loss": 0.6643078923225403, "step": 467 }, { "epoch": 0.13686211434420237, "grad_norm": 1.280273878296217, "learning_rate": 1.9981744313109445e-05, "loss": 0.7249360084533691, "step": 468 }, { "epoch": 0.13715455475946775, "grad_norm": 1.2363385614561972, "learning_rate": 1.9981451144449042e-05, "loss": 0.8179303407669067, "step": 469 }, { "epoch": 0.13744699517473316, "grad_norm": 1.1335812448130365, "learning_rate": 1.9981155642685125e-05, "loss": 0.6763637661933899, "step": 470 }, { "epoch": 0.13773943558999854, "grad_norm": 1.4603088026603306, "learning_rate": 1.998085780788676e-05, "loss": 0.6684300303459167, "step": 471 }, { "epoch": 0.13803187600526393, "grad_norm": 1.2670786265894947, "learning_rate": 1.9980557640123566e-05, "loss": 0.7251675128936768, "step": 472 }, { "epoch": 0.1383243164205293, "grad_norm": 1.5269819113708596, "learning_rate": 1.998025513946571e-05, "loss": 0.7146456241607666, "step": 473 }, { "epoch": 0.13861675683579472, "grad_norm": 1.2263952606430522, "learning_rate": 1.9979950305983895e-05, "loss": 0.7067978382110596, "step": 474 }, { "epoch": 0.1389091972510601, "grad_norm": 1.2396761565289731, "learning_rate": 1.9979643139749373e-05, "loss": 0.7017637491226196, "step": 475 }, { "epoch": 0.13920163766632548, "grad_norm": 1.397663972134979, "learning_rate": 1.9979333640833947e-05, "loss": 0.7511367201805115, "step": 476 }, { "epoch": 0.13949407808159087, "grad_norm": 1.5675722536579784, "learning_rate": 1.997902180930996e-05, "loss": 0.8129127025604248, "step": 477 }, { "epoch": 0.13978651849685628, "grad_norm": 1.3801608404871573, "learning_rate": 1.9978707645250293e-05, "loss": 0.7760868072509766, "step": 478 }, { "epoch": 0.14007895891212166, "grad_norm": 1.2722362515735255, "learning_rate": 1.9978391148728388e-05, "loss": 0.5190733671188354, "step": 479 }, { "epoch": 0.14037139932738704, "grad_norm": 1.4267690174722667, "learning_rate": 1.9978072319818222e-05, "loss": 0.759798526763916, "step": 480 }, { "epoch": 0.14066383974265242, "grad_norm": 1.3594087764036291, "learning_rate": 1.997775115859432e-05, "loss": 0.5750235319137573, "step": 481 }, { "epoch": 0.14095628015791783, "grad_norm": 1.5288357817907694, "learning_rate": 1.9977427665131748e-05, "loss": 0.6837687492370605, "step": 482 }, { "epoch": 0.14124872057318322, "grad_norm": 1.4085455647433316, "learning_rate": 1.9977101839506123e-05, "loss": 0.8774302005767822, "step": 483 }, { "epoch": 0.1415411609884486, "grad_norm": 1.3951237263634118, "learning_rate": 1.9976773681793605e-05, "loss": 0.6447024345397949, "step": 484 }, { "epoch": 0.14183360140371398, "grad_norm": 1.3077152366881364, "learning_rate": 1.99764431920709e-05, "loss": 0.6212965250015259, "step": 485 }, { "epoch": 0.1421260418189794, "grad_norm": 1.7246179492768339, "learning_rate": 1.9976110370415257e-05, "loss": 0.7606823444366455, "step": 486 }, { "epoch": 0.14241848223424478, "grad_norm": 1.6009360634049956, "learning_rate": 1.9975775216904468e-05, "loss": 0.792106032371521, "step": 487 }, { "epoch": 0.14271092264951016, "grad_norm": 1.526072177508378, "learning_rate": 1.997543773161688e-05, "loss": 0.828373372554779, "step": 488 }, { "epoch": 0.14300336306477554, "grad_norm": 1.2193329399673667, "learning_rate": 1.997509791463137e-05, "loss": 0.7148743867874146, "step": 489 }, { "epoch": 0.14329580348004095, "grad_norm": 1.617921839516307, "learning_rate": 1.9974755766027372e-05, "loss": 0.6566554307937622, "step": 490 }, { "epoch": 0.14358824389530633, "grad_norm": 1.2041404679997165, "learning_rate": 1.9974411285884865e-05, "loss": 0.7833706140518188, "step": 491 }, { "epoch": 0.14388068431057172, "grad_norm": 1.3715764541616051, "learning_rate": 1.997406447428436e-05, "loss": 0.7661226987838745, "step": 492 }, { "epoch": 0.1441731247258371, "grad_norm": 1.2510873907811162, "learning_rate": 1.9973715331306935e-05, "loss": 0.5403884649276733, "step": 493 }, { "epoch": 0.1444655651411025, "grad_norm": 1.417853529635827, "learning_rate": 1.9973363857034183e-05, "loss": 0.7744722366333008, "step": 494 }, { "epoch": 0.1447580055563679, "grad_norm": 1.7245567814035911, "learning_rate": 1.9973010051548274e-05, "loss": 0.9036808013916016, "step": 495 }, { "epoch": 0.14505044597163327, "grad_norm": 1.2752769917707012, "learning_rate": 1.9972653914931902e-05, "loss": 0.6952388286590576, "step": 496 }, { "epoch": 0.14534288638689866, "grad_norm": 1.5454177465030166, "learning_rate": 1.9972295447268312e-05, "loss": 0.7818677425384521, "step": 497 }, { "epoch": 0.14563532680216407, "grad_norm": 1.2104336195623258, "learning_rate": 1.9971934648641294e-05, "loss": 0.8197327256202698, "step": 498 }, { "epoch": 0.14592776721742945, "grad_norm": 1.1376920899270277, "learning_rate": 1.997157151913518e-05, "loss": 0.5898807644844055, "step": 499 }, { "epoch": 0.14622020763269483, "grad_norm": 1.6480348319290024, "learning_rate": 1.9971206058834857e-05, "loss": 0.7980005741119385, "step": 500 }, { "epoch": 0.14651264804796021, "grad_norm": 1.2480430258500308, "learning_rate": 1.997083826782574e-05, "loss": 0.7161837816238403, "step": 501 }, { "epoch": 0.14680508846322562, "grad_norm": 1.436852590534495, "learning_rate": 1.99704681461938e-05, "loss": 0.7657293081283569, "step": 502 }, { "epoch": 0.147097528878491, "grad_norm": 1.256627894457605, "learning_rate": 1.9970095694025553e-05, "loss": 0.6638028621673584, "step": 503 }, { "epoch": 0.1473899692937564, "grad_norm": 1.344090583049545, "learning_rate": 1.996972091140806e-05, "loss": 0.8759262561798096, "step": 504 }, { "epoch": 0.1476824097090218, "grad_norm": 1.1099770302505587, "learning_rate": 1.9969343798428916e-05, "loss": 0.6686065196990967, "step": 505 }, { "epoch": 0.14797485012428718, "grad_norm": 1.5672815870081807, "learning_rate": 1.9968964355176276e-05, "loss": 0.7900313138961792, "step": 506 }, { "epoch": 0.14826729053955257, "grad_norm": 1.3116088800480374, "learning_rate": 1.996858258173883e-05, "loss": 0.699286937713623, "step": 507 }, { "epoch": 0.14855973095481795, "grad_norm": 1.149004701425465, "learning_rate": 1.9968198478205817e-05, "loss": 0.6613560914993286, "step": 508 }, { "epoch": 0.14885217137008336, "grad_norm": 1.471579106109443, "learning_rate": 1.9967812044667014e-05, "loss": 0.8586459755897522, "step": 509 }, { "epoch": 0.14914461178534874, "grad_norm": 1.5307049334622256, "learning_rate": 1.9967423281212754e-05, "loss": 0.6620850563049316, "step": 510 }, { "epoch": 0.14943705220061412, "grad_norm": 1.6192191406380994, "learning_rate": 1.9967032187933905e-05, "loss": 0.7991048097610474, "step": 511 }, { "epoch": 0.1497294926158795, "grad_norm": 1.2792732447271702, "learning_rate": 1.9966638764921882e-05, "loss": 0.7301167845726013, "step": 512 }, { "epoch": 0.15002193303114492, "grad_norm": 1.244527824938295, "learning_rate": 1.9966243012268645e-05, "loss": 0.6470698118209839, "step": 513 }, { "epoch": 0.1503143734464103, "grad_norm": 1.3436689137677134, "learning_rate": 1.99658449300667e-05, "loss": 0.5766996145248413, "step": 514 }, { "epoch": 0.15060681386167568, "grad_norm": 1.2104018154852028, "learning_rate": 1.9965444518409098e-05, "loss": 0.6365845203399658, "step": 515 }, { "epoch": 0.15089925427694106, "grad_norm": 1.6995742833660814, "learning_rate": 1.9965041777389426e-05, "loss": 0.6945745944976807, "step": 516 }, { "epoch": 0.15119169469220647, "grad_norm": 1.6841525179657149, "learning_rate": 1.996463670710183e-05, "loss": 0.802032470703125, "step": 517 }, { "epoch": 0.15148413510747186, "grad_norm": 1.4666130226044234, "learning_rate": 1.996422930764099e-05, "loss": 0.7429964542388916, "step": 518 }, { "epoch": 0.15177657552273724, "grad_norm": 1.5508181233008433, "learning_rate": 1.9963819579102134e-05, "loss": 0.6462180614471436, "step": 519 }, { "epoch": 0.15206901593800262, "grad_norm": 1.3226128228565077, "learning_rate": 1.996340752158103e-05, "loss": 0.888412594795227, "step": 520 }, { "epoch": 0.15236145635326803, "grad_norm": 1.386680099002057, "learning_rate": 1.9962993135173996e-05, "loss": 0.6734700798988342, "step": 521 }, { "epoch": 0.15265389676853341, "grad_norm": 1.385050142293082, "learning_rate": 1.9962576419977894e-05, "loss": 0.6951336860656738, "step": 522 }, { "epoch": 0.1529463371837988, "grad_norm": 1.26022036147928, "learning_rate": 1.9962157376090126e-05, "loss": 0.7130852341651917, "step": 523 }, { "epoch": 0.15323877759906418, "grad_norm": 1.4353500802059385, "learning_rate": 1.9961736003608646e-05, "loss": 0.8322055339813232, "step": 524 }, { "epoch": 0.1535312180143296, "grad_norm": 1.2563635075596429, "learning_rate": 1.996131230263194e-05, "loss": 0.7031791806221008, "step": 525 }, { "epoch": 0.15382365842959497, "grad_norm": 1.3606474846075662, "learning_rate": 1.9960886273259052e-05, "loss": 0.8268769979476929, "step": 526 }, { "epoch": 0.15411609884486036, "grad_norm": 1.048782156231717, "learning_rate": 1.9960457915589557e-05, "loss": 0.6843237280845642, "step": 527 }, { "epoch": 0.15440853926012574, "grad_norm": 1.29845256190474, "learning_rate": 1.9960027229723585e-05, "loss": 0.8267906904220581, "step": 528 }, { "epoch": 0.15470097967539115, "grad_norm": 1.502232175088585, "learning_rate": 1.9959594215761807e-05, "loss": 0.8259629011154175, "step": 529 }, { "epoch": 0.15499342009065653, "grad_norm": 1.3618507954167858, "learning_rate": 1.9959158873805435e-05, "loss": 0.654765248298645, "step": 530 }, { "epoch": 0.1552858605059219, "grad_norm": 1.3762650099604372, "learning_rate": 1.9958721203956233e-05, "loss": 0.7841149568557739, "step": 531 }, { "epoch": 0.1555783009211873, "grad_norm": 1.131527995151024, "learning_rate": 1.9958281206316497e-05, "loss": 0.7364583015441895, "step": 532 }, { "epoch": 0.1558707413364527, "grad_norm": 1.2428392866727909, "learning_rate": 1.9957838880989076e-05, "loss": 0.7985796928405762, "step": 533 }, { "epoch": 0.1561631817517181, "grad_norm": 1.7674168807742325, "learning_rate": 1.9957394228077363e-05, "loss": 0.8432350754737854, "step": 534 }, { "epoch": 0.15645562216698347, "grad_norm": 1.409652061557183, "learning_rate": 1.995694724768529e-05, "loss": 0.713615894317627, "step": 535 }, { "epoch": 0.15674806258224885, "grad_norm": 1.3406073565001748, "learning_rate": 1.9956497939917336e-05, "loss": 0.6472936868667603, "step": 536 }, { "epoch": 0.15704050299751426, "grad_norm": 1.4828550722777096, "learning_rate": 1.9956046304878528e-05, "loss": 0.7963594198226929, "step": 537 }, { "epoch": 0.15733294341277965, "grad_norm": 1.3875627998599316, "learning_rate": 1.9955592342674427e-05, "loss": 0.8043302893638611, "step": 538 }, { "epoch": 0.15762538382804503, "grad_norm": 1.3187786308741334, "learning_rate": 1.995513605341115e-05, "loss": 0.6277294754981995, "step": 539 }, { "epoch": 0.1579178242433104, "grad_norm": 1.2577326193858611, "learning_rate": 1.9954677437195345e-05, "loss": 0.569086492061615, "step": 540 }, { "epoch": 0.15821026465857582, "grad_norm": 1.4002846512494251, "learning_rate": 1.9954216494134217e-05, "loss": 0.7694308757781982, "step": 541 }, { "epoch": 0.1585027050738412, "grad_norm": 1.2602961243105442, "learning_rate": 1.9953753224335504e-05, "loss": 0.7782721519470215, "step": 542 }, { "epoch": 0.1587951454891066, "grad_norm": 1.4115021596903525, "learning_rate": 1.9953287627907498e-05, "loss": 0.6231539249420166, "step": 543 }, { "epoch": 0.159087585904372, "grad_norm": 1.6469299670076099, "learning_rate": 1.9952819704959022e-05, "loss": 0.6431725025177002, "step": 544 }, { "epoch": 0.15938002631963738, "grad_norm": 1.3041234892791729, "learning_rate": 1.9952349455599455e-05, "loss": 0.7062366008758545, "step": 545 }, { "epoch": 0.15967246673490276, "grad_norm": 1.3521393089140767, "learning_rate": 1.9951876879938716e-05, "loss": 0.5376520156860352, "step": 546 }, { "epoch": 0.15996490715016815, "grad_norm": 1.385911158215194, "learning_rate": 1.9951401978087267e-05, "loss": 0.7693386077880859, "step": 547 }, { "epoch": 0.16025734756543356, "grad_norm": 1.7168051396485104, "learning_rate": 1.9950924750156107e-05, "loss": 0.6735765337944031, "step": 548 }, { "epoch": 0.16054978798069894, "grad_norm": 1.1186135901816567, "learning_rate": 1.995044519625679e-05, "loss": 0.5333552360534668, "step": 549 }, { "epoch": 0.16084222839596432, "grad_norm": 1.2149038323564916, "learning_rate": 1.994996331650141e-05, "loss": 0.6694493293762207, "step": 550 }, { "epoch": 0.1611346688112297, "grad_norm": 1.3895443962170193, "learning_rate": 1.9949479111002596e-05, "loss": 0.6056857109069824, "step": 551 }, { "epoch": 0.1614271092264951, "grad_norm": 1.4990214566868623, "learning_rate": 1.9948992579873538e-05, "loss": 0.7174896001815796, "step": 552 }, { "epoch": 0.1617195496417605, "grad_norm": 1.4417886999069138, "learning_rate": 1.9948503723227954e-05, "loss": 0.9150595664978027, "step": 553 }, { "epoch": 0.16201199005702588, "grad_norm": 1.475120009674046, "learning_rate": 1.9948012541180116e-05, "loss": 0.7418098449707031, "step": 554 }, { "epoch": 0.16230443047229126, "grad_norm": 1.3802668140870205, "learning_rate": 1.9947519033844828e-05, "loss": 0.6937648057937622, "step": 555 }, { "epoch": 0.16259687088755667, "grad_norm": 1.198267913228467, "learning_rate": 1.9947023201337448e-05, "loss": 0.628747820854187, "step": 556 }, { "epoch": 0.16288931130282205, "grad_norm": 1.3166666693196283, "learning_rate": 1.9946525043773875e-05, "loss": 0.6252326965332031, "step": 557 }, { "epoch": 0.16318175171808744, "grad_norm": 1.4085830915284543, "learning_rate": 1.9946024561270547e-05, "loss": 0.6243278980255127, "step": 558 }, { "epoch": 0.16347419213335282, "grad_norm": 1.515326552036181, "learning_rate": 1.994552175394445e-05, "loss": 0.7613602876663208, "step": 559 }, { "epoch": 0.16376663254861823, "grad_norm": 1.4167210376939137, "learning_rate": 1.9945016621913115e-05, "loss": 0.7680152654647827, "step": 560 }, { "epoch": 0.1640590729638836, "grad_norm": 1.4413485992010024, "learning_rate": 1.9944509165294614e-05, "loss": 0.6926383972167969, "step": 561 }, { "epoch": 0.164351513379149, "grad_norm": 1.3901402403092062, "learning_rate": 1.9943999384207556e-05, "loss": 0.6822172403335571, "step": 562 }, { "epoch": 0.16464395379441438, "grad_norm": 1.1253426305557543, "learning_rate": 1.99434872787711e-05, "loss": 0.6533722281455994, "step": 563 }, { "epoch": 0.1649363942096798, "grad_norm": 1.3434183662540475, "learning_rate": 1.9942972849104955e-05, "loss": 0.6754113435745239, "step": 564 }, { "epoch": 0.16522883462494517, "grad_norm": 1.3906070154993262, "learning_rate": 1.9942456095329357e-05, "loss": 0.5585163235664368, "step": 565 }, { "epoch": 0.16552127504021055, "grad_norm": 1.18702583603665, "learning_rate": 1.99419370175651e-05, "loss": 0.6268453598022461, "step": 566 }, { "epoch": 0.16581371545547594, "grad_norm": 1.3564219134919553, "learning_rate": 1.994141561593351e-05, "loss": 0.6508245468139648, "step": 567 }, { "epoch": 0.16610615587074135, "grad_norm": 1.353057425024783, "learning_rate": 1.9940891890556468e-05, "loss": 0.7337379455566406, "step": 568 }, { "epoch": 0.16639859628600673, "grad_norm": 1.3764723902611744, "learning_rate": 1.9940365841556385e-05, "loss": 0.7888853549957275, "step": 569 }, { "epoch": 0.1666910367012721, "grad_norm": 1.5384301744775797, "learning_rate": 1.993983746905623e-05, "loss": 0.777199923992157, "step": 570 }, { "epoch": 0.1669834771165375, "grad_norm": 1.5194907821323576, "learning_rate": 1.9939306773179498e-05, "loss": 0.761531412601471, "step": 571 }, { "epoch": 0.1672759175318029, "grad_norm": 1.5063040441270878, "learning_rate": 1.993877375405024e-05, "loss": 0.7060664296150208, "step": 572 }, { "epoch": 0.16756835794706829, "grad_norm": 1.472994627130685, "learning_rate": 1.9938238411793045e-05, "loss": 0.6797431707382202, "step": 573 }, { "epoch": 0.16786079836233367, "grad_norm": 1.3131930617818641, "learning_rate": 1.9937700746533048e-05, "loss": 0.7202910780906677, "step": 574 }, { "epoch": 0.16815323877759905, "grad_norm": 1.198711592546953, "learning_rate": 1.9937160758395923e-05, "loss": 0.7241546511650085, "step": 575 }, { "epoch": 0.16844567919286446, "grad_norm": 1.3694786109804489, "learning_rate": 1.993661844750789e-05, "loss": 0.7055338621139526, "step": 576 }, { "epoch": 0.16873811960812984, "grad_norm": 1.4237978283864139, "learning_rate": 1.993607381399571e-05, "loss": 0.6973986625671387, "step": 577 }, { "epoch": 0.16903056002339523, "grad_norm": 1.1715457050926792, "learning_rate": 1.993552685798669e-05, "loss": 0.693436861038208, "step": 578 }, { "epoch": 0.1693230004386606, "grad_norm": 1.5585764488361307, "learning_rate": 1.9934977579608676e-05, "loss": 0.6687765121459961, "step": 579 }, { "epoch": 0.16961544085392602, "grad_norm": 1.3798925262407884, "learning_rate": 1.9934425978990057e-05, "loss": 0.7776578068733215, "step": 580 }, { "epoch": 0.1699078812691914, "grad_norm": 1.3168335454892666, "learning_rate": 1.9933872056259768e-05, "loss": 0.6914045810699463, "step": 581 }, { "epoch": 0.17020032168445678, "grad_norm": 1.4649859185166105, "learning_rate": 1.9933315811547283e-05, "loss": 0.8005306720733643, "step": 582 }, { "epoch": 0.1704927620997222, "grad_norm": 1.3952257625848015, "learning_rate": 1.9932757244982625e-05, "loss": 0.6936507225036621, "step": 583 }, { "epoch": 0.17078520251498758, "grad_norm": 1.157795409448355, "learning_rate": 1.9932196356696353e-05, "loss": 0.6915504932403564, "step": 584 }, { "epoch": 0.17107764293025296, "grad_norm": 1.4153568154846778, "learning_rate": 1.9931633146819573e-05, "loss": 0.7583723664283752, "step": 585 }, { "epoch": 0.17137008334551834, "grad_norm": 1.2959976429359619, "learning_rate": 1.9931067615483927e-05, "loss": 0.7097266912460327, "step": 586 }, { "epoch": 0.17166252376078375, "grad_norm": 1.5238633829769868, "learning_rate": 1.9930499762821608e-05, "loss": 0.7586667537689209, "step": 587 }, { "epoch": 0.17195496417604914, "grad_norm": 1.3505202775838374, "learning_rate": 1.9929929588965352e-05, "loss": 0.7043411731719971, "step": 588 }, { "epoch": 0.17224740459131452, "grad_norm": 1.3150009626714483, "learning_rate": 1.9929357094048425e-05, "loss": 0.8502261638641357, "step": 589 }, { "epoch": 0.1725398450065799, "grad_norm": 1.3901300269374877, "learning_rate": 1.992878227820465e-05, "loss": 0.7196993827819824, "step": 590 }, { "epoch": 0.1728322854218453, "grad_norm": 1.5475395216492736, "learning_rate": 1.9928205141568388e-05, "loss": 0.6783720850944519, "step": 591 }, { "epoch": 0.1731247258371107, "grad_norm": 1.1911883688546063, "learning_rate": 1.9927625684274534e-05, "loss": 0.7128307819366455, "step": 592 }, { "epoch": 0.17341716625237608, "grad_norm": 1.226507853409212, "learning_rate": 1.9927043906458538e-05, "loss": 0.7289423942565918, "step": 593 }, { "epoch": 0.17370960666764146, "grad_norm": 1.298942183876381, "learning_rate": 1.992645980825639e-05, "loss": 0.6306120157241821, "step": 594 }, { "epoch": 0.17400204708290687, "grad_norm": 1.2456494719411173, "learning_rate": 1.9925873389804614e-05, "loss": 0.7910655736923218, "step": 595 }, { "epoch": 0.17429448749817225, "grad_norm": 1.267940212117298, "learning_rate": 1.9925284651240282e-05, "loss": 0.6075282096862793, "step": 596 }, { "epoch": 0.17458692791343763, "grad_norm": 1.251937615037275, "learning_rate": 1.992469359270101e-05, "loss": 0.6270443201065063, "step": 597 }, { "epoch": 0.17487936832870302, "grad_norm": 1.3200413033724028, "learning_rate": 1.9924100214324955e-05, "loss": 0.6487830877304077, "step": 598 }, { "epoch": 0.17517180874396843, "grad_norm": 1.45237431858529, "learning_rate": 1.9923504516250814e-05, "loss": 0.5986843705177307, "step": 599 }, { "epoch": 0.1754642491592338, "grad_norm": 1.2191897136056242, "learning_rate": 1.992290649861783e-05, "loss": 0.7734183073043823, "step": 600 }, { "epoch": 0.1757566895744992, "grad_norm": 1.167414919229407, "learning_rate": 1.9922306161565782e-05, "loss": 0.5784964561462402, "step": 601 }, { "epoch": 0.17604912998976457, "grad_norm": 1.501564665297397, "learning_rate": 1.9921703505234995e-05, "loss": 0.8034321069717407, "step": 602 }, { "epoch": 0.17634157040502998, "grad_norm": 1.314622713247698, "learning_rate": 1.992109852976634e-05, "loss": 0.8153722882270813, "step": 603 }, { "epoch": 0.17663401082029537, "grad_norm": 1.877065501880657, "learning_rate": 1.992049123530123e-05, "loss": 0.7293002605438232, "step": 604 }, { "epoch": 0.17692645123556075, "grad_norm": 1.514670729590329, "learning_rate": 1.9919881621981606e-05, "loss": 0.7108439207077026, "step": 605 }, { "epoch": 0.17721889165082613, "grad_norm": 1.4748189889445555, "learning_rate": 1.9919269689949968e-05, "loss": 0.7581946849822998, "step": 606 }, { "epoch": 0.17751133206609154, "grad_norm": 1.2337358872247315, "learning_rate": 1.991865543934935e-05, "loss": 0.6821258068084717, "step": 607 }, { "epoch": 0.17780377248135693, "grad_norm": 1.2791852908008183, "learning_rate": 1.991803887032333e-05, "loss": 0.7116109728813171, "step": 608 }, { "epoch": 0.1780962128966223, "grad_norm": 1.2208883706731903, "learning_rate": 1.9917419983016025e-05, "loss": 0.6680186986923218, "step": 609 }, { "epoch": 0.1783886533118877, "grad_norm": 1.3494621179320938, "learning_rate": 1.99167987775721e-05, "loss": 0.6763704419136047, "step": 610 }, { "epoch": 0.1786810937271531, "grad_norm": 1.4133729383070797, "learning_rate": 1.9916175254136755e-05, "loss": 0.756158709526062, "step": 611 }, { "epoch": 0.17897353414241848, "grad_norm": 1.4652489049885558, "learning_rate": 1.9915549412855734e-05, "loss": 0.600861132144928, "step": 612 }, { "epoch": 0.17926597455768387, "grad_norm": 1.4731466609399737, "learning_rate": 1.991492125387533e-05, "loss": 0.6927047967910767, "step": 613 }, { "epoch": 0.17955841497294925, "grad_norm": 1.6937006516406405, "learning_rate": 1.9914290777342362e-05, "loss": 0.6908516883850098, "step": 614 }, { "epoch": 0.17985085538821466, "grad_norm": 1.4155029526585772, "learning_rate": 1.9913657983404206e-05, "loss": 0.7968926429748535, "step": 615 }, { "epoch": 0.18014329580348004, "grad_norm": 1.1016955037712495, "learning_rate": 1.9913022872208773e-05, "loss": 0.6035164594650269, "step": 616 }, { "epoch": 0.18043573621874542, "grad_norm": 1.4061380717551752, "learning_rate": 1.9912385443904518e-05, "loss": 0.6733090877532959, "step": 617 }, { "epoch": 0.1807281766340108, "grad_norm": 2.2181842231696645, "learning_rate": 1.9911745698640426e-05, "loss": 0.6968391537666321, "step": 618 }, { "epoch": 0.18102061704927622, "grad_norm": 1.2136657361400474, "learning_rate": 1.991110363656605e-05, "loss": 0.7126309871673584, "step": 619 }, { "epoch": 0.1813130574645416, "grad_norm": 1.5461052617008268, "learning_rate": 1.9910459257831455e-05, "loss": 0.8604997396469116, "step": 620 }, { "epoch": 0.18160549787980698, "grad_norm": 1.4378853015325992, "learning_rate": 1.9909812562587266e-05, "loss": 0.674797534942627, "step": 621 }, { "epoch": 0.1818979382950724, "grad_norm": 1.4538548213207452, "learning_rate": 1.9909163550984644e-05, "loss": 0.7439107894897461, "step": 622 }, { "epoch": 0.18219037871033777, "grad_norm": 1.4410118469577065, "learning_rate": 1.9908512223175293e-05, "loss": 0.7137601971626282, "step": 623 }, { "epoch": 0.18248281912560316, "grad_norm": 1.286772355171783, "learning_rate": 1.9907858579311448e-05, "loss": 0.6395502090454102, "step": 624 }, { "epoch": 0.18277525954086854, "grad_norm": 1.7411485569290241, "learning_rate": 1.9907202619545905e-05, "loss": 0.6747852563858032, "step": 625 }, { "epoch": 0.18306769995613395, "grad_norm": 1.3891342500470065, "learning_rate": 1.9906544344031986e-05, "loss": 0.6995632648468018, "step": 626 }, { "epoch": 0.18336014037139933, "grad_norm": 1.3916150531596103, "learning_rate": 1.9905883752923557e-05, "loss": 0.7006711363792419, "step": 627 }, { "epoch": 0.18365258078666472, "grad_norm": 1.189158109720048, "learning_rate": 1.990522084637503e-05, "loss": 0.660778820514679, "step": 628 }, { "epoch": 0.1839450212019301, "grad_norm": 1.258003733155152, "learning_rate": 1.9904555624541362e-05, "loss": 0.5826665163040161, "step": 629 }, { "epoch": 0.1842374616171955, "grad_norm": 1.5565251427155322, "learning_rate": 1.990388808757803e-05, "loss": 0.8064266443252563, "step": 630 }, { "epoch": 0.1845299020324609, "grad_norm": 1.3066621609893527, "learning_rate": 1.9903218235641078e-05, "loss": 0.6856451034545898, "step": 631 }, { "epoch": 0.18482234244772627, "grad_norm": 1.325447510265949, "learning_rate": 1.9902546068887076e-05, "loss": 0.6423801183700562, "step": 632 }, { "epoch": 0.18511478286299166, "grad_norm": 1.252931011950935, "learning_rate": 1.9901871587473135e-05, "loss": 0.6903005242347717, "step": 633 }, { "epoch": 0.18540722327825707, "grad_norm": 1.2981623515351661, "learning_rate": 1.9901194791556916e-05, "loss": 0.636742115020752, "step": 634 }, { "epoch": 0.18569966369352245, "grad_norm": 1.154196245030106, "learning_rate": 1.9900515681296614e-05, "loss": 0.6541105508804321, "step": 635 }, { "epoch": 0.18599210410878783, "grad_norm": 1.2463484642096474, "learning_rate": 1.9899834256850973e-05, "loss": 0.7026485204696655, "step": 636 }, { "epoch": 0.1862845445240532, "grad_norm": 1.2626549460002545, "learning_rate": 1.989915051837926e-05, "loss": 0.6232702732086182, "step": 637 }, { "epoch": 0.18657698493931862, "grad_norm": 1.222405284140282, "learning_rate": 1.9898464466041306e-05, "loss": 0.5971217155456543, "step": 638 }, { "epoch": 0.186869425354584, "grad_norm": 1.228365693552395, "learning_rate": 1.9897776099997463e-05, "loss": 0.7942230701446533, "step": 639 }, { "epoch": 0.1871618657698494, "grad_norm": 1.4547764939553913, "learning_rate": 1.9897085420408637e-05, "loss": 0.6578072309494019, "step": 640 }, { "epoch": 0.18745430618511477, "grad_norm": 1.3118111344764942, "learning_rate": 1.989639242743627e-05, "loss": 0.6928422451019287, "step": 641 }, { "epoch": 0.18774674660038018, "grad_norm": 1.4232777703090678, "learning_rate": 1.9895697121242346e-05, "loss": 0.7656213641166687, "step": 642 }, { "epoch": 0.18803918701564556, "grad_norm": 1.3841907158773847, "learning_rate": 1.9894999501989383e-05, "loss": 0.6540038585662842, "step": 643 }, { "epoch": 0.18833162743091095, "grad_norm": 1.5637672668766274, "learning_rate": 1.989429956984045e-05, "loss": 0.707741379737854, "step": 644 }, { "epoch": 0.18862406784617633, "grad_norm": 1.2389494128425964, "learning_rate": 1.9893597324959156e-05, "loss": 0.6191326379776001, "step": 645 }, { "epoch": 0.18891650826144174, "grad_norm": 1.2174290538744046, "learning_rate": 1.9892892767509634e-05, "loss": 0.616736114025116, "step": 646 }, { "epoch": 0.18920894867670712, "grad_norm": 1.4366227278982104, "learning_rate": 1.989218589765658e-05, "loss": 0.803301215171814, "step": 647 }, { "epoch": 0.1895013890919725, "grad_norm": 1.2775653707157333, "learning_rate": 1.989147671556522e-05, "loss": 0.6528021097183228, "step": 648 }, { "epoch": 0.1897938295072379, "grad_norm": 1.5463247112798635, "learning_rate": 1.9890765221401314e-05, "loss": 0.6966919898986816, "step": 649 }, { "epoch": 0.1900862699225033, "grad_norm": 1.2768484224289256, "learning_rate": 1.9890051415331178e-05, "loss": 0.7223595380783081, "step": 650 }, { "epoch": 0.19037871033776868, "grad_norm": 1.404271714764208, "learning_rate": 1.9889335297521656e-05, "loss": 0.6727452278137207, "step": 651 }, { "epoch": 0.19067115075303406, "grad_norm": 1.5662163632688932, "learning_rate": 1.988861686814014e-05, "loss": 0.7008258104324341, "step": 652 }, { "epoch": 0.19096359116829945, "grad_norm": 1.3756400508505757, "learning_rate": 1.988789612735455e-05, "loss": 0.7624703049659729, "step": 653 }, { "epoch": 0.19125603158356486, "grad_norm": 1.4133612106119275, "learning_rate": 1.988717307533336e-05, "loss": 0.6813088655471802, "step": 654 }, { "epoch": 0.19154847199883024, "grad_norm": 1.1919173127519105, "learning_rate": 1.988644771224558e-05, "loss": 0.5401284694671631, "step": 655 }, { "epoch": 0.19184091241409562, "grad_norm": 1.4613018451006843, "learning_rate": 1.9885720038260756e-05, "loss": 0.6805379986763, "step": 656 }, { "epoch": 0.19213335282936103, "grad_norm": 1.5412845974712732, "learning_rate": 1.9884990053548982e-05, "loss": 0.6449974775314331, "step": 657 }, { "epoch": 0.19242579324462641, "grad_norm": 1.3481077932409014, "learning_rate": 1.988425775828088e-05, "loss": 0.6940032839775085, "step": 658 }, { "epoch": 0.1927182336598918, "grad_norm": 1.3088210596354761, "learning_rate": 1.9883523152627626e-05, "loss": 0.7089565396308899, "step": 659 }, { "epoch": 0.19301067407515718, "grad_norm": 1.3865316758332553, "learning_rate": 1.9882786236760932e-05, "loss": 0.7508438229560852, "step": 660 }, { "epoch": 0.1933031144904226, "grad_norm": 1.6156320166139564, "learning_rate": 1.988204701085304e-05, "loss": 0.6828616261482239, "step": 661 }, { "epoch": 0.19359555490568797, "grad_norm": 1.2372815991073003, "learning_rate": 1.9881305475076744e-05, "loss": 0.6652963161468506, "step": 662 }, { "epoch": 0.19388799532095335, "grad_norm": 1.2410743539313074, "learning_rate": 1.988056162960537e-05, "loss": 0.6859447360038757, "step": 663 }, { "epoch": 0.19418043573621874, "grad_norm": 1.4440746421071415, "learning_rate": 1.9879815474612794e-05, "loss": 0.693805992603302, "step": 664 }, { "epoch": 0.19447287615148415, "grad_norm": 1.359257774367856, "learning_rate": 1.987906701027342e-05, "loss": 0.7028747200965881, "step": 665 }, { "epoch": 0.19476531656674953, "grad_norm": 1.2833261279779522, "learning_rate": 1.9878316236762195e-05, "loss": 0.7492112517356873, "step": 666 }, { "epoch": 0.1950577569820149, "grad_norm": 1.2065346249489062, "learning_rate": 1.9877563154254613e-05, "loss": 0.5394963026046753, "step": 667 }, { "epoch": 0.1953501973972803, "grad_norm": 1.1848542596539768, "learning_rate": 1.98768077629267e-05, "loss": 0.5185493230819702, "step": 668 }, { "epoch": 0.1956426378125457, "grad_norm": 1.2600065416138704, "learning_rate": 1.9876050062955027e-05, "loss": 0.7279829382896423, "step": 669 }, { "epoch": 0.1959350782278111, "grad_norm": 1.3533145550923509, "learning_rate": 1.9875290054516692e-05, "loss": 0.7437206506729126, "step": 670 }, { "epoch": 0.19622751864307647, "grad_norm": 1.6022192807514979, "learning_rate": 1.9874527737789358e-05, "loss": 0.7294617891311646, "step": 671 }, { "epoch": 0.19651995905834185, "grad_norm": 1.3433918645025815, "learning_rate": 1.9873763112951198e-05, "loss": 0.7710307240486145, "step": 672 }, { "epoch": 0.19681239947360726, "grad_norm": 1.3797998364213817, "learning_rate": 1.9872996180180947e-05, "loss": 0.690025806427002, "step": 673 }, { "epoch": 0.19710483988887265, "grad_norm": 1.2826936342217614, "learning_rate": 1.9872226939657867e-05, "loss": 0.6690589189529419, "step": 674 }, { "epoch": 0.19739728030413803, "grad_norm": 1.31971712284742, "learning_rate": 1.9871455391561764e-05, "loss": 0.7587239742279053, "step": 675 }, { "epoch": 0.1976897207194034, "grad_norm": 1.2583882254944232, "learning_rate": 1.987068153607298e-05, "loss": 0.8048006296157837, "step": 676 }, { "epoch": 0.19798216113466882, "grad_norm": 1.4904938665104162, "learning_rate": 1.9869905373372402e-05, "loss": 0.721023678779602, "step": 677 }, { "epoch": 0.1982746015499342, "grad_norm": 1.2975987405043754, "learning_rate": 1.9869126903641457e-05, "loss": 0.646798849105835, "step": 678 }, { "epoch": 0.1985670419651996, "grad_norm": 1.2591898865565592, "learning_rate": 1.9868346127062098e-05, "loss": 0.597393274307251, "step": 679 }, { "epoch": 0.19885948238046497, "grad_norm": 1.2773189541737207, "learning_rate": 1.9867563043816836e-05, "loss": 0.8619129657745361, "step": 680 }, { "epoch": 0.19915192279573038, "grad_norm": 1.2343587826225086, "learning_rate": 1.986677765408871e-05, "loss": 0.5391764640808105, "step": 681 }, { "epoch": 0.19944436321099576, "grad_norm": 1.360221019641669, "learning_rate": 1.9865989958061297e-05, "loss": 0.8185729384422302, "step": 682 }, { "epoch": 0.19973680362626114, "grad_norm": 1.3798220626145994, "learning_rate": 1.9865199955918712e-05, "loss": 0.6629397869110107, "step": 683 }, { "epoch": 0.20002924404152653, "grad_norm": 1.2700323386046573, "learning_rate": 1.9864407647845626e-05, "loss": 0.6752325296401978, "step": 684 }, { "epoch": 0.20032168445679194, "grad_norm": 1.4583632577866723, "learning_rate": 1.9863613034027224e-05, "loss": 0.8509782552719116, "step": 685 }, { "epoch": 0.20061412487205732, "grad_norm": 1.2832087066986109, "learning_rate": 1.986281611464925e-05, "loss": 0.5573478937149048, "step": 686 }, { "epoch": 0.2009065652873227, "grad_norm": 1.4672386586086157, "learning_rate": 1.9862016889897976e-05, "loss": 0.8152032494544983, "step": 687 }, { "epoch": 0.20119900570258809, "grad_norm": 1.2878245307564982, "learning_rate": 1.9861215359960217e-05, "loss": 0.6346902847290039, "step": 688 }, { "epoch": 0.2014914461178535, "grad_norm": 1.3877152633732261, "learning_rate": 1.986041152502332e-05, "loss": 0.6608721017837524, "step": 689 }, { "epoch": 0.20178388653311888, "grad_norm": 1.5061562575575014, "learning_rate": 1.9859605385275188e-05, "loss": 0.7753713130950928, "step": 690 }, { "epoch": 0.20207632694838426, "grad_norm": 1.2917361787707549, "learning_rate": 1.9858796940904238e-05, "loss": 0.6747434139251709, "step": 691 }, { "epoch": 0.20236876736364964, "grad_norm": 1.4853341728710303, "learning_rate": 1.9857986192099446e-05, "loss": 0.7263737320899963, "step": 692 }, { "epoch": 0.20266120777891505, "grad_norm": 1.2072706917482865, "learning_rate": 1.9857173139050324e-05, "loss": 0.7910827994346619, "step": 693 }, { "epoch": 0.20295364819418044, "grad_norm": 1.479189890111576, "learning_rate": 1.9856357781946913e-05, "loss": 0.7245683670043945, "step": 694 }, { "epoch": 0.20324608860944582, "grad_norm": 1.146324196354459, "learning_rate": 1.9855540120979794e-05, "loss": 0.7440140247344971, "step": 695 }, { "epoch": 0.20353852902471123, "grad_norm": 1.823699641073059, "learning_rate": 1.9854720156340096e-05, "loss": 0.7485358715057373, "step": 696 }, { "epoch": 0.2038309694399766, "grad_norm": 1.3927934028554216, "learning_rate": 1.985389788821948e-05, "loss": 0.7658560872077942, "step": 697 }, { "epoch": 0.204123409855242, "grad_norm": 1.5269096149843602, "learning_rate": 1.9853073316810144e-05, "loss": 0.7366135120391846, "step": 698 }, { "epoch": 0.20441585027050738, "grad_norm": 1.2008198015347107, "learning_rate": 1.985224644230483e-05, "loss": 0.622355580329895, "step": 699 }, { "epoch": 0.2047082906857728, "grad_norm": 1.1924050316279482, "learning_rate": 1.985141726489681e-05, "loss": 0.6123125553131104, "step": 700 }, { "epoch": 0.20500073110103817, "grad_norm": 1.3537888634275872, "learning_rate": 1.9850585784779907e-05, "loss": 0.6768301725387573, "step": 701 }, { "epoch": 0.20529317151630355, "grad_norm": 1.2390814549745153, "learning_rate": 1.9849752002148465e-05, "loss": 0.6562466621398926, "step": 702 }, { "epoch": 0.20558561193156893, "grad_norm": 1.5562868949340583, "learning_rate": 1.984891591719738e-05, "loss": 0.7818280458450317, "step": 703 }, { "epoch": 0.20587805234683434, "grad_norm": 1.3407102317592055, "learning_rate": 1.9848077530122083e-05, "loss": 0.7144001722335815, "step": 704 }, { "epoch": 0.20617049276209973, "grad_norm": 1.1671039191657233, "learning_rate": 1.9847236841118537e-05, "loss": 0.700564980506897, "step": 705 }, { "epoch": 0.2064629331773651, "grad_norm": 1.3051666135645792, "learning_rate": 1.984639385038326e-05, "loss": 0.5933517217636108, "step": 706 }, { "epoch": 0.2067553735926305, "grad_norm": 1.2749925819283578, "learning_rate": 1.9845548558113278e-05, "loss": 0.6174886226654053, "step": 707 }, { "epoch": 0.2070478140078959, "grad_norm": 1.3159599421199524, "learning_rate": 1.9844700964506188e-05, "loss": 0.7241572141647339, "step": 708 }, { "epoch": 0.20734025442316129, "grad_norm": 1.227834334214839, "learning_rate": 1.9843851069760103e-05, "loss": 0.6620675325393677, "step": 709 }, { "epoch": 0.20763269483842667, "grad_norm": 1.3263327729601424, "learning_rate": 1.9842998874073682e-05, "loss": 0.6115273237228394, "step": 710 }, { "epoch": 0.20792513525369205, "grad_norm": 1.2961824988419117, "learning_rate": 1.984214437764612e-05, "loss": 0.6871848106384277, "step": 711 }, { "epoch": 0.20821757566895746, "grad_norm": 1.3134080639211354, "learning_rate": 1.9841287580677152e-05, "loss": 0.6887271404266357, "step": 712 }, { "epoch": 0.20851001608422284, "grad_norm": 1.4994035488495783, "learning_rate": 1.9840428483367046e-05, "loss": 0.8519056439399719, "step": 713 }, { "epoch": 0.20880245649948823, "grad_norm": 1.1754556134484295, "learning_rate": 1.9839567085916617e-05, "loss": 0.8168978691101074, "step": 714 }, { "epoch": 0.2090948969147536, "grad_norm": 1.3651960767502735, "learning_rate": 1.98387033885272e-05, "loss": 0.6565415859222412, "step": 715 }, { "epoch": 0.20938733733001902, "grad_norm": 1.3008644261492222, "learning_rate": 1.9837837391400697e-05, "loss": 0.7305471897125244, "step": 716 }, { "epoch": 0.2096797777452844, "grad_norm": 1.4799180289336367, "learning_rate": 1.9836969094739512e-05, "loss": 0.7676819562911987, "step": 717 }, { "epoch": 0.20997221816054978, "grad_norm": 1.8463650009400876, "learning_rate": 1.983609849874661e-05, "loss": 0.6519052982330322, "step": 718 }, { "epoch": 0.21026465857581517, "grad_norm": 1.2876599445155823, "learning_rate": 1.9835225603625488e-05, "loss": 0.6298089623451233, "step": 719 }, { "epoch": 0.21055709899108058, "grad_norm": 1.3906710149258825, "learning_rate": 1.9834350409580184e-05, "loss": 0.6384454369544983, "step": 720 }, { "epoch": 0.21084953940634596, "grad_norm": 1.1568343654967514, "learning_rate": 1.9833472916815264e-05, "loss": 0.6335986852645874, "step": 721 }, { "epoch": 0.21114197982161134, "grad_norm": 1.3831022749264381, "learning_rate": 1.983259312553584e-05, "loss": 0.6587867736816406, "step": 722 }, { "epoch": 0.21143442023687672, "grad_norm": 1.4202837808347009, "learning_rate": 1.9831711035947552e-05, "loss": 0.6884294748306274, "step": 723 }, { "epoch": 0.21172686065214213, "grad_norm": 1.3257507653834097, "learning_rate": 1.983082664825659e-05, "loss": 0.7094298601150513, "step": 724 }, { "epoch": 0.21201930106740752, "grad_norm": 1.2528953355997736, "learning_rate": 1.982993996266967e-05, "loss": 0.736876368522644, "step": 725 }, { "epoch": 0.2123117414826729, "grad_norm": 1.3690939580337487, "learning_rate": 1.9829050979394052e-05, "loss": 0.7802199125289917, "step": 726 }, { "epoch": 0.21260418189793828, "grad_norm": 1.1986325257536081, "learning_rate": 1.9828159698637527e-05, "loss": 0.602590799331665, "step": 727 }, { "epoch": 0.2128966223132037, "grad_norm": 1.2705657575851783, "learning_rate": 1.982726612060843e-05, "loss": 0.6855295896530151, "step": 728 }, { "epoch": 0.21318906272846908, "grad_norm": 1.3075577627317818, "learning_rate": 1.982637024551563e-05, "loss": 0.7174949645996094, "step": 729 }, { "epoch": 0.21348150314373446, "grad_norm": 1.404568014095412, "learning_rate": 1.9825472073568527e-05, "loss": 0.7002695798873901, "step": 730 }, { "epoch": 0.21377394355899984, "grad_norm": 1.3606210741478622, "learning_rate": 1.982457160497707e-05, "loss": 0.7256268262863159, "step": 731 }, { "epoch": 0.21406638397426525, "grad_norm": 1.6598974008247112, "learning_rate": 1.9823668839951732e-05, "loss": 0.8223557472229004, "step": 732 }, { "epoch": 0.21435882438953063, "grad_norm": 1.361285088499868, "learning_rate": 1.982276377870353e-05, "loss": 0.760543942451477, "step": 733 }, { "epoch": 0.21465126480479602, "grad_norm": 1.1189262427603888, "learning_rate": 1.982185642144402e-05, "loss": 0.5587141513824463, "step": 734 }, { "epoch": 0.21494370522006143, "grad_norm": 1.5077440828298982, "learning_rate": 1.9820946768385295e-05, "loss": 0.5775829553604126, "step": 735 }, { "epoch": 0.2152361456353268, "grad_norm": 1.2761529870001347, "learning_rate": 1.982003481973997e-05, "loss": 0.6654443144798279, "step": 736 }, { "epoch": 0.2155285860505922, "grad_norm": 1.5826837327135188, "learning_rate": 1.9819120575721212e-05, "loss": 0.7963466048240662, "step": 737 }, { "epoch": 0.21582102646585757, "grad_norm": 1.3788031698645051, "learning_rate": 1.981820403654272e-05, "loss": 0.6748678684234619, "step": 738 }, { "epoch": 0.21611346688112298, "grad_norm": 1.4155297807006182, "learning_rate": 1.9817285202418733e-05, "loss": 0.7041783928871155, "step": 739 }, { "epoch": 0.21640590729638837, "grad_norm": 1.5390789301713295, "learning_rate": 1.981636407356402e-05, "loss": 0.8008041381835938, "step": 740 }, { "epoch": 0.21669834771165375, "grad_norm": 1.4349473190399622, "learning_rate": 1.9815440650193887e-05, "loss": 0.6873682141304016, "step": 741 }, { "epoch": 0.21699078812691913, "grad_norm": 1.4041288075629241, "learning_rate": 1.981451493252418e-05, "loss": 0.6316831111907959, "step": 742 }, { "epoch": 0.21728322854218454, "grad_norm": 1.3377112960270812, "learning_rate": 1.9813586920771283e-05, "loss": 0.6481543779373169, "step": 743 }, { "epoch": 0.21757566895744992, "grad_norm": 1.2613104485847573, "learning_rate": 1.9812656615152112e-05, "loss": 0.6642731428146362, "step": 744 }, { "epoch": 0.2178681093727153, "grad_norm": 1.4870873028073741, "learning_rate": 1.9811724015884115e-05, "loss": 0.6769483089447021, "step": 745 }, { "epoch": 0.2181605497879807, "grad_norm": 1.4050593471281791, "learning_rate": 1.981078912318529e-05, "loss": 0.6397525072097778, "step": 746 }, { "epoch": 0.2184529902032461, "grad_norm": 1.170420294448055, "learning_rate": 1.9809851937274154e-05, "loss": 0.4963756203651428, "step": 747 }, { "epoch": 0.21874543061851148, "grad_norm": 1.6049508757911466, "learning_rate": 1.9808912458369774e-05, "loss": 0.7352936267852783, "step": 748 }, { "epoch": 0.21903787103377687, "grad_norm": 1.3947943752325116, "learning_rate": 1.980797068669175e-05, "loss": 0.7177609205245972, "step": 749 }, { "epoch": 0.21933031144904225, "grad_norm": 1.2819324457206713, "learning_rate": 1.980702662246021e-05, "loss": 0.76703941822052, "step": 750 }, { "epoch": 0.21962275186430766, "grad_norm": 1.4885423867402507, "learning_rate": 1.980608026589582e-05, "loss": 0.8591324090957642, "step": 751 }, { "epoch": 0.21991519227957304, "grad_norm": 1.1920075550965599, "learning_rate": 1.9805131617219792e-05, "loss": 0.6216185092926025, "step": 752 }, { "epoch": 0.22020763269483842, "grad_norm": 1.359972752643247, "learning_rate": 1.9804180676653867e-05, "loss": 0.6067323684692383, "step": 753 }, { "epoch": 0.2205000731101038, "grad_norm": 1.329886038437426, "learning_rate": 1.9803227444420316e-05, "loss": 0.5832521319389343, "step": 754 }, { "epoch": 0.22079251352536922, "grad_norm": 1.3701144460168073, "learning_rate": 1.9802271920741957e-05, "loss": 0.6181083917617798, "step": 755 }, { "epoch": 0.2210849539406346, "grad_norm": 1.6323941211416428, "learning_rate": 1.9801314105842135e-05, "loss": 0.614393949508667, "step": 756 }, { "epoch": 0.22137739435589998, "grad_norm": 1.4783150089736257, "learning_rate": 1.980035399994473e-05, "loss": 0.7598476409912109, "step": 757 }, { "epoch": 0.22166983477116536, "grad_norm": 1.3445249209174277, "learning_rate": 1.979939160327417e-05, "loss": 0.7185830473899841, "step": 758 }, { "epoch": 0.22196227518643077, "grad_norm": 1.2604381133839313, "learning_rate": 1.9798426916055403e-05, "loss": 0.6672362089157104, "step": 759 }, { "epoch": 0.22225471560169616, "grad_norm": 1.323605486489286, "learning_rate": 1.9797459938513918e-05, "loss": 0.60948646068573, "step": 760 }, { "epoch": 0.22254715601696154, "grad_norm": 1.376081699980774, "learning_rate": 1.979649067087574e-05, "loss": 0.6073893308639526, "step": 761 }, { "epoch": 0.22283959643222692, "grad_norm": 2.11374968768554, "learning_rate": 1.9795519113367434e-05, "loss": 0.7521525025367737, "step": 762 }, { "epoch": 0.22313203684749233, "grad_norm": 1.3631196959673009, "learning_rate": 1.979454526621609e-05, "loss": 0.7281486988067627, "step": 763 }, { "epoch": 0.22342447726275771, "grad_norm": 1.3466801989985047, "learning_rate": 1.9793569129649345e-05, "loss": 0.5628652572631836, "step": 764 }, { "epoch": 0.2237169176780231, "grad_norm": 1.7030188389110175, "learning_rate": 1.9792590703895364e-05, "loss": 0.9115084409713745, "step": 765 }, { "epoch": 0.22400935809328848, "grad_norm": 1.1906430527809846, "learning_rate": 1.9791609989182843e-05, "loss": 0.5793902277946472, "step": 766 }, { "epoch": 0.2243017985085539, "grad_norm": 1.319680929079464, "learning_rate": 1.979062698574102e-05, "loss": 0.5811150074005127, "step": 767 }, { "epoch": 0.22459423892381927, "grad_norm": 1.8337754364313175, "learning_rate": 1.978964169379967e-05, "loss": 0.7450643181800842, "step": 768 }, { "epoch": 0.22488667933908466, "grad_norm": 1.2696945630714354, "learning_rate": 1.9788654113589093e-05, "loss": 0.6617515087127686, "step": 769 }, { "epoch": 0.22517911975435004, "grad_norm": 1.1685310150494228, "learning_rate": 1.9787664245340137e-05, "loss": 0.6240406036376953, "step": 770 }, { "epoch": 0.22547156016961545, "grad_norm": 1.450209328719988, "learning_rate": 1.978667208928417e-05, "loss": 0.694688081741333, "step": 771 }, { "epoch": 0.22576400058488083, "grad_norm": 1.274649499261431, "learning_rate": 1.9785677645653107e-05, "loss": 0.6855190396308899, "step": 772 }, { "epoch": 0.2260564410001462, "grad_norm": 1.5531275718881066, "learning_rate": 1.978468091467939e-05, "loss": 0.8132567405700684, "step": 773 }, { "epoch": 0.22634888141541162, "grad_norm": 1.2819374084058084, "learning_rate": 1.9783681896596006e-05, "loss": 0.7011039853096008, "step": 774 }, { "epoch": 0.226641321830677, "grad_norm": 1.2317633693628418, "learning_rate": 1.9782680591636462e-05, "loss": 0.5754199028015137, "step": 775 }, { "epoch": 0.2269337622459424, "grad_norm": 1.3342396229289735, "learning_rate": 1.9781677000034807e-05, "loss": 0.7518784403800964, "step": 776 }, { "epoch": 0.22722620266120777, "grad_norm": 1.4619385156109748, "learning_rate": 1.978067112202563e-05, "loss": 0.6802738904953003, "step": 777 }, { "epoch": 0.22751864307647318, "grad_norm": 1.2836639966818497, "learning_rate": 1.9779662957844046e-05, "loss": 0.7667055726051331, "step": 778 }, { "epoch": 0.22781108349173856, "grad_norm": 1.3402387686228199, "learning_rate": 1.9778652507725704e-05, "loss": 0.7590975165367126, "step": 779 }, { "epoch": 0.22810352390700395, "grad_norm": 1.5322182562597366, "learning_rate": 1.9777639771906795e-05, "loss": 0.8009685277938843, "step": 780 }, { "epoch": 0.22839596432226933, "grad_norm": 1.2184372022517955, "learning_rate": 1.977662475062404e-05, "loss": 0.6094385981559753, "step": 781 }, { "epoch": 0.22868840473753474, "grad_norm": 1.2258891813878965, "learning_rate": 1.977560744411469e-05, "loss": 0.5919946432113647, "step": 782 }, { "epoch": 0.22898084515280012, "grad_norm": 1.3994922066796667, "learning_rate": 1.9774587852616537e-05, "loss": 0.7616838216781616, "step": 783 }, { "epoch": 0.2292732855680655, "grad_norm": 1.0864449553171927, "learning_rate": 1.9773565976367903e-05, "loss": 0.5107603073120117, "step": 784 }, { "epoch": 0.2295657259833309, "grad_norm": 1.3785741559157736, "learning_rate": 1.9772541815607645e-05, "loss": 0.6819792985916138, "step": 785 }, { "epoch": 0.2298581663985963, "grad_norm": 1.3095462010721952, "learning_rate": 1.977151537057516e-05, "loss": 0.748264729976654, "step": 786 }, { "epoch": 0.23015060681386168, "grad_norm": 1.511078591377817, "learning_rate": 1.977048664151037e-05, "loss": 0.7341534495353699, "step": 787 }, { "epoch": 0.23044304722912706, "grad_norm": 1.3481462417331131, "learning_rate": 1.976945562865373e-05, "loss": 0.569247841835022, "step": 788 }, { "epoch": 0.23073548764439245, "grad_norm": 1.4792545387125078, "learning_rate": 1.9768422332246233e-05, "loss": 0.7003188133239746, "step": 789 }, { "epoch": 0.23102792805965786, "grad_norm": 1.222254549739519, "learning_rate": 1.9767386752529415e-05, "loss": 0.6484041810035706, "step": 790 }, { "epoch": 0.23132036847492324, "grad_norm": 1.2921197831934208, "learning_rate": 1.9766348889745324e-05, "loss": 0.6635721921920776, "step": 791 }, { "epoch": 0.23161280889018862, "grad_norm": 1.3606759597173597, "learning_rate": 1.9765308744136568e-05, "loss": 0.5855914354324341, "step": 792 }, { "epoch": 0.231905249305454, "grad_norm": 1.3590534475124305, "learning_rate": 1.976426631594626e-05, "loss": 0.7606059312820435, "step": 793 }, { "epoch": 0.2321976897207194, "grad_norm": 1.399907486961256, "learning_rate": 1.976322160541807e-05, "loss": 0.7080718278884888, "step": 794 }, { "epoch": 0.2324901301359848, "grad_norm": 1.6372996876909576, "learning_rate": 1.9762174612796195e-05, "loss": 0.8838162422180176, "step": 795 }, { "epoch": 0.23278257055125018, "grad_norm": 1.1906217629409164, "learning_rate": 1.9761125338325357e-05, "loss": 0.5776950120925903, "step": 796 }, { "epoch": 0.23307501096651556, "grad_norm": 1.4075761903811832, "learning_rate": 1.9760073782250817e-05, "loss": 0.7455854415893555, "step": 797 }, { "epoch": 0.23336745138178097, "grad_norm": 1.4778525028622385, "learning_rate": 1.9759019944818375e-05, "loss": 0.7160001993179321, "step": 798 }, { "epoch": 0.23365989179704635, "grad_norm": 1.2680712563874137, "learning_rate": 1.9757963826274357e-05, "loss": 0.6282311081886292, "step": 799 }, { "epoch": 0.23395233221231174, "grad_norm": 1.3617325718771658, "learning_rate": 1.9756905426865626e-05, "loss": 0.6479916572570801, "step": 800 }, { "epoch": 0.23424477262757712, "grad_norm": 1.2789508587545713, "learning_rate": 1.9755844746839573e-05, "loss": 0.6519639492034912, "step": 801 }, { "epoch": 0.23453721304284253, "grad_norm": 1.4374021901805083, "learning_rate": 1.9754781786444122e-05, "loss": 0.5591464638710022, "step": 802 }, { "epoch": 0.2348296534581079, "grad_norm": 1.6094479116430809, "learning_rate": 1.9753716545927745e-05, "loss": 0.6378511190414429, "step": 803 }, { "epoch": 0.2351220938733733, "grad_norm": 1.593476138868701, "learning_rate": 1.9752649025539424e-05, "loss": 0.7932485342025757, "step": 804 }, { "epoch": 0.23541453428863868, "grad_norm": 1.327032855057245, "learning_rate": 1.9751579225528694e-05, "loss": 0.7344592809677124, "step": 805 }, { "epoch": 0.2357069747039041, "grad_norm": 1.5060138108990804, "learning_rate": 1.975050714614561e-05, "loss": 0.7879096269607544, "step": 806 }, { "epoch": 0.23599941511916947, "grad_norm": 1.31391427286964, "learning_rate": 1.9749432787640764e-05, "loss": 0.6428436040878296, "step": 807 }, { "epoch": 0.23629185553443485, "grad_norm": 1.1924129057081494, "learning_rate": 1.9748356150265283e-05, "loss": 0.7018194198608398, "step": 808 }, { "epoch": 0.23658429594970024, "grad_norm": 1.3487665777693398, "learning_rate": 1.974727723427082e-05, "loss": 0.7696131467819214, "step": 809 }, { "epoch": 0.23687673636496565, "grad_norm": 1.2806200429683234, "learning_rate": 1.974619603990957e-05, "loss": 0.6429424285888672, "step": 810 }, { "epoch": 0.23716917678023103, "grad_norm": 1.4197164517856635, "learning_rate": 1.9745112567434254e-05, "loss": 0.7205626964569092, "step": 811 }, { "epoch": 0.2374616171954964, "grad_norm": 1.206628595880062, "learning_rate": 1.9744026817098122e-05, "loss": 0.7018989324569702, "step": 812 }, { "epoch": 0.23775405761076182, "grad_norm": 1.4562632106002198, "learning_rate": 1.974293878915497e-05, "loss": 0.6861958503723145, "step": 813 }, { "epoch": 0.2380464980260272, "grad_norm": 1.8277672251442496, "learning_rate": 1.9741848483859117e-05, "loss": 0.687503457069397, "step": 814 }, { "epoch": 0.23833893844129259, "grad_norm": 1.6702364448324796, "learning_rate": 1.9740755901465408e-05, "loss": 0.7808526754379272, "step": 815 }, { "epoch": 0.23863137885655797, "grad_norm": 1.4777579354772585, "learning_rate": 1.973966104222923e-05, "loss": 0.7387286424636841, "step": 816 }, { "epoch": 0.23892381927182338, "grad_norm": 1.2761337726208828, "learning_rate": 1.9738563906406508e-05, "loss": 0.6262110471725464, "step": 817 }, { "epoch": 0.23921625968708876, "grad_norm": 1.2308979686961945, "learning_rate": 1.973746449425368e-05, "loss": 0.6618830561637878, "step": 818 }, { "epoch": 0.23950870010235414, "grad_norm": 1.3525742869997646, "learning_rate": 1.9736362806027732e-05, "loss": 0.5866184234619141, "step": 819 }, { "epoch": 0.23980114051761953, "grad_norm": 1.1916120410649227, "learning_rate": 1.9735258841986175e-05, "loss": 0.6413314342498779, "step": 820 }, { "epoch": 0.24009358093288494, "grad_norm": 1.3855684564301443, "learning_rate": 1.9734152602387054e-05, "loss": 0.6125906109809875, "step": 821 }, { "epoch": 0.24038602134815032, "grad_norm": 1.3708182915073268, "learning_rate": 1.973304408748895e-05, "loss": 0.6128122806549072, "step": 822 }, { "epoch": 0.2406784617634157, "grad_norm": 1.4552398411515748, "learning_rate": 1.973193329755097e-05, "loss": 0.7763051986694336, "step": 823 }, { "epoch": 0.24097090217868108, "grad_norm": 1.406068384249821, "learning_rate": 1.9730820232832747e-05, "loss": 0.7187550067901611, "step": 824 }, { "epoch": 0.2412633425939465, "grad_norm": 1.4089612736012989, "learning_rate": 1.972970489359446e-05, "loss": 0.6564748287200928, "step": 825 }, { "epoch": 0.24155578300921188, "grad_norm": 1.2962838731212396, "learning_rate": 1.9728587280096815e-05, "loss": 0.6573271751403809, "step": 826 }, { "epoch": 0.24184822342447726, "grad_norm": 1.606482466732529, "learning_rate": 1.9727467392601042e-05, "loss": 0.8032153844833374, "step": 827 }, { "epoch": 0.24214066383974264, "grad_norm": 1.344534982986645, "learning_rate": 1.972634523136891e-05, "loss": 0.6781449913978577, "step": 828 }, { "epoch": 0.24243310425500805, "grad_norm": 1.3970734980370678, "learning_rate": 1.972522079666272e-05, "loss": 0.580757737159729, "step": 829 }, { "epoch": 0.24272554467027344, "grad_norm": 1.4569992070347761, "learning_rate": 1.97240940887453e-05, "loss": 0.626894474029541, "step": 830 }, { "epoch": 0.24301798508553882, "grad_norm": 1.4885978649776115, "learning_rate": 1.9722965107880005e-05, "loss": 0.8188163042068481, "step": 831 }, { "epoch": 0.2433104255008042, "grad_norm": 1.4514623765445114, "learning_rate": 1.9721833854330734e-05, "loss": 0.6943579912185669, "step": 832 }, { "epoch": 0.2436028659160696, "grad_norm": 1.3452906489662066, "learning_rate": 1.972070032836191e-05, "loss": 0.6177504658699036, "step": 833 }, { "epoch": 0.243895306331335, "grad_norm": 1.3249219466208975, "learning_rate": 1.971956453023849e-05, "loss": 0.683998703956604, "step": 834 }, { "epoch": 0.24418774674660038, "grad_norm": 1.3523687150823345, "learning_rate": 1.9718426460225952e-05, "loss": 0.77602219581604, "step": 835 }, { "epoch": 0.24448018716186576, "grad_norm": 1.0190390519787025, "learning_rate": 1.971728611859032e-05, "loss": 0.4930742383003235, "step": 836 }, { "epoch": 0.24477262757713117, "grad_norm": 1.057766741950331, "learning_rate": 1.971614350559814e-05, "loss": 0.634628415107727, "step": 837 }, { "epoch": 0.24506506799239655, "grad_norm": 1.4273024070967653, "learning_rate": 1.971499862151649e-05, "loss": 0.6439167857170105, "step": 838 }, { "epoch": 0.24535750840766193, "grad_norm": 1.1385728991135244, "learning_rate": 1.9713851466612982e-05, "loss": 0.701258659362793, "step": 839 }, { "epoch": 0.24564994882292732, "grad_norm": 1.4590112387376561, "learning_rate": 1.9712702041155753e-05, "loss": 0.6488544344902039, "step": 840 }, { "epoch": 0.24594238923819273, "grad_norm": 1.3405708553224296, "learning_rate": 1.9711550345413476e-05, "loss": 0.6962910890579224, "step": 841 }, { "epoch": 0.2462348296534581, "grad_norm": 1.1939053963741824, "learning_rate": 1.9710396379655355e-05, "loss": 0.6617723703384399, "step": 842 }, { "epoch": 0.2465272700687235, "grad_norm": 1.2279058278823862, "learning_rate": 1.970924014415112e-05, "loss": 0.7152801752090454, "step": 843 }, { "epoch": 0.24681971048398887, "grad_norm": 1.2796222731345095, "learning_rate": 1.9708081639171035e-05, "loss": 0.6712393760681152, "step": 844 }, { "epoch": 0.24711215089925428, "grad_norm": 1.3941735155074029, "learning_rate": 1.970692086498589e-05, "loss": 0.8413758277893066, "step": 845 }, { "epoch": 0.24740459131451967, "grad_norm": 1.423836225011119, "learning_rate": 1.9705757821867015e-05, "loss": 0.6460679769515991, "step": 846 }, { "epoch": 0.24769703172978505, "grad_norm": 1.3704721229511874, "learning_rate": 1.970459251008626e-05, "loss": 0.759244441986084, "step": 847 }, { "epoch": 0.24798947214505043, "grad_norm": 1.2356631241001201, "learning_rate": 1.970342492991601e-05, "loss": 0.8148110508918762, "step": 848 }, { "epoch": 0.24828191256031584, "grad_norm": 1.2587770996787473, "learning_rate": 1.970225508162918e-05, "loss": 0.6620084047317505, "step": 849 }, { "epoch": 0.24857435297558123, "grad_norm": 1.451838551232366, "learning_rate": 1.9701082965499217e-05, "loss": 0.7090305089950562, "step": 850 }, { "epoch": 0.2488667933908466, "grad_norm": 1.2074340737341804, "learning_rate": 1.9699908581800094e-05, "loss": 0.6846730709075928, "step": 851 }, { "epoch": 0.24915923380611202, "grad_norm": 1.0752757256209107, "learning_rate": 1.9698731930806315e-05, "loss": 0.5183212757110596, "step": 852 }, { "epoch": 0.2494516742213774, "grad_norm": 1.4176078828661092, "learning_rate": 1.9697553012792915e-05, "loss": 0.6913097500801086, "step": 853 }, { "epoch": 0.24974411463664278, "grad_norm": 1.4996885245263052, "learning_rate": 1.9696371828035466e-05, "loss": 0.7896280884742737, "step": 854 }, { "epoch": 0.2500365550519082, "grad_norm": 1.4718644942105623, "learning_rate": 1.9695188376810055e-05, "loss": 0.947577714920044, "step": 855 }, { "epoch": 0.2503289954671736, "grad_norm": 1.3825164821538705, "learning_rate": 1.9694002659393306e-05, "loss": 0.7772419452667236, "step": 856 }, { "epoch": 0.25062143588243896, "grad_norm": 1.3624521016930335, "learning_rate": 1.9692814676062376e-05, "loss": 0.6255912780761719, "step": 857 }, { "epoch": 0.25091387629770434, "grad_norm": 1.3319834146029552, "learning_rate": 1.969162442709495e-05, "loss": 0.6572105884552002, "step": 858 }, { "epoch": 0.2512063167129697, "grad_norm": 1.3718275193420901, "learning_rate": 1.969043191276924e-05, "loss": 0.6387436389923096, "step": 859 }, { "epoch": 0.2514987571282351, "grad_norm": 1.1976239787141296, "learning_rate": 1.968923713336399e-05, "loss": 0.9180483222007751, "step": 860 }, { "epoch": 0.2517911975435005, "grad_norm": 1.211847411431562, "learning_rate": 1.9688040089158473e-05, "loss": 0.6830536127090454, "step": 861 }, { "epoch": 0.2520836379587659, "grad_norm": 1.6904119232689327, "learning_rate": 1.9686840780432487e-05, "loss": 0.9061588644981384, "step": 862 }, { "epoch": 0.2523760783740313, "grad_norm": 1.157670921080695, "learning_rate": 1.9685639207466365e-05, "loss": 0.558010458946228, "step": 863 }, { "epoch": 0.2526685187892967, "grad_norm": 1.1825470022948923, "learning_rate": 1.968443537054097e-05, "loss": 0.6788249611854553, "step": 864 }, { "epoch": 0.2529609592045621, "grad_norm": 1.2105730438992965, "learning_rate": 1.968322926993769e-05, "loss": 0.576469898223877, "step": 865 }, { "epoch": 0.25325339961982746, "grad_norm": 1.2982512656817862, "learning_rate": 1.9682020905938438e-05, "loss": 0.6994123458862305, "step": 866 }, { "epoch": 0.25354584003509284, "grad_norm": 1.206872992638966, "learning_rate": 1.9680810278825672e-05, "loss": 0.6929521560668945, "step": 867 }, { "epoch": 0.2538382804503582, "grad_norm": 1.273656030058159, "learning_rate": 1.9679597388882363e-05, "loss": 0.7596743106842041, "step": 868 }, { "epoch": 0.2541307208656236, "grad_norm": 1.4805809886864818, "learning_rate": 1.9678382236392013e-05, "loss": 0.7925904989242554, "step": 869 }, { "epoch": 0.25442316128088904, "grad_norm": 1.3335550122348163, "learning_rate": 1.9677164821638666e-05, "loss": 0.722467839717865, "step": 870 }, { "epoch": 0.2547156016961544, "grad_norm": 1.3131624182400288, "learning_rate": 1.9675945144906882e-05, "loss": 0.7165451049804688, "step": 871 }, { "epoch": 0.2550080421114198, "grad_norm": 1.1797512350865442, "learning_rate": 1.9674723206481746e-05, "loss": 0.5897061824798584, "step": 872 }, { "epoch": 0.2553004825266852, "grad_norm": 1.2365962649439657, "learning_rate": 1.9673499006648885e-05, "loss": 0.6634531021118164, "step": 873 }, { "epoch": 0.2555929229419506, "grad_norm": 1.3214235822507945, "learning_rate": 1.9672272545694445e-05, "loss": 0.7237584590911865, "step": 874 }, { "epoch": 0.25588536335721596, "grad_norm": 1.4848759223566366, "learning_rate": 1.967104382390511e-05, "loss": 0.6382388472557068, "step": 875 }, { "epoch": 0.25617780377248134, "grad_norm": 1.31447030866248, "learning_rate": 1.966981284156808e-05, "loss": 0.6788768768310547, "step": 876 }, { "epoch": 0.2564702441877467, "grad_norm": 1.3072783419197107, "learning_rate": 1.966857959897109e-05, "loss": 0.6347095966339111, "step": 877 }, { "epoch": 0.25676268460301216, "grad_norm": 1.4344629064681063, "learning_rate": 1.9667344096402406e-05, "loss": 0.8896903991699219, "step": 878 }, { "epoch": 0.25705512501827754, "grad_norm": 1.3876445939749689, "learning_rate": 1.966610633415082e-05, "loss": 0.71473228931427, "step": 879 }, { "epoch": 0.2573475654335429, "grad_norm": 1.3302375445053003, "learning_rate": 1.9664866312505646e-05, "loss": 0.7311601638793945, "step": 880 }, { "epoch": 0.2576400058488083, "grad_norm": 1.2472942559074918, "learning_rate": 1.9663624031756737e-05, "loss": 0.6186199188232422, "step": 881 }, { "epoch": 0.2579324462640737, "grad_norm": 1.4896774549089442, "learning_rate": 1.9662379492194467e-05, "loss": 0.8059204816818237, "step": 882 }, { "epoch": 0.25822488667933907, "grad_norm": 1.4468929069066396, "learning_rate": 1.9661132694109736e-05, "loss": 0.6065236330032349, "step": 883 }, { "epoch": 0.25851732709460445, "grad_norm": 1.182060018600662, "learning_rate": 1.965988363779398e-05, "loss": 0.6491106152534485, "step": 884 }, { "epoch": 0.25880976750986984, "grad_norm": 1.197300798410388, "learning_rate": 1.9658632323539158e-05, "loss": 0.526267945766449, "step": 885 }, { "epoch": 0.2591022079251353, "grad_norm": 1.5008074138248908, "learning_rate": 1.9657378751637755e-05, "loss": 0.812760591506958, "step": 886 }, { "epoch": 0.25939464834040066, "grad_norm": 1.807239371921464, "learning_rate": 1.9656122922382786e-05, "loss": 0.7957908511161804, "step": 887 }, { "epoch": 0.25968708875566604, "grad_norm": 1.3552357306732934, "learning_rate": 1.9654864836067796e-05, "loss": 0.7426323890686035, "step": 888 }, { "epoch": 0.2599795291709314, "grad_norm": 1.3206271267013228, "learning_rate": 1.9653604492986852e-05, "loss": 0.602961540222168, "step": 889 }, { "epoch": 0.2602719695861968, "grad_norm": 1.7789592821205134, "learning_rate": 1.965234189343455e-05, "loss": 0.8706510066986084, "step": 890 }, { "epoch": 0.2605644100014622, "grad_norm": 1.3042391493572836, "learning_rate": 1.965107703770602e-05, "loss": 0.6245810985565186, "step": 891 }, { "epoch": 0.26085685041672757, "grad_norm": 1.3389608750174764, "learning_rate": 1.964980992609691e-05, "loss": 0.7455421686172485, "step": 892 }, { "epoch": 0.261149290831993, "grad_norm": 1.3769047718413097, "learning_rate": 1.9648540558903404e-05, "loss": 0.6917043328285217, "step": 893 }, { "epoch": 0.2614417312472584, "grad_norm": 1.5543661242785587, "learning_rate": 1.9647268936422204e-05, "loss": 0.6488040685653687, "step": 894 }, { "epoch": 0.2617341716625238, "grad_norm": 1.4168880936407573, "learning_rate": 1.964599505895055e-05, "loss": 0.7416148781776428, "step": 895 }, { "epoch": 0.26202661207778916, "grad_norm": 1.2398123962846468, "learning_rate": 1.9644718926786196e-05, "loss": 0.7012773156166077, "step": 896 }, { "epoch": 0.26231905249305454, "grad_norm": 1.4024640685787384, "learning_rate": 1.9643440540227438e-05, "loss": 0.8644432425498962, "step": 897 }, { "epoch": 0.2626114929083199, "grad_norm": 1.2155057674795815, "learning_rate": 1.9642159899573084e-05, "loss": 0.614842414855957, "step": 898 }, { "epoch": 0.2629039333235853, "grad_norm": 1.406064497865486, "learning_rate": 1.964087700512248e-05, "loss": 0.7794508337974548, "step": 899 }, { "epoch": 0.2631963737388507, "grad_norm": 1.3041032890013364, "learning_rate": 1.9639591857175492e-05, "loss": 0.49217259883880615, "step": 900 }, { "epoch": 0.2634888141541161, "grad_norm": 2.5300379427879656, "learning_rate": 1.9638304456032516e-05, "loss": 0.6319605708122253, "step": 901 }, { "epoch": 0.2637812545693815, "grad_norm": 1.2937854520821135, "learning_rate": 1.9637014801994478e-05, "loss": 0.6066744327545166, "step": 902 }, { "epoch": 0.2640736949846469, "grad_norm": 1.3364560601793205, "learning_rate": 1.9635722895362824e-05, "loss": 0.7529127597808838, "step": 903 }, { "epoch": 0.26436613539991227, "grad_norm": 1.1766314649269587, "learning_rate": 1.9634428736439524e-05, "loss": 0.6026389598846436, "step": 904 }, { "epoch": 0.26465857581517765, "grad_norm": 1.1341480559887087, "learning_rate": 1.9633132325527092e-05, "loss": 0.6227229237556458, "step": 905 }, { "epoch": 0.26495101623044304, "grad_norm": 1.0934147682033295, "learning_rate": 1.9631833662928548e-05, "loss": 0.5959285497665405, "step": 906 }, { "epoch": 0.2652434566457084, "grad_norm": 1.5332323248713289, "learning_rate": 1.9630532748947445e-05, "loss": 0.8104684352874756, "step": 907 }, { "epoch": 0.2655358970609738, "grad_norm": 1.4286964634802555, "learning_rate": 1.962922958388787e-05, "loss": 0.6722325682640076, "step": 908 }, { "epoch": 0.26582833747623924, "grad_norm": 1.3146328085881052, "learning_rate": 1.962792416805442e-05, "loss": 0.5996029376983643, "step": 909 }, { "epoch": 0.2661207778915046, "grad_norm": 1.2576705371159294, "learning_rate": 1.962661650175224e-05, "loss": 0.7214776873588562, "step": 910 }, { "epoch": 0.26641321830677, "grad_norm": 1.3644451050997106, "learning_rate": 1.9625306585286986e-05, "loss": 0.6833420991897583, "step": 911 }, { "epoch": 0.2667056587220354, "grad_norm": 1.3539788924921423, "learning_rate": 1.9623994418964834e-05, "loss": 0.5571368336677551, "step": 912 }, { "epoch": 0.26699809913730077, "grad_norm": 1.3710487138213245, "learning_rate": 1.9622680003092503e-05, "loss": 0.6748533248901367, "step": 913 }, { "epoch": 0.26729053955256615, "grad_norm": 1.3715994474814863, "learning_rate": 1.9621363337977232e-05, "loss": 0.6681679487228394, "step": 914 }, { "epoch": 0.26758297996783154, "grad_norm": 1.482670676536411, "learning_rate": 1.9620044423926775e-05, "loss": 0.6839786767959595, "step": 915 }, { "epoch": 0.2678754203830969, "grad_norm": 1.4250296018843953, "learning_rate": 1.961872326124943e-05, "loss": 0.7481753826141357, "step": 916 }, { "epoch": 0.26816786079836236, "grad_norm": 1.2167024955211783, "learning_rate": 1.9617399850254e-05, "loss": 0.6044093370437622, "step": 917 }, { "epoch": 0.26846030121362774, "grad_norm": 1.284073365031053, "learning_rate": 1.9616074191249833e-05, "loss": 0.6399786472320557, "step": 918 }, { "epoch": 0.2687527416288931, "grad_norm": 1.4810486497659208, "learning_rate": 1.961474628454679e-05, "loss": 0.6769053339958191, "step": 919 }, { "epoch": 0.2690451820441585, "grad_norm": 1.3650368498715015, "learning_rate": 1.961341613045526e-05, "loss": 0.7508189678192139, "step": 920 }, { "epoch": 0.2693376224594239, "grad_norm": 1.3260194970823536, "learning_rate": 1.9612083729286164e-05, "loss": 0.728675365447998, "step": 921 }, { "epoch": 0.26963006287468927, "grad_norm": 1.241243201070507, "learning_rate": 1.9610749081350934e-05, "loss": 0.6886277794837952, "step": 922 }, { "epoch": 0.26992250328995465, "grad_norm": 1.272552251820391, "learning_rate": 1.9609412186961542e-05, "loss": 0.6756877899169922, "step": 923 }, { "epoch": 0.27021494370522003, "grad_norm": 1.3464083414999921, "learning_rate": 1.960807304643048e-05, "loss": 0.6761744022369385, "step": 924 }, { "epoch": 0.2705073841204855, "grad_norm": 1.3141872927798783, "learning_rate": 1.9606731660070758e-05, "loss": 0.6475736498832703, "step": 925 }, { "epoch": 0.27079982453575085, "grad_norm": 1.2576667239396297, "learning_rate": 1.9605388028195922e-05, "loss": 0.6169984936714172, "step": 926 }, { "epoch": 0.27109226495101624, "grad_norm": 1.36667119537221, "learning_rate": 1.9604042151120035e-05, "loss": 0.6411685943603516, "step": 927 }, { "epoch": 0.2713847053662816, "grad_norm": 1.203794827188605, "learning_rate": 1.960269402915769e-05, "loss": 0.6802625060081482, "step": 928 }, { "epoch": 0.271677145781547, "grad_norm": 1.1204382547238934, "learning_rate": 1.9601343662624e-05, "loss": 0.6321320533752441, "step": 929 }, { "epoch": 0.2719695861968124, "grad_norm": 1.1836254946940896, "learning_rate": 1.959999105183461e-05, "loss": 0.6242578029632568, "step": 930 }, { "epoch": 0.27226202661207777, "grad_norm": 1.3574626937776866, "learning_rate": 1.9598636197105672e-05, "loss": 0.8106271624565125, "step": 931 }, { "epoch": 0.2725544670273432, "grad_norm": 1.3336233570386715, "learning_rate": 1.9597279098753893e-05, "loss": 0.6810879707336426, "step": 932 }, { "epoch": 0.2728469074426086, "grad_norm": 1.4182604377271, "learning_rate": 1.959591975709647e-05, "loss": 0.6121781468391418, "step": 933 }, { "epoch": 0.27313934785787397, "grad_norm": 1.3855646528211634, "learning_rate": 1.9594558172451153e-05, "loss": 0.7347930669784546, "step": 934 }, { "epoch": 0.27343178827313935, "grad_norm": 1.7726573891466724, "learning_rate": 1.9593194345136196e-05, "loss": 0.8280940651893616, "step": 935 }, { "epoch": 0.27372422868840474, "grad_norm": 1.7069126445705718, "learning_rate": 1.959182827547039e-05, "loss": 0.8171218633651733, "step": 936 }, { "epoch": 0.2740166691036701, "grad_norm": 1.5519639216005559, "learning_rate": 1.9590459963773043e-05, "loss": 0.7350337505340576, "step": 937 }, { "epoch": 0.2743091095189355, "grad_norm": 1.2380635233009907, "learning_rate": 1.9589089410363992e-05, "loss": 0.5648026466369629, "step": 938 }, { "epoch": 0.2746015499342009, "grad_norm": 1.2184482229154892, "learning_rate": 1.9587716615563592e-05, "loss": 0.630626916885376, "step": 939 }, { "epoch": 0.2748939903494663, "grad_norm": 1.247434869071023, "learning_rate": 1.9586341579692728e-05, "loss": 0.658649206161499, "step": 940 }, { "epoch": 0.2751864307647317, "grad_norm": 1.3583264773002954, "learning_rate": 1.9584964303072804e-05, "loss": 0.6938339471817017, "step": 941 }, { "epoch": 0.2754788711799971, "grad_norm": 1.2844871691004516, "learning_rate": 1.9583584786025755e-05, "loss": 0.7124238014221191, "step": 942 }, { "epoch": 0.27577131159526247, "grad_norm": 1.295461976555009, "learning_rate": 1.9582203028874027e-05, "loss": 0.5879669189453125, "step": 943 }, { "epoch": 0.27606375201052785, "grad_norm": 1.3092326597229536, "learning_rate": 1.9580819031940605e-05, "loss": 0.6169895529747009, "step": 944 }, { "epoch": 0.27635619242579323, "grad_norm": 1.3408083006486937, "learning_rate": 1.9579432795548986e-05, "loss": 0.6367429494857788, "step": 945 }, { "epoch": 0.2766486328410586, "grad_norm": 1.294470969807804, "learning_rate": 1.9578044320023195e-05, "loss": 0.6198331117630005, "step": 946 }, { "epoch": 0.276941073256324, "grad_norm": 1.2934388501492589, "learning_rate": 1.9576653605687782e-05, "loss": 0.6731230616569519, "step": 947 }, { "epoch": 0.27723351367158944, "grad_norm": 1.3743119206413423, "learning_rate": 1.957526065286781e-05, "loss": 0.7185516953468323, "step": 948 }, { "epoch": 0.2775259540868548, "grad_norm": 1.5124791251983178, "learning_rate": 1.9573865461888882e-05, "loss": 0.7362357378005981, "step": 949 }, { "epoch": 0.2778183945021202, "grad_norm": 1.481999625276378, "learning_rate": 1.9572468033077113e-05, "loss": 0.7051525712013245, "step": 950 }, { "epoch": 0.2781108349173856, "grad_norm": 1.3167000079730038, "learning_rate": 1.9571068366759143e-05, "loss": 0.6267420053482056, "step": 951 }, { "epoch": 0.27840327533265097, "grad_norm": 1.4667668035632615, "learning_rate": 1.9569666463262136e-05, "loss": 0.649080753326416, "step": 952 }, { "epoch": 0.27869571574791635, "grad_norm": 1.1940294879505342, "learning_rate": 1.9568262322913777e-05, "loss": 0.5700061321258545, "step": 953 }, { "epoch": 0.27898815616318173, "grad_norm": 1.21562106075719, "learning_rate": 1.9566855946042274e-05, "loss": 0.6121870875358582, "step": 954 }, { "epoch": 0.2792805965784471, "grad_norm": 1.3828404656512372, "learning_rate": 1.9565447332976362e-05, "loss": 0.8294541239738464, "step": 955 }, { "epoch": 0.27957303699371255, "grad_norm": 1.2953263908127255, "learning_rate": 1.9564036484045295e-05, "loss": 0.6979323625564575, "step": 956 }, { "epoch": 0.27986547740897794, "grad_norm": 1.4787353970640398, "learning_rate": 1.9562623399578853e-05, "loss": 0.6847009658813477, "step": 957 }, { "epoch": 0.2801579178242433, "grad_norm": 1.174633661295302, "learning_rate": 1.956120807990733e-05, "loss": 0.6821733713150024, "step": 958 }, { "epoch": 0.2804503582395087, "grad_norm": 1.2766608312969014, "learning_rate": 1.955979052536155e-05, "loss": 0.6943963766098022, "step": 959 }, { "epoch": 0.2807427986547741, "grad_norm": 1.6283703947702834, "learning_rate": 1.955837073627286e-05, "loss": 0.5841893553733826, "step": 960 }, { "epoch": 0.28103523907003947, "grad_norm": 1.4526296199919857, "learning_rate": 1.955694871297313e-05, "loss": 0.7196778059005737, "step": 961 }, { "epoch": 0.28132767948530485, "grad_norm": 1.3568922084457422, "learning_rate": 1.9555524455794743e-05, "loss": 0.697501540184021, "step": 962 }, { "epoch": 0.28162011990057023, "grad_norm": 1.3269336256780513, "learning_rate": 1.9554097965070612e-05, "loss": 0.7265810966491699, "step": 963 }, { "epoch": 0.28191256031583567, "grad_norm": 1.1794879937673313, "learning_rate": 1.955266924113417e-05, "loss": 0.5766021013259888, "step": 964 }, { "epoch": 0.28220500073110105, "grad_norm": 1.1486001787824904, "learning_rate": 1.955123828431938e-05, "loss": 0.6885402202606201, "step": 965 }, { "epoch": 0.28249744114636643, "grad_norm": 1.4093622546586522, "learning_rate": 1.954980509496071e-05, "loss": 0.719329297542572, "step": 966 }, { "epoch": 0.2827898815616318, "grad_norm": 1.1657877260705576, "learning_rate": 1.954836967339316e-05, "loss": 0.5621368885040283, "step": 967 }, { "epoch": 0.2830823219768972, "grad_norm": 1.4684107409650433, "learning_rate": 1.954693201995226e-05, "loss": 0.6323715448379517, "step": 968 }, { "epoch": 0.2833747623921626, "grad_norm": 1.1727530946898588, "learning_rate": 1.954549213497404e-05, "loss": 0.6265028119087219, "step": 969 }, { "epoch": 0.28366720280742797, "grad_norm": 1.2740242277637046, "learning_rate": 1.9544050018795076e-05, "loss": 0.6234713792800903, "step": 970 }, { "epoch": 0.2839596432226934, "grad_norm": 1.2342517719802, "learning_rate": 1.9542605671752447e-05, "loss": 0.6505804657936096, "step": 971 }, { "epoch": 0.2842520836379588, "grad_norm": 1.408353713096739, "learning_rate": 1.954115909418376e-05, "loss": 0.7756558656692505, "step": 972 }, { "epoch": 0.28454452405322417, "grad_norm": 1.4275947350210108, "learning_rate": 1.953971028642715e-05, "loss": 0.767257034778595, "step": 973 }, { "epoch": 0.28483696446848955, "grad_norm": 1.5164327383088176, "learning_rate": 1.9538259248821265e-05, "loss": 0.6702018976211548, "step": 974 }, { "epoch": 0.28512940488375493, "grad_norm": 1.5385088670888984, "learning_rate": 1.953680598170527e-05, "loss": 0.7072827816009521, "step": 975 }, { "epoch": 0.2854218452990203, "grad_norm": 1.4449259987675327, "learning_rate": 1.953535048541886e-05, "loss": 0.6343571543693542, "step": 976 }, { "epoch": 0.2857142857142857, "grad_norm": 1.2668558478543779, "learning_rate": 1.953389276030225e-05, "loss": 0.6361520290374756, "step": 977 }, { "epoch": 0.2860067261295511, "grad_norm": 1.144363699587152, "learning_rate": 1.9532432806696178e-05, "loss": 0.6757364273071289, "step": 978 }, { "epoch": 0.2862991665448165, "grad_norm": 1.2373799950730142, "learning_rate": 1.9530970624941896e-05, "loss": 0.6311759948730469, "step": 979 }, { "epoch": 0.2865916069600819, "grad_norm": 1.3327233434420644, "learning_rate": 1.9529506215381176e-05, "loss": 0.6207036972045898, "step": 980 }, { "epoch": 0.2868840473753473, "grad_norm": 1.182706201187961, "learning_rate": 1.952803957835632e-05, "loss": 0.5154495239257812, "step": 981 }, { "epoch": 0.28717648779061267, "grad_norm": 1.4885508278374788, "learning_rate": 1.9526570714210146e-05, "loss": 0.797666072845459, "step": 982 }, { "epoch": 0.28746892820587805, "grad_norm": 1.5013519512468485, "learning_rate": 1.9525099623285983e-05, "loss": 0.659400224685669, "step": 983 }, { "epoch": 0.28776136862114343, "grad_norm": 1.565667149921291, "learning_rate": 1.9523626305927706e-05, "loss": 0.7638698816299438, "step": 984 }, { "epoch": 0.2880538090364088, "grad_norm": 1.282540952352899, "learning_rate": 1.952215076247968e-05, "loss": 0.6656497120857239, "step": 985 }, { "epoch": 0.2883462494516742, "grad_norm": 1.6004320535828411, "learning_rate": 1.9520672993286807e-05, "loss": 0.7701614499092102, "step": 986 }, { "epoch": 0.28863868986693964, "grad_norm": 1.4907110687279852, "learning_rate": 1.951919299869451e-05, "loss": 0.6710221767425537, "step": 987 }, { "epoch": 0.288931130282205, "grad_norm": 1.3912460639172692, "learning_rate": 1.951771077904873e-05, "loss": 0.6307191848754883, "step": 988 }, { "epoch": 0.2892235706974704, "grad_norm": 1.5585350101159294, "learning_rate": 1.951622633469592e-05, "loss": 0.8226636648178101, "step": 989 }, { "epoch": 0.2895160111127358, "grad_norm": 1.3925257650330547, "learning_rate": 1.9514739665983065e-05, "loss": 0.6286089420318604, "step": 990 }, { "epoch": 0.28980845152800117, "grad_norm": 1.3766260212895336, "learning_rate": 1.9513250773257667e-05, "loss": 0.8167316317558289, "step": 991 }, { "epoch": 0.29010089194326655, "grad_norm": 1.3082034964893225, "learning_rate": 1.9511759656867738e-05, "loss": 0.6840806603431702, "step": 992 }, { "epoch": 0.29039333235853193, "grad_norm": 4.707433700267527, "learning_rate": 1.9510266317161823e-05, "loss": 0.5731699466705322, "step": 993 }, { "epoch": 0.2906857727737973, "grad_norm": 1.179743170686313, "learning_rate": 1.950877075448898e-05, "loss": 0.696578860282898, "step": 994 }, { "epoch": 0.29097821318906275, "grad_norm": 1.28092562469002, "learning_rate": 1.9507272969198787e-05, "loss": 0.7194398641586304, "step": 995 }, { "epoch": 0.29127065360432813, "grad_norm": 1.7406610068492592, "learning_rate": 1.9505772961641342e-05, "loss": 0.7041016817092896, "step": 996 }, { "epoch": 0.2915630940195935, "grad_norm": 1.2586308004321554, "learning_rate": 1.9504270732167267e-05, "loss": 0.7073841691017151, "step": 997 }, { "epoch": 0.2918555344348589, "grad_norm": 1.204085782896564, "learning_rate": 1.9502766281127693e-05, "loss": 0.5097789764404297, "step": 998 }, { "epoch": 0.2921479748501243, "grad_norm": 1.1340482101200409, "learning_rate": 1.9501259608874276e-05, "loss": 0.6522337198257446, "step": 999 }, { "epoch": 0.29244041526538966, "grad_norm": 1.2639457143948831, "learning_rate": 1.9499750715759197e-05, "loss": 0.8276036381721497, "step": 1000 }, { "epoch": 0.29273285568065505, "grad_norm": 1.3336888124261281, "learning_rate": 1.9498239602135145e-05, "loss": 0.7701225876808167, "step": 1001 }, { "epoch": 0.29302529609592043, "grad_norm": 1.4216994028606598, "learning_rate": 1.949672626835534e-05, "loss": 0.6112316846847534, "step": 1002 }, { "epoch": 0.29331773651118587, "grad_norm": 1.5055133598944146, "learning_rate": 1.9495210714773506e-05, "loss": 0.7196093201637268, "step": 1003 }, { "epoch": 0.29361017692645125, "grad_norm": 1.3102459642638802, "learning_rate": 1.9493692941743903e-05, "loss": 0.708210825920105, "step": 1004 }, { "epoch": 0.29390261734171663, "grad_norm": 1.1576562552023075, "learning_rate": 1.9492172949621298e-05, "loss": 0.6156430244445801, "step": 1005 }, { "epoch": 0.294195057756982, "grad_norm": 1.3177580817558727, "learning_rate": 1.9490650738760977e-05, "loss": 0.6125216484069824, "step": 1006 }, { "epoch": 0.2944874981722474, "grad_norm": 1.5792615772910776, "learning_rate": 1.9489126309518752e-05, "loss": 0.5691695213317871, "step": 1007 }, { "epoch": 0.2947799385875128, "grad_norm": 1.2458453862912673, "learning_rate": 1.9487599662250945e-05, "loss": 0.6733062267303467, "step": 1008 }, { "epoch": 0.29507237900277816, "grad_norm": 1.3579820847813902, "learning_rate": 1.94860707973144e-05, "loss": 0.6069025993347168, "step": 1009 }, { "epoch": 0.2953648194180436, "grad_norm": 1.3771790647505693, "learning_rate": 1.9484539715066488e-05, "loss": 0.6191028356552124, "step": 1010 }, { "epoch": 0.295657259833309, "grad_norm": 1.3927395620788336, "learning_rate": 1.9483006415865082e-05, "loss": 0.7423045635223389, "step": 1011 }, { "epoch": 0.29594970024857437, "grad_norm": 1.584259935283413, "learning_rate": 1.9481470900068585e-05, "loss": 0.854878306388855, "step": 1012 }, { "epoch": 0.29624214066383975, "grad_norm": 1.3274147652805814, "learning_rate": 1.9479933168035914e-05, "loss": 0.6950500011444092, "step": 1013 }, { "epoch": 0.29653458107910513, "grad_norm": 1.2664754529699496, "learning_rate": 1.9478393220126503e-05, "loss": 0.6944484710693359, "step": 1014 }, { "epoch": 0.2968270214943705, "grad_norm": 1.3385070796010239, "learning_rate": 1.9476851056700303e-05, "loss": 0.7120212316513062, "step": 1015 }, { "epoch": 0.2971194619096359, "grad_norm": 1.2818173555684258, "learning_rate": 1.9475306678117792e-05, "loss": 0.6271052956581116, "step": 1016 }, { "epoch": 0.2974119023249013, "grad_norm": 1.386949235285712, "learning_rate": 1.9473760084739958e-05, "loss": 0.6398453712463379, "step": 1017 }, { "epoch": 0.2977043427401667, "grad_norm": 1.440440679973054, "learning_rate": 1.94722112769283e-05, "loss": 0.5563585758209229, "step": 1018 }, { "epoch": 0.2979967831554321, "grad_norm": 1.2637928746894573, "learning_rate": 1.947066025504485e-05, "loss": 0.7895959615707397, "step": 1019 }, { "epoch": 0.2982892235706975, "grad_norm": 1.2684661754258477, "learning_rate": 1.9469107019452148e-05, "loss": 0.6304349303245544, "step": 1020 }, { "epoch": 0.29858166398596286, "grad_norm": 1.4493096125993807, "learning_rate": 1.9467551570513257e-05, "loss": 0.6915549039840698, "step": 1021 }, { "epoch": 0.29887410440122825, "grad_norm": 1.2593652754748748, "learning_rate": 1.9465993908591748e-05, "loss": 0.6257511377334595, "step": 1022 }, { "epoch": 0.29916654481649363, "grad_norm": 1.4075585450481771, "learning_rate": 1.9464434034051716e-05, "loss": 0.6409085988998413, "step": 1023 }, { "epoch": 0.299458985231759, "grad_norm": 1.358442522813864, "learning_rate": 1.9462871947257772e-05, "loss": 0.7281351089477539, "step": 1024 }, { "epoch": 0.2997514256470244, "grad_norm": 1.441690145181621, "learning_rate": 1.9461307648575047e-05, "loss": 0.8016781806945801, "step": 1025 }, { "epoch": 0.30004386606228983, "grad_norm": 1.2844064559637345, "learning_rate": 1.9459741138369186e-05, "loss": 0.5883209705352783, "step": 1026 }, { "epoch": 0.3003363064775552, "grad_norm": 1.674320224055934, "learning_rate": 1.9458172417006347e-05, "loss": 0.6414197683334351, "step": 1027 }, { "epoch": 0.3006287468928206, "grad_norm": 1.465437904752509, "learning_rate": 1.9456601484853218e-05, "loss": 0.7076515555381775, "step": 1028 }, { "epoch": 0.300921187308086, "grad_norm": 1.4091861442316225, "learning_rate": 1.9455028342276984e-05, "loss": 0.8102637529373169, "step": 1029 }, { "epoch": 0.30121362772335136, "grad_norm": 1.3935099692215975, "learning_rate": 1.9453452989645362e-05, "loss": 0.6954574584960938, "step": 1030 }, { "epoch": 0.30150606813861675, "grad_norm": 1.1912974865854908, "learning_rate": 1.9451875427326585e-05, "loss": 0.6647125482559204, "step": 1031 }, { "epoch": 0.3017985085538821, "grad_norm": 1.2637381593470247, "learning_rate": 1.9450295655689392e-05, "loss": 0.5501933097839355, "step": 1032 }, { "epoch": 0.3020909489691475, "grad_norm": 1.1642394496276798, "learning_rate": 1.944871367510305e-05, "loss": 0.6561415195465088, "step": 1033 }, { "epoch": 0.30238338938441295, "grad_norm": 1.2818557575199787, "learning_rate": 1.9447129485937335e-05, "loss": 0.6768229007720947, "step": 1034 }, { "epoch": 0.30267582979967833, "grad_norm": 1.229414584528048, "learning_rate": 1.9445543088562543e-05, "loss": 0.5693868398666382, "step": 1035 }, { "epoch": 0.3029682702149437, "grad_norm": 1.197937800783061, "learning_rate": 1.9443954483349485e-05, "loss": 0.6165708303451538, "step": 1036 }, { "epoch": 0.3032607106302091, "grad_norm": 1.0808504567320436, "learning_rate": 1.944236367066948e-05, "loss": 0.6116082668304443, "step": 1037 }, { "epoch": 0.3035531510454745, "grad_norm": 1.2481100676234638, "learning_rate": 1.9440770650894384e-05, "loss": 0.7027714848518372, "step": 1038 }, { "epoch": 0.30384559146073986, "grad_norm": 1.3613368127158991, "learning_rate": 1.943917542439655e-05, "loss": 0.7339189052581787, "step": 1039 }, { "epoch": 0.30413803187600524, "grad_norm": 1.322856585416547, "learning_rate": 1.943757799154885e-05, "loss": 0.7975895404815674, "step": 1040 }, { "epoch": 0.3044304722912706, "grad_norm": 1.2603507441667385, "learning_rate": 1.9435978352724673e-05, "loss": 0.6421841382980347, "step": 1041 }, { "epoch": 0.30472291270653606, "grad_norm": 1.3017046883641064, "learning_rate": 1.943437650829793e-05, "loss": 0.6731791496276855, "step": 1042 }, { "epoch": 0.30501535312180145, "grad_norm": 1.224211690521448, "learning_rate": 1.943277245864304e-05, "loss": 0.7008551359176636, "step": 1043 }, { "epoch": 0.30530779353706683, "grad_norm": 1.2549197569852149, "learning_rate": 1.943116620413494e-05, "loss": 0.6777141094207764, "step": 1044 }, { "epoch": 0.3056002339523322, "grad_norm": 1.258074600817151, "learning_rate": 1.9429557745149084e-05, "loss": 0.7649033069610596, "step": 1045 }, { "epoch": 0.3058926743675976, "grad_norm": 1.2626508350830759, "learning_rate": 1.9427947082061432e-05, "loss": 0.6460477709770203, "step": 1046 }, { "epoch": 0.306185114782863, "grad_norm": 1.3748035809258794, "learning_rate": 1.942633421524848e-05, "loss": 0.5939697623252869, "step": 1047 }, { "epoch": 0.30647755519812836, "grad_norm": 1.3696807292374817, "learning_rate": 1.9424719145087216e-05, "loss": 0.606407880783081, "step": 1048 }, { "epoch": 0.3067699956133938, "grad_norm": 1.2114201905625201, "learning_rate": 1.9423101871955153e-05, "loss": 0.5515298843383789, "step": 1049 }, { "epoch": 0.3070624360286592, "grad_norm": 1.4449996700249255, "learning_rate": 1.942148239623032e-05, "loss": 0.7397217154502869, "step": 1050 }, { "epoch": 0.30735487644392456, "grad_norm": 1.708533630902304, "learning_rate": 1.9419860718291265e-05, "loss": 0.6397782564163208, "step": 1051 }, { "epoch": 0.30764731685918995, "grad_norm": 1.1946031757535738, "learning_rate": 1.9418236838517036e-05, "loss": 0.589732825756073, "step": 1052 }, { "epoch": 0.30793975727445533, "grad_norm": 1.4196894685331136, "learning_rate": 1.941661075728721e-05, "loss": 0.7968351244926453, "step": 1053 }, { "epoch": 0.3082321976897207, "grad_norm": 1.35500416476017, "learning_rate": 1.9414982474981877e-05, "loss": 0.5740514397621155, "step": 1054 }, { "epoch": 0.3085246381049861, "grad_norm": 1.314001411398827, "learning_rate": 1.9413351991981632e-05, "loss": 0.656599760055542, "step": 1055 }, { "epoch": 0.3088170785202515, "grad_norm": 1.2592244001939052, "learning_rate": 1.9411719308667593e-05, "loss": 0.5638262033462524, "step": 1056 }, { "epoch": 0.3091095189355169, "grad_norm": 1.3510783569743914, "learning_rate": 1.9410084425421392e-05, "loss": 0.6391294002532959, "step": 1057 }, { "epoch": 0.3094019593507823, "grad_norm": 1.300451628146748, "learning_rate": 1.9408447342625167e-05, "loss": 0.7109906077384949, "step": 1058 }, { "epoch": 0.3096943997660477, "grad_norm": 1.35271058872007, "learning_rate": 1.9406808060661583e-05, "loss": 0.6922626495361328, "step": 1059 }, { "epoch": 0.30998684018131306, "grad_norm": 1.3729160813047252, "learning_rate": 1.9405166579913808e-05, "loss": 0.6708151698112488, "step": 1060 }, { "epoch": 0.31027928059657844, "grad_norm": 1.3049592711968918, "learning_rate": 1.940352290076553e-05, "loss": 0.6259905099868774, "step": 1061 }, { "epoch": 0.3105717210118438, "grad_norm": 1.3047971530530311, "learning_rate": 1.940187702360095e-05, "loss": 0.6590703725814819, "step": 1062 }, { "epoch": 0.3108641614271092, "grad_norm": 1.5136066296614852, "learning_rate": 1.9400228948804777e-05, "loss": 0.7371482849121094, "step": 1063 }, { "epoch": 0.3111566018423746, "grad_norm": 1.3637094061000257, "learning_rate": 1.9398578676762243e-05, "loss": 0.6954984664916992, "step": 1064 }, { "epoch": 0.31144904225764003, "grad_norm": 1.197618668709007, "learning_rate": 1.9396926207859085e-05, "loss": 0.604501485824585, "step": 1065 }, { "epoch": 0.3117414826729054, "grad_norm": 1.4637648544146704, "learning_rate": 1.939527154248156e-05, "loss": 0.7580305337905884, "step": 1066 }, { "epoch": 0.3120339230881708, "grad_norm": 1.2774221611024956, "learning_rate": 1.9393614681016443e-05, "loss": 0.5996969938278198, "step": 1067 }, { "epoch": 0.3123263635034362, "grad_norm": 1.2247945329694363, "learning_rate": 1.9391955623851e-05, "loss": 0.5939687490463257, "step": 1068 }, { "epoch": 0.31261880391870156, "grad_norm": 1.2833481425507127, "learning_rate": 1.939029437137304e-05, "loss": 0.6194947957992554, "step": 1069 }, { "epoch": 0.31291124433396694, "grad_norm": 1.406800587144287, "learning_rate": 1.9388630923970862e-05, "loss": 0.7419420480728149, "step": 1070 }, { "epoch": 0.3132036847492323, "grad_norm": 1.4290715744520364, "learning_rate": 1.938696528203329e-05, "loss": 0.6950613856315613, "step": 1071 }, { "epoch": 0.3134961251644977, "grad_norm": 1.542135386244918, "learning_rate": 1.9385297445949657e-05, "loss": 0.7376282215118408, "step": 1072 }, { "epoch": 0.31378856557976315, "grad_norm": 1.4197281288148755, "learning_rate": 1.938362741610981e-05, "loss": 0.800892174243927, "step": 1073 }, { "epoch": 0.31408100599502853, "grad_norm": 1.3238571566647774, "learning_rate": 1.938195519290411e-05, "loss": 0.5747013688087463, "step": 1074 }, { "epoch": 0.3143734464102939, "grad_norm": 1.3986418419585354, "learning_rate": 1.9380280776723422e-05, "loss": 0.7341697216033936, "step": 1075 }, { "epoch": 0.3146658868255593, "grad_norm": 1.2703636347468634, "learning_rate": 1.9378604167959138e-05, "loss": 0.6229791641235352, "step": 1076 }, { "epoch": 0.3149583272408247, "grad_norm": 1.2978515497126824, "learning_rate": 1.937692536700315e-05, "loss": 0.7266645431518555, "step": 1077 }, { "epoch": 0.31525076765609006, "grad_norm": 1.1885775086050685, "learning_rate": 1.937524437424787e-05, "loss": 0.6163127422332764, "step": 1078 }, { "epoch": 0.31554320807135544, "grad_norm": 1.3182925237610392, "learning_rate": 1.9373561190086225e-05, "loss": 0.6609925031661987, "step": 1079 }, { "epoch": 0.3158356484866208, "grad_norm": 1.305005533703013, "learning_rate": 1.937187581491164e-05, "loss": 0.7157741785049438, "step": 1080 }, { "epoch": 0.31612808890188626, "grad_norm": 1.286980397276467, "learning_rate": 1.937018824911807e-05, "loss": 0.6486212015151978, "step": 1081 }, { "epoch": 0.31642052931715164, "grad_norm": 1.3383942599839993, "learning_rate": 1.9368498493099963e-05, "loss": 0.6931928396224976, "step": 1082 }, { "epoch": 0.316712969732417, "grad_norm": 1.3241410530363713, "learning_rate": 1.9366806547252295e-05, "loss": 0.9291354417800903, "step": 1083 }, { "epoch": 0.3170054101476824, "grad_norm": 1.247174604159187, "learning_rate": 1.936511241197055e-05, "loss": 0.6276642084121704, "step": 1084 }, { "epoch": 0.3172978505629478, "grad_norm": 1.5622111951472988, "learning_rate": 1.936341608765072e-05, "loss": 0.7836581468582153, "step": 1085 }, { "epoch": 0.3175902909782132, "grad_norm": 1.3508415072117352, "learning_rate": 1.9361717574689308e-05, "loss": 0.6785084009170532, "step": 1086 }, { "epoch": 0.31788273139347856, "grad_norm": 1.2099617708073434, "learning_rate": 1.936001687348333e-05, "loss": 0.5715218782424927, "step": 1087 }, { "epoch": 0.318175171808744, "grad_norm": 1.4697249093694587, "learning_rate": 1.9358313984430324e-05, "loss": 0.8417775630950928, "step": 1088 }, { "epoch": 0.3184676122240094, "grad_norm": 1.2670961493626953, "learning_rate": 1.935660890792832e-05, "loss": 0.598076343536377, "step": 1089 }, { "epoch": 0.31876005263927476, "grad_norm": 1.1923598198798329, "learning_rate": 1.9354901644375876e-05, "loss": 0.5830154418945312, "step": 1090 }, { "epoch": 0.31905249305454014, "grad_norm": 1.307163759376097, "learning_rate": 1.935319219417205e-05, "loss": 0.5746437311172485, "step": 1091 }, { "epoch": 0.3193449334698055, "grad_norm": 1.1091668518622428, "learning_rate": 1.9351480557716414e-05, "loss": 0.5520191788673401, "step": 1092 }, { "epoch": 0.3196373738850709, "grad_norm": 1.218411442512637, "learning_rate": 1.9349766735409058e-05, "loss": 0.5847123861312866, "step": 1093 }, { "epoch": 0.3199298143003363, "grad_norm": 1.568693203893066, "learning_rate": 1.9348050727650577e-05, "loss": 0.7390924692153931, "step": 1094 }, { "epoch": 0.3202222547156017, "grad_norm": 1.4427842898109178, "learning_rate": 1.9346332534842074e-05, "loss": 0.5812145471572876, "step": 1095 }, { "epoch": 0.3205146951308671, "grad_norm": 1.615769461575852, "learning_rate": 1.9344612157385166e-05, "loss": 0.6958816647529602, "step": 1096 }, { "epoch": 0.3208071355461325, "grad_norm": 1.3481467288956208, "learning_rate": 1.9342889595681986e-05, "loss": 0.5618177652359009, "step": 1097 }, { "epoch": 0.3210995759613979, "grad_norm": 1.2846515235734224, "learning_rate": 1.9341164850135163e-05, "loss": 0.6099411845207214, "step": 1098 }, { "epoch": 0.32139201637666326, "grad_norm": 1.1242331249756639, "learning_rate": 1.9339437921147854e-05, "loss": 0.6772094964981079, "step": 1099 }, { "epoch": 0.32168445679192864, "grad_norm": 1.4006184046576602, "learning_rate": 1.9337708809123718e-05, "loss": 0.6916643381118774, "step": 1100 }, { "epoch": 0.321976897207194, "grad_norm": 1.199381767960838, "learning_rate": 1.933597751446692e-05, "loss": 0.5716762542724609, "step": 1101 }, { "epoch": 0.3222693376224594, "grad_norm": 1.511781401125701, "learning_rate": 1.9334244037582143e-05, "loss": 0.68224036693573, "step": 1102 }, { "epoch": 0.3225617780377248, "grad_norm": 1.3199204633429549, "learning_rate": 1.933250837887457e-05, "loss": 0.6888231635093689, "step": 1103 }, { "epoch": 0.3228542184529902, "grad_norm": 1.4809797608653643, "learning_rate": 1.933077053874991e-05, "loss": 0.6469036340713501, "step": 1104 }, { "epoch": 0.3231466588682556, "grad_norm": 1.5099365665086963, "learning_rate": 1.932903051761437e-05, "loss": 0.6202501058578491, "step": 1105 }, { "epoch": 0.323439099283521, "grad_norm": 1.454362918518285, "learning_rate": 1.932728831587467e-05, "loss": 0.6041314601898193, "step": 1106 }, { "epoch": 0.3237315396987864, "grad_norm": 1.3479422917529533, "learning_rate": 1.9325543933938034e-05, "loss": 0.7081667184829712, "step": 1107 }, { "epoch": 0.32402398011405176, "grad_norm": 1.477559211803618, "learning_rate": 1.9323797372212204e-05, "loss": 0.7743494510650635, "step": 1108 }, { "epoch": 0.32431642052931714, "grad_norm": 1.3188148010775738, "learning_rate": 1.9322048631105428e-05, "loss": 0.6122584342956543, "step": 1109 }, { "epoch": 0.3246088609445825, "grad_norm": 1.279178726850882, "learning_rate": 1.932029771102646e-05, "loss": 0.6106122732162476, "step": 1110 }, { "epoch": 0.3249013013598479, "grad_norm": 1.1897376224269591, "learning_rate": 1.9318544612384572e-05, "loss": 0.5082784295082092, "step": 1111 }, { "epoch": 0.32519374177511334, "grad_norm": 1.3081590787355515, "learning_rate": 1.9316789335589542e-05, "loss": 0.6845188140869141, "step": 1112 }, { "epoch": 0.3254861821903787, "grad_norm": 1.343292960468675, "learning_rate": 1.9315031881051653e-05, "loss": 0.5972481966018677, "step": 1113 }, { "epoch": 0.3257786226056441, "grad_norm": 1.372744387816622, "learning_rate": 1.931327224918169e-05, "loss": 0.6312427520751953, "step": 1114 }, { "epoch": 0.3260710630209095, "grad_norm": 1.6334469145871557, "learning_rate": 1.9311510440390973e-05, "loss": 0.7904551029205322, "step": 1115 }, { "epoch": 0.3263635034361749, "grad_norm": 1.4496533611968336, "learning_rate": 1.9309746455091302e-05, "loss": 0.6513646841049194, "step": 1116 }, { "epoch": 0.32665594385144026, "grad_norm": 1.2559107839078971, "learning_rate": 1.9307980293694997e-05, "loss": 0.5349715948104858, "step": 1117 }, { "epoch": 0.32694838426670564, "grad_norm": 1.122130050588245, "learning_rate": 1.93062119566149e-05, "loss": 0.5815087556838989, "step": 1118 }, { "epoch": 0.327240824681971, "grad_norm": 1.3638823451289013, "learning_rate": 1.9304441444264335e-05, "loss": 0.6380286812782288, "step": 1119 }, { "epoch": 0.32753326509723646, "grad_norm": 1.2646470578382853, "learning_rate": 1.9302668757057157e-05, "loss": 0.7222728729248047, "step": 1120 }, { "epoch": 0.32782570551250184, "grad_norm": 1.1611022114208025, "learning_rate": 1.9300893895407715e-05, "loss": 0.6262868642807007, "step": 1121 }, { "epoch": 0.3281181459277672, "grad_norm": 1.522127361747668, "learning_rate": 1.929911685973088e-05, "loss": 0.6387197971343994, "step": 1122 }, { "epoch": 0.3284105863430326, "grad_norm": 1.3879760662124887, "learning_rate": 1.9297337650442015e-05, "loss": 0.77378249168396, "step": 1123 }, { "epoch": 0.328703026758298, "grad_norm": 1.3651260322738243, "learning_rate": 1.9295556267957004e-05, "loss": 0.7589142322540283, "step": 1124 }, { "epoch": 0.32899546717356337, "grad_norm": 1.481283036614999, "learning_rate": 1.9293772712692233e-05, "loss": 0.7153090238571167, "step": 1125 }, { "epoch": 0.32928790758882875, "grad_norm": 1.2572705841763243, "learning_rate": 1.9291986985064595e-05, "loss": 0.5738104581832886, "step": 1126 }, { "epoch": 0.3295803480040942, "grad_norm": 1.2803221849130417, "learning_rate": 1.92901990854915e-05, "loss": 0.6530819535255432, "step": 1127 }, { "epoch": 0.3298727884193596, "grad_norm": 1.221270033991816, "learning_rate": 1.9288409014390854e-05, "loss": 0.59107506275177, "step": 1128 }, { "epoch": 0.33016522883462496, "grad_norm": 1.4658806562930384, "learning_rate": 1.9286616772181072e-05, "loss": 0.5798863172531128, "step": 1129 }, { "epoch": 0.33045766924989034, "grad_norm": 1.689951660615568, "learning_rate": 1.9284822359281085e-05, "loss": 0.6957223415374756, "step": 1130 }, { "epoch": 0.3307501096651557, "grad_norm": 1.3614959188818774, "learning_rate": 1.9283025776110326e-05, "loss": 0.6933379173278809, "step": 1131 }, { "epoch": 0.3310425500804211, "grad_norm": 1.2712490538707164, "learning_rate": 1.928122702308873e-05, "loss": 0.527482271194458, "step": 1132 }, { "epoch": 0.3313349904956865, "grad_norm": 1.2166131933862214, "learning_rate": 1.927942610063675e-05, "loss": 0.7244399785995483, "step": 1133 }, { "epoch": 0.33162743091095187, "grad_norm": 1.4636848406157517, "learning_rate": 1.9277623009175338e-05, "loss": 0.7881563901901245, "step": 1134 }, { "epoch": 0.3319198713262173, "grad_norm": 1.415089568819196, "learning_rate": 1.9275817749125956e-05, "loss": 0.7523232698440552, "step": 1135 }, { "epoch": 0.3322123117414827, "grad_norm": 1.5537538186729503, "learning_rate": 1.9274010320910575e-05, "loss": 0.7226657867431641, "step": 1136 }, { "epoch": 0.3325047521567481, "grad_norm": 1.4230293062648038, "learning_rate": 1.9272200724951666e-05, "loss": 0.6461686491966248, "step": 1137 }, { "epoch": 0.33279719257201346, "grad_norm": 1.1785466753796996, "learning_rate": 1.9270388961672214e-05, "loss": 0.6343599557876587, "step": 1138 }, { "epoch": 0.33308963298727884, "grad_norm": 1.2762072218920462, "learning_rate": 1.926857503149571e-05, "loss": 0.5510993599891663, "step": 1139 }, { "epoch": 0.3333820734025442, "grad_norm": 1.3887196408907312, "learning_rate": 1.9266758934846142e-05, "loss": 0.6022439002990723, "step": 1140 }, { "epoch": 0.3336745138178096, "grad_norm": 1.2716403438701216, "learning_rate": 1.9264940672148018e-05, "loss": 0.708207368850708, "step": 1141 }, { "epoch": 0.333966954233075, "grad_norm": 1.3137035916667523, "learning_rate": 1.9263120243826345e-05, "loss": 0.566935122013092, "step": 1142 }, { "epoch": 0.3342593946483404, "grad_norm": 1.634118861015607, "learning_rate": 1.9261297650306635e-05, "loss": 0.6848355531692505, "step": 1143 }, { "epoch": 0.3345518350636058, "grad_norm": 1.3058715943169161, "learning_rate": 1.9259472892014907e-05, "loss": 0.7335090637207031, "step": 1144 }, { "epoch": 0.3348442754788712, "grad_norm": 1.425387946547354, "learning_rate": 1.925764596937769e-05, "loss": 0.7323876023292542, "step": 1145 }, { "epoch": 0.33513671589413657, "grad_norm": 1.377083094919456, "learning_rate": 1.9255816882822017e-05, "loss": 0.5564731955528259, "step": 1146 }, { "epoch": 0.33542915630940195, "grad_norm": 1.5323554162589257, "learning_rate": 1.925398563277542e-05, "loss": 0.7699049711227417, "step": 1147 }, { "epoch": 0.33572159672466734, "grad_norm": 1.4580663324783634, "learning_rate": 1.925215221966595e-05, "loss": 0.688602089881897, "step": 1148 }, { "epoch": 0.3360140371399327, "grad_norm": 1.4706838569192882, "learning_rate": 1.9250316643922153e-05, "loss": 0.7103208899497986, "step": 1149 }, { "epoch": 0.3363064775551981, "grad_norm": 1.2723502109555263, "learning_rate": 1.9248478905973078e-05, "loss": 0.6313603520393372, "step": 1150 }, { "epoch": 0.33659891797046354, "grad_norm": 1.4985289931464978, "learning_rate": 1.9246639006248294e-05, "loss": 0.8420118093490601, "step": 1151 }, { "epoch": 0.3368913583857289, "grad_norm": 1.4358130705661303, "learning_rate": 1.9244796945177864e-05, "loss": 0.6566640734672546, "step": 1152 }, { "epoch": 0.3371837988009943, "grad_norm": 1.3408154011751006, "learning_rate": 1.9242952723192357e-05, "loss": 0.6455206274986267, "step": 1153 }, { "epoch": 0.3374762392162597, "grad_norm": 1.3469873034007918, "learning_rate": 1.924110634072285e-05, "loss": 0.7348071336746216, "step": 1154 }, { "epoch": 0.33776867963152507, "grad_norm": 1.7471975705727423, "learning_rate": 1.9239257798200918e-05, "loss": 0.7187973260879517, "step": 1155 }, { "epoch": 0.33806112004679045, "grad_norm": 1.2712100505239146, "learning_rate": 1.9237407096058655e-05, "loss": 0.683181643486023, "step": 1156 }, { "epoch": 0.33835356046205584, "grad_norm": 1.2445760635583791, "learning_rate": 1.9235554234728646e-05, "loss": 0.7296931743621826, "step": 1157 }, { "epoch": 0.3386460008773212, "grad_norm": 1.1890040509691011, "learning_rate": 1.923369921464399e-05, "loss": 0.6656480431556702, "step": 1158 }, { "epoch": 0.33893844129258666, "grad_norm": 1.3248976863888173, "learning_rate": 1.923184203623828e-05, "loss": 0.6284874677658081, "step": 1159 }, { "epoch": 0.33923088170785204, "grad_norm": 1.29677623825286, "learning_rate": 1.922998269994563e-05, "loss": 0.7065030336380005, "step": 1160 }, { "epoch": 0.3395233221231174, "grad_norm": 1.3728212504218815, "learning_rate": 1.9228121206200637e-05, "loss": 0.7077580690383911, "step": 1161 }, { "epoch": 0.3398157625383828, "grad_norm": 1.4855746260471363, "learning_rate": 1.9226257555438428e-05, "loss": 0.6012637615203857, "step": 1162 }, { "epoch": 0.3401082029536482, "grad_norm": 1.304745585816947, "learning_rate": 1.9224391748094607e-05, "loss": 0.8166115283966064, "step": 1163 }, { "epoch": 0.34040064336891357, "grad_norm": 1.502407347484804, "learning_rate": 1.92225237846053e-05, "loss": 0.6066576242446899, "step": 1164 }, { "epoch": 0.34069308378417895, "grad_norm": 1.2739635711375565, "learning_rate": 1.922065366540713e-05, "loss": 0.7226361632347107, "step": 1165 }, { "epoch": 0.3409855241994444, "grad_norm": 1.6597939275709621, "learning_rate": 1.9218781390937233e-05, "loss": 0.7786005139350891, "step": 1166 }, { "epoch": 0.3412779646147098, "grad_norm": 1.4812475412632635, "learning_rate": 1.9216906961633234e-05, "loss": 0.6534268856048584, "step": 1167 }, { "epoch": 0.34157040502997515, "grad_norm": 1.2208380860350694, "learning_rate": 1.9215030377933274e-05, "loss": 0.6048434376716614, "step": 1168 }, { "epoch": 0.34186284544524054, "grad_norm": 1.356773734579803, "learning_rate": 1.921315164027599e-05, "loss": 0.8321201205253601, "step": 1169 }, { "epoch": 0.3421552858605059, "grad_norm": 1.5327102242092, "learning_rate": 1.9211270749100527e-05, "loss": 0.7142379283905029, "step": 1170 }, { "epoch": 0.3424477262757713, "grad_norm": 1.205344060714777, "learning_rate": 1.9209387704846535e-05, "loss": 0.6262812614440918, "step": 1171 }, { "epoch": 0.3427401666910367, "grad_norm": 1.4345003452190939, "learning_rate": 1.920750250795416e-05, "loss": 0.7242386341094971, "step": 1172 }, { "epoch": 0.34303260710630207, "grad_norm": 1.3077522067723235, "learning_rate": 1.9205615158864054e-05, "loss": 0.6064128875732422, "step": 1173 }, { "epoch": 0.3433250475215675, "grad_norm": 1.3868970600691566, "learning_rate": 1.9203725658017374e-05, "loss": 0.6720623970031738, "step": 1174 }, { "epoch": 0.3436174879368329, "grad_norm": 1.3281016407079367, "learning_rate": 1.9201834005855785e-05, "loss": 0.745712161064148, "step": 1175 }, { "epoch": 0.34390992835209827, "grad_norm": 1.3336156391355163, "learning_rate": 1.9199940202821445e-05, "loss": 0.6387969255447388, "step": 1176 }, { "epoch": 0.34420236876736365, "grad_norm": 1.3104807608053473, "learning_rate": 1.9198044249357018e-05, "loss": 0.6634984612464905, "step": 1177 }, { "epoch": 0.34449480918262904, "grad_norm": 1.0968916991502757, "learning_rate": 1.919614614590567e-05, "loss": 0.4732145667076111, "step": 1178 }, { "epoch": 0.3447872495978944, "grad_norm": 1.4315145497505135, "learning_rate": 1.9194245892911077e-05, "loss": 0.6621897220611572, "step": 1179 }, { "epoch": 0.3450796900131598, "grad_norm": 1.3492357768294603, "learning_rate": 1.9192343490817412e-05, "loss": 0.5691112279891968, "step": 1180 }, { "epoch": 0.3453721304284252, "grad_norm": 1.5009979829344267, "learning_rate": 1.919043894006934e-05, "loss": 0.6326683759689331, "step": 1181 }, { "epoch": 0.3456645708436906, "grad_norm": 1.3965991672121214, "learning_rate": 1.9188532241112047e-05, "loss": 0.6068567037582397, "step": 1182 }, { "epoch": 0.345957011258956, "grad_norm": 1.5425955582670972, "learning_rate": 1.918662339439121e-05, "loss": 0.707065761089325, "step": 1183 }, { "epoch": 0.3462494516742214, "grad_norm": 1.4506511105853803, "learning_rate": 1.9184712400353008e-05, "loss": 0.7821887135505676, "step": 1184 }, { "epoch": 0.34654189208948677, "grad_norm": 1.3192596730278041, "learning_rate": 1.918279925944413e-05, "loss": 0.6759425401687622, "step": 1185 }, { "epoch": 0.34683433250475215, "grad_norm": 1.2819153702423505, "learning_rate": 1.9180883972111756e-05, "loss": 0.5660048127174377, "step": 1186 }, { "epoch": 0.34712677292001753, "grad_norm": 1.392117573401842, "learning_rate": 1.9178966538803574e-05, "loss": 0.708798885345459, "step": 1187 }, { "epoch": 0.3474192133352829, "grad_norm": 1.0828895012382165, "learning_rate": 1.9177046959967774e-05, "loss": 0.603208065032959, "step": 1188 }, { "epoch": 0.3477116537505483, "grad_norm": 1.2856052178527815, "learning_rate": 1.9175125236053043e-05, "loss": 0.8259323835372925, "step": 1189 }, { "epoch": 0.34800409416581374, "grad_norm": 1.2349901090123199, "learning_rate": 1.9173201367508572e-05, "loss": 0.573014497756958, "step": 1190 }, { "epoch": 0.3482965345810791, "grad_norm": 1.49130421629148, "learning_rate": 1.9171275354784062e-05, "loss": 0.8202974200248718, "step": 1191 }, { "epoch": 0.3485889749963445, "grad_norm": 1.313328733803151, "learning_rate": 1.9169347198329693e-05, "loss": 0.5352192521095276, "step": 1192 }, { "epoch": 0.3488814154116099, "grad_norm": 1.4707600848748155, "learning_rate": 1.916741689859617e-05, "loss": 0.7303881645202637, "step": 1193 }, { "epoch": 0.34917385582687527, "grad_norm": 1.136402601726834, "learning_rate": 1.9165484456034683e-05, "loss": 0.670224666595459, "step": 1194 }, { "epoch": 0.34946629624214065, "grad_norm": 1.213410956274994, "learning_rate": 1.9163549871096934e-05, "loss": 0.7311158776283264, "step": 1195 }, { "epoch": 0.34975873665740603, "grad_norm": 1.3163563045896416, "learning_rate": 1.9161613144235117e-05, "loss": 0.6346032619476318, "step": 1196 }, { "epoch": 0.3500511770726714, "grad_norm": 1.3538502473866518, "learning_rate": 1.9159674275901932e-05, "loss": 0.66914302110672, "step": 1197 }, { "epoch": 0.35034361748793685, "grad_norm": 1.408804907617288, "learning_rate": 1.9157733266550577e-05, "loss": 0.6775194406509399, "step": 1198 }, { "epoch": 0.35063605790320224, "grad_norm": 1.3219370751555166, "learning_rate": 1.915579011663475e-05, "loss": 0.6887085437774658, "step": 1199 }, { "epoch": 0.3509284983184676, "grad_norm": 1.4120877262018603, "learning_rate": 1.9153844826608652e-05, "loss": 0.7474929690361023, "step": 1200 }, { "epoch": 0.351220938733733, "grad_norm": 1.3551417524104399, "learning_rate": 1.915189739692698e-05, "loss": 0.5665907859802246, "step": 1201 }, { "epoch": 0.3515133791489984, "grad_norm": 1.4582334765772325, "learning_rate": 1.9149947828044938e-05, "loss": 0.6044580340385437, "step": 1202 }, { "epoch": 0.35180581956426377, "grad_norm": 1.1481279810019642, "learning_rate": 1.914799612041822e-05, "loss": 0.6590601205825806, "step": 1203 }, { "epoch": 0.35209825997952915, "grad_norm": 1.1796025597233206, "learning_rate": 1.9146042274503033e-05, "loss": 0.5204451084136963, "step": 1204 }, { "epoch": 0.3523907003947946, "grad_norm": 1.3267878452954167, "learning_rate": 1.9144086290756077e-05, "loss": 0.6036473512649536, "step": 1205 }, { "epoch": 0.35268314081005997, "grad_norm": 1.438922587418907, "learning_rate": 1.914212816963454e-05, "loss": 0.5652757883071899, "step": 1206 }, { "epoch": 0.35297558122532535, "grad_norm": 1.3041918712359999, "learning_rate": 1.9140167911596133e-05, "loss": 0.707310676574707, "step": 1207 }, { "epoch": 0.35326802164059073, "grad_norm": 1.4881761799215045, "learning_rate": 1.9138205517099048e-05, "loss": 0.8539729714393616, "step": 1208 }, { "epoch": 0.3535604620558561, "grad_norm": 1.6882139973772572, "learning_rate": 1.9136240986601986e-05, "loss": 0.6502546072006226, "step": 1209 }, { "epoch": 0.3538529024711215, "grad_norm": 1.3852727639404194, "learning_rate": 1.9134274320564145e-05, "loss": 0.7279889583587646, "step": 1210 }, { "epoch": 0.3541453428863869, "grad_norm": 1.380784482123245, "learning_rate": 1.9132305519445215e-05, "loss": 0.6916895508766174, "step": 1211 }, { "epoch": 0.35443778330165226, "grad_norm": 1.3426787464995344, "learning_rate": 1.9130334583705395e-05, "loss": 0.6941961050033569, "step": 1212 }, { "epoch": 0.3547302237169177, "grad_norm": 1.211958694677935, "learning_rate": 1.912836151380538e-05, "loss": 0.6686822175979614, "step": 1213 }, { "epoch": 0.3550226641321831, "grad_norm": 1.4335891633323221, "learning_rate": 1.912638631020636e-05, "loss": 0.818913459777832, "step": 1214 }, { "epoch": 0.35531510454744847, "grad_norm": 1.3902796641746433, "learning_rate": 1.9124408973370034e-05, "loss": 0.6461240649223328, "step": 1215 }, { "epoch": 0.35560754496271385, "grad_norm": 1.3521177557458626, "learning_rate": 1.9122429503758586e-05, "loss": 0.6982225179672241, "step": 1216 }, { "epoch": 0.35589998537797923, "grad_norm": 1.2726818955529642, "learning_rate": 1.9120447901834708e-05, "loss": 0.6319124698638916, "step": 1217 }, { "epoch": 0.3561924257932446, "grad_norm": 1.4379853975185637, "learning_rate": 1.9118464168061584e-05, "loss": 0.7092441320419312, "step": 1218 }, { "epoch": 0.35648486620851, "grad_norm": 1.4989677994022448, "learning_rate": 1.9116478302902904e-05, "loss": 0.7696874141693115, "step": 1219 }, { "epoch": 0.3567773066237754, "grad_norm": 1.348418923049424, "learning_rate": 1.9114490306822846e-05, "loss": 0.6944275498390198, "step": 1220 }, { "epoch": 0.3570697470390408, "grad_norm": 1.4736146352332777, "learning_rate": 1.9112500180286098e-05, "loss": 0.6179015636444092, "step": 1221 }, { "epoch": 0.3573621874543062, "grad_norm": 1.4832162039625727, "learning_rate": 1.911050792375784e-05, "loss": 0.6964149475097656, "step": 1222 }, { "epoch": 0.3576546278695716, "grad_norm": 1.314680516503926, "learning_rate": 1.9108513537703746e-05, "loss": 0.6923096776008606, "step": 1223 }, { "epoch": 0.35794706828483697, "grad_norm": 1.3108509564109556, "learning_rate": 1.9106517022589993e-05, "loss": 0.5205660462379456, "step": 1224 }, { "epoch": 0.35823950870010235, "grad_norm": 1.604738205722927, "learning_rate": 1.910451837888325e-05, "loss": 0.7488006353378296, "step": 1225 }, { "epoch": 0.35853194911536773, "grad_norm": 1.1847506052614252, "learning_rate": 1.91025176070507e-05, "loss": 0.5414390563964844, "step": 1226 }, { "epoch": 0.3588243895306331, "grad_norm": 1.2745914596652235, "learning_rate": 1.910051470756e-05, "loss": 0.6891577839851379, "step": 1227 }, { "epoch": 0.3591168299458985, "grad_norm": 1.3018823092824294, "learning_rate": 1.9098509680879318e-05, "loss": 0.6496376991271973, "step": 1228 }, { "epoch": 0.35940927036116394, "grad_norm": 1.386313672695145, "learning_rate": 1.909650252747732e-05, "loss": 0.758609414100647, "step": 1229 }, { "epoch": 0.3597017107764293, "grad_norm": 1.544442120518355, "learning_rate": 1.9094493247823164e-05, "loss": 0.7509145736694336, "step": 1230 }, { "epoch": 0.3599941511916947, "grad_norm": 1.2125512669659357, "learning_rate": 1.9092481842386506e-05, "loss": 0.7432405352592468, "step": 1231 }, { "epoch": 0.3602865916069601, "grad_norm": 1.4492900887661606, "learning_rate": 1.90904683116375e-05, "loss": 0.7208698391914368, "step": 1232 }, { "epoch": 0.36057903202222547, "grad_norm": 1.4245050002638069, "learning_rate": 1.9088452656046798e-05, "loss": 0.638593852519989, "step": 1233 }, { "epoch": 0.36087147243749085, "grad_norm": 1.4428449097608804, "learning_rate": 1.9086434876085548e-05, "loss": 0.6663007736206055, "step": 1234 }, { "epoch": 0.36116391285275623, "grad_norm": 1.4112526680406456, "learning_rate": 1.908441497222539e-05, "loss": 0.7132781744003296, "step": 1235 }, { "epoch": 0.3614563532680216, "grad_norm": 1.3024532647304885, "learning_rate": 1.9082392944938467e-05, "loss": 0.6545308828353882, "step": 1236 }, { "epoch": 0.36174879368328705, "grad_norm": 1.1385624733680002, "learning_rate": 1.908036879469741e-05, "loss": 0.7525626420974731, "step": 1237 }, { "epoch": 0.36204123409855243, "grad_norm": 1.4286424106237192, "learning_rate": 1.9078342521975365e-05, "loss": 0.7336804866790771, "step": 1238 }, { "epoch": 0.3623336745138178, "grad_norm": 1.3025101490885231, "learning_rate": 1.907631412724595e-05, "loss": 0.5822359323501587, "step": 1239 }, { "epoch": 0.3626261149290832, "grad_norm": 1.1928464678887247, "learning_rate": 1.907428361098329e-05, "loss": 0.6110040545463562, "step": 1240 }, { "epoch": 0.3629185553443486, "grad_norm": 1.3329812952112776, "learning_rate": 1.9072250973662008e-05, "loss": 0.5363205671310425, "step": 1241 }, { "epoch": 0.36321099575961396, "grad_norm": 1.298737392722519, "learning_rate": 1.9070216215757225e-05, "loss": 0.6804911494255066, "step": 1242 }, { "epoch": 0.36350343617487935, "grad_norm": 1.306574555012534, "learning_rate": 1.906817933774455e-05, "loss": 0.5670056343078613, "step": 1243 }, { "epoch": 0.3637958765901448, "grad_norm": 1.6342501045897717, "learning_rate": 1.9066140340100086e-05, "loss": 0.6839423775672913, "step": 1244 }, { "epoch": 0.36408831700541017, "grad_norm": 1.3102468000864722, "learning_rate": 1.906409922330044e-05, "loss": 0.6512447595596313, "step": 1245 }, { "epoch": 0.36438075742067555, "grad_norm": 1.3767881480650324, "learning_rate": 1.9062055987822713e-05, "loss": 0.6602088212966919, "step": 1246 }, { "epoch": 0.36467319783594093, "grad_norm": 1.3684046563228518, "learning_rate": 1.9060010634144502e-05, "loss": 0.6859074831008911, "step": 1247 }, { "epoch": 0.3649656382512063, "grad_norm": 1.4029132597681886, "learning_rate": 1.9057963162743888e-05, "loss": 0.6871531009674072, "step": 1248 }, { "epoch": 0.3652580786664717, "grad_norm": 1.2778457575589584, "learning_rate": 1.9055913574099454e-05, "loss": 0.7396048307418823, "step": 1249 }, { "epoch": 0.3655505190817371, "grad_norm": 2.313748947770577, "learning_rate": 1.9053861868690283e-05, "loss": 0.7013602256774902, "step": 1250 }, { "epoch": 0.36584295949700246, "grad_norm": 1.2854553849472183, "learning_rate": 1.905180804699595e-05, "loss": 0.6355527639389038, "step": 1251 }, { "epoch": 0.3661353999122679, "grad_norm": 1.1923686434429392, "learning_rate": 1.9049752109496526e-05, "loss": 0.6869304180145264, "step": 1252 }, { "epoch": 0.3664278403275333, "grad_norm": 1.2404032301108463, "learning_rate": 1.9047694056672566e-05, "loss": 0.5267671346664429, "step": 1253 }, { "epoch": 0.36672028074279867, "grad_norm": 1.2479293372256655, "learning_rate": 1.9045633889005134e-05, "loss": 0.6586635112762451, "step": 1254 }, { "epoch": 0.36701272115806405, "grad_norm": 1.2783901733768512, "learning_rate": 1.9043571606975776e-05, "loss": 0.6743361949920654, "step": 1255 }, { "epoch": 0.36730516157332943, "grad_norm": 1.267912865737822, "learning_rate": 1.9041507211066543e-05, "loss": 0.5779668688774109, "step": 1256 }, { "epoch": 0.3675976019885948, "grad_norm": 1.240910914837657, "learning_rate": 1.9039440701759972e-05, "loss": 0.693313479423523, "step": 1257 }, { "epoch": 0.3678900424038602, "grad_norm": 1.2581810913293596, "learning_rate": 1.9037372079539096e-05, "loss": 0.6314960718154907, "step": 1258 }, { "epoch": 0.3681824828191256, "grad_norm": 1.4026915606466803, "learning_rate": 1.9035301344887445e-05, "loss": 0.6483266949653625, "step": 1259 }, { "epoch": 0.368474923234391, "grad_norm": 1.1963714897771014, "learning_rate": 1.903322849828904e-05, "loss": 0.5896739959716797, "step": 1260 }, { "epoch": 0.3687673636496564, "grad_norm": 1.3246139419549132, "learning_rate": 1.9031153540228398e-05, "loss": 0.6760983467102051, "step": 1261 }, { "epoch": 0.3690598040649218, "grad_norm": 1.409129098147532, "learning_rate": 1.9029076471190525e-05, "loss": 0.7453440427780151, "step": 1262 }, { "epoch": 0.36935224448018716, "grad_norm": 1.4768395375517958, "learning_rate": 1.9026997291660926e-05, "loss": 0.7382408380508423, "step": 1263 }, { "epoch": 0.36964468489545255, "grad_norm": 1.3416426687197567, "learning_rate": 1.9024916002125594e-05, "loss": 0.6420471668243408, "step": 1264 }, { "epoch": 0.36993712531071793, "grad_norm": 1.406350116015231, "learning_rate": 1.9022832603071017e-05, "loss": 0.6436389684677124, "step": 1265 }, { "epoch": 0.3702295657259833, "grad_norm": 1.3047843220477244, "learning_rate": 1.9020747094984182e-05, "loss": 0.689171314239502, "step": 1266 }, { "epoch": 0.3705220061412487, "grad_norm": 1.2640328794263636, "learning_rate": 1.9018659478352556e-05, "loss": 0.6704196333885193, "step": 1267 }, { "epoch": 0.37081444655651413, "grad_norm": 2.0690106215423536, "learning_rate": 1.9016569753664118e-05, "loss": 0.6598329544067383, "step": 1268 }, { "epoch": 0.3711068869717795, "grad_norm": 1.8262603065561684, "learning_rate": 1.901447792140732e-05, "loss": 0.7353986501693726, "step": 1269 }, { "epoch": 0.3713993273870449, "grad_norm": 1.4285098808767827, "learning_rate": 1.9012383982071112e-05, "loss": 0.666167140007019, "step": 1270 }, { "epoch": 0.3716917678023103, "grad_norm": 1.2598465904930443, "learning_rate": 1.9010287936144948e-05, "loss": 0.6097015738487244, "step": 1271 }, { "epoch": 0.37198420821757566, "grad_norm": 1.107025542737965, "learning_rate": 1.9008189784118764e-05, "loss": 0.6352437138557434, "step": 1272 }, { "epoch": 0.37227664863284105, "grad_norm": 1.5662430122293758, "learning_rate": 1.9006089526482982e-05, "loss": 0.6686104536056519, "step": 1273 }, { "epoch": 0.3725690890481064, "grad_norm": 1.1719719158143125, "learning_rate": 1.9003987163728535e-05, "loss": 0.6504377126693726, "step": 1274 }, { "epoch": 0.3728615294633718, "grad_norm": 1.2550627286183815, "learning_rate": 1.9001882696346835e-05, "loss": 0.5834585428237915, "step": 1275 }, { "epoch": 0.37315396987863725, "grad_norm": 1.5420452194055032, "learning_rate": 1.8999776124829788e-05, "loss": 0.665432870388031, "step": 1276 }, { "epoch": 0.37344641029390263, "grad_norm": 1.4022956370096276, "learning_rate": 1.899766744966979e-05, "loss": 0.659697949886322, "step": 1277 }, { "epoch": 0.373738850709168, "grad_norm": 1.194404836566078, "learning_rate": 1.899555667135973e-05, "loss": 0.5703476071357727, "step": 1278 }, { "epoch": 0.3740312911244334, "grad_norm": 1.5482464090140011, "learning_rate": 1.8993443790392994e-05, "loss": 0.809308648109436, "step": 1279 }, { "epoch": 0.3743237315396988, "grad_norm": 1.293354946450912, "learning_rate": 1.8991328807263455e-05, "loss": 0.7120508551597595, "step": 1280 }, { "epoch": 0.37461617195496416, "grad_norm": 1.2261555314771986, "learning_rate": 1.898921172246547e-05, "loss": 0.625985860824585, "step": 1281 }, { "epoch": 0.37490861237022954, "grad_norm": 1.3045001966325798, "learning_rate": 1.898709253649391e-05, "loss": 0.637261152267456, "step": 1282 }, { "epoch": 0.375201052785495, "grad_norm": 1.3550273094265433, "learning_rate": 1.89849712498441e-05, "loss": 0.7420133352279663, "step": 1283 }, { "epoch": 0.37549349320076036, "grad_norm": 1.2854448504545577, "learning_rate": 1.8982847863011898e-05, "loss": 0.6230417490005493, "step": 1284 }, { "epoch": 0.37578593361602575, "grad_norm": 1.2127007776565961, "learning_rate": 1.8980722376493622e-05, "loss": 0.6896604299545288, "step": 1285 }, { "epoch": 0.37607837403129113, "grad_norm": 1.3900367736992565, "learning_rate": 1.8978594790786092e-05, "loss": 0.5767710208892822, "step": 1286 }, { "epoch": 0.3763708144465565, "grad_norm": 1.3829044432724817, "learning_rate": 1.8976465106386625e-05, "loss": 0.6945392489433289, "step": 1287 }, { "epoch": 0.3766632548618219, "grad_norm": 1.215943914903153, "learning_rate": 1.8974333323793014e-05, "loss": 0.7208314538002014, "step": 1288 }, { "epoch": 0.3769556952770873, "grad_norm": 1.5769052361743978, "learning_rate": 1.8972199443503556e-05, "loss": 0.7201139330863953, "step": 1289 }, { "epoch": 0.37724813569235266, "grad_norm": 1.3366567930451483, "learning_rate": 1.8970063466017028e-05, "loss": 0.6791107654571533, "step": 1290 }, { "epoch": 0.3775405761076181, "grad_norm": 1.2566261030582595, "learning_rate": 1.89679253918327e-05, "loss": 0.6535364389419556, "step": 1291 }, { "epoch": 0.3778330165228835, "grad_norm": 1.3498011568256927, "learning_rate": 1.8965785221450343e-05, "loss": 0.5910370349884033, "step": 1292 }, { "epoch": 0.37812545693814886, "grad_norm": 1.273886266732073, "learning_rate": 1.8963642955370203e-05, "loss": 0.7025415897369385, "step": 1293 }, { "epoch": 0.37841789735341425, "grad_norm": 1.3743129752593892, "learning_rate": 1.8961498594093018e-05, "loss": 0.8007702827453613, "step": 1294 }, { "epoch": 0.37871033776867963, "grad_norm": 1.3170193178053329, "learning_rate": 1.895935213812003e-05, "loss": 0.6947172284126282, "step": 1295 }, { "epoch": 0.379002778183945, "grad_norm": 1.1178211676030798, "learning_rate": 1.895720358795295e-05, "loss": 0.7024818658828735, "step": 1296 }, { "epoch": 0.3792952185992104, "grad_norm": 1.3447819598276562, "learning_rate": 1.895505294409399e-05, "loss": 0.8202607035636902, "step": 1297 }, { "epoch": 0.3795876590144758, "grad_norm": 1.3114070841017331, "learning_rate": 1.8952900207045853e-05, "loss": 0.8001795411109924, "step": 1298 }, { "epoch": 0.3798800994297412, "grad_norm": 1.4499936638579116, "learning_rate": 1.895074537731173e-05, "loss": 0.8068668842315674, "step": 1299 }, { "epoch": 0.3801725398450066, "grad_norm": 1.5847094151692727, "learning_rate": 1.8948588455395294e-05, "loss": 0.7685220241546631, "step": 1300 }, { "epoch": 0.380464980260272, "grad_norm": 1.5870604224504243, "learning_rate": 1.8946429441800715e-05, "loss": 0.695665717124939, "step": 1301 }, { "epoch": 0.38075742067553736, "grad_norm": 1.2451750201018865, "learning_rate": 1.894426833703265e-05, "loss": 0.6073132753372192, "step": 1302 }, { "epoch": 0.38104986109080274, "grad_norm": 1.3373381283666117, "learning_rate": 1.894210514159624e-05, "loss": 0.6334577798843384, "step": 1303 }, { "epoch": 0.3813423015060681, "grad_norm": 1.519675902061051, "learning_rate": 1.8939939855997125e-05, "loss": 0.6448806524276733, "step": 1304 }, { "epoch": 0.3816347419213335, "grad_norm": 1.438744535892561, "learning_rate": 1.8937772480741427e-05, "loss": 0.7587993144989014, "step": 1305 }, { "epoch": 0.3819271823365989, "grad_norm": 1.3270277700231368, "learning_rate": 1.8935603016335752e-05, "loss": 0.6924787759780884, "step": 1306 }, { "epoch": 0.38221962275186433, "grad_norm": 1.4268553989545638, "learning_rate": 1.8933431463287197e-05, "loss": 0.678055465221405, "step": 1307 }, { "epoch": 0.3825120631671297, "grad_norm": 1.5391207675187488, "learning_rate": 1.8931257822103357e-05, "loss": 0.7519007325172424, "step": 1308 }, { "epoch": 0.3828045035823951, "grad_norm": 1.3654471111870499, "learning_rate": 1.8929082093292306e-05, "loss": 0.6905468702316284, "step": 1309 }, { "epoch": 0.3830969439976605, "grad_norm": 1.2938870443591295, "learning_rate": 1.8926904277362603e-05, "loss": 0.6718122363090515, "step": 1310 }, { "epoch": 0.38338938441292586, "grad_norm": 1.2471147738993698, "learning_rate": 1.89247243748233e-05, "loss": 0.6903961896896362, "step": 1311 }, { "epoch": 0.38368182482819124, "grad_norm": 1.2542516264352948, "learning_rate": 1.8922542386183942e-05, "loss": 0.6947582960128784, "step": 1312 }, { "epoch": 0.3839742652434566, "grad_norm": 1.188134072228004, "learning_rate": 1.8920358311954548e-05, "loss": 0.5850759148597717, "step": 1313 }, { "epoch": 0.38426670565872206, "grad_norm": 1.3420186334522382, "learning_rate": 1.891817215264564e-05, "loss": 0.6512178778648376, "step": 1314 }, { "epoch": 0.38455914607398745, "grad_norm": 1.217758250797112, "learning_rate": 1.891598390876821e-05, "loss": 0.5910850167274475, "step": 1315 }, { "epoch": 0.38485158648925283, "grad_norm": 1.5593571397265127, "learning_rate": 1.891379358083375e-05, "loss": 0.7113536596298218, "step": 1316 }, { "epoch": 0.3851440269045182, "grad_norm": 1.293224731928583, "learning_rate": 1.891160116935424e-05, "loss": 0.705318808555603, "step": 1317 }, { "epoch": 0.3854364673197836, "grad_norm": 1.3559272013315313, "learning_rate": 1.890940667484214e-05, "loss": 0.7524716258049011, "step": 1318 }, { "epoch": 0.385728907735049, "grad_norm": 1.4516012043532711, "learning_rate": 1.89072100978104e-05, "loss": 0.6130248308181763, "step": 1319 }, { "epoch": 0.38602134815031436, "grad_norm": 1.4354959704098622, "learning_rate": 1.8905011438772455e-05, "loss": 0.6535071134567261, "step": 1320 }, { "epoch": 0.38631378856557974, "grad_norm": 1.3663991139031981, "learning_rate": 1.890281069824223e-05, "loss": 0.7027082443237305, "step": 1321 }, { "epoch": 0.3866062289808452, "grad_norm": 1.4293858623040305, "learning_rate": 1.8900607876734133e-05, "loss": 0.7055719494819641, "step": 1322 }, { "epoch": 0.38689866939611056, "grad_norm": 1.3719150653410752, "learning_rate": 1.8898402974763063e-05, "loss": 0.7403384447097778, "step": 1323 }, { "epoch": 0.38719110981137594, "grad_norm": 1.4816215708629428, "learning_rate": 1.88961959928444e-05, "loss": 0.6771470308303833, "step": 1324 }, { "epoch": 0.3874835502266413, "grad_norm": 1.552809609148836, "learning_rate": 1.8893986931494015e-05, "loss": 0.7258767485618591, "step": 1325 }, { "epoch": 0.3877759906419067, "grad_norm": 1.4168779145398758, "learning_rate": 1.889177579122826e-05, "loss": 0.7587069869041443, "step": 1326 }, { "epoch": 0.3880684310571721, "grad_norm": 1.0432394702021985, "learning_rate": 1.888956257256398e-05, "loss": 0.5434668660163879, "step": 1327 }, { "epoch": 0.3883608714724375, "grad_norm": 1.2927507112469059, "learning_rate": 1.8887347276018496e-05, "loss": 0.5311154127120972, "step": 1328 }, { "epoch": 0.38865331188770286, "grad_norm": 1.2533915775325788, "learning_rate": 1.888512990210962e-05, "loss": 0.5651747584342957, "step": 1329 }, { "epoch": 0.3889457523029683, "grad_norm": 1.1863162008873491, "learning_rate": 1.8882910451355654e-05, "loss": 0.628046989440918, "step": 1330 }, { "epoch": 0.3892381927182337, "grad_norm": 1.362511771688155, "learning_rate": 1.888068892427538e-05, "loss": 0.644639253616333, "step": 1331 }, { "epoch": 0.38953063313349906, "grad_norm": 1.2081658901416763, "learning_rate": 1.887846532138806e-05, "loss": 0.6290382742881775, "step": 1332 }, { "epoch": 0.38982307354876444, "grad_norm": 1.319310840364395, "learning_rate": 1.8876239643213456e-05, "loss": 0.6881425380706787, "step": 1333 }, { "epoch": 0.3901155139640298, "grad_norm": 1.398816649776473, "learning_rate": 1.8874011890271807e-05, "loss": 0.645643949508667, "step": 1334 }, { "epoch": 0.3904079543792952, "grad_norm": 1.3694554603281324, "learning_rate": 1.887178206308383e-05, "loss": 0.6965867280960083, "step": 1335 }, { "epoch": 0.3907003947945606, "grad_norm": 1.1683610353079796, "learning_rate": 1.886955016217074e-05, "loss": 0.7326550483703613, "step": 1336 }, { "epoch": 0.390992835209826, "grad_norm": 1.1781300264884254, "learning_rate": 1.886731618805422e-05, "loss": 0.6570208668708801, "step": 1337 }, { "epoch": 0.3912852756250914, "grad_norm": 1.3563073747469718, "learning_rate": 1.886508014125646e-05, "loss": 0.7391610145568848, "step": 1338 }, { "epoch": 0.3915777160403568, "grad_norm": 1.2946978380119605, "learning_rate": 1.8862842022300124e-05, "loss": 0.651665985584259, "step": 1339 }, { "epoch": 0.3918701564556222, "grad_norm": 1.383137909559831, "learning_rate": 1.8860601831708346e-05, "loss": 0.695915699005127, "step": 1340 }, { "epoch": 0.39216259687088756, "grad_norm": 1.4373340263094165, "learning_rate": 1.885835957000476e-05, "loss": 0.7209347486495972, "step": 1341 }, { "epoch": 0.39245503728615294, "grad_norm": 1.3210736597005808, "learning_rate": 1.885611523771349e-05, "loss": 0.6083317995071411, "step": 1342 }, { "epoch": 0.3927474777014183, "grad_norm": 1.1271235346878163, "learning_rate": 1.8853868835359127e-05, "loss": 0.5544713735580444, "step": 1343 }, { "epoch": 0.3930399181166837, "grad_norm": 1.5309462253741093, "learning_rate": 1.8851620363466756e-05, "loss": 0.6333836317062378, "step": 1344 }, { "epoch": 0.3933323585319491, "grad_norm": 1.2454908963117732, "learning_rate": 1.8849369822561943e-05, "loss": 0.554995059967041, "step": 1345 }, { "epoch": 0.3936247989472145, "grad_norm": 1.1941759610212306, "learning_rate": 1.884711721317074e-05, "loss": 0.6696420907974243, "step": 1346 }, { "epoch": 0.3939172393624799, "grad_norm": 1.3685512221597254, "learning_rate": 1.8844862535819682e-05, "loss": 0.6031695604324341, "step": 1347 }, { "epoch": 0.3942096797777453, "grad_norm": 1.2956806020723108, "learning_rate": 1.884260579103578e-05, "loss": 0.686814546585083, "step": 1348 }, { "epoch": 0.3945021201930107, "grad_norm": 1.3341047094387086, "learning_rate": 1.884034697934654e-05, "loss": 0.8113317489624023, "step": 1349 }, { "epoch": 0.39479456060827606, "grad_norm": 1.2860694131453334, "learning_rate": 1.8838086101279946e-05, "loss": 0.5645952820777893, "step": 1350 }, { "epoch": 0.39508700102354144, "grad_norm": 1.2016385972759884, "learning_rate": 1.883582315736446e-05, "loss": 0.6431643962860107, "step": 1351 }, { "epoch": 0.3953794414388068, "grad_norm": 1.5868415992731069, "learning_rate": 1.8833558148129034e-05, "loss": 0.7691985368728638, "step": 1352 }, { "epoch": 0.39567188185407226, "grad_norm": 1.2630640231572245, "learning_rate": 1.88312910741031e-05, "loss": 0.5951793789863586, "step": 1353 }, { "epoch": 0.39596432226933764, "grad_norm": 1.4300765958854422, "learning_rate": 1.8829021935816572e-05, "loss": 0.671844482421875, "step": 1354 }, { "epoch": 0.396256762684603, "grad_norm": 1.1270834377200167, "learning_rate": 1.8826750733799845e-05, "loss": 0.5290843844413757, "step": 1355 }, { "epoch": 0.3965492030998684, "grad_norm": 1.5593842090138308, "learning_rate": 1.8824477468583806e-05, "loss": 0.6492103934288025, "step": 1356 }, { "epoch": 0.3968416435151338, "grad_norm": 1.5289372619537642, "learning_rate": 1.882220214069981e-05, "loss": 0.6111055016517639, "step": 1357 }, { "epoch": 0.3971340839303992, "grad_norm": 1.5921417426189186, "learning_rate": 1.8819924750679702e-05, "loss": 0.8123398423194885, "step": 1358 }, { "epoch": 0.39742652434566456, "grad_norm": 1.1798948792158594, "learning_rate": 1.8817645299055815e-05, "loss": 0.6118077039718628, "step": 1359 }, { "epoch": 0.39771896476092994, "grad_norm": 1.3941391170101545, "learning_rate": 1.8815363786360948e-05, "loss": 0.6916248798370361, "step": 1360 }, { "epoch": 0.3980114051761954, "grad_norm": 1.2139550861968382, "learning_rate": 1.8813080213128394e-05, "loss": 0.5586028099060059, "step": 1361 }, { "epoch": 0.39830384559146076, "grad_norm": 1.4269154081314215, "learning_rate": 1.8810794579891925e-05, "loss": 0.6132841110229492, "step": 1362 }, { "epoch": 0.39859628600672614, "grad_norm": 1.6891257084549798, "learning_rate": 1.8808506887185793e-05, "loss": 0.7329133749008179, "step": 1363 }, { "epoch": 0.3988887264219915, "grad_norm": 1.2704606915664418, "learning_rate": 1.8806217135544736e-05, "loss": 0.5975138545036316, "step": 1364 }, { "epoch": 0.3991811668372569, "grad_norm": 1.315662529163245, "learning_rate": 1.8803925325503963e-05, "loss": 0.5790295600891113, "step": 1365 }, { "epoch": 0.3994736072525223, "grad_norm": 1.2020939373145192, "learning_rate": 1.8801631457599173e-05, "loss": 0.6506124138832092, "step": 1366 }, { "epoch": 0.39976604766778767, "grad_norm": 1.1981976421950613, "learning_rate": 1.8799335532366547e-05, "loss": 0.5577528476715088, "step": 1367 }, { "epoch": 0.40005848808305305, "grad_norm": 1.4216027512167424, "learning_rate": 1.879703755034274e-05, "loss": 0.675471305847168, "step": 1368 }, { "epoch": 0.4003509284983185, "grad_norm": 1.2857175045016282, "learning_rate": 1.879473751206489e-05, "loss": 0.5826357007026672, "step": 1369 }, { "epoch": 0.4006433689135839, "grad_norm": 1.31090591162355, "learning_rate": 1.8792435418070623e-05, "loss": 0.5146772265434265, "step": 1370 }, { "epoch": 0.40093580932884926, "grad_norm": 1.224194477069696, "learning_rate": 1.879013126889804e-05, "loss": 0.6049208641052246, "step": 1371 }, { "epoch": 0.40122824974411464, "grad_norm": 1.4047657351006413, "learning_rate": 1.878782506508571e-05, "loss": 0.7058207392692566, "step": 1372 }, { "epoch": 0.40152069015938, "grad_norm": 1.3782924104285919, "learning_rate": 1.8785516807172704e-05, "loss": 0.6281940937042236, "step": 1373 }, { "epoch": 0.4018131305746454, "grad_norm": 1.5710053658084482, "learning_rate": 1.878320649569856e-05, "loss": 0.6318703889846802, "step": 1374 }, { "epoch": 0.4021055709899108, "grad_norm": 1.369183615763356, "learning_rate": 1.87808941312033e-05, "loss": 0.6595311164855957, "step": 1375 }, { "epoch": 0.40239801140517617, "grad_norm": 1.2850685362090555, "learning_rate": 1.8778579714227433e-05, "loss": 0.6964930295944214, "step": 1376 }, { "epoch": 0.4026904518204416, "grad_norm": 1.338060741631637, "learning_rate": 1.8776263245311926e-05, "loss": 0.6093966364860535, "step": 1377 }, { "epoch": 0.402982892235707, "grad_norm": 1.2438362189425571, "learning_rate": 1.8773944724998248e-05, "loss": 0.5337893962860107, "step": 1378 }, { "epoch": 0.4032753326509724, "grad_norm": 1.3665212711176857, "learning_rate": 1.8771624153828336e-05, "loss": 0.5899128317832947, "step": 1379 }, { "epoch": 0.40356777306623776, "grad_norm": 1.41983053503157, "learning_rate": 1.876930153234461e-05, "loss": 0.7054699659347534, "step": 1380 }, { "epoch": 0.40386021348150314, "grad_norm": 1.4240217447880834, "learning_rate": 1.876697686108997e-05, "loss": 0.6910602450370789, "step": 1381 }, { "epoch": 0.4041526538967685, "grad_norm": 1.383183489617276, "learning_rate": 1.876465014060779e-05, "loss": 0.605659008026123, "step": 1382 }, { "epoch": 0.4044450943120339, "grad_norm": 1.4798727853043008, "learning_rate": 1.8762321371441934e-05, "loss": 0.7159937620162964, "step": 1383 }, { "epoch": 0.4047375347272993, "grad_norm": 1.394589338486404, "learning_rate": 1.8759990554136733e-05, "loss": 0.7568333148956299, "step": 1384 }, { "epoch": 0.4050299751425647, "grad_norm": 1.613153659624872, "learning_rate": 1.8757657689236998e-05, "loss": 0.7117356657981873, "step": 1385 }, { "epoch": 0.4053224155578301, "grad_norm": 1.4078248670298101, "learning_rate": 1.8755322777288027e-05, "loss": 0.6429109573364258, "step": 1386 }, { "epoch": 0.4056148559730955, "grad_norm": 1.571804966063755, "learning_rate": 1.875298581883559e-05, "loss": 0.643811821937561, "step": 1387 }, { "epoch": 0.40590729638836087, "grad_norm": 1.439883937827134, "learning_rate": 1.875064681442594e-05, "loss": 0.7143295407295227, "step": 1388 }, { "epoch": 0.40619973680362625, "grad_norm": 1.5655377581350667, "learning_rate": 1.8748305764605798e-05, "loss": 0.732312023639679, "step": 1389 }, { "epoch": 0.40649217721889164, "grad_norm": 1.2430462426936875, "learning_rate": 1.8745962669922375e-05, "loss": 0.6878848075866699, "step": 1390 }, { "epoch": 0.406784617634157, "grad_norm": 1.40828278888595, "learning_rate": 1.8743617530923356e-05, "loss": 0.6716262698173523, "step": 1391 }, { "epoch": 0.40707705804942246, "grad_norm": 1.346254534859124, "learning_rate": 1.87412703481569e-05, "loss": 0.4990834593772888, "step": 1392 }, { "epoch": 0.40736949846468784, "grad_norm": 1.4289189141042684, "learning_rate": 1.8738921122171647e-05, "loss": 0.6541857719421387, "step": 1393 }, { "epoch": 0.4076619388799532, "grad_norm": 1.4587922420879296, "learning_rate": 1.8736569853516715e-05, "loss": 0.6310811042785645, "step": 1394 }, { "epoch": 0.4079543792952186, "grad_norm": 1.5328348699666439, "learning_rate": 1.8734216542741702e-05, "loss": 0.6335423588752747, "step": 1395 }, { "epoch": 0.408246819710484, "grad_norm": 1.4540701020587141, "learning_rate": 1.873186119039667e-05, "loss": 0.6315034627914429, "step": 1396 }, { "epoch": 0.40853926012574937, "grad_norm": 1.2591166704430221, "learning_rate": 1.872950379703218e-05, "loss": 0.6822362542152405, "step": 1397 }, { "epoch": 0.40883170054101475, "grad_norm": 1.3128671260601936, "learning_rate": 1.8727144363199257e-05, "loss": 0.699965238571167, "step": 1398 }, { "epoch": 0.40912414095628014, "grad_norm": 1.376512615463435, "learning_rate": 1.8724782889449397e-05, "loss": 0.6769841313362122, "step": 1399 }, { "epoch": 0.4094165813715456, "grad_norm": 1.242831468646962, "learning_rate": 1.8722419376334584e-05, "loss": 0.5219473838806152, "step": 1400 }, { "epoch": 0.40970902178681096, "grad_norm": 1.1119386554431685, "learning_rate": 1.872005382440728e-05, "loss": 0.6091574430465698, "step": 1401 }, { "epoch": 0.41000146220207634, "grad_norm": 1.3005524040148213, "learning_rate": 1.8717686234220406e-05, "loss": 0.7589390277862549, "step": 1402 }, { "epoch": 0.4102939026173417, "grad_norm": 1.2931582987016021, "learning_rate": 1.8715316606327384e-05, "loss": 0.7042895555496216, "step": 1403 }, { "epoch": 0.4105863430326071, "grad_norm": 1.2509877991876854, "learning_rate": 1.8712944941282095e-05, "loss": 0.6490949988365173, "step": 1404 }, { "epoch": 0.4108787834478725, "grad_norm": 1.3726951776657805, "learning_rate": 1.87105712396389e-05, "loss": 0.6614132523536682, "step": 1405 }, { "epoch": 0.41117122386313787, "grad_norm": 1.3416970895813871, "learning_rate": 1.8708195501952637e-05, "loss": 0.666157603263855, "step": 1406 }, { "epoch": 0.41146366427840325, "grad_norm": 1.5455429688837699, "learning_rate": 1.8705817728778626e-05, "loss": 0.7347884178161621, "step": 1407 }, { "epoch": 0.4117561046936687, "grad_norm": 1.6323767587093516, "learning_rate": 1.8703437920672652e-05, "loss": 0.8129836320877075, "step": 1408 }, { "epoch": 0.4120485451089341, "grad_norm": 1.2690047775005027, "learning_rate": 1.870105607819098e-05, "loss": 0.645210862159729, "step": 1409 }, { "epoch": 0.41234098552419945, "grad_norm": 1.367879279910813, "learning_rate": 1.8698672201890355e-05, "loss": 0.6716916561126709, "step": 1410 }, { "epoch": 0.41263342593946484, "grad_norm": 1.66933384894401, "learning_rate": 1.869628629232799e-05, "loss": 0.8190855383872986, "step": 1411 }, { "epoch": 0.4129258663547302, "grad_norm": 1.3900061091611966, "learning_rate": 1.8693898350061582e-05, "loss": 0.7618075609207153, "step": 1412 }, { "epoch": 0.4132183067699956, "grad_norm": 1.3486290329442485, "learning_rate": 1.869150837564929e-05, "loss": 0.719980001449585, "step": 1413 }, { "epoch": 0.413510747185261, "grad_norm": 1.2278158990840933, "learning_rate": 1.8689116369649763e-05, "loss": 0.6601548194885254, "step": 1414 }, { "epoch": 0.41380318760052637, "grad_norm": 1.4157736896401232, "learning_rate": 1.8686722332622112e-05, "loss": 0.5991787314414978, "step": 1415 }, { "epoch": 0.4140956280157918, "grad_norm": 1.2916299361998576, "learning_rate": 1.8684326265125935e-05, "loss": 0.6089641451835632, "step": 1416 }, { "epoch": 0.4143880684310572, "grad_norm": 1.4857622706167455, "learning_rate": 1.8681928167721297e-05, "loss": 0.8143327236175537, "step": 1417 }, { "epoch": 0.41468050884632257, "grad_norm": 1.1137129272750816, "learning_rate": 1.8679528040968733e-05, "loss": 0.6127045154571533, "step": 1418 }, { "epoch": 0.41497294926158795, "grad_norm": 1.2684856043432204, "learning_rate": 1.8677125885429262e-05, "loss": 0.659069299697876, "step": 1419 }, { "epoch": 0.41526538967685334, "grad_norm": 1.3122733176612695, "learning_rate": 1.8674721701664377e-05, "loss": 0.7277505397796631, "step": 1420 }, { "epoch": 0.4155578300921187, "grad_norm": 1.2107555045955465, "learning_rate": 1.8672315490236034e-05, "loss": 0.6128710508346558, "step": 1421 }, { "epoch": 0.4158502705073841, "grad_norm": 1.7155790773588848, "learning_rate": 1.866990725170667e-05, "loss": 0.7439340949058533, "step": 1422 }, { "epoch": 0.4161427109226495, "grad_norm": 1.1423179387443951, "learning_rate": 1.8667496986639206e-05, "loss": 0.5855459570884705, "step": 1423 }, { "epoch": 0.4164351513379149, "grad_norm": 1.3335637335552337, "learning_rate": 1.866508469559702e-05, "loss": 0.6865170001983643, "step": 1424 }, { "epoch": 0.4167275917531803, "grad_norm": 1.3191415692644766, "learning_rate": 1.866267037914397e-05, "loss": 0.6648446917533875, "step": 1425 }, { "epoch": 0.4170200321684457, "grad_norm": 1.5198580088053322, "learning_rate": 1.866025403784439e-05, "loss": 0.6919275522232056, "step": 1426 }, { "epoch": 0.41731247258371107, "grad_norm": 1.3309988770277923, "learning_rate": 1.865783567226308e-05, "loss": 0.7270313501358032, "step": 1427 }, { "epoch": 0.41760491299897645, "grad_norm": 1.2814450794742573, "learning_rate": 1.8655415282965327e-05, "loss": 0.5938387513160706, "step": 1428 }, { "epoch": 0.41789735341424183, "grad_norm": 1.378984312222445, "learning_rate": 1.8652992870516872e-05, "loss": 0.6517149209976196, "step": 1429 }, { "epoch": 0.4181897938295072, "grad_norm": 1.1862439123900306, "learning_rate": 1.8650568435483948e-05, "loss": 0.6688356399536133, "step": 1430 }, { "epoch": 0.41848223424477266, "grad_norm": 1.153419964025717, "learning_rate": 1.864814197843325e-05, "loss": 0.5300855040550232, "step": 1431 }, { "epoch": 0.41877467466003804, "grad_norm": 1.270766161064103, "learning_rate": 1.8645713499931943e-05, "loss": 0.6404704451560974, "step": 1432 }, { "epoch": 0.4190671150753034, "grad_norm": 1.5430855845367462, "learning_rate": 1.8643283000547673e-05, "loss": 0.6758813858032227, "step": 1433 }, { "epoch": 0.4193595554905688, "grad_norm": 1.3783205387944717, "learning_rate": 1.8640850480848552e-05, "loss": 0.6328250169754028, "step": 1434 }, { "epoch": 0.4196519959058342, "grad_norm": 1.26459826517306, "learning_rate": 1.863841594140317e-05, "loss": 0.6747157573699951, "step": 1435 }, { "epoch": 0.41994443632109957, "grad_norm": 1.3504769695047412, "learning_rate": 1.8635979382780584e-05, "loss": 0.5314475893974304, "step": 1436 }, { "epoch": 0.42023687673636495, "grad_norm": 1.345420561697831, "learning_rate": 1.863354080555033e-05, "loss": 0.478320837020874, "step": 1437 }, { "epoch": 0.42052931715163033, "grad_norm": 1.7039787917499718, "learning_rate": 1.86311002102824e-05, "loss": 0.7389972805976868, "step": 1438 }, { "epoch": 0.42082175756689577, "grad_norm": 1.5752835852867741, "learning_rate": 1.8628657597547273e-05, "loss": 0.5449938178062439, "step": 1439 }, { "epoch": 0.42111419798216115, "grad_norm": 1.417913338852298, "learning_rate": 1.8626212967915897e-05, "loss": 0.6752811670303345, "step": 1440 }, { "epoch": 0.42140663839742654, "grad_norm": 1.3268814915367182, "learning_rate": 1.862376632195969e-05, "loss": 0.7750412821769714, "step": 1441 }, { "epoch": 0.4216990788126919, "grad_norm": 1.4150998717703018, "learning_rate": 1.8621317660250535e-05, "loss": 0.5967680215835571, "step": 1442 }, { "epoch": 0.4219915192279573, "grad_norm": 1.2836272802739963, "learning_rate": 1.86188669833608e-05, "loss": 0.6781327724456787, "step": 1443 }, { "epoch": 0.4222839596432227, "grad_norm": 1.421988457915262, "learning_rate": 1.8616414291863307e-05, "loss": 0.7539681196212769, "step": 1444 }, { "epoch": 0.42257640005848807, "grad_norm": 1.5265432564271315, "learning_rate": 1.8613959586331364e-05, "loss": 0.6976957321166992, "step": 1445 }, { "epoch": 0.42286884047375345, "grad_norm": 1.3365892238255053, "learning_rate": 1.861150286733874e-05, "loss": 0.6616528034210205, "step": 1446 }, { "epoch": 0.4231612808890189, "grad_norm": 1.4482994306877846, "learning_rate": 1.860904413545968e-05, "loss": 0.6407957077026367, "step": 1447 }, { "epoch": 0.42345372130428427, "grad_norm": 1.4193133822561126, "learning_rate": 1.86065833912689e-05, "loss": 0.5918550491333008, "step": 1448 }, { "epoch": 0.42374616171954965, "grad_norm": 1.421765780188314, "learning_rate": 1.8604120635341574e-05, "loss": 0.6142056584358215, "step": 1449 }, { "epoch": 0.42403860213481503, "grad_norm": 1.4371201128611453, "learning_rate": 1.8601655868253368e-05, "loss": 0.6359597444534302, "step": 1450 }, { "epoch": 0.4243310425500804, "grad_norm": 1.2914617625794835, "learning_rate": 1.8599189090580402e-05, "loss": 0.7149467468261719, "step": 1451 }, { "epoch": 0.4246234829653458, "grad_norm": 1.2900964447275098, "learning_rate": 1.8596720302899272e-05, "loss": 0.6015822887420654, "step": 1452 }, { "epoch": 0.4249159233806112, "grad_norm": 1.1866564154864978, "learning_rate": 1.8594249505787035e-05, "loss": 0.6389881372451782, "step": 1453 }, { "epoch": 0.42520836379587656, "grad_norm": 1.381321058965008, "learning_rate": 1.8591776699821235e-05, "loss": 0.7479783892631531, "step": 1454 }, { "epoch": 0.425500804211142, "grad_norm": 1.2271977568055246, "learning_rate": 1.8589301885579866e-05, "loss": 0.6574498414993286, "step": 1455 }, { "epoch": 0.4257932446264074, "grad_norm": 1.3187836865578064, "learning_rate": 1.858682506364141e-05, "loss": 0.6314088702201843, "step": 1456 }, { "epoch": 0.42608568504167277, "grad_norm": 1.4747450600155867, "learning_rate": 1.85843462345848e-05, "loss": 0.605385959148407, "step": 1457 }, { "epoch": 0.42637812545693815, "grad_norm": 1.280849948973879, "learning_rate": 1.8581865398989452e-05, "loss": 0.6355551481246948, "step": 1458 }, { "epoch": 0.42667056587220353, "grad_norm": 1.3012840164028812, "learning_rate": 1.8579382557435247e-05, "loss": 0.6303017139434814, "step": 1459 }, { "epoch": 0.4269630062874689, "grad_norm": 1.2629380280411955, "learning_rate": 1.8576897710502532e-05, "loss": 0.5916526317596436, "step": 1460 }, { "epoch": 0.4272554467027343, "grad_norm": 1.2467440963341316, "learning_rate": 1.8574410858772126e-05, "loss": 0.5709279179573059, "step": 1461 }, { "epoch": 0.4275478871179997, "grad_norm": 1.2909430743502928, "learning_rate": 1.8571922002825317e-05, "loss": 0.571231484413147, "step": 1462 }, { "epoch": 0.4278403275332651, "grad_norm": 1.310017395907512, "learning_rate": 1.8569431143243856e-05, "loss": 0.6352202892303467, "step": 1463 }, { "epoch": 0.4281327679485305, "grad_norm": 1.316165374470179, "learning_rate": 1.8566938280609965e-05, "loss": 0.553265392780304, "step": 1464 }, { "epoch": 0.4284252083637959, "grad_norm": 1.1127868543655046, "learning_rate": 1.8564443415506343e-05, "loss": 0.4913727045059204, "step": 1465 }, { "epoch": 0.42871764877906127, "grad_norm": 1.4457215110099157, "learning_rate": 1.8561946548516143e-05, "loss": 0.542539119720459, "step": 1466 }, { "epoch": 0.42901008919432665, "grad_norm": 1.5261496853017646, "learning_rate": 1.8559447680222994e-05, "loss": 0.719292163848877, "step": 1467 }, { "epoch": 0.42930252960959203, "grad_norm": 1.4842625427656275, "learning_rate": 1.8556946811210993e-05, "loss": 0.8443170785903931, "step": 1468 }, { "epoch": 0.4295949700248574, "grad_norm": 1.4024545882927506, "learning_rate": 1.8554443942064705e-05, "loss": 0.7899821996688843, "step": 1469 }, { "epoch": 0.42988741044012285, "grad_norm": 1.3637198474337424, "learning_rate": 1.8551939073369155e-05, "loss": 0.617426872253418, "step": 1470 }, { "epoch": 0.43017985085538823, "grad_norm": 1.284473833943433, "learning_rate": 1.8549432205709842e-05, "loss": 0.5573505163192749, "step": 1471 }, { "epoch": 0.4304722912706536, "grad_norm": 1.2050796372555104, "learning_rate": 1.8546923339672734e-05, "loss": 0.5571975111961365, "step": 1472 }, { "epoch": 0.430764731685919, "grad_norm": 1.2452948917501594, "learning_rate": 1.854441247584426e-05, "loss": 0.6411981582641602, "step": 1473 }, { "epoch": 0.4310571721011844, "grad_norm": 1.4342124934143161, "learning_rate": 1.8541899614811323e-05, "loss": 0.4766804277896881, "step": 1474 }, { "epoch": 0.43134961251644977, "grad_norm": 1.5114551227786939, "learning_rate": 1.8539384757161285e-05, "loss": 0.7479405403137207, "step": 1475 }, { "epoch": 0.43164205293171515, "grad_norm": 1.3476436799817348, "learning_rate": 1.8536867903481983e-05, "loss": 0.6848211288452148, "step": 1476 }, { "epoch": 0.43193449334698053, "grad_norm": 1.2973665530504777, "learning_rate": 1.8534349054361708e-05, "loss": 0.7413634061813354, "step": 1477 }, { "epoch": 0.43222693376224597, "grad_norm": 1.1870657052305638, "learning_rate": 1.8531828210389236e-05, "loss": 0.5880843997001648, "step": 1478 }, { "epoch": 0.43251937417751135, "grad_norm": 1.16075786792099, "learning_rate": 1.852930537215379e-05, "loss": 0.5885627269744873, "step": 1479 }, { "epoch": 0.43281181459277673, "grad_norm": 1.3270242768891243, "learning_rate": 1.8526780540245077e-05, "loss": 0.706636905670166, "step": 1480 }, { "epoch": 0.4331042550080421, "grad_norm": 1.3793959384028218, "learning_rate": 1.8524253715253255e-05, "loss": 0.6521843075752258, "step": 1481 }, { "epoch": 0.4333966954233075, "grad_norm": 1.3825746336646279, "learning_rate": 1.8521724897768955e-05, "loss": 0.6231021881103516, "step": 1482 }, { "epoch": 0.4336891358385729, "grad_norm": 1.4460679872410762, "learning_rate": 1.851919408838327e-05, "loss": 0.6859451532363892, "step": 1483 }, { "epoch": 0.43398157625383826, "grad_norm": 1.2507527028404273, "learning_rate": 1.851666128768777e-05, "loss": 0.7948323488235474, "step": 1484 }, { "epoch": 0.43427401666910365, "grad_norm": 1.3631419376990976, "learning_rate": 1.8514126496274473e-05, "loss": 0.7815203070640564, "step": 1485 }, { "epoch": 0.4345664570843691, "grad_norm": 1.2904619284943133, "learning_rate": 1.8511589714735875e-05, "loss": 0.6941452622413635, "step": 1486 }, { "epoch": 0.43485889749963447, "grad_norm": 1.41567858231915, "learning_rate": 1.850905094366493e-05, "loss": 0.5500549674034119, "step": 1487 }, { "epoch": 0.43515133791489985, "grad_norm": 1.2918667262960315, "learning_rate": 1.8506510183655066e-05, "loss": 0.6616400480270386, "step": 1488 }, { "epoch": 0.43544377833016523, "grad_norm": 1.2491627898498192, "learning_rate": 1.8503967435300166e-05, "loss": 0.6920043230056763, "step": 1489 }, { "epoch": 0.4357362187454306, "grad_norm": 1.215912086863742, "learning_rate": 1.8501422699194584e-05, "loss": 0.6080813407897949, "step": 1490 }, { "epoch": 0.436028659160696, "grad_norm": 1.2215283867587456, "learning_rate": 1.8498875975933135e-05, "loss": 0.576184868812561, "step": 1491 }, { "epoch": 0.4363210995759614, "grad_norm": 1.3544983329172053, "learning_rate": 1.84963272661111e-05, "loss": 0.6647310256958008, "step": 1492 }, { "epoch": 0.43661353999122676, "grad_norm": 1.5126248587795905, "learning_rate": 1.8493776570324224e-05, "loss": 0.6738306283950806, "step": 1493 }, { "epoch": 0.4369059804064922, "grad_norm": 1.306695091605799, "learning_rate": 1.849122388916872e-05, "loss": 0.681056022644043, "step": 1494 }, { "epoch": 0.4371984208217576, "grad_norm": 1.2802492616875505, "learning_rate": 1.848866922324126e-05, "loss": 0.7844547033309937, "step": 1495 }, { "epoch": 0.43749086123702297, "grad_norm": 1.278338668380481, "learning_rate": 1.8486112573138977e-05, "loss": 0.6478928327560425, "step": 1496 }, { "epoch": 0.43778330165228835, "grad_norm": 1.1565510309984284, "learning_rate": 1.8483553939459477e-05, "loss": 0.6035341024398804, "step": 1497 }, { "epoch": 0.43807574206755373, "grad_norm": 1.5407821231530743, "learning_rate": 1.8480993322800826e-05, "loss": 0.6664912700653076, "step": 1498 }, { "epoch": 0.4383681824828191, "grad_norm": 1.2757017491830842, "learning_rate": 1.847843072376155e-05, "loss": 0.7171953916549683, "step": 1499 }, { "epoch": 0.4386606228980845, "grad_norm": 1.6930649567828897, "learning_rate": 1.8475866142940646e-05, "loss": 0.8400344848632812, "step": 1500 }, { "epoch": 0.4389530633133499, "grad_norm": 1.4411024776302432, "learning_rate": 1.8473299580937563e-05, "loss": 0.5119056701660156, "step": 1501 }, { "epoch": 0.4392455037286153, "grad_norm": 1.2781692932924433, "learning_rate": 1.847073103835222e-05, "loss": 0.5864866375923157, "step": 1502 }, { "epoch": 0.4395379441438807, "grad_norm": 1.1391351003013295, "learning_rate": 1.8468160515785e-05, "loss": 0.6389576196670532, "step": 1503 }, { "epoch": 0.4398303845591461, "grad_norm": 1.3447539998849671, "learning_rate": 1.846558801383675e-05, "loss": 0.6745110750198364, "step": 1504 }, { "epoch": 0.44012282497441146, "grad_norm": 1.4359844129069297, "learning_rate": 1.846301353310877e-05, "loss": 0.6207559704780579, "step": 1505 }, { "epoch": 0.44041526538967685, "grad_norm": 1.4143769366285628, "learning_rate": 1.8460437074202832e-05, "loss": 0.6818139553070068, "step": 1506 }, { "epoch": 0.44070770580494223, "grad_norm": 1.4877202307925406, "learning_rate": 1.845785863772117e-05, "loss": 0.652062714099884, "step": 1507 }, { "epoch": 0.4410001462202076, "grad_norm": 1.340284980688535, "learning_rate": 1.8455278224266476e-05, "loss": 0.6842166185379028, "step": 1508 }, { "epoch": 0.44129258663547305, "grad_norm": 1.3899905625699573, "learning_rate": 1.8452695834441904e-05, "loss": 0.6459342837333679, "step": 1509 }, { "epoch": 0.44158502705073843, "grad_norm": 1.3677235686172902, "learning_rate": 1.8450111468851078e-05, "loss": 0.6036739349365234, "step": 1510 }, { "epoch": 0.4418774674660038, "grad_norm": 1.401326082704981, "learning_rate": 1.844752512809807e-05, "loss": 0.7530199289321899, "step": 1511 }, { "epoch": 0.4421699078812692, "grad_norm": 1.249585374389202, "learning_rate": 1.8444936812787428e-05, "loss": 0.6098290085792542, "step": 1512 }, { "epoch": 0.4424623482965346, "grad_norm": 1.6252323705163014, "learning_rate": 1.844234652352415e-05, "loss": 0.7142464518547058, "step": 1513 }, { "epoch": 0.44275478871179996, "grad_norm": 1.3215155589821708, "learning_rate": 1.8439754260913703e-05, "loss": 0.4895970821380615, "step": 1514 }, { "epoch": 0.44304722912706535, "grad_norm": 1.2855871920553614, "learning_rate": 1.8437160025562012e-05, "loss": 0.6166520118713379, "step": 1515 }, { "epoch": 0.4433396695423307, "grad_norm": 1.3621423468696194, "learning_rate": 1.8434563818075462e-05, "loss": 0.6020585894584656, "step": 1516 }, { "epoch": 0.44363210995759617, "grad_norm": 1.3215872914676274, "learning_rate": 1.8431965639060904e-05, "loss": 0.6879030466079712, "step": 1517 }, { "epoch": 0.44392455037286155, "grad_norm": 1.2000763930073624, "learning_rate": 1.8429365489125644e-05, "loss": 0.5753897428512573, "step": 1518 }, { "epoch": 0.44421699078812693, "grad_norm": 1.2916902596192155, "learning_rate": 1.8426763368877455e-05, "loss": 0.5165301561355591, "step": 1519 }, { "epoch": 0.4445094312033923, "grad_norm": 1.630208225804633, "learning_rate": 1.842415927892456e-05, "loss": 0.6377310752868652, "step": 1520 }, { "epoch": 0.4448018716186577, "grad_norm": 1.4221002668397775, "learning_rate": 1.842155321987566e-05, "loss": 0.7429912090301514, "step": 1521 }, { "epoch": 0.4450943120339231, "grad_norm": 1.5079395076396265, "learning_rate": 1.8418945192339892e-05, "loss": 0.6177542209625244, "step": 1522 }, { "epoch": 0.44538675244918846, "grad_norm": 1.2784904022569494, "learning_rate": 1.8416335196926877e-05, "loss": 0.662541389465332, "step": 1523 }, { "epoch": 0.44567919286445384, "grad_norm": 1.2782173083325044, "learning_rate": 1.841372323424668e-05, "loss": 0.6026759743690491, "step": 1524 }, { "epoch": 0.4459716332797193, "grad_norm": 1.5759742604234355, "learning_rate": 1.8411109304909837e-05, "loss": 0.7902384400367737, "step": 1525 }, { "epoch": 0.44626407369498466, "grad_norm": 1.4904175669631523, "learning_rate": 1.840849340952733e-05, "loss": 0.6588590145111084, "step": 1526 }, { "epoch": 0.44655651411025005, "grad_norm": 1.1682358413615135, "learning_rate": 1.8405875548710614e-05, "loss": 0.49133825302124023, "step": 1527 }, { "epoch": 0.44684895452551543, "grad_norm": 1.4464174570347765, "learning_rate": 1.8403255723071597e-05, "loss": 0.6644654273986816, "step": 1528 }, { "epoch": 0.4471413949407808, "grad_norm": 1.2325053536943291, "learning_rate": 1.8400633933222647e-05, "loss": 0.6257454752922058, "step": 1529 }, { "epoch": 0.4474338353560462, "grad_norm": 1.4100106920950097, "learning_rate": 1.8398010179776597e-05, "loss": 0.6671919226646423, "step": 1530 }, { "epoch": 0.4477262757713116, "grad_norm": 1.1625081058782702, "learning_rate": 1.839538446334672e-05, "loss": 0.6001447439193726, "step": 1531 }, { "epoch": 0.44801871618657696, "grad_norm": 1.6509081383772402, "learning_rate": 1.8392756784546775e-05, "loss": 0.8103213310241699, "step": 1532 }, { "epoch": 0.4483111566018424, "grad_norm": 1.1675484766628168, "learning_rate": 1.839012714399096e-05, "loss": 0.7010835409164429, "step": 1533 }, { "epoch": 0.4486035970171078, "grad_norm": 1.0773967688725017, "learning_rate": 1.8387495542293935e-05, "loss": 0.5709215402603149, "step": 1534 }, { "epoch": 0.44889603743237316, "grad_norm": 1.3558935245332375, "learning_rate": 1.8384861980070826e-05, "loss": 0.6410949230194092, "step": 1535 }, { "epoch": 0.44918847784763855, "grad_norm": 1.358963272892771, "learning_rate": 1.838222645793721e-05, "loss": 0.8036839962005615, "step": 1536 }, { "epoch": 0.44948091826290393, "grad_norm": 1.1470889977158967, "learning_rate": 1.8379588976509123e-05, "loss": 0.49213099479675293, "step": 1537 }, { "epoch": 0.4497733586781693, "grad_norm": 1.5829843161961048, "learning_rate": 1.8376949536403063e-05, "loss": 0.7111018896102905, "step": 1538 }, { "epoch": 0.4500657990934347, "grad_norm": 1.313995907545699, "learning_rate": 1.837430813823598e-05, "loss": 0.8506999015808105, "step": 1539 }, { "epoch": 0.4503582395087001, "grad_norm": 1.2175571229137518, "learning_rate": 1.8371664782625287e-05, "loss": 0.7369798421859741, "step": 1540 }, { "epoch": 0.4506506799239655, "grad_norm": 1.3435168892785054, "learning_rate": 1.8369019470188855e-05, "loss": 0.5982831120491028, "step": 1541 }, { "epoch": 0.4509431203392309, "grad_norm": 1.2303590063922416, "learning_rate": 1.8366372201545002e-05, "loss": 0.6129144430160522, "step": 1542 }, { "epoch": 0.4512355607544963, "grad_norm": 1.5191607059455674, "learning_rate": 1.8363722977312512e-05, "loss": 0.7142921686172485, "step": 1543 }, { "epoch": 0.45152800116976166, "grad_norm": 1.1545455601160404, "learning_rate": 1.8361071798110635e-05, "loss": 0.515651524066925, "step": 1544 }, { "epoch": 0.45182044158502704, "grad_norm": 1.3144713138844157, "learning_rate": 1.8358418664559058e-05, "loss": 0.5544168949127197, "step": 1545 }, { "epoch": 0.4521128820002924, "grad_norm": 1.2540637765053078, "learning_rate": 1.8355763577277938e-05, "loss": 0.6801918745040894, "step": 1546 }, { "epoch": 0.4524053224155578, "grad_norm": 1.3664850716479517, "learning_rate": 1.835310653688789e-05, "loss": 0.683785080909729, "step": 1547 }, { "epoch": 0.45269776283082325, "grad_norm": 1.363558169999723, "learning_rate": 1.835044754400997e-05, "loss": 0.5689892172813416, "step": 1548 }, { "epoch": 0.45299020324608863, "grad_norm": 1.1621305276584806, "learning_rate": 1.8347786599265713e-05, "loss": 0.5260726809501648, "step": 1549 }, { "epoch": 0.453282643661354, "grad_norm": 1.2201116845769602, "learning_rate": 1.834512370327709e-05, "loss": 0.6792432069778442, "step": 1550 }, { "epoch": 0.4535750840766194, "grad_norm": 1.198643016289117, "learning_rate": 1.8342458856666545e-05, "loss": 0.6336524486541748, "step": 1551 }, { "epoch": 0.4538675244918848, "grad_norm": 1.3472994421503108, "learning_rate": 1.8339792060056965e-05, "loss": 0.5929614901542664, "step": 1552 }, { "epoch": 0.45415996490715016, "grad_norm": 1.2599505430948363, "learning_rate": 1.8337123314071696e-05, "loss": 0.6683382391929626, "step": 1553 }, { "epoch": 0.45445240532241554, "grad_norm": 1.2860246628200298, "learning_rate": 1.833445261933454e-05, "loss": 0.6256811618804932, "step": 1554 }, { "epoch": 0.4547448457376809, "grad_norm": 1.3499468606960694, "learning_rate": 1.8331779976469765e-05, "loss": 0.5974653959274292, "step": 1555 }, { "epoch": 0.45503728615294636, "grad_norm": 1.2078321854850618, "learning_rate": 1.8329105386102074e-05, "loss": 0.5471535325050354, "step": 1556 }, { "epoch": 0.45532972656821175, "grad_norm": 1.284169615938693, "learning_rate": 1.832642884885664e-05, "loss": 0.5751267075538635, "step": 1557 }, { "epoch": 0.45562216698347713, "grad_norm": 1.3234326952626145, "learning_rate": 1.8323750365359092e-05, "loss": 0.7003380060195923, "step": 1558 }, { "epoch": 0.4559146073987425, "grad_norm": 1.3333099062603002, "learning_rate": 1.8321069936235503e-05, "loss": 0.6351351737976074, "step": 1559 }, { "epoch": 0.4562070478140079, "grad_norm": 1.4452410048586575, "learning_rate": 1.8318387562112407e-05, "loss": 0.6083345413208008, "step": 1560 }, { "epoch": 0.4564994882292733, "grad_norm": 1.230127453588353, "learning_rate": 1.83157032436168e-05, "loss": 0.589935302734375, "step": 1561 }, { "epoch": 0.45679192864453866, "grad_norm": 1.3491229847821233, "learning_rate": 1.8313016981376116e-05, "loss": 0.7648014426231384, "step": 1562 }, { "epoch": 0.45708436905980404, "grad_norm": 1.2461686063365083, "learning_rate": 1.831032877601826e-05, "loss": 0.7309973239898682, "step": 1563 }, { "epoch": 0.4573768094750695, "grad_norm": 1.4691097869713072, "learning_rate": 1.8307638628171575e-05, "loss": 0.7231593728065491, "step": 1564 }, { "epoch": 0.45766924989033486, "grad_norm": 1.4770239307253334, "learning_rate": 1.8304946538464876e-05, "loss": 0.7321262359619141, "step": 1565 }, { "epoch": 0.45796169030560024, "grad_norm": 1.1157038717428966, "learning_rate": 1.830225250752742e-05, "loss": 0.5866271257400513, "step": 1566 }, { "epoch": 0.4582541307208656, "grad_norm": 1.4899327841327124, "learning_rate": 1.8299556535988917e-05, "loss": 0.7146202325820923, "step": 1567 }, { "epoch": 0.458546571136131, "grad_norm": 1.0989226716242009, "learning_rate": 1.8296858624479536e-05, "loss": 0.4600168466567993, "step": 1568 }, { "epoch": 0.4588390115513964, "grad_norm": 1.5647421342147445, "learning_rate": 1.8294158773629896e-05, "loss": 0.5710705518722534, "step": 1569 }, { "epoch": 0.4591314519666618, "grad_norm": 1.4737029572986353, "learning_rate": 1.8291456984071073e-05, "loss": 0.7075216770172119, "step": 1570 }, { "epoch": 0.45942389238192716, "grad_norm": 1.2087048615463696, "learning_rate": 1.828875325643459e-05, "loss": 0.5262739062309265, "step": 1571 }, { "epoch": 0.4597163327971926, "grad_norm": 1.2732843462549814, "learning_rate": 1.8286047591352436e-05, "loss": 0.724657416343689, "step": 1572 }, { "epoch": 0.460008773212458, "grad_norm": 1.2778614004914874, "learning_rate": 1.8283339989457033e-05, "loss": 0.6047587394714355, "step": 1573 }, { "epoch": 0.46030121362772336, "grad_norm": 1.481028950467352, "learning_rate": 1.828063045138127e-05, "loss": 0.6647980213165283, "step": 1574 }, { "epoch": 0.46059365404298874, "grad_norm": 1.3031844151965102, "learning_rate": 1.827791897775849e-05, "loss": 0.6081969738006592, "step": 1575 }, { "epoch": 0.4608860944582541, "grad_norm": 1.2574668609577524, "learning_rate": 1.827520556922248e-05, "loss": 0.6815003156661987, "step": 1576 }, { "epoch": 0.4611785348735195, "grad_norm": 1.25588669780601, "learning_rate": 1.8272490226407476e-05, "loss": 0.5571715235710144, "step": 1577 }, { "epoch": 0.4614709752887849, "grad_norm": 1.241115553107667, "learning_rate": 1.8269772949948185e-05, "loss": 0.7562757730484009, "step": 1578 }, { "epoch": 0.4617634157040503, "grad_norm": 1.3753582703744767, "learning_rate": 1.8267053740479745e-05, "loss": 0.6330382227897644, "step": 1579 }, { "epoch": 0.4620558561193157, "grad_norm": 1.5331426598457012, "learning_rate": 1.826433259863776e-05, "loss": 0.7696597576141357, "step": 1580 }, { "epoch": 0.4623482965345811, "grad_norm": 1.3594821877317964, "learning_rate": 1.8261609525058275e-05, "loss": 0.6953772306442261, "step": 1581 }, { "epoch": 0.4626407369498465, "grad_norm": 1.3957443557298115, "learning_rate": 1.8258884520377797e-05, "loss": 0.5856037735939026, "step": 1582 }, { "epoch": 0.46293317736511186, "grad_norm": 1.3245931479550002, "learning_rate": 1.8256157585233277e-05, "loss": 0.5988172888755798, "step": 1583 }, { "epoch": 0.46322561778037724, "grad_norm": 1.3153037118046438, "learning_rate": 1.8253428720262117e-05, "loss": 0.6320241689682007, "step": 1584 }, { "epoch": 0.4635180581956426, "grad_norm": 1.1680775814478943, "learning_rate": 1.8250697926102182e-05, "loss": 0.5758935213088989, "step": 1585 }, { "epoch": 0.463810498610908, "grad_norm": 1.4295465315991271, "learning_rate": 1.8247965203391763e-05, "loss": 0.7104986906051636, "step": 1586 }, { "epoch": 0.46410293902617344, "grad_norm": 1.4739846709331708, "learning_rate": 1.8245230552769634e-05, "loss": 0.6322015523910522, "step": 1587 }, { "epoch": 0.4643953794414388, "grad_norm": 1.4263760736603013, "learning_rate": 1.824249397487499e-05, "loss": 0.5881235003471375, "step": 1588 }, { "epoch": 0.4646878198567042, "grad_norm": 1.5652864190332019, "learning_rate": 1.8239755470347497e-05, "loss": 0.8097240924835205, "step": 1589 }, { "epoch": 0.4649802602719696, "grad_norm": 1.4192861983980027, "learning_rate": 1.823701503982726e-05, "loss": 0.6538649201393127, "step": 1590 }, { "epoch": 0.465272700687235, "grad_norm": 1.2329012857349442, "learning_rate": 1.8234272683954842e-05, "loss": 0.5868922472000122, "step": 1591 }, { "epoch": 0.46556514110250036, "grad_norm": 1.3076575357637654, "learning_rate": 1.8231528403371248e-05, "loss": 0.6747265458106995, "step": 1592 }, { "epoch": 0.46585758151776574, "grad_norm": 1.2961728564371904, "learning_rate": 1.8228782198717936e-05, "loss": 0.6519996523857117, "step": 1593 }, { "epoch": 0.4661500219330311, "grad_norm": 1.2212124627082057, "learning_rate": 1.822603407063682e-05, "loss": 0.7268975973129272, "step": 1594 }, { "epoch": 0.46644246234829656, "grad_norm": 1.1603454255193932, "learning_rate": 1.8223284019770252e-05, "loss": 0.6554980278015137, "step": 1595 }, { "epoch": 0.46673490276356194, "grad_norm": 1.350233636463654, "learning_rate": 1.8220532046761047e-05, "loss": 0.7014105319976807, "step": 1596 }, { "epoch": 0.4670273431788273, "grad_norm": 1.4228663397014873, "learning_rate": 1.821777815225245e-05, "loss": 0.5766602158546448, "step": 1597 }, { "epoch": 0.4673197835940927, "grad_norm": 1.304159292005938, "learning_rate": 1.8215022336888182e-05, "loss": 0.5106521844863892, "step": 1598 }, { "epoch": 0.4676122240093581, "grad_norm": 1.419250792414019, "learning_rate": 1.821226460131239e-05, "loss": 0.801375150680542, "step": 1599 }, { "epoch": 0.4679046644246235, "grad_norm": 1.2845833863087142, "learning_rate": 1.8209504946169677e-05, "loss": 0.6189062595367432, "step": 1600 }, { "epoch": 0.46819710483988886, "grad_norm": 1.2468379881228138, "learning_rate": 1.8206743372105098e-05, "loss": 0.6719359159469604, "step": 1601 }, { "epoch": 0.46848954525515424, "grad_norm": 1.4660205035921348, "learning_rate": 1.8203979879764153e-05, "loss": 0.7437123656272888, "step": 1602 }, { "epoch": 0.4687819856704197, "grad_norm": 1.3639008290802046, "learning_rate": 1.8201214469792793e-05, "loss": 0.7273217439651489, "step": 1603 }, { "epoch": 0.46907442608568506, "grad_norm": 1.332814377531963, "learning_rate": 1.8198447142837416e-05, "loss": 0.6467087268829346, "step": 1604 }, { "epoch": 0.46936686650095044, "grad_norm": 1.1167815102053054, "learning_rate": 1.8195677899544866e-05, "loss": 0.5764428973197937, "step": 1605 }, { "epoch": 0.4696593069162158, "grad_norm": 1.4761144768835275, "learning_rate": 1.8192906740562437e-05, "loss": 0.5969977378845215, "step": 1606 }, { "epoch": 0.4699517473314812, "grad_norm": 1.3424638711815577, "learning_rate": 1.819013366653787e-05, "loss": 0.7237746119499207, "step": 1607 }, { "epoch": 0.4702441877467466, "grad_norm": 1.4494789457227795, "learning_rate": 1.8187358678119355e-05, "loss": 0.6289568543434143, "step": 1608 }, { "epoch": 0.47053662816201197, "grad_norm": 1.1494676131886132, "learning_rate": 1.8184581775955533e-05, "loss": 0.5773013234138489, "step": 1609 }, { "epoch": 0.47082906857727735, "grad_norm": 1.3055308518970814, "learning_rate": 1.818180296069548e-05, "loss": 0.5940284729003906, "step": 1610 }, { "epoch": 0.4711215089925428, "grad_norm": 1.506634303312927, "learning_rate": 1.8179022232988735e-05, "loss": 0.7051881551742554, "step": 1611 }, { "epoch": 0.4714139494078082, "grad_norm": 1.2817274142705404, "learning_rate": 1.8176239593485267e-05, "loss": 0.6427813768386841, "step": 1612 }, { "epoch": 0.47170638982307356, "grad_norm": 1.3150009445137423, "learning_rate": 1.817345504283551e-05, "loss": 0.7041782736778259, "step": 1613 }, { "epoch": 0.47199883023833894, "grad_norm": 1.1960422316530261, "learning_rate": 1.817066858169033e-05, "loss": 0.6568688154220581, "step": 1614 }, { "epoch": 0.4722912706536043, "grad_norm": 1.1082706297141673, "learning_rate": 1.816788021070105e-05, "loss": 0.4784452021121979, "step": 1615 }, { "epoch": 0.4725837110688697, "grad_norm": 1.403652579196444, "learning_rate": 1.816508993051943e-05, "loss": 0.6012705564498901, "step": 1616 }, { "epoch": 0.4728761514841351, "grad_norm": 1.441258763214559, "learning_rate": 1.8162297741797685e-05, "loss": 0.6414428949356079, "step": 1617 }, { "epoch": 0.47316859189940047, "grad_norm": 1.4131643644174843, "learning_rate": 1.815950364518847e-05, "loss": 0.6446187496185303, "step": 1618 }, { "epoch": 0.4734610323146659, "grad_norm": 1.2552495046018781, "learning_rate": 1.8156707641344885e-05, "loss": 0.5153034329414368, "step": 1619 }, { "epoch": 0.4737534727299313, "grad_norm": 1.5159052607593526, "learning_rate": 1.8153909730920485e-05, "loss": 0.7209463715553284, "step": 1620 }, { "epoch": 0.4740459131451967, "grad_norm": 1.2933785450044248, "learning_rate": 1.8151109914569267e-05, "loss": 0.5990744829177856, "step": 1621 }, { "epoch": 0.47433835356046206, "grad_norm": 1.3033668993107679, "learning_rate": 1.814830819294566e-05, "loss": 0.5706672668457031, "step": 1622 }, { "epoch": 0.47463079397572744, "grad_norm": 1.1946317041445573, "learning_rate": 1.814550456670456e-05, "loss": 0.538548469543457, "step": 1623 }, { "epoch": 0.4749232343909928, "grad_norm": 1.3282078081285205, "learning_rate": 1.8142699036501288e-05, "loss": 0.6450623273849487, "step": 1624 }, { "epoch": 0.4752156748062582, "grad_norm": 1.336508209824809, "learning_rate": 1.813989160299163e-05, "loss": 0.6537624597549438, "step": 1625 }, { "epoch": 0.47550811522152364, "grad_norm": 1.2777879020397362, "learning_rate": 1.8137082266831794e-05, "loss": 0.7126362323760986, "step": 1626 }, { "epoch": 0.475800555636789, "grad_norm": 1.4542616967071014, "learning_rate": 1.813427102867846e-05, "loss": 0.6686921119689941, "step": 1627 }, { "epoch": 0.4760929960520544, "grad_norm": 1.4231643377055359, "learning_rate": 1.8131457889188723e-05, "loss": 0.5925619602203369, "step": 1628 }, { "epoch": 0.4763854364673198, "grad_norm": 1.2702390975554385, "learning_rate": 1.8128642849020147e-05, "loss": 0.7251017689704895, "step": 1629 }, { "epoch": 0.47667787688258517, "grad_norm": 1.5675645867645378, "learning_rate": 1.8125825908830733e-05, "loss": 0.7524283528327942, "step": 1630 }, { "epoch": 0.47697031729785055, "grad_norm": 1.2843975237623166, "learning_rate": 1.8123007069278914e-05, "loss": 0.7593197226524353, "step": 1631 }, { "epoch": 0.47726275771311594, "grad_norm": 1.2304771008785658, "learning_rate": 1.812018633102358e-05, "loss": 0.43353578448295593, "step": 1632 }, { "epoch": 0.4775551981283813, "grad_norm": 1.1488804965894268, "learning_rate": 1.8117363694724063e-05, "loss": 0.6254708766937256, "step": 1633 }, { "epoch": 0.47784763854364676, "grad_norm": 1.2467231401784862, "learning_rate": 1.811453916104014e-05, "loss": 0.5970091223716736, "step": 1634 }, { "epoch": 0.47814007895891214, "grad_norm": 1.2798152763028137, "learning_rate": 1.8111712730632024e-05, "loss": 0.6299331188201904, "step": 1635 }, { "epoch": 0.4784325193741775, "grad_norm": 1.4325282365212126, "learning_rate": 1.810888440416038e-05, "loss": 0.7461789846420288, "step": 1636 }, { "epoch": 0.4787249597894429, "grad_norm": 1.2539146793136515, "learning_rate": 1.8106054182286305e-05, "loss": 0.5053290724754333, "step": 1637 }, { "epoch": 0.4790174002047083, "grad_norm": 1.2809048918941985, "learning_rate": 1.810322206567135e-05, "loss": 0.6853327751159668, "step": 1638 }, { "epoch": 0.47930984061997367, "grad_norm": 1.4027979186429358, "learning_rate": 1.8100388054977508e-05, "loss": 0.5337134599685669, "step": 1639 }, { "epoch": 0.47960228103523905, "grad_norm": 1.357622845311743, "learning_rate": 1.809755215086721e-05, "loss": 0.7082560062408447, "step": 1640 }, { "epoch": 0.47989472145050444, "grad_norm": 1.3590974916852807, "learning_rate": 1.8094714354003325e-05, "loss": 0.680424153804779, "step": 1641 }, { "epoch": 0.4801871618657699, "grad_norm": 1.1398057291819046, "learning_rate": 1.8091874665049183e-05, "loss": 0.5235139727592468, "step": 1642 }, { "epoch": 0.48047960228103526, "grad_norm": 1.3822416905178454, "learning_rate": 1.8089033084668535e-05, "loss": 0.7843992114067078, "step": 1643 }, { "epoch": 0.48077204269630064, "grad_norm": 1.4941957252025324, "learning_rate": 1.8086189613525587e-05, "loss": 0.6736497282981873, "step": 1644 }, { "epoch": 0.481064483111566, "grad_norm": 1.3326594399820286, "learning_rate": 1.808334425228498e-05, "loss": 0.6898948550224304, "step": 1645 }, { "epoch": 0.4813569235268314, "grad_norm": 1.3419429940100798, "learning_rate": 1.80804970016118e-05, "loss": 0.6719726324081421, "step": 1646 }, { "epoch": 0.4816493639420968, "grad_norm": 1.316270232362313, "learning_rate": 1.807764786217158e-05, "loss": 0.6904356479644775, "step": 1647 }, { "epoch": 0.48194180435736217, "grad_norm": 1.3009257254922486, "learning_rate": 1.8074796834630285e-05, "loss": 0.5956645011901855, "step": 1648 }, { "epoch": 0.48223424477262755, "grad_norm": 1.162557710559535, "learning_rate": 1.8071943919654323e-05, "loss": 0.5676499009132385, "step": 1649 }, { "epoch": 0.482526685187893, "grad_norm": 1.3145895725362904, "learning_rate": 1.8069089117910547e-05, "loss": 0.6006937026977539, "step": 1650 }, { "epoch": 0.48281912560315837, "grad_norm": 1.3694341047830378, "learning_rate": 1.806623243006625e-05, "loss": 0.6241977214813232, "step": 1651 }, { "epoch": 0.48311156601842375, "grad_norm": 1.4152304986784254, "learning_rate": 1.806337385678917e-05, "loss": 0.7359870672225952, "step": 1652 }, { "epoch": 0.48340400643368914, "grad_norm": 1.155725074088707, "learning_rate": 1.806051339874748e-05, "loss": 0.6113119125366211, "step": 1653 }, { "epoch": 0.4836964468489545, "grad_norm": 1.3288798785197886, "learning_rate": 1.8057651056609784e-05, "loss": 0.642951488494873, "step": 1654 }, { "epoch": 0.4839888872642199, "grad_norm": 1.3081605749498326, "learning_rate": 1.8054786831045147e-05, "loss": 0.7020113468170166, "step": 1655 }, { "epoch": 0.4842813276794853, "grad_norm": 1.355302216036822, "learning_rate": 1.8051920722723063e-05, "loss": 0.678231418132782, "step": 1656 }, { "epoch": 0.48457376809475067, "grad_norm": 1.2407750790627203, "learning_rate": 1.8049052732313466e-05, "loss": 0.604765772819519, "step": 1657 }, { "epoch": 0.4848662085100161, "grad_norm": 1.501775861517808, "learning_rate": 1.8046182860486735e-05, "loss": 0.6812270879745483, "step": 1658 }, { "epoch": 0.4851586489252815, "grad_norm": 1.329019452940817, "learning_rate": 1.8043311107913675e-05, "loss": 0.6284930109977722, "step": 1659 }, { "epoch": 0.48545108934054687, "grad_norm": 1.4460160298748268, "learning_rate": 1.8040437475265554e-05, "loss": 0.665177583694458, "step": 1660 }, { "epoch": 0.48574352975581225, "grad_norm": 1.365611165893268, "learning_rate": 1.8037561963214058e-05, "loss": 0.7628738284111023, "step": 1661 }, { "epoch": 0.48603597017107764, "grad_norm": 1.4917601408905583, "learning_rate": 1.8034684572431322e-05, "loss": 0.6372654438018799, "step": 1662 }, { "epoch": 0.486328410586343, "grad_norm": 1.2986927468884095, "learning_rate": 1.803180530358992e-05, "loss": 0.5915756225585938, "step": 1663 }, { "epoch": 0.4866208510016084, "grad_norm": 1.3509164579114188, "learning_rate": 1.802892415736286e-05, "loss": 0.6821908950805664, "step": 1664 }, { "epoch": 0.48691329141687384, "grad_norm": 1.3857679722145793, "learning_rate": 1.80260411344236e-05, "loss": 0.6418279409408569, "step": 1665 }, { "epoch": 0.4872057318321392, "grad_norm": 1.154306591574384, "learning_rate": 1.802315623544602e-05, "loss": 0.5582526922225952, "step": 1666 }, { "epoch": 0.4874981722474046, "grad_norm": 1.3431793608397968, "learning_rate": 1.8020269461104448e-05, "loss": 0.7145007848739624, "step": 1667 }, { "epoch": 0.48779061266267, "grad_norm": 1.2110741699326812, "learning_rate": 1.8017380812073658e-05, "loss": 0.5415871739387512, "step": 1668 }, { "epoch": 0.48808305307793537, "grad_norm": 1.488356994545647, "learning_rate": 1.801449028902885e-05, "loss": 0.728327751159668, "step": 1669 }, { "epoch": 0.48837549349320075, "grad_norm": 1.3273378299589804, "learning_rate": 1.8011597892645665e-05, "loss": 0.6469160914421082, "step": 1670 }, { "epoch": 0.48866793390846613, "grad_norm": 1.3096259850876997, "learning_rate": 1.8008703623600185e-05, "loss": 0.7107353210449219, "step": 1671 }, { "epoch": 0.4889603743237315, "grad_norm": 1.4201847213896843, "learning_rate": 1.8005807482568926e-05, "loss": 0.6918982267379761, "step": 1672 }, { "epoch": 0.48925281473899696, "grad_norm": 1.4096024584844806, "learning_rate": 1.800290947022884e-05, "loss": 0.661738932132721, "step": 1673 }, { "epoch": 0.48954525515426234, "grad_norm": 1.4938181766281158, "learning_rate": 1.800000958725733e-05, "loss": 0.6816283464431763, "step": 1674 }, { "epoch": 0.4898376955695277, "grad_norm": 1.348689926804817, "learning_rate": 1.7997107834332217e-05, "loss": 0.6988941431045532, "step": 1675 }, { "epoch": 0.4901301359847931, "grad_norm": 1.5696470599370025, "learning_rate": 1.799420421213177e-05, "loss": 0.7997519969940186, "step": 1676 }, { "epoch": 0.4904225764000585, "grad_norm": 1.3512394042939826, "learning_rate": 1.7991298721334697e-05, "loss": 0.6552794575691223, "step": 1677 }, { "epoch": 0.49071501681532387, "grad_norm": 1.2446219807906005, "learning_rate": 1.7988391362620135e-05, "loss": 0.6144021153450012, "step": 1678 }, { "epoch": 0.49100745723058925, "grad_norm": 1.2086851376188177, "learning_rate": 1.798548213666766e-05, "loss": 0.5036276578903198, "step": 1679 }, { "epoch": 0.49129989764585463, "grad_norm": 1.1620444251602322, "learning_rate": 1.7982571044157288e-05, "loss": 0.5152162313461304, "step": 1680 }, { "epoch": 0.49159233806112007, "grad_norm": 1.4266855366652862, "learning_rate": 1.797965808576947e-05, "loss": 0.7249797582626343, "step": 1681 }, { "epoch": 0.49188477847638545, "grad_norm": 1.138885414798186, "learning_rate": 1.7976743262185094e-05, "loss": 0.5769079923629761, "step": 1682 }, { "epoch": 0.49217721889165084, "grad_norm": 1.2523240509929359, "learning_rate": 1.797382657408548e-05, "loss": 0.7017331123352051, "step": 1683 }, { "epoch": 0.4924696593069162, "grad_norm": 1.3095438640742119, "learning_rate": 1.797090802215238e-05, "loss": 0.788599967956543, "step": 1684 }, { "epoch": 0.4927620997221816, "grad_norm": 1.3652642181905799, "learning_rate": 1.7967987607067997e-05, "loss": 0.5716612935066223, "step": 1685 }, { "epoch": 0.493054540137447, "grad_norm": 1.396592202891807, "learning_rate": 1.796506532951496e-05, "loss": 0.6808345913887024, "step": 1686 }, { "epoch": 0.49334698055271237, "grad_norm": 1.421363062787346, "learning_rate": 1.7962141190176326e-05, "loss": 0.6540817022323608, "step": 1687 }, { "epoch": 0.49363942096797775, "grad_norm": 1.3162774070898267, "learning_rate": 1.7959215189735604e-05, "loss": 0.6522870063781738, "step": 1688 }, { "epoch": 0.4939318613832432, "grad_norm": 1.2120992084575881, "learning_rate": 1.7956287328876724e-05, "loss": 0.5217882990837097, "step": 1689 }, { "epoch": 0.49422430179850857, "grad_norm": 1.1456971313507769, "learning_rate": 1.795335760828405e-05, "loss": 0.6985372304916382, "step": 1690 }, { "epoch": 0.49451674221377395, "grad_norm": 1.6308222645679713, "learning_rate": 1.7950426028642397e-05, "loss": 0.7199063301086426, "step": 1691 }, { "epoch": 0.49480918262903933, "grad_norm": 1.2503132677681021, "learning_rate": 1.7947492590636998e-05, "loss": 0.5810575485229492, "step": 1692 }, { "epoch": 0.4951016230443047, "grad_norm": 1.5393913616038981, "learning_rate": 1.7944557294953528e-05, "loss": 0.7443726658821106, "step": 1693 }, { "epoch": 0.4953940634595701, "grad_norm": 1.4257690332105803, "learning_rate": 1.7941620142278092e-05, "loss": 0.6774560213088989, "step": 1694 }, { "epoch": 0.4956865038748355, "grad_norm": 1.4876883296800856, "learning_rate": 1.793868113329724e-05, "loss": 0.6983137726783752, "step": 1695 }, { "epoch": 0.49597894429010086, "grad_norm": 1.500775887710686, "learning_rate": 1.793574026869793e-05, "loss": 0.6481274366378784, "step": 1696 }, { "epoch": 0.4962713847053663, "grad_norm": 1.5261372345633493, "learning_rate": 1.793279754916759e-05, "loss": 0.6489002704620361, "step": 1697 }, { "epoch": 0.4965638251206317, "grad_norm": 1.200851338265551, "learning_rate": 1.7929852975394056e-05, "loss": 0.7054505348205566, "step": 1698 }, { "epoch": 0.49685626553589707, "grad_norm": 1.1948769153228862, "learning_rate": 1.79269065480656e-05, "loss": 0.5257681608200073, "step": 1699 }, { "epoch": 0.49714870595116245, "grad_norm": 1.2760885846913066, "learning_rate": 1.7923958267870936e-05, "loss": 0.8625251054763794, "step": 1700 }, { "epoch": 0.49744114636642783, "grad_norm": 1.223950331700182, "learning_rate": 1.7921008135499205e-05, "loss": 0.6736147999763489, "step": 1701 }, { "epoch": 0.4977335867816932, "grad_norm": 1.351351583663473, "learning_rate": 1.7918056151639985e-05, "loss": 0.5079643130302429, "step": 1702 }, { "epoch": 0.4980260271969586, "grad_norm": 1.2324398794203584, "learning_rate": 1.791510231698328e-05, "loss": 0.597242534160614, "step": 1703 }, { "epoch": 0.49831846761222404, "grad_norm": 1.3776511171825507, "learning_rate": 1.791214663221953e-05, "loss": 0.6695376038551331, "step": 1704 }, { "epoch": 0.4986109080274894, "grad_norm": 1.2400454845090276, "learning_rate": 1.7909189098039616e-05, "loss": 0.6411684155464172, "step": 1705 }, { "epoch": 0.4989033484427548, "grad_norm": 1.3917271277458743, "learning_rate": 1.790622971513484e-05, "loss": 0.6671754121780396, "step": 1706 }, { "epoch": 0.4991957888580202, "grad_norm": 1.1384272276613905, "learning_rate": 1.7903268484196936e-05, "loss": 0.5312573909759521, "step": 1707 }, { "epoch": 0.49948822927328557, "grad_norm": 1.3626241120949947, "learning_rate": 1.7900305405918076e-05, "loss": 0.643236517906189, "step": 1708 }, { "epoch": 0.49978066968855095, "grad_norm": 1.4093385837144417, "learning_rate": 1.7897340480990863e-05, "loss": 0.7942951321601868, "step": 1709 }, { "epoch": 0.5000731101038164, "grad_norm": 1.3198251548980515, "learning_rate": 1.789437371010833e-05, "loss": 0.701362133026123, "step": 1710 }, { "epoch": 0.5003655505190817, "grad_norm": 1.3304955567316399, "learning_rate": 1.789140509396394e-05, "loss": 0.6993157863616943, "step": 1711 }, { "epoch": 0.5006579909343472, "grad_norm": 1.0719148279657758, "learning_rate": 1.788843463325159e-05, "loss": 0.568405270576477, "step": 1712 }, { "epoch": 0.5009504313496125, "grad_norm": 0.976150644308567, "learning_rate": 1.7885462328665605e-05, "loss": 0.4948374032974243, "step": 1713 }, { "epoch": 0.5012428717648779, "grad_norm": 1.4692514127239873, "learning_rate": 1.7882488180900743e-05, "loss": 0.6679480671882629, "step": 1714 }, { "epoch": 0.5015353121801432, "grad_norm": 1.5018221461401142, "learning_rate": 1.78795121906522e-05, "loss": 0.706131100654602, "step": 1715 }, { "epoch": 0.5018277525954087, "grad_norm": 1.207740414795638, "learning_rate": 1.787653435861559e-05, "loss": 0.6691830158233643, "step": 1716 }, { "epoch": 0.5021201930106741, "grad_norm": 1.163150990025552, "learning_rate": 1.787355468548696e-05, "loss": 0.5624213218688965, "step": 1717 }, { "epoch": 0.5024126334259394, "grad_norm": 1.3394004970303723, "learning_rate": 1.78705731719628e-05, "loss": 0.4589618444442749, "step": 1718 }, { "epoch": 0.5027050738412049, "grad_norm": 1.384883869852314, "learning_rate": 1.7867589818740012e-05, "loss": 0.571403980255127, "step": 1719 }, { "epoch": 0.5029975142564702, "grad_norm": 1.0668853872947273, "learning_rate": 1.786460462651594e-05, "loss": 0.5395561456680298, "step": 1720 }, { "epoch": 0.5032899546717357, "grad_norm": 1.243223907233259, "learning_rate": 1.7861617595988355e-05, "loss": 0.6166945695877075, "step": 1721 }, { "epoch": 0.503582395087001, "grad_norm": 1.4857752879775032, "learning_rate": 1.7858628727855458e-05, "loss": 0.6812523603439331, "step": 1722 }, { "epoch": 0.5038748355022664, "grad_norm": 1.2390654420633957, "learning_rate": 1.7855638022815872e-05, "loss": 0.6602752208709717, "step": 1723 }, { "epoch": 0.5041672759175319, "grad_norm": 1.0873682718880517, "learning_rate": 1.7852645481568665e-05, "loss": 0.49925822019577026, "step": 1724 }, { "epoch": 0.5044597163327972, "grad_norm": 1.3265310908908576, "learning_rate": 1.784965110481332e-05, "loss": 0.5557682514190674, "step": 1725 }, { "epoch": 0.5047521567480626, "grad_norm": 1.2775644185514514, "learning_rate": 1.7846654893249756e-05, "loss": 0.6576372981071472, "step": 1726 }, { "epoch": 0.505044597163328, "grad_norm": 2.047704943438843, "learning_rate": 1.7843656847578317e-05, "loss": 0.5266367197036743, "step": 1727 }, { "epoch": 0.5053370375785934, "grad_norm": 1.6086224094226402, "learning_rate": 1.7840656968499782e-05, "loss": 0.7368261218070984, "step": 1728 }, { "epoch": 0.5056294779938587, "grad_norm": 1.2755318597370908, "learning_rate": 1.7837655256715355e-05, "loss": 0.6583619117736816, "step": 1729 }, { "epoch": 0.5059219184091241, "grad_norm": 1.4196511617190575, "learning_rate": 1.7834651712926662e-05, "loss": 0.7323073148727417, "step": 1730 }, { "epoch": 0.5062143588243895, "grad_norm": 1.540686270234863, "learning_rate": 1.783164633783577e-05, "loss": 0.6059812307357788, "step": 1731 }, { "epoch": 0.5065067992396549, "grad_norm": 1.451028079648097, "learning_rate": 1.782863913214516e-05, "loss": 0.5992608070373535, "step": 1732 }, { "epoch": 0.5067992396549204, "grad_norm": 1.3452146161553644, "learning_rate": 1.7825630096557754e-05, "loss": 0.5729147791862488, "step": 1733 }, { "epoch": 0.5070916800701857, "grad_norm": 1.4383912240083958, "learning_rate": 1.782261923177689e-05, "loss": 0.6708269119262695, "step": 1734 }, { "epoch": 0.5073841204854511, "grad_norm": 1.0922943221428454, "learning_rate": 1.7819606538506347e-05, "loss": 0.5377235412597656, "step": 1735 }, { "epoch": 0.5076765609007164, "grad_norm": 1.3060450837457043, "learning_rate": 1.781659201745032e-05, "loss": 0.6899171471595764, "step": 1736 }, { "epoch": 0.5079690013159819, "grad_norm": 1.2574262616785272, "learning_rate": 1.7813575669313434e-05, "loss": 0.6712576150894165, "step": 1737 }, { "epoch": 0.5082614417312472, "grad_norm": 1.3797290531865334, "learning_rate": 1.781055749480074e-05, "loss": 0.6989667415618896, "step": 1738 }, { "epoch": 0.5085538821465126, "grad_norm": 1.4976341004458755, "learning_rate": 1.7807537494617723e-05, "loss": 0.6103490591049194, "step": 1739 }, { "epoch": 0.5088463225617781, "grad_norm": 1.2059878229475702, "learning_rate": 1.7804515669470287e-05, "loss": 0.4882289171218872, "step": 1740 }, { "epoch": 0.5091387629770434, "grad_norm": 1.3963253268337052, "learning_rate": 1.7801492020064764e-05, "loss": 0.7244713306427002, "step": 1741 }, { "epoch": 0.5094312033923089, "grad_norm": 1.2588544303384788, "learning_rate": 1.7798466547107918e-05, "loss": 0.6055952310562134, "step": 1742 }, { "epoch": 0.5097236438075742, "grad_norm": 1.3449125705801426, "learning_rate": 1.779543925130693e-05, "loss": 0.5893995761871338, "step": 1743 }, { "epoch": 0.5100160842228396, "grad_norm": 1.4169541262971606, "learning_rate": 1.7792410133369413e-05, "loss": 0.6154330968856812, "step": 1744 }, { "epoch": 0.5103085246381049, "grad_norm": 1.294650393818464, "learning_rate": 1.778937919400341e-05, "loss": 0.6227806806564331, "step": 1745 }, { "epoch": 0.5106009650533704, "grad_norm": 1.563882907776874, "learning_rate": 1.7786346433917376e-05, "loss": 0.6192313432693481, "step": 1746 }, { "epoch": 0.5108934054686358, "grad_norm": 1.324638073205218, "learning_rate": 1.7783311853820205e-05, "loss": 0.6175359487533569, "step": 1747 }, { "epoch": 0.5111858458839011, "grad_norm": 1.17912928754983, "learning_rate": 1.7780275454421218e-05, "loss": 0.5588991641998291, "step": 1748 }, { "epoch": 0.5114782862991666, "grad_norm": 1.0201894222615457, "learning_rate": 1.777723723643014e-05, "loss": 0.637115478515625, "step": 1749 }, { "epoch": 0.5117707267144319, "grad_norm": 1.5101308062255179, "learning_rate": 1.777419720055715e-05, "loss": 0.6762860417366028, "step": 1750 }, { "epoch": 0.5120631671296973, "grad_norm": 1.5211239881114056, "learning_rate": 1.7771155347512828e-05, "loss": 0.6980293989181519, "step": 1751 }, { "epoch": 0.5123556075449627, "grad_norm": 1.3145597239587745, "learning_rate": 1.7768111678008194e-05, "loss": 0.6587250232696533, "step": 1752 }, { "epoch": 0.5126480479602281, "grad_norm": 1.4750219793579704, "learning_rate": 1.776506619275469e-05, "loss": 0.6571120619773865, "step": 1753 }, { "epoch": 0.5129404883754934, "grad_norm": 1.705487520120489, "learning_rate": 1.7762018892464172e-05, "loss": 0.8127633333206177, "step": 1754 }, { "epoch": 0.5132329287907589, "grad_norm": 1.4136977790679228, "learning_rate": 1.7758969777848935e-05, "loss": 0.6585550308227539, "step": 1755 }, { "epoch": 0.5135253692060243, "grad_norm": 1.5019600327645424, "learning_rate": 1.7755918849621686e-05, "loss": 0.6347511410713196, "step": 1756 }, { "epoch": 0.5138178096212896, "grad_norm": 1.4489353235186164, "learning_rate": 1.775286610849556e-05, "loss": 0.5918457508087158, "step": 1757 }, { "epoch": 0.5141102500365551, "grad_norm": 1.2541802522573693, "learning_rate": 1.774981155518412e-05, "loss": 0.7042769193649292, "step": 1758 }, { "epoch": 0.5144026904518204, "grad_norm": 1.4327318826910254, "learning_rate": 1.7746755190401353e-05, "loss": 0.8014250993728638, "step": 1759 }, { "epoch": 0.5146951308670858, "grad_norm": 1.339232110324459, "learning_rate": 1.774369701486166e-05, "loss": 0.6703939437866211, "step": 1760 }, { "epoch": 0.5149875712823512, "grad_norm": 1.1710558248660605, "learning_rate": 1.774063702927987e-05, "loss": 0.6189682483673096, "step": 1761 }, { "epoch": 0.5152800116976166, "grad_norm": 1.4110546220906648, "learning_rate": 1.7737575234371238e-05, "loss": 0.5386991500854492, "step": 1762 }, { "epoch": 0.515572452112882, "grad_norm": 1.4204019461155708, "learning_rate": 1.773451163085144e-05, "loss": 0.6389357447624207, "step": 1763 }, { "epoch": 0.5158648925281474, "grad_norm": 1.1798787279597898, "learning_rate": 1.7731446219436577e-05, "loss": 0.7247746586799622, "step": 1764 }, { "epoch": 0.5161573329434128, "grad_norm": 1.2114702713778023, "learning_rate": 1.7728379000843164e-05, "loss": 0.5538983941078186, "step": 1765 }, { "epoch": 0.5164497733586781, "grad_norm": 1.155329008927324, "learning_rate": 1.7725309975788155e-05, "loss": 0.6003320813179016, "step": 1766 }, { "epoch": 0.5167422137739436, "grad_norm": 1.4065479816352848, "learning_rate": 1.7722239144988908e-05, "loss": 0.603177011013031, "step": 1767 }, { "epoch": 0.5170346541892089, "grad_norm": 1.1699743536266287, "learning_rate": 1.771916650916321e-05, "loss": 0.6071338653564453, "step": 1768 }, { "epoch": 0.5173270946044743, "grad_norm": 1.4268603398797357, "learning_rate": 1.7716092069029275e-05, "loss": 0.6148535013198853, "step": 1769 }, { "epoch": 0.5176195350197397, "grad_norm": 1.3460628970570976, "learning_rate": 1.7713015825305735e-05, "loss": 0.6236969828605652, "step": 1770 }, { "epoch": 0.5179119754350051, "grad_norm": 1.4613715991480511, "learning_rate": 1.770993777871164e-05, "loss": 0.5439775586128235, "step": 1771 }, { "epoch": 0.5182044158502705, "grad_norm": 1.3246469866549868, "learning_rate": 1.770685792996647e-05, "loss": 0.6498249769210815, "step": 1772 }, { "epoch": 0.5184968562655359, "grad_norm": 1.307598965769502, "learning_rate": 1.7703776279790113e-05, "loss": 0.5838749408721924, "step": 1773 }, { "epoch": 0.5187892966808013, "grad_norm": 1.44861400348765, "learning_rate": 1.770069282890289e-05, "loss": 0.6467812657356262, "step": 1774 }, { "epoch": 0.5190817370960666, "grad_norm": 1.3332181124442455, "learning_rate": 1.7697607578025543e-05, "loss": 0.5878627896308899, "step": 1775 }, { "epoch": 0.5193741775113321, "grad_norm": 1.2905348700615993, "learning_rate": 1.7694520527879223e-05, "loss": 0.6252161264419556, "step": 1776 }, { "epoch": 0.5196666179265974, "grad_norm": 1.2071686484495499, "learning_rate": 1.7691431679185518e-05, "loss": 0.6098401546478271, "step": 1777 }, { "epoch": 0.5199590583418628, "grad_norm": 1.4529959736387221, "learning_rate": 1.7688341032666415e-05, "loss": 0.7401748299598694, "step": 1778 }, { "epoch": 0.5202514987571283, "grad_norm": 1.278188059333223, "learning_rate": 1.768524858904435e-05, "loss": 0.5398571491241455, "step": 1779 }, { "epoch": 0.5205439391723936, "grad_norm": 1.211971903081478, "learning_rate": 1.768215434904215e-05, "loss": 0.5565935969352722, "step": 1780 }, { "epoch": 0.520836379587659, "grad_norm": 1.3982258941889667, "learning_rate": 1.7679058313383078e-05, "loss": 0.5510461926460266, "step": 1781 }, { "epoch": 0.5211288200029244, "grad_norm": 1.5839871959956162, "learning_rate": 1.7675960482790818e-05, "loss": 0.670242428779602, "step": 1782 }, { "epoch": 0.5214212604181898, "grad_norm": 1.309838763427276, "learning_rate": 1.7672860857989463e-05, "loss": 0.6556246280670166, "step": 1783 }, { "epoch": 0.5217137008334551, "grad_norm": 1.3555406156984307, "learning_rate": 1.7669759439703537e-05, "loss": 0.7133421897888184, "step": 1784 }, { "epoch": 0.5220061412487206, "grad_norm": 1.340410804208978, "learning_rate": 1.766665622865797e-05, "loss": 0.5520647168159485, "step": 1785 }, { "epoch": 0.522298581663986, "grad_norm": 1.2754706768801123, "learning_rate": 1.766355122557813e-05, "loss": 0.6906430125236511, "step": 1786 }, { "epoch": 0.5225910220792513, "grad_norm": 1.331418831759662, "learning_rate": 1.766044443118978e-05, "loss": 0.6847748756408691, "step": 1787 }, { "epoch": 0.5228834624945168, "grad_norm": 1.6656678493050783, "learning_rate": 1.7657335846219125e-05, "loss": 0.6690354347229004, "step": 1788 }, { "epoch": 0.5231759029097821, "grad_norm": 1.5097667681145126, "learning_rate": 1.765422547139277e-05, "loss": 0.6508032083511353, "step": 1789 }, { "epoch": 0.5234683433250475, "grad_norm": 1.3545274700404182, "learning_rate": 1.7651113307437754e-05, "loss": 0.7686585187911987, "step": 1790 }, { "epoch": 0.5237607837403129, "grad_norm": 1.5694388106807053, "learning_rate": 1.764799935508152e-05, "loss": 0.7669490575790405, "step": 1791 }, { "epoch": 0.5240532241555783, "grad_norm": 1.3694245126086426, "learning_rate": 1.7644883615051936e-05, "loss": 0.6630266308784485, "step": 1792 }, { "epoch": 0.5243456645708436, "grad_norm": 1.350854180871217, "learning_rate": 1.764176608807729e-05, "loss": 0.6054951548576355, "step": 1793 }, { "epoch": 0.5246381049861091, "grad_norm": 1.3573271710882402, "learning_rate": 1.7638646774886282e-05, "loss": 0.6519330739974976, "step": 1794 }, { "epoch": 0.5249305454013745, "grad_norm": 1.3013890836364408, "learning_rate": 1.7635525676208034e-05, "loss": 0.6797915101051331, "step": 1795 }, { "epoch": 0.5252229858166398, "grad_norm": 1.4138018427804997, "learning_rate": 1.7632402792772084e-05, "loss": 0.7296736240386963, "step": 1796 }, { "epoch": 0.5255154262319053, "grad_norm": 1.4894816204298726, "learning_rate": 1.7629278125308388e-05, "loss": 0.6371006965637207, "step": 1797 }, { "epoch": 0.5258078666471706, "grad_norm": 1.1913157227609021, "learning_rate": 1.762615167454732e-05, "loss": 0.5315746068954468, "step": 1798 }, { "epoch": 0.526100307062436, "grad_norm": 1.115665172593258, "learning_rate": 1.762302344121966e-05, "loss": 0.5285685062408447, "step": 1799 }, { "epoch": 0.5263927474777014, "grad_norm": 1.269936179033053, "learning_rate": 1.7619893426056622e-05, "loss": 0.623146653175354, "step": 1800 }, { "epoch": 0.5266851878929668, "grad_norm": 1.3314922698636598, "learning_rate": 1.7616761629789824e-05, "loss": 0.5433363318443298, "step": 1801 }, { "epoch": 0.5269776283082322, "grad_norm": 1.422200045831386, "learning_rate": 1.7613628053151307e-05, "loss": 0.5035480260848999, "step": 1802 }, { "epoch": 0.5272700687234976, "grad_norm": 1.3947936859584276, "learning_rate": 1.7610492696873523e-05, "loss": 0.678544819355011, "step": 1803 }, { "epoch": 0.527562509138763, "grad_norm": 1.2973841494755158, "learning_rate": 1.7607355561689347e-05, "loss": 0.6237714290618896, "step": 1804 }, { "epoch": 0.5278549495540283, "grad_norm": 1.8411758190439966, "learning_rate": 1.760421664833206e-05, "loss": 0.6943943500518799, "step": 1805 }, { "epoch": 0.5281473899692938, "grad_norm": 1.1545458109151105, "learning_rate": 1.7601075957535366e-05, "loss": 0.5477268695831299, "step": 1806 }, { "epoch": 0.5284398303845591, "grad_norm": 1.5589440207416567, "learning_rate": 1.759793349003338e-05, "loss": 0.6627641320228577, "step": 1807 }, { "epoch": 0.5287322707998245, "grad_norm": 1.169894530317387, "learning_rate": 1.7594789246560638e-05, "loss": 0.5394496917724609, "step": 1808 }, { "epoch": 0.5290247112150899, "grad_norm": 1.5989109343746286, "learning_rate": 1.759164322785209e-05, "loss": 0.7824013233184814, "step": 1809 }, { "epoch": 0.5293171516303553, "grad_norm": 1.5859531867022811, "learning_rate": 1.7588495434643094e-05, "loss": 0.6959671974182129, "step": 1810 }, { "epoch": 0.5296095920456207, "grad_norm": 1.256097179377318, "learning_rate": 1.7585345867669427e-05, "loss": 0.7036902904510498, "step": 1811 }, { "epoch": 0.5299020324608861, "grad_norm": 1.2520265115718123, "learning_rate": 1.7582194527667285e-05, "loss": 0.6700775623321533, "step": 1812 }, { "epoch": 0.5301944728761515, "grad_norm": 1.4077714911889505, "learning_rate": 1.7579041415373273e-05, "loss": 0.648280918598175, "step": 1813 }, { "epoch": 0.5304869132914168, "grad_norm": 1.3424741441047479, "learning_rate": 1.757588653152441e-05, "loss": 0.688485324382782, "step": 1814 }, { "epoch": 0.5307793537066823, "grad_norm": 1.4718330240816029, "learning_rate": 1.757272987685813e-05, "loss": 0.6743370890617371, "step": 1815 }, { "epoch": 0.5310717941219476, "grad_norm": 1.2524252340987996, "learning_rate": 1.7569571452112288e-05, "loss": 0.5597015619277954, "step": 1816 }, { "epoch": 0.531364234537213, "grad_norm": 1.0387462800714626, "learning_rate": 1.756641125802514e-05, "loss": 0.48607051372528076, "step": 1817 }, { "epoch": 0.5316566749524785, "grad_norm": 1.3375496888713005, "learning_rate": 1.7563249295335366e-05, "loss": 0.6712289452552795, "step": 1818 }, { "epoch": 0.5319491153677438, "grad_norm": 1.4037646661677698, "learning_rate": 1.7560085564782057e-05, "loss": 0.5937772989273071, "step": 1819 }, { "epoch": 0.5322415557830092, "grad_norm": 1.5529497860681427, "learning_rate": 1.7556920067104714e-05, "loss": 0.7416468262672424, "step": 1820 }, { "epoch": 0.5325339961982746, "grad_norm": 1.1975217725231788, "learning_rate": 1.7553752803043247e-05, "loss": 0.6302096247673035, "step": 1821 }, { "epoch": 0.53282643661354, "grad_norm": 1.268842982106158, "learning_rate": 1.7550583773337992e-05, "loss": 0.5576045513153076, "step": 1822 }, { "epoch": 0.5331188770288053, "grad_norm": 1.3076658324014316, "learning_rate": 1.7547412978729688e-05, "loss": 0.5436257123947144, "step": 1823 }, { "epoch": 0.5334113174440708, "grad_norm": 1.2387778464918946, "learning_rate": 1.754424041995949e-05, "loss": 0.5674831867218018, "step": 1824 }, { "epoch": 0.5337037578593362, "grad_norm": 1.3729116406743342, "learning_rate": 1.7541066097768965e-05, "loss": 0.7254515290260315, "step": 1825 }, { "epoch": 0.5339961982746015, "grad_norm": 1.1721694105309242, "learning_rate": 1.7537890012900088e-05, "loss": 0.5706701278686523, "step": 1826 }, { "epoch": 0.534288638689867, "grad_norm": 1.4929452380767032, "learning_rate": 1.7534712166095253e-05, "loss": 0.6801357269287109, "step": 1827 }, { "epoch": 0.5345810791051323, "grad_norm": 1.115878861059579, "learning_rate": 1.753153255809726e-05, "loss": 0.6851463317871094, "step": 1828 }, { "epoch": 0.5348735195203977, "grad_norm": 1.3277835192492438, "learning_rate": 1.7528351189649324e-05, "loss": 0.6475861072540283, "step": 1829 }, { "epoch": 0.5351659599356631, "grad_norm": 1.462925601634232, "learning_rate": 1.752516806149507e-05, "loss": 0.6953648924827576, "step": 1830 }, { "epoch": 0.5354584003509285, "grad_norm": 1.5314952476377168, "learning_rate": 1.7521983174378537e-05, "loss": 0.5128777623176575, "step": 1831 }, { "epoch": 0.5357508407661938, "grad_norm": 1.3754167803768682, "learning_rate": 1.751879652904417e-05, "loss": 0.5780255198478699, "step": 1832 }, { "epoch": 0.5360432811814593, "grad_norm": 1.1326334157819233, "learning_rate": 1.751560812623683e-05, "loss": 0.581814169883728, "step": 1833 }, { "epoch": 0.5363357215967247, "grad_norm": 1.2244339664502468, "learning_rate": 1.7512417966701788e-05, "loss": 0.5609169006347656, "step": 1834 }, { "epoch": 0.53662816201199, "grad_norm": 1.2348222464159622, "learning_rate": 1.7509226051184716e-05, "loss": 0.6029868125915527, "step": 1835 }, { "epoch": 0.5369206024272555, "grad_norm": 1.5575658935823142, "learning_rate": 1.7506032380431718e-05, "loss": 0.6749545335769653, "step": 1836 }, { "epoch": 0.5372130428425208, "grad_norm": 1.4261868258477342, "learning_rate": 1.750283695518929e-05, "loss": 0.7710991501808167, "step": 1837 }, { "epoch": 0.5375054832577862, "grad_norm": 1.2797893583505542, "learning_rate": 1.7499639776204334e-05, "loss": 0.6330907940864563, "step": 1838 }, { "epoch": 0.5377979236730516, "grad_norm": 1.3697405221939354, "learning_rate": 1.7496440844224186e-05, "loss": 0.655827522277832, "step": 1839 }, { "epoch": 0.538090364088317, "grad_norm": 1.3640883815652403, "learning_rate": 1.7493240159996565e-05, "loss": 0.723412275314331, "step": 1840 }, { "epoch": 0.5383828045035824, "grad_norm": 1.273855459734962, "learning_rate": 1.7490037724269618e-05, "loss": 0.5504157543182373, "step": 1841 }, { "epoch": 0.5386752449188478, "grad_norm": 1.3867652356352673, "learning_rate": 1.7486833537791895e-05, "loss": 0.6258282661437988, "step": 1842 }, { "epoch": 0.5389676853341132, "grad_norm": 1.3063024833172743, "learning_rate": 1.748362760131235e-05, "loss": 0.7044231295585632, "step": 1843 }, { "epoch": 0.5392601257493785, "grad_norm": 1.329844005030904, "learning_rate": 1.7480419915580357e-05, "loss": 0.5979568362236023, "step": 1844 }, { "epoch": 0.539552566164644, "grad_norm": 1.2396904419147898, "learning_rate": 1.7477210481345686e-05, "loss": 0.558562159538269, "step": 1845 }, { "epoch": 0.5398450065799093, "grad_norm": 1.5914882070233294, "learning_rate": 1.747399929935853e-05, "loss": 0.5965149402618408, "step": 1846 }, { "epoch": 0.5401374469951747, "grad_norm": 1.2286076413347484, "learning_rate": 1.7470786370369483e-05, "loss": 0.6202878355979919, "step": 1847 }, { "epoch": 0.5404298874104401, "grad_norm": 1.4696847585462156, "learning_rate": 1.746757169512954e-05, "loss": 0.652141273021698, "step": 1848 }, { "epoch": 0.5407223278257055, "grad_norm": 1.3491880900702233, "learning_rate": 1.746435527439012e-05, "loss": 0.5713402628898621, "step": 1849 }, { "epoch": 0.541014768240971, "grad_norm": 1.1036198614058235, "learning_rate": 1.7461137108903042e-05, "loss": 0.49776554107666016, "step": 1850 }, { "epoch": 0.5413072086562363, "grad_norm": 1.3593053008733638, "learning_rate": 1.7457917199420525e-05, "loss": 0.7047991752624512, "step": 1851 }, { "epoch": 0.5415996490715017, "grad_norm": 1.249302868601747, "learning_rate": 1.7454695546695207e-05, "loss": 0.7019875049591064, "step": 1852 }, { "epoch": 0.541892089486767, "grad_norm": 1.1395410254023401, "learning_rate": 1.745147215148013e-05, "loss": 0.5448435544967651, "step": 1853 }, { "epoch": 0.5421845299020325, "grad_norm": 1.3392616230054089, "learning_rate": 1.7448247014528745e-05, "loss": 0.6042202711105347, "step": 1854 }, { "epoch": 0.5424769703172978, "grad_norm": 1.6632726033150385, "learning_rate": 1.744502013659491e-05, "loss": 0.8448539972305298, "step": 1855 }, { "epoch": 0.5427694107325632, "grad_norm": 1.5168637416823716, "learning_rate": 1.7441791518432877e-05, "loss": 0.6541755795478821, "step": 1856 }, { "epoch": 0.5430618511478287, "grad_norm": 1.3214742528031191, "learning_rate": 1.7438561160797326e-05, "loss": 0.6700184345245361, "step": 1857 }, { "epoch": 0.543354291563094, "grad_norm": 1.5975598198717695, "learning_rate": 1.7435329064443335e-05, "loss": 0.6407896280288696, "step": 1858 }, { "epoch": 0.5436467319783594, "grad_norm": 1.1007084555597737, "learning_rate": 1.7432095230126382e-05, "loss": 0.5380120277404785, "step": 1859 }, { "epoch": 0.5439391723936248, "grad_norm": 1.4184366915429367, "learning_rate": 1.7428859658602353e-05, "loss": 0.6561373472213745, "step": 1860 }, { "epoch": 0.5442316128088902, "grad_norm": 1.7211281199225186, "learning_rate": 1.7425622350627545e-05, "loss": 0.724541962146759, "step": 1861 }, { "epoch": 0.5445240532241555, "grad_norm": 1.3361773700031112, "learning_rate": 1.7422383306958666e-05, "loss": 0.6258946657180786, "step": 1862 }, { "epoch": 0.544816493639421, "grad_norm": 1.4343211647036773, "learning_rate": 1.7419142528352815e-05, "loss": 0.560769259929657, "step": 1863 }, { "epoch": 0.5451089340546864, "grad_norm": 1.3199774156859019, "learning_rate": 1.741590001556751e-05, "loss": 0.7782202363014221, "step": 1864 }, { "epoch": 0.5454013744699517, "grad_norm": 1.1330260111547463, "learning_rate": 1.7412655769360663e-05, "loss": 0.5956888198852539, "step": 1865 }, { "epoch": 0.5456938148852172, "grad_norm": 1.2304180375361309, "learning_rate": 1.7409409790490602e-05, "loss": 0.6251999139785767, "step": 1866 }, { "epoch": 0.5459862553004825, "grad_norm": 1.201828702533108, "learning_rate": 1.740616207971605e-05, "loss": 0.5864061713218689, "step": 1867 }, { "epoch": 0.5462786957157479, "grad_norm": 1.1335552643310969, "learning_rate": 1.7402912637796146e-05, "loss": 0.6241225004196167, "step": 1868 }, { "epoch": 0.5465711361310133, "grad_norm": 1.4457655679285375, "learning_rate": 1.739966146549042e-05, "loss": 0.7190053462982178, "step": 1869 }, { "epoch": 0.5468635765462787, "grad_norm": 1.3107442552185273, "learning_rate": 1.739640856355882e-05, "loss": 0.6771985292434692, "step": 1870 }, { "epoch": 0.547156016961544, "grad_norm": 1.3163112428890422, "learning_rate": 1.7393153932761687e-05, "loss": 0.5480636954307556, "step": 1871 }, { "epoch": 0.5474484573768095, "grad_norm": 1.5272520029044583, "learning_rate": 1.7389897573859773e-05, "loss": 0.7362977862358093, "step": 1872 }, { "epoch": 0.5477408977920749, "grad_norm": 1.3701377425052599, "learning_rate": 1.7386639487614232e-05, "loss": 0.6483198404312134, "step": 1873 }, { "epoch": 0.5480333382073402, "grad_norm": 1.137215399363759, "learning_rate": 1.7383379674786622e-05, "loss": 0.479977548122406, "step": 1874 }, { "epoch": 0.5483257786226057, "grad_norm": 1.2815568792711947, "learning_rate": 1.738011813613891e-05, "loss": 0.6824718117713928, "step": 1875 }, { "epoch": 0.548618219037871, "grad_norm": 1.4252738341228008, "learning_rate": 1.737685487243345e-05, "loss": 0.564873218536377, "step": 1876 }, { "epoch": 0.5489106594531364, "grad_norm": 1.208162951014484, "learning_rate": 1.7373589884433015e-05, "loss": 0.5748772621154785, "step": 1877 }, { "epoch": 0.5492030998684018, "grad_norm": 1.329038884364766, "learning_rate": 1.7370323172900778e-05, "loss": 0.6403437852859497, "step": 1878 }, { "epoch": 0.5494955402836672, "grad_norm": 1.7288162586927747, "learning_rate": 1.7367054738600312e-05, "loss": 0.8253078460693359, "step": 1879 }, { "epoch": 0.5497879806989326, "grad_norm": 1.204164217164209, "learning_rate": 1.7363784582295596e-05, "loss": 0.6823058128356934, "step": 1880 }, { "epoch": 0.550080421114198, "grad_norm": 1.0289811643005782, "learning_rate": 1.7360512704751003e-05, "loss": 0.48659563064575195, "step": 1881 }, { "epoch": 0.5503728615294634, "grad_norm": 1.5395158772607802, "learning_rate": 1.735723910673132e-05, "loss": 0.6380710601806641, "step": 1882 }, { "epoch": 0.5506653019447287, "grad_norm": 1.512121712639047, "learning_rate": 1.7353963789001723e-05, "loss": 0.6956683397293091, "step": 1883 }, { "epoch": 0.5509577423599942, "grad_norm": 1.2992852551955654, "learning_rate": 1.735068675232781e-05, "loss": 0.5751473903656006, "step": 1884 }, { "epoch": 0.5512501827752595, "grad_norm": 1.3297689987083825, "learning_rate": 1.734740799747556e-05, "loss": 0.7265490293502808, "step": 1885 }, { "epoch": 0.5515426231905249, "grad_norm": 1.3317519459591716, "learning_rate": 1.734412752521136e-05, "loss": 0.7419843673706055, "step": 1886 }, { "epoch": 0.5518350636057903, "grad_norm": 1.3385317545855182, "learning_rate": 1.734084533630201e-05, "loss": 0.7381073236465454, "step": 1887 }, { "epoch": 0.5521275040210557, "grad_norm": 1.4687535531628584, "learning_rate": 1.7337561431514692e-05, "loss": 0.6542054414749146, "step": 1888 }, { "epoch": 0.5524199444363211, "grad_norm": 1.3144027889366288, "learning_rate": 1.7334275811617e-05, "loss": 0.6283866167068481, "step": 1889 }, { "epoch": 0.5527123848515865, "grad_norm": 1.34879443340803, "learning_rate": 1.7330988477376935e-05, "loss": 0.6171330809593201, "step": 1890 }, { "epoch": 0.5530048252668519, "grad_norm": 1.309075407888037, "learning_rate": 1.7327699429562887e-05, "loss": 0.5181430578231812, "step": 1891 }, { "epoch": 0.5532972656821172, "grad_norm": 1.4382455208413174, "learning_rate": 1.7324408668943645e-05, "loss": 0.7337771058082581, "step": 1892 }, { "epoch": 0.5535897060973827, "grad_norm": 1.3677542553778577, "learning_rate": 1.7321116196288413e-05, "loss": 0.5193721055984497, "step": 1893 }, { "epoch": 0.553882146512648, "grad_norm": 1.2912522952038028, "learning_rate": 1.731782201236678e-05, "loss": 0.7743211388587952, "step": 1894 }, { "epoch": 0.5541745869279134, "grad_norm": 1.5457463678190766, "learning_rate": 1.731452611794875e-05, "loss": 0.8244242072105408, "step": 1895 }, { "epoch": 0.5544670273431789, "grad_norm": 1.3153817051947132, "learning_rate": 1.7311228513804712e-05, "loss": 0.6276153326034546, "step": 1896 }, { "epoch": 0.5547594677584442, "grad_norm": 1.4741498614217154, "learning_rate": 1.7307929200705463e-05, "loss": 0.7919771671295166, "step": 1897 }, { "epoch": 0.5550519081737096, "grad_norm": 1.527110359994231, "learning_rate": 1.7304628179422192e-05, "loss": 0.6187459230422974, "step": 1898 }, { "epoch": 0.555344348588975, "grad_norm": 1.1766146767977552, "learning_rate": 1.7301325450726497e-05, "loss": 0.6190363764762878, "step": 1899 }, { "epoch": 0.5556367890042404, "grad_norm": 1.209178127119406, "learning_rate": 1.7298021015390375e-05, "loss": 0.5537956953048706, "step": 1900 }, { "epoch": 0.5559292294195057, "grad_norm": 1.434637926231007, "learning_rate": 1.729471487418621e-05, "loss": 0.7164788246154785, "step": 1901 }, { "epoch": 0.5562216698347712, "grad_norm": 1.2878374944552806, "learning_rate": 1.7291407027886796e-05, "loss": 0.6101689338684082, "step": 1902 }, { "epoch": 0.5565141102500366, "grad_norm": 1.4102535348815881, "learning_rate": 1.7288097477265322e-05, "loss": 0.7112093567848206, "step": 1903 }, { "epoch": 0.5568065506653019, "grad_norm": 1.698804519808014, "learning_rate": 1.7284786223095376e-05, "loss": 0.7807149291038513, "step": 1904 }, { "epoch": 0.5570989910805674, "grad_norm": 1.3150296925108194, "learning_rate": 1.7281473266150942e-05, "loss": 0.5723121166229248, "step": 1905 }, { "epoch": 0.5573914314958327, "grad_norm": 1.4287078485940368, "learning_rate": 1.7278158607206402e-05, "loss": 0.6901307106018066, "step": 1906 }, { "epoch": 0.5576838719110981, "grad_norm": 1.3895105915390893, "learning_rate": 1.7274842247036547e-05, "loss": 0.8247314095497131, "step": 1907 }, { "epoch": 0.5579763123263635, "grad_norm": 1.2902939634670878, "learning_rate": 1.727152418641654e-05, "loss": 0.758405327796936, "step": 1908 }, { "epoch": 0.5582687527416289, "grad_norm": 1.1507745861737273, "learning_rate": 1.7268204426121967e-05, "loss": 0.6448276042938232, "step": 1909 }, { "epoch": 0.5585611931568942, "grad_norm": 1.4597983603763345, "learning_rate": 1.7264882966928803e-05, "loss": 0.6846790313720703, "step": 1910 }, { "epoch": 0.5588536335721597, "grad_norm": 1.494960410585431, "learning_rate": 1.726155980961342e-05, "loss": 0.6427637338638306, "step": 1911 }, { "epoch": 0.5591460739874251, "grad_norm": 1.6049335332675108, "learning_rate": 1.7258234954952578e-05, "loss": 0.7105496525764465, "step": 1912 }, { "epoch": 0.5594385144026904, "grad_norm": 1.247874236176648, "learning_rate": 1.7254908403723446e-05, "loss": 0.6307404041290283, "step": 1913 }, { "epoch": 0.5597309548179559, "grad_norm": 1.3460021193743466, "learning_rate": 1.7251580156703587e-05, "loss": 0.7194197177886963, "step": 1914 }, { "epoch": 0.5600233952332212, "grad_norm": 1.4541814827650097, "learning_rate": 1.7248250214670955e-05, "loss": 0.676772952079773, "step": 1915 }, { "epoch": 0.5603158356484866, "grad_norm": 1.4231220185819522, "learning_rate": 1.724491857840391e-05, "loss": 0.6047924160957336, "step": 1916 }, { "epoch": 0.560608276063752, "grad_norm": 1.4639689581400968, "learning_rate": 1.7241585248681192e-05, "loss": 0.7412474155426025, "step": 1917 }, { "epoch": 0.5609007164790174, "grad_norm": 1.3634846491128696, "learning_rate": 1.7238250226281952e-05, "loss": 0.6337922215461731, "step": 1918 }, { "epoch": 0.5611931568942828, "grad_norm": 1.325394488194612, "learning_rate": 1.7234913511985733e-05, "loss": 0.7192416787147522, "step": 1919 }, { "epoch": 0.5614855973095482, "grad_norm": 1.5807591545293311, "learning_rate": 1.723157510657247e-05, "loss": 0.6576168537139893, "step": 1920 }, { "epoch": 0.5617780377248136, "grad_norm": 1.2677184116479052, "learning_rate": 1.722823501082249e-05, "loss": 0.6592451333999634, "step": 1921 }, { "epoch": 0.5620704781400789, "grad_norm": 1.3384834377307993, "learning_rate": 1.722489322551653e-05, "loss": 0.8042774796485901, "step": 1922 }, { "epoch": 0.5623629185553444, "grad_norm": 1.4566017039283872, "learning_rate": 1.7221549751435706e-05, "loss": 0.727135181427002, "step": 1923 }, { "epoch": 0.5626553589706097, "grad_norm": 1.3099994778880142, "learning_rate": 1.7218204589361535e-05, "loss": 0.5641134977340698, "step": 1924 }, { "epoch": 0.5629477993858751, "grad_norm": 1.5113194940037022, "learning_rate": 1.7214857740075924e-05, "loss": 0.6354084610939026, "step": 1925 }, { "epoch": 0.5632402398011405, "grad_norm": 1.3038206210364904, "learning_rate": 1.7211509204361187e-05, "loss": 0.6044377088546753, "step": 1926 }, { "epoch": 0.5635326802164059, "grad_norm": 1.2045011077136063, "learning_rate": 1.7208158983000022e-05, "loss": 0.5519559383392334, "step": 1927 }, { "epoch": 0.5638251206316713, "grad_norm": 1.162061868190052, "learning_rate": 1.7204807076775514e-05, "loss": 0.4480612277984619, "step": 1928 }, { "epoch": 0.5641175610469367, "grad_norm": 1.3899173129631617, "learning_rate": 1.7201453486471167e-05, "loss": 0.5929607152938843, "step": 1929 }, { "epoch": 0.5644100014622021, "grad_norm": 2.021763483016241, "learning_rate": 1.7198098212870847e-05, "loss": 0.6863572001457214, "step": 1930 }, { "epoch": 0.5647024418774674, "grad_norm": 1.0938398450209694, "learning_rate": 1.719474125675884e-05, "loss": 0.5551834106445312, "step": 1931 }, { "epoch": 0.5649948822927329, "grad_norm": 1.3644128319132816, "learning_rate": 1.7191382618919802e-05, "loss": 0.6113166809082031, "step": 1932 }, { "epoch": 0.5652873227079982, "grad_norm": 1.419009993473521, "learning_rate": 1.7188022300138805e-05, "loss": 0.7833362817764282, "step": 1933 }, { "epoch": 0.5655797631232636, "grad_norm": 1.3899666208681147, "learning_rate": 1.71846603012013e-05, "loss": 0.5981882810592651, "step": 1934 }, { "epoch": 0.5658722035385291, "grad_norm": 1.3211180154101085, "learning_rate": 1.7181296622893132e-05, "loss": 0.6009912490844727, "step": 1935 }, { "epoch": 0.5661646439537944, "grad_norm": 1.5201002205446237, "learning_rate": 1.717793126600054e-05, "loss": 0.5605272054672241, "step": 1936 }, { "epoch": 0.5664570843690598, "grad_norm": 1.9581129231236365, "learning_rate": 1.717456423131016e-05, "loss": 0.6310821771621704, "step": 1937 }, { "epoch": 0.5667495247843252, "grad_norm": 1.3290964241159713, "learning_rate": 1.7171195519609013e-05, "loss": 0.6776266694068909, "step": 1938 }, { "epoch": 0.5670419651995906, "grad_norm": 1.5744599660597636, "learning_rate": 1.7167825131684516e-05, "loss": 0.6369091868400574, "step": 1939 }, { "epoch": 0.5673344056148559, "grad_norm": 1.5708596771950396, "learning_rate": 1.7164453068324472e-05, "loss": 0.6241647005081177, "step": 1940 }, { "epoch": 0.5676268460301214, "grad_norm": 1.1863544042032323, "learning_rate": 1.7161079330317086e-05, "loss": 0.6411961317062378, "step": 1941 }, { "epoch": 0.5679192864453868, "grad_norm": 1.4635134179889109, "learning_rate": 1.7157703918450942e-05, "loss": 0.6148936152458191, "step": 1942 }, { "epoch": 0.5682117268606521, "grad_norm": 1.3183225060577142, "learning_rate": 1.7154326833515034e-05, "loss": 0.5006934404373169, "step": 1943 }, { "epoch": 0.5685041672759176, "grad_norm": 1.462356689812602, "learning_rate": 1.7150948076298722e-05, "loss": 0.7446701526641846, "step": 1944 }, { "epoch": 0.5687966076911829, "grad_norm": 1.2052848826016378, "learning_rate": 1.7147567647591777e-05, "loss": 0.6159533262252808, "step": 1945 }, { "epoch": 0.5690890481064483, "grad_norm": 1.4298530885651661, "learning_rate": 1.7144185548184355e-05, "loss": 0.6437554359436035, "step": 1946 }, { "epoch": 0.5693814885217137, "grad_norm": 1.3361469734250542, "learning_rate": 1.7140801778866995e-05, "loss": 0.6229397654533386, "step": 1947 }, { "epoch": 0.5696739289369791, "grad_norm": 1.4197238006731758, "learning_rate": 1.7137416340430636e-05, "loss": 0.5777184963226318, "step": 1948 }, { "epoch": 0.5699663693522444, "grad_norm": 1.543436374887725, "learning_rate": 1.7134029233666603e-05, "loss": 0.7817827463150024, "step": 1949 }, { "epoch": 0.5702588097675099, "grad_norm": 1.3527927450904613, "learning_rate": 1.713064045936662e-05, "loss": 0.6784861087799072, "step": 1950 }, { "epoch": 0.5705512501827753, "grad_norm": 1.2839254399050724, "learning_rate": 1.7127250018322777e-05, "loss": 0.6883150339126587, "step": 1951 }, { "epoch": 0.5708436905980406, "grad_norm": 1.093202890209594, "learning_rate": 1.712385791132758e-05, "loss": 0.5464504957199097, "step": 1952 }, { "epoch": 0.5711361310133061, "grad_norm": 1.2617859237604026, "learning_rate": 1.7120464139173908e-05, "loss": 0.5950040817260742, "step": 1953 }, { "epoch": 0.5714285714285714, "grad_norm": 1.374864335037442, "learning_rate": 1.7117068702655034e-05, "loss": 0.6381576061248779, "step": 1954 }, { "epoch": 0.5717210118438368, "grad_norm": 1.2624571465966312, "learning_rate": 1.7113671602564628e-05, "loss": 0.6611777544021606, "step": 1955 }, { "epoch": 0.5720134522591022, "grad_norm": 1.2625162580462326, "learning_rate": 1.7110272839696735e-05, "loss": 0.5057446956634521, "step": 1956 }, { "epoch": 0.5723058926743676, "grad_norm": 1.3802970727547992, "learning_rate": 1.7106872414845798e-05, "loss": 0.6095671653747559, "step": 1957 }, { "epoch": 0.572598333089633, "grad_norm": 1.4171107803407814, "learning_rate": 1.710347032880664e-05, "loss": 0.5514808893203735, "step": 1958 }, { "epoch": 0.5728907735048984, "grad_norm": 2.1059044775107516, "learning_rate": 1.7100066582374487e-05, "loss": 0.6491304039955139, "step": 1959 }, { "epoch": 0.5731832139201638, "grad_norm": 1.2887931231971388, "learning_rate": 1.7096661176344936e-05, "loss": 0.6759692430496216, "step": 1960 }, { "epoch": 0.5734756543354291, "grad_norm": 1.4738884192318065, "learning_rate": 1.709325411151399e-05, "loss": 0.5897858142852783, "step": 1961 }, { "epoch": 0.5737680947506946, "grad_norm": 1.537196415964603, "learning_rate": 1.7089845388678015e-05, "loss": 0.6822922229766846, "step": 1962 }, { "epoch": 0.5740605351659599, "grad_norm": 1.2963583337618676, "learning_rate": 1.7086435008633792e-05, "loss": 0.7694820165634155, "step": 1963 }, { "epoch": 0.5743529755812253, "grad_norm": 1.5109651591265172, "learning_rate": 1.7083022972178473e-05, "loss": 0.702151358127594, "step": 1964 }, { "epoch": 0.5746454159964907, "grad_norm": 1.564445011536072, "learning_rate": 1.7079609280109597e-05, "loss": 0.768844485282898, "step": 1965 }, { "epoch": 0.5749378564117561, "grad_norm": 1.4251497195478635, "learning_rate": 1.7076193933225097e-05, "loss": 0.6641331911087036, "step": 1966 }, { "epoch": 0.5752302968270215, "grad_norm": 1.3577479649866828, "learning_rate": 1.707277693232329e-05, "loss": 0.7176777124404907, "step": 1967 }, { "epoch": 0.5755227372422869, "grad_norm": 1.4539026175393464, "learning_rate": 1.7069358278202877e-05, "loss": 0.6543929576873779, "step": 1968 }, { "epoch": 0.5758151776575523, "grad_norm": 1.422676342883674, "learning_rate": 1.7065937971662953e-05, "loss": 0.7501214742660522, "step": 1969 }, { "epoch": 0.5761076180728176, "grad_norm": 1.1830543705848042, "learning_rate": 1.7062516013502984e-05, "loss": 0.6013212203979492, "step": 1970 }, { "epoch": 0.5764000584880831, "grad_norm": 1.489892931502725, "learning_rate": 1.7059092404522843e-05, "loss": 0.5920547246932983, "step": 1971 }, { "epoch": 0.5766924989033484, "grad_norm": 1.1082983109051399, "learning_rate": 1.7055667145522767e-05, "loss": 0.6720744371414185, "step": 1972 }, { "epoch": 0.5769849393186138, "grad_norm": 1.3476214386922525, "learning_rate": 1.70522402373034e-05, "loss": 0.6938234567642212, "step": 1973 }, { "epoch": 0.5772773797338793, "grad_norm": 1.321699429936501, "learning_rate": 1.704881168066575e-05, "loss": 0.6430555582046509, "step": 1974 }, { "epoch": 0.5775698201491446, "grad_norm": 1.331724408429167, "learning_rate": 1.7045381476411234e-05, "loss": 0.7738221883773804, "step": 1975 }, { "epoch": 0.57786226056441, "grad_norm": 1.2033511527827634, "learning_rate": 1.704194962534163e-05, "loss": 0.5335453748703003, "step": 1976 }, { "epoch": 0.5781547009796754, "grad_norm": 1.4123366931040846, "learning_rate": 1.7038516128259118e-05, "loss": 0.691404402256012, "step": 1977 }, { "epoch": 0.5784471413949408, "grad_norm": 1.6032589522393152, "learning_rate": 1.7035080985966253e-05, "loss": 0.7371880412101746, "step": 1978 }, { "epoch": 0.5787395818102061, "grad_norm": 1.356558066648364, "learning_rate": 1.7031644199265987e-05, "loss": 0.5661574602127075, "step": 1979 }, { "epoch": 0.5790320222254716, "grad_norm": 1.069750621474732, "learning_rate": 1.702820576896164e-05, "loss": 0.5823863744735718, "step": 1980 }, { "epoch": 0.579324462640737, "grad_norm": 1.608685609966537, "learning_rate": 1.7024765695856924e-05, "loss": 0.6228796243667603, "step": 1981 }, { "epoch": 0.5796169030560023, "grad_norm": 1.3395261062815815, "learning_rate": 1.702132398075594e-05, "loss": 0.5788040161132812, "step": 1982 }, { "epoch": 0.5799093434712678, "grad_norm": 1.1540676629937416, "learning_rate": 1.701788062446317e-05, "loss": 0.5950253009796143, "step": 1983 }, { "epoch": 0.5802017838865331, "grad_norm": 1.2446098890682338, "learning_rate": 1.7014435627783466e-05, "loss": 0.5672034025192261, "step": 1984 }, { "epoch": 0.5804942243017985, "grad_norm": 1.329055336569987, "learning_rate": 1.7010988991522085e-05, "loss": 0.6646316051483154, "step": 1985 }, { "epoch": 0.5807866647170639, "grad_norm": 1.2423480846022465, "learning_rate": 1.7007540716484657e-05, "loss": 0.6430097818374634, "step": 1986 }, { "epoch": 0.5810791051323293, "grad_norm": 1.2889752174339557, "learning_rate": 1.700409080347719e-05, "loss": 0.5803329348564148, "step": 1987 }, { "epoch": 0.5813715455475946, "grad_norm": 1.613226747300198, "learning_rate": 1.7000639253306085e-05, "loss": 0.7526525259017944, "step": 1988 }, { "epoch": 0.5816639859628601, "grad_norm": 1.329271357875936, "learning_rate": 1.6997186066778118e-05, "loss": 0.6679468750953674, "step": 1989 }, { "epoch": 0.5819564263781255, "grad_norm": 1.5773364597040387, "learning_rate": 1.6993731244700454e-05, "loss": 0.7233256101608276, "step": 1990 }, { "epoch": 0.5822488667933908, "grad_norm": 1.3632345541871926, "learning_rate": 1.6990274787880633e-05, "loss": 0.5986290574073792, "step": 1991 }, { "epoch": 0.5825413072086563, "grad_norm": 1.3136772281139917, "learning_rate": 1.6986816697126583e-05, "loss": 0.6898672580718994, "step": 1992 }, { "epoch": 0.5828337476239216, "grad_norm": 1.6057802032529045, "learning_rate": 1.698335697324661e-05, "loss": 0.6888613104820251, "step": 1993 }, { "epoch": 0.583126188039187, "grad_norm": 1.3469913891844598, "learning_rate": 1.6979895617049404e-05, "loss": 0.6002428531646729, "step": 1994 }, { "epoch": 0.5834186284544524, "grad_norm": 1.3517104173069454, "learning_rate": 1.6976432629344036e-05, "loss": 0.6372438669204712, "step": 1995 }, { "epoch": 0.5837110688697178, "grad_norm": 1.0868680846473084, "learning_rate": 1.6972968010939953e-05, "loss": 0.529569149017334, "step": 1996 }, { "epoch": 0.5840035092849832, "grad_norm": 1.415626330345063, "learning_rate": 1.6969501762647002e-05, "loss": 0.5534025430679321, "step": 1997 }, { "epoch": 0.5842959497002486, "grad_norm": 1.5855609078257513, "learning_rate": 1.6966033885275384e-05, "loss": 0.8105937242507935, "step": 1998 }, { "epoch": 0.584588390115514, "grad_norm": 1.4927698791899027, "learning_rate": 1.6962564379635702e-05, "loss": 0.7657530903816223, "step": 1999 }, { "epoch": 0.5848808305307793, "grad_norm": 1.2186213815751603, "learning_rate": 1.6959093246538927e-05, "loss": 0.5941641330718994, "step": 2000 }, { "epoch": 0.5851732709460448, "grad_norm": 1.2310851342087676, "learning_rate": 1.695562048679642e-05, "loss": 0.6130149364471436, "step": 2001 }, { "epoch": 0.5854657113613101, "grad_norm": 1.4904324383349616, "learning_rate": 1.6952146101219914e-05, "loss": 0.7078043222427368, "step": 2002 }, { "epoch": 0.5857581517765755, "grad_norm": 1.4412882425814895, "learning_rate": 1.6948670090621528e-05, "loss": 0.6330863237380981, "step": 2003 }, { "epoch": 0.5860505921918409, "grad_norm": 1.2835823957491164, "learning_rate": 1.6945192455813755e-05, "loss": 0.6631220579147339, "step": 2004 }, { "epoch": 0.5863430326071063, "grad_norm": 1.411600977622384, "learning_rate": 1.6941713197609476e-05, "loss": 0.6669473648071289, "step": 2005 }, { "epoch": 0.5866354730223717, "grad_norm": 1.370088328820007, "learning_rate": 1.6938232316821938e-05, "loss": 0.608252763748169, "step": 2006 }, { "epoch": 0.5869279134376371, "grad_norm": 1.3777699704962545, "learning_rate": 1.6934749814264786e-05, "loss": 0.5979427695274353, "step": 2007 }, { "epoch": 0.5872203538529025, "grad_norm": 1.541200433158731, "learning_rate": 1.6931265690752027e-05, "loss": 0.5653454661369324, "step": 2008 }, { "epoch": 0.5875127942681678, "grad_norm": 1.1212005773159774, "learning_rate": 1.6927779947098052e-05, "loss": 0.6399147510528564, "step": 2009 }, { "epoch": 0.5878052346834333, "grad_norm": 1.1797468758477498, "learning_rate": 1.6924292584117642e-05, "loss": 0.41824793815612793, "step": 2010 }, { "epoch": 0.5880976750986986, "grad_norm": 1.917297128854583, "learning_rate": 1.6920803602625938e-05, "loss": 0.8881042003631592, "step": 2011 }, { "epoch": 0.588390115513964, "grad_norm": 1.331713386917835, "learning_rate": 1.6917313003438473e-05, "loss": 0.636030912399292, "step": 2012 }, { "epoch": 0.5886825559292295, "grad_norm": 1.4002891525649699, "learning_rate": 1.6913820787371147e-05, "loss": 0.6038305759429932, "step": 2013 }, { "epoch": 0.5889749963444948, "grad_norm": 1.1435051787090085, "learning_rate": 1.6910326955240252e-05, "loss": 0.7073840498924255, "step": 2014 }, { "epoch": 0.5892674367597602, "grad_norm": 1.4386346426993692, "learning_rate": 1.6906831507862446e-05, "loss": 0.5804994106292725, "step": 2015 }, { "epoch": 0.5895598771750256, "grad_norm": 1.5741785374654678, "learning_rate": 1.6903334446054768e-05, "loss": 0.8194780349731445, "step": 2016 }, { "epoch": 0.589852317590291, "grad_norm": 1.812303850133564, "learning_rate": 1.689983577063464e-05, "loss": 0.7348685264587402, "step": 2017 }, { "epoch": 0.5901447580055563, "grad_norm": 1.1971589423872142, "learning_rate": 1.689633548241985e-05, "loss": 0.5855007171630859, "step": 2018 }, { "epoch": 0.5904371984208218, "grad_norm": 1.3707253561652837, "learning_rate": 1.689283358222857e-05, "loss": 0.7387616634368896, "step": 2019 }, { "epoch": 0.5907296388360872, "grad_norm": 1.1680954205847025, "learning_rate": 1.688933007087935e-05, "loss": 0.688759446144104, "step": 2020 }, { "epoch": 0.5910220792513525, "grad_norm": 1.5341989172452428, "learning_rate": 1.6885824949191117e-05, "loss": 0.7203953266143799, "step": 2021 }, { "epoch": 0.591314519666618, "grad_norm": 1.2850552689542662, "learning_rate": 1.6882318217983165e-05, "loss": 0.6465663909912109, "step": 2022 }, { "epoch": 0.5916069600818833, "grad_norm": 1.1725524993946357, "learning_rate": 1.6878809878075176e-05, "loss": 0.6625394821166992, "step": 2023 }, { "epoch": 0.5918994004971487, "grad_norm": 1.1518619162929866, "learning_rate": 1.68752999302872e-05, "loss": 0.6577074527740479, "step": 2024 }, { "epoch": 0.5921918409124141, "grad_norm": 1.2660442226503865, "learning_rate": 1.6871788375439667e-05, "loss": 0.50509113073349, "step": 2025 }, { "epoch": 0.5924842813276795, "grad_norm": 1.1506732126554624, "learning_rate": 1.6868275214353387e-05, "loss": 0.5723974704742432, "step": 2026 }, { "epoch": 0.5927767217429448, "grad_norm": 1.5630741195611901, "learning_rate": 1.6864760447849533e-05, "loss": 0.6383459568023682, "step": 2027 }, { "epoch": 0.5930691621582103, "grad_norm": 1.5937791400894217, "learning_rate": 1.6861244076749663e-05, "loss": 0.5307388305664062, "step": 2028 }, { "epoch": 0.5933616025734757, "grad_norm": 1.3756662975981515, "learning_rate": 1.6857726101875706e-05, "loss": 0.8009265661239624, "step": 2029 }, { "epoch": 0.593654042988741, "grad_norm": 1.3635510886639874, "learning_rate": 1.685420652404997e-05, "loss": 0.5505321025848389, "step": 2030 }, { "epoch": 0.5939464834040065, "grad_norm": 1.2645625310092812, "learning_rate": 1.6850685344095134e-05, "loss": 0.680927038192749, "step": 2031 }, { "epoch": 0.5942389238192718, "grad_norm": 1.419624052256642, "learning_rate": 1.684716256283425e-05, "loss": 0.7357309460639954, "step": 2032 }, { "epoch": 0.5945313642345372, "grad_norm": 1.2277919560967578, "learning_rate": 1.6843638181090748e-05, "loss": 0.5896620750427246, "step": 2033 }, { "epoch": 0.5948238046498026, "grad_norm": 1.261982037348603, "learning_rate": 1.6840112199688432e-05, "loss": 0.5567387342453003, "step": 2034 }, { "epoch": 0.595116245065068, "grad_norm": 1.2606984508496513, "learning_rate": 1.6836584619451478e-05, "loss": 0.6428712606430054, "step": 2035 }, { "epoch": 0.5954086854803334, "grad_norm": 1.3387753764851709, "learning_rate": 1.6833055441204436e-05, "loss": 0.7430459260940552, "step": 2036 }, { "epoch": 0.5957011258955988, "grad_norm": 1.250181817593343, "learning_rate": 1.682952466577223e-05, "loss": 0.5982654690742493, "step": 2037 }, { "epoch": 0.5959935663108642, "grad_norm": 1.2721973260460164, "learning_rate": 1.6825992293980158e-05, "loss": 0.5807450413703918, "step": 2038 }, { "epoch": 0.5962860067261295, "grad_norm": 1.4202543697420538, "learning_rate": 1.6822458326653888e-05, "loss": 0.7667814493179321, "step": 2039 }, { "epoch": 0.596578447141395, "grad_norm": 1.4555539952275451, "learning_rate": 1.6818922764619467e-05, "loss": 0.8192781805992126, "step": 2040 }, { "epoch": 0.5968708875566603, "grad_norm": 1.3146767820144227, "learning_rate": 1.681538560870331e-05, "loss": 0.6652504205703735, "step": 2041 }, { "epoch": 0.5971633279719257, "grad_norm": 1.4465108366403951, "learning_rate": 1.6811846859732207e-05, "loss": 0.6227332353591919, "step": 2042 }, { "epoch": 0.597455768387191, "grad_norm": 1.1394575473936808, "learning_rate": 1.6808306518533315e-05, "loss": 0.5459558963775635, "step": 2043 }, { "epoch": 0.5977482088024565, "grad_norm": 1.3498516241816683, "learning_rate": 1.6804764585934167e-05, "loss": 0.5176202058792114, "step": 2044 }, { "epoch": 0.5980406492177219, "grad_norm": 1.5025501377940633, "learning_rate": 1.6801221062762677e-05, "loss": 0.5818016529083252, "step": 2045 }, { "epoch": 0.5983330896329873, "grad_norm": 1.3397658451047565, "learning_rate": 1.679767594984711e-05, "loss": 0.622256875038147, "step": 2046 }, { "epoch": 0.5986255300482527, "grad_norm": 1.2198859984633783, "learning_rate": 1.6794129248016124e-05, "loss": 0.5538911819458008, "step": 2047 }, { "epoch": 0.598917970463518, "grad_norm": 1.1939205886096602, "learning_rate": 1.6790580958098733e-05, "loss": 0.4934890568256378, "step": 2048 }, { "epoch": 0.5992104108787835, "grad_norm": 1.5628453531282531, "learning_rate": 1.678703108092433e-05, "loss": 0.6754223108291626, "step": 2049 }, { "epoch": 0.5995028512940488, "grad_norm": 1.3047429440272302, "learning_rate": 1.678347961732268e-05, "loss": 0.48618268966674805, "step": 2050 }, { "epoch": 0.5997952917093142, "grad_norm": 2.239352665042965, "learning_rate": 1.6779926568123913e-05, "loss": 0.6844758987426758, "step": 2051 }, { "epoch": 0.6000877321245797, "grad_norm": 1.222439693123936, "learning_rate": 1.677637193415853e-05, "loss": 0.5258621573448181, "step": 2052 }, { "epoch": 0.600380172539845, "grad_norm": 1.5856950316684058, "learning_rate": 1.6772815716257414e-05, "loss": 0.5571128129959106, "step": 2053 }, { "epoch": 0.6006726129551104, "grad_norm": 1.514916071293939, "learning_rate": 1.67692579152518e-05, "loss": 0.5881344079971313, "step": 2054 }, { "epoch": 0.6009650533703758, "grad_norm": 1.5804701546241575, "learning_rate": 1.6765698531973305e-05, "loss": 0.7162419557571411, "step": 2055 }, { "epoch": 0.6012574937856412, "grad_norm": 1.487082432347586, "learning_rate": 1.6762137567253917e-05, "loss": 0.7470849752426147, "step": 2056 }, { "epoch": 0.6015499342009065, "grad_norm": 1.4154424289161787, "learning_rate": 1.6758575021925987e-05, "loss": 0.6043628454208374, "step": 2057 }, { "epoch": 0.601842374616172, "grad_norm": 1.4033964052969388, "learning_rate": 1.6755010896822237e-05, "loss": 0.6574143171310425, "step": 2058 }, { "epoch": 0.6021348150314374, "grad_norm": 1.3508419478610747, "learning_rate": 1.675144519277576e-05, "loss": 0.605838418006897, "step": 2059 }, { "epoch": 0.6024272554467027, "grad_norm": 1.2890691190480261, "learning_rate": 1.6747877910620022e-05, "loss": 0.5859218239784241, "step": 2060 }, { "epoch": 0.6027196958619682, "grad_norm": 1.3985794655421304, "learning_rate": 1.674430905118885e-05, "loss": 0.7272971868515015, "step": 2061 }, { "epoch": 0.6030121362772335, "grad_norm": 1.2870566467248659, "learning_rate": 1.674073861531644e-05, "loss": 0.606023907661438, "step": 2062 }, { "epoch": 0.6033045766924989, "grad_norm": 1.345090429761192, "learning_rate": 1.6737166603837364e-05, "loss": 0.6029521822929382, "step": 2063 }, { "epoch": 0.6035970171077643, "grad_norm": 1.1860277395685632, "learning_rate": 1.673359301758656e-05, "loss": 0.7544999122619629, "step": 2064 }, { "epoch": 0.6038894575230297, "grad_norm": 1.3953376279645262, "learning_rate": 1.6730017857399327e-05, "loss": 0.7487601637840271, "step": 2065 }, { "epoch": 0.604181897938295, "grad_norm": 1.3904468062872732, "learning_rate": 1.672644112411134e-05, "loss": 0.6429200172424316, "step": 2066 }, { "epoch": 0.6044743383535605, "grad_norm": 1.4246263416975375, "learning_rate": 1.6722862818558635e-05, "loss": 0.7337179183959961, "step": 2067 }, { "epoch": 0.6047667787688259, "grad_norm": 1.432290850861675, "learning_rate": 1.671928294157762e-05, "loss": 0.6644014120101929, "step": 2068 }, { "epoch": 0.6050592191840912, "grad_norm": 1.3048966935224826, "learning_rate": 1.6715701494005078e-05, "loss": 0.5987672805786133, "step": 2069 }, { "epoch": 0.6053516595993567, "grad_norm": 1.5176113056744007, "learning_rate": 1.671211847667814e-05, "loss": 0.5878695845603943, "step": 2070 }, { "epoch": 0.605644100014622, "grad_norm": 1.3348485026555847, "learning_rate": 1.670853389043432e-05, "loss": 0.540128231048584, "step": 2071 }, { "epoch": 0.6059365404298874, "grad_norm": 1.3888450119982874, "learning_rate": 1.670494773611149e-05, "loss": 0.667206346988678, "step": 2072 }, { "epoch": 0.6062289808451528, "grad_norm": 1.5911825658421195, "learning_rate": 1.6701360014547896e-05, "loss": 0.6433641910552979, "step": 2073 }, { "epoch": 0.6065214212604182, "grad_norm": 1.447981653333928, "learning_rate": 1.669777072658214e-05, "loss": 0.5803529024124146, "step": 2074 }, { "epoch": 0.6068138616756836, "grad_norm": 1.125005009009719, "learning_rate": 1.6694179873053202e-05, "loss": 0.6203820705413818, "step": 2075 }, { "epoch": 0.607106302090949, "grad_norm": 1.3092542979615172, "learning_rate": 1.669058745480042e-05, "loss": 0.6194918155670166, "step": 2076 }, { "epoch": 0.6073987425062144, "grad_norm": 1.593480689755987, "learning_rate": 1.66869934726635e-05, "loss": 0.6797547936439514, "step": 2077 }, { "epoch": 0.6076911829214797, "grad_norm": 1.3923211889522802, "learning_rate": 1.6683397927482512e-05, "loss": 0.6076459884643555, "step": 2078 }, { "epoch": 0.6079836233367452, "grad_norm": 1.3874225830336557, "learning_rate": 1.6679800820097895e-05, "loss": 0.6958068609237671, "step": 2079 }, { "epoch": 0.6082760637520105, "grad_norm": 1.3355509335032223, "learning_rate": 1.6676202151350453e-05, "loss": 0.5819929242134094, "step": 2080 }, { "epoch": 0.6085685041672759, "grad_norm": 1.3476445996808082, "learning_rate": 1.6672601922081347e-05, "loss": 0.7125047445297241, "step": 2081 }, { "epoch": 0.6088609445825413, "grad_norm": 1.4432332437479862, "learning_rate": 1.6669000133132108e-05, "loss": 0.8046560287475586, "step": 2082 }, { "epoch": 0.6091533849978067, "grad_norm": 1.192025927247586, "learning_rate": 1.666539678534464e-05, "loss": 0.5468478202819824, "step": 2083 }, { "epoch": 0.6094458254130721, "grad_norm": 1.3403719695971306, "learning_rate": 1.6661791879561204e-05, "loss": 0.6387852430343628, "step": 2084 }, { "epoch": 0.6097382658283375, "grad_norm": 1.3327872578740647, "learning_rate": 1.6658185416624415e-05, "loss": 0.643539547920227, "step": 2085 }, { "epoch": 0.6100307062436029, "grad_norm": 1.2236148701775094, "learning_rate": 1.6654577397377266e-05, "loss": 0.5031965374946594, "step": 2086 }, { "epoch": 0.6103231466588682, "grad_norm": 1.507439246425782, "learning_rate": 1.6650967822663115e-05, "loss": 0.6690273284912109, "step": 2087 }, { "epoch": 0.6106155870741337, "grad_norm": 1.2924449065282086, "learning_rate": 1.6647356693325672e-05, "loss": 0.6396887302398682, "step": 2088 }, { "epoch": 0.610908027489399, "grad_norm": 1.4444361497865652, "learning_rate": 1.664374401020902e-05, "loss": 0.6306549310684204, "step": 2089 }, { "epoch": 0.6112004679046644, "grad_norm": 1.3565777173208147, "learning_rate": 1.66401297741576e-05, "loss": 0.5936366319656372, "step": 2090 }, { "epoch": 0.6114929083199299, "grad_norm": 1.1669567203268514, "learning_rate": 1.6636513986016215e-05, "loss": 0.6153277158737183, "step": 2091 }, { "epoch": 0.6117853487351952, "grad_norm": 1.2085146124175858, "learning_rate": 1.663289664663004e-05, "loss": 0.6361621618270874, "step": 2092 }, { "epoch": 0.6120777891504606, "grad_norm": 1.2163858440552462, "learning_rate": 1.6629277756844603e-05, "loss": 0.6511524319648743, "step": 2093 }, { "epoch": 0.612370229565726, "grad_norm": 1.2219001757495958, "learning_rate": 1.6625657317505792e-05, "loss": 0.5811333656311035, "step": 2094 }, { "epoch": 0.6126626699809914, "grad_norm": 1.4531007944498606, "learning_rate": 1.6622035329459872e-05, "loss": 0.6935377717018127, "step": 2095 }, { "epoch": 0.6129551103962567, "grad_norm": 1.3697721797296887, "learning_rate": 1.6618411793553455e-05, "loss": 0.6363199949264526, "step": 2096 }, { "epoch": 0.6132475508115222, "grad_norm": 1.6107434013725794, "learning_rate": 1.6614786710633525e-05, "loss": 0.7325713634490967, "step": 2097 }, { "epoch": 0.6135399912267876, "grad_norm": 1.3944095356365322, "learning_rate": 1.6611160081547414e-05, "loss": 0.5739182829856873, "step": 2098 }, { "epoch": 0.6138324316420529, "grad_norm": 1.4193388816384238, "learning_rate": 1.6607531907142835e-05, "loss": 0.611133873462677, "step": 2099 }, { "epoch": 0.6141248720573184, "grad_norm": 1.579788361702439, "learning_rate": 1.6603902188267842e-05, "loss": 0.6419532299041748, "step": 2100 }, { "epoch": 0.6144173124725837, "grad_norm": 1.482873128334509, "learning_rate": 1.660027092577087e-05, "loss": 0.7736743688583374, "step": 2101 }, { "epoch": 0.6147097528878491, "grad_norm": 1.199857125427724, "learning_rate": 1.6596638120500696e-05, "loss": 0.5249119400978088, "step": 2102 }, { "epoch": 0.6150021933031145, "grad_norm": 2.505852142425954, "learning_rate": 1.6593003773306475e-05, "loss": 0.7145636081695557, "step": 2103 }, { "epoch": 0.6152946337183799, "grad_norm": 1.3335089477583737, "learning_rate": 1.65893678850377e-05, "loss": 0.5807666182518005, "step": 2104 }, { "epoch": 0.6155870741336452, "grad_norm": 1.2437068513912055, "learning_rate": 1.6585730456544255e-05, "loss": 0.5049663782119751, "step": 2105 }, { "epoch": 0.6158795145489107, "grad_norm": 1.4826397888996732, "learning_rate": 1.658209148867635e-05, "loss": 0.6744092702865601, "step": 2106 }, { "epoch": 0.6161719549641761, "grad_norm": 1.4821897923446594, "learning_rate": 1.6578450982284584e-05, "loss": 0.605404794216156, "step": 2107 }, { "epoch": 0.6164643953794414, "grad_norm": 1.1917544416711534, "learning_rate": 1.6574808938219894e-05, "loss": 0.6074866056442261, "step": 2108 }, { "epoch": 0.6167568357947069, "grad_norm": 1.284543555588908, "learning_rate": 1.6571165357333594e-05, "loss": 0.6758207082748413, "step": 2109 }, { "epoch": 0.6170492762099722, "grad_norm": 1.580962080275822, "learning_rate": 1.6567520240477344e-05, "loss": 0.7669274806976318, "step": 2110 }, { "epoch": 0.6173417166252376, "grad_norm": 1.3997913559025885, "learning_rate": 1.6563873588503173e-05, "loss": 0.497562050819397, "step": 2111 }, { "epoch": 0.617634157040503, "grad_norm": 1.6655652024231358, "learning_rate": 1.656022540226345e-05, "loss": 0.6398104429244995, "step": 2112 }, { "epoch": 0.6179265974557684, "grad_norm": 1.4155810596985208, "learning_rate": 1.6556575682610935e-05, "loss": 0.6739988327026367, "step": 2113 }, { "epoch": 0.6182190378710338, "grad_norm": 1.3164921836609038, "learning_rate": 1.6552924430398716e-05, "loss": 0.5710165500640869, "step": 2114 }, { "epoch": 0.6185114782862992, "grad_norm": 1.1567442833736337, "learning_rate": 1.6549271646480253e-05, "loss": 0.6087738871574402, "step": 2115 }, { "epoch": 0.6188039187015646, "grad_norm": 1.1877649418617353, "learning_rate": 1.6545617331709364e-05, "loss": 0.5300824642181396, "step": 2116 }, { "epoch": 0.6190963591168299, "grad_norm": 1.3759503189909044, "learning_rate": 1.6541961486940222e-05, "loss": 0.7384774684906006, "step": 2117 }, { "epoch": 0.6193887995320954, "grad_norm": 1.1608035895573054, "learning_rate": 1.6538304113027356e-05, "loss": 0.5867838263511658, "step": 2118 }, { "epoch": 0.6196812399473607, "grad_norm": 1.4435135524238625, "learning_rate": 1.653464521082566e-05, "loss": 0.617068886756897, "step": 2119 }, { "epoch": 0.6199736803626261, "grad_norm": 1.2420433862943483, "learning_rate": 1.6530984781190374e-05, "loss": 0.7316439151763916, "step": 2120 }, { "epoch": 0.6202661207778914, "grad_norm": 1.3153827472233475, "learning_rate": 1.6527322824977104e-05, "loss": 0.5469995737075806, "step": 2121 }, { "epoch": 0.6205585611931569, "grad_norm": 1.4608354678316708, "learning_rate": 1.6523659343041815e-05, "loss": 0.6577411890029907, "step": 2122 }, { "epoch": 0.6208510016084223, "grad_norm": 1.5130442860821829, "learning_rate": 1.6519994336240816e-05, "loss": 0.7425049543380737, "step": 2123 }, { "epoch": 0.6211434420236877, "grad_norm": 1.7408354143028393, "learning_rate": 1.6516327805430785e-05, "loss": 0.7894090414047241, "step": 2124 }, { "epoch": 0.6214358824389531, "grad_norm": 1.2267269656084083, "learning_rate": 1.651265975146875e-05, "loss": 0.5739543437957764, "step": 2125 }, { "epoch": 0.6217283228542184, "grad_norm": 1.2973694692382243, "learning_rate": 1.6508990175212092e-05, "loss": 0.6987308263778687, "step": 2126 }, { "epoch": 0.6220207632694839, "grad_norm": 1.237403110571432, "learning_rate": 1.650531907751856e-05, "loss": 0.5956544280052185, "step": 2127 }, { "epoch": 0.6223132036847492, "grad_norm": 1.3646659152675398, "learning_rate": 1.6501646459246245e-05, "loss": 0.582348108291626, "step": 2128 }, { "epoch": 0.6226056441000146, "grad_norm": 1.327256978138479, "learning_rate": 1.64979723212536e-05, "loss": 0.8057917356491089, "step": 2129 }, { "epoch": 0.6228980845152801, "grad_norm": 1.1623408864017983, "learning_rate": 1.6494296664399428e-05, "loss": 0.6237305402755737, "step": 2130 }, { "epoch": 0.6231905249305454, "grad_norm": 1.3152067943219485, "learning_rate": 1.6490619489542905e-05, "loss": 0.6445767879486084, "step": 2131 }, { "epoch": 0.6234829653458108, "grad_norm": 1.4611569228302668, "learning_rate": 1.648694079754354e-05, "loss": 0.6397994160652161, "step": 2132 }, { "epoch": 0.6237754057610762, "grad_norm": 1.3955823025243248, "learning_rate": 1.64832605892612e-05, "loss": 0.8216533660888672, "step": 2133 }, { "epoch": 0.6240678461763416, "grad_norm": 1.3134524569329014, "learning_rate": 1.6479578865556115e-05, "loss": 0.6894406080245972, "step": 2134 }, { "epoch": 0.6243602865916069, "grad_norm": 1.2940264658828888, "learning_rate": 1.6475895627288873e-05, "loss": 0.6608946323394775, "step": 2135 }, { "epoch": 0.6246527270068724, "grad_norm": 1.4094544295935185, "learning_rate": 1.6472210875320397e-05, "loss": 0.6070076823234558, "step": 2136 }, { "epoch": 0.6249451674221378, "grad_norm": 1.4359082412623407, "learning_rate": 1.6468524610511982e-05, "loss": 0.7357348799705505, "step": 2137 }, { "epoch": 0.6252376078374031, "grad_norm": 1.201965871501085, "learning_rate": 1.6464836833725267e-05, "loss": 0.5959880352020264, "step": 2138 }, { "epoch": 0.6255300482526686, "grad_norm": 1.3046810888024383, "learning_rate": 1.646114754582225e-05, "loss": 0.7812649011611938, "step": 2139 }, { "epoch": 0.6258224886679339, "grad_norm": 1.6609760293820528, "learning_rate": 1.6457456747665282e-05, "loss": 0.5985091924667358, "step": 2140 }, { "epoch": 0.6261149290831993, "grad_norm": 1.5609316045902142, "learning_rate": 1.645376444011706e-05, "loss": 0.6610564589500427, "step": 2141 }, { "epoch": 0.6264073694984647, "grad_norm": 1.3917319855245425, "learning_rate": 1.6450070624040636e-05, "loss": 0.6876299381256104, "step": 2142 }, { "epoch": 0.6266998099137301, "grad_norm": 1.3567193814213938, "learning_rate": 1.6446375300299425e-05, "loss": 0.6715782284736633, "step": 2143 }, { "epoch": 0.6269922503289954, "grad_norm": 1.6061237563072754, "learning_rate": 1.644267846975718e-05, "loss": 0.6066923141479492, "step": 2144 }, { "epoch": 0.6272846907442609, "grad_norm": 1.2493532553829008, "learning_rate": 1.6438980133278017e-05, "loss": 0.5642968416213989, "step": 2145 }, { "epoch": 0.6275771311595263, "grad_norm": 1.0703284322753808, "learning_rate": 1.6435280291726394e-05, "loss": 0.604590654373169, "step": 2146 }, { "epoch": 0.6278695715747916, "grad_norm": 1.3292746736885825, "learning_rate": 1.643157894596713e-05, "loss": 0.6313889026641846, "step": 2147 }, { "epoch": 0.6281620119900571, "grad_norm": 1.0767305616181233, "learning_rate": 1.6427876096865394e-05, "loss": 0.5084092617034912, "step": 2148 }, { "epoch": 0.6284544524053224, "grad_norm": 1.250433663172197, "learning_rate": 1.6424171745286704e-05, "loss": 0.5191931128501892, "step": 2149 }, { "epoch": 0.6287468928205878, "grad_norm": 1.3567625810681667, "learning_rate": 1.6420465892096924e-05, "loss": 0.7397615909576416, "step": 2150 }, { "epoch": 0.6290393332358531, "grad_norm": 1.1359315638082286, "learning_rate": 1.641675853816228e-05, "loss": 0.622586727142334, "step": 2151 }, { "epoch": 0.6293317736511186, "grad_norm": 1.433028642480203, "learning_rate": 1.6413049684349344e-05, "loss": 0.7894928455352783, "step": 2152 }, { "epoch": 0.629624214066384, "grad_norm": 1.4395392231763253, "learning_rate": 1.640933933152504e-05, "loss": 0.5752773284912109, "step": 2153 }, { "epoch": 0.6299166544816494, "grad_norm": 1.3952520818076775, "learning_rate": 1.640562748055663e-05, "loss": 0.6738473176956177, "step": 2154 }, { "epoch": 0.6302090948969148, "grad_norm": 1.2597002399242925, "learning_rate": 1.6401914132311745e-05, "loss": 0.5789517164230347, "step": 2155 }, { "epoch": 0.6305015353121801, "grad_norm": 1.2840904364476742, "learning_rate": 1.6398199287658358e-05, "loss": 0.5925524830818176, "step": 2156 }, { "epoch": 0.6307939757274456, "grad_norm": 1.4374336859820211, "learning_rate": 1.6394482947464784e-05, "loss": 0.6949414610862732, "step": 2157 }, { "epoch": 0.6310864161427109, "grad_norm": 1.3617313094593515, "learning_rate": 1.6390765112599705e-05, "loss": 0.7435301542282104, "step": 2158 }, { "epoch": 0.6313788565579763, "grad_norm": 1.5109256996682827, "learning_rate": 1.6387045783932137e-05, "loss": 0.6931856274604797, "step": 2159 }, { "epoch": 0.6316712969732416, "grad_norm": 1.4369843702380298, "learning_rate": 1.638332496233145e-05, "loss": 0.7856471538543701, "step": 2160 }, { "epoch": 0.6319637373885071, "grad_norm": 1.460850634730034, "learning_rate": 1.6379602648667362e-05, "loss": 0.6299946308135986, "step": 2161 }, { "epoch": 0.6322561778037725, "grad_norm": 1.5299113211206812, "learning_rate": 1.6375878843809946e-05, "loss": 0.6209328174591064, "step": 2162 }, { "epoch": 0.6325486182190379, "grad_norm": 1.4269696757613273, "learning_rate": 1.6372153548629617e-05, "loss": 0.6498390436172485, "step": 2163 }, { "epoch": 0.6328410586343033, "grad_norm": 2.1028833494160573, "learning_rate": 1.6368426763997137e-05, "loss": 0.6757122278213501, "step": 2164 }, { "epoch": 0.6331334990495686, "grad_norm": 1.289589419762841, "learning_rate": 1.6364698490783623e-05, "loss": 0.5137026906013489, "step": 2165 }, { "epoch": 0.633425939464834, "grad_norm": 1.3914324771074273, "learning_rate": 1.6360968729860536e-05, "loss": 0.5876519680023193, "step": 2166 }, { "epoch": 0.6337183798800994, "grad_norm": 1.2533286000898018, "learning_rate": 1.6357237482099682e-05, "loss": 0.5804057717323303, "step": 2167 }, { "epoch": 0.6340108202953648, "grad_norm": 1.361440329822907, "learning_rate": 1.635350474837322e-05, "loss": 0.6186444759368896, "step": 2168 }, { "epoch": 0.6343032607106303, "grad_norm": 1.4479908785794617, "learning_rate": 1.6349770529553654e-05, "loss": 0.6358560919761658, "step": 2169 }, { "epoch": 0.6345957011258956, "grad_norm": 1.2507636068938528, "learning_rate": 1.6346034826513834e-05, "loss": 0.64283686876297, "step": 2170 }, { "epoch": 0.634888141541161, "grad_norm": 1.3854516647796151, "learning_rate": 1.6342297640126955e-05, "loss": 0.5269169807434082, "step": 2171 }, { "epoch": 0.6351805819564263, "grad_norm": 1.3233372829927026, "learning_rate": 1.6338558971266563e-05, "loss": 0.5338561534881592, "step": 2172 }, { "epoch": 0.6354730223716918, "grad_norm": 1.365606957045604, "learning_rate": 1.6334818820806555e-05, "loss": 0.5587184429168701, "step": 2173 }, { "epoch": 0.6357654627869571, "grad_norm": 1.2288709810094502, "learning_rate": 1.633107718962116e-05, "loss": 0.6468764543533325, "step": 2174 }, { "epoch": 0.6360579032022226, "grad_norm": 1.4431243955955453, "learning_rate": 1.6327334078584967e-05, "loss": 0.7305203676223755, "step": 2175 }, { "epoch": 0.636350343617488, "grad_norm": 1.3207763162749322, "learning_rate": 1.6323589488572908e-05, "loss": 0.6226189136505127, "step": 2176 }, { "epoch": 0.6366427840327533, "grad_norm": 1.4828987038724675, "learning_rate": 1.631984342046025e-05, "loss": 0.6552053093910217, "step": 2177 }, { "epoch": 0.6369352244480188, "grad_norm": 1.6836072588979352, "learning_rate": 1.6316095875122617e-05, "loss": 0.8121978044509888, "step": 2178 }, { "epoch": 0.6372276648632841, "grad_norm": 1.3359221660901908, "learning_rate": 1.6312346853435976e-05, "loss": 0.5826296806335449, "step": 2179 }, { "epoch": 0.6375201052785495, "grad_norm": 1.3567795832303162, "learning_rate": 1.630859635627664e-05, "loss": 0.5862709283828735, "step": 2180 }, { "epoch": 0.6378125456938148, "grad_norm": 1.2132204868801326, "learning_rate": 1.6304844384521263e-05, "loss": 0.7081524133682251, "step": 2181 }, { "epoch": 0.6381049861090803, "grad_norm": 1.2359384159808198, "learning_rate": 1.6301090939046843e-05, "loss": 0.6394449472427368, "step": 2182 }, { "epoch": 0.6383974265243456, "grad_norm": 1.25131780401235, "learning_rate": 1.6297336020730727e-05, "loss": 0.6184799075126648, "step": 2183 }, { "epoch": 0.638689866939611, "grad_norm": 1.3090426226978378, "learning_rate": 1.6293579630450606e-05, "loss": 0.6877666711807251, "step": 2184 }, { "epoch": 0.6389823073548765, "grad_norm": 1.3648594367613462, "learning_rate": 1.6289821769084512e-05, "loss": 0.5596371293067932, "step": 2185 }, { "epoch": 0.6392747477701418, "grad_norm": 1.1779148594123119, "learning_rate": 1.6286062437510823e-05, "loss": 0.5378291010856628, "step": 2186 }, { "epoch": 0.6395671881854073, "grad_norm": 1.2132664638530417, "learning_rate": 1.6282301636608256e-05, "loss": 0.6965627670288086, "step": 2187 }, { "epoch": 0.6398596286006726, "grad_norm": 1.3017112466193883, "learning_rate": 1.6278539367255885e-05, "loss": 0.5939220190048218, "step": 2188 }, { "epoch": 0.640152069015938, "grad_norm": 1.3743138396251577, "learning_rate": 1.6274775630333104e-05, "loss": 0.6225341558456421, "step": 2189 }, { "epoch": 0.6404445094312033, "grad_norm": 1.103061387587319, "learning_rate": 1.6271010426719672e-05, "loss": 0.471333384513855, "step": 2190 }, { "epoch": 0.6407369498464688, "grad_norm": 1.3505910885858836, "learning_rate": 1.626724375729568e-05, "loss": 0.6066263914108276, "step": 2191 }, { "epoch": 0.6410293902617342, "grad_norm": 1.2842885881869934, "learning_rate": 1.626347562294157e-05, "loss": 0.6525982618331909, "step": 2192 }, { "epoch": 0.6413218306769995, "grad_norm": 1.375624970339684, "learning_rate": 1.6259706024538113e-05, "loss": 0.7395817041397095, "step": 2193 }, { "epoch": 0.641614271092265, "grad_norm": 1.326045982489242, "learning_rate": 1.6255934962966432e-05, "loss": 0.720014214515686, "step": 2194 }, { "epoch": 0.6419067115075303, "grad_norm": 1.4102074363113735, "learning_rate": 1.625216243910799e-05, "loss": 0.6905295252799988, "step": 2195 }, { "epoch": 0.6421991519227958, "grad_norm": 1.3533501829991437, "learning_rate": 1.6248388453844596e-05, "loss": 0.6877295970916748, "step": 2196 }, { "epoch": 0.6424915923380611, "grad_norm": 1.414790050061214, "learning_rate": 1.6244613008058386e-05, "loss": 0.5782181024551392, "step": 2197 }, { "epoch": 0.6427840327533265, "grad_norm": 1.2129092557671588, "learning_rate": 1.6240836102631856e-05, "loss": 0.5253425240516663, "step": 2198 }, { "epoch": 0.6430764731685918, "grad_norm": 1.2461747547364295, "learning_rate": 1.623705773844783e-05, "loss": 0.6631319522857666, "step": 2199 }, { "epoch": 0.6433689135838573, "grad_norm": 1.6130890971192966, "learning_rate": 1.6233277916389482e-05, "loss": 0.6458526849746704, "step": 2200 }, { "epoch": 0.6436613539991227, "grad_norm": 1.5712729506149452, "learning_rate": 1.622949663734032e-05, "loss": 0.5723023414611816, "step": 2201 }, { "epoch": 0.643953794414388, "grad_norm": 1.4119455791937807, "learning_rate": 1.6225713902184193e-05, "loss": 0.6852096319198608, "step": 2202 }, { "epoch": 0.6442462348296535, "grad_norm": 1.460558869527006, "learning_rate": 1.6221929711805297e-05, "loss": 0.6343507170677185, "step": 2203 }, { "epoch": 0.6445386752449188, "grad_norm": 1.217897103510346, "learning_rate": 1.6218144067088157e-05, "loss": 0.6378631591796875, "step": 2204 }, { "epoch": 0.6448311156601843, "grad_norm": 1.1203441428966674, "learning_rate": 1.621435696891765e-05, "loss": 0.6550023555755615, "step": 2205 }, { "epoch": 0.6451235560754496, "grad_norm": 1.3522778560223117, "learning_rate": 1.6210568418178983e-05, "loss": 0.5555052757263184, "step": 2206 }, { "epoch": 0.645415996490715, "grad_norm": 1.330819772406298, "learning_rate": 1.6206778415757715e-05, "loss": 0.7171934247016907, "step": 2207 }, { "epoch": 0.6457084369059805, "grad_norm": 1.2953726655501339, "learning_rate": 1.6202986962539726e-05, "loss": 0.6464889049530029, "step": 2208 }, { "epoch": 0.6460008773212458, "grad_norm": 1.5324773487302452, "learning_rate": 1.619919405941125e-05, "loss": 0.6316033601760864, "step": 2209 }, { "epoch": 0.6462933177365112, "grad_norm": 1.2083095479015487, "learning_rate": 1.6195399707258855e-05, "loss": 0.5548732876777649, "step": 2210 }, { "epoch": 0.6465857581517765, "grad_norm": 1.088879983740594, "learning_rate": 1.6191603906969447e-05, "loss": 0.5055203437805176, "step": 2211 }, { "epoch": 0.646878198567042, "grad_norm": 1.3416079726495937, "learning_rate": 1.6187806659430268e-05, "loss": 0.7010073661804199, "step": 2212 }, { "epoch": 0.6471706389823073, "grad_norm": 1.39696751963916, "learning_rate": 1.6184007965528908e-05, "loss": 0.6188487410545349, "step": 2213 }, { "epoch": 0.6474630793975727, "grad_norm": 1.1122504211535682, "learning_rate": 1.6180207826153284e-05, "loss": 0.46920153498649597, "step": 2214 }, { "epoch": 0.6477555198128382, "grad_norm": 1.1420938414191775, "learning_rate": 1.617640624219166e-05, "loss": 0.6811172962188721, "step": 2215 }, { "epoch": 0.6480479602281035, "grad_norm": 1.456471656413964, "learning_rate": 1.617260321453263e-05, "loss": 0.6425800323486328, "step": 2216 }, { "epoch": 0.648340400643369, "grad_norm": 1.5968265799871777, "learning_rate": 1.6168798744065123e-05, "loss": 0.7020897269248962, "step": 2217 }, { "epoch": 0.6486328410586343, "grad_norm": 1.1227944263783516, "learning_rate": 1.6164992831678422e-05, "loss": 0.5872179865837097, "step": 2218 }, { "epoch": 0.6489252814738997, "grad_norm": 1.6374275819992907, "learning_rate": 1.6161185478262127e-05, "loss": 0.7414118647575378, "step": 2219 }, { "epoch": 0.649217721889165, "grad_norm": 1.2707285395428818, "learning_rate": 1.615737668470619e-05, "loss": 0.5408385396003723, "step": 2220 }, { "epoch": 0.6495101623044305, "grad_norm": 1.2587309097221344, "learning_rate": 1.6153566451900887e-05, "loss": 0.6145513653755188, "step": 2221 }, { "epoch": 0.6498026027196958, "grad_norm": 1.1746181148032837, "learning_rate": 1.6149754780736847e-05, "loss": 0.556422233581543, "step": 2222 }, { "epoch": 0.6500950431349612, "grad_norm": 1.4903419319059785, "learning_rate": 1.614594167210501e-05, "loss": 0.7155405282974243, "step": 2223 }, { "epoch": 0.6503874835502267, "grad_norm": 1.2945043385192228, "learning_rate": 1.6142127126896682e-05, "loss": 0.4988427758216858, "step": 2224 }, { "epoch": 0.650679923965492, "grad_norm": 1.3962995233264988, "learning_rate": 1.6138311146003477e-05, "loss": 0.6187007427215576, "step": 2225 }, { "epoch": 0.6509723643807575, "grad_norm": 1.329312474096709, "learning_rate": 1.6134493730317364e-05, "loss": 0.5668798685073853, "step": 2226 }, { "epoch": 0.6512648047960228, "grad_norm": 1.2528148742640925, "learning_rate": 1.6130674880730642e-05, "loss": 0.6354215145111084, "step": 2227 }, { "epoch": 0.6515572452112882, "grad_norm": 1.3738601794334195, "learning_rate": 1.612685459813594e-05, "loss": 0.5409573912620544, "step": 2228 }, { "epoch": 0.6518496856265535, "grad_norm": 1.24582725943008, "learning_rate": 1.612303288342623e-05, "loss": 0.5622435808181763, "step": 2229 }, { "epoch": 0.652142126041819, "grad_norm": 1.3303126336426627, "learning_rate": 1.6119209737494814e-05, "loss": 0.786159873008728, "step": 2230 }, { "epoch": 0.6524345664570844, "grad_norm": 1.3038971892359654, "learning_rate": 1.611538516123532e-05, "loss": 0.6359272003173828, "step": 2231 }, { "epoch": 0.6527270068723497, "grad_norm": 1.2508619512631416, "learning_rate": 1.6111559155541732e-05, "loss": 0.5688974261283875, "step": 2232 }, { "epoch": 0.6530194472876152, "grad_norm": 1.1877745994435736, "learning_rate": 1.610773172130835e-05, "loss": 0.581497311592102, "step": 2233 }, { "epoch": 0.6533118877028805, "grad_norm": 1.6577687870030173, "learning_rate": 1.6103902859429812e-05, "loss": 0.674004316329956, "step": 2234 }, { "epoch": 0.653604328118146, "grad_norm": 1.4167456148188138, "learning_rate": 1.6100072570801092e-05, "loss": 0.6798728108406067, "step": 2235 }, { "epoch": 0.6538967685334113, "grad_norm": 1.245467514643811, "learning_rate": 1.60962408563175e-05, "loss": 0.5742023587226868, "step": 2236 }, { "epoch": 0.6541892089486767, "grad_norm": 1.1993067492933944, "learning_rate": 1.6092407716874674e-05, "loss": 0.470009446144104, "step": 2237 }, { "epoch": 0.654481649363942, "grad_norm": 1.3725626324774514, "learning_rate": 1.6088573153368586e-05, "loss": 0.8113270998001099, "step": 2238 }, { "epoch": 0.6547740897792075, "grad_norm": 1.4825942391015299, "learning_rate": 1.6084737166695542e-05, "loss": 0.7737559676170349, "step": 2239 }, { "epoch": 0.6550665301944729, "grad_norm": 1.5932921988768602, "learning_rate": 1.6080899757752183e-05, "loss": 0.6499667167663574, "step": 2240 }, { "epoch": 0.6553589706097382, "grad_norm": 1.5295213411109583, "learning_rate": 1.6077060927435476e-05, "loss": 0.6898500323295593, "step": 2241 }, { "epoch": 0.6556514110250037, "grad_norm": 1.264521733401818, "learning_rate": 1.6073220676642724e-05, "loss": 0.5933262705802917, "step": 2242 }, { "epoch": 0.655943851440269, "grad_norm": 1.6150723182894215, "learning_rate": 1.606937900627157e-05, "loss": 0.6566172242164612, "step": 2243 }, { "epoch": 0.6562362918555344, "grad_norm": 1.5267009306631556, "learning_rate": 1.606553591721997e-05, "loss": 0.6955286264419556, "step": 2244 }, { "epoch": 0.6565287322707998, "grad_norm": 1.2904648803296817, "learning_rate": 1.6061691410386234e-05, "loss": 0.6905182600021362, "step": 2245 }, { "epoch": 0.6568211726860652, "grad_norm": 1.3780634556903595, "learning_rate": 1.6057845486668984e-05, "loss": 0.6733677387237549, "step": 2246 }, { "epoch": 0.6571136131013307, "grad_norm": 1.2340466884298544, "learning_rate": 1.6053998146967186e-05, "loss": 0.5368545055389404, "step": 2247 }, { "epoch": 0.657406053516596, "grad_norm": 1.4627351725055429, "learning_rate": 1.6050149392180125e-05, "loss": 0.6995619535446167, "step": 2248 }, { "epoch": 0.6576984939318614, "grad_norm": 1.2552392614352392, "learning_rate": 1.6046299223207432e-05, "loss": 0.6637085676193237, "step": 2249 }, { "epoch": 0.6579909343471267, "grad_norm": 1.3894808498189977, "learning_rate": 1.6042447640949058e-05, "loss": 0.5834380388259888, "step": 2250 }, { "epoch": 0.6582833747623922, "grad_norm": 1.1700440243092598, "learning_rate": 1.6038594646305285e-05, "loss": 0.5735288858413696, "step": 2251 }, { "epoch": 0.6585758151776575, "grad_norm": 1.274727070163542, "learning_rate": 1.6034740240176728e-05, "loss": 0.6227413415908813, "step": 2252 }, { "epoch": 0.658868255592923, "grad_norm": 1.5091805441488135, "learning_rate": 1.6030884423464336e-05, "loss": 0.6881246566772461, "step": 2253 }, { "epoch": 0.6591606960081884, "grad_norm": 1.3237201049051734, "learning_rate": 1.6027027197069376e-05, "loss": 0.6059132814407349, "step": 2254 }, { "epoch": 0.6594531364234537, "grad_norm": 1.5070949945133527, "learning_rate": 1.6023168561893453e-05, "loss": 0.5829097032546997, "step": 2255 }, { "epoch": 0.6597455768387191, "grad_norm": 1.1821076640408643, "learning_rate": 1.60193085188385e-05, "loss": 0.5173588991165161, "step": 2256 }, { "epoch": 0.6600380172539845, "grad_norm": 1.0404057140160172, "learning_rate": 1.601544706880678e-05, "loss": 0.5128534436225891, "step": 2257 }, { "epoch": 0.6603304576692499, "grad_norm": 1.4274902732235735, "learning_rate": 1.601158421270088e-05, "loss": 0.5472848415374756, "step": 2258 }, { "epoch": 0.6606228980845152, "grad_norm": 1.2505155913554076, "learning_rate": 1.6007719951423725e-05, "loss": 0.5775434970855713, "step": 2259 }, { "epoch": 0.6609153384997807, "grad_norm": 1.2760490287043558, "learning_rate": 1.6003854285878558e-05, "loss": 0.5529654622077942, "step": 2260 }, { "epoch": 0.661207778915046, "grad_norm": 1.2950239037035343, "learning_rate": 1.5999987216968954e-05, "loss": 0.5295222997665405, "step": 2261 }, { "epoch": 0.6615002193303114, "grad_norm": 1.42880093351922, "learning_rate": 1.5996118745598817e-05, "loss": 0.6782759428024292, "step": 2262 }, { "epoch": 0.6617926597455769, "grad_norm": 1.5123560217291456, "learning_rate": 1.5992248872672384e-05, "loss": 0.7698723077774048, "step": 2263 }, { "epoch": 0.6620851001608422, "grad_norm": 1.224014553870767, "learning_rate": 1.5988377599094208e-05, "loss": 0.5056325793266296, "step": 2264 }, { "epoch": 0.6623775405761076, "grad_norm": 1.2811286417806291, "learning_rate": 1.598450492576918e-05, "loss": 0.6748740673065186, "step": 2265 }, { "epoch": 0.662669980991373, "grad_norm": 1.4413699029522251, "learning_rate": 1.598063085360251e-05, "loss": 0.6594111919403076, "step": 2266 }, { "epoch": 0.6629624214066384, "grad_norm": 1.490546706478741, "learning_rate": 1.5976755383499743e-05, "loss": 0.5942472815513611, "step": 2267 }, { "epoch": 0.6632548618219037, "grad_norm": 1.4166382340274284, "learning_rate": 1.5972878516366742e-05, "loss": 0.6956725120544434, "step": 2268 }, { "epoch": 0.6635473022371692, "grad_norm": 1.5479108671282409, "learning_rate": 1.5969000253109707e-05, "loss": 0.6743103265762329, "step": 2269 }, { "epoch": 0.6638397426524346, "grad_norm": 1.2415014970437994, "learning_rate": 1.596512059463515e-05, "loss": 0.5452187061309814, "step": 2270 }, { "epoch": 0.6641321830676999, "grad_norm": 1.305856048148522, "learning_rate": 1.5961239541849923e-05, "loss": 0.6064754128456116, "step": 2271 }, { "epoch": 0.6644246234829654, "grad_norm": 1.1672873660489786, "learning_rate": 1.59573570956612e-05, "loss": 0.5879498720169067, "step": 2272 }, { "epoch": 0.6647170638982307, "grad_norm": 1.2464190562799757, "learning_rate": 1.595347325697648e-05, "loss": 0.6610721945762634, "step": 2273 }, { "epoch": 0.6650095043134961, "grad_norm": 1.5001752360693776, "learning_rate": 1.594958802670358e-05, "loss": 0.6674839854240417, "step": 2274 }, { "epoch": 0.6653019447287615, "grad_norm": 1.2669024802691538, "learning_rate": 1.5945701405750654e-05, "loss": 0.5189186334609985, "step": 2275 }, { "epoch": 0.6655943851440269, "grad_norm": 1.096047033017533, "learning_rate": 1.5941813395026174e-05, "loss": 0.5225304365158081, "step": 2276 }, { "epoch": 0.6658868255592922, "grad_norm": 1.1982797539630743, "learning_rate": 1.5937923995438942e-05, "loss": 0.5426747798919678, "step": 2277 }, { "epoch": 0.6661792659745577, "grad_norm": 1.1331316680397499, "learning_rate": 1.593403320789808e-05, "loss": 0.6408158540725708, "step": 2278 }, { "epoch": 0.6664717063898231, "grad_norm": 1.2777185085969938, "learning_rate": 1.5930141033313034e-05, "loss": 0.6213311553001404, "step": 2279 }, { "epoch": 0.6667641468050884, "grad_norm": 1.2938845863415658, "learning_rate": 1.5926247472593575e-05, "loss": 0.6538233757019043, "step": 2280 }, { "epoch": 0.6670565872203539, "grad_norm": 1.4396815547692279, "learning_rate": 1.5922352526649803e-05, "loss": 0.6714701056480408, "step": 2281 }, { "epoch": 0.6673490276356192, "grad_norm": 1.2875131974555427, "learning_rate": 1.5918456196392137e-05, "loss": 0.501068115234375, "step": 2282 }, { "epoch": 0.6676414680508846, "grad_norm": 1.483722651200639, "learning_rate": 1.5914558482731317e-05, "loss": 0.6551339626312256, "step": 2283 }, { "epoch": 0.66793390846615, "grad_norm": 1.575561891265528, "learning_rate": 1.5910659386578415e-05, "loss": 0.666611909866333, "step": 2284 }, { "epoch": 0.6682263488814154, "grad_norm": 1.3058077151253007, "learning_rate": 1.590675890884482e-05, "loss": 0.6612483859062195, "step": 2285 }, { "epoch": 0.6685187892966808, "grad_norm": 1.535602248808955, "learning_rate": 1.590285705044224e-05, "loss": 0.5299272537231445, "step": 2286 }, { "epoch": 0.6688112297119462, "grad_norm": 1.5209550044520355, "learning_rate": 1.589895381228272e-05, "loss": 0.6873815655708313, "step": 2287 }, { "epoch": 0.6691036701272116, "grad_norm": 1.333463107294571, "learning_rate": 1.5895049195278608e-05, "loss": 0.6473613977432251, "step": 2288 }, { "epoch": 0.6693961105424769, "grad_norm": 1.4389212790848083, "learning_rate": 1.589114320034259e-05, "loss": 0.6600902080535889, "step": 2289 }, { "epoch": 0.6696885509577424, "grad_norm": 1.7581559017014303, "learning_rate": 1.5887235828387667e-05, "loss": 0.6066039800643921, "step": 2290 }, { "epoch": 0.6699809913730077, "grad_norm": 1.2475073124572584, "learning_rate": 1.5883327080327165e-05, "loss": 0.5411461591720581, "step": 2291 }, { "epoch": 0.6702734317882731, "grad_norm": 1.3264098990068387, "learning_rate": 1.587941695707473e-05, "loss": 0.5678138136863708, "step": 2292 }, { "epoch": 0.6705658722035386, "grad_norm": 1.2017893940389541, "learning_rate": 1.5875505459544327e-05, "loss": 0.6175323724746704, "step": 2293 }, { "epoch": 0.6708583126188039, "grad_norm": 1.2255154092981597, "learning_rate": 1.587159258865025e-05, "loss": 0.5790976285934448, "step": 2294 }, { "epoch": 0.6711507530340693, "grad_norm": 1.4070059880127774, "learning_rate": 1.58676783453071e-05, "loss": 0.5891247391700745, "step": 2295 }, { "epoch": 0.6714431934493347, "grad_norm": 1.3680740765730994, "learning_rate": 1.5863762730429817e-05, "loss": 0.5604299902915955, "step": 2296 }, { "epoch": 0.6717356338646001, "grad_norm": 1.156075846793115, "learning_rate": 1.585984574493365e-05, "loss": 0.5402317047119141, "step": 2297 }, { "epoch": 0.6720280742798654, "grad_norm": 1.2729484704762741, "learning_rate": 1.5855927389734163e-05, "loss": 0.5569097995758057, "step": 2298 }, { "epoch": 0.6723205146951309, "grad_norm": 1.792109537125727, "learning_rate": 1.5852007665747255e-05, "loss": 0.6754734516143799, "step": 2299 }, { "epoch": 0.6726129551103962, "grad_norm": 1.2015482502693244, "learning_rate": 1.584808657388914e-05, "loss": 0.5555064678192139, "step": 2300 }, { "epoch": 0.6729053955256616, "grad_norm": 1.2978798977032824, "learning_rate": 1.584416411507634e-05, "loss": 0.5735480785369873, "step": 2301 }, { "epoch": 0.6731978359409271, "grad_norm": 1.3948021707686127, "learning_rate": 1.5840240290225713e-05, "loss": 0.6084697842597961, "step": 2302 }, { "epoch": 0.6734902763561924, "grad_norm": 1.3972987341637648, "learning_rate": 1.5836315100254427e-05, "loss": 0.5747361779212952, "step": 2303 }, { "epoch": 0.6737827167714578, "grad_norm": 1.3042539657521541, "learning_rate": 1.583238854607997e-05, "loss": 0.6597394943237305, "step": 2304 }, { "epoch": 0.6740751571867232, "grad_norm": 1.2885200657030746, "learning_rate": 1.582846062862016e-05, "loss": 0.6054418087005615, "step": 2305 }, { "epoch": 0.6743675976019886, "grad_norm": 1.4670353156004656, "learning_rate": 1.5824531348793106e-05, "loss": 0.6897715330123901, "step": 2306 }, { "epoch": 0.6746600380172539, "grad_norm": 1.2379672312585208, "learning_rate": 1.5820600707517265e-05, "loss": 0.5438888072967529, "step": 2307 }, { "epoch": 0.6749524784325194, "grad_norm": 1.3511076823584265, "learning_rate": 1.5816668705711402e-05, "loss": 0.5139850378036499, "step": 2308 }, { "epoch": 0.6752449188477848, "grad_norm": 1.3878243291723096, "learning_rate": 1.5812735344294594e-05, "loss": 0.5970615744590759, "step": 2309 }, { "epoch": 0.6755373592630501, "grad_norm": 1.5290136714699685, "learning_rate": 1.580880062418624e-05, "loss": 0.6206730604171753, "step": 2310 }, { "epoch": 0.6758297996783156, "grad_norm": 1.5283867982171593, "learning_rate": 1.580486454630606e-05, "loss": 0.6545864939689636, "step": 2311 }, { "epoch": 0.6761222400935809, "grad_norm": 1.6726831788405112, "learning_rate": 1.5800927111574084e-05, "loss": 0.6284571290016174, "step": 2312 }, { "epoch": 0.6764146805088463, "grad_norm": 1.3062366838416066, "learning_rate": 1.5796988320910665e-05, "loss": 0.6662822365760803, "step": 2313 }, { "epoch": 0.6767071209241117, "grad_norm": 1.4857961720461585, "learning_rate": 1.5793048175236477e-05, "loss": 0.6952080130577087, "step": 2314 }, { "epoch": 0.6769995613393771, "grad_norm": 1.1527122349254486, "learning_rate": 1.5789106675472496e-05, "loss": 0.55562424659729, "step": 2315 }, { "epoch": 0.6772920017546424, "grad_norm": 1.417075363017466, "learning_rate": 1.578516382254003e-05, "loss": 0.696354866027832, "step": 2316 }, { "epoch": 0.6775844421699079, "grad_norm": 1.2481046919985836, "learning_rate": 1.5781219617360695e-05, "loss": 0.5764954686164856, "step": 2317 }, { "epoch": 0.6778768825851733, "grad_norm": 1.5617477082955222, "learning_rate": 1.577727406085642e-05, "loss": 0.6944533586502075, "step": 2318 }, { "epoch": 0.6781693230004386, "grad_norm": 1.5273473613933928, "learning_rate": 1.5773327153949465e-05, "loss": 0.5517882704734802, "step": 2319 }, { "epoch": 0.6784617634157041, "grad_norm": 1.3495609581159556, "learning_rate": 1.576937889756239e-05, "loss": 0.6151533126831055, "step": 2320 }, { "epoch": 0.6787542038309694, "grad_norm": 1.3729348393231853, "learning_rate": 1.5765429292618075e-05, "loss": 0.6221417784690857, "step": 2321 }, { "epoch": 0.6790466442462348, "grad_norm": 1.5561656408525308, "learning_rate": 1.576147834003972e-05, "loss": 0.6218827962875366, "step": 2322 }, { "epoch": 0.6793390846615002, "grad_norm": 1.2844085482190328, "learning_rate": 1.575752604075083e-05, "loss": 0.689696192741394, "step": 2323 }, { "epoch": 0.6796315250767656, "grad_norm": 1.459910366351317, "learning_rate": 1.5753572395675234e-05, "loss": 0.6457825899124146, "step": 2324 }, { "epoch": 0.679923965492031, "grad_norm": 1.660980107305809, "learning_rate": 1.5749617405737075e-05, "loss": 0.6261845827102661, "step": 2325 }, { "epoch": 0.6802164059072964, "grad_norm": 1.5113706854166593, "learning_rate": 1.5745661071860802e-05, "loss": 0.6631760597229004, "step": 2326 }, { "epoch": 0.6805088463225618, "grad_norm": 1.4700703601826162, "learning_rate": 1.574170339497119e-05, "loss": 0.6223125457763672, "step": 2327 }, { "epoch": 0.6808012867378271, "grad_norm": 1.4289384563362724, "learning_rate": 1.5737744375993318e-05, "loss": 0.5649152398109436, "step": 2328 }, { "epoch": 0.6810937271530926, "grad_norm": 1.3637036537520066, "learning_rate": 1.573378401585259e-05, "loss": 0.6822011470794678, "step": 2329 }, { "epoch": 0.6813861675683579, "grad_norm": 1.243454490323945, "learning_rate": 1.5729822315474704e-05, "loss": 0.4853206276893616, "step": 2330 }, { "epoch": 0.6816786079836233, "grad_norm": 1.3491879449563893, "learning_rate": 1.572585927578569e-05, "loss": 0.6410783529281616, "step": 2331 }, { "epoch": 0.6819710483988888, "grad_norm": 1.2349335330440738, "learning_rate": 1.572189489771189e-05, "loss": 0.607154369354248, "step": 2332 }, { "epoch": 0.6822634888141541, "grad_norm": 1.2303800918258645, "learning_rate": 1.571792918217994e-05, "loss": 0.5079061388969421, "step": 2333 }, { "epoch": 0.6825559292294195, "grad_norm": 1.355109139858454, "learning_rate": 1.5713962130116812e-05, "loss": 0.534178614616394, "step": 2334 }, { "epoch": 0.6828483696446849, "grad_norm": 1.099124567807314, "learning_rate": 1.5709993742449777e-05, "loss": 0.6172807812690735, "step": 2335 }, { "epoch": 0.6831408100599503, "grad_norm": 1.468863618054796, "learning_rate": 1.5706024020106425e-05, "loss": 0.6863975524902344, "step": 2336 }, { "epoch": 0.6834332504752156, "grad_norm": 1.3542187494807805, "learning_rate": 1.570205296401465e-05, "loss": 0.6314880847930908, "step": 2337 }, { "epoch": 0.6837256908904811, "grad_norm": 1.4888474767820694, "learning_rate": 1.5698080575102662e-05, "loss": 0.5420910120010376, "step": 2338 }, { "epoch": 0.6840181313057464, "grad_norm": 1.545548665208996, "learning_rate": 1.5694106854298988e-05, "loss": 0.6598352789878845, "step": 2339 }, { "epoch": 0.6843105717210118, "grad_norm": 1.1855737189309028, "learning_rate": 1.5690131802532454e-05, "loss": 0.49957770109176636, "step": 2340 }, { "epoch": 0.6846030121362773, "grad_norm": 1.3910703437631544, "learning_rate": 1.568615542073221e-05, "loss": 0.7217017412185669, "step": 2341 }, { "epoch": 0.6848954525515426, "grad_norm": 1.383168011584397, "learning_rate": 1.5682177709827705e-05, "loss": 0.5824606418609619, "step": 2342 }, { "epoch": 0.685187892966808, "grad_norm": 1.4861418668417947, "learning_rate": 1.567819867074871e-05, "loss": 0.5932704210281372, "step": 2343 }, { "epoch": 0.6854803333820734, "grad_norm": 1.1927307747773088, "learning_rate": 1.5674218304425304e-05, "loss": 0.6098836660385132, "step": 2344 }, { "epoch": 0.6857727737973388, "grad_norm": 1.3302018518433079, "learning_rate": 1.5670236611787865e-05, "loss": 0.5158270597457886, "step": 2345 }, { "epoch": 0.6860652142126041, "grad_norm": 1.431950758183516, "learning_rate": 1.5666253593767095e-05, "loss": 0.7840174436569214, "step": 2346 }, { "epoch": 0.6863576546278696, "grad_norm": 1.3462478651155303, "learning_rate": 1.5662269251294e-05, "loss": 0.5665150880813599, "step": 2347 }, { "epoch": 0.686650095043135, "grad_norm": 1.2308130347699304, "learning_rate": 1.5658283585299894e-05, "loss": 0.5801588296890259, "step": 2348 }, { "epoch": 0.6869425354584003, "grad_norm": 1.487298330014143, "learning_rate": 1.56542965967164e-05, "loss": 0.759188175201416, "step": 2349 }, { "epoch": 0.6872349758736658, "grad_norm": 1.5717076197736846, "learning_rate": 1.565030828647546e-05, "loss": 0.7182703018188477, "step": 2350 }, { "epoch": 0.6875274162889311, "grad_norm": 1.3681215378392677, "learning_rate": 1.564631865550931e-05, "loss": 0.7172018885612488, "step": 2351 }, { "epoch": 0.6878198567041965, "grad_norm": 1.3897042930637002, "learning_rate": 1.5642327704750502e-05, "loss": 0.5959519743919373, "step": 2352 }, { "epoch": 0.6881122971194619, "grad_norm": 1.3686338632915553, "learning_rate": 1.5638335435131902e-05, "loss": 0.5531836748123169, "step": 2353 }, { "epoch": 0.6884047375347273, "grad_norm": 1.2097339017222586, "learning_rate": 1.5634341847586676e-05, "loss": 0.672225296497345, "step": 2354 }, { "epoch": 0.6886971779499926, "grad_norm": 1.3740176007353215, "learning_rate": 1.5630346943048297e-05, "loss": 0.5721465349197388, "step": 2355 }, { "epoch": 0.6889896183652581, "grad_norm": 1.2416767467837069, "learning_rate": 1.5626350722450555e-05, "loss": 0.6357900500297546, "step": 2356 }, { "epoch": 0.6892820587805235, "grad_norm": 1.241847883566859, "learning_rate": 1.5622353186727542e-05, "loss": 0.6348878145217896, "step": 2357 }, { "epoch": 0.6895744991957888, "grad_norm": 1.390537638221337, "learning_rate": 1.5618354336813656e-05, "loss": 0.5473623275756836, "step": 2358 }, { "epoch": 0.6898669396110543, "grad_norm": 1.4299851255948683, "learning_rate": 1.5614354173643606e-05, "loss": 0.8284158706665039, "step": 2359 }, { "epoch": 0.6901593800263196, "grad_norm": 1.3561063303885135, "learning_rate": 1.5610352698152396e-05, "loss": 0.5915359854698181, "step": 2360 }, { "epoch": 0.690451820441585, "grad_norm": 1.434488423567872, "learning_rate": 1.560634991127536e-05, "loss": 0.6173555254936218, "step": 2361 }, { "epoch": 0.6907442608568504, "grad_norm": 1.2348756002421877, "learning_rate": 1.560234581394812e-05, "loss": 0.5551577806472778, "step": 2362 }, { "epoch": 0.6910367012721158, "grad_norm": 1.6912535037446208, "learning_rate": 1.559834040710661e-05, "loss": 0.7160264253616333, "step": 2363 }, { "epoch": 0.6913291416873812, "grad_norm": 1.4348139771874249, "learning_rate": 1.5594333691687062e-05, "loss": 0.5986248850822449, "step": 2364 }, { "epoch": 0.6916215821026466, "grad_norm": 1.6827348555719241, "learning_rate": 1.559032566862603e-05, "loss": 0.7347019910812378, "step": 2365 }, { "epoch": 0.691914022517912, "grad_norm": 1.1496166027771255, "learning_rate": 1.5586316338860363e-05, "loss": 0.502663791179657, "step": 2366 }, { "epoch": 0.6922064629331773, "grad_norm": 1.1610976211375774, "learning_rate": 1.558230570332722e-05, "loss": 0.5026617050170898, "step": 2367 }, { "epoch": 0.6924989033484428, "grad_norm": 1.3196703072069724, "learning_rate": 1.5578293762964057e-05, "loss": 0.6091101169586182, "step": 2368 }, { "epoch": 0.6927913437637081, "grad_norm": 1.1607138049044183, "learning_rate": 1.5574280518708645e-05, "loss": 0.6202579736709595, "step": 2369 }, { "epoch": 0.6930837841789735, "grad_norm": 1.3867301068189375, "learning_rate": 1.557026597149905e-05, "loss": 0.6532948017120361, "step": 2370 }, { "epoch": 0.693376224594239, "grad_norm": 1.2799465632685962, "learning_rate": 1.5566250122273658e-05, "loss": 0.6197448372840881, "step": 2371 }, { "epoch": 0.6936686650095043, "grad_norm": 1.330123548058068, "learning_rate": 1.556223297197114e-05, "loss": 0.6181553602218628, "step": 2372 }, { "epoch": 0.6939611054247697, "grad_norm": 1.3757625130132767, "learning_rate": 1.5558214521530482e-05, "loss": 0.6015427112579346, "step": 2373 }, { "epoch": 0.6942535458400351, "grad_norm": 1.4511778478720454, "learning_rate": 1.555419477189098e-05, "loss": 0.6204534769058228, "step": 2374 }, { "epoch": 0.6945459862553005, "grad_norm": 1.2237746404921626, "learning_rate": 1.5550173723992218e-05, "loss": 0.5914584994316101, "step": 2375 }, { "epoch": 0.6948384266705658, "grad_norm": 1.2633817911858796, "learning_rate": 1.554615137877409e-05, "loss": 0.5077188611030579, "step": 2376 }, { "epoch": 0.6951308670858313, "grad_norm": 1.1523903505061626, "learning_rate": 1.55421277371768e-05, "loss": 0.5560270547866821, "step": 2377 }, { "epoch": 0.6954233075010966, "grad_norm": 1.6214020445600121, "learning_rate": 1.553810280014085e-05, "loss": 0.7064549922943115, "step": 2378 }, { "epoch": 0.695715747916362, "grad_norm": 1.4249847873824701, "learning_rate": 1.5534076568607043e-05, "loss": 0.7433110475540161, "step": 2379 }, { "epoch": 0.6960081883316275, "grad_norm": 1.4661372034410074, "learning_rate": 1.553004904351648e-05, "loss": 0.6061110496520996, "step": 2380 }, { "epoch": 0.6963006287468928, "grad_norm": 1.3530915937691412, "learning_rate": 1.5526020225810583e-05, "loss": 0.604006290435791, "step": 2381 }, { "epoch": 0.6965930691621582, "grad_norm": 1.3193058416919141, "learning_rate": 1.5521990116431052e-05, "loss": 0.6221635341644287, "step": 2382 }, { "epoch": 0.6968855095774236, "grad_norm": 1.17260855579956, "learning_rate": 1.551795871631991e-05, "loss": 0.5848093032836914, "step": 2383 }, { "epoch": 0.697177949992689, "grad_norm": 1.3909866883805502, "learning_rate": 1.5513926026419464e-05, "loss": 0.6451606154441833, "step": 2384 }, { "epoch": 0.6974703904079543, "grad_norm": 1.2515682694896817, "learning_rate": 1.5509892047672336e-05, "loss": 0.7922245264053345, "step": 2385 }, { "epoch": 0.6977628308232198, "grad_norm": 1.501698757307051, "learning_rate": 1.5505856781021443e-05, "loss": 0.6458885073661804, "step": 2386 }, { "epoch": 0.6980552712384852, "grad_norm": 1.3253141303151825, "learning_rate": 1.5501820227410002e-05, "loss": 0.5989570617675781, "step": 2387 }, { "epoch": 0.6983477116537505, "grad_norm": 1.4240123629840666, "learning_rate": 1.5497782387781536e-05, "loss": 0.740998387336731, "step": 2388 }, { "epoch": 0.698640152069016, "grad_norm": 1.4547948512453808, "learning_rate": 1.5493743263079866e-05, "loss": 0.63981032371521, "step": 2389 }, { "epoch": 0.6989325924842813, "grad_norm": 1.325001348454028, "learning_rate": 1.5489702854249106e-05, "loss": 0.766716480255127, "step": 2390 }, { "epoch": 0.6992250328995467, "grad_norm": 1.541044208915787, "learning_rate": 1.5485661162233684e-05, "loss": 0.7879365086555481, "step": 2391 }, { "epoch": 0.6995174733148121, "grad_norm": 1.3532949065271656, "learning_rate": 1.5481618187978322e-05, "loss": 0.6005786657333374, "step": 2392 }, { "epoch": 0.6998099137300775, "grad_norm": 1.2952910023515818, "learning_rate": 1.5477573932428033e-05, "loss": 0.6207927465438843, "step": 2393 }, { "epoch": 0.7001023541453428, "grad_norm": 1.4490674696543298, "learning_rate": 1.5473528396528144e-05, "loss": 0.5582053661346436, "step": 2394 }, { "epoch": 0.7003947945606083, "grad_norm": 1.6315416515790502, "learning_rate": 1.5469481581224274e-05, "loss": 0.5701307058334351, "step": 2395 }, { "epoch": 0.7006872349758737, "grad_norm": 1.3804181292115258, "learning_rate": 1.546543348746233e-05, "loss": 0.6201068162918091, "step": 2396 }, { "epoch": 0.700979675391139, "grad_norm": 1.3282086716914991, "learning_rate": 1.5461384116188546e-05, "loss": 0.6102321147918701, "step": 2397 }, { "epoch": 0.7012721158064045, "grad_norm": 1.361382387889105, "learning_rate": 1.545733346834943e-05, "loss": 0.5445820093154907, "step": 2398 }, { "epoch": 0.7015645562216698, "grad_norm": 1.3134018034606705, "learning_rate": 1.5453281544891797e-05, "loss": 0.5278012752532959, "step": 2399 }, { "epoch": 0.7018569966369352, "grad_norm": 1.6159840401286016, "learning_rate": 1.544922834676276e-05, "loss": 0.7051252126693726, "step": 2400 }, { "epoch": 0.7021494370522006, "grad_norm": 1.3552623655435003, "learning_rate": 1.544517387490973e-05, "loss": 0.6024646759033203, "step": 2401 }, { "epoch": 0.702441877467466, "grad_norm": 1.3323978020414873, "learning_rate": 1.5441118130280406e-05, "loss": 0.5563746094703674, "step": 2402 }, { "epoch": 0.7027343178827314, "grad_norm": 1.3671297363224464, "learning_rate": 1.5437061113822805e-05, "loss": 0.5971669554710388, "step": 2403 }, { "epoch": 0.7030267582979968, "grad_norm": 1.5082475685517047, "learning_rate": 1.5433002826485234e-05, "loss": 0.5846019983291626, "step": 2404 }, { "epoch": 0.7033191987132622, "grad_norm": 1.2921876796744827, "learning_rate": 1.5428943269216278e-05, "loss": 0.5571885108947754, "step": 2405 }, { "epoch": 0.7036116391285275, "grad_norm": 1.15652993390593, "learning_rate": 1.542488244296484e-05, "loss": 0.4770846962928772, "step": 2406 }, { "epoch": 0.703904079543793, "grad_norm": 1.6398352091801953, "learning_rate": 1.542082034868012e-05, "loss": 0.636760950088501, "step": 2407 }, { "epoch": 0.7041965199590583, "grad_norm": 1.6877906333209267, "learning_rate": 1.5416756987311603e-05, "loss": 0.7264662981033325, "step": 2408 }, { "epoch": 0.7044889603743237, "grad_norm": 1.372256728403267, "learning_rate": 1.5412692359809073e-05, "loss": 0.6723978519439697, "step": 2409 }, { "epoch": 0.7047814007895892, "grad_norm": 1.4362583031777838, "learning_rate": 1.5408626467122612e-05, "loss": 0.6205083727836609, "step": 2410 }, { "epoch": 0.7050738412048545, "grad_norm": 1.4495567778043355, "learning_rate": 1.54045593102026e-05, "loss": 0.5980903506278992, "step": 2411 }, { "epoch": 0.7053662816201199, "grad_norm": 1.4897959908790472, "learning_rate": 1.540049088999971e-05, "loss": 0.6311691999435425, "step": 2412 }, { "epoch": 0.7056587220353853, "grad_norm": 1.428243709143454, "learning_rate": 1.539642120746491e-05, "loss": 0.5872593522071838, "step": 2413 }, { "epoch": 0.7059511624506507, "grad_norm": 1.351001450570791, "learning_rate": 1.5392350263549462e-05, "loss": 0.5037539005279541, "step": 2414 }, { "epoch": 0.706243602865916, "grad_norm": 1.4775045660401276, "learning_rate": 1.538827805920493e-05, "loss": 0.5917855501174927, "step": 2415 }, { "epoch": 0.7065360432811815, "grad_norm": 1.3687769613569196, "learning_rate": 1.538420459538316e-05, "loss": 0.6350749731063843, "step": 2416 }, { "epoch": 0.7068284836964468, "grad_norm": 1.330110483636511, "learning_rate": 1.53801298730363e-05, "loss": 0.6828908920288086, "step": 2417 }, { "epoch": 0.7071209241117122, "grad_norm": 1.5864329436081315, "learning_rate": 1.5376053893116796e-05, "loss": 0.6307995319366455, "step": 2418 }, { "epoch": 0.7074133645269777, "grad_norm": 1.3609756396375527, "learning_rate": 1.5371976656577385e-05, "loss": 0.5305014252662659, "step": 2419 }, { "epoch": 0.707705804942243, "grad_norm": 1.2953614031977334, "learning_rate": 1.536789816437109e-05, "loss": 0.560103178024292, "step": 2420 }, { "epoch": 0.7079982453575084, "grad_norm": 1.4823675619867462, "learning_rate": 1.5363818417451236e-05, "loss": 0.5449249148368835, "step": 2421 }, { "epoch": 0.7082906857727738, "grad_norm": 1.575423149049035, "learning_rate": 1.5359737416771438e-05, "loss": 0.7456427812576294, "step": 2422 }, { "epoch": 0.7085831261880392, "grad_norm": 1.4606336998212586, "learning_rate": 1.5355655163285607e-05, "loss": 0.5401932597160339, "step": 2423 }, { "epoch": 0.7088755666033045, "grad_norm": 1.4384817217494414, "learning_rate": 1.5351571657947947e-05, "loss": 0.6215255856513977, "step": 2424 }, { "epoch": 0.70916800701857, "grad_norm": 1.454238489435378, "learning_rate": 1.5347486901712946e-05, "loss": 0.724073052406311, "step": 2425 }, { "epoch": 0.7094604474338354, "grad_norm": 1.280381472439187, "learning_rate": 1.5343400895535402e-05, "loss": 0.6375223398208618, "step": 2426 }, { "epoch": 0.7097528878491007, "grad_norm": 1.4740965908748953, "learning_rate": 1.533931364037038e-05, "loss": 0.6087045669555664, "step": 2427 }, { "epoch": 0.7100453282643662, "grad_norm": 1.4709664710326, "learning_rate": 1.5335225137173262e-05, "loss": 0.7927658557891846, "step": 2428 }, { "epoch": 0.7103377686796315, "grad_norm": 1.4583904783773962, "learning_rate": 1.5331135386899702e-05, "loss": 0.6312417387962341, "step": 2429 }, { "epoch": 0.7106302090948969, "grad_norm": 1.3514647325044575, "learning_rate": 1.5327044390505666e-05, "loss": 0.6856948137283325, "step": 2430 }, { "epoch": 0.7109226495101623, "grad_norm": 1.3486136616110067, "learning_rate": 1.532295214894739e-05, "loss": 0.5683865547180176, "step": 2431 }, { "epoch": 0.7112150899254277, "grad_norm": 1.5290242403967753, "learning_rate": 1.5318858663181412e-05, "loss": 0.6208291053771973, "step": 2432 }, { "epoch": 0.711507530340693, "grad_norm": 1.5265891330435364, "learning_rate": 1.531476393416456e-05, "loss": 0.6751389503479004, "step": 2433 }, { "epoch": 0.7117999707559585, "grad_norm": 1.1685210774635664, "learning_rate": 1.5310667962853954e-05, "loss": 0.422024667263031, "step": 2434 }, { "epoch": 0.7120924111712239, "grad_norm": 1.377587949543332, "learning_rate": 1.5306570750207003e-05, "loss": 0.6714169979095459, "step": 2435 }, { "epoch": 0.7123848515864892, "grad_norm": 1.4808127088080212, "learning_rate": 1.53024722971814e-05, "loss": 0.5757386088371277, "step": 2436 }, { "epoch": 0.7126772920017547, "grad_norm": 1.4790386820456973, "learning_rate": 1.529837260473514e-05, "loss": 0.5686037540435791, "step": 2437 }, { "epoch": 0.71296973241702, "grad_norm": 1.570681384959534, "learning_rate": 1.5294271673826498e-05, "loss": 0.7601959705352783, "step": 2438 }, { "epoch": 0.7132621728322854, "grad_norm": 1.44814607189911, "learning_rate": 1.529016950541404e-05, "loss": 0.5654840469360352, "step": 2439 }, { "epoch": 0.7135546132475508, "grad_norm": 1.5844428110219366, "learning_rate": 1.5286066100456623e-05, "loss": 0.7009234428405762, "step": 2440 }, { "epoch": 0.7138470536628162, "grad_norm": 1.166961279939158, "learning_rate": 1.52819614599134e-05, "loss": 0.4856370687484741, "step": 2441 }, { "epoch": 0.7141394940780816, "grad_norm": 1.2283639685035557, "learning_rate": 1.52778555847438e-05, "loss": 0.5135019421577454, "step": 2442 }, { "epoch": 0.714431934493347, "grad_norm": 1.3437978900697465, "learning_rate": 1.5273748475907542e-05, "loss": 0.7350283861160278, "step": 2443 }, { "epoch": 0.7147243749086124, "grad_norm": 1.1274424242274286, "learning_rate": 1.5269640134364646e-05, "loss": 0.5985803604125977, "step": 2444 }, { "epoch": 0.7150168153238777, "grad_norm": 1.2982732418248375, "learning_rate": 1.5265530561075407e-05, "loss": 0.6840892434120178, "step": 2445 }, { "epoch": 0.7153092557391432, "grad_norm": 1.2979743249484705, "learning_rate": 1.5261419757000417e-05, "loss": 0.6921327114105225, "step": 2446 }, { "epoch": 0.7156016961544085, "grad_norm": 1.429595570109343, "learning_rate": 1.525730772310055e-05, "loss": 0.6428500413894653, "step": 2447 }, { "epoch": 0.7158941365696739, "grad_norm": 1.3812578358040712, "learning_rate": 1.5253194460336964e-05, "loss": 0.645559549331665, "step": 2448 }, { "epoch": 0.7161865769849394, "grad_norm": 1.7222193716043204, "learning_rate": 1.5249079969671114e-05, "loss": 0.6211013793945312, "step": 2449 }, { "epoch": 0.7164790174002047, "grad_norm": 1.2302035745629583, "learning_rate": 1.5244964252064737e-05, "loss": 0.5709721446037292, "step": 2450 }, { "epoch": 0.7167714578154701, "grad_norm": 1.4516717315033434, "learning_rate": 1.5240847308479855e-05, "loss": 0.6781377196311951, "step": 2451 }, { "epoch": 0.7170638982307355, "grad_norm": 1.5070563114338018, "learning_rate": 1.523672913987878e-05, "loss": 0.6476876735687256, "step": 2452 }, { "epoch": 0.7173563386460009, "grad_norm": 1.5653544129198373, "learning_rate": 1.523260974722411e-05, "loss": 0.6564218997955322, "step": 2453 }, { "epoch": 0.7176487790612662, "grad_norm": 1.3630096136191563, "learning_rate": 1.5228489131478722e-05, "loss": 0.6455773711204529, "step": 2454 }, { "epoch": 0.7179412194765317, "grad_norm": 1.393672298684458, "learning_rate": 1.5224367293605791e-05, "loss": 0.6039570569992065, "step": 2455 }, { "epoch": 0.718233659891797, "grad_norm": 1.337144764968105, "learning_rate": 1.522024423456877e-05, "loss": 0.7060747146606445, "step": 2456 }, { "epoch": 0.7185261003070624, "grad_norm": 1.3843662041380984, "learning_rate": 1.52161199553314e-05, "loss": 0.561469316482544, "step": 2457 }, { "epoch": 0.7188185407223279, "grad_norm": 1.4419398084710615, "learning_rate": 1.5211994456857706e-05, "loss": 0.6682697534561157, "step": 2458 }, { "epoch": 0.7191109811375932, "grad_norm": 1.5477256567407798, "learning_rate": 1.5207867740111994e-05, "loss": 0.7893983125686646, "step": 2459 }, { "epoch": 0.7194034215528586, "grad_norm": 1.3454706918314496, "learning_rate": 1.5203739806058863e-05, "loss": 0.617809534072876, "step": 2460 }, { "epoch": 0.719695861968124, "grad_norm": 1.2866429351470308, "learning_rate": 1.5199610655663193e-05, "loss": 0.5444413423538208, "step": 2461 }, { "epoch": 0.7199883023833894, "grad_norm": 1.342633438363169, "learning_rate": 1.5195480289890146e-05, "loss": 0.615330696105957, "step": 2462 }, { "epoch": 0.7202807427986547, "grad_norm": 1.7658118623485195, "learning_rate": 1.5191348709705169e-05, "loss": 0.6811497211456299, "step": 2463 }, { "epoch": 0.7205731832139202, "grad_norm": 1.3224396770739022, "learning_rate": 1.5187215916073997e-05, "loss": 0.612322211265564, "step": 2464 }, { "epoch": 0.7208656236291856, "grad_norm": 1.2201627110269677, "learning_rate": 1.518308190996264e-05, "loss": 0.6106880903244019, "step": 2465 }, { "epoch": 0.7211580640444509, "grad_norm": 1.2431923365136468, "learning_rate": 1.5178946692337405e-05, "loss": 0.4901464581489563, "step": 2466 }, { "epoch": 0.7214505044597164, "grad_norm": 2.053814058775723, "learning_rate": 1.5174810264164865e-05, "loss": 0.6777167320251465, "step": 2467 }, { "epoch": 0.7217429448749817, "grad_norm": 1.4212256530727148, "learning_rate": 1.5170672626411888e-05, "loss": 0.6353746056556702, "step": 2468 }, { "epoch": 0.7220353852902471, "grad_norm": 1.4867453474426244, "learning_rate": 1.516653378004563e-05, "loss": 0.6218847632408142, "step": 2469 }, { "epoch": 0.7223278257055125, "grad_norm": 1.2225434595050702, "learning_rate": 1.5162393726033508e-05, "loss": 0.5001585483551025, "step": 2470 }, { "epoch": 0.7226202661207779, "grad_norm": 1.5129628743171017, "learning_rate": 1.5158252465343242e-05, "loss": 0.6801280975341797, "step": 2471 }, { "epoch": 0.7229127065360432, "grad_norm": 1.208746836224967, "learning_rate": 1.5154109998942823e-05, "loss": 0.6739565134048462, "step": 2472 }, { "epoch": 0.7232051469513087, "grad_norm": 1.1414220178862078, "learning_rate": 1.5149966327800532e-05, "loss": 0.5970213413238525, "step": 2473 }, { "epoch": 0.7234975873665741, "grad_norm": 1.295455082889375, "learning_rate": 1.5145821452884923e-05, "loss": 0.7367317080497742, "step": 2474 }, { "epoch": 0.7237900277818394, "grad_norm": 1.3877158266331615, "learning_rate": 1.5141675375164839e-05, "loss": 0.6332153677940369, "step": 2475 }, { "epoch": 0.7240824681971049, "grad_norm": 1.223786080062607, "learning_rate": 1.5137528095609395e-05, "loss": 0.6185739636421204, "step": 2476 }, { "epoch": 0.7243749086123702, "grad_norm": 1.436341367228992, "learning_rate": 1.5133379615187996e-05, "loss": 0.5982746481895447, "step": 2477 }, { "epoch": 0.7246673490276356, "grad_norm": 1.32306496712973, "learning_rate": 1.512922993487032e-05, "loss": 0.5946815013885498, "step": 2478 }, { "epoch": 0.724959789442901, "grad_norm": 1.2916301226572995, "learning_rate": 1.5125079055626337e-05, "loss": 0.5645624399185181, "step": 2479 }, { "epoch": 0.7252522298581664, "grad_norm": 1.0689440382368105, "learning_rate": 1.5120926978426288e-05, "loss": 0.43329858779907227, "step": 2480 }, { "epoch": 0.7255446702734318, "grad_norm": 1.420557871943188, "learning_rate": 1.5116773704240689e-05, "loss": 0.64244544506073, "step": 2481 }, { "epoch": 0.7258371106886972, "grad_norm": 1.3002221181867923, "learning_rate": 1.5112619234040348e-05, "loss": 0.6640222072601318, "step": 2482 }, { "epoch": 0.7261295511039626, "grad_norm": 1.4810661665547034, "learning_rate": 1.5108463568796346e-05, "loss": 0.6346921324729919, "step": 2483 }, { "epoch": 0.7264219915192279, "grad_norm": 1.4101536258246594, "learning_rate": 1.5104306709480045e-05, "loss": 0.5891947746276855, "step": 2484 }, { "epoch": 0.7267144319344934, "grad_norm": 1.2478330500785222, "learning_rate": 1.5100148657063089e-05, "loss": 0.616216242313385, "step": 2485 }, { "epoch": 0.7270068723497587, "grad_norm": 1.3541911638943873, "learning_rate": 1.5095989412517389e-05, "loss": 0.5961766242980957, "step": 2486 }, { "epoch": 0.7272993127650241, "grad_norm": 1.27681624299837, "learning_rate": 1.509182897681515e-05, "loss": 0.5629050731658936, "step": 2487 }, { "epoch": 0.7275917531802896, "grad_norm": 1.3918382252124497, "learning_rate": 1.5087667350928844e-05, "loss": 0.6640661954879761, "step": 2488 }, { "epoch": 0.7278841935955549, "grad_norm": 1.1741006713729014, "learning_rate": 1.5083504535831233e-05, "loss": 0.5884503126144409, "step": 2489 }, { "epoch": 0.7281766340108203, "grad_norm": 1.2387841976936662, "learning_rate": 1.5079340532495344e-05, "loss": 0.5395207405090332, "step": 2490 }, { "epoch": 0.7284690744260857, "grad_norm": 1.5570127298934886, "learning_rate": 1.5075175341894487e-05, "loss": 0.5713212490081787, "step": 2491 }, { "epoch": 0.7287615148413511, "grad_norm": 1.8811783299638292, "learning_rate": 1.5071008965002252e-05, "loss": 0.5732176303863525, "step": 2492 }, { "epoch": 0.7290539552566164, "grad_norm": 1.3314823409610355, "learning_rate": 1.50668414027925e-05, "loss": 0.6381006240844727, "step": 2493 }, { "epoch": 0.7293463956718819, "grad_norm": 1.5687830928425197, "learning_rate": 1.5062672656239381e-05, "loss": 0.6533833742141724, "step": 2494 }, { "epoch": 0.7296388360871472, "grad_norm": 1.213698756503139, "learning_rate": 1.5058502726317309e-05, "loss": 0.5919456481933594, "step": 2495 }, { "epoch": 0.7299312765024126, "grad_norm": 1.3954865057419796, "learning_rate": 1.5054331614000984e-05, "loss": 0.6128921508789062, "step": 2496 }, { "epoch": 0.7302237169176781, "grad_norm": 1.3910630571139424, "learning_rate": 1.5050159320265371e-05, "loss": 0.5949394702911377, "step": 2497 }, { "epoch": 0.7305161573329434, "grad_norm": 1.5386167534502115, "learning_rate": 1.5045985846085724e-05, "loss": 0.6262483596801758, "step": 2498 }, { "epoch": 0.7308085977482088, "grad_norm": 1.4477928134421267, "learning_rate": 1.5041811192437563e-05, "loss": 0.5032243728637695, "step": 2499 }, { "epoch": 0.7311010381634742, "grad_norm": 1.31776348667592, "learning_rate": 1.5037635360296695e-05, "loss": 0.6721810102462769, "step": 2500 }, { "epoch": 0.7313934785787396, "grad_norm": 1.3556666925406757, "learning_rate": 1.5033458350639185e-05, "loss": 0.7091001272201538, "step": 2501 }, { "epoch": 0.7316859189940049, "grad_norm": 3.8317594491760163, "learning_rate": 1.5029280164441395e-05, "loss": 0.5414971113204956, "step": 2502 }, { "epoch": 0.7319783594092704, "grad_norm": 1.364096425695391, "learning_rate": 1.5025100802679944e-05, "loss": 0.6714789271354675, "step": 2503 }, { "epoch": 0.7322707998245358, "grad_norm": 1.3566105024089323, "learning_rate": 1.5020920266331733e-05, "loss": 0.5008493065834045, "step": 2504 }, { "epoch": 0.7325632402398011, "grad_norm": 1.652155025588763, "learning_rate": 1.5016738556373936e-05, "loss": 0.563892126083374, "step": 2505 }, { "epoch": 0.7328556806550666, "grad_norm": 1.3313159442091285, "learning_rate": 1.5012555673784004e-05, "loss": 0.6371973752975464, "step": 2506 }, { "epoch": 0.7331481210703319, "grad_norm": 1.5289698261558242, "learning_rate": 1.5008371619539661e-05, "loss": 0.7365365624427795, "step": 2507 }, { "epoch": 0.7334405614855973, "grad_norm": 1.357001447635837, "learning_rate": 1.5004186394618906e-05, "loss": 0.5401967763900757, "step": 2508 }, { "epoch": 0.7337330019008627, "grad_norm": 1.4338213701683389, "learning_rate": 1.5000000000000002e-05, "loss": 0.5827134847640991, "step": 2509 }, { "epoch": 0.7340254423161281, "grad_norm": 1.5173171956884226, "learning_rate": 1.49958124366615e-05, "loss": 0.7655869126319885, "step": 2510 }, { "epoch": 0.7343178827313934, "grad_norm": 1.3360976464033478, "learning_rate": 1.4991623705582216e-05, "loss": 0.5410823822021484, "step": 2511 }, { "epoch": 0.7346103231466589, "grad_norm": 1.6453007873220271, "learning_rate": 1.4987433807741242e-05, "loss": 0.6831178665161133, "step": 2512 }, { "epoch": 0.7349027635619243, "grad_norm": 1.2152056235269613, "learning_rate": 1.498324274411794e-05, "loss": 0.4952821731567383, "step": 2513 }, { "epoch": 0.7351952039771896, "grad_norm": 1.4425254779779118, "learning_rate": 1.4979050515691944e-05, "loss": 0.6973339319229126, "step": 2514 }, { "epoch": 0.7354876443924551, "grad_norm": 1.428400853551732, "learning_rate": 1.4974857123443163e-05, "loss": 0.6604373455047607, "step": 2515 }, { "epoch": 0.7357800848077204, "grad_norm": 1.3355207439959806, "learning_rate": 1.4970662568351776e-05, "loss": 0.6523034572601318, "step": 2516 }, { "epoch": 0.7360725252229858, "grad_norm": 1.2739776061453822, "learning_rate": 1.4966466851398238e-05, "loss": 0.6557538509368896, "step": 2517 }, { "epoch": 0.7363649656382512, "grad_norm": 1.3243836594251046, "learning_rate": 1.4962269973563269e-05, "loss": 0.6993967294692993, "step": 2518 }, { "epoch": 0.7366574060535166, "grad_norm": 1.3043008466806634, "learning_rate": 1.4958071935827862e-05, "loss": 0.611979067325592, "step": 2519 }, { "epoch": 0.736949846468782, "grad_norm": 1.5837280682600245, "learning_rate": 1.4953872739173289e-05, "loss": 0.9108786582946777, "step": 2520 }, { "epoch": 0.7372422868840474, "grad_norm": 1.5471791396278156, "learning_rate": 1.4949672384581082e-05, "loss": 0.7086392045021057, "step": 2521 }, { "epoch": 0.7375347272993128, "grad_norm": 1.341070279173996, "learning_rate": 1.494547087303305e-05, "loss": 0.6103025674819946, "step": 2522 }, { "epoch": 0.7378271677145781, "grad_norm": 1.223930383405044, "learning_rate": 1.4941268205511272e-05, "loss": 0.5597528219223022, "step": 2523 }, { "epoch": 0.7381196081298436, "grad_norm": 1.4817126292023657, "learning_rate": 1.4937064382998091e-05, "loss": 0.6222598552703857, "step": 2524 }, { "epoch": 0.7384120485451089, "grad_norm": 1.4738198225513357, "learning_rate": 1.4932859406476131e-05, "loss": 0.6083353757858276, "step": 2525 }, { "epoch": 0.7387044889603743, "grad_norm": 1.2716230350108357, "learning_rate": 1.4928653276928275e-05, "loss": 0.47920671105384827, "step": 2526 }, { "epoch": 0.7389969293756398, "grad_norm": 1.2356122713189879, "learning_rate": 1.4924445995337685e-05, "loss": 0.5752983093261719, "step": 2527 }, { "epoch": 0.7392893697909051, "grad_norm": 1.3500870063925003, "learning_rate": 1.4920237562687784e-05, "loss": 0.6275333762168884, "step": 2528 }, { "epoch": 0.7395818102061705, "grad_norm": 1.3423023519178945, "learning_rate": 1.4916027979962266e-05, "loss": 0.6362103223800659, "step": 2529 }, { "epoch": 0.7398742506214359, "grad_norm": 1.4246415171584412, "learning_rate": 1.49118172481451e-05, "loss": 0.5902664661407471, "step": 2530 }, { "epoch": 0.7401666910367013, "grad_norm": 1.3036213595476636, "learning_rate": 1.4907605368220514e-05, "loss": 0.5293874740600586, "step": 2531 }, { "epoch": 0.7404591314519666, "grad_norm": 1.3590290047464213, "learning_rate": 1.4903392341173013e-05, "loss": 0.7298746109008789, "step": 2532 }, { "epoch": 0.7407515718672321, "grad_norm": 1.3755489549876734, "learning_rate": 1.4899178167987367e-05, "loss": 0.6428382396697998, "step": 2533 }, { "epoch": 0.7410440122824974, "grad_norm": 1.3444422145970576, "learning_rate": 1.489496284964861e-05, "loss": 0.6204425096511841, "step": 2534 }, { "epoch": 0.7413364526977628, "grad_norm": 1.2627663029943075, "learning_rate": 1.4890746387142052e-05, "loss": 0.6025601625442505, "step": 2535 }, { "epoch": 0.7416288931130283, "grad_norm": 1.212213289149315, "learning_rate": 1.4886528781453258e-05, "loss": 0.5570085644721985, "step": 2536 }, { "epoch": 0.7419213335282936, "grad_norm": 1.387517207017057, "learning_rate": 1.4882310033568072e-05, "loss": 0.6816439628601074, "step": 2537 }, { "epoch": 0.742213773943559, "grad_norm": 1.341130650337267, "learning_rate": 1.4878090144472603e-05, "loss": 0.5424396991729736, "step": 2538 }, { "epoch": 0.7425062143588244, "grad_norm": 1.583973779595893, "learning_rate": 1.4873869115153223e-05, "loss": 0.58860182762146, "step": 2539 }, { "epoch": 0.7427986547740898, "grad_norm": 1.227937032120959, "learning_rate": 1.4869646946596568e-05, "loss": 0.513140857219696, "step": 2540 }, { "epoch": 0.7430910951893551, "grad_norm": 1.3321578929704418, "learning_rate": 1.486542363978955e-05, "loss": 0.5967035293579102, "step": 2541 }, { "epoch": 0.7433835356046206, "grad_norm": 1.2958174333377406, "learning_rate": 1.4861199195719334e-05, "loss": 0.6988440752029419, "step": 2542 }, { "epoch": 0.743675976019886, "grad_norm": 1.3279731889181368, "learning_rate": 1.4856973615373366e-05, "loss": 0.6176164746284485, "step": 2543 }, { "epoch": 0.7439684164351513, "grad_norm": 1.394214331783624, "learning_rate": 1.4852746899739346e-05, "loss": 0.5616505742073059, "step": 2544 }, { "epoch": 0.7442608568504168, "grad_norm": 1.199172810090394, "learning_rate": 1.4848519049805243e-05, "loss": 0.5470465421676636, "step": 2545 }, { "epoch": 0.7445532972656821, "grad_norm": 1.393649724579279, "learning_rate": 1.4844290066559292e-05, "loss": 0.6362754106521606, "step": 2546 }, { "epoch": 0.7448457376809475, "grad_norm": 1.2298975206172837, "learning_rate": 1.4840059950989992e-05, "loss": 0.6290515661239624, "step": 2547 }, { "epoch": 0.7451381780962129, "grad_norm": 1.4356832247939193, "learning_rate": 1.4835828704086105e-05, "loss": 0.7225647568702698, "step": 2548 }, { "epoch": 0.7454306185114783, "grad_norm": 1.4603777863967904, "learning_rate": 1.483159632683666e-05, "loss": 0.6993023157119751, "step": 2549 }, { "epoch": 0.7457230589267436, "grad_norm": 1.5062925776475273, "learning_rate": 1.482736282023095e-05, "loss": 0.6960086226463318, "step": 2550 }, { "epoch": 0.7460154993420091, "grad_norm": 1.4783046017210701, "learning_rate": 1.4823128185258535e-05, "loss": 0.627712607383728, "step": 2551 }, { "epoch": 0.7463079397572745, "grad_norm": 1.3756379084869055, "learning_rate": 1.481889242290923e-05, "loss": 0.6314729452133179, "step": 2552 }, { "epoch": 0.7466003801725398, "grad_norm": 1.293029687195421, "learning_rate": 1.4814655534173121e-05, "loss": 0.5948070287704468, "step": 2553 }, { "epoch": 0.7468928205878053, "grad_norm": 1.28283626174806, "learning_rate": 1.4810417520040551e-05, "loss": 0.6227586269378662, "step": 2554 }, { "epoch": 0.7471852610030706, "grad_norm": 1.156874509923564, "learning_rate": 1.4806178381502139e-05, "loss": 0.589213490486145, "step": 2555 }, { "epoch": 0.747477701418336, "grad_norm": 1.3920763104069633, "learning_rate": 1.4801938119548748e-05, "loss": 0.6748968362808228, "step": 2556 }, { "epoch": 0.7477701418336014, "grad_norm": 1.5278244850962377, "learning_rate": 1.4797696735171521e-05, "loss": 0.627450704574585, "step": 2557 }, { "epoch": 0.7480625822488668, "grad_norm": 1.3979513679962843, "learning_rate": 1.479345422936185e-05, "loss": 0.5816184878349304, "step": 2558 }, { "epoch": 0.7483550226641322, "grad_norm": 1.3403975244231432, "learning_rate": 1.4789210603111399e-05, "loss": 0.5184855461120605, "step": 2559 }, { "epoch": 0.7486474630793976, "grad_norm": 1.3184163367774433, "learning_rate": 1.4784965857412088e-05, "loss": 0.5747300982475281, "step": 2560 }, { "epoch": 0.748939903494663, "grad_norm": 1.5154750654158269, "learning_rate": 1.4780719993256104e-05, "loss": 0.6957682371139526, "step": 2561 }, { "epoch": 0.7492323439099283, "grad_norm": 1.3790848349629903, "learning_rate": 1.4776473011635886e-05, "loss": 0.5711330771446228, "step": 2562 }, { "epoch": 0.7495247843251938, "grad_norm": 1.260228471581513, "learning_rate": 1.4772224913544142e-05, "loss": 0.687350869178772, "step": 2563 }, { "epoch": 0.7498172247404591, "grad_norm": 1.549796921470129, "learning_rate": 1.476797569997384e-05, "loss": 0.71396803855896, "step": 2564 }, { "epoch": 0.7501096651557245, "grad_norm": 1.3620133851355087, "learning_rate": 1.4763725371918209e-05, "loss": 0.5457814335823059, "step": 2565 }, { "epoch": 0.75040210557099, "grad_norm": 1.4687420339775556, "learning_rate": 1.4759473930370738e-05, "loss": 0.5889413952827454, "step": 2566 }, { "epoch": 0.7506945459862553, "grad_norm": 1.8883582542449355, "learning_rate": 1.4755221376325171e-05, "loss": 0.6222226619720459, "step": 2567 }, { "epoch": 0.7509869864015207, "grad_norm": 1.17580934018018, "learning_rate": 1.475096771077552e-05, "loss": 0.5273243188858032, "step": 2568 }, { "epoch": 0.7512794268167861, "grad_norm": 1.2062680853030614, "learning_rate": 1.4746712934716055e-05, "loss": 0.5665162801742554, "step": 2569 }, { "epoch": 0.7515718672320515, "grad_norm": 1.6320800654071554, "learning_rate": 1.4742457049141298e-05, "loss": 0.5748391151428223, "step": 2570 }, { "epoch": 0.7518643076473168, "grad_norm": 1.4197866961281498, "learning_rate": 1.4738200055046044e-05, "loss": 0.7002041339874268, "step": 2571 }, { "epoch": 0.7521567480625823, "grad_norm": 1.3507056136966096, "learning_rate": 1.4733941953425337e-05, "loss": 0.6841630935668945, "step": 2572 }, { "epoch": 0.7524491884778476, "grad_norm": 1.6017928671701795, "learning_rate": 1.4729682745274478e-05, "loss": 0.7047172784805298, "step": 2573 }, { "epoch": 0.752741628893113, "grad_norm": 1.4397980876250445, "learning_rate": 1.4725422431589035e-05, "loss": 0.6979919672012329, "step": 2574 }, { "epoch": 0.7530340693083785, "grad_norm": 1.3152000128748418, "learning_rate": 1.4721161013364829e-05, "loss": 0.6437125205993652, "step": 2575 }, { "epoch": 0.7533265097236438, "grad_norm": 1.4573280156715103, "learning_rate": 1.4716898491597942e-05, "loss": 0.591254711151123, "step": 2576 }, { "epoch": 0.7536189501389092, "grad_norm": 1.592793146861773, "learning_rate": 1.4712634867284714e-05, "loss": 0.6276297569274902, "step": 2577 }, { "epoch": 0.7539113905541746, "grad_norm": 1.2004846116513588, "learning_rate": 1.4708370141421737e-05, "loss": 0.5310626029968262, "step": 2578 }, { "epoch": 0.75420383096944, "grad_norm": 1.374287364754045, "learning_rate": 1.4704104315005864e-05, "loss": 0.5256849527359009, "step": 2579 }, { "epoch": 0.7544962713847053, "grad_norm": 1.4473126972035357, "learning_rate": 1.4699837389034212e-05, "loss": 0.6050584316253662, "step": 2580 }, { "epoch": 0.7547887117999708, "grad_norm": 1.3425248874126274, "learning_rate": 1.4695569364504144e-05, "loss": 0.5124386548995972, "step": 2581 }, { "epoch": 0.7550811522152362, "grad_norm": 1.1600080124683732, "learning_rate": 1.4691300242413289e-05, "loss": 0.5631951093673706, "step": 2582 }, { "epoch": 0.7553735926305015, "grad_norm": 1.3017433820111879, "learning_rate": 1.4687030023759527e-05, "loss": 0.6352444291114807, "step": 2583 }, { "epoch": 0.755666033045767, "grad_norm": 1.4490307646785157, "learning_rate": 1.4682758709540992e-05, "loss": 0.6717500686645508, "step": 2584 }, { "epoch": 0.7559584734610323, "grad_norm": 3.0905292476778428, "learning_rate": 1.467848630075608e-05, "loss": 0.5889217853546143, "step": 2585 }, { "epoch": 0.7562509138762977, "grad_norm": 1.258529998432557, "learning_rate": 1.4674212798403443e-05, "loss": 0.49069908261299133, "step": 2586 }, { "epoch": 0.756543354291563, "grad_norm": 1.1729027861993524, "learning_rate": 1.4669938203481982e-05, "loss": 0.6272397041320801, "step": 2587 }, { "epoch": 0.7568357947068285, "grad_norm": 1.5090841451643915, "learning_rate": 1.466566251699086e-05, "loss": 0.6218451261520386, "step": 2588 }, { "epoch": 0.7571282351220939, "grad_norm": 1.4025085245751263, "learning_rate": 1.4661385739929492e-05, "loss": 0.6174849271774292, "step": 2589 }, { "epoch": 0.7574206755373593, "grad_norm": 1.3554209784525295, "learning_rate": 1.465710787329755e-05, "loss": 0.5595160126686096, "step": 2590 }, { "epoch": 0.7577131159526247, "grad_norm": 1.5657464206953444, "learning_rate": 1.4652828918094954e-05, "loss": 0.757240891456604, "step": 2591 }, { "epoch": 0.75800555636789, "grad_norm": 1.3337551846990978, "learning_rate": 1.4648548875321893e-05, "loss": 0.630811333656311, "step": 2592 }, { "epoch": 0.7582979967831555, "grad_norm": 1.208341715070646, "learning_rate": 1.4644267745978797e-05, "loss": 0.5857812762260437, "step": 2593 }, { "epoch": 0.7585904371984208, "grad_norm": 1.1785954348430454, "learning_rate": 1.463998553106635e-05, "loss": 0.5869519710540771, "step": 2594 }, { "epoch": 0.7588828776136862, "grad_norm": 1.2035584714461103, "learning_rate": 1.4635702231585498e-05, "loss": 0.5610413551330566, "step": 2595 }, { "epoch": 0.7591753180289516, "grad_norm": 1.255732340436211, "learning_rate": 1.4631417848537435e-05, "loss": 0.5634676218032837, "step": 2596 }, { "epoch": 0.759467758444217, "grad_norm": 1.2847976698363035, "learning_rate": 1.4627132382923607e-05, "loss": 0.6813392639160156, "step": 2597 }, { "epoch": 0.7597601988594824, "grad_norm": 1.5611350123657577, "learning_rate": 1.4622845835745723e-05, "loss": 0.644945502281189, "step": 2598 }, { "epoch": 0.7600526392747478, "grad_norm": 1.4458723370490596, "learning_rate": 1.461855820800573e-05, "loss": 0.7432133555412292, "step": 2599 }, { "epoch": 0.7603450796900132, "grad_norm": 1.1406983279122715, "learning_rate": 1.4614269500705832e-05, "loss": 0.4729112982749939, "step": 2600 }, { "epoch": 0.7606375201052785, "grad_norm": 1.4806970647351285, "learning_rate": 1.4609979714848499e-05, "loss": 0.7146443128585815, "step": 2601 }, { "epoch": 0.760929960520544, "grad_norm": 1.4348530933940364, "learning_rate": 1.4605688851436436e-05, "loss": 0.5959945917129517, "step": 2602 }, { "epoch": 0.7612224009358093, "grad_norm": 1.3380784718799885, "learning_rate": 1.4601396911472605e-05, "loss": 0.6091525554656982, "step": 2603 }, { "epoch": 0.7615148413510747, "grad_norm": 1.3043703832448297, "learning_rate": 1.4597103895960228e-05, "loss": 0.5101523399353027, "step": 2604 }, { "epoch": 0.7618072817663402, "grad_norm": 1.3937793894568855, "learning_rate": 1.4592809805902762e-05, "loss": 0.6036165952682495, "step": 2605 }, { "epoch": 0.7620997221816055, "grad_norm": 1.361507946530242, "learning_rate": 1.4588514642303928e-05, "loss": 0.6094970703125, "step": 2606 }, { "epoch": 0.7623921625968709, "grad_norm": 1.3770518433820003, "learning_rate": 1.4584218406167697e-05, "loss": 0.49754881858825684, "step": 2607 }, { "epoch": 0.7626846030121363, "grad_norm": 1.3703785644048119, "learning_rate": 1.4579921098498285e-05, "loss": 0.6066807508468628, "step": 2608 }, { "epoch": 0.7629770434274017, "grad_norm": 1.4768479795454132, "learning_rate": 1.4575622720300162e-05, "loss": 0.5758910179138184, "step": 2609 }, { "epoch": 0.763269483842667, "grad_norm": 1.4281250780822374, "learning_rate": 1.457132327257805e-05, "loss": 0.6641621589660645, "step": 2610 }, { "epoch": 0.7635619242579325, "grad_norm": 1.506727865728889, "learning_rate": 1.4567022756336916e-05, "loss": 0.7024788856506348, "step": 2611 }, { "epoch": 0.7638543646731978, "grad_norm": 1.2921755321984356, "learning_rate": 1.4562721172581982e-05, "loss": 0.6066344380378723, "step": 2612 }, { "epoch": 0.7641468050884632, "grad_norm": 1.3533854830579282, "learning_rate": 1.4558418522318713e-05, "loss": 0.566038966178894, "step": 2613 }, { "epoch": 0.7644392455037287, "grad_norm": 1.3370326372322123, "learning_rate": 1.4554114806552833e-05, "loss": 0.5817335844039917, "step": 2614 }, { "epoch": 0.764731685918994, "grad_norm": 1.2813703243908812, "learning_rate": 1.4549810026290305e-05, "loss": 0.6001763343811035, "step": 2615 }, { "epoch": 0.7650241263342594, "grad_norm": 1.617460530676573, "learning_rate": 1.4545504182537346e-05, "loss": 0.6363068222999573, "step": 2616 }, { "epoch": 0.7653165667495248, "grad_norm": 1.4805158326873171, "learning_rate": 1.4541197276300424e-05, "loss": 0.669566810131073, "step": 2617 }, { "epoch": 0.7656090071647902, "grad_norm": 1.2122677055370945, "learning_rate": 1.4536889308586245e-05, "loss": 0.47967004776000977, "step": 2618 }, { "epoch": 0.7659014475800555, "grad_norm": 1.310958704364757, "learning_rate": 1.4532580280401777e-05, "loss": 0.5803399085998535, "step": 2619 }, { "epoch": 0.766193887995321, "grad_norm": 1.3185113057937472, "learning_rate": 1.452827019275423e-05, "loss": 0.6870115995407104, "step": 2620 }, { "epoch": 0.7664863284105864, "grad_norm": 1.307156915151953, "learning_rate": 1.4523959046651058e-05, "loss": 0.6190885901451111, "step": 2621 }, { "epoch": 0.7667787688258517, "grad_norm": 1.4891479565012034, "learning_rate": 1.4519646843099961e-05, "loss": 0.6624859571456909, "step": 2622 }, { "epoch": 0.7670712092411172, "grad_norm": 1.253302711959068, "learning_rate": 1.4515333583108896e-05, "loss": 0.5770546197891235, "step": 2623 }, { "epoch": 0.7673636496563825, "grad_norm": 1.3410371709150275, "learning_rate": 1.451101926768606e-05, "loss": 0.6843355894088745, "step": 2624 }, { "epoch": 0.7676560900716479, "grad_norm": 1.0930173610522418, "learning_rate": 1.4506703897839895e-05, "loss": 0.5293717384338379, "step": 2625 }, { "epoch": 0.7679485304869133, "grad_norm": 1.1789701874259584, "learning_rate": 1.45023874745791e-05, "loss": 0.44534316658973694, "step": 2626 }, { "epoch": 0.7682409709021787, "grad_norm": 4.2234169958332295, "learning_rate": 1.4498069998912603e-05, "loss": 0.7279446721076965, "step": 2627 }, { "epoch": 0.7685334113174441, "grad_norm": 1.3924343198630234, "learning_rate": 1.4493751471849596e-05, "loss": 0.6990453600883484, "step": 2628 }, { "epoch": 0.7688258517327095, "grad_norm": 1.3337373981179779, "learning_rate": 1.44894318943995e-05, "loss": 0.6610965728759766, "step": 2629 }, { "epoch": 0.7691182921479749, "grad_norm": 1.285212706548779, "learning_rate": 1.4485111267571999e-05, "loss": 0.5124749541282654, "step": 2630 }, { "epoch": 0.7694107325632402, "grad_norm": 1.3445630320041935, "learning_rate": 1.448078959237701e-05, "loss": 0.7191518545150757, "step": 2631 }, { "epoch": 0.7697031729785057, "grad_norm": 1.1499690572165278, "learning_rate": 1.4476466869824694e-05, "loss": 0.5798880457878113, "step": 2632 }, { "epoch": 0.769995613393771, "grad_norm": 1.3900006441925277, "learning_rate": 1.4472143100925467e-05, "loss": 0.5187106728553772, "step": 2633 }, { "epoch": 0.7702880538090364, "grad_norm": 1.1672945310140501, "learning_rate": 1.4467818286689981e-05, "loss": 0.5794588327407837, "step": 2634 }, { "epoch": 0.7705804942243017, "grad_norm": 1.2435528275045493, "learning_rate": 1.4463492428129133e-05, "loss": 0.4884936809539795, "step": 2635 }, { "epoch": 0.7708729346395672, "grad_norm": 1.3037745440935204, "learning_rate": 1.4459165526254074e-05, "loss": 0.5782946348190308, "step": 2636 }, { "epoch": 0.7711653750548326, "grad_norm": 1.2531837165046444, "learning_rate": 1.445483758207618e-05, "loss": 0.5173349380493164, "step": 2637 }, { "epoch": 0.771457815470098, "grad_norm": 1.4752149684021225, "learning_rate": 1.4450508596607087e-05, "loss": 0.616407573223114, "step": 2638 }, { "epoch": 0.7717502558853634, "grad_norm": 1.4855666629653779, "learning_rate": 1.4446178570858672e-05, "loss": 0.537878155708313, "step": 2639 }, { "epoch": 0.7720426963006287, "grad_norm": 1.2968861628303388, "learning_rate": 1.4441847505843048e-05, "loss": 0.674277663230896, "step": 2640 }, { "epoch": 0.7723351367158942, "grad_norm": 1.440782866010467, "learning_rate": 1.4437515402572576e-05, "loss": 0.5064860582351685, "step": 2641 }, { "epoch": 0.7726275771311595, "grad_norm": 1.2859384806045262, "learning_rate": 1.4433182262059861e-05, "loss": 0.6256883144378662, "step": 2642 }, { "epoch": 0.7729200175464249, "grad_norm": 1.2490391757844836, "learning_rate": 1.4428848085317744e-05, "loss": 0.6023700833320618, "step": 2643 }, { "epoch": 0.7732124579616904, "grad_norm": 1.5137270909206324, "learning_rate": 1.4424512873359316e-05, "loss": 0.5670932531356812, "step": 2644 }, { "epoch": 0.7735048983769557, "grad_norm": 1.406486208295682, "learning_rate": 1.4420176627197906e-05, "loss": 0.760460376739502, "step": 2645 }, { "epoch": 0.7737973387922211, "grad_norm": 1.3383411751300025, "learning_rate": 1.4415839347847082e-05, "loss": 0.5680848956108093, "step": 2646 }, { "epoch": 0.7740897792074865, "grad_norm": 1.2948318300140997, "learning_rate": 1.4411501036320661e-05, "loss": 0.5962368249893188, "step": 2647 }, { "epoch": 0.7743822196227519, "grad_norm": 1.3851281269469669, "learning_rate": 1.4407161693632697e-05, "loss": 0.7149791121482849, "step": 2648 }, { "epoch": 0.7746746600380172, "grad_norm": 1.4438569377090373, "learning_rate": 1.440282132079748e-05, "loss": 0.5943992733955383, "step": 2649 }, { "epoch": 0.7749671004532827, "grad_norm": 1.681920535370579, "learning_rate": 1.439847991882955e-05, "loss": 0.7265899181365967, "step": 2650 }, { "epoch": 0.775259540868548, "grad_norm": 1.257384791880329, "learning_rate": 1.4394137488743682e-05, "loss": 0.6011309027671814, "step": 2651 }, { "epoch": 0.7755519812838134, "grad_norm": 1.4419500386554907, "learning_rate": 1.4389794031554894e-05, "loss": 0.6853964328765869, "step": 2652 }, { "epoch": 0.7758444216990789, "grad_norm": 1.4140520249216477, "learning_rate": 1.438544954827844e-05, "loss": 0.6598547697067261, "step": 2653 }, { "epoch": 0.7761368621143442, "grad_norm": 1.3919438302264315, "learning_rate": 1.4381104039929819e-05, "loss": 0.5776119232177734, "step": 2654 }, { "epoch": 0.7764293025296096, "grad_norm": 1.182931573556341, "learning_rate": 1.4376757507524766e-05, "loss": 0.6026376485824585, "step": 2655 }, { "epoch": 0.776721742944875, "grad_norm": 1.2883148172478378, "learning_rate": 1.4372409952079256e-05, "loss": 0.5776997804641724, "step": 2656 }, { "epoch": 0.7770141833601404, "grad_norm": 1.5317545348037325, "learning_rate": 1.4368061374609505e-05, "loss": 0.5766068696975708, "step": 2657 }, { "epoch": 0.7773066237754057, "grad_norm": 1.0428168520269592, "learning_rate": 1.4363711776131966e-05, "loss": 0.4783105254173279, "step": 2658 }, { "epoch": 0.7775990641906712, "grad_norm": 1.4837098758543301, "learning_rate": 1.4359361157663332e-05, "loss": 0.6563695073127747, "step": 2659 }, { "epoch": 0.7778915046059366, "grad_norm": 1.0898257169197185, "learning_rate": 1.4355009520220531e-05, "loss": 0.5177119374275208, "step": 2660 }, { "epoch": 0.7781839450212019, "grad_norm": 1.3520526907259511, "learning_rate": 1.4350656864820733e-05, "loss": 0.6590641736984253, "step": 2661 }, { "epoch": 0.7784763854364674, "grad_norm": 1.2923155412118275, "learning_rate": 1.4346303192481348e-05, "loss": 0.6012274622917175, "step": 2662 }, { "epoch": 0.7787688258517327, "grad_norm": 1.439032337982527, "learning_rate": 1.4341948504220016e-05, "loss": 0.6731704473495483, "step": 2663 }, { "epoch": 0.7790612662669981, "grad_norm": 1.4598986218346195, "learning_rate": 1.4337592801054623e-05, "loss": 0.6827171444892883, "step": 2664 }, { "epoch": 0.7793537066822634, "grad_norm": 1.3963311439466064, "learning_rate": 1.4333236084003282e-05, "loss": 0.6654937267303467, "step": 2665 }, { "epoch": 0.7796461470975289, "grad_norm": 1.276825216432019, "learning_rate": 1.4328878354084355e-05, "loss": 0.5673532485961914, "step": 2666 }, { "epoch": 0.7799385875127943, "grad_norm": 1.3049192363130713, "learning_rate": 1.432451961231643e-05, "loss": 0.5401986241340637, "step": 2667 }, { "epoch": 0.7802310279280597, "grad_norm": 1.2877259559166432, "learning_rate": 1.4320159859718341e-05, "loss": 0.6134701371192932, "step": 2668 }, { "epoch": 0.7805234683433251, "grad_norm": 1.5022932512908924, "learning_rate": 1.4315799097309152e-05, "loss": 0.6913554668426514, "step": 2669 }, { "epoch": 0.7808159087585904, "grad_norm": 1.6126405133572825, "learning_rate": 1.4311437326108167e-05, "loss": 0.6969482898712158, "step": 2670 }, { "epoch": 0.7811083491738559, "grad_norm": 1.343855488902383, "learning_rate": 1.4307074547134918e-05, "loss": 0.6612537503242493, "step": 2671 }, { "epoch": 0.7814007895891212, "grad_norm": 1.1627822310905236, "learning_rate": 1.430271076140918e-05, "loss": 0.5545899868011475, "step": 2672 }, { "epoch": 0.7816932300043866, "grad_norm": 1.1885930128001867, "learning_rate": 1.4298345969950965e-05, "loss": 0.6635574698448181, "step": 2673 }, { "epoch": 0.781985670419652, "grad_norm": 1.4316816688950922, "learning_rate": 1.4293980173780514e-05, "loss": 0.5859510898590088, "step": 2674 }, { "epoch": 0.7822781108349174, "grad_norm": 1.246244040215616, "learning_rate": 1.4289613373918304e-05, "loss": 0.5839825868606567, "step": 2675 }, { "epoch": 0.7825705512501828, "grad_norm": 1.7192756445293216, "learning_rate": 1.428524557138505e-05, "loss": 0.6376889944076538, "step": 2676 }, { "epoch": 0.7828629916654481, "grad_norm": 1.2061132029389496, "learning_rate": 1.4280876767201696e-05, "loss": 0.5473129749298096, "step": 2677 }, { "epoch": 0.7831554320807136, "grad_norm": 1.2355367438994083, "learning_rate": 1.4276506962389429e-05, "loss": 0.6723904609680176, "step": 2678 }, { "epoch": 0.7834478724959789, "grad_norm": 1.318329485547163, "learning_rate": 1.4272136157969658e-05, "loss": 0.6036845445632935, "step": 2679 }, { "epoch": 0.7837403129112444, "grad_norm": 1.4527977807212105, "learning_rate": 1.4267764354964038e-05, "loss": 0.5993655920028687, "step": 2680 }, { "epoch": 0.7840327533265097, "grad_norm": 1.5159579383707373, "learning_rate": 1.4263391554394448e-05, "loss": 0.6678075194358826, "step": 2681 }, { "epoch": 0.7843251937417751, "grad_norm": 1.2588619303254647, "learning_rate": 1.4259017757283003e-05, "loss": 0.5627151727676392, "step": 2682 }, { "epoch": 0.7846176341570406, "grad_norm": 1.2632820141578516, "learning_rate": 1.4254642964652053e-05, "loss": 0.6060316562652588, "step": 2683 }, { "epoch": 0.7849100745723059, "grad_norm": 1.590473454276912, "learning_rate": 1.4250267177524177e-05, "loss": 0.6535854935646057, "step": 2684 }, { "epoch": 0.7852025149875713, "grad_norm": 1.499355267260573, "learning_rate": 1.4245890396922195e-05, "loss": 0.7141643762588501, "step": 2685 }, { "epoch": 0.7854949554028366, "grad_norm": 1.5067703709229516, "learning_rate": 1.4241512623869143e-05, "loss": 0.6685847640037537, "step": 2686 }, { "epoch": 0.7857873958181021, "grad_norm": 1.4195544467165693, "learning_rate": 1.4237133859388305e-05, "loss": 0.6745196580886841, "step": 2687 }, { "epoch": 0.7860798362333674, "grad_norm": 1.5617010746630147, "learning_rate": 1.423275410450319e-05, "loss": 0.6891968250274658, "step": 2688 }, { "epoch": 0.7863722766486329, "grad_norm": 1.3584703297700564, "learning_rate": 1.422837336023754e-05, "loss": 0.5614763498306274, "step": 2689 }, { "epoch": 0.7866647170638982, "grad_norm": 1.3595148335065306, "learning_rate": 1.4223991627615324e-05, "loss": 0.5867494344711304, "step": 2690 }, { "epoch": 0.7869571574791636, "grad_norm": 1.453264768444311, "learning_rate": 1.421960890766075e-05, "loss": 0.644777774810791, "step": 2691 }, { "epoch": 0.787249597894429, "grad_norm": 1.3023857436912896, "learning_rate": 1.4215225201398249e-05, "loss": 0.7237588167190552, "step": 2692 }, { "epoch": 0.7875420383096944, "grad_norm": 1.45851809360972, "learning_rate": 1.4210840509852484e-05, "loss": 0.6314423680305481, "step": 2693 }, { "epoch": 0.7878344787249598, "grad_norm": 1.2286351961246127, "learning_rate": 1.4206454834048353e-05, "loss": 0.5298433303833008, "step": 2694 }, { "epoch": 0.7881269191402251, "grad_norm": 1.1185262454319822, "learning_rate": 1.420206817501098e-05, "loss": 0.507548451423645, "step": 2695 }, { "epoch": 0.7884193595554906, "grad_norm": 1.7207072983596743, "learning_rate": 1.4197680533765721e-05, "loss": 0.7742520570755005, "step": 2696 }, { "epoch": 0.7887117999707559, "grad_norm": 1.3752660802878722, "learning_rate": 1.4193291911338161e-05, "loss": 0.6261187195777893, "step": 2697 }, { "epoch": 0.7890042403860213, "grad_norm": 1.521521524262885, "learning_rate": 1.4188902308754108e-05, "loss": 0.7501171827316284, "step": 2698 }, { "epoch": 0.7892966808012868, "grad_norm": 1.3001128857102173, "learning_rate": 1.4184511727039612e-05, "loss": 0.5590647459030151, "step": 2699 }, { "epoch": 0.7895891212165521, "grad_norm": 1.4479349527989895, "learning_rate": 1.4180120167220941e-05, "loss": 0.586786150932312, "step": 2700 }, { "epoch": 0.7898815616318176, "grad_norm": 1.2133244570308048, "learning_rate": 1.4175727630324598e-05, "loss": 0.5208219289779663, "step": 2701 }, { "epoch": 0.7901740020470829, "grad_norm": 1.2365924450408214, "learning_rate": 1.4171334117377312e-05, "loss": 0.5925623178482056, "step": 2702 }, { "epoch": 0.7904664424623483, "grad_norm": 1.5006045037979843, "learning_rate": 1.4166939629406034e-05, "loss": 0.7095032930374146, "step": 2703 }, { "epoch": 0.7907588828776136, "grad_norm": 1.167282378609361, "learning_rate": 1.4162544167437955e-05, "loss": 0.5683872699737549, "step": 2704 }, { "epoch": 0.7910513232928791, "grad_norm": 1.2605941476894575, "learning_rate": 1.4158147732500482e-05, "loss": 0.7079274654388428, "step": 2705 }, { "epoch": 0.7913437637081445, "grad_norm": 1.3186161570017685, "learning_rate": 1.415375032562126e-05, "loss": 0.6336439847946167, "step": 2706 }, { "epoch": 0.7916362041234098, "grad_norm": 1.14446239802259, "learning_rate": 1.414935194782816e-05, "loss": 0.4842381477355957, "step": 2707 }, { "epoch": 0.7919286445386753, "grad_norm": 1.4296190875249344, "learning_rate": 1.4144952600149267e-05, "loss": 0.5439653396606445, "step": 2708 }, { "epoch": 0.7922210849539406, "grad_norm": 1.2988205927389838, "learning_rate": 1.4140552283612906e-05, "loss": 0.6365468502044678, "step": 2709 }, { "epoch": 0.792513525369206, "grad_norm": 1.3854921286863888, "learning_rate": 1.4136150999247623e-05, "loss": 0.6192438006401062, "step": 2710 }, { "epoch": 0.7928059657844714, "grad_norm": 1.2293031316317269, "learning_rate": 1.4131748748082191e-05, "loss": 0.5695269703865051, "step": 2711 }, { "epoch": 0.7930984061997368, "grad_norm": 1.3405661548900325, "learning_rate": 1.4127345531145614e-05, "loss": 0.6892319321632385, "step": 2712 }, { "epoch": 0.7933908466150021, "grad_norm": 1.5220370415080073, "learning_rate": 1.4122941349467109e-05, "loss": 0.6294678449630737, "step": 2713 }, { "epoch": 0.7936832870302676, "grad_norm": 1.2086123903849104, "learning_rate": 1.4118536204076135e-05, "loss": 0.6666272878646851, "step": 2714 }, { "epoch": 0.793975727445533, "grad_norm": 1.2066166036349477, "learning_rate": 1.4114130096002363e-05, "loss": 0.5981796383857727, "step": 2715 }, { "epoch": 0.7942681678607983, "grad_norm": 1.5676320725913573, "learning_rate": 1.4109723026275695e-05, "loss": 0.6120023131370544, "step": 2716 }, { "epoch": 0.7945606082760638, "grad_norm": 1.536602454646116, "learning_rate": 1.4105314995926257e-05, "loss": 0.5892866849899292, "step": 2717 }, { "epoch": 0.7948530486913291, "grad_norm": 1.504529299257153, "learning_rate": 1.4100906005984404e-05, "loss": 0.7625553607940674, "step": 2718 }, { "epoch": 0.7951454891065945, "grad_norm": 1.4565362056936688, "learning_rate": 1.40964960574807e-05, "loss": 0.643633246421814, "step": 2719 }, { "epoch": 0.7954379295218599, "grad_norm": 1.2108583839611744, "learning_rate": 1.4092085151445953e-05, "loss": 0.46422284841537476, "step": 2720 }, { "epoch": 0.7957303699371253, "grad_norm": 1.2654408745652597, "learning_rate": 1.4087673288911182e-05, "loss": 0.6290001273155212, "step": 2721 }, { "epoch": 0.7960228103523908, "grad_norm": 1.2400549293858325, "learning_rate": 1.4083260470907632e-05, "loss": 0.5175197124481201, "step": 2722 }, { "epoch": 0.7963152507676561, "grad_norm": 1.4748861405916942, "learning_rate": 1.4078846698466776e-05, "loss": 0.6475427150726318, "step": 2723 }, { "epoch": 0.7966076911829215, "grad_norm": 1.3254407316825372, "learning_rate": 1.40744319726203e-05, "loss": 0.5978254079818726, "step": 2724 }, { "epoch": 0.7969001315981868, "grad_norm": 1.2991181525686113, "learning_rate": 1.4070016294400124e-05, "loss": 0.5738629102706909, "step": 2725 }, { "epoch": 0.7971925720134523, "grad_norm": 1.3493198611941248, "learning_rate": 1.4065599664838388e-05, "loss": 0.5809024572372437, "step": 2726 }, { "epoch": 0.7974850124287176, "grad_norm": 1.1539725667160117, "learning_rate": 1.4061182084967446e-05, "loss": 0.5907782316207886, "step": 2727 }, { "epoch": 0.797777452843983, "grad_norm": 1.4493981600012322, "learning_rate": 1.4056763555819887e-05, "loss": 0.7640036344528198, "step": 2728 }, { "epoch": 0.7980698932592484, "grad_norm": 1.5601806517528776, "learning_rate": 1.4052344078428513e-05, "loss": 0.7472168207168579, "step": 2729 }, { "epoch": 0.7983623336745138, "grad_norm": 1.6018546047693625, "learning_rate": 1.4047923653826347e-05, "loss": 0.6726990342140198, "step": 2730 }, { "epoch": 0.7986547740897793, "grad_norm": 1.3791137229331067, "learning_rate": 1.404350228304664e-05, "loss": 0.5949650406837463, "step": 2731 }, { "epoch": 0.7989472145050446, "grad_norm": 1.386756095528374, "learning_rate": 1.403907996712286e-05, "loss": 0.5578774213790894, "step": 2732 }, { "epoch": 0.79923965492031, "grad_norm": 1.5271585141569006, "learning_rate": 1.4034656707088692e-05, "loss": 0.6092333197593689, "step": 2733 }, { "epoch": 0.7995320953355753, "grad_norm": 1.3098390209876276, "learning_rate": 1.4030232503978053e-05, "loss": 0.5095718502998352, "step": 2734 }, { "epoch": 0.7998245357508408, "grad_norm": 1.3675399597044373, "learning_rate": 1.4025807358825072e-05, "loss": 0.5155727863311768, "step": 2735 }, { "epoch": 0.8001169761661061, "grad_norm": 1.3309663791332569, "learning_rate": 1.4021381272664094e-05, "loss": 0.5752589702606201, "step": 2736 }, { "epoch": 0.8004094165813715, "grad_norm": 1.3619611747950222, "learning_rate": 1.4016954246529697e-05, "loss": 0.6334787607192993, "step": 2737 }, { "epoch": 0.800701856996637, "grad_norm": 1.3830503239164076, "learning_rate": 1.4012526281456666e-05, "loss": 0.7406032085418701, "step": 2738 }, { "epoch": 0.8009942974119023, "grad_norm": 1.2904369174268238, "learning_rate": 1.4008097378480014e-05, "loss": 0.5805078744888306, "step": 2739 }, { "epoch": 0.8012867378271677, "grad_norm": 1.3584200788658642, "learning_rate": 1.4003667538634972e-05, "loss": 0.6849163770675659, "step": 2740 }, { "epoch": 0.8015791782424331, "grad_norm": 1.5354340760410032, "learning_rate": 1.3999236762956985e-05, "loss": 0.7707695960998535, "step": 2741 }, { "epoch": 0.8018716186576985, "grad_norm": 1.426293329050591, "learning_rate": 1.3994805052481715e-05, "loss": 0.6253059506416321, "step": 2742 }, { "epoch": 0.8021640590729638, "grad_norm": 1.274928204575108, "learning_rate": 1.3990372408245057e-05, "loss": 0.6450316905975342, "step": 2743 }, { "epoch": 0.8024564994882293, "grad_norm": 1.2867865996346037, "learning_rate": 1.398593883128311e-05, "loss": 0.672899603843689, "step": 2744 }, { "epoch": 0.8027489399034947, "grad_norm": 1.38176481949922, "learning_rate": 1.3981504322632198e-05, "loss": 0.6203787326812744, "step": 2745 }, { "epoch": 0.80304138031876, "grad_norm": 1.296034523853111, "learning_rate": 1.3977068883328854e-05, "loss": 0.541740894317627, "step": 2746 }, { "epoch": 0.8033338207340255, "grad_norm": 1.3608273440615848, "learning_rate": 1.3972632514409843e-05, "loss": 0.5566504001617432, "step": 2747 }, { "epoch": 0.8036262611492908, "grad_norm": 1.378445494532888, "learning_rate": 1.3968195216912135e-05, "loss": 0.6911404728889465, "step": 2748 }, { "epoch": 0.8039187015645562, "grad_norm": 1.3758218413869647, "learning_rate": 1.3963756991872921e-05, "loss": 0.6744735240936279, "step": 2749 }, { "epoch": 0.8042111419798216, "grad_norm": 1.3810636187989935, "learning_rate": 1.3959317840329613e-05, "loss": 0.6660502552986145, "step": 2750 }, { "epoch": 0.804503582395087, "grad_norm": 1.611467815082346, "learning_rate": 1.3954877763319832e-05, "loss": 0.607395589351654, "step": 2751 }, { "epoch": 0.8047960228103523, "grad_norm": 1.3065536354182021, "learning_rate": 1.395043676188142e-05, "loss": 0.53249192237854, "step": 2752 }, { "epoch": 0.8050884632256178, "grad_norm": 1.384670069600496, "learning_rate": 1.394599483705243e-05, "loss": 0.5728630423545837, "step": 2753 }, { "epoch": 0.8053809036408832, "grad_norm": 1.354298055615179, "learning_rate": 1.3941551989871142e-05, "loss": 0.6912537813186646, "step": 2754 }, { "epoch": 0.8056733440561485, "grad_norm": 1.2211163784496284, "learning_rate": 1.3937108221376041e-05, "loss": 0.6002523899078369, "step": 2755 }, { "epoch": 0.805965784471414, "grad_norm": 1.165855753943377, "learning_rate": 1.3932663532605832e-05, "loss": 0.6573797464370728, "step": 2756 }, { "epoch": 0.8062582248866793, "grad_norm": 1.2846173311931015, "learning_rate": 1.3928217924599433e-05, "loss": 0.6997278928756714, "step": 2757 }, { "epoch": 0.8065506653019447, "grad_norm": 1.3457721921363819, "learning_rate": 1.3923771398395978e-05, "loss": 0.565264105796814, "step": 2758 }, { "epoch": 0.8068431057172101, "grad_norm": 1.7064740069380804, "learning_rate": 1.3919323955034815e-05, "loss": 0.8065239191055298, "step": 2759 }, { "epoch": 0.8071355461324755, "grad_norm": 1.4850507802988735, "learning_rate": 1.3914875595555509e-05, "loss": 0.556678056716919, "step": 2760 }, { "epoch": 0.807427986547741, "grad_norm": 1.653442619870376, "learning_rate": 1.3910426320997834e-05, "loss": 0.5528635382652283, "step": 2761 }, { "epoch": 0.8077204269630063, "grad_norm": 1.4210714864438183, "learning_rate": 1.3905976132401785e-05, "loss": 0.6127038598060608, "step": 2762 }, { "epoch": 0.8080128673782717, "grad_norm": 1.4473812948635245, "learning_rate": 1.390152503080756e-05, "loss": 0.6311757564544678, "step": 2763 }, { "epoch": 0.808305307793537, "grad_norm": 1.256496005559394, "learning_rate": 1.389707301725558e-05, "loss": 0.669788122177124, "step": 2764 }, { "epoch": 0.8085977482088025, "grad_norm": 1.1602455830470428, "learning_rate": 1.3892620092786477e-05, "loss": 0.48408570885658264, "step": 2765 }, { "epoch": 0.8088901886240678, "grad_norm": 1.3816192110102654, "learning_rate": 1.3888166258441098e-05, "loss": 0.5648288726806641, "step": 2766 }, { "epoch": 0.8091826290393332, "grad_norm": 1.359222924847667, "learning_rate": 1.3883711515260497e-05, "loss": 0.5894806385040283, "step": 2767 }, { "epoch": 0.8094750694545986, "grad_norm": 1.609438084965147, "learning_rate": 1.3879255864285939e-05, "loss": 0.8325392603874207, "step": 2768 }, { "epoch": 0.809767509869864, "grad_norm": 1.3200888192290248, "learning_rate": 1.387479930655891e-05, "loss": 0.5282119512557983, "step": 2769 }, { "epoch": 0.8100599502851294, "grad_norm": 1.2020970963419326, "learning_rate": 1.3870341843121104e-05, "loss": 0.7565277218818665, "step": 2770 }, { "epoch": 0.8103523907003948, "grad_norm": 1.20769025145285, "learning_rate": 1.3865883475014424e-05, "loss": 0.5767146944999695, "step": 2771 }, { "epoch": 0.8106448311156602, "grad_norm": 1.3747646237948088, "learning_rate": 1.3861424203280987e-05, "loss": 0.5988898873329163, "step": 2772 }, { "epoch": 0.8109372715309255, "grad_norm": 1.2837797411261327, "learning_rate": 1.3856964028963119e-05, "loss": 0.5752500295639038, "step": 2773 }, { "epoch": 0.811229711946191, "grad_norm": 1.3281997353125305, "learning_rate": 1.385250295310336e-05, "loss": 0.6834297776222229, "step": 2774 }, { "epoch": 0.8115221523614563, "grad_norm": 1.376792748908409, "learning_rate": 1.3848040976744459e-05, "loss": 0.5667037963867188, "step": 2775 }, { "epoch": 0.8118145927767217, "grad_norm": 1.33236222276005, "learning_rate": 1.3843578100929375e-05, "loss": 0.5618781447410583, "step": 2776 }, { "epoch": 0.8121070331919872, "grad_norm": 1.4974631308124338, "learning_rate": 1.3839114326701281e-05, "loss": 0.538033664226532, "step": 2777 }, { "epoch": 0.8123994736072525, "grad_norm": 1.3236430994846111, "learning_rate": 1.3834649655103556e-05, "loss": 0.7218335270881653, "step": 2778 }, { "epoch": 0.812691914022518, "grad_norm": 1.3045533775783231, "learning_rate": 1.383018408717979e-05, "loss": 0.5979611873626709, "step": 2779 }, { "epoch": 0.8129843544377833, "grad_norm": 1.191818251767074, "learning_rate": 1.3825717623973775e-05, "loss": 0.4958215355873108, "step": 2780 }, { "epoch": 0.8132767948530487, "grad_norm": 1.4132643925978479, "learning_rate": 1.3821250266529531e-05, "loss": 0.6759654879570007, "step": 2781 }, { "epoch": 0.813569235268314, "grad_norm": 1.1873413404245543, "learning_rate": 1.3816782015891272e-05, "loss": 0.5499521493911743, "step": 2782 }, { "epoch": 0.8138616756835795, "grad_norm": 1.327517100573182, "learning_rate": 1.3812312873103425e-05, "loss": 0.5308753252029419, "step": 2783 }, { "epoch": 0.8141541160988449, "grad_norm": 1.4850132833469487, "learning_rate": 1.3807842839210617e-05, "loss": 0.585492730140686, "step": 2784 }, { "epoch": 0.8144465565141102, "grad_norm": 1.5985853231384999, "learning_rate": 1.3803371915257702e-05, "loss": 0.6598281860351562, "step": 2785 }, { "epoch": 0.8147389969293757, "grad_norm": 1.2500600856454092, "learning_rate": 1.3798900102289726e-05, "loss": 0.6819334030151367, "step": 2786 }, { "epoch": 0.815031437344641, "grad_norm": 2.1106639284366877, "learning_rate": 1.3794427401351946e-05, "loss": 0.6548545360565186, "step": 2787 }, { "epoch": 0.8153238777599064, "grad_norm": 1.4934248295829666, "learning_rate": 1.3789953813489834e-05, "loss": 0.7836263179779053, "step": 2788 }, { "epoch": 0.8156163181751718, "grad_norm": 1.3092153960785353, "learning_rate": 1.3785479339749062e-05, "loss": 0.6108324527740479, "step": 2789 }, { "epoch": 0.8159087585904372, "grad_norm": 1.4189973842835568, "learning_rate": 1.378100398117551e-05, "loss": 0.7079485058784485, "step": 2790 }, { "epoch": 0.8162011990057025, "grad_norm": 1.2593140459847156, "learning_rate": 1.3776527738815264e-05, "loss": 0.5935578346252441, "step": 2791 }, { "epoch": 0.816493639420968, "grad_norm": 1.159439153093783, "learning_rate": 1.3772050613714623e-05, "loss": 0.5559983253479004, "step": 2792 }, { "epoch": 0.8167860798362334, "grad_norm": 1.2282449471592758, "learning_rate": 1.3767572606920083e-05, "loss": 0.6230447292327881, "step": 2793 }, { "epoch": 0.8170785202514987, "grad_norm": 1.3750755360912204, "learning_rate": 1.3763093719478357e-05, "loss": 0.5672184824943542, "step": 2794 }, { "epoch": 0.8173709606667642, "grad_norm": 1.3345649111405589, "learning_rate": 1.3758613952436353e-05, "loss": 0.6933468580245972, "step": 2795 }, { "epoch": 0.8176634010820295, "grad_norm": 1.299919441217989, "learning_rate": 1.3754133306841188e-05, "loss": 0.5873827934265137, "step": 2796 }, { "epoch": 0.8179558414972949, "grad_norm": 1.3238138716227077, "learning_rate": 1.3749651783740188e-05, "loss": 0.6061393022537231, "step": 2797 }, { "epoch": 0.8182482819125603, "grad_norm": 1.3503137209197107, "learning_rate": 1.3745169384180886e-05, "loss": 0.6218947768211365, "step": 2798 }, { "epoch": 0.8185407223278257, "grad_norm": 1.584036085033884, "learning_rate": 1.3740686109211008e-05, "loss": 0.6092264652252197, "step": 2799 }, { "epoch": 0.8188331627430911, "grad_norm": 1.4327213465282531, "learning_rate": 1.3736201959878497e-05, "loss": 0.6145539283752441, "step": 2800 }, { "epoch": 0.8191256031583565, "grad_norm": 1.1433366189059146, "learning_rate": 1.3731716937231493e-05, "loss": 0.4637746214866638, "step": 2801 }, { "epoch": 0.8194180435736219, "grad_norm": 1.2802202387296946, "learning_rate": 1.3727231042318345e-05, "loss": 0.6102726459503174, "step": 2802 }, { "epoch": 0.8197104839888872, "grad_norm": 1.3432330324336637, "learning_rate": 1.3722744276187603e-05, "loss": 0.5885297060012817, "step": 2803 }, { "epoch": 0.8200029244041527, "grad_norm": 1.4575985112282515, "learning_rate": 1.3718256639888021e-05, "loss": 0.592369019985199, "step": 2804 }, { "epoch": 0.820295364819418, "grad_norm": 1.4943856663354038, "learning_rate": 1.3713768134468557e-05, "loss": 0.5194098949432373, "step": 2805 }, { "epoch": 0.8205878052346834, "grad_norm": 1.3716539173176907, "learning_rate": 1.370927876097837e-05, "loss": 0.6033506393432617, "step": 2806 }, { "epoch": 0.8208802456499488, "grad_norm": 1.686602588559283, "learning_rate": 1.3704788520466828e-05, "loss": 0.6866108179092407, "step": 2807 }, { "epoch": 0.8211726860652142, "grad_norm": 1.564205528186879, "learning_rate": 1.3700297413983492e-05, "loss": 0.7325261831283569, "step": 2808 }, { "epoch": 0.8214651264804796, "grad_norm": 1.531257665763453, "learning_rate": 1.3695805442578136e-05, "loss": 0.5422608852386475, "step": 2809 }, { "epoch": 0.821757566895745, "grad_norm": 1.5581516895112182, "learning_rate": 1.369131260730073e-05, "loss": 0.6124732494354248, "step": 2810 }, { "epoch": 0.8220500073110104, "grad_norm": 1.3009124551880797, "learning_rate": 1.3686818909201442e-05, "loss": 0.6097716093063354, "step": 2811 }, { "epoch": 0.8223424477262757, "grad_norm": 1.302794206877671, "learning_rate": 1.3682324349330652e-05, "loss": 0.6283478140830994, "step": 2812 }, { "epoch": 0.8226348881415412, "grad_norm": 1.6179042229288885, "learning_rate": 1.3677828928738934e-05, "loss": 0.6590027213096619, "step": 2813 }, { "epoch": 0.8229273285568065, "grad_norm": 1.5247617474384554, "learning_rate": 1.3673332648477065e-05, "loss": 0.6417049169540405, "step": 2814 }, { "epoch": 0.8232197689720719, "grad_norm": 1.510678230362789, "learning_rate": 1.3668835509596023e-05, "loss": 0.6217149496078491, "step": 2815 }, { "epoch": 0.8235122093873374, "grad_norm": 1.9022694632783144, "learning_rate": 1.3664337513146993e-05, "loss": 0.7530043125152588, "step": 2816 }, { "epoch": 0.8238046498026027, "grad_norm": 1.3235640761468095, "learning_rate": 1.3659838660181341e-05, "loss": 0.6690578460693359, "step": 2817 }, { "epoch": 0.8240970902178681, "grad_norm": 1.5311368229830338, "learning_rate": 1.3655338951750657e-05, "loss": 0.5348777174949646, "step": 2818 }, { "epoch": 0.8243895306331335, "grad_norm": 1.494896630136579, "learning_rate": 1.3650838388906718e-05, "loss": 0.7076361179351807, "step": 2819 }, { "epoch": 0.8246819710483989, "grad_norm": 1.611810759372966, "learning_rate": 1.3646336972701507e-05, "loss": 0.6649855375289917, "step": 2820 }, { "epoch": 0.8249744114636642, "grad_norm": 1.4188027146347701, "learning_rate": 1.3641834704187194e-05, "loss": 0.6484942436218262, "step": 2821 }, { "epoch": 0.8252668518789297, "grad_norm": 1.066364944063908, "learning_rate": 1.3637331584416163e-05, "loss": 0.5167717337608337, "step": 2822 }, { "epoch": 0.8255592922941951, "grad_norm": 1.4320675291883214, "learning_rate": 1.3632827614440988e-05, "loss": 0.7808440327644348, "step": 2823 }, { "epoch": 0.8258517327094604, "grad_norm": 1.6437853600585473, "learning_rate": 1.3628322795314449e-05, "loss": 0.551183819770813, "step": 2824 }, { "epoch": 0.8261441731247259, "grad_norm": 1.3439080199790612, "learning_rate": 1.3623817128089513e-05, "loss": 0.6084691286087036, "step": 2825 }, { "epoch": 0.8264366135399912, "grad_norm": 1.3974747336185755, "learning_rate": 1.3619310613819363e-05, "loss": 0.6251019239425659, "step": 2826 }, { "epoch": 0.8267290539552566, "grad_norm": 1.237260204163714, "learning_rate": 1.3614803253557358e-05, "loss": 0.5037761926651001, "step": 2827 }, { "epoch": 0.827021494370522, "grad_norm": 1.3461097726205675, "learning_rate": 1.3610295048357072e-05, "loss": 0.5606831312179565, "step": 2828 }, { "epoch": 0.8273139347857874, "grad_norm": 1.3850167464051482, "learning_rate": 1.360578599927227e-05, "loss": 0.6664785146713257, "step": 2829 }, { "epoch": 0.8276063752010527, "grad_norm": 1.3613746427457352, "learning_rate": 1.360127610735691e-05, "loss": 0.7105492353439331, "step": 2830 }, { "epoch": 0.8278988156163182, "grad_norm": 1.3577681820511107, "learning_rate": 1.3596765373665162e-05, "loss": 0.6255359053611755, "step": 2831 }, { "epoch": 0.8281912560315836, "grad_norm": 1.3150522794807806, "learning_rate": 1.3592253799251377e-05, "loss": 0.5422149300575256, "step": 2832 }, { "epoch": 0.8284836964468489, "grad_norm": 1.4383576380181533, "learning_rate": 1.3587741385170104e-05, "loss": 0.6044044494628906, "step": 2833 }, { "epoch": 0.8287761368621144, "grad_norm": 1.2478223452248756, "learning_rate": 1.3583228132476094e-05, "loss": 0.6256763935089111, "step": 2834 }, { "epoch": 0.8290685772773797, "grad_norm": 1.2507601544621354, "learning_rate": 1.3578714042224297e-05, "loss": 0.6759064793586731, "step": 2835 }, { "epoch": 0.8293610176926451, "grad_norm": 1.3610869198536528, "learning_rate": 1.3574199115469852e-05, "loss": 0.5819023251533508, "step": 2836 }, { "epoch": 0.8296534581079105, "grad_norm": 1.331505314238688, "learning_rate": 1.3569683353268098e-05, "loss": 0.5412642359733582, "step": 2837 }, { "epoch": 0.8299458985231759, "grad_norm": 1.0998151045906572, "learning_rate": 1.356516675667456e-05, "loss": 0.5129171013832092, "step": 2838 }, { "epoch": 0.8302383389384413, "grad_norm": 1.310393887156268, "learning_rate": 1.356064932674497e-05, "loss": 0.5165198445320129, "step": 2839 }, { "epoch": 0.8305307793537067, "grad_norm": 1.287643091691659, "learning_rate": 1.3556131064535249e-05, "loss": 0.6545724272727966, "step": 2840 }, { "epoch": 0.8308232197689721, "grad_norm": 1.2180901867245224, "learning_rate": 1.3551611971101513e-05, "loss": 0.5715968608856201, "step": 2841 }, { "epoch": 0.8311156601842374, "grad_norm": 1.1619522611517994, "learning_rate": 1.3547092047500074e-05, "loss": 0.7063779830932617, "step": 2842 }, { "epoch": 0.8314081005995029, "grad_norm": 1.2876429096537105, "learning_rate": 1.3542571294787437e-05, "loss": 0.6391212940216064, "step": 2843 }, { "epoch": 0.8317005410147682, "grad_norm": 1.3047489403917027, "learning_rate": 1.3538049714020298e-05, "loss": 0.7145380973815918, "step": 2844 }, { "epoch": 0.8319929814300336, "grad_norm": 1.4749234473747483, "learning_rate": 1.3533527306255547e-05, "loss": 0.7262213230133057, "step": 2845 }, { "epoch": 0.832285421845299, "grad_norm": 1.5661213009447377, "learning_rate": 1.3529004072550276e-05, "loss": 0.7621959447860718, "step": 2846 }, { "epoch": 0.8325778622605644, "grad_norm": 1.2349365167185542, "learning_rate": 1.3524480013961757e-05, "loss": 0.6372592449188232, "step": 2847 }, { "epoch": 0.8328703026758298, "grad_norm": 1.5746526285594844, "learning_rate": 1.3519955131547469e-05, "loss": 0.6223774552345276, "step": 2848 }, { "epoch": 0.8331627430910952, "grad_norm": 1.3246634087041118, "learning_rate": 1.3515429426365066e-05, "loss": 0.6500433683395386, "step": 2849 }, { "epoch": 0.8334551835063606, "grad_norm": 1.4424195637381385, "learning_rate": 1.3510902899472408e-05, "loss": 0.6136040687561035, "step": 2850 }, { "epoch": 0.8337476239216259, "grad_norm": 1.512738908953339, "learning_rate": 1.3506375551927546e-05, "loss": 0.5297173261642456, "step": 2851 }, { "epoch": 0.8340400643368914, "grad_norm": 1.4629352546381682, "learning_rate": 1.3501847384788718e-05, "loss": 0.6215870976448059, "step": 2852 }, { "epoch": 0.8343325047521567, "grad_norm": 1.3184866454725659, "learning_rate": 1.3497318399114354e-05, "loss": 0.5507583618164062, "step": 2853 }, { "epoch": 0.8346249451674221, "grad_norm": 1.6022185079697295, "learning_rate": 1.349278859596308e-05, "loss": 0.6348794102668762, "step": 2854 }, { "epoch": 0.8349173855826876, "grad_norm": 1.4038791520130975, "learning_rate": 1.3488257976393708e-05, "loss": 0.7009605765342712, "step": 2855 }, { "epoch": 0.8352098259979529, "grad_norm": 1.2288500000369813, "learning_rate": 1.3483726541465238e-05, "loss": 0.6268658638000488, "step": 2856 }, { "epoch": 0.8355022664132183, "grad_norm": 1.1391793971559063, "learning_rate": 1.3479194292236875e-05, "loss": 0.7187683582305908, "step": 2857 }, { "epoch": 0.8357947068284837, "grad_norm": 1.5724396660128028, "learning_rate": 1.3474661229768002e-05, "loss": 0.7016449570655823, "step": 2858 }, { "epoch": 0.8360871472437491, "grad_norm": 1.5882858400771258, "learning_rate": 1.347012735511819e-05, "loss": 0.5852428674697876, "step": 2859 }, { "epoch": 0.8363795876590144, "grad_norm": 1.4143289380031852, "learning_rate": 1.3465592669347207e-05, "loss": 0.6232450008392334, "step": 2860 }, { "epoch": 0.8366720280742799, "grad_norm": 1.3444277392597084, "learning_rate": 1.346105717351501e-05, "loss": 0.526097297668457, "step": 2861 }, { "epoch": 0.8369644684895453, "grad_norm": 1.5627282993073515, "learning_rate": 1.3456520868681741e-05, "loss": 0.6065535545349121, "step": 2862 }, { "epoch": 0.8372569089048106, "grad_norm": 1.3941305759607394, "learning_rate": 1.3451983755907736e-05, "loss": 0.5836296677589417, "step": 2863 }, { "epoch": 0.8375493493200761, "grad_norm": 1.336778139255592, "learning_rate": 1.3447445836253519e-05, "loss": 0.678827166557312, "step": 2864 }, { "epoch": 0.8378417897353414, "grad_norm": 1.3002974651392025, "learning_rate": 1.3442907110779794e-05, "loss": 0.5206096172332764, "step": 2865 }, { "epoch": 0.8381342301506068, "grad_norm": 1.3468789034772342, "learning_rate": 1.3438367580547468e-05, "loss": 0.6424980163574219, "step": 2866 }, { "epoch": 0.8384266705658722, "grad_norm": 1.1467777796306478, "learning_rate": 1.3433827246617624e-05, "loss": 0.6293484568595886, "step": 2867 }, { "epoch": 0.8387191109811376, "grad_norm": 1.2601562582063903, "learning_rate": 1.3429286110051539e-05, "loss": 0.5912167429924011, "step": 2868 }, { "epoch": 0.8390115513964029, "grad_norm": 1.5181261084157656, "learning_rate": 1.342474417191068e-05, "loss": 0.6571674346923828, "step": 2869 }, { "epoch": 0.8393039918116684, "grad_norm": 1.421037061270542, "learning_rate": 1.342020143325669e-05, "loss": 0.5519720911979675, "step": 2870 }, { "epoch": 0.8395964322269338, "grad_norm": 1.3997247827352193, "learning_rate": 1.341565789515141e-05, "loss": 0.6465001106262207, "step": 2871 }, { "epoch": 0.8398888726421991, "grad_norm": 1.398359818513133, "learning_rate": 1.3411113558656865e-05, "loss": 0.6022073030471802, "step": 2872 }, { "epoch": 0.8401813130574646, "grad_norm": 1.361775248337709, "learning_rate": 1.3406568424835264e-05, "loss": 0.610893726348877, "step": 2873 }, { "epoch": 0.8404737534727299, "grad_norm": 1.427563498701008, "learning_rate": 1.340202249474901e-05, "loss": 0.5296563506126404, "step": 2874 }, { "epoch": 0.8407661938879953, "grad_norm": 1.170906744718837, "learning_rate": 1.3397475769460679e-05, "loss": 0.6327008605003357, "step": 2875 }, { "epoch": 0.8410586343032607, "grad_norm": 1.3517928558744952, "learning_rate": 1.3392928250033045e-05, "loss": 0.6437617540359497, "step": 2876 }, { "epoch": 0.8413510747185261, "grad_norm": 1.3416431365752262, "learning_rate": 1.3388379937529063e-05, "loss": 0.5627291202545166, "step": 2877 }, { "epoch": 0.8416435151337915, "grad_norm": 1.3602688623647594, "learning_rate": 1.3383830833011871e-05, "loss": 0.5921163558959961, "step": 2878 }, { "epoch": 0.8419359555490569, "grad_norm": 1.2033937218328357, "learning_rate": 1.3379280937544797e-05, "loss": 0.5749082565307617, "step": 2879 }, { "epoch": 0.8422283959643223, "grad_norm": 1.462463173522237, "learning_rate": 1.3374730252191347e-05, "loss": 0.6294553279876709, "step": 2880 }, { "epoch": 0.8425208363795876, "grad_norm": 1.222130659730857, "learning_rate": 1.3370178778015223e-05, "loss": 0.5172078609466553, "step": 2881 }, { "epoch": 0.8428132767948531, "grad_norm": 1.3695607626504847, "learning_rate": 1.3365626516080301e-05, "loss": 0.44069811701774597, "step": 2882 }, { "epoch": 0.8431057172101184, "grad_norm": 1.31704500891114, "learning_rate": 1.336107346745064e-05, "loss": 0.72663813829422, "step": 2883 }, { "epoch": 0.8433981576253838, "grad_norm": 1.3488066557741722, "learning_rate": 1.3356519633190495e-05, "loss": 0.6562269926071167, "step": 2884 }, { "epoch": 0.8436905980406492, "grad_norm": 1.3994820366244107, "learning_rate": 1.3351965014364293e-05, "loss": 0.699925422668457, "step": 2885 }, { "epoch": 0.8439830384559146, "grad_norm": 1.3294441855934318, "learning_rate": 1.3347409612036651e-05, "loss": 0.5902425646781921, "step": 2886 }, { "epoch": 0.84427547887118, "grad_norm": 1.401705271294413, "learning_rate": 1.3342853427272362e-05, "loss": 0.613966703414917, "step": 2887 }, { "epoch": 0.8445679192864454, "grad_norm": 1.2630848315271062, "learning_rate": 1.333829646113641e-05, "loss": 0.5864139199256897, "step": 2888 }, { "epoch": 0.8448603597017108, "grad_norm": 1.5447722719058155, "learning_rate": 1.3333738714693958e-05, "loss": 0.5851572751998901, "step": 2889 }, { "epoch": 0.8451528001169761, "grad_norm": 1.4679598706703352, "learning_rate": 1.3329180189010348e-05, "loss": 0.6564328074455261, "step": 2890 }, { "epoch": 0.8454452405322416, "grad_norm": 1.3794930949186583, "learning_rate": 1.3324620885151115e-05, "loss": 0.6745615005493164, "step": 2891 }, { "epoch": 0.8457376809475069, "grad_norm": 1.277678612967463, "learning_rate": 1.3320060804181962e-05, "loss": 0.5003606081008911, "step": 2892 }, { "epoch": 0.8460301213627723, "grad_norm": 1.4995028165986726, "learning_rate": 1.3315499947168781e-05, "loss": 0.6646369695663452, "step": 2893 }, { "epoch": 0.8463225617780378, "grad_norm": 1.3696086888087433, "learning_rate": 1.3310938315177647e-05, "loss": 0.6903572082519531, "step": 2894 }, { "epoch": 0.8466150021933031, "grad_norm": 1.633835119151456, "learning_rate": 1.330637590927481e-05, "loss": 0.6221956610679626, "step": 2895 }, { "epoch": 0.8469074426085685, "grad_norm": 1.5369372818354106, "learning_rate": 1.3301812730526713e-05, "loss": 0.5602666139602661, "step": 2896 }, { "epoch": 0.8471998830238339, "grad_norm": 1.2910113915198014, "learning_rate": 1.3297248779999963e-05, "loss": 0.5843783617019653, "step": 2897 }, { "epoch": 0.8474923234390993, "grad_norm": 1.304495064263293, "learning_rate": 1.3292684058761357e-05, "loss": 0.5040254592895508, "step": 2898 }, { "epoch": 0.8477847638543646, "grad_norm": 1.4968280315795712, "learning_rate": 1.3288118567877874e-05, "loss": 0.6180210709571838, "step": 2899 }, { "epoch": 0.8480772042696301, "grad_norm": 1.345230482752467, "learning_rate": 1.3283552308416668e-05, "loss": 0.5050851106643677, "step": 2900 }, { "epoch": 0.8483696446848955, "grad_norm": 1.604217394640997, "learning_rate": 1.3278985281445072e-05, "loss": 0.6627126932144165, "step": 2901 }, { "epoch": 0.8486620851001608, "grad_norm": 1.40930260394039, "learning_rate": 1.3274417488030607e-05, "loss": 0.5984441041946411, "step": 2902 }, { "epoch": 0.8489545255154263, "grad_norm": 1.3584927833580034, "learning_rate": 1.3269848929240958e-05, "loss": 0.611599326133728, "step": 2903 }, { "epoch": 0.8492469659306916, "grad_norm": 1.4743229169395644, "learning_rate": 1.3265279606144006e-05, "loss": 0.6057847142219543, "step": 2904 }, { "epoch": 0.849539406345957, "grad_norm": 1.5324921987406994, "learning_rate": 1.3260709519807797e-05, "loss": 0.7123644948005676, "step": 2905 }, { "epoch": 0.8498318467612224, "grad_norm": 1.4337194400937256, "learning_rate": 1.3256138671300564e-05, "loss": 0.6193811893463135, "step": 2906 }, { "epoch": 0.8501242871764878, "grad_norm": 1.6102821646068017, "learning_rate": 1.3251567061690717e-05, "loss": 0.5775484442710876, "step": 2907 }, { "epoch": 0.8504167275917531, "grad_norm": 1.5171257755680165, "learning_rate": 1.3246994692046837e-05, "loss": 0.5655511617660522, "step": 2908 }, { "epoch": 0.8507091680070186, "grad_norm": 1.8299545213851978, "learning_rate": 1.3242421563437688e-05, "loss": 0.6216102838516235, "step": 2909 }, { "epoch": 0.851001608422284, "grad_norm": 1.4045274179517395, "learning_rate": 1.3237847676932217e-05, "loss": 0.649554967880249, "step": 2910 }, { "epoch": 0.8512940488375493, "grad_norm": 1.5965021256139, "learning_rate": 1.3233273033599534e-05, "loss": 0.6688281297683716, "step": 2911 }, { "epoch": 0.8515864892528148, "grad_norm": 1.2158705367599922, "learning_rate": 1.322869763450894e-05, "loss": 0.664188027381897, "step": 2912 }, { "epoch": 0.8518789296680801, "grad_norm": 1.27994094299147, "learning_rate": 1.3224121480729905e-05, "loss": 0.47189265489578247, "step": 2913 }, { "epoch": 0.8521713700833455, "grad_norm": 1.387813816085696, "learning_rate": 1.3219544573332075e-05, "loss": 0.6190480589866638, "step": 2914 }, { "epoch": 0.8524638104986109, "grad_norm": 1.3459335682790516, "learning_rate": 1.3214966913385277e-05, "loss": 0.6564091444015503, "step": 2915 }, { "epoch": 0.8527562509138763, "grad_norm": 1.563994961699158, "learning_rate": 1.321038850195951e-05, "loss": 0.6083766222000122, "step": 2916 }, { "epoch": 0.8530486913291417, "grad_norm": 1.2689051257322506, "learning_rate": 1.3205809340124951e-05, "loss": 0.5262473821640015, "step": 2917 }, { "epoch": 0.8533411317444071, "grad_norm": 1.3633671661320785, "learning_rate": 1.320122942895195e-05, "loss": 0.6170297861099243, "step": 2918 }, { "epoch": 0.8536335721596725, "grad_norm": 1.3838619263880951, "learning_rate": 1.3196648769511036e-05, "loss": 0.5791536569595337, "step": 2919 }, { "epoch": 0.8539260125749378, "grad_norm": 1.4116909766151964, "learning_rate": 1.3192067362872904e-05, "loss": 0.5870766639709473, "step": 2920 }, { "epoch": 0.8542184529902033, "grad_norm": 1.5317627298998806, "learning_rate": 1.3187485210108438e-05, "loss": 0.604548990726471, "step": 2921 }, { "epoch": 0.8545108934054686, "grad_norm": 1.3458362989469688, "learning_rate": 1.3182902312288682e-05, "loss": 0.5292568206787109, "step": 2922 }, { "epoch": 0.854803333820734, "grad_norm": 1.276264176970529, "learning_rate": 1.3178318670484862e-05, "loss": 0.5638582706451416, "step": 2923 }, { "epoch": 0.8550957742359994, "grad_norm": 1.5369089697533718, "learning_rate": 1.317373428576838e-05, "loss": 0.5730164051055908, "step": 2924 }, { "epoch": 0.8553882146512648, "grad_norm": 1.369500285153578, "learning_rate": 1.3169149159210803e-05, "loss": 0.6170799732208252, "step": 2925 }, { "epoch": 0.8556806550665302, "grad_norm": 1.3401436683949477, "learning_rate": 1.3164563291883879e-05, "loss": 0.591925323009491, "step": 2926 }, { "epoch": 0.8559730954817956, "grad_norm": 1.5178314176439451, "learning_rate": 1.3159976684859528e-05, "loss": 0.7269439697265625, "step": 2927 }, { "epoch": 0.856265535897061, "grad_norm": 1.552203527248451, "learning_rate": 1.3155389339209839e-05, "loss": 0.615471363067627, "step": 2928 }, { "epoch": 0.8565579763123263, "grad_norm": 1.4397776020126687, "learning_rate": 1.3150801256007076e-05, "loss": 0.6264692544937134, "step": 2929 }, { "epoch": 0.8568504167275918, "grad_norm": 1.203302342126932, "learning_rate": 1.314621243632368e-05, "loss": 0.5729779005050659, "step": 2930 }, { "epoch": 0.8571428571428571, "grad_norm": 1.3833464526102248, "learning_rate": 1.314162288123225e-05, "loss": 0.6462980508804321, "step": 2931 }, { "epoch": 0.8574352975581225, "grad_norm": 1.1795102455310789, "learning_rate": 1.3137032591805577e-05, "loss": 0.5493176579475403, "step": 2932 }, { "epoch": 0.857727737973388, "grad_norm": 1.1422942251299026, "learning_rate": 1.3132441569116608e-05, "loss": 0.49161234498023987, "step": 2933 }, { "epoch": 0.8580201783886533, "grad_norm": 1.428090020215004, "learning_rate": 1.312784981423847e-05, "loss": 0.6724506616592407, "step": 2934 }, { "epoch": 0.8583126188039187, "grad_norm": 1.6216709335890533, "learning_rate": 1.3123257328244455e-05, "loss": 0.6180965900421143, "step": 2935 }, { "epoch": 0.8586050592191841, "grad_norm": 1.6797724821518334, "learning_rate": 1.3118664112208027e-05, "loss": 0.6676491498947144, "step": 2936 }, { "epoch": 0.8588974996344495, "grad_norm": 1.1911121778916818, "learning_rate": 1.3114070167202827e-05, "loss": 0.5964041948318481, "step": 2937 }, { "epoch": 0.8591899400497148, "grad_norm": 1.3660050885815391, "learning_rate": 1.3109475494302657e-05, "loss": 0.708328366279602, "step": 2938 }, { "epoch": 0.8594823804649803, "grad_norm": 1.6146616988047677, "learning_rate": 1.3104880094581495e-05, "loss": 0.6360403299331665, "step": 2939 }, { "epoch": 0.8597748208802457, "grad_norm": 1.5628439078603966, "learning_rate": 1.3100283969113494e-05, "loss": 0.5450131893157959, "step": 2940 }, { "epoch": 0.860067261295511, "grad_norm": 1.2422442713506727, "learning_rate": 1.3095687118972962e-05, "loss": 0.4472329020500183, "step": 2941 }, { "epoch": 0.8603597017107765, "grad_norm": 1.2824654152788901, "learning_rate": 1.3091089545234387e-05, "loss": 0.6853972673416138, "step": 2942 }, { "epoch": 0.8606521421260418, "grad_norm": 1.5236765495118778, "learning_rate": 1.3086491248972429e-05, "loss": 0.6547979116439819, "step": 2943 }, { "epoch": 0.8609445825413072, "grad_norm": 1.2521364069886292, "learning_rate": 1.3081892231261903e-05, "loss": 0.46194693446159363, "step": 2944 }, { "epoch": 0.8612370229565726, "grad_norm": 1.3749685968664958, "learning_rate": 1.307729249317781e-05, "loss": 0.5715345144271851, "step": 2945 }, { "epoch": 0.861529463371838, "grad_norm": 1.2925136251134925, "learning_rate": 1.3072692035795305e-05, "loss": 0.5590982437133789, "step": 2946 }, { "epoch": 0.8618219037871033, "grad_norm": 1.4594997051230878, "learning_rate": 1.3068090860189719e-05, "loss": 0.5435009002685547, "step": 2947 }, { "epoch": 0.8621143442023688, "grad_norm": 1.1604259212434795, "learning_rate": 1.3063488967436548e-05, "loss": 0.4528965651988983, "step": 2948 }, { "epoch": 0.8624067846176342, "grad_norm": 1.1967844606343032, "learning_rate": 1.3058886358611457e-05, "loss": 0.5520291328430176, "step": 2949 }, { "epoch": 0.8626992250328995, "grad_norm": 1.3959982999797578, "learning_rate": 1.305428303479028e-05, "loss": 0.6444021463394165, "step": 2950 }, { "epoch": 0.862991665448165, "grad_norm": 1.597979452275331, "learning_rate": 1.3049678997049016e-05, "loss": 0.7808041572570801, "step": 2951 }, { "epoch": 0.8632841058634303, "grad_norm": 1.6855013913251111, "learning_rate": 1.3045074246463825e-05, "loss": 0.6297428607940674, "step": 2952 }, { "epoch": 0.8635765462786957, "grad_norm": 1.5203533995419023, "learning_rate": 1.3040468784111045e-05, "loss": 0.5776612162590027, "step": 2953 }, { "epoch": 0.8638689866939611, "grad_norm": 1.3696314111811954, "learning_rate": 1.3035862611067169e-05, "loss": 0.49298524856567383, "step": 2954 }, { "epoch": 0.8641614271092265, "grad_norm": 1.7023849342400221, "learning_rate": 1.303125572840887e-05, "loss": 0.8061650991439819, "step": 2955 }, { "epoch": 0.8644538675244919, "grad_norm": 1.4509821363343893, "learning_rate": 1.3026648137212976e-05, "loss": 0.7741662859916687, "step": 2956 }, { "epoch": 0.8647463079397573, "grad_norm": 1.350671993753925, "learning_rate": 1.302203983855648e-05, "loss": 0.5589889287948608, "step": 2957 }, { "epoch": 0.8650387483550227, "grad_norm": 1.400755532782556, "learning_rate": 1.3017430833516547e-05, "loss": 0.5801941752433777, "step": 2958 }, { "epoch": 0.865331188770288, "grad_norm": 1.3298019485580883, "learning_rate": 1.30128211231705e-05, "loss": 0.5874185562133789, "step": 2959 }, { "epoch": 0.8656236291855535, "grad_norm": 1.1737111706818832, "learning_rate": 1.3008210708595837e-05, "loss": 0.6062727570533752, "step": 2960 }, { "epoch": 0.8659160696008188, "grad_norm": 1.3334829952801492, "learning_rate": 1.3003599590870209e-05, "loss": 0.571448802947998, "step": 2961 }, { "epoch": 0.8662085100160842, "grad_norm": 1.3654619359177553, "learning_rate": 1.2998987771071442e-05, "loss": 0.7001944780349731, "step": 2962 }, { "epoch": 0.8665009504313496, "grad_norm": 1.48577297171421, "learning_rate": 1.2994375250277516e-05, "loss": 0.49182790517807007, "step": 2963 }, { "epoch": 0.866793390846615, "grad_norm": 1.342673325945858, "learning_rate": 1.298976202956658e-05, "loss": 0.5299041271209717, "step": 2964 }, { "epoch": 0.8670858312618804, "grad_norm": 1.1975267191215118, "learning_rate": 1.2985148110016947e-05, "loss": 0.4955265522003174, "step": 2965 }, { "epoch": 0.8673782716771458, "grad_norm": 1.132262479106049, "learning_rate": 1.2980533492707094e-05, "loss": 0.6395630836486816, "step": 2966 }, { "epoch": 0.8676707120924112, "grad_norm": 1.1303573523984183, "learning_rate": 1.2975918178715661e-05, "loss": 0.5926274061203003, "step": 2967 }, { "epoch": 0.8679631525076765, "grad_norm": 1.194805436445147, "learning_rate": 1.2971302169121447e-05, "loss": 0.5556914806365967, "step": 2968 }, { "epoch": 0.868255592922942, "grad_norm": 1.2766981949480176, "learning_rate": 1.2966685465003415e-05, "loss": 0.5347195863723755, "step": 2969 }, { "epoch": 0.8685480333382073, "grad_norm": 1.3728880032694415, "learning_rate": 1.2962068067440694e-05, "loss": 0.6839208006858826, "step": 2970 }, { "epoch": 0.8688404737534727, "grad_norm": 1.1132776608061867, "learning_rate": 1.295744997751257e-05, "loss": 0.5741337537765503, "step": 2971 }, { "epoch": 0.8691329141687382, "grad_norm": 1.536125480269087, "learning_rate": 1.29528311962985e-05, "loss": 0.7383404970169067, "step": 2972 }, { "epoch": 0.8694253545840035, "grad_norm": 1.4560088611056379, "learning_rate": 1.294821172487809e-05, "loss": 0.5075374245643616, "step": 2973 }, { "epoch": 0.8697177949992689, "grad_norm": 1.235849675897421, "learning_rate": 1.2943591564331113e-05, "loss": 0.557248592376709, "step": 2974 }, { "epoch": 0.8700102354145343, "grad_norm": 1.3655420768672006, "learning_rate": 1.2938970715737506e-05, "loss": 0.5687203407287598, "step": 2975 }, { "epoch": 0.8703026758297997, "grad_norm": 1.3479345698129241, "learning_rate": 1.2934349180177364e-05, "loss": 0.5946108102798462, "step": 2976 }, { "epoch": 0.870595116245065, "grad_norm": 1.258994257926457, "learning_rate": 1.2929726958730942e-05, "loss": 0.6103173494338989, "step": 2977 }, { "epoch": 0.8708875566603305, "grad_norm": 1.4914714674105345, "learning_rate": 1.2925104052478657e-05, "loss": 0.7007244825363159, "step": 2978 }, { "epoch": 0.8711799970755959, "grad_norm": 1.4140285074261345, "learning_rate": 1.2920480462501082e-05, "loss": 0.6157742142677307, "step": 2979 }, { "epoch": 0.8714724374908612, "grad_norm": 1.4708644175648395, "learning_rate": 1.2915856189878956e-05, "loss": 0.6501113176345825, "step": 2980 }, { "epoch": 0.8717648779061267, "grad_norm": 1.2555000815915451, "learning_rate": 1.2911231235693178e-05, "loss": 0.5084626078605652, "step": 2981 }, { "epoch": 0.872057318321392, "grad_norm": 1.343175395168551, "learning_rate": 1.2906605601024796e-05, "loss": 0.5953651666641235, "step": 2982 }, { "epoch": 0.8723497587366574, "grad_norm": 1.218776434986359, "learning_rate": 1.290197928695503e-05, "loss": 0.5733205676078796, "step": 2983 }, { "epoch": 0.8726421991519228, "grad_norm": 1.5420791901099857, "learning_rate": 1.2897352294565248e-05, "loss": 0.5976133942604065, "step": 2984 }, { "epoch": 0.8729346395671882, "grad_norm": 1.2904353456419873, "learning_rate": 1.2892724624936983e-05, "loss": 0.5092414617538452, "step": 2985 }, { "epoch": 0.8732270799824535, "grad_norm": 1.4935525581566107, "learning_rate": 1.2888096279151926e-05, "loss": 0.7244688272476196, "step": 2986 }, { "epoch": 0.873519520397719, "grad_norm": 1.5818576721862576, "learning_rate": 1.2883467258291922e-05, "loss": 0.6943881511688232, "step": 2987 }, { "epoch": 0.8738119608129844, "grad_norm": 1.422762914124539, "learning_rate": 1.287883756343898e-05, "loss": 0.6484338641166687, "step": 2988 }, { "epoch": 0.8741044012282497, "grad_norm": 1.23046146833686, "learning_rate": 1.2874207195675262e-05, "loss": 0.620865523815155, "step": 2989 }, { "epoch": 0.8743968416435152, "grad_norm": 1.245843663622743, "learning_rate": 1.2869576156083085e-05, "loss": 0.5290236473083496, "step": 2990 }, { "epoch": 0.8746892820587805, "grad_norm": 1.383695697280258, "learning_rate": 1.2864944445744932e-05, "loss": 0.7140257358551025, "step": 2991 }, { "epoch": 0.8749817224740459, "grad_norm": 1.3901579888827407, "learning_rate": 1.286031206574343e-05, "loss": 0.7167611122131348, "step": 2992 }, { "epoch": 0.8752741628893113, "grad_norm": 1.4097752029885913, "learning_rate": 1.2855679017161372e-05, "loss": 0.5631322860717773, "step": 2993 }, { "epoch": 0.8755666033045767, "grad_norm": 1.4535459078300315, "learning_rate": 1.2851045301081714e-05, "loss": 0.6250770092010498, "step": 2994 }, { "epoch": 0.8758590437198421, "grad_norm": 1.3041015408341177, "learning_rate": 1.2846410918587546e-05, "loss": 0.5121266841888428, "step": 2995 }, { "epoch": 0.8761514841351075, "grad_norm": 1.2982813372349626, "learning_rate": 1.2841775870762134e-05, "loss": 0.6075780987739563, "step": 2996 }, { "epoch": 0.8764439245503729, "grad_norm": 1.2610269556078437, "learning_rate": 1.283714015868889e-05, "loss": 0.516838014125824, "step": 2997 }, { "epoch": 0.8767363649656382, "grad_norm": 1.3997368275790003, "learning_rate": 1.2832503783451384e-05, "loss": 0.6952051520347595, "step": 2998 }, { "epoch": 0.8770288053809037, "grad_norm": 1.668277066498958, "learning_rate": 1.2827866746133342e-05, "loss": 0.8039685487747192, "step": 2999 }, { "epoch": 0.877321245796169, "grad_norm": 1.3275322129226486, "learning_rate": 1.2823229047818642e-05, "loss": 0.6200549602508545, "step": 3000 }, { "epoch": 0.8776136862114344, "grad_norm": 1.3153882408773916, "learning_rate": 1.2818590689591315e-05, "loss": 0.6666116714477539, "step": 3001 }, { "epoch": 0.8779061266266998, "grad_norm": 1.4255915593552042, "learning_rate": 1.2813951672535551e-05, "loss": 0.566741943359375, "step": 3002 }, { "epoch": 0.8781985670419652, "grad_norm": 1.2390037918473238, "learning_rate": 1.2809311997735697e-05, "loss": 0.6103402376174927, "step": 3003 }, { "epoch": 0.8784910074572306, "grad_norm": 1.1444583076116077, "learning_rate": 1.280467166627624e-05, "loss": 0.48296916484832764, "step": 3004 }, { "epoch": 0.878783447872496, "grad_norm": 1.4235586871910597, "learning_rate": 1.2800030679241834e-05, "loss": 0.5995723605155945, "step": 3005 }, { "epoch": 0.8790758882877614, "grad_norm": 1.5173093942193803, "learning_rate": 1.2795389037717286e-05, "loss": 0.6199642419815063, "step": 3006 }, { "epoch": 0.8793683287030267, "grad_norm": 1.5757356892284924, "learning_rate": 1.279074674278754e-05, "loss": 0.6740807294845581, "step": 3007 }, { "epoch": 0.8796607691182922, "grad_norm": 1.4923318097982954, "learning_rate": 1.2786103795537714e-05, "loss": 0.7330688238143921, "step": 3008 }, { "epoch": 0.8799532095335575, "grad_norm": 1.1357910142893406, "learning_rate": 1.2781460197053066e-05, "loss": 0.5048441290855408, "step": 3009 }, { "epoch": 0.8802456499488229, "grad_norm": 1.2484561154788956, "learning_rate": 1.277681594841901e-05, "loss": 0.6103702187538147, "step": 3010 }, { "epoch": 0.8805380903640884, "grad_norm": 1.3117487221252475, "learning_rate": 1.2772171050721107e-05, "loss": 0.5223366022109985, "step": 3011 }, { "epoch": 0.8808305307793537, "grad_norm": 1.5806437295259135, "learning_rate": 1.2767525505045078e-05, "loss": 0.708305835723877, "step": 3012 }, { "epoch": 0.8811229711946191, "grad_norm": 1.324207789268205, "learning_rate": 1.2762879312476785e-05, "loss": 0.6827911734580994, "step": 3013 }, { "epoch": 0.8814154116098845, "grad_norm": 1.7302207886555443, "learning_rate": 1.2758232474102254e-05, "loss": 0.6977027654647827, "step": 3014 }, { "epoch": 0.8817078520251499, "grad_norm": 1.235299173012923, "learning_rate": 1.2753584991007654e-05, "loss": 0.5534720420837402, "step": 3015 }, { "epoch": 0.8820002924404152, "grad_norm": 1.1722300923390174, "learning_rate": 1.2748936864279305e-05, "loss": 0.541682243347168, "step": 3016 }, { "epoch": 0.8822927328556807, "grad_norm": 1.4134630737456748, "learning_rate": 1.2744288095003674e-05, "loss": 0.6195456981658936, "step": 3017 }, { "epoch": 0.8825851732709461, "grad_norm": 1.1963339495389647, "learning_rate": 1.2739638684267387e-05, "loss": 0.5050234794616699, "step": 3018 }, { "epoch": 0.8828776136862114, "grad_norm": 1.1967088542641229, "learning_rate": 1.2734988633157218e-05, "loss": 0.5397066473960876, "step": 3019 }, { "epoch": 0.8831700541014769, "grad_norm": 1.3480056981854442, "learning_rate": 1.273033794276008e-05, "loss": 0.5932190418243408, "step": 3020 }, { "epoch": 0.8834624945167422, "grad_norm": 1.2383533139434324, "learning_rate": 1.2725686614163055e-05, "loss": 0.5780059099197388, "step": 3021 }, { "epoch": 0.8837549349320076, "grad_norm": 1.4379159594856536, "learning_rate": 1.2721034648453353e-05, "loss": 0.5850226879119873, "step": 3022 }, { "epoch": 0.884047375347273, "grad_norm": 1.351057706249645, "learning_rate": 1.2716382046718346e-05, "loss": 0.6684393882751465, "step": 3023 }, { "epoch": 0.8843398157625384, "grad_norm": 1.3578422906902012, "learning_rate": 1.271172881004555e-05, "loss": 0.6045842170715332, "step": 3024 }, { "epoch": 0.8846322561778037, "grad_norm": 1.4246831207517041, "learning_rate": 1.2707074939522633e-05, "loss": 0.6769551038742065, "step": 3025 }, { "epoch": 0.8849246965930692, "grad_norm": 1.541147063192512, "learning_rate": 1.2702420436237408e-05, "loss": 0.5581091642379761, "step": 3026 }, { "epoch": 0.8852171370083346, "grad_norm": 1.381695049653859, "learning_rate": 1.269776530127784e-05, "loss": 0.5010186433792114, "step": 3027 }, { "epoch": 0.8855095774235999, "grad_norm": 1.3620137613749654, "learning_rate": 1.2693109535732034e-05, "loss": 0.4537884294986725, "step": 3028 }, { "epoch": 0.8858020178388654, "grad_norm": 1.155156838639785, "learning_rate": 1.2688453140688246e-05, "loss": 0.5920443534851074, "step": 3029 }, { "epoch": 0.8860944582541307, "grad_norm": 1.4193738144287875, "learning_rate": 1.2683796117234884e-05, "loss": 0.564072847366333, "step": 3030 }, { "epoch": 0.8863868986693961, "grad_norm": 1.3206014730711304, "learning_rate": 1.26791384664605e-05, "loss": 0.657585620880127, "step": 3031 }, { "epoch": 0.8866793390846615, "grad_norm": 1.4850669504718117, "learning_rate": 1.2674480189453786e-05, "loss": 0.6864298582077026, "step": 3032 }, { "epoch": 0.8869717794999269, "grad_norm": 1.4143994971740543, "learning_rate": 1.266982128730359e-05, "loss": 0.6416069865226746, "step": 3033 }, { "epoch": 0.8872642199151923, "grad_norm": 1.3298070008922416, "learning_rate": 1.2665161761098899e-05, "loss": 0.6405118703842163, "step": 3034 }, { "epoch": 0.8875566603304577, "grad_norm": 1.4036133965159712, "learning_rate": 1.266050161192885e-05, "loss": 0.649673342704773, "step": 3035 }, { "epoch": 0.8878491007457231, "grad_norm": 1.359043965576467, "learning_rate": 1.2655840840882729e-05, "loss": 0.5914620161056519, "step": 3036 }, { "epoch": 0.8881415411609884, "grad_norm": 1.5837746169822255, "learning_rate": 1.2651179449049958e-05, "loss": 0.6080621480941772, "step": 3037 }, { "epoch": 0.8884339815762539, "grad_norm": 1.5302588008128089, "learning_rate": 1.264651743752011e-05, "loss": 0.657015860080719, "step": 3038 }, { "epoch": 0.8887264219915192, "grad_norm": 1.3603604072518423, "learning_rate": 1.26418548073829e-05, "loss": 0.5384848713874817, "step": 3039 }, { "epoch": 0.8890188624067846, "grad_norm": 1.5457096573294893, "learning_rate": 1.2637191559728195e-05, "loss": 0.7452554106712341, "step": 3040 }, { "epoch": 0.88931130282205, "grad_norm": 1.4411555623785637, "learning_rate": 1.2632527695645993e-05, "loss": 0.743236780166626, "step": 3041 }, { "epoch": 0.8896037432373154, "grad_norm": 1.5417347407679962, "learning_rate": 1.2627863216226453e-05, "loss": 0.557692289352417, "step": 3042 }, { "epoch": 0.8898961836525808, "grad_norm": 1.3302198914823486, "learning_rate": 1.2623198122559863e-05, "loss": 0.5637259483337402, "step": 3043 }, { "epoch": 0.8901886240678462, "grad_norm": 1.4403910054587767, "learning_rate": 1.261853241573666e-05, "loss": 0.5217350721359253, "step": 3044 }, { "epoch": 0.8904810644831116, "grad_norm": 1.4659582389098327, "learning_rate": 1.2613866096847423e-05, "loss": 0.5971624255180359, "step": 3045 }, { "epoch": 0.8907735048983769, "grad_norm": 1.5641010174504344, "learning_rate": 1.260919916698288e-05, "loss": 0.6586427092552185, "step": 3046 }, { "epoch": 0.8910659453136424, "grad_norm": 1.8045032510726307, "learning_rate": 1.2604531627233895e-05, "loss": 0.7059915661811829, "step": 3047 }, { "epoch": 0.8913583857289077, "grad_norm": 1.3406441666811264, "learning_rate": 1.2599863478691483e-05, "loss": 0.582252025604248, "step": 3048 }, { "epoch": 0.8916508261441731, "grad_norm": 1.2760858553291834, "learning_rate": 1.2595194722446786e-05, "loss": 0.6901981830596924, "step": 3049 }, { "epoch": 0.8919432665594386, "grad_norm": 1.5789638647855007, "learning_rate": 1.2590525359591101e-05, "loss": 0.7462388873100281, "step": 3050 }, { "epoch": 0.8922357069747039, "grad_norm": 1.1893369289763132, "learning_rate": 1.2585855391215866e-05, "loss": 0.4963245391845703, "step": 3051 }, { "epoch": 0.8925281473899693, "grad_norm": 1.427293357699651, "learning_rate": 1.2581184818412655e-05, "loss": 0.6408337354660034, "step": 3052 }, { "epoch": 0.8928205878052347, "grad_norm": 1.3357664905418998, "learning_rate": 1.257651364227319e-05, "loss": 0.44528326392173767, "step": 3053 }, { "epoch": 0.8931130282205001, "grad_norm": 1.4527206031665332, "learning_rate": 1.2571841863889322e-05, "loss": 0.4595017731189728, "step": 3054 }, { "epoch": 0.8934054686357654, "grad_norm": 1.435143014894245, "learning_rate": 1.2567169484353057e-05, "loss": 0.6934910416603088, "step": 3055 }, { "epoch": 0.8936979090510309, "grad_norm": 1.3543177360296097, "learning_rate": 1.2562496504756535e-05, "loss": 0.6392845511436462, "step": 3056 }, { "epoch": 0.8939903494662963, "grad_norm": 1.3638361282130094, "learning_rate": 1.255782292619203e-05, "loss": 0.5506458878517151, "step": 3057 }, { "epoch": 0.8942827898815616, "grad_norm": 1.3861859212756857, "learning_rate": 1.255314874975197e-05, "loss": 0.5871223211288452, "step": 3058 }, { "epoch": 0.8945752302968271, "grad_norm": 1.4446737131271559, "learning_rate": 1.254847397652892e-05, "loss": 0.603033185005188, "step": 3059 }, { "epoch": 0.8948676707120924, "grad_norm": 1.4764688506929942, "learning_rate": 1.2543798607615566e-05, "loss": 0.667452335357666, "step": 3060 }, { "epoch": 0.8951601111273578, "grad_norm": 1.5052245195755742, "learning_rate": 1.2539122644104755e-05, "loss": 0.6264449954032898, "step": 3061 }, { "epoch": 0.8954525515426232, "grad_norm": 1.2694525054193362, "learning_rate": 1.2534446087089465e-05, "loss": 0.6085609793663025, "step": 3062 }, { "epoch": 0.8957449919578886, "grad_norm": 1.5027824768205942, "learning_rate": 1.252976893766281e-05, "loss": 0.6414828896522522, "step": 3063 }, { "epoch": 0.8960374323731539, "grad_norm": 1.5067492390612103, "learning_rate": 1.2525091196918049e-05, "loss": 0.714614987373352, "step": 3064 }, { "epoch": 0.8963298727884194, "grad_norm": 1.4473594871396505, "learning_rate": 1.2520412865948574e-05, "loss": 0.5966176986694336, "step": 3065 }, { "epoch": 0.8966223132036848, "grad_norm": 1.234582474772498, "learning_rate": 1.2515733945847914e-05, "loss": 0.5162957906723022, "step": 3066 }, { "epoch": 0.8969147536189501, "grad_norm": 1.5378382727824902, "learning_rate": 1.2511054437709743e-05, "loss": 0.6460821628570557, "step": 3067 }, { "epoch": 0.8972071940342156, "grad_norm": 1.3526579806372556, "learning_rate": 1.2506374342627861e-05, "loss": 0.6802507638931274, "step": 3068 }, { "epoch": 0.8974996344494809, "grad_norm": 1.4306769896677902, "learning_rate": 1.2501693661696218e-05, "loss": 0.5966957807540894, "step": 3069 }, { "epoch": 0.8977920748647463, "grad_norm": 1.336293797847081, "learning_rate": 1.2497012396008893e-05, "loss": 0.607227087020874, "step": 3070 }, { "epoch": 0.8980845152800117, "grad_norm": 1.360686606627987, "learning_rate": 1.2492330546660098e-05, "loss": 0.6544637084007263, "step": 3071 }, { "epoch": 0.8983769556952771, "grad_norm": 1.410133865972111, "learning_rate": 1.2487648114744196e-05, "loss": 0.5896593332290649, "step": 3072 }, { "epoch": 0.8986693961105425, "grad_norm": 1.296908458370691, "learning_rate": 1.248296510135567e-05, "loss": 0.5710231065750122, "step": 3073 }, { "epoch": 0.8989618365258079, "grad_norm": 1.2057046094411794, "learning_rate": 1.2478281507589147e-05, "loss": 0.5918926000595093, "step": 3074 }, { "epoch": 0.8992542769410733, "grad_norm": 1.5306817529094334, "learning_rate": 1.2473597334539392e-05, "loss": 0.681663453578949, "step": 3075 }, { "epoch": 0.8995467173563386, "grad_norm": 1.2671727964507529, "learning_rate": 1.24689125833013e-05, "loss": 0.5229436159133911, "step": 3076 }, { "epoch": 0.8998391577716041, "grad_norm": 1.5769374861363958, "learning_rate": 1.2464227254969903e-05, "loss": 0.7165119051933289, "step": 3077 }, { "epoch": 0.9001315981868694, "grad_norm": 1.2324966791017462, "learning_rate": 1.2459541350640368e-05, "loss": 0.514594554901123, "step": 3078 }, { "epoch": 0.9004240386021348, "grad_norm": 1.4144268048636097, "learning_rate": 1.2454854871407993e-05, "loss": 0.6173784732818604, "step": 3079 }, { "epoch": 0.9007164790174002, "grad_norm": 1.6555744107314199, "learning_rate": 1.245016781836822e-05, "loss": 0.6796407103538513, "step": 3080 }, { "epoch": 0.9010089194326656, "grad_norm": 1.3666754181554102, "learning_rate": 1.2445480192616619e-05, "loss": 0.6901683807373047, "step": 3081 }, { "epoch": 0.901301359847931, "grad_norm": 1.295839204252469, "learning_rate": 1.2440791995248886e-05, "loss": 0.6215920448303223, "step": 3082 }, { "epoch": 0.9015938002631964, "grad_norm": 1.29381925555321, "learning_rate": 1.243610322736087e-05, "loss": 0.6109690070152283, "step": 3083 }, { "epoch": 0.9018862406784618, "grad_norm": 1.3751453546430485, "learning_rate": 1.2431413890048534e-05, "loss": 0.5273362398147583, "step": 3084 }, { "epoch": 0.9021786810937271, "grad_norm": 1.197511083408015, "learning_rate": 1.2426723984407982e-05, "loss": 0.5219408273696899, "step": 3085 }, { "epoch": 0.9024711215089926, "grad_norm": 1.4389803986869047, "learning_rate": 1.2422033511535458e-05, "loss": 0.6894690990447998, "step": 3086 }, { "epoch": 0.9027635619242579, "grad_norm": 1.2949596320128054, "learning_rate": 1.2417342472527325e-05, "loss": 0.6135656833648682, "step": 3087 }, { "epoch": 0.9030560023395233, "grad_norm": 1.4997841327771624, "learning_rate": 1.2412650868480088e-05, "loss": 0.595108151435852, "step": 3088 }, { "epoch": 0.9033484427547888, "grad_norm": 1.4068106482758378, "learning_rate": 1.2407958700490376e-05, "loss": 0.6445261240005493, "step": 3089 }, { "epoch": 0.9036408831700541, "grad_norm": 1.1391728287440939, "learning_rate": 1.240326596965496e-05, "loss": 0.5601890087127686, "step": 3090 }, { "epoch": 0.9039333235853195, "grad_norm": 1.4556896662499954, "learning_rate": 1.239857267707074e-05, "loss": 0.6229134798049927, "step": 3091 }, { "epoch": 0.9042257640005849, "grad_norm": 1.3633245090329542, "learning_rate": 1.2393878823834737e-05, "loss": 0.5769803524017334, "step": 3092 }, { "epoch": 0.9045182044158503, "grad_norm": 1.5373386649577192, "learning_rate": 1.2389184411044113e-05, "loss": 0.8101233243942261, "step": 3093 }, { "epoch": 0.9048106448311156, "grad_norm": 1.3507156228218853, "learning_rate": 1.2384489439796159e-05, "loss": 0.5562945604324341, "step": 3094 }, { "epoch": 0.9051030852463811, "grad_norm": 1.6942487879562902, "learning_rate": 1.2379793911188299e-05, "loss": 0.5764975547790527, "step": 3095 }, { "epoch": 0.9053955256616465, "grad_norm": 1.4280019855873591, "learning_rate": 1.2375097826318079e-05, "loss": 0.5951659083366394, "step": 3096 }, { "epoch": 0.9056879660769118, "grad_norm": 1.3804272066554735, "learning_rate": 1.2370401186283186e-05, "loss": 0.5550940632820129, "step": 3097 }, { "epoch": 0.9059804064921773, "grad_norm": 1.5012418323017303, "learning_rate": 1.2365703992181425e-05, "loss": 0.5423737168312073, "step": 3098 }, { "epoch": 0.9062728469074426, "grad_norm": 1.3277873552974655, "learning_rate": 1.236100624511074e-05, "loss": 0.633366048336029, "step": 3099 }, { "epoch": 0.906565287322708, "grad_norm": 1.2113954677804317, "learning_rate": 1.2356307946169202e-05, "loss": 0.6067361831665039, "step": 3100 }, { "epoch": 0.9068577277379734, "grad_norm": 1.1800329005672614, "learning_rate": 1.2351609096455006e-05, "loss": 0.6039519309997559, "step": 3101 }, { "epoch": 0.9071501681532388, "grad_norm": 1.3373962705942997, "learning_rate": 1.2346909697066486e-05, "loss": 0.5643757581710815, "step": 3102 }, { "epoch": 0.9074426085685041, "grad_norm": 1.4963223668806274, "learning_rate": 1.2342209749102088e-05, "loss": 0.5406394004821777, "step": 3103 }, { "epoch": 0.9077350489837696, "grad_norm": 1.3377232980381308, "learning_rate": 1.2337509253660404e-05, "loss": 0.5845915079116821, "step": 3104 }, { "epoch": 0.908027489399035, "grad_norm": 1.614536146442758, "learning_rate": 1.2332808211840147e-05, "loss": 0.6912981271743774, "step": 3105 }, { "epoch": 0.9083199298143003, "grad_norm": 1.2433178855630291, "learning_rate": 1.2328106624740151e-05, "loss": 0.5571672320365906, "step": 3106 }, { "epoch": 0.9086123702295658, "grad_norm": 1.0866011599268561, "learning_rate": 1.2323404493459386e-05, "loss": 0.5219087600708008, "step": 3107 }, { "epoch": 0.9089048106448311, "grad_norm": 1.3110052749572634, "learning_rate": 1.2318701819096952e-05, "loss": 0.5780971050262451, "step": 3108 }, { "epoch": 0.9091972510600965, "grad_norm": 1.4311943893173962, "learning_rate": 1.2313998602752063e-05, "loss": 0.6206589937210083, "step": 3109 }, { "epoch": 0.9094896914753618, "grad_norm": 1.4768884476442792, "learning_rate": 1.2309294845524068e-05, "loss": 0.6063584089279175, "step": 3110 }, { "epoch": 0.9097821318906273, "grad_norm": 1.7547035202334638, "learning_rate": 1.2304590548512445e-05, "loss": 0.5733555555343628, "step": 3111 }, { "epoch": 0.9100745723058927, "grad_norm": 1.0786362412869268, "learning_rate": 1.2299885712816792e-05, "loss": 0.5227848887443542, "step": 3112 }, { "epoch": 0.910367012721158, "grad_norm": 1.3268713618037162, "learning_rate": 1.2295180339536839e-05, "loss": 0.6357969045639038, "step": 3113 }, { "epoch": 0.9106594531364235, "grad_norm": 1.4243975329678797, "learning_rate": 1.2290474429772438e-05, "loss": 0.6194056272506714, "step": 3114 }, { "epoch": 0.9109518935516888, "grad_norm": 1.3151715542581663, "learning_rate": 1.2285767984623563e-05, "loss": 0.5274733304977417, "step": 3115 }, { "epoch": 0.9112443339669543, "grad_norm": 1.370068266036648, "learning_rate": 1.228106100519032e-05, "loss": 0.5612698197364807, "step": 3116 }, { "epoch": 0.9115367743822196, "grad_norm": 1.578530779654035, "learning_rate": 1.2276353492572937e-05, "loss": 0.6261074542999268, "step": 3117 }, { "epoch": 0.911829214797485, "grad_norm": 1.2011662273206838, "learning_rate": 1.2271645447871764e-05, "loss": 0.6407681703567505, "step": 3118 }, { "epoch": 0.9121216552127503, "grad_norm": 1.869370443317622, "learning_rate": 1.226693687218728e-05, "loss": 0.7862328290939331, "step": 3119 }, { "epoch": 0.9124140956280158, "grad_norm": 1.4175623746202768, "learning_rate": 1.2262227766620083e-05, "loss": 0.5079205632209778, "step": 3120 }, { "epoch": 0.9127065360432812, "grad_norm": 1.5666620241066453, "learning_rate": 1.2257518132270903e-05, "loss": 0.6074210405349731, "step": 3121 }, { "epoch": 0.9129989764585466, "grad_norm": 1.5222891825114737, "learning_rate": 1.2252807970240582e-05, "loss": 0.642460823059082, "step": 3122 }, { "epoch": 0.913291416873812, "grad_norm": 1.5105961127505823, "learning_rate": 1.22480972816301e-05, "loss": 0.5996612310409546, "step": 3123 }, { "epoch": 0.9135838572890773, "grad_norm": 1.4191755584361432, "learning_rate": 1.2243386067540548e-05, "loss": 0.5629523992538452, "step": 3124 }, { "epoch": 0.9138762977043428, "grad_norm": 1.488297008451051, "learning_rate": 1.223867432907314e-05, "loss": 0.5794960260391235, "step": 3125 }, { "epoch": 0.9141687381196081, "grad_norm": 1.4839380471480481, "learning_rate": 1.2233962067329217e-05, "loss": 0.6665213108062744, "step": 3126 }, { "epoch": 0.9144611785348735, "grad_norm": 1.7069185609011637, "learning_rate": 1.2229249283410245e-05, "loss": 0.6834249496459961, "step": 3127 }, { "epoch": 0.914753618950139, "grad_norm": 1.472483487554638, "learning_rate": 1.2224535978417809e-05, "loss": 0.5709845423698425, "step": 3128 }, { "epoch": 0.9150460593654043, "grad_norm": 1.3783113695609808, "learning_rate": 1.2219822153453613e-05, "loss": 0.5455344915390015, "step": 3129 }, { "epoch": 0.9153384997806697, "grad_norm": 1.5138708664001599, "learning_rate": 1.2215107809619483e-05, "loss": 0.6291406154632568, "step": 3130 }, { "epoch": 0.915630940195935, "grad_norm": 1.340686035335307, "learning_rate": 1.2210392948017371e-05, "loss": 0.5953069925308228, "step": 3131 }, { "epoch": 0.9159233806112005, "grad_norm": 1.3390197673162056, "learning_rate": 1.2205677569749347e-05, "loss": 0.6958901882171631, "step": 3132 }, { "epoch": 0.9162158210264658, "grad_norm": 2.251590691230911, "learning_rate": 1.2200961675917605e-05, "loss": 0.5867033004760742, "step": 3133 }, { "epoch": 0.9165082614417313, "grad_norm": 1.2167957981489814, "learning_rate": 1.2196245267624449e-05, "loss": 0.5364042520523071, "step": 3134 }, { "epoch": 0.9168007018569967, "grad_norm": 1.0997310314063415, "learning_rate": 1.2191528345972318e-05, "loss": 0.5141438841819763, "step": 3135 }, { "epoch": 0.917093142272262, "grad_norm": 1.1435709173541644, "learning_rate": 1.218681091206376e-05, "loss": 0.5024605393409729, "step": 3136 }, { "epoch": 0.9173855826875275, "grad_norm": 1.4583614763595478, "learning_rate": 1.2182092967001447e-05, "loss": 0.567114531993866, "step": 3137 }, { "epoch": 0.9176780231027928, "grad_norm": 1.4993671644221835, "learning_rate": 1.217737451188817e-05, "loss": 0.7224113941192627, "step": 3138 }, { "epoch": 0.9179704635180582, "grad_norm": 1.368376715547139, "learning_rate": 1.2172655547826839e-05, "loss": 0.6033936738967896, "step": 3139 }, { "epoch": 0.9182629039333235, "grad_norm": 1.4327847369216065, "learning_rate": 1.2167936075920486e-05, "loss": 0.5555745363235474, "step": 3140 }, { "epoch": 0.918555344348589, "grad_norm": 1.1757378939927343, "learning_rate": 1.2163216097272255e-05, "loss": 0.5939170718193054, "step": 3141 }, { "epoch": 0.9188477847638543, "grad_norm": 1.49535441688526, "learning_rate": 1.2158495612985415e-05, "loss": 0.7141895294189453, "step": 3142 }, { "epoch": 0.9191402251791198, "grad_norm": 1.5558405168210478, "learning_rate": 1.2153774624163345e-05, "loss": 0.585646390914917, "step": 3143 }, { "epoch": 0.9194326655943852, "grad_norm": 1.114182805953909, "learning_rate": 1.2149053131909556e-05, "loss": 0.5378825068473816, "step": 3144 }, { "epoch": 0.9197251060096505, "grad_norm": 1.383902731385194, "learning_rate": 1.2144331137327663e-05, "loss": 0.569821834564209, "step": 3145 }, { "epoch": 0.920017546424916, "grad_norm": 1.6457891792908532, "learning_rate": 1.2139608641521406e-05, "loss": 0.6101462244987488, "step": 3146 }, { "epoch": 0.9203099868401813, "grad_norm": 1.2016357640033675, "learning_rate": 1.2134885645594637e-05, "loss": 0.5481746792793274, "step": 3147 }, { "epoch": 0.9206024272554467, "grad_norm": 1.538402380383642, "learning_rate": 1.2130162150651326e-05, "loss": 0.7075197696685791, "step": 3148 }, { "epoch": 0.920894867670712, "grad_norm": 1.7217246005422928, "learning_rate": 1.2125438157795567e-05, "loss": 0.6375464200973511, "step": 3149 }, { "epoch": 0.9211873080859775, "grad_norm": 1.3850395600859229, "learning_rate": 1.2120713668131558e-05, "loss": 0.6954327821731567, "step": 3150 }, { "epoch": 0.9214797485012429, "grad_norm": 1.3658544095341296, "learning_rate": 1.2115988682763626e-05, "loss": 0.5855636596679688, "step": 3151 }, { "epoch": 0.9217721889165083, "grad_norm": 1.4751760026778278, "learning_rate": 1.2111263202796206e-05, "loss": 0.6056143641471863, "step": 3152 }, { "epoch": 0.9220646293317737, "grad_norm": 1.551741495670365, "learning_rate": 1.2106537229333848e-05, "loss": 0.7918239831924438, "step": 3153 }, { "epoch": 0.922357069747039, "grad_norm": 1.7033588700340108, "learning_rate": 1.2101810763481218e-05, "loss": 0.7772212028503418, "step": 3154 }, { "epoch": 0.9226495101623045, "grad_norm": 1.511966147005096, "learning_rate": 1.2097083806343104e-05, "loss": 0.6332443356513977, "step": 3155 }, { "epoch": 0.9229419505775698, "grad_norm": 1.358434184305942, "learning_rate": 1.2092356359024399e-05, "loss": 0.6254568099975586, "step": 3156 }, { "epoch": 0.9232343909928352, "grad_norm": 1.5630990314712985, "learning_rate": 1.208762842263012e-05, "loss": 0.6178697347640991, "step": 3157 }, { "epoch": 0.9235268314081005, "grad_norm": 1.1998616171531247, "learning_rate": 1.2082899998265387e-05, "loss": 0.5049355030059814, "step": 3158 }, { "epoch": 0.923819271823366, "grad_norm": 1.4513160919924062, "learning_rate": 1.2078171087035444e-05, "loss": 0.7013234496116638, "step": 3159 }, { "epoch": 0.9241117122386314, "grad_norm": 1.4119575222677514, "learning_rate": 1.2073441690045647e-05, "loss": 0.576643705368042, "step": 3160 }, { "epoch": 0.9244041526538967, "grad_norm": 1.2307321356514476, "learning_rate": 1.2068711808401459e-05, "loss": 0.5163617134094238, "step": 3161 }, { "epoch": 0.9246965930691622, "grad_norm": 1.39625806011197, "learning_rate": 1.2063981443208466e-05, "loss": 0.571370005607605, "step": 3162 }, { "epoch": 0.9249890334844275, "grad_norm": 1.3814954844513003, "learning_rate": 1.2059250595572358e-05, "loss": 0.7424927949905396, "step": 3163 }, { "epoch": 0.925281473899693, "grad_norm": 1.398481393831642, "learning_rate": 1.2054519266598946e-05, "loss": 0.6661131381988525, "step": 3164 }, { "epoch": 0.9255739143149583, "grad_norm": 1.382448951979987, "learning_rate": 1.2049787457394145e-05, "loss": 0.6416351795196533, "step": 3165 }, { "epoch": 0.9258663547302237, "grad_norm": 1.5012000035545232, "learning_rate": 1.2045055169063988e-05, "loss": 0.6708394289016724, "step": 3166 }, { "epoch": 0.9261587951454892, "grad_norm": 1.5269915566780659, "learning_rate": 1.2040322402714624e-05, "loss": 0.536340057849884, "step": 3167 }, { "epoch": 0.9264512355607545, "grad_norm": 1.4556897812811458, "learning_rate": 1.20355891594523e-05, "loss": 0.5621340274810791, "step": 3168 }, { "epoch": 0.9267436759760199, "grad_norm": 1.274628172323648, "learning_rate": 1.2030855440383387e-05, "loss": 0.5972496271133423, "step": 3169 }, { "epoch": 0.9270361163912852, "grad_norm": 1.4230845419048714, "learning_rate": 1.2026121246614362e-05, "loss": 0.567542314529419, "step": 3170 }, { "epoch": 0.9273285568065507, "grad_norm": 1.092340586033623, "learning_rate": 1.2021386579251814e-05, "loss": 0.5487483739852905, "step": 3171 }, { "epoch": 0.927620997221816, "grad_norm": 1.6219751059797927, "learning_rate": 1.2016651439402445e-05, "loss": 0.7988057136535645, "step": 3172 }, { "epoch": 0.9279134376370815, "grad_norm": 1.2231171520157942, "learning_rate": 1.2011915828173066e-05, "loss": 0.5333850979804993, "step": 3173 }, { "epoch": 0.9282058780523469, "grad_norm": 1.1146388373256622, "learning_rate": 1.2007179746670592e-05, "loss": 0.5640296936035156, "step": 3174 }, { "epoch": 0.9284983184676122, "grad_norm": 1.7918188640848236, "learning_rate": 1.2002443196002057e-05, "loss": 0.7154449820518494, "step": 3175 }, { "epoch": 0.9287907588828777, "grad_norm": 1.533684329230312, "learning_rate": 1.1997706177274597e-05, "loss": 0.8660446405410767, "step": 3176 }, { "epoch": 0.929083199298143, "grad_norm": 1.498753630747748, "learning_rate": 1.1992968691595465e-05, "loss": 0.601166307926178, "step": 3177 }, { "epoch": 0.9293756397134084, "grad_norm": 1.4563708289231845, "learning_rate": 1.1988230740072022e-05, "loss": 0.6197638511657715, "step": 3178 }, { "epoch": 0.9296680801286737, "grad_norm": 1.2218794629813654, "learning_rate": 1.198349232381173e-05, "loss": 0.5716423988342285, "step": 3179 }, { "epoch": 0.9299605205439392, "grad_norm": 1.1601969521725652, "learning_rate": 1.197875344392217e-05, "loss": 0.4319373071193695, "step": 3180 }, { "epoch": 0.9302529609592045, "grad_norm": 1.3226372570662766, "learning_rate": 1.1974014101511018e-05, "loss": 0.5299028158187866, "step": 3181 }, { "epoch": 0.93054540137447, "grad_norm": 1.4024951088839022, "learning_rate": 1.1969274297686075e-05, "loss": 0.7085509300231934, "step": 3182 }, { "epoch": 0.9308378417897354, "grad_norm": 1.3237854936063287, "learning_rate": 1.1964534033555237e-05, "loss": 0.6025770902633667, "step": 3183 }, { "epoch": 0.9311302822050007, "grad_norm": 1.2585066067859425, "learning_rate": 1.1959793310226518e-05, "loss": 0.5624677538871765, "step": 3184 }, { "epoch": 0.9314227226202662, "grad_norm": 1.3607236544497474, "learning_rate": 1.1955052128808025e-05, "loss": 0.602645754814148, "step": 3185 }, { "epoch": 0.9317151630355315, "grad_norm": 1.5196424442530971, "learning_rate": 1.1950310490407984e-05, "loss": 0.6495026350021362, "step": 3186 }, { "epoch": 0.9320076034507969, "grad_norm": 1.2037819566859902, "learning_rate": 1.1945568396134721e-05, "loss": 0.50370192527771, "step": 3187 }, { "epoch": 0.9323000438660622, "grad_norm": 1.4578860564520788, "learning_rate": 1.1940825847096677e-05, "loss": 0.5717373490333557, "step": 3188 }, { "epoch": 0.9325924842813277, "grad_norm": 1.2463647398252022, "learning_rate": 1.1936082844402395e-05, "loss": 0.5863519310951233, "step": 3189 }, { "epoch": 0.9328849246965931, "grad_norm": 1.3634372027202455, "learning_rate": 1.1931339389160516e-05, "loss": 0.6607284545898438, "step": 3190 }, { "epoch": 0.9331773651118584, "grad_norm": 1.2667041686104175, "learning_rate": 1.1926595482479799e-05, "loss": 0.5578058958053589, "step": 3191 }, { "epoch": 0.9334698055271239, "grad_norm": 1.577459199872034, "learning_rate": 1.19218511254691e-05, "loss": 0.6839171648025513, "step": 3192 }, { "epoch": 0.9337622459423892, "grad_norm": 1.4197717809462, "learning_rate": 1.1917106319237386e-05, "loss": 0.5071141719818115, "step": 3193 }, { "epoch": 0.9340546863576547, "grad_norm": 1.3302825340941604, "learning_rate": 1.1912361064893726e-05, "loss": 0.5112525820732117, "step": 3194 }, { "epoch": 0.93434712677292, "grad_norm": 1.3701575961238917, "learning_rate": 1.1907615363547299e-05, "loss": 0.5661873817443848, "step": 3195 }, { "epoch": 0.9346395671881854, "grad_norm": 1.3078991902724904, "learning_rate": 1.190286921630737e-05, "loss": 0.5520195364952087, "step": 3196 }, { "epoch": 0.9349320076034507, "grad_norm": 1.1923433518822224, "learning_rate": 1.1898122624283337e-05, "loss": 0.560089111328125, "step": 3197 }, { "epoch": 0.9352244480187162, "grad_norm": 1.3393482355065873, "learning_rate": 1.1893375588584681e-05, "loss": 0.6431207656860352, "step": 3198 }, { "epoch": 0.9355168884339816, "grad_norm": 1.6025933525200546, "learning_rate": 1.1888628110320995e-05, "loss": 0.7365666031837463, "step": 3199 }, { "epoch": 0.935809328849247, "grad_norm": 1.5181397488734587, "learning_rate": 1.1883880190601968e-05, "loss": 0.5455417633056641, "step": 3200 }, { "epoch": 0.9361017692645124, "grad_norm": 1.2648151177686433, "learning_rate": 1.1879131830537403e-05, "loss": 0.5749938488006592, "step": 3201 }, { "epoch": 0.9363942096797777, "grad_norm": 1.4774526931967815, "learning_rate": 1.1874383031237196e-05, "loss": 0.588424563407898, "step": 3202 }, { "epoch": 0.9366866500950431, "grad_norm": 1.7045519601542285, "learning_rate": 1.1869633793811352e-05, "loss": 0.7039792537689209, "step": 3203 }, { "epoch": 0.9369790905103085, "grad_norm": 1.3777530310932211, "learning_rate": 1.1864884119369977e-05, "loss": 0.5972777009010315, "step": 3204 }, { "epoch": 0.9372715309255739, "grad_norm": 1.5348242749242778, "learning_rate": 1.1860134009023281e-05, "loss": 0.6510647535324097, "step": 3205 }, { "epoch": 0.9375639713408394, "grad_norm": 1.3174058455781212, "learning_rate": 1.1855383463881566e-05, "loss": 0.606874406337738, "step": 3206 }, { "epoch": 0.9378564117561047, "grad_norm": 1.4675285988638056, "learning_rate": 1.1850632485055247e-05, "loss": 0.5527048110961914, "step": 3207 }, { "epoch": 0.9381488521713701, "grad_norm": 1.3531723389548285, "learning_rate": 1.1845881073654838e-05, "loss": 0.6297399997711182, "step": 3208 }, { "epoch": 0.9384412925866354, "grad_norm": 1.4561464002236073, "learning_rate": 1.184112923079095e-05, "loss": 0.5852634310722351, "step": 3209 }, { "epoch": 0.9387337330019009, "grad_norm": 1.276124242645333, "learning_rate": 1.1836376957574301e-05, "loss": 0.5648211240768433, "step": 3210 }, { "epoch": 0.9390261734171662, "grad_norm": 1.4542765956455581, "learning_rate": 1.1831624255115703e-05, "loss": 0.5547506213188171, "step": 3211 }, { "epoch": 0.9393186138324316, "grad_norm": 1.3882723904405088, "learning_rate": 1.1826871124526072e-05, "loss": 0.5927829146385193, "step": 3212 }, { "epoch": 0.9396110542476971, "grad_norm": 1.4870159815211654, "learning_rate": 1.182211756691642e-05, "loss": 0.5705278515815735, "step": 3213 }, { "epoch": 0.9399034946629624, "grad_norm": 1.3481561389317809, "learning_rate": 1.1817363583397868e-05, "loss": 0.547038197517395, "step": 3214 }, { "epoch": 0.9401959350782279, "grad_norm": 1.6799026497887648, "learning_rate": 1.1812609175081626e-05, "loss": 0.6136760115623474, "step": 3215 }, { "epoch": 0.9404883754934932, "grad_norm": 1.3697737055687615, "learning_rate": 1.1807854343079015e-05, "loss": 0.5784845352172852, "step": 3216 }, { "epoch": 0.9407808159087586, "grad_norm": 1.306268521565337, "learning_rate": 1.1803099088501439e-05, "loss": 0.6629599332809448, "step": 3217 }, { "epoch": 0.9410732563240239, "grad_norm": 1.3560413521315915, "learning_rate": 1.1798343412460416e-05, "loss": 0.6058052778244019, "step": 3218 }, { "epoch": 0.9413656967392894, "grad_norm": 1.236587656133179, "learning_rate": 1.1793587316067552e-05, "loss": 0.5689725875854492, "step": 3219 }, { "epoch": 0.9416581371545547, "grad_norm": 1.2722209400014248, "learning_rate": 1.1788830800434561e-05, "loss": 0.5718861818313599, "step": 3220 }, { "epoch": 0.9419505775698201, "grad_norm": 1.4517063699959183, "learning_rate": 1.1784073866673245e-05, "loss": 0.6061254739761353, "step": 3221 }, { "epoch": 0.9422430179850856, "grad_norm": 1.3732176542504997, "learning_rate": 1.1779316515895511e-05, "loss": 0.6805517077445984, "step": 3222 }, { "epoch": 0.9425354584003509, "grad_norm": 1.3828844754339646, "learning_rate": 1.1774558749213358e-05, "loss": 0.5553466081619263, "step": 3223 }, { "epoch": 0.9428278988156163, "grad_norm": 1.2173236944216692, "learning_rate": 1.176980056773889e-05, "loss": 0.6408798694610596, "step": 3224 }, { "epoch": 0.9431203392308817, "grad_norm": 1.222815565053331, "learning_rate": 1.1765041972584296e-05, "loss": 0.5269505381584167, "step": 3225 }, { "epoch": 0.9434127796461471, "grad_norm": 1.424391391794669, "learning_rate": 1.1760282964861873e-05, "loss": 0.682415246963501, "step": 3226 }, { "epoch": 0.9437052200614124, "grad_norm": 1.4623421356805024, "learning_rate": 1.1755523545684016e-05, "loss": 0.507567286491394, "step": 3227 }, { "epoch": 0.9439976604766779, "grad_norm": 1.4192334343942388, "learning_rate": 1.1750763716163199e-05, "loss": 0.6977763175964355, "step": 3228 }, { "epoch": 0.9442901008919433, "grad_norm": 1.3754010773945908, "learning_rate": 1.1746003477412007e-05, "loss": 0.5626407861709595, "step": 3229 }, { "epoch": 0.9445825413072086, "grad_norm": 1.537446067568307, "learning_rate": 1.1741242830543118e-05, "loss": 0.5280323624610901, "step": 3230 }, { "epoch": 0.9448749817224741, "grad_norm": 1.564549447099706, "learning_rate": 1.1736481776669307e-05, "loss": 0.6236885190010071, "step": 3231 }, { "epoch": 0.9451674221377394, "grad_norm": 1.2957140073878561, "learning_rate": 1.1731720316903435e-05, "loss": 0.5250823497772217, "step": 3232 }, { "epoch": 0.9454598625530048, "grad_norm": 1.3562245135276858, "learning_rate": 1.1726958452358472e-05, "loss": 0.5885770320892334, "step": 3233 }, { "epoch": 0.9457523029682702, "grad_norm": 1.5466392002562799, "learning_rate": 1.1722196184147467e-05, "loss": 0.7812498807907104, "step": 3234 }, { "epoch": 0.9460447433835356, "grad_norm": 2.1182720670568678, "learning_rate": 1.1717433513383575e-05, "loss": 0.6763796210289001, "step": 3235 }, { "epoch": 0.9463371837988009, "grad_norm": 1.4130641179603503, "learning_rate": 1.1712670441180045e-05, "loss": 0.5983982682228088, "step": 3236 }, { "epoch": 0.9466296242140664, "grad_norm": 1.4075974845813908, "learning_rate": 1.1707906968650214e-05, "loss": 0.6665002107620239, "step": 3237 }, { "epoch": 0.9469220646293318, "grad_norm": 1.3129047594602676, "learning_rate": 1.1703143096907507e-05, "loss": 0.7676652669906616, "step": 3238 }, { "epoch": 0.9472145050445971, "grad_norm": 1.552106023331421, "learning_rate": 1.1698378827065461e-05, "loss": 0.710014820098877, "step": 3239 }, { "epoch": 0.9475069454598626, "grad_norm": 1.3709978679968329, "learning_rate": 1.169361416023769e-05, "loss": 0.5800554752349854, "step": 3240 }, { "epoch": 0.9477993858751279, "grad_norm": 1.2790925568283578, "learning_rate": 1.1688849097537904e-05, "loss": 0.602012574672699, "step": 3241 }, { "epoch": 0.9480918262903933, "grad_norm": 1.4089569844293444, "learning_rate": 1.1684083640079912e-05, "loss": 0.4943910241127014, "step": 3242 }, { "epoch": 0.9483842667056587, "grad_norm": 1.3173293444454082, "learning_rate": 1.1679317788977609e-05, "loss": 0.49094298481941223, "step": 3243 }, { "epoch": 0.9486767071209241, "grad_norm": 1.1684708220820899, "learning_rate": 1.1674551545344983e-05, "loss": 0.46416157484054565, "step": 3244 }, { "epoch": 0.9489691475361896, "grad_norm": 1.3422229221849986, "learning_rate": 1.1669784910296114e-05, "loss": 0.5170255899429321, "step": 3245 }, { "epoch": 0.9492615879514549, "grad_norm": 1.3467691134757651, "learning_rate": 1.1665017884945174e-05, "loss": 0.7673200368881226, "step": 3246 }, { "epoch": 0.9495540283667203, "grad_norm": 1.194998950326605, "learning_rate": 1.1660250470406426e-05, "loss": 0.49335333704948425, "step": 3247 }, { "epoch": 0.9498464687819856, "grad_norm": 1.5055569823397887, "learning_rate": 1.1655482667794228e-05, "loss": 0.6620640754699707, "step": 3248 }, { "epoch": 0.9501389091972511, "grad_norm": 1.5536985980342881, "learning_rate": 1.1650714478223022e-05, "loss": 0.600047767162323, "step": 3249 }, { "epoch": 0.9504313496125164, "grad_norm": 1.449375702915225, "learning_rate": 1.164594590280734e-05, "loss": 0.668572187423706, "step": 3250 }, { "epoch": 0.9507237900277818, "grad_norm": 1.28696773590094, "learning_rate": 1.1641176942661812e-05, "loss": 0.4460945725440979, "step": 3251 }, { "epoch": 0.9510162304430473, "grad_norm": 1.553130185640807, "learning_rate": 1.1636407598901154e-05, "loss": 0.6650545597076416, "step": 3252 }, { "epoch": 0.9513086708583126, "grad_norm": 1.4537452557116313, "learning_rate": 1.1631637872640166e-05, "loss": 0.5631237030029297, "step": 3253 }, { "epoch": 0.951601111273578, "grad_norm": 1.2642307643713007, "learning_rate": 1.162686776499375e-05, "loss": 0.650580883026123, "step": 3254 }, { "epoch": 0.9518935516888434, "grad_norm": 1.2808622379645098, "learning_rate": 1.1622097277076883e-05, "loss": 0.5606606602668762, "step": 3255 }, { "epoch": 0.9521859921041088, "grad_norm": 1.6059525544711786, "learning_rate": 1.1617326410004639e-05, "loss": 0.667366623878479, "step": 3256 }, { "epoch": 0.9524784325193741, "grad_norm": 1.2848877829061671, "learning_rate": 1.1612555164892181e-05, "loss": 0.5895084738731384, "step": 3257 }, { "epoch": 0.9527708729346396, "grad_norm": 1.3031742059601414, "learning_rate": 1.1607783542854759e-05, "loss": 0.6468119025230408, "step": 3258 }, { "epoch": 0.9530633133499049, "grad_norm": 1.567653748749065, "learning_rate": 1.1603011545007708e-05, "loss": 0.7178056240081787, "step": 3259 }, { "epoch": 0.9533557537651703, "grad_norm": 1.0796246328531958, "learning_rate": 1.1598239172466457e-05, "loss": 0.42994585633277893, "step": 3260 }, { "epoch": 0.9536481941804358, "grad_norm": 1.3208710287997751, "learning_rate": 1.1593466426346513e-05, "loss": 0.4939822554588318, "step": 3261 }, { "epoch": 0.9539406345957011, "grad_norm": 1.4828958620285886, "learning_rate": 1.1588693307763483e-05, "loss": 0.4252137839794159, "step": 3262 }, { "epoch": 0.9542330750109665, "grad_norm": 1.4293991408504185, "learning_rate": 1.1583919817833051e-05, "loss": 0.5772995948791504, "step": 3263 }, { "epoch": 0.9545255154262319, "grad_norm": 1.4892265763022432, "learning_rate": 1.1579145957670992e-05, "loss": 0.6784560680389404, "step": 3264 }, { "epoch": 0.9548179558414973, "grad_norm": 1.4340903064465058, "learning_rate": 1.1574371728393169e-05, "loss": 0.5373483896255493, "step": 3265 }, { "epoch": 0.9551103962567626, "grad_norm": 1.5590731671081544, "learning_rate": 1.1569597131115523e-05, "loss": 0.7517837285995483, "step": 3266 }, { "epoch": 0.9554028366720281, "grad_norm": 1.2323534514024168, "learning_rate": 1.1564822166954092e-05, "loss": 0.6715551614761353, "step": 3267 }, { "epoch": 0.9556952770872935, "grad_norm": 1.5740418428519831, "learning_rate": 1.1560046837024994e-05, "loss": 0.6892265677452087, "step": 3268 }, { "epoch": 0.9559877175025588, "grad_norm": 1.1845546480418727, "learning_rate": 1.1555271142444433e-05, "loss": 0.5564894676208496, "step": 3269 }, { "epoch": 0.9562801579178243, "grad_norm": 1.4735106062071393, "learning_rate": 1.15504950843287e-05, "loss": 0.6211465001106262, "step": 3270 }, { "epoch": 0.9565725983330896, "grad_norm": 1.360797371118281, "learning_rate": 1.1545718663794165e-05, "loss": 0.6189093589782715, "step": 3271 }, { "epoch": 0.956865038748355, "grad_norm": 1.332461163898103, "learning_rate": 1.1540941881957293e-05, "loss": 0.6600508689880371, "step": 3272 }, { "epoch": 0.9571574791636204, "grad_norm": 1.1722369932825303, "learning_rate": 1.1536164739934626e-05, "loss": 0.5891202688217163, "step": 3273 }, { "epoch": 0.9574499195788858, "grad_norm": 1.450456789269031, "learning_rate": 1.1531387238842788e-05, "loss": 0.5996856093406677, "step": 3274 }, { "epoch": 0.9577423599941511, "grad_norm": 1.3947581203143906, "learning_rate": 1.15266093797985e-05, "loss": 0.5645085573196411, "step": 3275 }, { "epoch": 0.9580348004094166, "grad_norm": 1.3192013477387883, "learning_rate": 1.1521831163918545e-05, "loss": 0.5934250354766846, "step": 3276 }, { "epoch": 0.958327240824682, "grad_norm": 1.3125475487560205, "learning_rate": 1.151705259231981e-05, "loss": 0.6659657955169678, "step": 3277 }, { "epoch": 0.9586196812399473, "grad_norm": 1.4439329469838202, "learning_rate": 1.1512273666119255e-05, "loss": 0.518921434879303, "step": 3278 }, { "epoch": 0.9589121216552128, "grad_norm": 1.5520324796179028, "learning_rate": 1.1507494386433927e-05, "loss": 0.6015551686286926, "step": 3279 }, { "epoch": 0.9592045620704781, "grad_norm": 1.3864839845404684, "learning_rate": 1.150271475438095e-05, "loss": 0.5590265393257141, "step": 3280 }, { "epoch": 0.9594970024857435, "grad_norm": 2.135782810317134, "learning_rate": 1.149793477107754e-05, "loss": 0.5820340514183044, "step": 3281 }, { "epoch": 0.9597894429010089, "grad_norm": 1.5263684685914536, "learning_rate": 1.1493154437640981e-05, "loss": 0.5356709957122803, "step": 3282 }, { "epoch": 0.9600818833162743, "grad_norm": 1.6754028625571513, "learning_rate": 1.1488373755188651e-05, "loss": 0.7024146318435669, "step": 3283 }, { "epoch": 0.9603743237315397, "grad_norm": 1.1672092433368113, "learning_rate": 1.1483592724838007e-05, "loss": 0.4929785132408142, "step": 3284 }, { "epoch": 0.9606667641468051, "grad_norm": 1.288237919875972, "learning_rate": 1.147881134770658e-05, "loss": 0.6902902126312256, "step": 3285 }, { "epoch": 0.9609592045620705, "grad_norm": 1.3348356135288268, "learning_rate": 1.1474029624911997e-05, "loss": 0.5339258313179016, "step": 3286 }, { "epoch": 0.9612516449773358, "grad_norm": 1.4657145875756896, "learning_rate": 1.146924755757195e-05, "loss": 0.6998730897903442, "step": 3287 }, { "epoch": 0.9615440853926013, "grad_norm": 1.257948537764273, "learning_rate": 1.1464465146804218e-05, "loss": 0.6174519062042236, "step": 3288 }, { "epoch": 0.9618365258078666, "grad_norm": 1.812192547108516, "learning_rate": 1.145968239372666e-05, "loss": 0.5395258665084839, "step": 3289 }, { "epoch": 0.962128966223132, "grad_norm": 1.4759469600623887, "learning_rate": 1.1454899299457221e-05, "loss": 0.6355341672897339, "step": 3290 }, { "epoch": 0.9624214066383975, "grad_norm": 1.519697305957534, "learning_rate": 1.1450115865113916e-05, "loss": 0.5315179228782654, "step": 3291 }, { "epoch": 0.9627138470536628, "grad_norm": 1.468105168017502, "learning_rate": 1.1445332091814844e-05, "loss": 0.5595142841339111, "step": 3292 }, { "epoch": 0.9630062874689282, "grad_norm": 1.2033736096293444, "learning_rate": 1.1440547980678185e-05, "loss": 0.5509291291236877, "step": 3293 }, { "epoch": 0.9632987278841936, "grad_norm": 1.5381505996084959, "learning_rate": 1.1435763532822191e-05, "loss": 0.6831322908401489, "step": 3294 }, { "epoch": 0.963591168299459, "grad_norm": 1.3733453232745707, "learning_rate": 1.1430978749365203e-05, "loss": 0.5494598150253296, "step": 3295 }, { "epoch": 0.9638836087147243, "grad_norm": 1.498661160088125, "learning_rate": 1.142619363142563e-05, "loss": 0.5613550543785095, "step": 3296 }, { "epoch": 0.9641760491299898, "grad_norm": 1.5212850266198317, "learning_rate": 1.1421408180121972e-05, "loss": 0.656089186668396, "step": 3297 }, { "epoch": 0.9644684895452551, "grad_norm": 1.1510410875603876, "learning_rate": 1.1416622396572791e-05, "loss": 0.5913431644439697, "step": 3298 }, { "epoch": 0.9647609299605205, "grad_norm": 1.3644056514467953, "learning_rate": 1.1411836281896737e-05, "loss": 0.6706565022468567, "step": 3299 }, { "epoch": 0.965053370375786, "grad_norm": 1.3661421058655916, "learning_rate": 1.1407049837212539e-05, "loss": 0.6169217824935913, "step": 3300 }, { "epoch": 0.9653458107910513, "grad_norm": 1.2988460072876178, "learning_rate": 1.1402263063638994e-05, "loss": 0.5516680479049683, "step": 3301 }, { "epoch": 0.9656382512063167, "grad_norm": 1.2914486970247845, "learning_rate": 1.1397475962294986e-05, "loss": 0.7105098962783813, "step": 3302 }, { "epoch": 0.9659306916215821, "grad_norm": 1.5297340917133426, "learning_rate": 1.139268853429947e-05, "loss": 0.6183327436447144, "step": 3303 }, { "epoch": 0.9662231320368475, "grad_norm": 1.4183780196378124, "learning_rate": 1.1387900780771472e-05, "loss": 0.6160033941268921, "step": 3304 }, { "epoch": 0.9665155724521128, "grad_norm": 1.4212044707464202, "learning_rate": 1.1383112702830108e-05, "loss": 0.5526994466781616, "step": 3305 }, { "epoch": 0.9668080128673783, "grad_norm": 1.381901469460175, "learning_rate": 1.137832430159456e-05, "loss": 0.5476477742195129, "step": 3306 }, { "epoch": 0.9671004532826437, "grad_norm": 1.3794404018811846, "learning_rate": 1.1373535578184083e-05, "loss": 0.558393657207489, "step": 3307 }, { "epoch": 0.967392893697909, "grad_norm": 1.4577860579810487, "learning_rate": 1.1368746533718017e-05, "loss": 0.6302276849746704, "step": 3308 }, { "epoch": 0.9676853341131745, "grad_norm": 1.2805956031485568, "learning_rate": 1.1363957169315773e-05, "loss": 0.619697630405426, "step": 3309 }, { "epoch": 0.9679777745284398, "grad_norm": 1.4119075289775231, "learning_rate": 1.135916748609683e-05, "loss": 0.564563512802124, "step": 3310 }, { "epoch": 0.9682702149437052, "grad_norm": 1.6014783450991135, "learning_rate": 1.1354377485180756e-05, "loss": 0.6238751411437988, "step": 3311 }, { "epoch": 0.9685626553589706, "grad_norm": 1.4620948350058627, "learning_rate": 1.1349587167687177e-05, "loss": 0.8079221844673157, "step": 3312 }, { "epoch": 0.968855095774236, "grad_norm": 1.4034979651528738, "learning_rate": 1.1344796534735805e-05, "loss": 0.5547629594802856, "step": 3313 }, { "epoch": 0.9691475361895013, "grad_norm": 1.2187187942390127, "learning_rate": 1.134000558744642e-05, "loss": 0.630042552947998, "step": 3314 }, { "epoch": 0.9694399766047668, "grad_norm": 1.284912675244452, "learning_rate": 1.1335214326938872e-05, "loss": 0.5283412337303162, "step": 3315 }, { "epoch": 0.9697324170200322, "grad_norm": 1.3484514955842084, "learning_rate": 1.1330422754333097e-05, "loss": 0.6356452703475952, "step": 3316 }, { "epoch": 0.9700248574352975, "grad_norm": 1.265116321608699, "learning_rate": 1.132563087074909e-05, "loss": 0.6531886458396912, "step": 3317 }, { "epoch": 0.970317297850563, "grad_norm": 1.6209665553722108, "learning_rate": 1.1320838677306927e-05, "loss": 0.5725178718566895, "step": 3318 }, { "epoch": 0.9706097382658283, "grad_norm": 1.460783947968998, "learning_rate": 1.1316046175126758e-05, "loss": 0.6341495513916016, "step": 3319 }, { "epoch": 0.9709021786810937, "grad_norm": 1.428850290510927, "learning_rate": 1.1311253365328794e-05, "loss": 0.5792768597602844, "step": 3320 }, { "epoch": 0.9711946190963591, "grad_norm": 1.2539734431492524, "learning_rate": 1.1306460249033326e-05, "loss": 0.5495700836181641, "step": 3321 }, { "epoch": 0.9714870595116245, "grad_norm": 1.3779597112573112, "learning_rate": 1.1301666827360721e-05, "loss": 0.7092291116714478, "step": 3322 }, { "epoch": 0.97177949992689, "grad_norm": 1.210154083257435, "learning_rate": 1.1296873101431409e-05, "loss": 0.5368257761001587, "step": 3323 }, { "epoch": 0.9720719403421553, "grad_norm": 1.2901315838159502, "learning_rate": 1.1292079072365898e-05, "loss": 0.6116393804550171, "step": 3324 }, { "epoch": 0.9723643807574207, "grad_norm": 1.6375876584807947, "learning_rate": 1.1287284741284757e-05, "loss": 0.5654028654098511, "step": 3325 }, { "epoch": 0.972656821172686, "grad_norm": 1.4007947938241085, "learning_rate": 1.1282490109308633e-05, "loss": 0.6436389684677124, "step": 3326 }, { "epoch": 0.9729492615879515, "grad_norm": 1.6286174854172328, "learning_rate": 1.1277695177558243e-05, "loss": 0.7687330842018127, "step": 3327 }, { "epoch": 0.9732417020032168, "grad_norm": 1.3338540478099405, "learning_rate": 1.1272899947154377e-05, "loss": 0.5350443124771118, "step": 3328 }, { "epoch": 0.9735341424184822, "grad_norm": 1.5528633763871835, "learning_rate": 1.1268104419217884e-05, "loss": 0.6032785773277283, "step": 3329 }, { "epoch": 0.9738265828337477, "grad_norm": 1.410347655987774, "learning_rate": 1.1263308594869697e-05, "loss": 0.5756093263626099, "step": 3330 }, { "epoch": 0.974119023249013, "grad_norm": 1.5831169693775362, "learning_rate": 1.1258512475230807e-05, "loss": 0.6977418065071106, "step": 3331 }, { "epoch": 0.9744114636642784, "grad_norm": 1.3726893652594243, "learning_rate": 1.1253716061422275e-05, "loss": 0.5409448146820068, "step": 3332 }, { "epoch": 0.9747039040795438, "grad_norm": 1.3626349639764654, "learning_rate": 1.1248919354565237e-05, "loss": 0.5863862037658691, "step": 3333 }, { "epoch": 0.9749963444948092, "grad_norm": 1.313934697737098, "learning_rate": 1.1244122355780895e-05, "loss": 0.6039433479309082, "step": 3334 }, { "epoch": 0.9752887849100745, "grad_norm": 1.4813691831553626, "learning_rate": 1.1239325066190513e-05, "loss": 0.6696581840515137, "step": 3335 }, { "epoch": 0.97558122532534, "grad_norm": 1.5159715106591773, "learning_rate": 1.1234527486915439e-05, "loss": 0.6308715343475342, "step": 3336 }, { "epoch": 0.9758736657406053, "grad_norm": 1.4927391317525602, "learning_rate": 1.1229729619077065e-05, "loss": 0.580268383026123, "step": 3337 }, { "epoch": 0.9761661061558707, "grad_norm": 1.775582999909584, "learning_rate": 1.1224931463796871e-05, "loss": 0.8080834746360779, "step": 3338 }, { "epoch": 0.9764585465711362, "grad_norm": 1.3814988427954438, "learning_rate": 1.1220133022196395e-05, "loss": 0.4933619499206543, "step": 3339 }, { "epoch": 0.9767509869864015, "grad_norm": 1.26412210808527, "learning_rate": 1.1215334295397244e-05, "loss": 0.5639102458953857, "step": 3340 }, { "epoch": 0.9770434274016669, "grad_norm": 1.3947001629341338, "learning_rate": 1.1210535284521094e-05, "loss": 0.6332741975784302, "step": 3341 }, { "epoch": 0.9773358678169323, "grad_norm": 1.4234927806293247, "learning_rate": 1.1205735990689677e-05, "loss": 0.5425227880477905, "step": 3342 }, { "epoch": 0.9776283082321977, "grad_norm": 1.2841671137073696, "learning_rate": 1.1200936415024804e-05, "loss": 0.48746997117996216, "step": 3343 }, { "epoch": 0.977920748647463, "grad_norm": 1.3045240526527524, "learning_rate": 1.1196136558648345e-05, "loss": 0.5509577393531799, "step": 3344 }, { "epoch": 0.9782131890627285, "grad_norm": 1.5306708658005588, "learning_rate": 1.1191336422682237e-05, "loss": 0.5939484238624573, "step": 3345 }, { "epoch": 0.9785056294779939, "grad_norm": 1.4772741629174198, "learning_rate": 1.1186536008248487e-05, "loss": 0.6078917384147644, "step": 3346 }, { "epoch": 0.9787980698932592, "grad_norm": 1.4449426772113496, "learning_rate": 1.1181735316469157e-05, "loss": 0.5578145980834961, "step": 3347 }, { "epoch": 0.9790905103085247, "grad_norm": 1.5556898331182667, "learning_rate": 1.1176934348466384e-05, "loss": 0.6809493899345398, "step": 3348 }, { "epoch": 0.97938295072379, "grad_norm": 1.3454886518258895, "learning_rate": 1.117213310536236e-05, "loss": 0.6057093143463135, "step": 3349 }, { "epoch": 0.9796753911390554, "grad_norm": 1.2918762120947054, "learning_rate": 1.1167331588279351e-05, "loss": 0.6656113266944885, "step": 3350 }, { "epoch": 0.9799678315543208, "grad_norm": 1.3588186351553628, "learning_rate": 1.1162529798339682e-05, "loss": 0.5260547399520874, "step": 3351 }, { "epoch": 0.9802602719695862, "grad_norm": 1.4059510686804249, "learning_rate": 1.115772773666574e-05, "loss": 0.6918379068374634, "step": 3352 }, { "epoch": 0.9805527123848515, "grad_norm": 1.4859264660633271, "learning_rate": 1.115292540437998e-05, "loss": 0.7128825187683105, "step": 3353 }, { "epoch": 0.980845152800117, "grad_norm": 1.7806281788252345, "learning_rate": 1.1148122802604913e-05, "loss": 0.6858257055282593, "step": 3354 }, { "epoch": 0.9811375932153824, "grad_norm": 1.3250069966815017, "learning_rate": 1.1143319932463124e-05, "loss": 0.540290117263794, "step": 3355 }, { "epoch": 0.9814300336306477, "grad_norm": 1.3692222106755043, "learning_rate": 1.1138516795077251e-05, "loss": 0.7293038368225098, "step": 3356 }, { "epoch": 0.9817224740459132, "grad_norm": 1.2337952733643827, "learning_rate": 1.1133713391570003e-05, "loss": 0.5981270670890808, "step": 3357 }, { "epoch": 0.9820149144611785, "grad_norm": 1.282642205016649, "learning_rate": 1.1128909723064138e-05, "loss": 0.6175673604011536, "step": 3358 }, { "epoch": 0.9823073548764439, "grad_norm": 1.233452486411816, "learning_rate": 1.112410579068249e-05, "loss": 0.5385074615478516, "step": 3359 }, { "epoch": 0.9825997952917093, "grad_norm": 1.372295513124522, "learning_rate": 1.1119301595547952e-05, "loss": 0.5754122734069824, "step": 3360 }, { "epoch": 0.9828922357069747, "grad_norm": 1.4139982265628481, "learning_rate": 1.1114497138783469e-05, "loss": 0.5817348957061768, "step": 3361 }, { "epoch": 0.9831846761222401, "grad_norm": 1.5953096945649214, "learning_rate": 1.1109692421512058e-05, "loss": 0.7561115026473999, "step": 3362 }, { "epoch": 0.9834771165375055, "grad_norm": 1.4339527302516233, "learning_rate": 1.1104887444856786e-05, "loss": 0.5972003936767578, "step": 3363 }, { "epoch": 0.9837695569527709, "grad_norm": 1.7933233288020083, "learning_rate": 1.1100082209940795e-05, "loss": 0.7569154500961304, "step": 3364 }, { "epoch": 0.9840619973680362, "grad_norm": 1.6291951934588174, "learning_rate": 1.1095276717887273e-05, "loss": 0.587831437587738, "step": 3365 }, { "epoch": 0.9843544377833017, "grad_norm": 1.3893746663182953, "learning_rate": 1.109047096981948e-05, "loss": 0.5265868902206421, "step": 3366 }, { "epoch": 0.984646878198567, "grad_norm": 1.5308570155926502, "learning_rate": 1.1085664966860728e-05, "loss": 0.6065980792045593, "step": 3367 }, { "epoch": 0.9849393186138324, "grad_norm": 1.2582827679300745, "learning_rate": 1.1080858710134392e-05, "loss": 0.5859705209732056, "step": 3368 }, { "epoch": 0.9852317590290979, "grad_norm": 1.2323676627113982, "learning_rate": 1.1076052200763903e-05, "loss": 0.508766770362854, "step": 3369 }, { "epoch": 0.9855241994443632, "grad_norm": 1.36193145330846, "learning_rate": 1.1071245439872752e-05, "loss": 0.569848358631134, "step": 3370 }, { "epoch": 0.9858166398596286, "grad_norm": 1.5268801014665052, "learning_rate": 1.1066438428584496e-05, "loss": 0.6665600538253784, "step": 3371 }, { "epoch": 0.986109080274894, "grad_norm": 4.0352208239875536, "learning_rate": 1.1061631168022742e-05, "loss": 0.5942315459251404, "step": 3372 }, { "epoch": 0.9864015206901594, "grad_norm": 1.3552035470831052, "learning_rate": 1.1056823659311158e-05, "loss": 0.5270178318023682, "step": 3373 }, { "epoch": 0.9866939611054247, "grad_norm": 1.484191192307279, "learning_rate": 1.1052015903573465e-05, "loss": 0.6879183053970337, "step": 3374 }, { "epoch": 0.9869864015206902, "grad_norm": 1.3455375539569006, "learning_rate": 1.1047207901933453e-05, "loss": 0.5980993509292603, "step": 3375 }, { "epoch": 0.9872788419359555, "grad_norm": 1.3905728698834559, "learning_rate": 1.1042399655514961e-05, "loss": 0.5616245865821838, "step": 3376 }, { "epoch": 0.9875712823512209, "grad_norm": 1.186489901347366, "learning_rate": 1.1037591165441887e-05, "loss": 0.6233900785446167, "step": 3377 }, { "epoch": 0.9878637227664864, "grad_norm": 1.2146885941659273, "learning_rate": 1.1032782432838188e-05, "loss": 0.612476110458374, "step": 3378 }, { "epoch": 0.9881561631817517, "grad_norm": 1.4001611534955285, "learning_rate": 1.1027973458827874e-05, "loss": 0.7109482288360596, "step": 3379 }, { "epoch": 0.9884486035970171, "grad_norm": 1.4339596644962305, "learning_rate": 1.1023164244535013e-05, "loss": 0.7105005383491516, "step": 3380 }, { "epoch": 0.9887410440122825, "grad_norm": 1.1897152470249062, "learning_rate": 1.1018354791083731e-05, "loss": 0.5401301383972168, "step": 3381 }, { "epoch": 0.9890334844275479, "grad_norm": 1.2391450524860042, "learning_rate": 1.101354509959821e-05, "loss": 0.504487156867981, "step": 3382 }, { "epoch": 0.9893259248428132, "grad_norm": 1.5778073649668172, "learning_rate": 1.1008735171202685e-05, "loss": 0.5634675025939941, "step": 3383 }, { "epoch": 0.9896183652580787, "grad_norm": 1.2596231385186676, "learning_rate": 1.1003925007021444e-05, "loss": 0.4828820824623108, "step": 3384 }, { "epoch": 0.9899108056733441, "grad_norm": 1.5274466661026922, "learning_rate": 1.0999114608178837e-05, "loss": 0.7154384851455688, "step": 3385 }, { "epoch": 0.9902032460886094, "grad_norm": 1.4762279403432657, "learning_rate": 1.0994303975799268e-05, "loss": 0.626085638999939, "step": 3386 }, { "epoch": 0.9904956865038749, "grad_norm": 1.2276097303271793, "learning_rate": 1.0989493111007186e-05, "loss": 0.5179756283760071, "step": 3387 }, { "epoch": 0.9907881269191402, "grad_norm": 1.443725456432181, "learning_rate": 1.0984682014927108e-05, "loss": 0.6992131471633911, "step": 3388 }, { "epoch": 0.9910805673344056, "grad_norm": 1.3252934977411588, "learning_rate": 1.0979870688683598e-05, "loss": 0.5791709423065186, "step": 3389 }, { "epoch": 0.991373007749671, "grad_norm": 1.2293406038140111, "learning_rate": 1.097505913340127e-05, "loss": 0.4703817367553711, "step": 3390 }, { "epoch": 0.9916654481649364, "grad_norm": 1.7130975290215298, "learning_rate": 1.0970247350204797e-05, "loss": 0.6042051911354065, "step": 3391 }, { "epoch": 0.9919578885802017, "grad_norm": 1.5075227997294136, "learning_rate": 1.0965435340218905e-05, "loss": 0.6806557178497314, "step": 3392 }, { "epoch": 0.9922503289954672, "grad_norm": 1.4336313879655775, "learning_rate": 1.0960623104568373e-05, "loss": 0.6372751593589783, "step": 3393 }, { "epoch": 0.9925427694107326, "grad_norm": 1.2403325317456615, "learning_rate": 1.0955810644378031e-05, "loss": 0.48651185631752014, "step": 3394 }, { "epoch": 0.9928352098259979, "grad_norm": 1.5056465468012041, "learning_rate": 1.0950997960772764e-05, "loss": 0.5244222283363342, "step": 3395 }, { "epoch": 0.9931276502412634, "grad_norm": 1.4445958557594307, "learning_rate": 1.0946185054877505e-05, "loss": 0.6194322109222412, "step": 3396 }, { "epoch": 0.9934200906565287, "grad_norm": 1.4199918179889868, "learning_rate": 1.0941371927817241e-05, "loss": 0.690010666847229, "step": 3397 }, { "epoch": 0.9937125310717941, "grad_norm": 1.9110036566867663, "learning_rate": 1.0936558580717013e-05, "loss": 0.7332549095153809, "step": 3398 }, { "epoch": 0.9940049714870595, "grad_norm": 1.428619260140058, "learning_rate": 1.093174501470191e-05, "loss": 0.5264838337898254, "step": 3399 }, { "epoch": 0.9942974119023249, "grad_norm": 1.1922668548863515, "learning_rate": 1.092693123089708e-05, "loss": 0.624382734298706, "step": 3400 }, { "epoch": 0.9945898523175903, "grad_norm": 1.6559518933415514, "learning_rate": 1.0922117230427705e-05, "loss": 0.6340548992156982, "step": 3401 }, { "epoch": 0.9948822927328557, "grad_norm": 1.194444639014181, "learning_rate": 1.0917303014419036e-05, "loss": 0.4452754855155945, "step": 3402 }, { "epoch": 0.9951747331481211, "grad_norm": 1.4241998861848877, "learning_rate": 1.0912488583996364e-05, "loss": 0.6180763244628906, "step": 3403 }, { "epoch": 0.9954671735633864, "grad_norm": 1.7347993099568695, "learning_rate": 1.0907673940285032e-05, "loss": 0.7079293727874756, "step": 3404 }, { "epoch": 0.9957596139786519, "grad_norm": 1.6216897448198107, "learning_rate": 1.090285908441044e-05, "loss": 0.6608254909515381, "step": 3405 }, { "epoch": 0.9960520543939172, "grad_norm": 1.6873856420041173, "learning_rate": 1.0898044017498024e-05, "loss": 0.6450251340866089, "step": 3406 }, { "epoch": 0.9963444948091826, "grad_norm": 1.4055094844579619, "learning_rate": 1.089322874067328e-05, "loss": 0.6267623901367188, "step": 3407 }, { "epoch": 0.9966369352244481, "grad_norm": 1.6519553259967432, "learning_rate": 1.0888413255061747e-05, "loss": 0.6756424903869629, "step": 3408 }, { "epoch": 0.9969293756397134, "grad_norm": 1.4122044676522614, "learning_rate": 1.0883597561789017e-05, "loss": 0.6578212976455688, "step": 3409 }, { "epoch": 0.9972218160549788, "grad_norm": 1.600222297323414, "learning_rate": 1.087878166198073e-05, "loss": 0.8186248540878296, "step": 3410 }, { "epoch": 0.9975142564702442, "grad_norm": 1.4575083835366422, "learning_rate": 1.0873965556762573e-05, "loss": 0.6689319610595703, "step": 3411 }, { "epoch": 0.9978066968855096, "grad_norm": 1.5562694813418687, "learning_rate": 1.0869149247260282e-05, "loss": 0.5471278429031372, "step": 3412 }, { "epoch": 0.9980991373007749, "grad_norm": 1.239131034827953, "learning_rate": 1.0864332734599636e-05, "loss": 0.4673747420310974, "step": 3413 }, { "epoch": 0.9983915777160404, "grad_norm": 1.4054798008983762, "learning_rate": 1.085951601990647e-05, "loss": 0.5777568221092224, "step": 3414 }, { "epoch": 0.9986840181313057, "grad_norm": 1.6708797545900484, "learning_rate": 1.0854699104306661e-05, "loss": 0.6758528351783752, "step": 3415 }, { "epoch": 0.9989764585465711, "grad_norm": 1.169154860422915, "learning_rate": 1.0849881988926132e-05, "loss": 0.5759919881820679, "step": 3416 }, { "epoch": 0.9992688989618366, "grad_norm": 1.3291108456245637, "learning_rate": 1.0845064674890857e-05, "loss": 0.606694221496582, "step": 3417 }, { "epoch": 0.9995613393771019, "grad_norm": 1.475290016916602, "learning_rate": 1.0840247163326851e-05, "loss": 0.627873957157135, "step": 3418 }, { "epoch": 0.9998537797923673, "grad_norm": 1.4144594545282698, "learning_rate": 1.083542945536018e-05, "loss": 0.5560880303382874, "step": 3419 }, { "epoch": 1.0, "grad_norm": 2.3650000488034633, "learning_rate": 1.0830611552116952e-05, "loss": 0.5983354449272156, "step": 3420 }, { "epoch": 1.0002924404152653, "grad_norm": 1.1169918975180415, "learning_rate": 1.0825793454723325e-05, "loss": 0.5012353658676147, "step": 3421 }, { "epoch": 1.0005848808305309, "grad_norm": 1.6136465051179143, "learning_rate": 1.0820975164305498e-05, "loss": 0.4585106372833252, "step": 3422 }, { "epoch": 1.0008773212457962, "grad_norm": 1.2831850675969656, "learning_rate": 1.0816156681989717e-05, "loss": 0.5790318846702576, "step": 3423 }, { "epoch": 1.0011697616610615, "grad_norm": 1.5258008126885618, "learning_rate": 1.0811338008902277e-05, "loss": 0.6016381978988647, "step": 3424 }, { "epoch": 1.0014622020763269, "grad_norm": 1.328199543518758, "learning_rate": 1.0806519146169507e-05, "loss": 0.5756744146347046, "step": 3425 }, { "epoch": 1.0017546424915924, "grad_norm": 1.1865012964818713, "learning_rate": 1.0801700094917792e-05, "loss": 0.4776861369609833, "step": 3426 }, { "epoch": 1.0020470829068577, "grad_norm": 1.8629358545914494, "learning_rate": 1.0796880856273557e-05, "loss": 0.645842969417572, "step": 3427 }, { "epoch": 1.002339523322123, "grad_norm": 1.1125775865964678, "learning_rate": 1.0792061431363266e-05, "loss": 0.5645815134048462, "step": 3428 }, { "epoch": 1.0026319637373886, "grad_norm": 1.4821141209987578, "learning_rate": 1.0787241821313428e-05, "loss": 0.5477975606918335, "step": 3429 }, { "epoch": 1.002924404152654, "grad_norm": 1.0992693186116131, "learning_rate": 1.0782422027250604e-05, "loss": 0.4064188599586487, "step": 3430 }, { "epoch": 1.0032168445679193, "grad_norm": 1.3634803374266724, "learning_rate": 1.0777602050301384e-05, "loss": 0.5360208749771118, "step": 3431 }, { "epoch": 1.0035092849831846, "grad_norm": 1.4203435807547533, "learning_rate": 1.0772781891592419e-05, "loss": 0.6189982891082764, "step": 3432 }, { "epoch": 1.0038017253984501, "grad_norm": 1.4406563602891276, "learning_rate": 1.0767961552250382e-05, "loss": 0.4623541533946991, "step": 3433 }, { "epoch": 1.0040941658137155, "grad_norm": 1.4714321386033957, "learning_rate": 1.0763141033402e-05, "loss": 0.6094095706939697, "step": 3434 }, { "epoch": 1.0043866062289808, "grad_norm": 1.8852494834868845, "learning_rate": 1.0758320336174042e-05, "loss": 0.6997445821762085, "step": 3435 }, { "epoch": 1.0046790466442463, "grad_norm": 1.3591852438815977, "learning_rate": 1.0753499461693316e-05, "loss": 0.5447323322296143, "step": 3436 }, { "epoch": 1.0049714870595117, "grad_norm": 1.526403087538078, "learning_rate": 1.0748678411086672e-05, "loss": 0.5851927995681763, "step": 3437 }, { "epoch": 1.005263927474777, "grad_norm": 1.2443699762001765, "learning_rate": 1.0743857185481006e-05, "loss": 0.5897810459136963, "step": 3438 }, { "epoch": 1.0055563678900423, "grad_norm": 1.277276792826896, "learning_rate": 1.073903578600324e-05, "loss": 0.47671592235565186, "step": 3439 }, { "epoch": 1.0058488083053079, "grad_norm": 1.5091606917661848, "learning_rate": 1.0734214213780355e-05, "loss": 0.5586696863174438, "step": 3440 }, { "epoch": 1.0061412487205732, "grad_norm": 1.7171075095449666, "learning_rate": 1.0729392469939362e-05, "loss": 0.6817598342895508, "step": 3441 }, { "epoch": 1.0064336891358385, "grad_norm": 1.4899951597044825, "learning_rate": 1.0724570555607311e-05, "loss": 0.6503750085830688, "step": 3442 }, { "epoch": 1.0067261295511039, "grad_norm": 1.516461978227071, "learning_rate": 1.07197484719113e-05, "loss": 0.7121564149856567, "step": 3443 }, { "epoch": 1.0070185699663694, "grad_norm": 1.2899445236891802, "learning_rate": 1.071492621997846e-05, "loss": 0.5760178565979004, "step": 3444 }, { "epoch": 1.0073110103816347, "grad_norm": 1.2567067936293974, "learning_rate": 1.0710103800935965e-05, "loss": 0.4555765390396118, "step": 3445 }, { "epoch": 1.0076034507969, "grad_norm": 1.73824720674272, "learning_rate": 1.0705281215911021e-05, "loss": 0.6098523736000061, "step": 3446 }, { "epoch": 1.0078958912121656, "grad_norm": 1.3529009112365886, "learning_rate": 1.070045846603088e-05, "loss": 0.49828749895095825, "step": 3447 }, { "epoch": 1.008188331627431, "grad_norm": 1.6747165622943363, "learning_rate": 1.0695635552422834e-05, "loss": 0.5134999752044678, "step": 3448 }, { "epoch": 1.0084807720426963, "grad_norm": 1.6379844761327287, "learning_rate": 1.0690812476214209e-05, "loss": 0.53546142578125, "step": 3449 }, { "epoch": 1.0087732124579616, "grad_norm": 1.353591975524027, "learning_rate": 1.0685989238532364e-05, "loss": 0.4955276846885681, "step": 3450 }, { "epoch": 1.0090656528732271, "grad_norm": 1.5308502126967132, "learning_rate": 1.0681165840504708e-05, "loss": 0.5693827271461487, "step": 3451 }, { "epoch": 1.0093580932884925, "grad_norm": 1.2544327118971752, "learning_rate": 1.0676342283258676e-05, "loss": 0.5023596286773682, "step": 3452 }, { "epoch": 1.0096505337037578, "grad_norm": 1.4830383604575028, "learning_rate": 1.0671518567921748e-05, "loss": 0.5601100921630859, "step": 3453 }, { "epoch": 1.0099429741190233, "grad_norm": 1.5483896672555095, "learning_rate": 1.0666694695621438e-05, "loss": 0.5744563341140747, "step": 3454 }, { "epoch": 1.0102354145342887, "grad_norm": 1.2243241739970807, "learning_rate": 1.0661870667485298e-05, "loss": 0.531909704208374, "step": 3455 }, { "epoch": 1.010527854949554, "grad_norm": 1.5063779223920848, "learning_rate": 1.0657046484640911e-05, "loss": 0.5737274885177612, "step": 3456 }, { "epoch": 1.0108202953648193, "grad_norm": 1.3852723907754825, "learning_rate": 1.0652222148215905e-05, "loss": 0.5550329089164734, "step": 3457 }, { "epoch": 1.0111127357800849, "grad_norm": 1.6139287553682227, "learning_rate": 1.0647397659337936e-05, "loss": 0.47795504331588745, "step": 3458 }, { "epoch": 1.0114051761953502, "grad_norm": 1.4543285146976004, "learning_rate": 1.0642573019134703e-05, "loss": 0.6817550659179688, "step": 3459 }, { "epoch": 1.0116976166106155, "grad_norm": 1.1722820118460164, "learning_rate": 1.063774822873393e-05, "loss": 0.45271044969558716, "step": 3460 }, { "epoch": 1.011990057025881, "grad_norm": 1.537598582173988, "learning_rate": 1.0632923289263389e-05, "loss": 0.611709475517273, "step": 3461 }, { "epoch": 1.0122824974411464, "grad_norm": 1.4188302760105698, "learning_rate": 1.0628098201850876e-05, "loss": 0.5101709961891174, "step": 3462 }, { "epoch": 1.0125749378564117, "grad_norm": 1.433548611715836, "learning_rate": 1.0623272967624227e-05, "loss": 0.6550514698028564, "step": 3463 }, { "epoch": 1.012867378271677, "grad_norm": 1.2796248072280718, "learning_rate": 1.0618447587711312e-05, "loss": 0.479978084564209, "step": 3464 }, { "epoch": 1.0131598186869426, "grad_norm": 1.5575466316491844, "learning_rate": 1.0613622063240035e-05, "loss": 0.5616719722747803, "step": 3465 }, { "epoch": 1.013452259102208, "grad_norm": 1.5865800035698945, "learning_rate": 1.060879639533833e-05, "loss": 0.5160953998565674, "step": 3466 }, { "epoch": 1.0137446995174733, "grad_norm": 1.5690447549246889, "learning_rate": 1.0603970585134168e-05, "loss": 0.6069898009300232, "step": 3467 }, { "epoch": 1.0140371399327388, "grad_norm": 1.4806335128762829, "learning_rate": 1.0599144633755555e-05, "loss": 0.5800961256027222, "step": 3468 }, { "epoch": 1.0143295803480041, "grad_norm": 1.2794607035027592, "learning_rate": 1.0594318542330528e-05, "loss": 0.5286555290222168, "step": 3469 }, { "epoch": 1.0146220207632695, "grad_norm": 1.3098421389423984, "learning_rate": 1.0589492311987157e-05, "loss": 0.44960829615592957, "step": 3470 }, { "epoch": 1.0149144611785348, "grad_norm": 1.787788159345536, "learning_rate": 1.0584665943853538e-05, "loss": 0.5799434781074524, "step": 3471 }, { "epoch": 1.0152069015938003, "grad_norm": 1.3655057393381103, "learning_rate": 1.057983943905781e-05, "loss": 0.5142421126365662, "step": 3472 }, { "epoch": 1.0154993420090657, "grad_norm": 1.3605211166498987, "learning_rate": 1.0575012798728141e-05, "loss": 0.5184981226921082, "step": 3473 }, { "epoch": 1.015791782424331, "grad_norm": 1.6630390830837942, "learning_rate": 1.0570186023992724e-05, "loss": 0.5747173428535461, "step": 3474 }, { "epoch": 1.0160842228395965, "grad_norm": 1.4307323575447104, "learning_rate": 1.0565359115979792e-05, "loss": 0.5994119644165039, "step": 3475 }, { "epoch": 1.0163766632548619, "grad_norm": 1.4001969418816858, "learning_rate": 1.0560532075817605e-05, "loss": 0.5020599365234375, "step": 3476 }, { "epoch": 1.0166691036701272, "grad_norm": 1.5266027572877992, "learning_rate": 1.0555704904634451e-05, "loss": 0.5023698806762695, "step": 3477 }, { "epoch": 1.0169615440853925, "grad_norm": 1.3247610849347196, "learning_rate": 1.0550877603558656e-05, "loss": 0.3998676538467407, "step": 3478 }, { "epoch": 1.017253984500658, "grad_norm": 1.2513443496343235, "learning_rate": 1.0546050173718569e-05, "loss": 0.5083760619163513, "step": 3479 }, { "epoch": 1.0175464249159234, "grad_norm": 1.3684676716830397, "learning_rate": 1.0541222616242575e-05, "loss": 0.49840620160102844, "step": 3480 }, { "epoch": 1.0178388653311887, "grad_norm": 1.3303553104888959, "learning_rate": 1.0536394932259085e-05, "loss": 0.5302960276603699, "step": 3481 }, { "epoch": 1.018131305746454, "grad_norm": 1.338379797222235, "learning_rate": 1.0531567122896543e-05, "loss": 0.5694236755371094, "step": 3482 }, { "epoch": 1.0184237461617196, "grad_norm": 1.4305833876226657, "learning_rate": 1.0526739189283414e-05, "loss": 0.5155326128005981, "step": 3483 }, { "epoch": 1.018716186576985, "grad_norm": 1.3829306833852764, "learning_rate": 1.0521911132548207e-05, "loss": 0.6254806518554688, "step": 3484 }, { "epoch": 1.0190086269922503, "grad_norm": 1.9177430357611984, "learning_rate": 1.0517082953819442e-05, "loss": 0.5623525977134705, "step": 3485 }, { "epoch": 1.0193010674075158, "grad_norm": 1.67092732120196, "learning_rate": 1.051225465422568e-05, "loss": 0.6289865970611572, "step": 3486 }, { "epoch": 1.0195935078227811, "grad_norm": 1.4045798370952283, "learning_rate": 1.050742623489551e-05, "loss": 0.5935345888137817, "step": 3487 }, { "epoch": 1.0198859482380465, "grad_norm": 1.696103524125264, "learning_rate": 1.0502597696957542e-05, "loss": 0.5223839282989502, "step": 3488 }, { "epoch": 1.0201783886533118, "grad_norm": 1.9382869881093494, "learning_rate": 1.0497769041540418e-05, "loss": 0.6766373515129089, "step": 3489 }, { "epoch": 1.0204708290685773, "grad_norm": 1.7017290392950901, "learning_rate": 1.0492940269772806e-05, "loss": 0.4934672713279724, "step": 3490 }, { "epoch": 1.0207632694838427, "grad_norm": 1.345123127698455, "learning_rate": 1.0488111382783403e-05, "loss": 0.5207735300064087, "step": 3491 }, { "epoch": 1.021055709899108, "grad_norm": 1.6293706929191067, "learning_rate": 1.0483282381700933e-05, "loss": 0.6090695261955261, "step": 3492 }, { "epoch": 1.0213481503143735, "grad_norm": 1.2927953162345942, "learning_rate": 1.0478453267654147e-05, "loss": 0.5777665376663208, "step": 3493 }, { "epoch": 1.0216405907296389, "grad_norm": 1.5951555841510592, "learning_rate": 1.0473624041771814e-05, "loss": 0.7241395711898804, "step": 3494 }, { "epoch": 1.0219330311449042, "grad_norm": 1.4480767991556562, "learning_rate": 1.0468794705182742e-05, "loss": 0.45545506477355957, "step": 3495 }, { "epoch": 1.0222254715601695, "grad_norm": 1.422698945534055, "learning_rate": 1.0463965259015761e-05, "loss": 0.5519885420799255, "step": 3496 }, { "epoch": 1.022517911975435, "grad_norm": 1.509316262763282, "learning_rate": 1.045913570439972e-05, "loss": 0.558646559715271, "step": 3497 }, { "epoch": 1.0228103523907004, "grad_norm": 1.4960690347564465, "learning_rate": 1.0454306042463499e-05, "loss": 0.5259999632835388, "step": 3498 }, { "epoch": 1.0231027928059657, "grad_norm": 1.2679527875669403, "learning_rate": 1.0449476274336004e-05, "loss": 0.4711627960205078, "step": 3499 }, { "epoch": 1.0233952332212313, "grad_norm": 1.5395810801486782, "learning_rate": 1.0444646401146161e-05, "loss": 0.5893874168395996, "step": 3500 }, { "epoch": 1.0236876736364966, "grad_norm": 1.498228532943397, "learning_rate": 1.0439816424022926e-05, "loss": 0.5596123933792114, "step": 3501 }, { "epoch": 1.023980114051762, "grad_norm": 1.3706228388690522, "learning_rate": 1.0434986344095276e-05, "loss": 0.5228658318519592, "step": 3502 }, { "epoch": 1.0242725544670273, "grad_norm": 1.3956010390337459, "learning_rate": 1.0430156162492216e-05, "loss": 0.5520567297935486, "step": 3503 }, { "epoch": 1.0245649948822928, "grad_norm": 1.2988010194163804, "learning_rate": 1.0425325880342762e-05, "loss": 0.531911313533783, "step": 3504 }, { "epoch": 1.0248574352975581, "grad_norm": 1.5296749459710133, "learning_rate": 1.0420495498775974e-05, "loss": 0.58717942237854, "step": 3505 }, { "epoch": 1.0251498757128235, "grad_norm": 1.3937094974123596, "learning_rate": 1.0415665018920919e-05, "loss": 0.4972108006477356, "step": 3506 }, { "epoch": 1.025442316128089, "grad_norm": 1.4653045497635373, "learning_rate": 1.0410834441906692e-05, "loss": 0.567977249622345, "step": 3507 }, { "epoch": 1.0257347565433543, "grad_norm": 1.4984249963013099, "learning_rate": 1.0406003768862416e-05, "loss": 0.568755567073822, "step": 3508 }, { "epoch": 1.0260271969586197, "grad_norm": 1.5140899451878516, "learning_rate": 1.0401173000917224e-05, "loss": 0.5668960809707642, "step": 3509 }, { "epoch": 1.026319637373885, "grad_norm": 1.5737165138245863, "learning_rate": 1.0396342139200282e-05, "loss": 0.5956743955612183, "step": 3510 }, { "epoch": 1.0266120777891505, "grad_norm": 1.3000472899601168, "learning_rate": 1.0391511184840775e-05, "loss": 0.5258834362030029, "step": 3511 }, { "epoch": 1.0269045182044159, "grad_norm": 1.52676259543146, "learning_rate": 1.038668013896791e-05, "loss": 0.7358168363571167, "step": 3512 }, { "epoch": 1.0271969586196812, "grad_norm": 1.6868440270891885, "learning_rate": 1.0381849002710914e-05, "loss": 0.5845209956169128, "step": 3513 }, { "epoch": 1.0274893990349467, "grad_norm": 1.4837942506085555, "learning_rate": 1.0377017777199034e-05, "loss": 0.4475495219230652, "step": 3514 }, { "epoch": 1.027781839450212, "grad_norm": 1.2830033919091985, "learning_rate": 1.0372186463561542e-05, "loss": 0.5555804371833801, "step": 3515 }, { "epoch": 1.0280742798654774, "grad_norm": 1.65016913167245, "learning_rate": 1.0367355062927726e-05, "loss": 0.5927316546440125, "step": 3516 }, { "epoch": 1.0283667202807427, "grad_norm": 1.3376999356667882, "learning_rate": 1.0362523576426897e-05, "loss": 0.47281715273857117, "step": 3517 }, { "epoch": 1.0286591606960083, "grad_norm": 1.4195049172993812, "learning_rate": 1.0357692005188387e-05, "loss": 0.5275483727455139, "step": 3518 }, { "epoch": 1.0289516011112736, "grad_norm": 1.6670234220228792, "learning_rate": 1.0352860350341547e-05, "loss": 0.5740839242935181, "step": 3519 }, { "epoch": 1.029244041526539, "grad_norm": 1.3668449892598942, "learning_rate": 1.0348028613015747e-05, "loss": 0.6030054688453674, "step": 3520 }, { "epoch": 1.0295364819418042, "grad_norm": 1.4423080423666719, "learning_rate": 1.034319679434037e-05, "loss": 0.5415347814559937, "step": 3521 }, { "epoch": 1.0298289223570698, "grad_norm": 1.4756281264212951, "learning_rate": 1.033836489544483e-05, "loss": 0.5850083231925964, "step": 3522 }, { "epoch": 1.0301213627723351, "grad_norm": 1.516707487989418, "learning_rate": 1.0333532917458556e-05, "loss": 0.47614163160324097, "step": 3523 }, { "epoch": 1.0304138031876005, "grad_norm": 1.5357316287676814, "learning_rate": 1.0328700861510987e-05, "loss": 0.5645745992660522, "step": 3524 }, { "epoch": 1.030706243602866, "grad_norm": 1.3186548714848774, "learning_rate": 1.0323868728731591e-05, "loss": 0.5729008913040161, "step": 3525 }, { "epoch": 1.0309986840181313, "grad_norm": 1.373781447264802, "learning_rate": 1.031903652024985e-05, "loss": 0.5177778005599976, "step": 3526 }, { "epoch": 1.0312911244333967, "grad_norm": 1.390457184292636, "learning_rate": 1.0314204237195263e-05, "loss": 0.49413079023361206, "step": 3527 }, { "epoch": 1.031583564848662, "grad_norm": 1.4789369230243037, "learning_rate": 1.0309371880697342e-05, "loss": 0.5074756145477295, "step": 3528 }, { "epoch": 1.0318760052639275, "grad_norm": 1.590543948205407, "learning_rate": 1.0304539451885629e-05, "loss": 0.5601285696029663, "step": 3529 }, { "epoch": 1.0321684456791929, "grad_norm": 1.3273904087281212, "learning_rate": 1.029970695188967e-05, "loss": 0.48358121514320374, "step": 3530 }, { "epoch": 1.0324608860944582, "grad_norm": 1.4772927313727484, "learning_rate": 1.0294874381839033e-05, "loss": 0.4472161829471588, "step": 3531 }, { "epoch": 1.0327533265097237, "grad_norm": 1.4129544794929634, "learning_rate": 1.02900417428633e-05, "loss": 0.6011627912521362, "step": 3532 }, { "epoch": 1.033045766924989, "grad_norm": 1.354725840134447, "learning_rate": 1.0285209036092076e-05, "loss": 0.5212395191192627, "step": 3533 }, { "epoch": 1.0333382073402544, "grad_norm": 1.844431950477259, "learning_rate": 1.0280376262654971e-05, "loss": 0.5433810949325562, "step": 3534 }, { "epoch": 1.0336306477555197, "grad_norm": 1.4124385690995565, "learning_rate": 1.0275543423681622e-05, "loss": 0.5215464234352112, "step": 3535 }, { "epoch": 1.0339230881707853, "grad_norm": 1.3386210311441036, "learning_rate": 1.0270710520301672e-05, "loss": 0.511099100112915, "step": 3536 }, { "epoch": 1.0342155285860506, "grad_norm": 1.3822305233430652, "learning_rate": 1.0265877553644783e-05, "loss": 0.4954407811164856, "step": 3537 }, { "epoch": 1.034507969001316, "grad_norm": 1.5424734752588294, "learning_rate": 1.0261044524840633e-05, "loss": 0.5491081476211548, "step": 3538 }, { "epoch": 1.0348004094165815, "grad_norm": 1.5108040554468096, "learning_rate": 1.0256211435018912e-05, "loss": 0.43202829360961914, "step": 3539 }, { "epoch": 1.0350928498318468, "grad_norm": 1.5814180623509084, "learning_rate": 1.0251378285309326e-05, "loss": 0.4721212089061737, "step": 3540 }, { "epoch": 1.0353852902471121, "grad_norm": 1.6070602892086314, "learning_rate": 1.0246545076841596e-05, "loss": 0.5621099472045898, "step": 3541 }, { "epoch": 1.0356777306623774, "grad_norm": 1.5170284121136077, "learning_rate": 1.0241711810745452e-05, "loss": 0.5572346448898315, "step": 3542 }, { "epoch": 1.035970171077643, "grad_norm": 1.3590672633285579, "learning_rate": 1.023687848815064e-05, "loss": 0.40916550159454346, "step": 3543 }, { "epoch": 1.0362626114929083, "grad_norm": 1.5018716604616227, "learning_rate": 1.0232045110186926e-05, "loss": 0.5370572805404663, "step": 3544 }, { "epoch": 1.0365550519081737, "grad_norm": 1.603253593979403, "learning_rate": 1.0227211677984074e-05, "loss": 0.5381634831428528, "step": 3545 }, { "epoch": 1.0368474923234392, "grad_norm": 1.3795492267662186, "learning_rate": 1.0222378192671878e-05, "loss": 0.4807749092578888, "step": 3546 }, { "epoch": 1.0371399327387045, "grad_norm": 1.4973562396665303, "learning_rate": 1.0217544655380129e-05, "loss": 0.5673447847366333, "step": 3547 }, { "epoch": 1.0374323731539699, "grad_norm": 1.6360254172890698, "learning_rate": 1.0212711067238639e-05, "loss": 0.5259549021720886, "step": 3548 }, { "epoch": 1.0377248135692352, "grad_norm": 1.4439961362376934, "learning_rate": 1.0207877429377232e-05, "loss": 0.48267534375190735, "step": 3549 }, { "epoch": 1.0380172539845007, "grad_norm": 1.438603988067733, "learning_rate": 1.0203043742925738e-05, "loss": 0.44843387603759766, "step": 3550 }, { "epoch": 1.038309694399766, "grad_norm": 1.5765887333733293, "learning_rate": 1.0198210009014005e-05, "loss": 0.8050575256347656, "step": 3551 }, { "epoch": 1.0386021348150314, "grad_norm": 1.3559927051954717, "learning_rate": 1.0193376228771887e-05, "loss": 0.590203046798706, "step": 3552 }, { "epoch": 1.0388945752302967, "grad_norm": 1.4420953878245995, "learning_rate": 1.0188542403329252e-05, "loss": 0.5974458456039429, "step": 3553 }, { "epoch": 1.0391870156455623, "grad_norm": 1.4408311686918343, "learning_rate": 1.0183708533815975e-05, "loss": 0.4628743827342987, "step": 3554 }, { "epoch": 1.0394794560608276, "grad_norm": 1.538902326182442, "learning_rate": 1.0178874621361944e-05, "loss": 0.6738137006759644, "step": 3555 }, { "epoch": 1.039771896476093, "grad_norm": 1.2584091446339778, "learning_rate": 1.0174040667097061e-05, "loss": 0.48062413930892944, "step": 3556 }, { "epoch": 1.0400643368913585, "grad_norm": 1.4180020858721523, "learning_rate": 1.016920667215123e-05, "loss": 0.564401388168335, "step": 3557 }, { "epoch": 1.0403567773066238, "grad_norm": 1.5220611788966263, "learning_rate": 1.0164372637654367e-05, "loss": 0.4035246968269348, "step": 3558 }, { "epoch": 1.0406492177218891, "grad_norm": 1.3759176374876299, "learning_rate": 1.0159538564736399e-05, "loss": 0.4484536051750183, "step": 3559 }, { "epoch": 1.0409416581371547, "grad_norm": 1.5320485493087415, "learning_rate": 1.0154704454527265e-05, "loss": 0.6257200837135315, "step": 3560 }, { "epoch": 1.04123409855242, "grad_norm": 1.7250809702027206, "learning_rate": 1.0149870308156899e-05, "loss": 0.5541477799415588, "step": 3561 }, { "epoch": 1.0415265389676853, "grad_norm": 1.5360272319586679, "learning_rate": 1.0145036126755264e-05, "loss": 0.6248821020126343, "step": 3562 }, { "epoch": 1.0418189793829506, "grad_norm": 1.3930925306710389, "learning_rate": 1.0140201911452318e-05, "loss": 0.574689507484436, "step": 3563 }, { "epoch": 1.0421114197982162, "grad_norm": 1.45907196010364, "learning_rate": 1.0135367663378025e-05, "loss": 0.5873313546180725, "step": 3564 }, { "epoch": 1.0424038602134815, "grad_norm": 1.7911480245961826, "learning_rate": 1.0130533383662361e-05, "loss": 0.6662088632583618, "step": 3565 }, { "epoch": 1.0426963006287469, "grad_norm": 1.688392121046196, "learning_rate": 1.0125699073435316e-05, "loss": 0.6517773866653442, "step": 3566 }, { "epoch": 1.0429887410440122, "grad_norm": 1.8273298961737783, "learning_rate": 1.0120864733826877e-05, "loss": 0.6311444640159607, "step": 3567 }, { "epoch": 1.0432811814592777, "grad_norm": 1.4367651958960501, "learning_rate": 1.0116030365967037e-05, "loss": 0.49060457944869995, "step": 3568 }, { "epoch": 1.043573621874543, "grad_norm": 1.609897253932932, "learning_rate": 1.0111195970985813e-05, "loss": 0.5405893921852112, "step": 3569 }, { "epoch": 1.0438660622898084, "grad_norm": 1.4830806836977097, "learning_rate": 1.01063615500132e-05, "loss": 0.482162743806839, "step": 3570 }, { "epoch": 1.044158502705074, "grad_norm": 1.4107369824500982, "learning_rate": 1.0101527104179224e-05, "loss": 0.4542362093925476, "step": 3571 }, { "epoch": 1.0444509431203393, "grad_norm": 1.5628480243599212, "learning_rate": 1.00966926346139e-05, "loss": 0.6157265305519104, "step": 3572 }, { "epoch": 1.0447433835356046, "grad_norm": 1.6143915430154057, "learning_rate": 1.0091858142447266e-05, "loss": 0.6591875553131104, "step": 3573 }, { "epoch": 1.04503582395087, "grad_norm": 1.410506710976703, "learning_rate": 1.0087023628809347e-05, "loss": 0.5686256885528564, "step": 3574 }, { "epoch": 1.0453282643661355, "grad_norm": 1.2971662039691743, "learning_rate": 1.0082189094830183e-05, "loss": 0.45131799578666687, "step": 3575 }, { "epoch": 1.0456207047814008, "grad_norm": 1.6508365467694242, "learning_rate": 1.0077354541639821e-05, "loss": 0.5787829160690308, "step": 3576 }, { "epoch": 1.0459131451966661, "grad_norm": 1.6915833775625508, "learning_rate": 1.0072519970368303e-05, "loss": 0.5755574107170105, "step": 3577 }, { "epoch": 1.0462055856119317, "grad_norm": 1.4591194150184388, "learning_rate": 1.0067685382145683e-05, "loss": 0.5017693638801575, "step": 3578 }, { "epoch": 1.046498026027197, "grad_norm": 1.508478769597254, "learning_rate": 1.0062850778102017e-05, "loss": 0.5096016526222229, "step": 3579 }, { "epoch": 1.0467904664424623, "grad_norm": 1.443966956114079, "learning_rate": 1.0058016159367365e-05, "loss": 0.4988967180252075, "step": 3580 }, { "epoch": 1.0470829068577276, "grad_norm": 1.5186890104543016, "learning_rate": 1.0053181527071786e-05, "loss": 0.5410172939300537, "step": 3581 }, { "epoch": 1.0473753472729932, "grad_norm": 1.7546625585964495, "learning_rate": 1.004834688234535e-05, "loss": 0.5980710983276367, "step": 3582 }, { "epoch": 1.0476677876882585, "grad_norm": 1.347751797857706, "learning_rate": 1.0043512226318124e-05, "loss": 0.4737449586391449, "step": 3583 }, { "epoch": 1.0479602281035238, "grad_norm": 1.5493397390355739, "learning_rate": 1.003867756012018e-05, "loss": 0.6106469631195068, "step": 3584 }, { "epoch": 1.0482526685187894, "grad_norm": 1.6077524420960543, "learning_rate": 1.0033842884881593e-05, "loss": 0.48002901673316956, "step": 3585 }, { "epoch": 1.0485451089340547, "grad_norm": 1.4065529576638647, "learning_rate": 1.0029008201732433e-05, "loss": 0.5101731419563293, "step": 3586 }, { "epoch": 1.04883754934932, "grad_norm": 1.6961382740739117, "learning_rate": 1.0024173511802786e-05, "loss": 0.6350706219673157, "step": 3587 }, { "epoch": 1.0491299897645854, "grad_norm": 1.4947432010936612, "learning_rate": 1.0019338816222725e-05, "loss": 0.5268979072570801, "step": 3588 }, { "epoch": 1.049422430179851, "grad_norm": 1.4955724361545546, "learning_rate": 1.0014504116122335e-05, "loss": 0.5670457482337952, "step": 3589 }, { "epoch": 1.0497148705951163, "grad_norm": 1.7472274991386971, "learning_rate": 1.0009669412631697e-05, "loss": 0.6200711727142334, "step": 3590 }, { "epoch": 1.0500073110103816, "grad_norm": 1.5117580085419962, "learning_rate": 1.0004834706880891e-05, "loss": 0.44014686346054077, "step": 3591 }, { "epoch": 1.050299751425647, "grad_norm": 1.4806608082423456, "learning_rate": 1e-05, "loss": 0.4690900146961212, "step": 3592 }, { "epoch": 1.0505921918409125, "grad_norm": 1.5061085663062508, "learning_rate": 9.995165293119112e-06, "loss": 0.5791969299316406, "step": 3593 }, { "epoch": 1.0508846322561778, "grad_norm": 1.403652610849375, "learning_rate": 9.990330587368306e-06, "loss": 0.5566244125366211, "step": 3594 }, { "epoch": 1.0511770726714431, "grad_norm": 1.47068511144412, "learning_rate": 9.985495883877668e-06, "loss": 0.5201646685600281, "step": 3595 }, { "epoch": 1.0514695130867087, "grad_norm": 1.3147681531847344, "learning_rate": 9.980661183777277e-06, "loss": 0.44774526357650757, "step": 3596 }, { "epoch": 1.051761953501974, "grad_norm": 1.641682032458417, "learning_rate": 9.975826488197217e-06, "loss": 0.5346901416778564, "step": 3597 }, { "epoch": 1.0520543939172393, "grad_norm": 1.516503297952313, "learning_rate": 9.970991798267568e-06, "loss": 0.4639764428138733, "step": 3598 }, { "epoch": 1.0523468343325049, "grad_norm": 1.5385061459553095, "learning_rate": 9.966157115118412e-06, "loss": 0.5505763292312622, "step": 3599 }, { "epoch": 1.0526392747477702, "grad_norm": 1.5065604638146801, "learning_rate": 9.961322439879821e-06, "loss": 0.5187631845474243, "step": 3600 }, { "epoch": 1.0529317151630355, "grad_norm": 1.5837365707911437, "learning_rate": 9.95648777368188e-06, "loss": 0.5990081429481506, "step": 3601 }, { "epoch": 1.0532241555783008, "grad_norm": 1.5943954940503307, "learning_rate": 9.951653117654653e-06, "loss": 0.5926306843757629, "step": 3602 }, { "epoch": 1.0535165959935664, "grad_norm": 1.5828616151591308, "learning_rate": 9.946818472928215e-06, "loss": 0.5294582843780518, "step": 3603 }, { "epoch": 1.0538090364088317, "grad_norm": 1.4492789926079117, "learning_rate": 9.941983840632637e-06, "loss": 0.5442140102386475, "step": 3604 }, { "epoch": 1.054101476824097, "grad_norm": 1.5960181258924353, "learning_rate": 9.937149221897984e-06, "loss": 0.5888028740882874, "step": 3605 }, { "epoch": 1.0543939172393624, "grad_norm": 1.6823030520405429, "learning_rate": 9.93231461785432e-06, "loss": 0.7545796632766724, "step": 3606 }, { "epoch": 1.054686357654628, "grad_norm": 1.4193397986001617, "learning_rate": 9.9274800296317e-06, "loss": 0.4850383996963501, "step": 3607 }, { "epoch": 1.0549787980698933, "grad_norm": 1.7761903590602732, "learning_rate": 9.922645458360182e-06, "loss": 0.5658243894577026, "step": 3608 }, { "epoch": 1.0552712384851586, "grad_norm": 1.913627443584159, "learning_rate": 9.917810905169818e-06, "loss": 0.6526712775230408, "step": 3609 }, { "epoch": 1.0555636789004241, "grad_norm": 1.7132894383948376, "learning_rate": 9.912976371190657e-06, "loss": 0.6125987768173218, "step": 3610 }, { "epoch": 1.0558561193156895, "grad_norm": 1.3139938490016692, "learning_rate": 9.908141857552737e-06, "loss": 0.40159785747528076, "step": 3611 }, { "epoch": 1.0561485597309548, "grad_norm": 1.7052081125083998, "learning_rate": 9.903307365386103e-06, "loss": 0.6628924608230591, "step": 3612 }, { "epoch": 1.05644100014622, "grad_norm": 1.638888923278887, "learning_rate": 9.898472895820783e-06, "loss": 0.6083816289901733, "step": 3613 }, { "epoch": 1.0567334405614857, "grad_norm": 1.564812875636552, "learning_rate": 9.893638449986806e-06, "loss": 0.5349488854408264, "step": 3614 }, { "epoch": 1.057025880976751, "grad_norm": 1.5340813216184335, "learning_rate": 9.888804029014194e-06, "loss": 0.6119222044944763, "step": 3615 }, { "epoch": 1.0573183213920163, "grad_norm": 1.367693459120948, "learning_rate": 9.883969634032964e-06, "loss": 0.531359851360321, "step": 3616 }, { "epoch": 1.0576107618072819, "grad_norm": 1.6344237981695606, "learning_rate": 9.879135266173127e-06, "loss": 0.6604791879653931, "step": 3617 }, { "epoch": 1.0579032022225472, "grad_norm": 1.4352324880813543, "learning_rate": 9.874300926564689e-06, "loss": 0.4691445231437683, "step": 3618 }, { "epoch": 1.0581956426378125, "grad_norm": 1.2910646539258182, "learning_rate": 9.869466616337642e-06, "loss": 0.5690087080001831, "step": 3619 }, { "epoch": 1.0584880830530778, "grad_norm": 1.403700057828388, "learning_rate": 9.86463233662198e-06, "loss": 0.5426729917526245, "step": 3620 }, { "epoch": 1.0587805234683434, "grad_norm": 1.578075476325045, "learning_rate": 9.859798088547687e-06, "loss": 0.5640411376953125, "step": 3621 }, { "epoch": 1.0590729638836087, "grad_norm": 1.4838032713556162, "learning_rate": 9.854963873244738e-06, "loss": 0.6724091172218323, "step": 3622 }, { "epoch": 1.059365404298874, "grad_norm": 1.4145337335983883, "learning_rate": 9.850129691843105e-06, "loss": 0.5448887348175049, "step": 3623 }, { "epoch": 1.0596578447141396, "grad_norm": 1.5190623574509117, "learning_rate": 9.845295545472742e-06, "loss": 0.5555344820022583, "step": 3624 }, { "epoch": 1.059950285129405, "grad_norm": 1.6879154347320564, "learning_rate": 9.840461435263604e-06, "loss": 0.5053969621658325, "step": 3625 }, { "epoch": 1.0602427255446703, "grad_norm": 1.5675488432589333, "learning_rate": 9.835627362345636e-06, "loss": 0.5866390466690063, "step": 3626 }, { "epoch": 1.0605351659599356, "grad_norm": 1.81247497722172, "learning_rate": 9.830793327848773e-06, "loss": 0.5936717987060547, "step": 3627 }, { "epoch": 1.0608276063752011, "grad_norm": 1.5536122437945554, "learning_rate": 9.82595933290294e-06, "loss": 0.6009070873260498, "step": 3628 }, { "epoch": 1.0611200467904665, "grad_norm": 1.588445125911092, "learning_rate": 9.821125378638059e-06, "loss": 0.5361435413360596, "step": 3629 }, { "epoch": 1.0614124872057318, "grad_norm": 1.4856331412797505, "learning_rate": 9.816291466184025e-06, "loss": 0.5763939619064331, "step": 3630 }, { "epoch": 1.061704927620997, "grad_norm": 1.618308780160016, "learning_rate": 9.81145759667075e-06, "loss": 0.57512366771698, "step": 3631 }, { "epoch": 1.0619973680362627, "grad_norm": 1.4990484363196022, "learning_rate": 9.806623771228115e-06, "loss": 0.6144367456436157, "step": 3632 }, { "epoch": 1.062289808451528, "grad_norm": 1.5222649609215075, "learning_rate": 9.801789990985997e-06, "loss": 0.5715698003768921, "step": 3633 }, { "epoch": 1.0625822488667933, "grad_norm": 1.3438421364889925, "learning_rate": 9.796956257074263e-06, "loss": 0.632681131362915, "step": 3634 }, { "epoch": 1.0628746892820589, "grad_norm": 1.2996961363437054, "learning_rate": 9.79212257062277e-06, "loss": 0.5362547636032104, "step": 3635 }, { "epoch": 1.0631671296973242, "grad_norm": 1.2451948790215157, "learning_rate": 9.787288932761361e-06, "loss": 0.553846538066864, "step": 3636 }, { "epoch": 1.0634595701125895, "grad_norm": 2.0033616068213456, "learning_rate": 9.782455344619871e-06, "loss": 0.7200362682342529, "step": 3637 }, { "epoch": 1.063752010527855, "grad_norm": 1.5986858016901493, "learning_rate": 9.777621807328126e-06, "loss": 0.5544596910476685, "step": 3638 }, { "epoch": 1.0640444509431204, "grad_norm": 1.9336329750915207, "learning_rate": 9.772788322015926e-06, "loss": 0.687321126461029, "step": 3639 }, { "epoch": 1.0643368913583857, "grad_norm": 1.4658162923896687, "learning_rate": 9.767954889813076e-06, "loss": 0.4986167550086975, "step": 3640 }, { "epoch": 1.064629331773651, "grad_norm": 1.6835767903522258, "learning_rate": 9.763121511849358e-06, "loss": 0.5021307468414307, "step": 3641 }, { "epoch": 1.0649217721889166, "grad_norm": 1.6084332451713093, "learning_rate": 9.758288189254548e-06, "loss": 0.5542711019515991, "step": 3642 }, { "epoch": 1.065214212604182, "grad_norm": 1.4567212868909125, "learning_rate": 9.753454923158407e-06, "loss": 0.5161126852035522, "step": 3643 }, { "epoch": 1.0655066530194472, "grad_norm": 1.3588587385016027, "learning_rate": 9.748621714690674e-06, "loss": 0.6041361093521118, "step": 3644 }, { "epoch": 1.0657990934347126, "grad_norm": 1.5312936542968558, "learning_rate": 9.74378856498109e-06, "loss": 0.5252672433853149, "step": 3645 }, { "epoch": 1.0660915338499781, "grad_norm": 1.508976518247356, "learning_rate": 9.738955475159369e-06, "loss": 0.5198208093643188, "step": 3646 }, { "epoch": 1.0663839742652435, "grad_norm": 1.617831688267231, "learning_rate": 9.734122446355219e-06, "loss": 0.5547968149185181, "step": 3647 }, { "epoch": 1.0666764146805088, "grad_norm": 1.3192996989880752, "learning_rate": 9.72928947969833e-06, "loss": 0.5854370594024658, "step": 3648 }, { "epoch": 1.0669688550957743, "grad_norm": 1.4612935433441103, "learning_rate": 9.724456576318383e-06, "loss": 0.5199173092842102, "step": 3649 }, { "epoch": 1.0672612955110397, "grad_norm": 1.5597306303032106, "learning_rate": 9.71962373734503e-06, "loss": 0.49684566259384155, "step": 3650 }, { "epoch": 1.067553735926305, "grad_norm": 1.5081407431370675, "learning_rate": 9.714790963907927e-06, "loss": 0.593805193901062, "step": 3651 }, { "epoch": 1.0678461763415703, "grad_norm": 1.6501383657240702, "learning_rate": 9.7099582571367e-06, "loss": 0.5524622201919556, "step": 3652 }, { "epoch": 1.0681386167568359, "grad_norm": 1.589706723326761, "learning_rate": 9.70512561816097e-06, "loss": 0.5796955227851868, "step": 3653 }, { "epoch": 1.0684310571721012, "grad_norm": 1.6252059263075247, "learning_rate": 9.700293048110335e-06, "loss": 0.5470535159111023, "step": 3654 }, { "epoch": 1.0687234975873665, "grad_norm": 1.180447413588476, "learning_rate": 9.695460548114374e-06, "loss": 0.5438790321350098, "step": 3655 }, { "epoch": 1.069015938002632, "grad_norm": 1.5271792603913512, "learning_rate": 9.69062811930266e-06, "loss": 0.6324823498725891, "step": 3656 }, { "epoch": 1.0693083784178974, "grad_norm": 1.5347219744388463, "learning_rate": 9.68579576280474e-06, "loss": 0.5261266231536865, "step": 3657 }, { "epoch": 1.0696008188331627, "grad_norm": 1.408009396375569, "learning_rate": 9.680963479750152e-06, "loss": 0.49827292561531067, "step": 3658 }, { "epoch": 1.069893259248428, "grad_norm": 1.8715423798930795, "learning_rate": 9.67613127126841e-06, "loss": 0.5273935794830322, "step": 3659 }, { "epoch": 1.0701856996636936, "grad_norm": 1.5578682729768194, "learning_rate": 9.671299138489017e-06, "loss": 0.5816709995269775, "step": 3660 }, { "epoch": 1.070478140078959, "grad_norm": 1.7016426471813102, "learning_rate": 9.66646708254145e-06, "loss": 0.5591616630554199, "step": 3661 }, { "epoch": 1.0707705804942242, "grad_norm": 1.5738449439513973, "learning_rate": 9.661635104555172e-06, "loss": 0.581566572189331, "step": 3662 }, { "epoch": 1.0710630209094898, "grad_norm": 1.5518333497561696, "learning_rate": 9.656803205659632e-06, "loss": 0.5339047312736511, "step": 3663 }, { "epoch": 1.0713554613247551, "grad_norm": 1.6271916881343873, "learning_rate": 9.651971386984258e-06, "loss": 0.5200103521347046, "step": 3664 }, { "epoch": 1.0716479017400204, "grad_norm": 1.6521270716003156, "learning_rate": 9.647139649658454e-06, "loss": 0.7201805114746094, "step": 3665 }, { "epoch": 1.0719403421552858, "grad_norm": 1.534541270100013, "learning_rate": 9.642307994811614e-06, "loss": 0.4801551103591919, "step": 3666 }, { "epoch": 1.0722327825705513, "grad_norm": 1.5215862158184845, "learning_rate": 9.637476423573106e-06, "loss": 0.5809728503227234, "step": 3667 }, { "epoch": 1.0725252229858167, "grad_norm": 1.6423129831570165, "learning_rate": 9.632644937072277e-06, "loss": 0.6493573188781738, "step": 3668 }, { "epoch": 1.072817663401082, "grad_norm": 1.5984538738730298, "learning_rate": 9.627813536438461e-06, "loss": 0.5858349800109863, "step": 3669 }, { "epoch": 1.0731101038163473, "grad_norm": 1.5154205099747375, "learning_rate": 9.622982222800968e-06, "loss": 0.604835033416748, "step": 3670 }, { "epoch": 1.0734025442316129, "grad_norm": 1.6814842296922758, "learning_rate": 9.618150997289091e-06, "loss": 0.6168441772460938, "step": 3671 }, { "epoch": 1.0736949846468782, "grad_norm": 1.4221905571438933, "learning_rate": 9.613319861032093e-06, "loss": 0.5297094583511353, "step": 3672 }, { "epoch": 1.0739874250621435, "grad_norm": 1.4440813284349416, "learning_rate": 9.608488815159226e-06, "loss": 0.513571560382843, "step": 3673 }, { "epoch": 1.074279865477409, "grad_norm": 1.4202335692197015, "learning_rate": 9.603657860799721e-06, "loss": 0.4383837580680847, "step": 3674 }, { "epoch": 1.0745723058926744, "grad_norm": 1.660966167075539, "learning_rate": 9.59882699908278e-06, "loss": 0.5428420305252075, "step": 3675 }, { "epoch": 1.0748647463079397, "grad_norm": 1.331252403406651, "learning_rate": 9.593996231137587e-06, "loss": 0.5193662047386169, "step": 3676 }, { "epoch": 1.0751571867232053, "grad_norm": 1.1890998376752542, "learning_rate": 9.589165558093311e-06, "loss": 0.47949904203414917, "step": 3677 }, { "epoch": 1.0754496271384706, "grad_norm": 1.4440336102087743, "learning_rate": 9.584334981079085e-06, "loss": 0.5092326402664185, "step": 3678 }, { "epoch": 1.075742067553736, "grad_norm": 1.642845621448486, "learning_rate": 9.579504501224028e-06, "loss": 0.6627280712127686, "step": 3679 }, { "epoch": 1.0760345079690012, "grad_norm": 1.4633415466571795, "learning_rate": 9.57467411965724e-06, "loss": 0.45087775588035583, "step": 3680 }, { "epoch": 1.0763269483842668, "grad_norm": 1.5441336288481917, "learning_rate": 9.569843837507788e-06, "loss": 0.5745380520820618, "step": 3681 }, { "epoch": 1.0766193887995321, "grad_norm": 1.4663672637613454, "learning_rate": 9.565013655904728e-06, "loss": 0.4410436749458313, "step": 3682 }, { "epoch": 1.0769118292147974, "grad_norm": 1.5197962338342057, "learning_rate": 9.560183575977079e-06, "loss": 0.4991244375705719, "step": 3683 }, { "epoch": 1.0772042696300628, "grad_norm": 1.760205368894331, "learning_rate": 9.555353598853842e-06, "loss": 0.6316145658493042, "step": 3684 }, { "epoch": 1.0774967100453283, "grad_norm": 1.7400994246729, "learning_rate": 9.550523725664e-06, "loss": 0.5593908429145813, "step": 3685 }, { "epoch": 1.0777891504605936, "grad_norm": 1.360696277932948, "learning_rate": 9.545693957536503e-06, "loss": 0.5491319894790649, "step": 3686 }, { "epoch": 1.078081590875859, "grad_norm": 1.6733496726210937, "learning_rate": 9.540864295600282e-06, "loss": 0.6299821138381958, "step": 3687 }, { "epoch": 1.0783740312911245, "grad_norm": 1.584478567774571, "learning_rate": 9.536034740984244e-06, "loss": 0.5673841238021851, "step": 3688 }, { "epoch": 1.0786664717063899, "grad_norm": 1.2029070866459273, "learning_rate": 9.53120529481726e-06, "loss": 0.45966464281082153, "step": 3689 }, { "epoch": 1.0789589121216552, "grad_norm": 1.5763188044346095, "learning_rate": 9.526375958228191e-06, "loss": 0.5831631422042847, "step": 3690 }, { "epoch": 1.0792513525369205, "grad_norm": 1.6299976133727174, "learning_rate": 9.52154673234586e-06, "loss": 0.5456256866455078, "step": 3691 }, { "epoch": 1.079543792952186, "grad_norm": 1.4868906970264604, "learning_rate": 9.516717618299069e-06, "loss": 0.46428292989730835, "step": 3692 }, { "epoch": 1.0798362333674514, "grad_norm": 1.4498481381133475, "learning_rate": 9.511888617216602e-06, "loss": 0.47320839762687683, "step": 3693 }, { "epoch": 1.0801286737827167, "grad_norm": 1.4932376641022789, "learning_rate": 9.507059730227199e-06, "loss": 0.5205492973327637, "step": 3694 }, { "epoch": 1.0804211141979823, "grad_norm": 1.631704411581211, "learning_rate": 9.502230958459587e-06, "loss": 0.42696553468704224, "step": 3695 }, { "epoch": 1.0807135546132476, "grad_norm": 1.5001123816983175, "learning_rate": 9.497402303042463e-06, "loss": 0.5147116780281067, "step": 3696 }, { "epoch": 1.081005995028513, "grad_norm": 1.38029323867701, "learning_rate": 9.492573765104494e-06, "loss": 0.5080294609069824, "step": 3697 }, { "epoch": 1.0812984354437782, "grad_norm": 1.6652094239637947, "learning_rate": 9.487745345774323e-06, "loss": 0.6228866577148438, "step": 3698 }, { "epoch": 1.0815908758590438, "grad_norm": 1.5822778586922481, "learning_rate": 9.482917046180563e-06, "loss": 0.5560915470123291, "step": 3699 }, { "epoch": 1.0818833162743091, "grad_norm": 1.5535091238731367, "learning_rate": 9.4780888674518e-06, "loss": 0.5245859622955322, "step": 3700 }, { "epoch": 1.0821757566895744, "grad_norm": 1.5051094804368905, "learning_rate": 9.47326081071659e-06, "loss": 0.6462790966033936, "step": 3701 }, { "epoch": 1.08246819710484, "grad_norm": 1.5924758840128848, "learning_rate": 9.468432877103462e-06, "loss": 0.5196692943572998, "step": 3702 }, { "epoch": 1.0827606375201053, "grad_norm": 1.7568328506180717, "learning_rate": 9.463605067740917e-06, "loss": 0.5487779974937439, "step": 3703 }, { "epoch": 1.0830530779353706, "grad_norm": 1.6433197945872438, "learning_rate": 9.458777383757428e-06, "loss": 0.5471592545509338, "step": 3704 }, { "epoch": 1.083345518350636, "grad_norm": 1.7295248979937683, "learning_rate": 9.453949826281436e-06, "loss": 0.6927378177642822, "step": 3705 }, { "epoch": 1.0836379587659015, "grad_norm": 1.645450906929874, "learning_rate": 9.449122396441344e-06, "loss": 0.569003164768219, "step": 3706 }, { "epoch": 1.0839303991811668, "grad_norm": 1.5204128580175535, "learning_rate": 9.444295095365549e-06, "loss": 0.5655964612960815, "step": 3707 }, { "epoch": 1.0842228395964322, "grad_norm": 1.5653417821245283, "learning_rate": 9.439467924182397e-06, "loss": 0.6223032474517822, "step": 3708 }, { "epoch": 1.0845152800116975, "grad_norm": 1.8058201614843348, "learning_rate": 9.43464088402021e-06, "loss": 0.6553555727005005, "step": 3709 }, { "epoch": 1.084807720426963, "grad_norm": 1.7065419655088354, "learning_rate": 9.429813976007277e-06, "loss": 0.534509539604187, "step": 3710 }, { "epoch": 1.0851001608422284, "grad_norm": 1.7341944929762452, "learning_rate": 9.42498720127186e-06, "loss": 0.5801417827606201, "step": 3711 }, { "epoch": 1.0853926012574937, "grad_norm": 1.4311879630985456, "learning_rate": 9.42016056094219e-06, "loss": 0.47260361909866333, "step": 3712 }, { "epoch": 1.0856850416727593, "grad_norm": 1.5640804855296242, "learning_rate": 9.415334056146464e-06, "loss": 0.5924841165542603, "step": 3713 }, { "epoch": 1.0859774820880246, "grad_norm": 1.7346051575584198, "learning_rate": 9.410507688012847e-06, "loss": 0.6029725074768066, "step": 3714 }, { "epoch": 1.08626992250329, "grad_norm": 1.6762909361099274, "learning_rate": 9.405681457669472e-06, "loss": 0.5838413834571838, "step": 3715 }, { "epoch": 1.0865623629185555, "grad_norm": 1.277586165055191, "learning_rate": 9.400855366244445e-06, "loss": 0.4739546775817871, "step": 3716 }, { "epoch": 1.0868548033338208, "grad_norm": 1.5391172094714582, "learning_rate": 9.396029414865832e-06, "loss": 0.4870055913925171, "step": 3717 }, { "epoch": 1.0871472437490861, "grad_norm": 1.4254039758246118, "learning_rate": 9.39120360466167e-06, "loss": 0.5572132468223572, "step": 3718 }, { "epoch": 1.0874396841643514, "grad_norm": 1.6824352313774058, "learning_rate": 9.386377936759966e-06, "loss": 0.5601439476013184, "step": 3719 }, { "epoch": 1.087732124579617, "grad_norm": 1.4548205788512927, "learning_rate": 9.38155241228869e-06, "loss": 0.4551504850387573, "step": 3720 }, { "epoch": 1.0880245649948823, "grad_norm": 1.447968175073075, "learning_rate": 9.376727032375773e-06, "loss": 0.5656375885009766, "step": 3721 }, { "epoch": 1.0883170054101476, "grad_norm": 1.4767808933411752, "learning_rate": 9.371901798149124e-06, "loss": 0.5597153902053833, "step": 3722 }, { "epoch": 1.088609445825413, "grad_norm": 1.5252235269095387, "learning_rate": 9.367076710736613e-06, "loss": 0.5946288108825684, "step": 3723 }, { "epoch": 1.0889018862406785, "grad_norm": 1.9924638298376933, "learning_rate": 9.36225177126607e-06, "loss": 0.5951449871063232, "step": 3724 }, { "epoch": 1.0891943266559438, "grad_norm": 1.7845167649533908, "learning_rate": 9.3574269808653e-06, "loss": 0.5755487680435181, "step": 3725 }, { "epoch": 1.0894867670712092, "grad_norm": 1.5254834641419546, "learning_rate": 9.352602340662065e-06, "loss": 0.5118892788887024, "step": 3726 }, { "epoch": 1.0897792074864747, "grad_norm": 1.596558008598135, "learning_rate": 9.347777851784097e-06, "loss": 0.5652351975440979, "step": 3727 }, { "epoch": 1.09007164790174, "grad_norm": 1.5215560380827415, "learning_rate": 9.34295351535909e-06, "loss": 0.624887228012085, "step": 3728 }, { "epoch": 1.0903640883170054, "grad_norm": 1.447383452488018, "learning_rate": 9.338129332514705e-06, "loss": 0.534363329410553, "step": 3729 }, { "epoch": 1.0906565287322707, "grad_norm": 1.477841435635963, "learning_rate": 9.333305304378565e-06, "loss": 0.6203521490097046, "step": 3730 }, { "epoch": 1.0909489691475363, "grad_norm": 1.7401174715864398, "learning_rate": 9.328481432078254e-06, "loss": 0.64560866355896, "step": 3731 }, { "epoch": 1.0912414095628016, "grad_norm": 1.5841972191853104, "learning_rate": 9.323657716741327e-06, "loss": 0.5389514565467834, "step": 3732 }, { "epoch": 1.091533849978067, "grad_norm": 1.4621625707128454, "learning_rate": 9.318834159495295e-06, "loss": 0.5245277881622314, "step": 3733 }, { "epoch": 1.0918262903933325, "grad_norm": 1.6486990138865423, "learning_rate": 9.314010761467637e-06, "loss": 0.603967010974884, "step": 3734 }, { "epoch": 1.0921187308085978, "grad_norm": 1.7983997195133608, "learning_rate": 9.309187523785794e-06, "loss": 0.5426995754241943, "step": 3735 }, { "epoch": 1.092411171223863, "grad_norm": 1.6248514181798874, "learning_rate": 9.30436444757717e-06, "loss": 0.5400352478027344, "step": 3736 }, { "epoch": 1.0927036116391284, "grad_norm": 1.5009984854869718, "learning_rate": 9.299541533969121e-06, "loss": 0.5016524195671082, "step": 3737 }, { "epoch": 1.092996052054394, "grad_norm": 1.7929437285814107, "learning_rate": 9.294718784088982e-06, "loss": 0.526217520236969, "step": 3738 }, { "epoch": 1.0932884924696593, "grad_norm": 1.7293517567202035, "learning_rate": 9.289896199064038e-06, "loss": 0.525063157081604, "step": 3739 }, { "epoch": 1.0935809328849246, "grad_norm": 1.269101628653969, "learning_rate": 9.285073780021541e-06, "loss": 0.3792048692703247, "step": 3740 }, { "epoch": 1.0938733733001902, "grad_norm": 1.4416380651624152, "learning_rate": 9.280251528088702e-06, "loss": 0.5326308012008667, "step": 3741 }, { "epoch": 1.0941658137154555, "grad_norm": 1.3946561055322027, "learning_rate": 9.275429444392692e-06, "loss": 0.5675199627876282, "step": 3742 }, { "epoch": 1.0944582541307208, "grad_norm": 1.640552639536372, "learning_rate": 9.270607530060643e-06, "loss": 0.6525516510009766, "step": 3743 }, { "epoch": 1.0947506945459862, "grad_norm": 1.563647681973335, "learning_rate": 9.265785786219647e-06, "loss": 0.6376343369483948, "step": 3744 }, { "epoch": 1.0950431349612517, "grad_norm": 1.7701418719133022, "learning_rate": 9.260964213996763e-06, "loss": 0.6440377235412598, "step": 3745 }, { "epoch": 1.095335575376517, "grad_norm": 1.886853414823259, "learning_rate": 9.256142814518997e-06, "loss": 0.5971434116363525, "step": 3746 }, { "epoch": 1.0956280157917824, "grad_norm": 1.3797760891901851, "learning_rate": 9.251321588913331e-06, "loss": 0.5096890330314636, "step": 3747 }, { "epoch": 1.0959204562070477, "grad_norm": 1.7099901744739332, "learning_rate": 9.246500538306686e-06, "loss": 0.4303498864173889, "step": 3748 }, { "epoch": 1.0962128966223132, "grad_norm": 1.5934571510718554, "learning_rate": 9.241679663825961e-06, "loss": 0.5484192371368408, "step": 3749 }, { "epoch": 1.0965053370375786, "grad_norm": 1.6268147624989107, "learning_rate": 9.236858966598004e-06, "loss": 0.6057884693145752, "step": 3750 }, { "epoch": 1.096797777452844, "grad_norm": 1.565840426411154, "learning_rate": 9.232038447749623e-06, "loss": 0.5261536836624146, "step": 3751 }, { "epoch": 1.0970902178681095, "grad_norm": 1.3242416099520606, "learning_rate": 9.227218108407586e-06, "loss": 0.470365047454834, "step": 3752 }, { "epoch": 1.0973826582833748, "grad_norm": 1.8694075496184692, "learning_rate": 9.222397949698618e-06, "loss": 0.6158323287963867, "step": 3753 }, { "epoch": 1.09767509869864, "grad_norm": 1.4353847976975904, "learning_rate": 9.217577972749401e-06, "loss": 0.582190990447998, "step": 3754 }, { "epoch": 1.0979675391139057, "grad_norm": 1.5377732823861585, "learning_rate": 9.212758178686575e-06, "loss": 0.4939305782318115, "step": 3755 }, { "epoch": 1.098259979529171, "grad_norm": 1.501946006392042, "learning_rate": 9.207938568636739e-06, "loss": 0.576829731464386, "step": 3756 }, { "epoch": 1.0985524199444363, "grad_norm": 1.582387804664269, "learning_rate": 9.203119143726445e-06, "loss": 0.581257164478302, "step": 3757 }, { "epoch": 1.0988448603597016, "grad_norm": 1.4501950316688965, "learning_rate": 9.19829990508221e-06, "loss": 0.6105127334594727, "step": 3758 }, { "epoch": 1.0991373007749672, "grad_norm": 1.7379854400774775, "learning_rate": 9.193480853830495e-06, "loss": 0.5311432480812073, "step": 3759 }, { "epoch": 1.0994297411902325, "grad_norm": 1.3707297007944412, "learning_rate": 9.188661991097726e-06, "loss": 0.44334596395492554, "step": 3760 }, { "epoch": 1.0997221816054978, "grad_norm": 1.6175971035022318, "learning_rate": 9.183843318010285e-06, "loss": 0.5795773267745972, "step": 3761 }, { "epoch": 1.1000146220207632, "grad_norm": 1.4465404341375856, "learning_rate": 9.179024835694504e-06, "loss": 0.619825541973114, "step": 3762 }, { "epoch": 1.1003070624360287, "grad_norm": 1.754450237430447, "learning_rate": 9.174206545276678e-06, "loss": 0.633934497833252, "step": 3763 }, { "epoch": 1.100599502851294, "grad_norm": 1.34560762533496, "learning_rate": 9.169388447883053e-06, "loss": 0.48922473192214966, "step": 3764 }, { "epoch": 1.1008919432665594, "grad_norm": 1.7340747304342141, "learning_rate": 9.164570544639825e-06, "loss": 0.6125025153160095, "step": 3765 }, { "epoch": 1.101184383681825, "grad_norm": 1.4327034643571392, "learning_rate": 9.159752836673154e-06, "loss": 0.5428078174591064, "step": 3766 }, { "epoch": 1.1014768240970902, "grad_norm": 1.4335551572069505, "learning_rate": 9.154935325109148e-06, "loss": 0.5848157405853271, "step": 3767 }, { "epoch": 1.1017692645123556, "grad_norm": 1.5053579548838565, "learning_rate": 9.150118011073872e-06, "loss": 0.5150102376937866, "step": 3768 }, { "epoch": 1.102061704927621, "grad_norm": 1.429407171536289, "learning_rate": 9.145300895693344e-06, "loss": 0.6106699705123901, "step": 3769 }, { "epoch": 1.1023541453428864, "grad_norm": 1.4079938603953852, "learning_rate": 9.140483980093534e-06, "loss": 0.5819482803344727, "step": 3770 }, { "epoch": 1.1026465857581518, "grad_norm": 1.7060315490040079, "learning_rate": 9.135667265400369e-06, "loss": 0.6499812602996826, "step": 3771 }, { "epoch": 1.102939026173417, "grad_norm": 1.520551323323022, "learning_rate": 9.130850752739724e-06, "loss": 0.5375189781188965, "step": 3772 }, { "epoch": 1.1032314665886827, "grad_norm": 1.5200340564855783, "learning_rate": 9.12603444323743e-06, "loss": 0.5582318902015686, "step": 3773 }, { "epoch": 1.103523907003948, "grad_norm": 1.6010357553720616, "learning_rate": 9.121218338019273e-06, "loss": 0.5549799203872681, "step": 3774 }, { "epoch": 1.1038163474192133, "grad_norm": 1.672600820514396, "learning_rate": 9.116402438210988e-06, "loss": 0.4942197799682617, "step": 3775 }, { "epoch": 1.1041087878344786, "grad_norm": 1.294858704528479, "learning_rate": 9.11158674493826e-06, "loss": 0.5039837956428528, "step": 3776 }, { "epoch": 1.1044012282497442, "grad_norm": 1.4904483423531274, "learning_rate": 9.106771259326726e-06, "loss": 0.49781280755996704, "step": 3777 }, { "epoch": 1.1046936686650095, "grad_norm": 1.5058975394537781, "learning_rate": 9.101955982501981e-06, "loss": 0.41755813360214233, "step": 3778 }, { "epoch": 1.1049861090802748, "grad_norm": 1.3247888444316807, "learning_rate": 9.097140915589564e-06, "loss": 0.5605067014694214, "step": 3779 }, { "epoch": 1.1052785494955404, "grad_norm": 1.6960736504408462, "learning_rate": 9.092326059714971e-06, "loss": 0.6291122436523438, "step": 3780 }, { "epoch": 1.1055709899108057, "grad_norm": 1.5309788529424204, "learning_rate": 9.087511416003636e-06, "loss": 0.5164260864257812, "step": 3781 }, { "epoch": 1.105863430326071, "grad_norm": 1.481065256446166, "learning_rate": 9.082696985580964e-06, "loss": 0.5002986192703247, "step": 3782 }, { "epoch": 1.1061558707413364, "grad_norm": 1.8553995759252653, "learning_rate": 9.077882769572295e-06, "loss": 0.5149055123329163, "step": 3783 }, { "epoch": 1.106448311156602, "grad_norm": 1.4637547819206846, "learning_rate": 9.073068769102925e-06, "loss": 0.5375808477401733, "step": 3784 }, { "epoch": 1.1067407515718672, "grad_norm": 1.4438276838658128, "learning_rate": 9.06825498529809e-06, "loss": 0.5574408173561096, "step": 3785 }, { "epoch": 1.1070331919871326, "grad_norm": 1.8566533611842586, "learning_rate": 9.063441419282989e-06, "loss": 0.7410034537315369, "step": 3786 }, { "epoch": 1.107325632402398, "grad_norm": 1.4780218137550694, "learning_rate": 9.058628072182759e-06, "loss": 0.4890757203102112, "step": 3787 }, { "epoch": 1.1076180728176634, "grad_norm": 1.449027088222319, "learning_rate": 9.053814945122496e-06, "loss": 0.5012304782867432, "step": 3788 }, { "epoch": 1.1079105132329288, "grad_norm": 1.6277147220392454, "learning_rate": 9.049002039227239e-06, "loss": 0.5235648155212402, "step": 3789 }, { "epoch": 1.108202953648194, "grad_norm": 1.5099212526378973, "learning_rate": 9.044189355621969e-06, "loss": 0.44732457399368286, "step": 3790 }, { "epoch": 1.1084953940634596, "grad_norm": 1.6131396298332503, "learning_rate": 9.039376895431627e-06, "loss": 0.5771712064743042, "step": 3791 }, { "epoch": 1.108787834478725, "grad_norm": 2.537465666899194, "learning_rate": 9.034564659781096e-06, "loss": 0.5361784100532532, "step": 3792 }, { "epoch": 1.1090802748939903, "grad_norm": 1.3520934517992165, "learning_rate": 9.029752649795203e-06, "loss": 0.5305893421173096, "step": 3793 }, { "epoch": 1.1093727153092559, "grad_norm": 1.3303918593615456, "learning_rate": 9.02494086659873e-06, "loss": 0.5094715356826782, "step": 3794 }, { "epoch": 1.1096651557245212, "grad_norm": 1.4162243148383913, "learning_rate": 9.020129311316405e-06, "loss": 0.5406676530838013, "step": 3795 }, { "epoch": 1.1099575961397865, "grad_norm": 1.5431545303983976, "learning_rate": 9.015317985072893e-06, "loss": 0.5170687437057495, "step": 3796 }, { "epoch": 1.1102500365550518, "grad_norm": 1.454438976249235, "learning_rate": 9.010506888992814e-06, "loss": 0.4632429778575897, "step": 3797 }, { "epoch": 1.1105424769703174, "grad_norm": 1.8257270837662332, "learning_rate": 9.005696024200734e-06, "loss": 0.5614180564880371, "step": 3798 }, { "epoch": 1.1108349173855827, "grad_norm": 1.5187438448472135, "learning_rate": 9.000885391821164e-06, "loss": 0.5660920143127441, "step": 3799 }, { "epoch": 1.111127357800848, "grad_norm": 1.616333702810617, "learning_rate": 8.996074992978558e-06, "loss": 0.6346436142921448, "step": 3800 }, { "epoch": 1.1114197982161134, "grad_norm": 1.2613316779938173, "learning_rate": 8.991264828797319e-06, "loss": 0.4295850396156311, "step": 3801 }, { "epoch": 1.111712238631379, "grad_norm": 1.4545086499056976, "learning_rate": 8.986454900401791e-06, "loss": 0.4797070622444153, "step": 3802 }, { "epoch": 1.1120046790466442, "grad_norm": 1.3353593055033692, "learning_rate": 8.98164520891627e-06, "loss": 0.4912114143371582, "step": 3803 }, { "epoch": 1.1122971194619096, "grad_norm": 1.6135433736276805, "learning_rate": 8.976835755464988e-06, "loss": 0.4156647026538849, "step": 3804 }, { "epoch": 1.1125895598771751, "grad_norm": 1.6120031027815822, "learning_rate": 8.97202654117213e-06, "loss": 0.4527992010116577, "step": 3805 }, { "epoch": 1.1128820002924404, "grad_norm": 1.6881758541294942, "learning_rate": 8.967217567161817e-06, "loss": 0.5969425439834595, "step": 3806 }, { "epoch": 1.1131744407077058, "grad_norm": 1.5313114259080804, "learning_rate": 8.962408834558116e-06, "loss": 0.5867633819580078, "step": 3807 }, { "epoch": 1.113466881122971, "grad_norm": 1.4924056676350326, "learning_rate": 8.957600344485042e-06, "loss": 0.549109697341919, "step": 3808 }, { "epoch": 1.1137593215382366, "grad_norm": 1.6602567019426782, "learning_rate": 8.952792098066549e-06, "loss": 0.6336593627929688, "step": 3809 }, { "epoch": 1.114051761953502, "grad_norm": 1.4867429859275132, "learning_rate": 8.947984096426537e-06, "loss": 0.5403220653533936, "step": 3810 }, { "epoch": 1.1143442023687673, "grad_norm": 1.3422567204959701, "learning_rate": 8.943176340688846e-06, "loss": 0.37941914796829224, "step": 3811 }, { "epoch": 1.1146366427840328, "grad_norm": 1.7322077540170269, "learning_rate": 8.938368831977262e-06, "loss": 0.5509335994720459, "step": 3812 }, { "epoch": 1.1149290831992982, "grad_norm": 1.7077554301344111, "learning_rate": 8.933561571415506e-06, "loss": 0.5798860788345337, "step": 3813 }, { "epoch": 1.1152215236145635, "grad_norm": 1.8693354922278385, "learning_rate": 8.92875456012725e-06, "loss": 0.5549412965774536, "step": 3814 }, { "epoch": 1.1155139640298288, "grad_norm": 1.5992402094758784, "learning_rate": 8.9239477992361e-06, "loss": 0.4707058072090149, "step": 3815 }, { "epoch": 1.1158064044450944, "grad_norm": 1.5838333385974708, "learning_rate": 8.919141289865611e-06, "loss": 0.4717002511024475, "step": 3816 }, { "epoch": 1.1160988448603597, "grad_norm": 1.288572308356885, "learning_rate": 8.914335033139274e-06, "loss": 0.48403650522232056, "step": 3817 }, { "epoch": 1.116391285275625, "grad_norm": 1.6715157915340426, "learning_rate": 8.909529030180522e-06, "loss": 0.48592090606689453, "step": 3818 }, { "epoch": 1.1166837256908906, "grad_norm": 1.566157541574177, "learning_rate": 8.904723282112728e-06, "loss": 0.5052220225334167, "step": 3819 }, { "epoch": 1.116976166106156, "grad_norm": 1.6151321192825796, "learning_rate": 8.899917790059208e-06, "loss": 0.7858535051345825, "step": 3820 }, { "epoch": 1.1172686065214212, "grad_norm": 1.8369545909174703, "learning_rate": 8.895112555143217e-06, "loss": 0.6768159866333008, "step": 3821 }, { "epoch": 1.1175610469366866, "grad_norm": 1.8079763728482598, "learning_rate": 8.890307578487947e-06, "loss": 0.5661243200302124, "step": 3822 }, { "epoch": 1.1178534873519521, "grad_norm": 1.7067515294047517, "learning_rate": 8.885502861216535e-06, "loss": 0.5129438638687134, "step": 3823 }, { "epoch": 1.1181459277672174, "grad_norm": 1.5735393429941704, "learning_rate": 8.880698404452051e-06, "loss": 0.4813467264175415, "step": 3824 }, { "epoch": 1.1184383681824828, "grad_norm": 1.5840908667031388, "learning_rate": 8.87589420931751e-06, "loss": 0.5165577530860901, "step": 3825 }, { "epoch": 1.118730808597748, "grad_norm": 1.4425390765128903, "learning_rate": 8.871090276935863e-06, "loss": 0.47335073351860046, "step": 3826 }, { "epoch": 1.1190232490130136, "grad_norm": 1.6934955516318184, "learning_rate": 8.86628660843e-06, "loss": 0.4902348518371582, "step": 3827 }, { "epoch": 1.119315689428279, "grad_norm": 1.7245920316429901, "learning_rate": 8.861483204922752e-06, "loss": 0.5933388471603394, "step": 3828 }, { "epoch": 1.1196081298435443, "grad_norm": 1.5505961542425288, "learning_rate": 8.85668006753688e-06, "loss": 0.4898201823234558, "step": 3829 }, { "epoch": 1.1199005702588098, "grad_norm": 1.3287782031202422, "learning_rate": 8.851877197395088e-06, "loss": 0.4745003879070282, "step": 3830 }, { "epoch": 1.1201930106740752, "grad_norm": 1.5081067046883336, "learning_rate": 8.847074595620024e-06, "loss": 0.5246972441673279, "step": 3831 }, { "epoch": 1.1204854510893405, "grad_norm": 1.498399687409688, "learning_rate": 8.842272263334263e-06, "loss": 0.5196787714958191, "step": 3832 }, { "epoch": 1.120777891504606, "grad_norm": 2.0301798075149446, "learning_rate": 8.83747020166032e-06, "loss": 0.6721034049987793, "step": 3833 }, { "epoch": 1.1210703319198714, "grad_norm": 1.4930580521199184, "learning_rate": 8.832668411720652e-06, "loss": 0.5654234886169434, "step": 3834 }, { "epoch": 1.1213627723351367, "grad_norm": 1.91001506609742, "learning_rate": 8.827866894637642e-06, "loss": 0.7520767450332642, "step": 3835 }, { "epoch": 1.121655212750402, "grad_norm": 1.3753523987373926, "learning_rate": 8.82306565153362e-06, "loss": 0.43645960092544556, "step": 3836 }, { "epoch": 1.1219476531656676, "grad_norm": 1.688542605024225, "learning_rate": 8.818264683530845e-06, "loss": 0.5802274942398071, "step": 3837 }, { "epoch": 1.122240093580933, "grad_norm": 1.5706370149670577, "learning_rate": 8.813463991751516e-06, "loss": 0.5593410134315491, "step": 3838 }, { "epoch": 1.1225325339961982, "grad_norm": 1.56949134961986, "learning_rate": 8.808663577317765e-06, "loss": 0.6126681566238403, "step": 3839 }, { "epoch": 1.1228249744114636, "grad_norm": 1.6396156905409707, "learning_rate": 8.80386344135166e-06, "loss": 0.6245180368423462, "step": 3840 }, { "epoch": 1.123117414826729, "grad_norm": 1.3541654958690765, "learning_rate": 8.799063584975201e-06, "loss": 0.6611473560333252, "step": 3841 }, { "epoch": 1.1234098552419944, "grad_norm": 1.2310988629927149, "learning_rate": 8.79426400931033e-06, "loss": 0.40020978450775146, "step": 3842 }, { "epoch": 1.1237022956572598, "grad_norm": 1.3644507366239775, "learning_rate": 8.789464715478913e-06, "loss": 0.4965318441390991, "step": 3843 }, { "epoch": 1.1239947360725253, "grad_norm": 1.4326851806590044, "learning_rate": 8.784665704602758e-06, "loss": 0.4838374853134155, "step": 3844 }, { "epoch": 1.1242871764877906, "grad_norm": 1.389039662475551, "learning_rate": 8.77986697780361e-06, "loss": 0.5756508708000183, "step": 3845 }, { "epoch": 1.124579616903056, "grad_norm": 1.484286888056792, "learning_rate": 8.775068536203132e-06, "loss": 0.5341511964797974, "step": 3846 }, { "epoch": 1.1248720573183213, "grad_norm": 1.5007549282773276, "learning_rate": 8.77027038092294e-06, "loss": 0.6239134073257446, "step": 3847 }, { "epoch": 1.1251644977335868, "grad_norm": 1.818555508500906, "learning_rate": 8.765472513084566e-06, "loss": 0.5642406940460205, "step": 3848 }, { "epoch": 1.1254569381488522, "grad_norm": 1.5841401225303304, "learning_rate": 8.760674933809488e-06, "loss": 0.5242771506309509, "step": 3849 }, { "epoch": 1.1257493785641175, "grad_norm": 1.5608207104848433, "learning_rate": 8.755877644219108e-06, "loss": 0.5205737352371216, "step": 3850 }, { "epoch": 1.126041818979383, "grad_norm": 1.6760248633979633, "learning_rate": 8.751080645434768e-06, "loss": 0.5005168318748474, "step": 3851 }, { "epoch": 1.1263342593946484, "grad_norm": 1.539730717074913, "learning_rate": 8.74628393857773e-06, "loss": 0.44978275895118713, "step": 3852 }, { "epoch": 1.1266266998099137, "grad_norm": 1.3558571119447433, "learning_rate": 8.741487524769198e-06, "loss": 0.43631571531295776, "step": 3853 }, { "epoch": 1.126919140225179, "grad_norm": 1.220093214706796, "learning_rate": 8.736691405130306e-06, "loss": 0.4196016788482666, "step": 3854 }, { "epoch": 1.1272115806404446, "grad_norm": 1.6296314839875645, "learning_rate": 8.731895580782118e-06, "loss": 0.6389856338500977, "step": 3855 }, { "epoch": 1.12750402105571, "grad_norm": 1.726359030533187, "learning_rate": 8.72710005284563e-06, "loss": 0.5465584993362427, "step": 3856 }, { "epoch": 1.1277964614709752, "grad_norm": 1.469192647678069, "learning_rate": 8.722304822441757e-06, "loss": 0.5513765811920166, "step": 3857 }, { "epoch": 1.1280889018862408, "grad_norm": 1.5516390698184288, "learning_rate": 8.717509890691369e-06, "loss": 0.6984349489212036, "step": 3858 }, { "epoch": 1.128381342301506, "grad_norm": 1.6096511723205336, "learning_rate": 8.712715258715248e-06, "loss": 0.5311027765274048, "step": 3859 }, { "epoch": 1.1286737827167714, "grad_norm": 1.5113126886002746, "learning_rate": 8.707920927634105e-06, "loss": 0.4598672091960907, "step": 3860 }, { "epoch": 1.1289662231320368, "grad_norm": 1.8202302284240548, "learning_rate": 8.703126898568591e-06, "loss": 0.6177612543106079, "step": 3861 }, { "epoch": 1.1292586635473023, "grad_norm": 1.8043964275332298, "learning_rate": 8.69833317263928e-06, "loss": 0.6442389488220215, "step": 3862 }, { "epoch": 1.1295511039625676, "grad_norm": 1.4793698971631246, "learning_rate": 8.693539750966672e-06, "loss": 0.5925737023353577, "step": 3863 }, { "epoch": 1.129843544377833, "grad_norm": 1.3730688779887357, "learning_rate": 8.688746634671207e-06, "loss": 0.46009114384651184, "step": 3864 }, { "epoch": 1.1301359847930983, "grad_norm": 1.6065358861472605, "learning_rate": 8.683953824873246e-06, "loss": 0.5438460111618042, "step": 3865 }, { "epoch": 1.1304284252083638, "grad_norm": 1.6436751318662282, "learning_rate": 8.679161322693073e-06, "loss": 0.5355101823806763, "step": 3866 }, { "epoch": 1.1307208656236292, "grad_norm": 1.5636124606467166, "learning_rate": 8.67436912925091e-06, "loss": 0.4494459629058838, "step": 3867 }, { "epoch": 1.1310133060388945, "grad_norm": 1.5118698872161136, "learning_rate": 8.669577245666905e-06, "loss": 0.5828550457954407, "step": 3868 }, { "epoch": 1.13130574645416, "grad_norm": 1.43455699505813, "learning_rate": 8.664785673061127e-06, "loss": 0.4956590235233307, "step": 3869 }, { "epoch": 1.1315981868694254, "grad_norm": 1.5146504272638424, "learning_rate": 8.659994412553582e-06, "loss": 0.5447779893875122, "step": 3870 }, { "epoch": 1.1318906272846907, "grad_norm": 1.6512585184867246, "learning_rate": 8.655203465264196e-06, "loss": 0.6275361776351929, "step": 3871 }, { "epoch": 1.1321830676999562, "grad_norm": 1.564521343459816, "learning_rate": 8.650412832312823e-06, "loss": 0.47899991273880005, "step": 3872 }, { "epoch": 1.1324755081152216, "grad_norm": 1.1875547206815094, "learning_rate": 8.645622514819243e-06, "loss": 0.3356127142906189, "step": 3873 }, { "epoch": 1.132767948530487, "grad_norm": 1.4442401622701144, "learning_rate": 8.640832513903168e-06, "loss": 0.48855727910995483, "step": 3874 }, { "epoch": 1.1330603889457522, "grad_norm": 1.4528018972795056, "learning_rate": 8.636042830684227e-06, "loss": 0.46642380952835083, "step": 3875 }, { "epoch": 1.1333528293610178, "grad_norm": 1.8421536572224761, "learning_rate": 8.631253466281984e-06, "loss": 0.6179598569869995, "step": 3876 }, { "epoch": 1.133645269776283, "grad_norm": 1.6762180368596016, "learning_rate": 8.626464421815919e-06, "loss": 0.6361704468727112, "step": 3877 }, { "epoch": 1.1339377101915484, "grad_norm": 1.574443230288469, "learning_rate": 8.621675698405446e-06, "loss": 0.6243701577186584, "step": 3878 }, { "epoch": 1.1342301506068138, "grad_norm": 1.6113304231540622, "learning_rate": 8.616887297169895e-06, "loss": 0.5402215123176575, "step": 3879 }, { "epoch": 1.1345225910220793, "grad_norm": 1.6390191276422172, "learning_rate": 8.61209921922853e-06, "loss": 0.6050009727478027, "step": 3880 }, { "epoch": 1.1348150314373446, "grad_norm": 1.6106875040973343, "learning_rate": 8.607311465700534e-06, "loss": 0.5705801248550415, "step": 3881 }, { "epoch": 1.13510747185261, "grad_norm": 1.5380461037587805, "learning_rate": 8.602524037705018e-06, "loss": 0.5467248558998108, "step": 3882 }, { "epoch": 1.1353999122678755, "grad_norm": 1.7121108266736746, "learning_rate": 8.597736936361007e-06, "loss": 0.5903012752532959, "step": 3883 }, { "epoch": 1.1356923526831408, "grad_norm": 1.6218348221942134, "learning_rate": 8.592950162787463e-06, "loss": 0.6034090518951416, "step": 3884 }, { "epoch": 1.1359847930984062, "grad_norm": 1.3056254339924755, "learning_rate": 8.588163718103264e-06, "loss": 0.4282987117767334, "step": 3885 }, { "epoch": 1.1362772335136717, "grad_norm": 1.5127630417626896, "learning_rate": 8.583377603427212e-06, "loss": 0.47374194860458374, "step": 3886 }, { "epoch": 1.136569673928937, "grad_norm": 1.5841934947134406, "learning_rate": 8.578591819878033e-06, "loss": 0.43954724073410034, "step": 3887 }, { "epoch": 1.1368621143442024, "grad_norm": 1.4278799477191386, "learning_rate": 8.573806368574372e-06, "loss": 0.4731065034866333, "step": 3888 }, { "epoch": 1.1371545547594677, "grad_norm": 1.4184800646863156, "learning_rate": 8.5690212506348e-06, "loss": 0.5241256356239319, "step": 3889 }, { "epoch": 1.1374469951747332, "grad_norm": 1.631021419370316, "learning_rate": 8.56423646717781e-06, "loss": 0.5823307037353516, "step": 3890 }, { "epoch": 1.1377394355899986, "grad_norm": 1.654201038343883, "learning_rate": 8.55945201932182e-06, "loss": 0.5360631346702576, "step": 3891 }, { "epoch": 1.138031876005264, "grad_norm": 1.5773624073994579, "learning_rate": 8.554667908185158e-06, "loss": 0.5227797627449036, "step": 3892 }, { "epoch": 1.1383243164205292, "grad_norm": 1.7414634806893152, "learning_rate": 8.549884134886089e-06, "loss": 0.6232806444168091, "step": 3893 }, { "epoch": 1.1386167568357948, "grad_norm": 1.567438316916472, "learning_rate": 8.545100700542782e-06, "loss": 0.6697877049446106, "step": 3894 }, { "epoch": 1.13890919725106, "grad_norm": 1.5115348655280192, "learning_rate": 8.540317606273343e-06, "loss": 0.6348206400871277, "step": 3895 }, { "epoch": 1.1392016376663254, "grad_norm": 1.5453537409734852, "learning_rate": 8.535534853195786e-06, "loss": 0.5578476190567017, "step": 3896 }, { "epoch": 1.139494078081591, "grad_norm": 1.6674507953444782, "learning_rate": 8.530752442428055e-06, "loss": 0.6439946889877319, "step": 3897 }, { "epoch": 1.1397865184968563, "grad_norm": 1.5233786551580588, "learning_rate": 8.525970375088006e-06, "loss": 0.5292261242866516, "step": 3898 }, { "epoch": 1.1400789589121216, "grad_norm": 1.6870433422022266, "learning_rate": 8.521188652293421e-06, "loss": 0.5836480855941772, "step": 3899 }, { "epoch": 1.140371399327387, "grad_norm": 1.731988866581243, "learning_rate": 8.516407275161998e-06, "loss": 0.5166354775428772, "step": 3900 }, { "epoch": 1.1406638397426525, "grad_norm": 1.61853635946673, "learning_rate": 8.511626244811352e-06, "loss": 0.5236127972602844, "step": 3901 }, { "epoch": 1.1409562801579178, "grad_norm": 1.3903461786321225, "learning_rate": 8.506845562359022e-06, "loss": 0.4900703430175781, "step": 3902 }, { "epoch": 1.1412487205731832, "grad_norm": 1.484704749479714, "learning_rate": 8.502065228922464e-06, "loss": 0.5200212001800537, "step": 3903 }, { "epoch": 1.1415411609884485, "grad_norm": 1.588712114908106, "learning_rate": 8.497285245619053e-06, "loss": 0.5553300976753235, "step": 3904 }, { "epoch": 1.141833601403714, "grad_norm": 1.5947362241383982, "learning_rate": 8.492505613566075e-06, "loss": 0.5650131702423096, "step": 3905 }, { "epoch": 1.1421260418189794, "grad_norm": 1.7370414648582224, "learning_rate": 8.487726333880746e-06, "loss": 0.4732077121734619, "step": 3906 }, { "epoch": 1.1424184822342447, "grad_norm": 1.4560698890341355, "learning_rate": 8.482947407680193e-06, "loss": 0.46741920709609985, "step": 3907 }, { "epoch": 1.1427109226495102, "grad_norm": 1.717154367813477, "learning_rate": 8.478168836081457e-06, "loss": 0.606191873550415, "step": 3908 }, { "epoch": 1.1430033630647756, "grad_norm": 1.4018605845855592, "learning_rate": 8.473390620201505e-06, "loss": 0.4373897314071655, "step": 3909 }, { "epoch": 1.143295803480041, "grad_norm": 1.4705540951964773, "learning_rate": 8.468612761157215e-06, "loss": 0.5460623502731323, "step": 3910 }, { "epoch": 1.1435882438953064, "grad_norm": 1.311062743935516, "learning_rate": 8.463835260065379e-06, "loss": 0.4939531087875366, "step": 3911 }, { "epoch": 1.1438806843105718, "grad_norm": 1.4297382144675803, "learning_rate": 8.459058118042708e-06, "loss": 0.544964611530304, "step": 3912 }, { "epoch": 1.144173124725837, "grad_norm": 1.600083847682917, "learning_rate": 8.454281336205836e-06, "loss": 0.6118921041488647, "step": 3913 }, { "epoch": 1.1444655651411024, "grad_norm": 1.4018893231050742, "learning_rate": 8.449504915671304e-06, "loss": 0.561060905456543, "step": 3914 }, { "epoch": 1.144758005556368, "grad_norm": 1.3967184209578762, "learning_rate": 8.444728857555572e-06, "loss": 0.430827796459198, "step": 3915 }, { "epoch": 1.1450504459716333, "grad_norm": 1.7776445971156332, "learning_rate": 8.439953162975011e-06, "loss": 0.5482884645462036, "step": 3916 }, { "epoch": 1.1453428863868986, "grad_norm": 1.8487697311002218, "learning_rate": 8.435177833045911e-06, "loss": 0.6614879965782166, "step": 3917 }, { "epoch": 1.145635326802164, "grad_norm": 1.686073678884194, "learning_rate": 8.430402868884482e-06, "loss": 0.6290509104728699, "step": 3918 }, { "epoch": 1.1459277672174295, "grad_norm": 1.479686951025449, "learning_rate": 8.425628271606836e-06, "loss": 0.404970645904541, "step": 3919 }, { "epoch": 1.1462202076326948, "grad_norm": 1.3978968237521616, "learning_rate": 8.420854042329011e-06, "loss": 0.4902762174606323, "step": 3920 }, { "epoch": 1.1465126480479602, "grad_norm": 1.6869046118960203, "learning_rate": 8.416080182166955e-06, "loss": 0.5757346153259277, "step": 3921 }, { "epoch": 1.1468050884632257, "grad_norm": 1.5541954452670608, "learning_rate": 8.41130669223652e-06, "loss": 0.5453485250473022, "step": 3922 }, { "epoch": 1.147097528878491, "grad_norm": 1.7189844130617113, "learning_rate": 8.40653357365349e-06, "loss": 0.5660290122032166, "step": 3923 }, { "epoch": 1.1473899692937564, "grad_norm": 1.576466831282747, "learning_rate": 8.40176082753355e-06, "loss": 0.46013498306274414, "step": 3924 }, { "epoch": 1.147682409709022, "grad_norm": 1.4364824711460213, "learning_rate": 8.396988454992296e-06, "loss": 0.5183000564575195, "step": 3925 }, { "epoch": 1.1479748501242872, "grad_norm": 1.7258149850246205, "learning_rate": 8.392216457145246e-06, "loss": 0.5407284498214722, "step": 3926 }, { "epoch": 1.1482672905395526, "grad_norm": 1.735168999167248, "learning_rate": 8.387444835107824e-06, "loss": 0.5960655808448792, "step": 3927 }, { "epoch": 1.148559730954818, "grad_norm": 1.2891916158500891, "learning_rate": 8.382673589995365e-06, "loss": 0.4363316297531128, "step": 3928 }, { "epoch": 1.1488521713700834, "grad_norm": 1.4413045514377891, "learning_rate": 8.377902722923122e-06, "loss": 0.5143908262252808, "step": 3929 }, { "epoch": 1.1491446117853488, "grad_norm": 1.696244956095385, "learning_rate": 8.373132235006254e-06, "loss": 0.6016460657119751, "step": 3930 }, { "epoch": 1.149437052200614, "grad_norm": 1.4319561508465357, "learning_rate": 8.368362127359835e-06, "loss": 0.5120511651039124, "step": 3931 }, { "epoch": 1.1497294926158794, "grad_norm": 1.5254857110351325, "learning_rate": 8.363592401098853e-06, "loss": 0.49658435583114624, "step": 3932 }, { "epoch": 1.150021933031145, "grad_norm": 1.7705521617533395, "learning_rate": 8.358823057338188e-06, "loss": 0.584032416343689, "step": 3933 }, { "epoch": 1.1503143734464103, "grad_norm": 1.5012587623360505, "learning_rate": 8.35405409719266e-06, "loss": 0.4673706293106079, "step": 3934 }, { "epoch": 1.1506068138616756, "grad_norm": 1.618555555366979, "learning_rate": 8.349285521776982e-06, "loss": 0.633565366268158, "step": 3935 }, { "epoch": 1.1508992542769412, "grad_norm": 1.6576478038135816, "learning_rate": 8.344517332205774e-06, "loss": 0.6029015779495239, "step": 3936 }, { "epoch": 1.1511916946922065, "grad_norm": 1.519081286345544, "learning_rate": 8.339749529593574e-06, "loss": 0.45594489574432373, "step": 3937 }, { "epoch": 1.1514841351074718, "grad_norm": 1.5262842564669963, "learning_rate": 8.334982115054828e-06, "loss": 0.4413541257381439, "step": 3938 }, { "epoch": 1.1517765755227372, "grad_norm": 1.6373893488771099, "learning_rate": 8.330215089703887e-06, "loss": 0.5674389004707336, "step": 3939 }, { "epoch": 1.1520690159380027, "grad_norm": 1.386401958621656, "learning_rate": 8.325448454655019e-06, "loss": 0.43449294567108154, "step": 3940 }, { "epoch": 1.152361456353268, "grad_norm": 1.6283403091444353, "learning_rate": 8.320682211022393e-06, "loss": 0.5190714597702026, "step": 3941 }, { "epoch": 1.1526538967685334, "grad_norm": 1.5774508757028434, "learning_rate": 8.31591635992009e-06, "loss": 0.56162428855896, "step": 3942 }, { "epoch": 1.1529463371837987, "grad_norm": 1.4891934876919055, "learning_rate": 8.311150902462096e-06, "loss": 0.5588958263397217, "step": 3943 }, { "epoch": 1.1532387775990642, "grad_norm": 1.5108312938903155, "learning_rate": 8.306385839762312e-06, "loss": 0.5438264608383179, "step": 3944 }, { "epoch": 1.1535312180143296, "grad_norm": 1.575513080138648, "learning_rate": 8.30162117293454e-06, "loss": 0.5860258340835571, "step": 3945 }, { "epoch": 1.153823658429595, "grad_norm": 1.552005958726473, "learning_rate": 8.296856903092494e-06, "loss": 0.4742947220802307, "step": 3946 }, { "epoch": 1.1541160988448604, "grad_norm": 1.44195573685015, "learning_rate": 8.292093031349791e-06, "loss": 0.47963109612464905, "step": 3947 }, { "epoch": 1.1544085392601258, "grad_norm": 1.5340226225614597, "learning_rate": 8.287329558819957e-06, "loss": 0.5404704213142395, "step": 3948 }, { "epoch": 1.154700979675391, "grad_norm": 1.8054477659796657, "learning_rate": 8.282566486616425e-06, "loss": 0.6559766530990601, "step": 3949 }, { "epoch": 1.1549934200906566, "grad_norm": 1.507763379787764, "learning_rate": 8.277803815852535e-06, "loss": 0.4462929368019104, "step": 3950 }, { "epoch": 1.155285860505922, "grad_norm": 1.6398920335039024, "learning_rate": 8.273041547641531e-06, "loss": 0.5672504901885986, "step": 3951 }, { "epoch": 1.1555783009211873, "grad_norm": 1.5384582587859306, "learning_rate": 8.268279683096567e-06, "loss": 0.4040188193321228, "step": 3952 }, { "epoch": 1.1558707413364526, "grad_norm": 1.4954603260099153, "learning_rate": 8.263518223330698e-06, "loss": 0.4639814794063568, "step": 3953 }, { "epoch": 1.1561631817517182, "grad_norm": 1.3560290444841174, "learning_rate": 8.258757169456885e-06, "loss": 0.384866327047348, "step": 3954 }, { "epoch": 1.1564556221669835, "grad_norm": 1.5360587849114566, "learning_rate": 8.253996522587997e-06, "loss": 0.452106773853302, "step": 3955 }, { "epoch": 1.1567480625822488, "grad_norm": 1.5044138285106523, "learning_rate": 8.249236283836806e-06, "loss": 0.487504780292511, "step": 3956 }, { "epoch": 1.1570405029975142, "grad_norm": 1.6199121483000312, "learning_rate": 8.244476454315989e-06, "loss": 0.6225916147232056, "step": 3957 }, { "epoch": 1.1573329434127797, "grad_norm": 1.7421167385988239, "learning_rate": 8.239717035138128e-06, "loss": 0.5254271030426025, "step": 3958 }, { "epoch": 1.157625383828045, "grad_norm": 1.6240162719096014, "learning_rate": 8.234958027415707e-06, "loss": 0.5759135484695435, "step": 3959 }, { "epoch": 1.1579178242433104, "grad_norm": 1.6959935899735565, "learning_rate": 8.230199432261115e-06, "loss": 0.5720966458320618, "step": 3960 }, { "epoch": 1.158210264658576, "grad_norm": 1.5797174163929866, "learning_rate": 8.225441250786643e-06, "loss": 0.4807323217391968, "step": 3961 }, { "epoch": 1.1585027050738412, "grad_norm": 1.6197693861653146, "learning_rate": 8.22068348410449e-06, "loss": 0.5049746036529541, "step": 3962 }, { "epoch": 1.1587951454891066, "grad_norm": 1.673364031578337, "learning_rate": 8.215926133326758e-06, "loss": 0.5321973562240601, "step": 3963 }, { "epoch": 1.159087585904372, "grad_norm": 1.3992709586079797, "learning_rate": 8.211169199565444e-06, "loss": 0.5176634788513184, "step": 3964 }, { "epoch": 1.1593800263196374, "grad_norm": 1.5661593234971032, "learning_rate": 8.20641268393245e-06, "loss": 0.5345112681388855, "step": 3965 }, { "epoch": 1.1596724667349028, "grad_norm": 1.8309312482061675, "learning_rate": 8.201656587539589e-06, "loss": 0.47578325867652893, "step": 3966 }, { "epoch": 1.159964907150168, "grad_norm": 1.5996140092470157, "learning_rate": 8.196900911498563e-06, "loss": 0.5018264651298523, "step": 3967 }, { "epoch": 1.1602573475654336, "grad_norm": 1.530612277867195, "learning_rate": 8.192145656920989e-06, "loss": 0.4643394351005554, "step": 3968 }, { "epoch": 1.160549787980699, "grad_norm": 1.6066179328722245, "learning_rate": 8.187390824918375e-06, "loss": 0.5391045808792114, "step": 3969 }, { "epoch": 1.1608422283959643, "grad_norm": 1.4691594768883462, "learning_rate": 8.182636416602136e-06, "loss": 0.5168124437332153, "step": 3970 }, { "epoch": 1.1611346688112296, "grad_norm": 1.4702658109064293, "learning_rate": 8.177882433083583e-06, "loss": 0.5821055173873901, "step": 3971 }, { "epoch": 1.1614271092264952, "grad_norm": 1.597748811964364, "learning_rate": 8.173128875473933e-06, "loss": 0.6031824946403503, "step": 3972 }, { "epoch": 1.1617195496417605, "grad_norm": 1.598311083454874, "learning_rate": 8.1683757448843e-06, "loss": 0.5085259675979614, "step": 3973 }, { "epoch": 1.1620119900570258, "grad_norm": 1.6218562380492636, "learning_rate": 8.163623042425702e-06, "loss": 0.5654903650283813, "step": 3974 }, { "epoch": 1.1623044304722914, "grad_norm": 1.6279393236171642, "learning_rate": 8.158870769209051e-06, "loss": 0.3920902609825134, "step": 3975 }, { "epoch": 1.1625968708875567, "grad_norm": 1.6100798425685794, "learning_rate": 8.154118926345165e-06, "loss": 0.5334979891777039, "step": 3976 }, { "epoch": 1.162889311302822, "grad_norm": 1.7332980039574648, "learning_rate": 8.149367514944754e-06, "loss": 0.6212184429168701, "step": 3977 }, { "epoch": 1.1631817517180874, "grad_norm": 1.847204612085083, "learning_rate": 8.144616536118437e-06, "loss": 0.71863853931427, "step": 3978 }, { "epoch": 1.163474192133353, "grad_norm": 1.7297963031597574, "learning_rate": 8.139865990976722e-06, "loss": 0.5263794660568237, "step": 3979 }, { "epoch": 1.1637666325486182, "grad_norm": 1.5706968019905152, "learning_rate": 8.135115880630025e-06, "loss": 0.5035576224327087, "step": 3980 }, { "epoch": 1.1640590729638836, "grad_norm": 1.4183002447341373, "learning_rate": 8.130366206188651e-06, "loss": 0.5695084929466248, "step": 3981 }, { "epoch": 1.1643515133791489, "grad_norm": 1.51980370598088, "learning_rate": 8.125616968762806e-06, "loss": 0.5826396942138672, "step": 3982 }, { "epoch": 1.1646439537944144, "grad_norm": 1.5991682342910063, "learning_rate": 8.1208681694626e-06, "loss": 0.5132841467857361, "step": 3983 }, { "epoch": 1.1649363942096798, "grad_norm": 1.7073185800473716, "learning_rate": 8.116119809398034e-06, "loss": 0.6572669744491577, "step": 3984 }, { "epoch": 1.165228834624945, "grad_norm": 1.8729301131644296, "learning_rate": 8.111371889679007e-06, "loss": 0.5365801453590393, "step": 3985 }, { "epoch": 1.1655212750402106, "grad_norm": 1.4561472169130645, "learning_rate": 8.10662441141532e-06, "loss": 0.44511687755584717, "step": 3986 }, { "epoch": 1.165813715455476, "grad_norm": 1.596383666869324, "learning_rate": 8.101877375716666e-06, "loss": 0.47212404012680054, "step": 3987 }, { "epoch": 1.1661061558707413, "grad_norm": 1.5859450593798408, "learning_rate": 8.097130783692631e-06, "loss": 0.5942205786705017, "step": 3988 }, { "epoch": 1.1663985962860068, "grad_norm": 1.6678058947227146, "learning_rate": 8.092384636452708e-06, "loss": 0.49162304401397705, "step": 3989 }, { "epoch": 1.1666910367012722, "grad_norm": 2.635849062548634, "learning_rate": 8.087638935106277e-06, "loss": 0.6544803380966187, "step": 3990 }, { "epoch": 1.1669834771165375, "grad_norm": 1.677008396527972, "learning_rate": 8.082893680762619e-06, "loss": 0.5572186708450317, "step": 3991 }, { "epoch": 1.1672759175318028, "grad_norm": 1.6443546400872178, "learning_rate": 8.078148874530906e-06, "loss": 0.5836775898933411, "step": 3992 }, { "epoch": 1.1675683579470684, "grad_norm": 1.3079140035223278, "learning_rate": 8.073404517520208e-06, "loss": 0.5507068634033203, "step": 3993 }, { "epoch": 1.1678607983623337, "grad_norm": 1.9861505555993526, "learning_rate": 8.068660610839489e-06, "loss": 0.5312684178352356, "step": 3994 }, { "epoch": 1.168153238777599, "grad_norm": 1.6552821635427635, "learning_rate": 8.06391715559761e-06, "loss": 0.5555688142776489, "step": 3995 }, { "epoch": 1.1684456791928644, "grad_norm": 1.6871575092969338, "learning_rate": 8.059174152903324e-06, "loss": 0.5724596977233887, "step": 3996 }, { "epoch": 1.16873811960813, "grad_norm": 1.6937240718062052, "learning_rate": 8.054431603865282e-06, "loss": 0.6212218999862671, "step": 3997 }, { "epoch": 1.1690305600233952, "grad_norm": 1.4230492945656301, "learning_rate": 8.049689509592023e-06, "loss": 0.5061509609222412, "step": 3998 }, { "epoch": 1.1693230004386606, "grad_norm": 1.695677070671476, "learning_rate": 8.044947871191982e-06, "loss": 0.6143001914024353, "step": 3999 }, { "epoch": 1.169615440853926, "grad_norm": 1.32203821023621, "learning_rate": 8.040206689773487e-06, "loss": 0.5079911351203918, "step": 4000 }, { "epoch": 1.1699078812691914, "grad_norm": 1.6638666755727167, "learning_rate": 8.035465966444764e-06, "loss": 0.6104908585548401, "step": 4001 }, { "epoch": 1.1702003216844568, "grad_norm": 1.680147004679776, "learning_rate": 8.03072570231393e-06, "loss": 0.5953013896942139, "step": 4002 }, { "epoch": 1.1704927620997223, "grad_norm": 1.3075175590117196, "learning_rate": 8.025985898488986e-06, "loss": 0.4541323781013489, "step": 4003 }, { "epoch": 1.1707852025149876, "grad_norm": 1.5142685350846732, "learning_rate": 8.021246556077838e-06, "loss": 0.5708850026130676, "step": 4004 }, { "epoch": 1.171077642930253, "grad_norm": 1.6574340180310174, "learning_rate": 8.016507676188275e-06, "loss": 0.5430601835250854, "step": 4005 }, { "epoch": 1.1713700833455183, "grad_norm": 1.504278683910439, "learning_rate": 8.011769259927981e-06, "loss": 0.5621174573898315, "step": 4006 }, { "epoch": 1.1716625237607838, "grad_norm": 1.5473800123062453, "learning_rate": 8.007031308404536e-06, "loss": 0.48092782497406006, "step": 4007 }, { "epoch": 1.1719549641760492, "grad_norm": 1.9401766125340165, "learning_rate": 8.002293822725404e-06, "loss": 0.5770663022994995, "step": 4008 }, { "epoch": 1.1722474045913145, "grad_norm": 1.7123399188942874, "learning_rate": 7.997556803997945e-06, "loss": 0.5692728757858276, "step": 4009 }, { "epoch": 1.1725398450065798, "grad_norm": 1.6496884851556144, "learning_rate": 7.99282025332941e-06, "loss": 0.6256895065307617, "step": 4010 }, { "epoch": 1.1728322854218454, "grad_norm": 1.4529405935008253, "learning_rate": 7.988084171826937e-06, "loss": 0.4272884130477905, "step": 4011 }, { "epoch": 1.1731247258371107, "grad_norm": 1.548325850009333, "learning_rate": 7.983348560597557e-06, "loss": 0.5113184452056885, "step": 4012 }, { "epoch": 1.173417166252376, "grad_norm": 1.4489276426544837, "learning_rate": 7.978613420748186e-06, "loss": 0.45635539293289185, "step": 4013 }, { "epoch": 1.1737096066676416, "grad_norm": 1.6347983233600756, "learning_rate": 7.973878753385638e-06, "loss": 0.5539636611938477, "step": 4014 }, { "epoch": 1.174002047082907, "grad_norm": 1.4140693021111321, "learning_rate": 7.969144559616615e-06, "loss": 0.6083431243896484, "step": 4015 }, { "epoch": 1.1742944874981722, "grad_norm": 1.372102806580561, "learning_rate": 7.9644108405477e-06, "loss": 0.5268326997756958, "step": 4016 }, { "epoch": 1.1745869279134376, "grad_norm": 1.5989300144328094, "learning_rate": 7.95967759728538e-06, "loss": 0.5680301189422607, "step": 4017 }, { "epoch": 1.174879368328703, "grad_norm": 1.62105042514946, "learning_rate": 7.954944830936012e-06, "loss": 0.5457121133804321, "step": 4018 }, { "epoch": 1.1751718087439684, "grad_norm": 1.59579517284719, "learning_rate": 7.950212542605857e-06, "loss": 0.5358338356018066, "step": 4019 }, { "epoch": 1.1754642491592338, "grad_norm": 1.5630110417390142, "learning_rate": 7.945480733401056e-06, "loss": 0.6094579696655273, "step": 4020 }, { "epoch": 1.175756689574499, "grad_norm": 1.6732097827507912, "learning_rate": 7.940749404427642e-06, "loss": 0.5108463764190674, "step": 4021 }, { "epoch": 1.1760491299897646, "grad_norm": 1.6495017651653137, "learning_rate": 7.936018556791537e-06, "loss": 0.4946494698524475, "step": 4022 }, { "epoch": 1.17634157040503, "grad_norm": 1.7757854212337651, "learning_rate": 7.931288191598543e-06, "loss": 0.5056017637252808, "step": 4023 }, { "epoch": 1.1766340108202953, "grad_norm": 1.5276820294687934, "learning_rate": 7.926558309954354e-06, "loss": 0.5242294073104858, "step": 4024 }, { "epoch": 1.1769264512355608, "grad_norm": 1.682705103807737, "learning_rate": 7.921828912964556e-06, "loss": 0.5667276382446289, "step": 4025 }, { "epoch": 1.1772188916508262, "grad_norm": 1.6370912674167624, "learning_rate": 7.917100001734614e-06, "loss": 0.5282422304153442, "step": 4026 }, { "epoch": 1.1775113320660915, "grad_norm": 1.6026370834828365, "learning_rate": 7.912371577369881e-06, "loss": 0.4887520670890808, "step": 4027 }, { "epoch": 1.177803772481357, "grad_norm": 1.6287890532743194, "learning_rate": 7.907643640975603e-06, "loss": 0.5082155466079712, "step": 4028 }, { "epoch": 1.1780962128966224, "grad_norm": 1.649507152949628, "learning_rate": 7.902916193656898e-06, "loss": 0.5432984828948975, "step": 4029 }, { "epoch": 1.1783886533118877, "grad_norm": 1.7249606112651144, "learning_rate": 7.898189236518783e-06, "loss": 0.4313681721687317, "step": 4030 }, { "epoch": 1.178681093727153, "grad_norm": 1.494399406404666, "learning_rate": 7.893462770666155e-06, "loss": 0.6051831245422363, "step": 4031 }, { "epoch": 1.1789735341424186, "grad_norm": 1.5057449817059945, "learning_rate": 7.888736797203796e-06, "loss": 0.45805442333221436, "step": 4032 }, { "epoch": 1.179265974557684, "grad_norm": 1.7917233044229635, "learning_rate": 7.884011317236376e-06, "loss": 0.4998340606689453, "step": 4033 }, { "epoch": 1.1795584149729492, "grad_norm": 1.651259706746187, "learning_rate": 7.879286331868443e-06, "loss": 0.5298212170600891, "step": 4034 }, { "epoch": 1.1798508553882145, "grad_norm": 1.7028183419777814, "learning_rate": 7.874561842204437e-06, "loss": 0.5104682445526123, "step": 4035 }, { "epoch": 1.18014329580348, "grad_norm": 1.6393724776910414, "learning_rate": 7.869837849348676e-06, "loss": 0.5793051719665527, "step": 4036 }, { "epoch": 1.1804357362187454, "grad_norm": 1.4839435154715734, "learning_rate": 7.865114354405367e-06, "loss": 0.42913323640823364, "step": 4037 }, { "epoch": 1.1807281766340108, "grad_norm": 2.104724599006863, "learning_rate": 7.860391358478596e-06, "loss": 0.5183675289154053, "step": 4038 }, { "epoch": 1.1810206170492763, "grad_norm": 1.5685744104736703, "learning_rate": 7.855668862672339e-06, "loss": 0.444034218788147, "step": 4039 }, { "epoch": 1.1813130574645416, "grad_norm": 1.486556561749613, "learning_rate": 7.850946868090446e-06, "loss": 0.4357207417488098, "step": 4040 }, { "epoch": 1.181605497879807, "grad_norm": 1.6923285770365775, "learning_rate": 7.846225375836657e-06, "loss": 0.4517707824707031, "step": 4041 }, { "epoch": 1.1818979382950725, "grad_norm": 1.5865011864132745, "learning_rate": 7.841504387014589e-06, "loss": 0.4437381625175476, "step": 4042 }, { "epoch": 1.1821903787103378, "grad_norm": 1.4744521314451464, "learning_rate": 7.836783902727746e-06, "loss": 0.5364828109741211, "step": 4043 }, { "epoch": 1.1824828191256032, "grad_norm": 1.650227369991675, "learning_rate": 7.832063924079516e-06, "loss": 0.4814251661300659, "step": 4044 }, { "epoch": 1.1827752595408685, "grad_norm": 1.9016693432010778, "learning_rate": 7.827344452173163e-06, "loss": 0.5376232862472534, "step": 4045 }, { "epoch": 1.183067699956134, "grad_norm": 1.7007887018924743, "learning_rate": 7.822625488111833e-06, "loss": 0.6005147695541382, "step": 4046 }, { "epoch": 1.1833601403713994, "grad_norm": 1.5696670197669271, "learning_rate": 7.817907032998556e-06, "loss": 0.5276827216148376, "step": 4047 }, { "epoch": 1.1836525807866647, "grad_norm": 2.1978111734105994, "learning_rate": 7.813189087936243e-06, "loss": 0.6425626277923584, "step": 4048 }, { "epoch": 1.18394502120193, "grad_norm": 1.272646490936496, "learning_rate": 7.808471654027685e-06, "loss": 0.44388407468795776, "step": 4049 }, { "epoch": 1.1842374616171956, "grad_norm": 1.743245771156321, "learning_rate": 7.803754732375554e-06, "loss": 0.5044336318969727, "step": 4050 }, { "epoch": 1.184529902032461, "grad_norm": 1.9415496480441554, "learning_rate": 7.7990383240824e-06, "loss": 0.6964906454086304, "step": 4051 }, { "epoch": 1.1848223424477262, "grad_norm": 1.9029191440552455, "learning_rate": 7.794322430250654e-06, "loss": 0.6093637943267822, "step": 4052 }, { "epoch": 1.1851147828629918, "grad_norm": 1.8079016024144563, "learning_rate": 7.78960705198263e-06, "loss": 0.5264803171157837, "step": 4053 }, { "epoch": 1.185407223278257, "grad_norm": 1.444425047773482, "learning_rate": 7.78489219038052e-06, "loss": 0.5336456298828125, "step": 4054 }, { "epoch": 1.1856996636935224, "grad_norm": 1.7563642817078289, "learning_rate": 7.78017784654639e-06, "loss": 0.5266311168670654, "step": 4055 }, { "epoch": 1.1859921041087877, "grad_norm": 1.6538609406479838, "learning_rate": 7.775464021582195e-06, "loss": 0.6281685829162598, "step": 4056 }, { "epoch": 1.1862845445240533, "grad_norm": 1.6081255371588656, "learning_rate": 7.770750716589758e-06, "loss": 0.560591995716095, "step": 4057 }, { "epoch": 1.1865769849393186, "grad_norm": 1.7912692279763305, "learning_rate": 7.766037932670786e-06, "loss": 0.5751859545707703, "step": 4058 }, { "epoch": 1.186869425354584, "grad_norm": 1.629657999448518, "learning_rate": 7.761325670926864e-06, "loss": 0.5404624938964844, "step": 4059 }, { "epoch": 1.1871618657698493, "grad_norm": 1.6997280704374504, "learning_rate": 7.756613932459456e-06, "loss": 0.4714626669883728, "step": 4060 }, { "epoch": 1.1874543061851148, "grad_norm": 1.4471766418666208, "learning_rate": 7.751902718369903e-06, "loss": 0.5449519157409668, "step": 4061 }, { "epoch": 1.1877467466003802, "grad_norm": 1.6279611933236646, "learning_rate": 7.747192029759419e-06, "loss": 0.6518754959106445, "step": 4062 }, { "epoch": 1.1880391870156455, "grad_norm": 1.4655931664348079, "learning_rate": 7.7424818677291e-06, "loss": 0.47224369645118713, "step": 4063 }, { "epoch": 1.188331627430911, "grad_norm": 1.6924798895194766, "learning_rate": 7.737772233379919e-06, "loss": 0.5482417345046997, "step": 4064 }, { "epoch": 1.1886240678461764, "grad_norm": 1.3910277085667344, "learning_rate": 7.733063127812724e-06, "loss": 0.5401996374130249, "step": 4065 }, { "epoch": 1.1889165082614417, "grad_norm": 2.4517381628425547, "learning_rate": 7.72835455212824e-06, "loss": 0.4678424596786499, "step": 4066 }, { "epoch": 1.1892089486767072, "grad_norm": 1.6156459518768798, "learning_rate": 7.72364650742707e-06, "loss": 0.5191294550895691, "step": 4067 }, { "epoch": 1.1895013890919726, "grad_norm": 1.4433917939096517, "learning_rate": 7.718938994809685e-06, "loss": 0.44018834829330444, "step": 4068 }, { "epoch": 1.1897938295072379, "grad_norm": 1.3955169745603861, "learning_rate": 7.714232015376442e-06, "loss": 0.47852614521980286, "step": 4069 }, { "epoch": 1.1900862699225032, "grad_norm": 1.523334975304476, "learning_rate": 7.709525570227567e-06, "loss": 0.5748994946479797, "step": 4070 }, { "epoch": 1.1903787103377688, "grad_norm": 1.7714529908638612, "learning_rate": 7.704819660463164e-06, "loss": 0.5015645027160645, "step": 4071 }, { "epoch": 1.190671150753034, "grad_norm": 1.8100962592275294, "learning_rate": 7.70011428718321e-06, "loss": 0.6200511455535889, "step": 4072 }, { "epoch": 1.1909635911682994, "grad_norm": 1.531990990921369, "learning_rate": 7.69540945148756e-06, "loss": 0.6311289668083191, "step": 4073 }, { "epoch": 1.1912560315835647, "grad_norm": 1.5403717728586237, "learning_rate": 7.690705154475937e-06, "loss": 0.5707247257232666, "step": 4074 }, { "epoch": 1.1915484719988303, "grad_norm": 2.0693191702072107, "learning_rate": 7.686001397247944e-06, "loss": 0.5616360902786255, "step": 4075 }, { "epoch": 1.1918409124140956, "grad_norm": 1.7144278887449431, "learning_rate": 7.681298180903054e-06, "loss": 0.5955555438995361, "step": 4076 }, { "epoch": 1.192133352829361, "grad_norm": 1.6252826516162207, "learning_rate": 7.676595506540615e-06, "loss": 0.5057257413864136, "step": 4077 }, { "epoch": 1.1924257932446265, "grad_norm": 1.8445544744897249, "learning_rate": 7.671893375259854e-06, "loss": 0.5795278549194336, "step": 4078 }, { "epoch": 1.1927182336598918, "grad_norm": 1.5541021220011975, "learning_rate": 7.66719178815986e-06, "loss": 0.5213087797164917, "step": 4079 }, { "epoch": 1.1930106740751572, "grad_norm": 1.5183242886274189, "learning_rate": 7.662490746339601e-06, "loss": 0.5333693027496338, "step": 4080 }, { "epoch": 1.1933031144904227, "grad_norm": 1.5253876680230323, "learning_rate": 7.657790250897916e-06, "loss": 0.4705297648906708, "step": 4081 }, { "epoch": 1.193595554905688, "grad_norm": 1.5875026444946445, "learning_rate": 7.65309030293352e-06, "loss": 0.5376054644584656, "step": 4082 }, { "epoch": 1.1938879953209534, "grad_norm": 1.4103452849520708, "learning_rate": 7.648390903544997e-06, "loss": 0.47457355260849, "step": 4083 }, { "epoch": 1.1941804357362187, "grad_norm": 1.5068528532277095, "learning_rate": 7.6436920538308e-06, "loss": 0.48752763867378235, "step": 4084 }, { "epoch": 1.1944728761514842, "grad_norm": 1.5226531730849548, "learning_rate": 7.63899375488926e-06, "loss": 0.48227858543395996, "step": 4085 }, { "epoch": 1.1947653165667496, "grad_norm": 1.4101996785965327, "learning_rate": 7.634296007818576e-06, "loss": 0.4294116497039795, "step": 4086 }, { "epoch": 1.1950577569820149, "grad_norm": 1.24669252589954, "learning_rate": 7.629598813716817e-06, "loss": 0.5562552809715271, "step": 4087 }, { "epoch": 1.1953501973972802, "grad_norm": 1.6628602240304204, "learning_rate": 7.624902173681923e-06, "loss": 0.6466431617736816, "step": 4088 }, { "epoch": 1.1956426378125458, "grad_norm": 1.7596122427030323, "learning_rate": 7.620206088811704e-06, "loss": 0.7183903455734253, "step": 4089 }, { "epoch": 1.195935078227811, "grad_norm": 1.666055880141139, "learning_rate": 7.615510560203841e-06, "loss": 0.5667496919631958, "step": 4090 }, { "epoch": 1.1962275186430764, "grad_norm": 1.5232682591562918, "learning_rate": 7.610815588955888e-06, "loss": 0.5603050589561462, "step": 4091 }, { "epoch": 1.196519959058342, "grad_norm": 1.6556784363331365, "learning_rate": 7.606121176165267e-06, "loss": 0.5305474996566772, "step": 4092 }, { "epoch": 1.1968123994736073, "grad_norm": 2.0140258709167163, "learning_rate": 7.6014273229292625e-06, "loss": 0.7321374416351318, "step": 4093 }, { "epoch": 1.1971048398888726, "grad_norm": 1.5891169030075603, "learning_rate": 7.5967340303450385e-06, "loss": 0.44885972142219543, "step": 4094 }, { "epoch": 1.197397280304138, "grad_norm": 1.7955283190373275, "learning_rate": 7.592041299509624e-06, "loss": 0.593859076499939, "step": 4095 }, { "epoch": 1.1976897207194035, "grad_norm": 1.4553428657338656, "learning_rate": 7.587349131519913e-06, "loss": 0.6701182723045349, "step": 4096 }, { "epoch": 1.1979821611346688, "grad_norm": 1.9268795339399152, "learning_rate": 7.582657527472674e-06, "loss": 0.6456711292266846, "step": 4097 }, { "epoch": 1.1982746015499341, "grad_norm": 1.6602700214896833, "learning_rate": 7.577966488464543e-06, "loss": 0.5933864116668701, "step": 4098 }, { "epoch": 1.1985670419651995, "grad_norm": 1.398305705152583, "learning_rate": 7.5732760155920175e-06, "loss": 0.4609876275062561, "step": 4099 }, { "epoch": 1.198859482380465, "grad_norm": 1.4827488477589208, "learning_rate": 7.568586109951468e-06, "loss": 0.540961503982544, "step": 4100 }, { "epoch": 1.1991519227957304, "grad_norm": 1.629963355664156, "learning_rate": 7.563896772639132e-06, "loss": 0.5522942543029785, "step": 4101 }, { "epoch": 1.1994443632109957, "grad_norm": 1.739784480619601, "learning_rate": 7.559208004751114e-06, "loss": 0.483737587928772, "step": 4102 }, { "epoch": 1.1997368036262612, "grad_norm": 1.2743684109876499, "learning_rate": 7.554519807383384e-06, "loss": 0.3760339915752411, "step": 4103 }, { "epoch": 1.2000292440415266, "grad_norm": 1.494486709964621, "learning_rate": 7.549832181631782e-06, "loss": 0.5034801959991455, "step": 4104 }, { "epoch": 1.2003216844567919, "grad_norm": 1.6403057961263519, "learning_rate": 7.545145128592009e-06, "loss": 0.5605261325836182, "step": 4105 }, { "epoch": 1.2006141248720574, "grad_norm": 1.4179033673825343, "learning_rate": 7.540458649359637e-06, "loss": 0.4724245071411133, "step": 4106 }, { "epoch": 1.2009065652873228, "grad_norm": 1.609040907971216, "learning_rate": 7.535772745030101e-06, "loss": 0.564873218536377, "step": 4107 }, { "epoch": 1.201199005702588, "grad_norm": 1.9431151220409157, "learning_rate": 7.531087416698702e-06, "loss": 0.699596643447876, "step": 4108 }, { "epoch": 1.2014914461178534, "grad_norm": 1.5180492689699372, "learning_rate": 7.526402665460612e-06, "loss": 0.47448351979255676, "step": 4109 }, { "epoch": 1.201783886533119, "grad_norm": 1.4606225624905942, "learning_rate": 7.521718492410855e-06, "loss": 0.4681323766708374, "step": 4110 }, { "epoch": 1.2020763269483843, "grad_norm": 1.8550718864551587, "learning_rate": 7.517034898644333e-06, "loss": 0.6361842155456543, "step": 4111 }, { "epoch": 1.2023687673636496, "grad_norm": 1.5211596606564617, "learning_rate": 7.5123518852558075e-06, "loss": 0.4732646942138672, "step": 4112 }, { "epoch": 1.202661207778915, "grad_norm": 1.6512929892036816, "learning_rate": 7.507669453339903e-06, "loss": 0.57124263048172, "step": 4113 }, { "epoch": 1.2029536481941805, "grad_norm": 1.758611342292707, "learning_rate": 7.502987603991111e-06, "loss": 0.5228173732757568, "step": 4114 }, { "epoch": 1.2032460886094458, "grad_norm": 1.7352024129193708, "learning_rate": 7.4983063383037864e-06, "loss": 0.5501765012741089, "step": 4115 }, { "epoch": 1.2035385290247111, "grad_norm": 1.6782467710972089, "learning_rate": 7.493625657372141e-06, "loss": 0.5062840580940247, "step": 4116 }, { "epoch": 1.2038309694399767, "grad_norm": 1.6960273401585455, "learning_rate": 7.4889455622902616e-06, "loss": 0.7060763835906982, "step": 4117 }, { "epoch": 1.204123409855242, "grad_norm": 1.478061987478783, "learning_rate": 7.484266054152088e-06, "loss": 0.42127668857574463, "step": 4118 }, { "epoch": 1.2044158502705073, "grad_norm": 1.3574946815299211, "learning_rate": 7.479587134051429e-06, "loss": 0.490860253572464, "step": 4119 }, { "epoch": 1.204708290685773, "grad_norm": 1.443033575116078, "learning_rate": 7.474908803081955e-06, "loss": 0.45786625146865845, "step": 4120 }, { "epoch": 1.2050007311010382, "grad_norm": 1.810733388901398, "learning_rate": 7.470231062337192e-06, "loss": 0.5267277359962463, "step": 4121 }, { "epoch": 1.2052931715163036, "grad_norm": 1.670838162040588, "learning_rate": 7.465553912910539e-06, "loss": 0.47834646701812744, "step": 4122 }, { "epoch": 1.2055856119315689, "grad_norm": 1.4366745635956868, "learning_rate": 7.460877355895249e-06, "loss": 0.5348576903343201, "step": 4123 }, { "epoch": 1.2058780523468344, "grad_norm": 1.7186674622129299, "learning_rate": 7.456201392384437e-06, "loss": 0.47992441058158875, "step": 4124 }, { "epoch": 1.2061704927620998, "grad_norm": 1.6108537844876905, "learning_rate": 7.451526023471085e-06, "loss": 0.5693913698196411, "step": 4125 }, { "epoch": 1.206462933177365, "grad_norm": 1.8995573488864546, "learning_rate": 7.4468512502480305e-06, "loss": 0.5165153741836548, "step": 4126 }, { "epoch": 1.2067553735926304, "grad_norm": 1.4833110616884417, "learning_rate": 7.442177073807973e-06, "loss": 0.522534966468811, "step": 4127 }, { "epoch": 1.207047814007896, "grad_norm": 2.0276890753098464, "learning_rate": 7.43750349524347e-06, "loss": 0.6298432946205139, "step": 4128 }, { "epoch": 1.2073402544231613, "grad_norm": 1.5840348747117112, "learning_rate": 7.432830515646947e-06, "loss": 0.5077394247055054, "step": 4129 }, { "epoch": 1.2076326948384266, "grad_norm": 2.0187303897146682, "learning_rate": 7.428158136110681e-06, "loss": 0.6492841839790344, "step": 4130 }, { "epoch": 1.2079251352536922, "grad_norm": 1.8651226738731277, "learning_rate": 7.423486357726813e-06, "loss": 0.5204535126686096, "step": 4131 }, { "epoch": 1.2082175756689575, "grad_norm": 1.7208004693147547, "learning_rate": 7.418815181587347e-06, "loss": 0.56598961353302, "step": 4132 }, { "epoch": 1.2085100160842228, "grad_norm": 1.7632065676998485, "learning_rate": 7.4141446087841364e-06, "loss": 0.486950159072876, "step": 4133 }, { "epoch": 1.2088024564994881, "grad_norm": 1.8229002651567825, "learning_rate": 7.4094746404089e-06, "loss": 0.7218466997146606, "step": 4134 }, { "epoch": 1.2090948969147537, "grad_norm": 1.715700034058204, "learning_rate": 7.404805277553218e-06, "loss": 0.6486172676086426, "step": 4135 }, { "epoch": 1.209387337330019, "grad_norm": 1.8385918373460561, "learning_rate": 7.400136521308521e-06, "loss": 0.6160574555397034, "step": 4136 }, { "epoch": 1.2096797777452843, "grad_norm": 1.6766631954981184, "learning_rate": 7.395468372766107e-06, "loss": 0.6184699535369873, "step": 4137 }, { "epoch": 1.2099722181605497, "grad_norm": 1.6881704887676476, "learning_rate": 7.390800833017124e-06, "loss": 0.5795263051986694, "step": 4138 }, { "epoch": 1.2102646585758152, "grad_norm": 1.6841718896097397, "learning_rate": 7.386133903152581e-06, "loss": 0.5409367084503174, "step": 4139 }, { "epoch": 1.2105570989910805, "grad_norm": 1.678948206873695, "learning_rate": 7.3814675842633465e-06, "loss": 0.47924935817718506, "step": 4140 }, { "epoch": 1.2108495394063459, "grad_norm": 1.6532202064740131, "learning_rate": 7.376801877440143e-06, "loss": 0.5737412571907043, "step": 4141 }, { "epoch": 1.2111419798216114, "grad_norm": 1.5307761286613382, "learning_rate": 7.372136783773551e-06, "loss": 0.538013219833374, "step": 4142 }, { "epoch": 1.2114344202368768, "grad_norm": 1.4940902719253717, "learning_rate": 7.367472304354011e-06, "loss": 0.4523904323577881, "step": 4143 }, { "epoch": 1.211726860652142, "grad_norm": 1.4793905716399964, "learning_rate": 7.362808440271811e-06, "loss": 0.5057293176651001, "step": 4144 }, { "epoch": 1.2120193010674076, "grad_norm": 1.4373562566302274, "learning_rate": 7.358145192617103e-06, "loss": 0.4653171896934509, "step": 4145 }, { "epoch": 1.212311741482673, "grad_norm": 1.6048946971271119, "learning_rate": 7.353482562479896e-06, "loss": 0.607070803642273, "step": 4146 }, { "epoch": 1.2126041818979383, "grad_norm": 1.51939699208445, "learning_rate": 7.348820550950047e-06, "loss": 0.4721861481666565, "step": 4147 }, { "epoch": 1.2128966223132036, "grad_norm": 1.8802239228266517, "learning_rate": 7.3441591591172765e-06, "loss": 0.6656746864318848, "step": 4148 }, { "epoch": 1.2131890627284692, "grad_norm": 1.504596663567376, "learning_rate": 7.339498388071154e-06, "loss": 0.5231848359107971, "step": 4149 }, { "epoch": 1.2134815031437345, "grad_norm": 1.4250712810936565, "learning_rate": 7.334838238901106e-06, "loss": 0.42241257429122925, "step": 4150 }, { "epoch": 1.2137739435589998, "grad_norm": 1.4721862632309721, "learning_rate": 7.3301787126964165e-06, "loss": 0.427111953496933, "step": 4151 }, { "epoch": 1.2140663839742651, "grad_norm": 2.038283523639075, "learning_rate": 7.325519810546219e-06, "loss": 0.6208339929580688, "step": 4152 }, { "epoch": 1.2143588243895307, "grad_norm": 1.3970516014119925, "learning_rate": 7.320861533539505e-06, "loss": 0.5031273365020752, "step": 4153 }, { "epoch": 1.214651264804796, "grad_norm": 1.5153681425347725, "learning_rate": 7.3162038827651205e-06, "loss": 0.5617444515228271, "step": 4154 }, { "epoch": 1.2149437052200613, "grad_norm": 1.4855483785732004, "learning_rate": 7.311546859311758e-06, "loss": 0.4616255462169647, "step": 4155 }, { "epoch": 1.215236145635327, "grad_norm": 1.5704453976932513, "learning_rate": 7.306890464267972e-06, "loss": 0.5799977779388428, "step": 4156 }, { "epoch": 1.2155285860505922, "grad_norm": 1.521477491941422, "learning_rate": 7.302234698722165e-06, "loss": 0.5669786930084229, "step": 4157 }, { "epoch": 1.2158210264658575, "grad_norm": 1.5325381791627977, "learning_rate": 7.297579563762595e-06, "loss": 0.5622642040252686, "step": 4158 }, { "epoch": 1.216113466881123, "grad_norm": 1.8789411887268221, "learning_rate": 7.292925060477367e-06, "loss": 0.6896791458129883, "step": 4159 }, { "epoch": 1.2164059072963884, "grad_norm": 1.5263918361022677, "learning_rate": 7.288271189954451e-06, "loss": 0.6704437136650085, "step": 4160 }, { "epoch": 1.2166983477116537, "grad_norm": 1.6192057061391554, "learning_rate": 7.2836179532816565e-06, "loss": 0.6340646743774414, "step": 4161 }, { "epoch": 1.216990788126919, "grad_norm": 1.4283430296516553, "learning_rate": 7.278965351546648e-06, "loss": 0.528992772102356, "step": 4162 }, { "epoch": 1.2172832285421846, "grad_norm": 1.4842100691170903, "learning_rate": 7.274313385836949e-06, "loss": 0.45160621404647827, "step": 4163 }, { "epoch": 1.21757566895745, "grad_norm": 1.3859373993268853, "learning_rate": 7.269662057239919e-06, "loss": 0.5398670434951782, "step": 4164 }, { "epoch": 1.2178681093727153, "grad_norm": 1.7598892874276293, "learning_rate": 7.265011366842785e-06, "loss": 0.5174476504325867, "step": 4165 }, { "epoch": 1.2181605497879806, "grad_norm": 1.663231631427072, "learning_rate": 7.260361315732613e-06, "loss": 0.4830206632614136, "step": 4166 }, { "epoch": 1.2184529902032462, "grad_norm": 1.4149457900973579, "learning_rate": 7.2557119049963266e-06, "loss": 0.42422181367874146, "step": 4167 }, { "epoch": 1.2187454306185115, "grad_norm": 1.363467777836694, "learning_rate": 7.251063135720699e-06, "loss": 0.43544018268585205, "step": 4168 }, { "epoch": 1.2190378710337768, "grad_norm": 1.4776092804767433, "learning_rate": 7.2464150089923465e-06, "loss": 0.5352005362510681, "step": 4169 }, { "epoch": 1.2193303114490424, "grad_norm": 1.5459436268475357, "learning_rate": 7.241767525897746e-06, "loss": 0.4718678891658783, "step": 4170 }, { "epoch": 1.2196227518643077, "grad_norm": 1.4994134423194976, "learning_rate": 7.237120687523214e-06, "loss": 0.618084192276001, "step": 4171 }, { "epoch": 1.219915192279573, "grad_norm": 1.8137589794234399, "learning_rate": 7.232474494954924e-06, "loss": 0.625995397567749, "step": 4172 }, { "epoch": 1.2202076326948383, "grad_norm": 1.4989590312422592, "learning_rate": 7.227828949278894e-06, "loss": 0.5382465124130249, "step": 4173 }, { "epoch": 1.220500073110104, "grad_norm": 1.702878462884744, "learning_rate": 7.223184051580992e-06, "loss": 0.5299465656280518, "step": 4174 }, { "epoch": 1.2207925135253692, "grad_norm": 1.7776293184889576, "learning_rate": 7.218539802946934e-06, "loss": 0.5899940729141235, "step": 4175 }, { "epoch": 1.2210849539406345, "grad_norm": 1.9763552708522982, "learning_rate": 7.213896204462286e-06, "loss": 0.6126594543457031, "step": 4176 }, { "epoch": 1.2213773943558999, "grad_norm": 1.6580044033592523, "learning_rate": 7.20925325721246e-06, "loss": 0.5576338768005371, "step": 4177 }, { "epoch": 1.2216698347711654, "grad_norm": 1.5044012673537284, "learning_rate": 7.204610962282717e-06, "loss": 0.540515661239624, "step": 4178 }, { "epoch": 1.2219622751864307, "grad_norm": 1.5281012838641301, "learning_rate": 7.1999693207581675e-06, "loss": 0.5306440591812134, "step": 4179 }, { "epoch": 1.222254715601696, "grad_norm": 2.02113466617051, "learning_rate": 7.195328333723763e-06, "loss": 0.6274853944778442, "step": 4180 }, { "epoch": 1.2225471560169616, "grad_norm": 1.6954554706562375, "learning_rate": 7.190688002264308e-06, "loss": 0.5626333951950073, "step": 4181 }, { "epoch": 1.222839596432227, "grad_norm": 1.6364457786315536, "learning_rate": 7.18604832746445e-06, "loss": 0.5938719511032104, "step": 4182 }, { "epoch": 1.2231320368474923, "grad_norm": 1.4010331016668016, "learning_rate": 7.181409310408688e-06, "loss": 0.4599727988243103, "step": 4183 }, { "epoch": 1.2234244772627578, "grad_norm": 1.516823379099723, "learning_rate": 7.176770952181363e-06, "loss": 0.5912302732467651, "step": 4184 }, { "epoch": 1.2237169176780232, "grad_norm": 1.24563200951521, "learning_rate": 7.172133253866662e-06, "loss": 0.534631073474884, "step": 4185 }, { "epoch": 1.2240093580932885, "grad_norm": 1.3825393422514298, "learning_rate": 7.167496216548618e-06, "loss": 0.5084418058395386, "step": 4186 }, { "epoch": 1.2243017985085538, "grad_norm": 1.6343841724383257, "learning_rate": 7.162859841311112e-06, "loss": 0.6906956434249878, "step": 4187 }, { "epoch": 1.2245942389238194, "grad_norm": 1.6583835426138527, "learning_rate": 7.158224129237867e-06, "loss": 0.5578658580780029, "step": 4188 }, { "epoch": 1.2248866793390847, "grad_norm": 1.4116232043960963, "learning_rate": 7.153589081412455e-06, "loss": 0.4438907206058502, "step": 4189 }, { "epoch": 1.22517911975435, "grad_norm": 1.9189119615156511, "learning_rate": 7.148954698918289e-06, "loss": 0.6366580724716187, "step": 4190 }, { "epoch": 1.2254715601696153, "grad_norm": 1.674796821883658, "learning_rate": 7.144320982838628e-06, "loss": 0.5532524585723877, "step": 4191 }, { "epoch": 1.2257640005848809, "grad_norm": 1.3678471530217577, "learning_rate": 7.139687934256574e-06, "loss": 0.4847594201564789, "step": 4192 }, { "epoch": 1.2260564410001462, "grad_norm": 1.651582950772816, "learning_rate": 7.135055554255073e-06, "loss": 0.6273454427719116, "step": 4193 }, { "epoch": 1.2263488814154115, "grad_norm": 1.826963047999446, "learning_rate": 7.130423843916917e-06, "loss": 0.6320512294769287, "step": 4194 }, { "epoch": 1.226641321830677, "grad_norm": 1.9938752870068028, "learning_rate": 7.125792804324741e-06, "loss": 0.5499723553657532, "step": 4195 }, { "epoch": 1.2269337622459424, "grad_norm": 1.531512294163018, "learning_rate": 7.121162436561023e-06, "loss": 0.5855484008789062, "step": 4196 }, { "epoch": 1.2272262026612077, "grad_norm": 1.5092619418718032, "learning_rate": 7.11653274170808e-06, "loss": 0.5998305678367615, "step": 4197 }, { "epoch": 1.2275186430764733, "grad_norm": 1.6613439290789596, "learning_rate": 7.111903720848077e-06, "loss": 0.6963703632354736, "step": 4198 }, { "epoch": 1.2278110834917386, "grad_norm": 1.7273945695579416, "learning_rate": 7.10727537506302e-06, "loss": 0.5664974451065063, "step": 4199 }, { "epoch": 1.228103523907004, "grad_norm": 1.8611907189119672, "learning_rate": 7.102647705434755e-06, "loss": 0.6502630710601807, "step": 4200 }, { "epoch": 1.2283959643222693, "grad_norm": 1.5674599206950446, "learning_rate": 7.098020713044973e-06, "loss": 0.5727233290672302, "step": 4201 }, { "epoch": 1.2286884047375348, "grad_norm": 1.4105219463780128, "learning_rate": 7.093394398975206e-06, "loss": 0.47885602712631226, "step": 4202 }, { "epoch": 1.2289808451528001, "grad_norm": 1.62325320016664, "learning_rate": 7.088768764306826e-06, "loss": 0.46089547872543335, "step": 4203 }, { "epoch": 1.2292732855680655, "grad_norm": 1.3853973501267451, "learning_rate": 7.084143810121044e-06, "loss": 0.48920977115631104, "step": 4204 }, { "epoch": 1.2295657259833308, "grad_norm": 1.4371671531095065, "learning_rate": 7.07951953749892e-06, "loss": 0.5320104956626892, "step": 4205 }, { "epoch": 1.2298581663985964, "grad_norm": 1.693565977205871, "learning_rate": 7.074895947521347e-06, "loss": 0.6403206586837769, "step": 4206 }, { "epoch": 1.2301506068138617, "grad_norm": 1.3774390509755927, "learning_rate": 7.070273041269062e-06, "loss": 0.5522217750549316, "step": 4207 }, { "epoch": 1.230443047229127, "grad_norm": 1.644407790392686, "learning_rate": 7.0656508198226405e-06, "loss": 0.5235073566436768, "step": 4208 }, { "epoch": 1.2307354876443926, "grad_norm": 1.9076552987416457, "learning_rate": 7.061029284262497e-06, "loss": 0.5972521305084229, "step": 4209 }, { "epoch": 1.2310279280596579, "grad_norm": 1.7443828706372393, "learning_rate": 7.0564084356688885e-06, "loss": 0.5989280343055725, "step": 4210 }, { "epoch": 1.2313203684749232, "grad_norm": 1.4723000244161777, "learning_rate": 7.051788275121913e-06, "loss": 0.5714213848114014, "step": 4211 }, { "epoch": 1.2316128088901885, "grad_norm": 1.715005842824084, "learning_rate": 7.047168803701502e-06, "loss": 0.5588504076004028, "step": 4212 }, { "epoch": 1.231905249305454, "grad_norm": 1.3648320017744335, "learning_rate": 7.042550022487431e-06, "loss": 0.47527533769607544, "step": 4213 }, { "epoch": 1.2321976897207194, "grad_norm": 1.4838404108317171, "learning_rate": 7.03793193255931e-06, "loss": 0.5281137228012085, "step": 4214 }, { "epoch": 1.2324901301359847, "grad_norm": 1.7839268972332825, "learning_rate": 7.033314534996589e-06, "loss": 0.5509631037712097, "step": 4215 }, { "epoch": 1.23278257055125, "grad_norm": 1.5991883103171023, "learning_rate": 7.028697830878557e-06, "loss": 0.5291438698768616, "step": 4216 }, { "epoch": 1.2330750109665156, "grad_norm": 1.9253124571991533, "learning_rate": 7.024081821284343e-06, "loss": 0.5931780934333801, "step": 4217 }, { "epoch": 1.233367451381781, "grad_norm": 1.5978832259158926, "learning_rate": 7.019466507292908e-06, "loss": 0.4883537292480469, "step": 4218 }, { "epoch": 1.2336598917970463, "grad_norm": 1.5441369085427046, "learning_rate": 7.014851889983058e-06, "loss": 0.45155030488967896, "step": 4219 }, { "epoch": 1.2339523322123118, "grad_norm": 1.7603110515675113, "learning_rate": 7.010237970433426e-06, "loss": 0.6107507944107056, "step": 4220 }, { "epoch": 1.2342447726275771, "grad_norm": 1.4005214588133317, "learning_rate": 7.0056247497224905e-06, "loss": 0.41764840483665466, "step": 4221 }, { "epoch": 1.2345372130428425, "grad_norm": 1.4727432689856292, "learning_rate": 7.0010122289285635e-06, "loss": 0.6786199808120728, "step": 4222 }, { "epoch": 1.234829653458108, "grad_norm": 1.6328773458986388, "learning_rate": 6.996400409129793e-06, "loss": 0.5378292798995972, "step": 4223 }, { "epoch": 1.2351220938733734, "grad_norm": 1.5470680329093456, "learning_rate": 6.9917892914041685e-06, "loss": 0.47646570205688477, "step": 4224 }, { "epoch": 1.2354145342886387, "grad_norm": 1.6302332764801317, "learning_rate": 6.987178876829503e-06, "loss": 0.554225504398346, "step": 4225 }, { "epoch": 1.235706974703904, "grad_norm": 1.8888599643549215, "learning_rate": 6.982569166483459e-06, "loss": 0.42614030838012695, "step": 4226 }, { "epoch": 1.2359994151191696, "grad_norm": 1.523118498051214, "learning_rate": 6.977960161443524e-06, "loss": 0.5043676495552063, "step": 4227 }, { "epoch": 1.2362918555344349, "grad_norm": 1.454372819437309, "learning_rate": 6.973351862787029e-06, "loss": 0.4905642569065094, "step": 4228 }, { "epoch": 1.2365842959497002, "grad_norm": 1.6152329822736995, "learning_rate": 6.9687442715911325e-06, "loss": 0.5860332250595093, "step": 4229 }, { "epoch": 1.2368767363649655, "grad_norm": 1.3841079659340747, "learning_rate": 6.9641373889328345e-06, "loss": 0.4900137782096863, "step": 4230 }, { "epoch": 1.237169176780231, "grad_norm": 1.7249957815195471, "learning_rate": 6.959531215888961e-06, "loss": 0.5736855268478394, "step": 4231 }, { "epoch": 1.2374616171954964, "grad_norm": 1.6635333389812996, "learning_rate": 6.95492575353618e-06, "loss": 0.6390400528907776, "step": 4232 }, { "epoch": 1.2377540576107617, "grad_norm": 1.6623693676348965, "learning_rate": 6.95032100295099e-06, "loss": 0.6553822159767151, "step": 4233 }, { "epoch": 1.2380464980260273, "grad_norm": 1.871056647578711, "learning_rate": 6.945716965209723e-06, "loss": 0.6685863733291626, "step": 4234 }, { "epoch": 1.2383389384412926, "grad_norm": 1.7090289188063175, "learning_rate": 6.941113641388542e-06, "loss": 0.5172277688980103, "step": 4235 }, { "epoch": 1.238631378856558, "grad_norm": 1.9648968097135298, "learning_rate": 6.936511032563451e-06, "loss": 0.6578007936477661, "step": 4236 }, { "epoch": 1.2389238192718235, "grad_norm": 1.5304274814539944, "learning_rate": 6.931909139810283e-06, "loss": 0.5679500699043274, "step": 4237 }, { "epoch": 1.2392162596870888, "grad_norm": 1.6592749019605815, "learning_rate": 6.927307964204695e-06, "loss": 0.49142318964004517, "step": 4238 }, { "epoch": 1.2395087001023541, "grad_norm": 1.497996058585022, "learning_rate": 6.9227075068221926e-06, "loss": 0.5339487195014954, "step": 4239 }, { "epoch": 1.2398011405176195, "grad_norm": 1.9993237065248757, "learning_rate": 6.918107768738097e-06, "loss": 0.5845860242843628, "step": 4240 }, { "epoch": 1.240093580932885, "grad_norm": 2.5543699126297823, "learning_rate": 6.9135087510275735e-06, "loss": 0.6767281889915466, "step": 4241 }, { "epoch": 1.2403860213481503, "grad_norm": 1.850547226886836, "learning_rate": 6.908910454765612e-06, "loss": 0.6119472980499268, "step": 4242 }, { "epoch": 1.2406784617634157, "grad_norm": 1.6013723709723773, "learning_rate": 6.904312881027038e-06, "loss": 0.6375409364700317, "step": 4243 }, { "epoch": 1.240970902178681, "grad_norm": 1.9482571730059268, "learning_rate": 6.899716030886508e-06, "loss": 0.7059881687164307, "step": 4244 }, { "epoch": 1.2412633425939466, "grad_norm": 1.9206862231453385, "learning_rate": 6.895119905418504e-06, "loss": 0.6463328003883362, "step": 4245 }, { "epoch": 1.2415557830092119, "grad_norm": 1.5219372029025222, "learning_rate": 6.890524505697345e-06, "loss": 0.5374869108200073, "step": 4246 }, { "epoch": 1.2418482234244772, "grad_norm": 1.625313205404651, "learning_rate": 6.885929832797176e-06, "loss": 0.5219276547431946, "step": 4247 }, { "epoch": 1.2421406638397428, "grad_norm": 1.4315105659194174, "learning_rate": 6.881335887791973e-06, "loss": 0.4815624952316284, "step": 4248 }, { "epoch": 1.242433104255008, "grad_norm": 1.318059168550072, "learning_rate": 6.8767426717555475e-06, "loss": 0.5111992955207825, "step": 4249 }, { "epoch": 1.2427255446702734, "grad_norm": 1.6870166439076426, "learning_rate": 6.872150185761533e-06, "loss": 0.5331606268882751, "step": 4250 }, { "epoch": 1.2430179850855387, "grad_norm": 1.5572023614320247, "learning_rate": 6.867558430883393e-06, "loss": 0.5375202894210815, "step": 4251 }, { "epoch": 1.2433104255008043, "grad_norm": 1.495445158871636, "learning_rate": 6.862967408194425e-06, "loss": 0.5667152404785156, "step": 4252 }, { "epoch": 1.2436028659160696, "grad_norm": 2.036302557289267, "learning_rate": 6.858377118767752e-06, "loss": 0.5679255723953247, "step": 4253 }, { "epoch": 1.243895306331335, "grad_norm": 1.7798647531094058, "learning_rate": 6.853787563676324e-06, "loss": 0.6097947359085083, "step": 4254 }, { "epoch": 1.2441877467466003, "grad_norm": 1.458407608257313, "learning_rate": 6.849198743992927e-06, "loss": 0.41869044303894043, "step": 4255 }, { "epoch": 1.2444801871618658, "grad_norm": 1.595586166137391, "learning_rate": 6.8446106607901655e-06, "loss": 0.6414821147918701, "step": 4256 }, { "epoch": 1.2447726275771311, "grad_norm": 1.9180058965370612, "learning_rate": 6.840023315140476e-06, "loss": 0.5985021591186523, "step": 4257 }, { "epoch": 1.2450650679923965, "grad_norm": 1.429348085027092, "learning_rate": 6.8354367081161235e-06, "loss": 0.4718092381954193, "step": 4258 }, { "epoch": 1.245357508407662, "grad_norm": 1.374927912317877, "learning_rate": 6.8308508407892e-06, "loss": 0.46431800723075867, "step": 4259 }, { "epoch": 1.2456499488229273, "grad_norm": 1.4906925043469428, "learning_rate": 6.826265714231624e-06, "loss": 0.5499997735023499, "step": 4260 }, { "epoch": 1.2459423892381927, "grad_norm": 1.605653884930273, "learning_rate": 6.8216813295151415e-06, "loss": 0.6078206300735474, "step": 4261 }, { "epoch": 1.2462348296534582, "grad_norm": 1.6116067904051048, "learning_rate": 6.817097687711322e-06, "loss": 0.5706520080566406, "step": 4262 }, { "epoch": 1.2465272700687235, "grad_norm": 1.4579793726336556, "learning_rate": 6.812514789891566e-06, "loss": 0.5210137367248535, "step": 4263 }, { "epoch": 1.2468197104839889, "grad_norm": 1.5969341972097826, "learning_rate": 6.807932637127097e-06, "loss": 0.42632028460502625, "step": 4264 }, { "epoch": 1.2471121508992542, "grad_norm": 1.3281470644259092, "learning_rate": 6.803351230488967e-06, "loss": 0.49990004301071167, "step": 4265 }, { "epoch": 1.2474045913145198, "grad_norm": 1.6439327542913937, "learning_rate": 6.798770571048052e-06, "loss": 0.557829737663269, "step": 4266 }, { "epoch": 1.247697031729785, "grad_norm": 1.6838717466364301, "learning_rate": 6.794190659875052e-06, "loss": 0.4784187078475952, "step": 4267 }, { "epoch": 1.2479894721450504, "grad_norm": 1.6243877795123443, "learning_rate": 6.789611498040492e-06, "loss": 0.4795057773590088, "step": 4268 }, { "epoch": 1.2482819125603157, "grad_norm": 1.4149752899303223, "learning_rate": 6.785033086614725e-06, "loss": 0.415715754032135, "step": 4269 }, { "epoch": 1.2485743529755813, "grad_norm": 1.4478921102692126, "learning_rate": 6.7804554266679266e-06, "loss": 0.49056607484817505, "step": 4270 }, { "epoch": 1.2488667933908466, "grad_norm": 1.8227279880342706, "learning_rate": 6.775878519270098e-06, "loss": 0.5268200039863586, "step": 4271 }, { "epoch": 1.249159233806112, "grad_norm": 1.5664194732567784, "learning_rate": 6.771302365491064e-06, "loss": 0.6250356435775757, "step": 4272 }, { "epoch": 1.2494516742213775, "grad_norm": 1.5152208337758115, "learning_rate": 6.76672696640047e-06, "loss": 0.5403029918670654, "step": 4273 }, { "epoch": 1.2497441146366428, "grad_norm": 1.6699524807174595, "learning_rate": 6.762152323067787e-06, "loss": 0.47006577253341675, "step": 4274 }, { "epoch": 1.2500365550519081, "grad_norm": 1.7406248179582138, "learning_rate": 6.7575784365623134e-06, "loss": 0.5088232755661011, "step": 4275 }, { "epoch": 1.2503289954671737, "grad_norm": 1.7598214720338152, "learning_rate": 6.7530053079531664e-06, "loss": 0.5438642501831055, "step": 4276 }, { "epoch": 1.250621435882439, "grad_norm": 1.4316922317447767, "learning_rate": 6.748432938309286e-06, "loss": 0.45436567068099976, "step": 4277 }, { "epoch": 1.2509138762977043, "grad_norm": 1.5793052704561465, "learning_rate": 6.743861328699438e-06, "loss": 0.5298944115638733, "step": 4278 }, { "epoch": 1.2512063167129697, "grad_norm": 1.3504092629468785, "learning_rate": 6.7392904801922055e-06, "loss": 0.49393707513809204, "step": 4279 }, { "epoch": 1.251498757128235, "grad_norm": 1.4852717426676887, "learning_rate": 6.734720393855998e-06, "loss": 0.5540947318077087, "step": 4280 }, { "epoch": 1.2517911975435005, "grad_norm": 1.4330918355062934, "learning_rate": 6.730151070759043e-06, "loss": 0.47406166791915894, "step": 4281 }, { "epoch": 1.2520836379587659, "grad_norm": 1.5653956712736337, "learning_rate": 6.725582511969397e-06, "loss": 0.46885907649993896, "step": 4282 }, { "epoch": 1.2523760783740312, "grad_norm": 1.7710771095422673, "learning_rate": 6.721014718554931e-06, "loss": 0.537517786026001, "step": 4283 }, { "epoch": 1.2526685187892967, "grad_norm": 1.5323701554592244, "learning_rate": 6.716447691583336e-06, "loss": 0.514340341091156, "step": 4284 }, { "epoch": 1.252960959204562, "grad_norm": 1.6716715067641383, "learning_rate": 6.711881432122129e-06, "loss": 0.5696117281913757, "step": 4285 }, { "epoch": 1.2532533996198274, "grad_norm": 1.632492076185155, "learning_rate": 6.707315941238645e-06, "loss": 0.5620799660682678, "step": 4286 }, { "epoch": 1.253545840035093, "grad_norm": 1.7721487037647632, "learning_rate": 6.702751220000039e-06, "loss": 0.4832923412322998, "step": 4287 }, { "epoch": 1.2538382804503583, "grad_norm": 1.7195688873272827, "learning_rate": 6.698187269473289e-06, "loss": 0.6608176231384277, "step": 4288 }, { "epoch": 1.2541307208656236, "grad_norm": 1.67536250359078, "learning_rate": 6.69362409072519e-06, "loss": 0.6002779006958008, "step": 4289 }, { "epoch": 1.2544231612808892, "grad_norm": 1.5859756058231869, "learning_rate": 6.689061684822357e-06, "loss": 0.49898988008499146, "step": 4290 }, { "epoch": 1.2547156016961545, "grad_norm": 1.908707186131175, "learning_rate": 6.684500052831222e-06, "loss": 0.5887055397033691, "step": 4291 }, { "epoch": 1.2550080421114198, "grad_norm": 1.7680049519728702, "learning_rate": 6.679939195818043e-06, "loss": 0.6494714617729187, "step": 4292 }, { "epoch": 1.2553004825266851, "grad_norm": 2.320887096811341, "learning_rate": 6.67537911484889e-06, "loss": 0.5708397626876831, "step": 4293 }, { "epoch": 1.2555929229419505, "grad_norm": 1.4472817266256797, "learning_rate": 6.670819810989656e-06, "loss": 0.40412014722824097, "step": 4294 }, { "epoch": 1.255885363357216, "grad_norm": 1.675200347061479, "learning_rate": 6.666261285306048e-06, "loss": 0.5141078233718872, "step": 4295 }, { "epoch": 1.2561778037724813, "grad_norm": 1.8039877813287382, "learning_rate": 6.661703538863595e-06, "loss": 0.6463406085968018, "step": 4296 }, { "epoch": 1.2564702441877467, "grad_norm": 1.5123528456732447, "learning_rate": 6.657146572727643e-06, "loss": 0.5809177160263062, "step": 4297 }, { "epoch": 1.2567626846030122, "grad_norm": 1.6628802038143384, "learning_rate": 6.652590387963354e-06, "loss": 0.5124412775039673, "step": 4298 }, { "epoch": 1.2570551250182775, "grad_norm": 1.8011842610745197, "learning_rate": 6.64803498563571e-06, "loss": 0.5399736762046814, "step": 4299 }, { "epoch": 1.2573475654335429, "grad_norm": 1.4403786785249715, "learning_rate": 6.6434803668095095e-06, "loss": 0.548133373260498, "step": 4300 }, { "epoch": 1.2576400058488084, "grad_norm": 1.7736401224051406, "learning_rate": 6.638926532549364e-06, "loss": 0.45056310296058655, "step": 4301 }, { "epoch": 1.2579324462640737, "grad_norm": 2.434184879977136, "learning_rate": 6.634373483919705e-06, "loss": 0.5191814303398132, "step": 4302 }, { "epoch": 1.258224886679339, "grad_norm": 1.4188278481806091, "learning_rate": 6.62982122198478e-06, "loss": 0.41939109563827515, "step": 4303 }, { "epoch": 1.2585173270946044, "grad_norm": 1.6631261031278954, "learning_rate": 6.625269747808655e-06, "loss": 0.6535190939903259, "step": 4304 }, { "epoch": 1.2588097675098697, "grad_norm": 1.7210614964326925, "learning_rate": 6.620719062455207e-06, "loss": 0.6282539367675781, "step": 4305 }, { "epoch": 1.2591022079251353, "grad_norm": 1.5686327106153548, "learning_rate": 6.616169166988133e-06, "loss": 0.5378686189651489, "step": 4306 }, { "epoch": 1.2593946483404006, "grad_norm": 1.80292094791683, "learning_rate": 6.611620062470942e-06, "loss": 0.5278643369674683, "step": 4307 }, { "epoch": 1.259687088755666, "grad_norm": 1.5211478183195457, "learning_rate": 6.607071749966958e-06, "loss": 0.5578285455703735, "step": 4308 }, { "epoch": 1.2599795291709315, "grad_norm": 1.7646090466366875, "learning_rate": 6.602524230539324e-06, "loss": 0.6452580094337463, "step": 4309 }, { "epoch": 1.2602719695861968, "grad_norm": 1.7812547970338353, "learning_rate": 6.597977505250992e-06, "loss": 0.6133028268814087, "step": 4310 }, { "epoch": 1.2605644100014621, "grad_norm": 1.552230597230507, "learning_rate": 6.5934315751647345e-06, "loss": 0.4930221140384674, "step": 4311 }, { "epoch": 1.2608568504167277, "grad_norm": 2.197359143106273, "learning_rate": 6.588886441343136e-06, "loss": 0.48653531074523926, "step": 4312 }, { "epoch": 1.261149290831993, "grad_norm": 1.851387133095935, "learning_rate": 6.5843421048485915e-06, "loss": 0.6594399213790894, "step": 4313 }, { "epoch": 1.2614417312472583, "grad_norm": 1.66909694599425, "learning_rate": 6.579798566743314e-06, "loss": 0.5164401531219482, "step": 4314 }, { "epoch": 1.2617341716625239, "grad_norm": 1.7484363064869977, "learning_rate": 6.5752558280893245e-06, "loss": 0.6338971853256226, "step": 4315 }, { "epoch": 1.2620266120777892, "grad_norm": 1.7526913055276123, "learning_rate": 6.570713889948461e-06, "loss": 0.5301859974861145, "step": 4316 }, { "epoch": 1.2623190524930545, "grad_norm": 1.5016995868339762, "learning_rate": 6.566172753382376e-06, "loss": 0.4572887420654297, "step": 4317 }, { "epoch": 1.2626114929083199, "grad_norm": 1.5874066468532555, "learning_rate": 6.561632419452532e-06, "loss": 0.5235984325408936, "step": 4318 }, { "epoch": 1.2629039333235852, "grad_norm": 1.5456604836068861, "learning_rate": 6.557092889220206e-06, "loss": 0.586036205291748, "step": 4319 }, { "epoch": 1.2631963737388507, "grad_norm": 1.6865403223453492, "learning_rate": 6.5525541637464855e-06, "loss": 0.4728356599807739, "step": 4320 }, { "epoch": 1.263488814154116, "grad_norm": 1.5435862254535146, "learning_rate": 6.548016244092265e-06, "loss": 0.4932190179824829, "step": 4321 }, { "epoch": 1.2637812545693814, "grad_norm": 1.6817765339416926, "learning_rate": 6.543479131318259e-06, "loss": 0.525676429271698, "step": 4322 }, { "epoch": 1.264073694984647, "grad_norm": 1.4602981048339732, "learning_rate": 6.538942826484991e-06, "loss": 0.5462610721588135, "step": 4323 }, { "epoch": 1.2643661353999123, "grad_norm": 1.6170865165049584, "learning_rate": 6.534407330652792e-06, "loss": 0.5391229391098022, "step": 4324 }, { "epoch": 1.2646585758151776, "grad_norm": 1.7047610503615187, "learning_rate": 6.529872644881811e-06, "loss": 0.5361309051513672, "step": 4325 }, { "epoch": 1.2649510162304431, "grad_norm": 1.7296167923882715, "learning_rate": 6.525338770232001e-06, "loss": 0.5692390203475952, "step": 4326 }, { "epoch": 1.2652434566457085, "grad_norm": 1.7314833561159049, "learning_rate": 6.520805707763125e-06, "loss": 0.5337555408477783, "step": 4327 }, { "epoch": 1.2655358970609738, "grad_norm": 1.5538338127930955, "learning_rate": 6.5162734585347605e-06, "loss": 0.604168176651001, "step": 4328 }, { "epoch": 1.2658283374762394, "grad_norm": 1.621069176676038, "learning_rate": 6.5117420236062955e-06, "loss": 0.5404821038246155, "step": 4329 }, { "epoch": 1.2661207778915047, "grad_norm": 1.8779165644410452, "learning_rate": 6.507211404036922e-06, "loss": 0.6097038388252258, "step": 4330 }, { "epoch": 1.26641321830677, "grad_norm": 1.41106750899854, "learning_rate": 6.50268160088565e-06, "loss": 0.44309180974960327, "step": 4331 }, { "epoch": 1.2667056587220353, "grad_norm": 1.633689199912191, "learning_rate": 6.498152615211286e-06, "loss": 0.5703015923500061, "step": 4332 }, { "epoch": 1.2669980991373007, "grad_norm": 1.9239494523704173, "learning_rate": 6.4936244480724575e-06, "loss": 0.5745347738265991, "step": 4333 }, { "epoch": 1.2672905395525662, "grad_norm": 1.7558467932702122, "learning_rate": 6.489097100527595e-06, "loss": 0.6611922979354858, "step": 4334 }, { "epoch": 1.2675829799678315, "grad_norm": 1.373367301388142, "learning_rate": 6.484570573634939e-06, "loss": 0.4560534358024597, "step": 4335 }, { "epoch": 1.2678754203830969, "grad_norm": 1.3735982195225196, "learning_rate": 6.480044868452535e-06, "loss": 0.3765673041343689, "step": 4336 }, { "epoch": 1.2681678607983624, "grad_norm": 1.631255659187599, "learning_rate": 6.475519986038246e-06, "loss": 0.6471004486083984, "step": 4337 }, { "epoch": 1.2684603012136277, "grad_norm": 1.6199016829966775, "learning_rate": 6.4709959274497284e-06, "loss": 0.5639084577560425, "step": 4338 }, { "epoch": 1.268752741628893, "grad_norm": 1.6880087227037737, "learning_rate": 6.4664726937444545e-06, "loss": 0.6367507576942444, "step": 4339 }, { "epoch": 1.2690451820441586, "grad_norm": 2.0302420653268958, "learning_rate": 6.4619502859797055e-06, "loss": 0.6803586483001709, "step": 4340 }, { "epoch": 1.269337622459424, "grad_norm": 1.7398101139995543, "learning_rate": 6.457428705212565e-06, "loss": 0.49068397283554077, "step": 4341 }, { "epoch": 1.2696300628746893, "grad_norm": 1.8759736386903334, "learning_rate": 6.4529079524999296e-06, "loss": 0.616880476474762, "step": 4342 }, { "epoch": 1.2699225032899546, "grad_norm": 1.3483643409763457, "learning_rate": 6.448388028898489e-06, "loss": 0.45614945888519287, "step": 4343 }, { "epoch": 1.27021494370522, "grad_norm": 1.4554785032074153, "learning_rate": 6.443868935464754e-06, "loss": 0.49267178773880005, "step": 4344 }, { "epoch": 1.2705073841204855, "grad_norm": 1.6269409722468795, "learning_rate": 6.439350673255033e-06, "loss": 0.5169225335121155, "step": 4345 }, { "epoch": 1.2707998245357508, "grad_norm": 1.4955295461512919, "learning_rate": 6.434833243325442e-06, "loss": 0.4999169111251831, "step": 4346 }, { "epoch": 1.2710922649510161, "grad_norm": 1.6243334237328435, "learning_rate": 6.430316646731906e-06, "loss": 0.6282567977905273, "step": 4347 }, { "epoch": 1.2713847053662817, "grad_norm": 1.6085299245102849, "learning_rate": 6.425800884530151e-06, "loss": 0.5007494688034058, "step": 4348 }, { "epoch": 1.271677145781547, "grad_norm": 1.656568917278449, "learning_rate": 6.421285957775705e-06, "loss": 0.5178118944168091, "step": 4349 }, { "epoch": 1.2719695861968123, "grad_norm": 1.560370266514351, "learning_rate": 6.4167718675239075e-06, "loss": 0.5473636388778687, "step": 4350 }, { "epoch": 1.2722620266120779, "grad_norm": 1.6953423126666767, "learning_rate": 6.4122586148299004e-06, "loss": 0.5863620042800903, "step": 4351 }, { "epoch": 1.2725544670273432, "grad_norm": 1.8607908969719156, "learning_rate": 6.407746200748628e-06, "loss": 0.5301654934883118, "step": 4352 }, { "epoch": 1.2728469074426085, "grad_norm": 1.6932378497792755, "learning_rate": 6.403234626334842e-06, "loss": 0.5856075286865234, "step": 4353 }, { "epoch": 1.273139347857874, "grad_norm": 1.678003179838639, "learning_rate": 6.39872389264309e-06, "loss": 0.49686455726623535, "step": 4354 }, { "epoch": 1.2734317882731394, "grad_norm": 1.4854139308295418, "learning_rate": 6.394214000727734e-06, "loss": 0.5032684803009033, "step": 4355 }, { "epoch": 1.2737242286884047, "grad_norm": 1.8801294667488437, "learning_rate": 6.389704951642931e-06, "loss": 0.6855330467224121, "step": 4356 }, { "epoch": 1.27401666910367, "grad_norm": 1.479367610859775, "learning_rate": 6.385196746442644e-06, "loss": 0.5333864688873291, "step": 4357 }, { "epoch": 1.2743091095189354, "grad_norm": 1.5944305875728124, "learning_rate": 6.380689386180641e-06, "loss": 0.5597629547119141, "step": 4358 }, { "epoch": 1.274601549934201, "grad_norm": 1.467403558865203, "learning_rate": 6.376182871910488e-06, "loss": 0.4576488137245178, "step": 4359 }, { "epoch": 1.2748939903494663, "grad_norm": 1.7247772731373485, "learning_rate": 6.371677204685555e-06, "loss": 0.45165061950683594, "step": 4360 }, { "epoch": 1.2751864307647316, "grad_norm": 1.5415632861050979, "learning_rate": 6.367172385559014e-06, "loss": 0.5451514720916748, "step": 4361 }, { "epoch": 1.2754788711799971, "grad_norm": 1.874618224476165, "learning_rate": 6.362668415583841e-06, "loss": 0.6141163110733032, "step": 4362 }, { "epoch": 1.2757713115952625, "grad_norm": 1.6869879622469415, "learning_rate": 6.358165295812809e-06, "loss": 0.5156669020652771, "step": 4363 }, { "epoch": 1.2760637520105278, "grad_norm": 1.8328178355603366, "learning_rate": 6.3536630272984974e-06, "loss": 0.41485118865966797, "step": 4364 }, { "epoch": 1.2763561924257933, "grad_norm": 1.546563271256682, "learning_rate": 6.3491616110932845e-06, "loss": 0.386514276266098, "step": 4365 }, { "epoch": 1.2766486328410587, "grad_norm": 1.472426766767245, "learning_rate": 6.344661048249345e-06, "loss": 0.5620483160018921, "step": 4366 }, { "epoch": 1.276941073256324, "grad_norm": 1.6328857080628636, "learning_rate": 6.340161339818662e-06, "loss": 0.4910007119178772, "step": 4367 }, { "epoch": 1.2772335136715895, "grad_norm": 1.3312787841228058, "learning_rate": 6.335662486853014e-06, "loss": 0.4628123939037323, "step": 4368 }, { "epoch": 1.2775259540868549, "grad_norm": 1.7576669653081538, "learning_rate": 6.331164490403978e-06, "loss": 0.5129125118255615, "step": 4369 }, { "epoch": 1.2778183945021202, "grad_norm": 1.3282548492081792, "learning_rate": 6.326667351522939e-06, "loss": 0.45091521739959717, "step": 4370 }, { "epoch": 1.2781108349173855, "grad_norm": 1.4312089210542207, "learning_rate": 6.322171071261071e-06, "loss": 0.4914324879646301, "step": 4371 }, { "epoch": 1.2784032753326509, "grad_norm": 1.7409991660962885, "learning_rate": 6.317675650669353e-06, "loss": 0.6361461877822876, "step": 4372 }, { "epoch": 1.2786957157479164, "grad_norm": 1.6196651007639755, "learning_rate": 6.313181090798561e-06, "loss": 0.4251636564731598, "step": 4373 }, { "epoch": 1.2789881561631817, "grad_norm": 1.7204832108380748, "learning_rate": 6.308687392699275e-06, "loss": 0.5605714321136475, "step": 4374 }, { "epoch": 1.279280596578447, "grad_norm": 1.5898129202606366, "learning_rate": 6.304194557421867e-06, "loss": 0.5366392731666565, "step": 4375 }, { "epoch": 1.2795730369937126, "grad_norm": 1.9084263306328586, "learning_rate": 6.299702586016512e-06, "loss": 0.5501587986946106, "step": 4376 }, { "epoch": 1.279865477408978, "grad_norm": 1.856477952130892, "learning_rate": 6.295211479533177e-06, "loss": 0.6145694851875305, "step": 4377 }, { "epoch": 1.2801579178242433, "grad_norm": 1.9271512769721166, "learning_rate": 6.2907212390216335e-06, "loss": 0.5921984910964966, "step": 4378 }, { "epoch": 1.2804503582395088, "grad_norm": 1.5061577707687395, "learning_rate": 6.286231865531447e-06, "loss": 0.4376833140850067, "step": 4379 }, { "epoch": 1.2807427986547741, "grad_norm": 1.5348932565255202, "learning_rate": 6.281743360111983e-06, "loss": 0.5141662955284119, "step": 4380 }, { "epoch": 1.2810352390700395, "grad_norm": 1.700541758244486, "learning_rate": 6.2772557238124025e-06, "loss": 0.7065848112106323, "step": 4381 }, { "epoch": 1.2813276794853048, "grad_norm": 1.500203661604044, "learning_rate": 6.272768957681659e-06, "loss": 0.5662813186645508, "step": 4382 }, { "epoch": 1.2816201199005701, "grad_norm": 1.5006210101215816, "learning_rate": 6.268283062768512e-06, "loss": 0.46340662240982056, "step": 4383 }, { "epoch": 1.2819125603158357, "grad_norm": 1.5406586553103667, "learning_rate": 6.263798040121508e-06, "loss": 0.5258422493934631, "step": 4384 }, { "epoch": 1.282205000731101, "grad_norm": 1.8313859097442655, "learning_rate": 6.2593138907889965e-06, "loss": 0.5586943030357361, "step": 4385 }, { "epoch": 1.2824974411463663, "grad_norm": 1.707661958872181, "learning_rate": 6.254830615819116e-06, "loss": 0.5224723815917969, "step": 4386 }, { "epoch": 1.2827898815616319, "grad_norm": 1.8755820352841006, "learning_rate": 6.250348216259812e-06, "loss": 0.6092125177383423, "step": 4387 }, { "epoch": 1.2830823219768972, "grad_norm": 1.6601692047393128, "learning_rate": 6.245866693158813e-06, "loss": 0.5582839250564575, "step": 4388 }, { "epoch": 1.2833747623921625, "grad_norm": 1.529218817283274, "learning_rate": 6.241386047563649e-06, "loss": 0.6074620485305786, "step": 4389 }, { "epoch": 1.283667202807428, "grad_norm": 1.3747332990929297, "learning_rate": 6.236906280521646e-06, "loss": 0.6247550845146179, "step": 4390 }, { "epoch": 1.2839596432226934, "grad_norm": 1.6645308511195784, "learning_rate": 6.232427393079919e-06, "loss": 0.5325940847396851, "step": 4391 }, { "epoch": 1.2842520836379587, "grad_norm": 1.5279900789464966, "learning_rate": 6.227949386285379e-06, "loss": 0.5082288980484009, "step": 4392 }, { "epoch": 1.2845445240532243, "grad_norm": 1.587332587045442, "learning_rate": 6.223472261184738e-06, "loss": 0.5704036355018616, "step": 4393 }, { "epoch": 1.2848369644684896, "grad_norm": 1.7646477307813349, "learning_rate": 6.218996018824492e-06, "loss": 0.5301543474197388, "step": 4394 }, { "epoch": 1.285129404883755, "grad_norm": 1.6829663682000435, "learning_rate": 6.21452066025094e-06, "loss": 0.48660725355148315, "step": 4395 }, { "epoch": 1.2854218452990203, "grad_norm": 1.7324467857194032, "learning_rate": 6.210046186510168e-06, "loss": 0.5744560956954956, "step": 4396 }, { "epoch": 1.2857142857142856, "grad_norm": 1.6645302463411007, "learning_rate": 6.205572598648055e-06, "loss": 0.5714898109436035, "step": 4397 }, { "epoch": 1.2860067261295511, "grad_norm": 1.8166911532739076, "learning_rate": 6.201099897710277e-06, "loss": 0.6616571545600891, "step": 4398 }, { "epoch": 1.2862991665448165, "grad_norm": 1.75450880953695, "learning_rate": 6.1966280847423e-06, "loss": 0.5552959442138672, "step": 4399 }, { "epoch": 1.2865916069600818, "grad_norm": 1.6738534376194054, "learning_rate": 6.192157160789382e-06, "loss": 0.5544919967651367, "step": 4400 }, { "epoch": 1.2868840473753473, "grad_norm": 1.6448049553355306, "learning_rate": 6.18768712689658e-06, "loss": 0.5914726853370667, "step": 4401 }, { "epoch": 1.2871764877906127, "grad_norm": 1.76025336575331, "learning_rate": 6.183217984108729e-06, "loss": 0.47191259264945984, "step": 4402 }, { "epoch": 1.287468928205878, "grad_norm": 1.690038062727397, "learning_rate": 6.178749733470468e-06, "loss": 0.6479181051254272, "step": 4403 }, { "epoch": 1.2877613686211435, "grad_norm": 1.5093061541159978, "learning_rate": 6.174282376026225e-06, "loss": 0.42491137981414795, "step": 4404 }, { "epoch": 1.2880538090364089, "grad_norm": 1.5952968160469727, "learning_rate": 6.169815912820214e-06, "loss": 0.6037728786468506, "step": 4405 }, { "epoch": 1.2883462494516742, "grad_norm": 1.6035701682484467, "learning_rate": 6.165350344896446e-06, "loss": 0.4979787766933441, "step": 4406 }, { "epoch": 1.2886386898669397, "grad_norm": 1.800062229580063, "learning_rate": 6.160885673298722e-06, "loss": 0.5863564014434814, "step": 4407 }, { "epoch": 1.288931130282205, "grad_norm": 1.735193401842224, "learning_rate": 6.156421899070628e-06, "loss": 0.6516878008842468, "step": 4408 }, { "epoch": 1.2892235706974704, "grad_norm": 1.3644068122534347, "learning_rate": 6.151959023255545e-06, "loss": 0.45655903220176697, "step": 4409 }, { "epoch": 1.2895160111127357, "grad_norm": 1.5401566996811273, "learning_rate": 6.147497046896644e-06, "loss": 0.4751289486885071, "step": 4410 }, { "epoch": 1.289808451528001, "grad_norm": 1.6902527178920421, "learning_rate": 6.1430359710368845e-06, "loss": 0.48472684621810913, "step": 4411 }, { "epoch": 1.2901008919432666, "grad_norm": 1.5473669029252384, "learning_rate": 6.138575796719017e-06, "loss": 0.5014214515686035, "step": 4412 }, { "epoch": 1.290393332358532, "grad_norm": 1.7827106404845192, "learning_rate": 6.134116524985581e-06, "loss": 0.5979991555213928, "step": 4413 }, { "epoch": 1.2906857727737973, "grad_norm": 1.325839826079579, "learning_rate": 6.129658156878899e-06, "loss": 0.4651130437850952, "step": 4414 }, { "epoch": 1.2909782131890628, "grad_norm": 1.7806648175874917, "learning_rate": 6.125200693441092e-06, "loss": 0.5938215255737305, "step": 4415 }, { "epoch": 1.2912706536043281, "grad_norm": 1.5490961027602033, "learning_rate": 6.1207441357140626e-06, "loss": 0.4893927574157715, "step": 4416 }, { "epoch": 1.2915630940195935, "grad_norm": 1.7524993955466766, "learning_rate": 6.116288484739507e-06, "loss": 0.5546435713768005, "step": 4417 }, { "epoch": 1.291855534434859, "grad_norm": 1.8413981048239587, "learning_rate": 6.111833741558905e-06, "loss": 0.545367419719696, "step": 4418 }, { "epoch": 1.2921479748501243, "grad_norm": 1.4120684443774227, "learning_rate": 6.1073799072135245e-06, "loss": 0.47479283809661865, "step": 4419 }, { "epoch": 1.2924404152653897, "grad_norm": 1.6721044710471762, "learning_rate": 6.102926982744423e-06, "loss": 0.5109270215034485, "step": 4420 }, { "epoch": 1.292732855680655, "grad_norm": 1.774842272860347, "learning_rate": 6.098474969192445e-06, "loss": 0.5862404108047485, "step": 4421 }, { "epoch": 1.2930252960959203, "grad_norm": 1.5821200459355214, "learning_rate": 6.09402386759822e-06, "loss": 0.5031660795211792, "step": 4422 }, { "epoch": 1.2933177365111859, "grad_norm": 1.7397846198854208, "learning_rate": 6.089573679002168e-06, "loss": 0.47179776430130005, "step": 4423 }, { "epoch": 1.2936101769264512, "grad_norm": 1.5340233803824985, "learning_rate": 6.085124404444495e-06, "loss": 0.45889902114868164, "step": 4424 }, { "epoch": 1.2939026173417165, "grad_norm": 1.5550814946749143, "learning_rate": 6.080676044965188e-06, "loss": 0.49759042263031006, "step": 4425 }, { "epoch": 1.294195057756982, "grad_norm": 1.9841525065569887, "learning_rate": 6.076228601604024e-06, "loss": 0.5980732440948486, "step": 4426 }, { "epoch": 1.2944874981722474, "grad_norm": 1.6256180215634828, "learning_rate": 6.07178207540057e-06, "loss": 0.6167548894882202, "step": 4427 }, { "epoch": 1.2947799385875127, "grad_norm": 1.7343822678821683, "learning_rate": 6.067336467394169e-06, "loss": 0.5632568597793579, "step": 4428 }, { "epoch": 1.2950723790027783, "grad_norm": 1.713926568632917, "learning_rate": 6.062891778623961e-06, "loss": 0.5521456003189087, "step": 4429 }, { "epoch": 1.2953648194180436, "grad_norm": 1.4514202434870498, "learning_rate": 6.058448010128861e-06, "loss": 0.5916576385498047, "step": 4430 }, { "epoch": 1.295657259833309, "grad_norm": 1.4200773171635346, "learning_rate": 6.054005162947571e-06, "loss": 0.546825647354126, "step": 4431 }, { "epoch": 1.2959497002485745, "grad_norm": 1.903586469303659, "learning_rate": 6.049563238118584e-06, "loss": 0.5704302787780762, "step": 4432 }, { "epoch": 1.2962421406638398, "grad_norm": 1.6923235048512564, "learning_rate": 6.0451222366801706e-06, "loss": 0.5791710615158081, "step": 4433 }, { "epoch": 1.2965345810791051, "grad_norm": 1.5242567102891653, "learning_rate": 6.040682159670389e-06, "loss": 0.41179752349853516, "step": 4434 }, { "epoch": 1.2968270214943705, "grad_norm": 1.7120079687188825, "learning_rate": 6.03624300812708e-06, "loss": 0.5213680267333984, "step": 4435 }, { "epoch": 1.2971194619096358, "grad_norm": 1.6198208396506975, "learning_rate": 6.0318047830878675e-06, "loss": 0.4917318522930145, "step": 4436 }, { "epoch": 1.2974119023249013, "grad_norm": 1.9301576881874427, "learning_rate": 6.027367485590159e-06, "loss": 0.6347956657409668, "step": 4437 }, { "epoch": 1.2977043427401667, "grad_norm": 1.454096730257314, "learning_rate": 6.022931116671147e-06, "loss": 0.5263427495956421, "step": 4438 }, { "epoch": 1.297996783155432, "grad_norm": 1.3982615348649814, "learning_rate": 6.018495677367806e-06, "loss": 0.5686784982681274, "step": 4439 }, { "epoch": 1.2982892235706975, "grad_norm": 1.6986790860575087, "learning_rate": 6.0140611687168934e-06, "loss": 0.576974630355835, "step": 4440 }, { "epoch": 1.2985816639859629, "grad_norm": 1.7183954732732796, "learning_rate": 6.009627591754946e-06, "loss": 0.5375877618789673, "step": 4441 }, { "epoch": 1.2988741044012282, "grad_norm": 1.7026702794952187, "learning_rate": 6.005194947518287e-06, "loss": 0.6106576919555664, "step": 4442 }, { "epoch": 1.2991665448164937, "grad_norm": 1.6076086367802058, "learning_rate": 6.000763237043021e-06, "loss": 0.475483238697052, "step": 4443 }, { "epoch": 1.299458985231759, "grad_norm": 1.7568326021636087, "learning_rate": 5.9963324613650335e-06, "loss": 0.5819226503372192, "step": 4444 }, { "epoch": 1.2997514256470244, "grad_norm": 1.6384408260054233, "learning_rate": 5.991902621519988e-06, "loss": 0.6394410133361816, "step": 4445 }, { "epoch": 1.30004386606229, "grad_norm": 1.653615111391099, "learning_rate": 5.987473718543338e-06, "loss": 0.48502016067504883, "step": 4446 }, { "epoch": 1.3003363064775553, "grad_norm": 1.5217151928427126, "learning_rate": 5.983045753470308e-06, "loss": 0.5782333612442017, "step": 4447 }, { "epoch": 1.3006287468928206, "grad_norm": 1.8358895387455052, "learning_rate": 5.97861872733591e-06, "loss": 0.5498893857002258, "step": 4448 }, { "epoch": 1.300921187308086, "grad_norm": 1.5773905938706185, "learning_rate": 5.974192641174934e-06, "loss": 0.47757571935653687, "step": 4449 }, { "epoch": 1.3012136277233513, "grad_norm": 1.751650457738534, "learning_rate": 5.96976749602195e-06, "loss": 0.5401994585990906, "step": 4450 }, { "epoch": 1.3015060681386168, "grad_norm": 1.7445816604225337, "learning_rate": 5.965343292911309e-06, "loss": 0.5818814635276794, "step": 4451 }, { "epoch": 1.3017985085538821, "grad_norm": 1.890298335476633, "learning_rate": 5.9609200328771465e-06, "loss": 0.524645984172821, "step": 4452 }, { "epoch": 1.3020909489691475, "grad_norm": 1.6124004265504417, "learning_rate": 5.956497716953365e-06, "loss": 0.46523183584213257, "step": 4453 }, { "epoch": 1.302383389384413, "grad_norm": 1.6328139064911342, "learning_rate": 5.952076346173657e-06, "loss": 0.6066159009933472, "step": 4454 }, { "epoch": 1.3026758297996783, "grad_norm": 1.5743831575113747, "learning_rate": 5.947655921571491e-06, "loss": 0.48635774850845337, "step": 4455 }, { "epoch": 1.3029682702149437, "grad_norm": 1.7296441740948125, "learning_rate": 5.943236444180116e-06, "loss": 0.5159435868263245, "step": 4456 }, { "epoch": 1.3032607106302092, "grad_norm": 1.43545214825073, "learning_rate": 5.938817915032558e-06, "loss": 0.5566878914833069, "step": 4457 }, { "epoch": 1.3035531510454745, "grad_norm": 1.5736652583628634, "learning_rate": 5.934400335161618e-06, "loss": 0.46998029947280884, "step": 4458 }, { "epoch": 1.3038455914607399, "grad_norm": 1.7808256717613173, "learning_rate": 5.92998370559988e-06, "loss": 0.5554553270339966, "step": 4459 }, { "epoch": 1.3041380318760052, "grad_norm": 1.7335497855414168, "learning_rate": 5.925568027379704e-06, "loss": 0.5659651756286621, "step": 4460 }, { "epoch": 1.3044304722912705, "grad_norm": 1.4784849199972236, "learning_rate": 5.921153301533229e-06, "loss": 0.5105445981025696, "step": 4461 }, { "epoch": 1.304722912706536, "grad_norm": 1.6833489269681376, "learning_rate": 5.91673952909237e-06, "loss": 0.5255740284919739, "step": 4462 }, { "epoch": 1.3050153531218014, "grad_norm": 1.6388447853221406, "learning_rate": 5.912326711088821e-06, "loss": 0.5691270232200623, "step": 4463 }, { "epoch": 1.3053077935370667, "grad_norm": 1.64945916767282, "learning_rate": 5.907914848554048e-06, "loss": 0.5783474445343018, "step": 4464 }, { "epoch": 1.3056002339523323, "grad_norm": 1.631334603802349, "learning_rate": 5.903503942519299e-06, "loss": 0.6305002570152283, "step": 4465 }, { "epoch": 1.3058926743675976, "grad_norm": 1.9357776829199835, "learning_rate": 5.8990939940156e-06, "loss": 0.6465631723403931, "step": 4466 }, { "epoch": 1.306185114782863, "grad_norm": 1.8264406193491898, "learning_rate": 5.8946850040737434e-06, "loss": 0.4883456230163574, "step": 4467 }, { "epoch": 1.3064775551981285, "grad_norm": 1.3902013367704193, "learning_rate": 5.890276973724305e-06, "loss": 0.4896056056022644, "step": 4468 }, { "epoch": 1.3067699956133938, "grad_norm": 1.6292986861573446, "learning_rate": 5.885869903997638e-06, "loss": 0.603757917881012, "step": 4469 }, { "epoch": 1.3070624360286591, "grad_norm": 1.6368879465310389, "learning_rate": 5.881463795923866e-06, "loss": 0.5412129163742065, "step": 4470 }, { "epoch": 1.3073548764439247, "grad_norm": 1.576979548849775, "learning_rate": 5.877058650532891e-06, "loss": 0.5255335569381714, "step": 4471 }, { "epoch": 1.30764731685919, "grad_norm": 1.861250264495057, "learning_rate": 5.87265446885439e-06, "loss": 0.5855039358139038, "step": 4472 }, { "epoch": 1.3079397572744553, "grad_norm": 1.7387082626664492, "learning_rate": 5.868251251917811e-06, "loss": 0.5763603448867798, "step": 4473 }, { "epoch": 1.3082321976897207, "grad_norm": 1.7494976398773932, "learning_rate": 5.86384900075238e-06, "loss": 0.5148910880088806, "step": 4474 }, { "epoch": 1.308524638104986, "grad_norm": 1.726220320494232, "learning_rate": 5.859447716387097e-06, "loss": 0.6387143135070801, "step": 4475 }, { "epoch": 1.3088170785202515, "grad_norm": 1.6421362434800872, "learning_rate": 5.855047399850735e-06, "loss": 0.5492211580276489, "step": 4476 }, { "epoch": 1.3091095189355169, "grad_norm": 1.748321310864673, "learning_rate": 5.850648052171843e-06, "loss": 0.5715115070343018, "step": 4477 }, { "epoch": 1.3094019593507822, "grad_norm": 1.8948603499593957, "learning_rate": 5.8462496743787385e-06, "loss": 0.6295989155769348, "step": 4478 }, { "epoch": 1.3096943997660477, "grad_norm": 1.6169983680834699, "learning_rate": 5.841852267499518e-06, "loss": 0.5843105316162109, "step": 4479 }, { "epoch": 1.309986840181313, "grad_norm": 1.443044009123256, "learning_rate": 5.837455832562049e-06, "loss": 0.43283605575561523, "step": 4480 }, { "epoch": 1.3102792805965784, "grad_norm": 1.6217104179487012, "learning_rate": 5.8330603705939684e-06, "loss": 0.6115404367446899, "step": 4481 }, { "epoch": 1.310571721011844, "grad_norm": 1.2325386929467517, "learning_rate": 5.828665882622692e-06, "loss": 0.4274179935455322, "step": 4482 }, { "epoch": 1.3108641614271093, "grad_norm": 1.3722363792161896, "learning_rate": 5.824272369675403e-06, "loss": 0.4385778307914734, "step": 4483 }, { "epoch": 1.3111566018423746, "grad_norm": 1.939305382555819, "learning_rate": 5.819879832779058e-06, "loss": 0.6310205459594727, "step": 4484 }, { "epoch": 1.3114490422576401, "grad_norm": 1.5511013635003787, "learning_rate": 5.815488272960388e-06, "loss": 0.6309192180633545, "step": 4485 }, { "epoch": 1.3117414826729055, "grad_norm": 1.8051032087296774, "learning_rate": 5.811097691245895e-06, "loss": 0.4751497507095337, "step": 4486 }, { "epoch": 1.3120339230881708, "grad_norm": 1.5897893613027336, "learning_rate": 5.806708088661846e-06, "loss": 0.5540175437927246, "step": 4487 }, { "epoch": 1.3123263635034361, "grad_norm": 1.924801228279098, "learning_rate": 5.802319466234283e-06, "loss": 0.5533273816108704, "step": 4488 }, { "epoch": 1.3126188039187014, "grad_norm": 1.5486991099512135, "learning_rate": 5.797931824989023e-06, "loss": 0.463643878698349, "step": 4489 }, { "epoch": 1.312911244333967, "grad_norm": 1.9073169839874196, "learning_rate": 5.79354516595165e-06, "loss": 0.5990232229232788, "step": 4490 }, { "epoch": 1.3132036847492323, "grad_norm": 1.7681103257151853, "learning_rate": 5.789159490147518e-06, "loss": 0.5569760799407959, "step": 4491 }, { "epoch": 1.3134961251644977, "grad_norm": 1.598897244778613, "learning_rate": 5.784774798601755e-06, "loss": 0.5016749501228333, "step": 4492 }, { "epoch": 1.3137885655797632, "grad_norm": 1.8830720070455038, "learning_rate": 5.780391092339253e-06, "loss": 0.5624934434890747, "step": 4493 }, { "epoch": 1.3140810059950285, "grad_norm": 2.146444811832683, "learning_rate": 5.776008372384676e-06, "loss": 0.7445797920227051, "step": 4494 }, { "epoch": 1.3143734464102939, "grad_norm": 1.9276650555591395, "learning_rate": 5.771626639762461e-06, "loss": 0.5849495530128479, "step": 4495 }, { "epoch": 1.3146658868255594, "grad_norm": 1.6679644602081254, "learning_rate": 5.767245895496809e-06, "loss": 0.5672163367271423, "step": 4496 }, { "epoch": 1.3149583272408247, "grad_norm": 1.4482015307125622, "learning_rate": 5.762866140611698e-06, "loss": 0.5278276801109314, "step": 4497 }, { "epoch": 1.31525076765609, "grad_norm": 1.8273800354421317, "learning_rate": 5.7584873761308615e-06, "loss": 0.54908686876297, "step": 4498 }, { "epoch": 1.3155432080713554, "grad_norm": 1.7592605115208164, "learning_rate": 5.754109603077811e-06, "loss": 0.5257589817047119, "step": 4499 }, { "epoch": 1.3158356484866207, "grad_norm": 1.4910358958486878, "learning_rate": 5.749732822475825e-06, "loss": 0.5744988918304443, "step": 4500 }, { "epoch": 1.3161280889018863, "grad_norm": 1.4827754689170145, "learning_rate": 5.74535703534795e-06, "loss": 0.5186365246772766, "step": 4501 }, { "epoch": 1.3164205293171516, "grad_norm": 1.6539527720112557, "learning_rate": 5.740982242716999e-06, "loss": 0.53574538230896, "step": 4502 }, { "epoch": 1.316712969732417, "grad_norm": 1.5347054109635063, "learning_rate": 5.736608445605555e-06, "loss": 0.6087717413902283, "step": 4503 }, { "epoch": 1.3170054101476825, "grad_norm": 1.5413257189374059, "learning_rate": 5.732235645035964e-06, "loss": 0.5132769346237183, "step": 4504 }, { "epoch": 1.3172978505629478, "grad_norm": 1.6361856291197476, "learning_rate": 5.727863842030342e-06, "loss": 0.588458776473999, "step": 4505 }, { "epoch": 1.3175902909782131, "grad_norm": 1.6129388653597692, "learning_rate": 5.723493037610572e-06, "loss": 0.5154894590377808, "step": 4506 }, { "epoch": 1.3178827313934787, "grad_norm": 1.5507002889867831, "learning_rate": 5.719123232798304e-06, "loss": 0.586688220500946, "step": 4507 }, { "epoch": 1.318175171808744, "grad_norm": 1.8125403251714918, "learning_rate": 5.714754428614956e-06, "loss": 0.4948856830596924, "step": 4508 }, { "epoch": 1.3184676122240093, "grad_norm": 1.5128350944665496, "learning_rate": 5.7103866260817005e-06, "loss": 0.6179821491241455, "step": 4509 }, { "epoch": 1.3187600526392749, "grad_norm": 1.876290206668384, "learning_rate": 5.7060198262194914e-06, "loss": 0.5865011811256409, "step": 4510 }, { "epoch": 1.3190524930545402, "grad_norm": 1.660419141577327, "learning_rate": 5.701654030049038e-06, "loss": 0.519783079624176, "step": 4511 }, { "epoch": 1.3193449334698055, "grad_norm": 1.5035780556155738, "learning_rate": 5.697289238590822e-06, "loss": 0.4238147437572479, "step": 4512 }, { "epoch": 1.3196373738850709, "grad_norm": 1.6350345014151721, "learning_rate": 5.6929254528650855e-06, "loss": 0.5931107997894287, "step": 4513 }, { "epoch": 1.3199298143003362, "grad_norm": 1.7485415603348589, "learning_rate": 5.688562673891837e-06, "loss": 0.7454524040222168, "step": 4514 }, { "epoch": 1.3202222547156017, "grad_norm": 1.6756127294636487, "learning_rate": 5.684200902690848e-06, "loss": 0.5909554362297058, "step": 4515 }, { "epoch": 1.320514695130867, "grad_norm": 1.449068353866628, "learning_rate": 5.67984014028166e-06, "loss": 0.5059943199157715, "step": 4516 }, { "epoch": 1.3208071355461324, "grad_norm": 1.3855018310443914, "learning_rate": 5.675480387683572e-06, "loss": 0.4387373924255371, "step": 4517 }, { "epoch": 1.321099575961398, "grad_norm": 1.6368288915875209, "learning_rate": 5.671121645915648e-06, "loss": 0.6452310681343079, "step": 4518 }, { "epoch": 1.3213920163766633, "grad_norm": 1.4569471180570228, "learning_rate": 5.666763915996725e-06, "loss": 0.5629088282585144, "step": 4519 }, { "epoch": 1.3216844567919286, "grad_norm": 1.6108062624448902, "learning_rate": 5.662407198945386e-06, "loss": 0.6442849636077881, "step": 4520 }, { "epoch": 1.3219768972071941, "grad_norm": 1.4707356833436183, "learning_rate": 5.6580514957799894e-06, "loss": 0.5330031514167786, "step": 4521 }, { "epoch": 1.3222693376224595, "grad_norm": 1.4396348923376052, "learning_rate": 5.6536968075186575e-06, "loss": 0.471035361289978, "step": 4522 }, { "epoch": 1.3225617780377248, "grad_norm": 1.5589169874424196, "learning_rate": 5.649343135179271e-06, "loss": 0.5675650835037231, "step": 4523 }, { "epoch": 1.3228542184529903, "grad_norm": 1.6961906881686575, "learning_rate": 5.644990479779473e-06, "loss": 0.5458093881607056, "step": 4524 }, { "epoch": 1.3231466588682557, "grad_norm": 1.5690712646364733, "learning_rate": 5.640638842336672e-06, "loss": 0.5625189542770386, "step": 4525 }, { "epoch": 1.323439099283521, "grad_norm": 1.778677748743509, "learning_rate": 5.636288223868038e-06, "loss": 0.5868214964866638, "step": 4526 }, { "epoch": 1.3237315396987863, "grad_norm": 1.6502123203157841, "learning_rate": 5.631938625390498e-06, "loss": 0.5340765714645386, "step": 4527 }, { "epoch": 1.3240239801140516, "grad_norm": 1.4463169385647288, "learning_rate": 5.627590047920747e-06, "loss": 0.4487069845199585, "step": 4528 }, { "epoch": 1.3243164205293172, "grad_norm": 1.5750183859940412, "learning_rate": 5.623242492475237e-06, "loss": 0.4246913194656372, "step": 4529 }, { "epoch": 1.3246088609445825, "grad_norm": 1.6537085849345186, "learning_rate": 5.618895960070188e-06, "loss": 0.49904564023017883, "step": 4530 }, { "epoch": 1.3249013013598478, "grad_norm": 1.6201874773916152, "learning_rate": 5.614550451721566e-06, "loss": 0.5506085157394409, "step": 4531 }, { "epoch": 1.3251937417751134, "grad_norm": 1.6929750939693964, "learning_rate": 5.610205968445111e-06, "loss": 0.4861884117126465, "step": 4532 }, { "epoch": 1.3254861821903787, "grad_norm": 1.5616728357477914, "learning_rate": 5.605862511256322e-06, "loss": 0.5639146566390991, "step": 4533 }, { "epoch": 1.325778622605644, "grad_norm": 1.3747626231277423, "learning_rate": 5.601520081170455e-06, "loss": 0.43305879831314087, "step": 4534 }, { "epoch": 1.3260710630209096, "grad_norm": 1.4728588464752952, "learning_rate": 5.597178679202524e-06, "loss": 0.4820408821105957, "step": 4535 }, { "epoch": 1.326363503436175, "grad_norm": 2.092875019342334, "learning_rate": 5.592838306367307e-06, "loss": 0.5601707100868225, "step": 4536 }, { "epoch": 1.3266559438514403, "grad_norm": 1.6269012393440097, "learning_rate": 5.588498963679339e-06, "loss": 0.5655055046081543, "step": 4537 }, { "epoch": 1.3269483842667056, "grad_norm": 1.871556737283143, "learning_rate": 5.584160652152917e-06, "loss": 0.5425975322723389, "step": 4538 }, { "epoch": 1.327240824681971, "grad_norm": 1.5388263554547548, "learning_rate": 5.579823372802098e-06, "loss": 0.607103168964386, "step": 4539 }, { "epoch": 1.3275332650972365, "grad_norm": 1.6396827179367406, "learning_rate": 5.575487126640686e-06, "loss": 0.6011538505554199, "step": 4540 }, { "epoch": 1.3278257055125018, "grad_norm": 1.6364470669862505, "learning_rate": 5.571151914682258e-06, "loss": 0.5333601236343384, "step": 4541 }, { "epoch": 1.3281181459277671, "grad_norm": 1.7756177203838306, "learning_rate": 5.566817737940142e-06, "loss": 0.576410174369812, "step": 4542 }, { "epoch": 1.3284105863430327, "grad_norm": 1.8060302167235907, "learning_rate": 5.562484597427425e-06, "loss": 0.506458044052124, "step": 4543 }, { "epoch": 1.328703026758298, "grad_norm": 2.0174061298696975, "learning_rate": 5.558152494156955e-06, "loss": 0.5893718004226685, "step": 4544 }, { "epoch": 1.3289954671735633, "grad_norm": 1.6979483029237916, "learning_rate": 5.55382142914133e-06, "loss": 0.508120059967041, "step": 4545 }, { "epoch": 1.3292879075888289, "grad_norm": 1.5737735987577735, "learning_rate": 5.5494914033929126e-06, "loss": 0.6103616952896118, "step": 4546 }, { "epoch": 1.3295803480040942, "grad_norm": 1.7304904972315491, "learning_rate": 5.545162417923822e-06, "loss": 0.5290235280990601, "step": 4547 }, { "epoch": 1.3298727884193595, "grad_norm": 1.5350904839753017, "learning_rate": 5.540834473745929e-06, "loss": 0.5729631185531616, "step": 4548 }, { "epoch": 1.330165228834625, "grad_norm": 1.5574358916011883, "learning_rate": 5.536507571870866e-06, "loss": 0.48720547556877136, "step": 4549 }, { "epoch": 1.3304576692498904, "grad_norm": 1.5393587740053045, "learning_rate": 5.532181713310023e-06, "loss": 0.4987955689430237, "step": 4550 }, { "epoch": 1.3307501096651557, "grad_norm": 1.3126988702980638, "learning_rate": 5.527856899074536e-06, "loss": 0.4002467393875122, "step": 4551 }, { "epoch": 1.331042550080421, "grad_norm": 2.0947575410388866, "learning_rate": 5.523533130175308e-06, "loss": 0.7435724139213562, "step": 4552 }, { "epoch": 1.3313349904956864, "grad_norm": 1.541726198150986, "learning_rate": 5.519210407622993e-06, "loss": 0.34711340069770813, "step": 4553 }, { "epoch": 1.331627430910952, "grad_norm": 1.6396721749099359, "learning_rate": 5.514888732428003e-06, "loss": 0.4749720096588135, "step": 4554 }, { "epoch": 1.3319198713262173, "grad_norm": 1.7586628740577253, "learning_rate": 5.5105681056005e-06, "loss": 0.5818741321563721, "step": 4555 }, { "epoch": 1.3322123117414826, "grad_norm": 1.7095504305078453, "learning_rate": 5.506248528150407e-06, "loss": 0.5715004801750183, "step": 4556 }, { "epoch": 1.3325047521567481, "grad_norm": 1.7722621684818736, "learning_rate": 5.501930001087399e-06, "loss": 0.5465661287307739, "step": 4557 }, { "epoch": 1.3327971925720135, "grad_norm": 1.7620411170921917, "learning_rate": 5.4976125254209035e-06, "loss": 0.6324847936630249, "step": 4558 }, { "epoch": 1.3330896329872788, "grad_norm": 1.4165701736936904, "learning_rate": 5.493296102160105e-06, "loss": 0.4616294503211975, "step": 4559 }, { "epoch": 1.3333820734025443, "grad_norm": 1.6922106714814378, "learning_rate": 5.488980732313942e-06, "loss": 0.5187079310417175, "step": 4560 }, { "epoch": 1.3336745138178097, "grad_norm": 1.8396067182286635, "learning_rate": 5.484666416891109e-06, "loss": 0.6120654344558716, "step": 4561 }, { "epoch": 1.333966954233075, "grad_norm": 1.6878860661661148, "learning_rate": 5.480353156900044e-06, "loss": 0.6171379685401917, "step": 4562 }, { "epoch": 1.3342593946483405, "grad_norm": 1.510636167770684, "learning_rate": 5.4760409533489475e-06, "loss": 0.4690072536468506, "step": 4563 }, { "epoch": 1.3345518350636059, "grad_norm": 1.5961764389633983, "learning_rate": 5.471729807245773e-06, "loss": 0.511309802532196, "step": 4564 }, { "epoch": 1.3348442754788712, "grad_norm": 1.6355911684199975, "learning_rate": 5.467419719598223e-06, "loss": 0.5657862424850464, "step": 4565 }, { "epoch": 1.3351367158941365, "grad_norm": 1.7641189489668823, "learning_rate": 5.4631106914137555e-06, "loss": 0.4263400733470917, "step": 4566 }, { "epoch": 1.3354291563094018, "grad_norm": 1.8179548841156754, "learning_rate": 5.458802723699579e-06, "loss": 0.6275177001953125, "step": 4567 }, { "epoch": 1.3357215967246674, "grad_norm": 1.6668120373290058, "learning_rate": 5.454495817462655e-06, "loss": 0.3857421278953552, "step": 4568 }, { "epoch": 1.3360140371399327, "grad_norm": 1.7165178528012586, "learning_rate": 5.450189973709697e-06, "loss": 0.5834560394287109, "step": 4569 }, { "epoch": 1.336306477555198, "grad_norm": 1.6632572235317495, "learning_rate": 5.445885193447169e-06, "loss": 0.6165010929107666, "step": 4570 }, { "epoch": 1.3365989179704636, "grad_norm": 1.7470412065212853, "learning_rate": 5.441581477681288e-06, "loss": 0.6034595966339111, "step": 4571 }, { "epoch": 1.336891358385729, "grad_norm": 1.740024112758077, "learning_rate": 5.43727882741802e-06, "loss": 0.570164144039154, "step": 4572 }, { "epoch": 1.3371837988009942, "grad_norm": 1.4917354928366209, "learning_rate": 5.432977243663089e-06, "loss": 0.5369169116020203, "step": 4573 }, { "epoch": 1.3374762392162598, "grad_norm": 1.7875464183853407, "learning_rate": 5.428676727421954e-06, "loss": 0.5624364614486694, "step": 4574 }, { "epoch": 1.3377686796315251, "grad_norm": 1.517348885410251, "learning_rate": 5.424377279699842e-06, "loss": 0.5002127885818481, "step": 4575 }, { "epoch": 1.3380611200467905, "grad_norm": 1.7071888960959534, "learning_rate": 5.42007890150172e-06, "loss": 0.5998499393463135, "step": 4576 }, { "epoch": 1.3383535604620558, "grad_norm": 1.7074905497433162, "learning_rate": 5.415781593832307e-06, "loss": 0.5988572835922241, "step": 4577 }, { "epoch": 1.338646000877321, "grad_norm": 1.6551550553396004, "learning_rate": 5.411485357696075e-06, "loss": 0.5202064514160156, "step": 4578 }, { "epoch": 1.3389384412925867, "grad_norm": 2.519364812628366, "learning_rate": 5.407190194097241e-06, "loss": 0.5246714949607849, "step": 4579 }, { "epoch": 1.339230881707852, "grad_norm": 1.5907571805696734, "learning_rate": 5.4028961040397765e-06, "loss": 0.5998588800430298, "step": 4580 }, { "epoch": 1.3395233221231173, "grad_norm": 1.7851321190756844, "learning_rate": 5.3986030885273945e-06, "loss": 0.5971418023109436, "step": 4581 }, { "epoch": 1.3398157625383829, "grad_norm": 1.5857061971181772, "learning_rate": 5.3943111485635644e-06, "loss": 0.4638952910900116, "step": 4582 }, { "epoch": 1.3401082029536482, "grad_norm": 1.5981773831835344, "learning_rate": 5.390020285151502e-06, "loss": 0.5007182955741882, "step": 4583 }, { "epoch": 1.3404006433689135, "grad_norm": 1.610643010141743, "learning_rate": 5.385730499294171e-06, "loss": 0.5013964772224426, "step": 4584 }, { "epoch": 1.340693083784179, "grad_norm": 1.6360724667305655, "learning_rate": 5.381441791994276e-06, "loss": 0.5699980854988098, "step": 4585 }, { "epoch": 1.3409855241994444, "grad_norm": 1.6423818252193456, "learning_rate": 5.377154164254283e-06, "loss": 0.5326210260391235, "step": 4586 }, { "epoch": 1.3412779646147097, "grad_norm": 1.5111806674915849, "learning_rate": 5.372867617076395e-06, "loss": 0.6065158843994141, "step": 4587 }, { "epoch": 1.3415704050299753, "grad_norm": 1.356022290658006, "learning_rate": 5.368582151462569e-06, "loss": 0.48427143692970276, "step": 4588 }, { "epoch": 1.3418628454452406, "grad_norm": 1.4868111001385538, "learning_rate": 5.364297768414505e-06, "loss": 0.5755994915962219, "step": 4589 }, { "epoch": 1.342155285860506, "grad_norm": 1.4690268021295017, "learning_rate": 5.360014468933652e-06, "loss": 0.4959644377231598, "step": 4590 }, { "epoch": 1.3424477262757712, "grad_norm": 1.5383458553689457, "learning_rate": 5.355732254021205e-06, "loss": 0.5374274253845215, "step": 4591 }, { "epoch": 1.3427401666910366, "grad_norm": 1.6286753609495908, "learning_rate": 5.351451124678106e-06, "loss": 0.5875111818313599, "step": 4592 }, { "epoch": 1.3430326071063021, "grad_norm": 1.7964496178319949, "learning_rate": 5.347171081905045e-06, "loss": 0.5230692028999329, "step": 4593 }, { "epoch": 1.3433250475215675, "grad_norm": 1.424672908012482, "learning_rate": 5.342892126702453e-06, "loss": 0.4624518156051636, "step": 4594 }, { "epoch": 1.3436174879368328, "grad_norm": 1.9140370650793175, "learning_rate": 5.3386142600705134e-06, "loss": 0.5141074061393738, "step": 4595 }, { "epoch": 1.3439099283520983, "grad_norm": 1.6249918744835086, "learning_rate": 5.334337483009147e-06, "loss": 0.4655565023422241, "step": 4596 }, { "epoch": 1.3442023687673637, "grad_norm": 1.6516547156710706, "learning_rate": 5.330061796518025e-06, "loss": 0.6135094165802002, "step": 4597 }, { "epoch": 1.344494809182629, "grad_norm": 1.595543646054287, "learning_rate": 5.325787201596563e-06, "loss": 0.5865254402160645, "step": 4598 }, { "epoch": 1.3447872495978945, "grad_norm": 1.8032344885262006, "learning_rate": 5.321513699243924e-06, "loss": 0.5290840268135071, "step": 4599 }, { "epoch": 1.3450796900131599, "grad_norm": 1.5294052976370318, "learning_rate": 5.317241290459012e-06, "loss": 0.554675817489624, "step": 4600 }, { "epoch": 1.3453721304284252, "grad_norm": 1.499219614332531, "learning_rate": 5.312969976240479e-06, "loss": 0.5033853650093079, "step": 4601 }, { "epoch": 1.3456645708436907, "grad_norm": 1.8108264508032192, "learning_rate": 5.308699757586713e-06, "loss": 0.44666093587875366, "step": 4602 }, { "epoch": 1.345957011258956, "grad_norm": 1.5332559280539126, "learning_rate": 5.304430635495856e-06, "loss": 0.5447900891304016, "step": 4603 }, { "epoch": 1.3462494516742214, "grad_norm": 1.507503116151542, "learning_rate": 5.30016261096579e-06, "loss": 0.4425917863845825, "step": 4604 }, { "epoch": 1.3465418920894867, "grad_norm": 1.508411296889156, "learning_rate": 5.295895684994137e-06, "loss": 0.4411497712135315, "step": 4605 }, { "epoch": 1.346834332504752, "grad_norm": 1.537668383754579, "learning_rate": 5.291629858578271e-06, "loss": 0.5577414631843567, "step": 4606 }, { "epoch": 1.3471267729200176, "grad_norm": 1.7128549715372505, "learning_rate": 5.287365132715293e-06, "loss": 0.4754186272621155, "step": 4607 }, { "epoch": 1.347419213335283, "grad_norm": 1.6521724702121328, "learning_rate": 5.283101508402063e-06, "loss": 0.5582431554794312, "step": 4608 }, { "epoch": 1.3477116537505482, "grad_norm": 1.7476811492664892, "learning_rate": 5.2788389866351755e-06, "loss": 0.5552654266357422, "step": 4609 }, { "epoch": 1.3480040941658138, "grad_norm": 1.8662632335270106, "learning_rate": 5.2745775684109705e-06, "loss": 0.5776556730270386, "step": 4610 }, { "epoch": 1.3482965345810791, "grad_norm": 1.7735552141557176, "learning_rate": 5.270317254725528e-06, "loss": 0.5859286785125732, "step": 4611 }, { "epoch": 1.3485889749963444, "grad_norm": 1.5182169678473143, "learning_rate": 5.2660580465746694e-06, "loss": 0.5914887189865112, "step": 4612 }, { "epoch": 1.34888141541161, "grad_norm": 1.6371325039607922, "learning_rate": 5.261799944953956e-06, "loss": 0.43669426441192627, "step": 4613 }, { "epoch": 1.3491738558268753, "grad_norm": 1.718792113074269, "learning_rate": 5.2575429508587e-06, "loss": 0.473773717880249, "step": 4614 }, { "epoch": 1.3494662962421407, "grad_norm": 1.7451807781202082, "learning_rate": 5.253287065283949e-06, "loss": 0.5011228919029236, "step": 4615 }, { "epoch": 1.349758736657406, "grad_norm": 1.6598931266775088, "learning_rate": 5.249032289224483e-06, "loss": 0.5839254856109619, "step": 4616 }, { "epoch": 1.3500511770726713, "grad_norm": 1.7262514320572941, "learning_rate": 5.244778623674831e-06, "loss": 0.5375077128410339, "step": 4617 }, { "epoch": 1.3503436174879369, "grad_norm": 1.4572654878782452, "learning_rate": 5.240526069629265e-06, "loss": 0.49445679783821106, "step": 4618 }, { "epoch": 1.3506360579032022, "grad_norm": 1.5263979209526246, "learning_rate": 5.236274628081792e-06, "loss": 0.5369694828987122, "step": 4619 }, { "epoch": 1.3509284983184675, "grad_norm": 1.8018674546255473, "learning_rate": 5.23202430002616e-06, "loss": 0.6017554402351379, "step": 4620 }, { "epoch": 1.351220938733733, "grad_norm": 1.9428924144840352, "learning_rate": 5.227775086455859e-06, "loss": 0.5380403995513916, "step": 4621 }, { "epoch": 1.3515133791489984, "grad_norm": 1.6665289001084298, "learning_rate": 5.223526988364116e-06, "loss": 0.5650593042373657, "step": 4622 }, { "epoch": 1.3518058195642637, "grad_norm": 1.5672489406384107, "learning_rate": 5.219280006743897e-06, "loss": 0.5572884678840637, "step": 4623 }, { "epoch": 1.3520982599795293, "grad_norm": 1.839257774768153, "learning_rate": 5.21503414258791e-06, "loss": 0.5304458141326904, "step": 4624 }, { "epoch": 1.3523907003947946, "grad_norm": 1.8264084905380675, "learning_rate": 5.2107893968886005e-06, "loss": 0.6702588796615601, "step": 4625 }, { "epoch": 1.35268314081006, "grad_norm": 1.5301776431109881, "learning_rate": 5.206545770638152e-06, "loss": 0.4607279300689697, "step": 4626 }, { "epoch": 1.3529755812253255, "grad_norm": 1.4702386368708713, "learning_rate": 5.202303264828482e-06, "loss": 0.5759040713310242, "step": 4627 }, { "epoch": 1.3532680216405908, "grad_norm": 1.6340224609334149, "learning_rate": 5.198061880451253e-06, "loss": 0.446469783782959, "step": 4628 }, { "epoch": 1.3535604620558561, "grad_norm": 1.6416831158378962, "learning_rate": 5.193821618497864e-06, "loss": 0.4869040846824646, "step": 4629 }, { "epoch": 1.3538529024711214, "grad_norm": 1.59588454548975, "learning_rate": 5.189582479959449e-06, "loss": 0.5153477191925049, "step": 4630 }, { "epoch": 1.3541453428863868, "grad_norm": 1.6964185114911852, "learning_rate": 5.185344465826883e-06, "loss": 0.4958652853965759, "step": 4631 }, { "epoch": 1.3544377833016523, "grad_norm": 1.544404184800908, "learning_rate": 5.1811075770907715e-06, "loss": 0.5314347743988037, "step": 4632 }, { "epoch": 1.3547302237169176, "grad_norm": 1.6488125019330604, "learning_rate": 5.176871814741466e-06, "loss": 0.5366088151931763, "step": 4633 }, { "epoch": 1.355022664132183, "grad_norm": 1.7011582339400138, "learning_rate": 5.172637179769049e-06, "loss": 0.6239185929298401, "step": 4634 }, { "epoch": 1.3553151045474485, "grad_norm": 1.8789833552926098, "learning_rate": 5.168403673163341e-06, "loss": 0.5516507625579834, "step": 4635 }, { "epoch": 1.3556075449627139, "grad_norm": 1.6420696506744512, "learning_rate": 5.164171295913898e-06, "loss": 0.5859683156013489, "step": 4636 }, { "epoch": 1.3558999853779792, "grad_norm": 1.6138084463921514, "learning_rate": 5.159940049010015e-06, "loss": 0.5913225412368774, "step": 4637 }, { "epoch": 1.3561924257932447, "grad_norm": 1.690951404825549, "learning_rate": 5.155709933440714e-06, "loss": 0.650983989238739, "step": 4638 }, { "epoch": 1.35648486620851, "grad_norm": 1.7360324268029201, "learning_rate": 5.151480950194762e-06, "loss": 0.5631625652313232, "step": 4639 }, { "epoch": 1.3567773066237754, "grad_norm": 1.9305214623229574, "learning_rate": 5.147253100260659e-06, "loss": 0.48153650760650635, "step": 4640 }, { "epoch": 1.357069747039041, "grad_norm": 1.382159174171422, "learning_rate": 5.143026384626637e-06, "loss": 0.43598422408103943, "step": 4641 }, { "epoch": 1.3573621874543063, "grad_norm": 1.5586949144187017, "learning_rate": 5.138800804280668e-06, "loss": 0.5323987007141113, "step": 4642 }, { "epoch": 1.3576546278695716, "grad_norm": 1.739858834969472, "learning_rate": 5.134576360210454e-06, "loss": 0.5386587977409363, "step": 4643 }, { "epoch": 1.357947068284837, "grad_norm": 1.7229356194902612, "learning_rate": 5.130353053403434e-06, "loss": 0.4913867115974426, "step": 4644 }, { "epoch": 1.3582395087001022, "grad_norm": 2.681042611993396, "learning_rate": 5.12613088484678e-06, "loss": 0.6516048908233643, "step": 4645 }, { "epoch": 1.3585319491153678, "grad_norm": 1.7863407962771196, "learning_rate": 5.121909855527398e-06, "loss": 0.5290599465370178, "step": 4646 }, { "epoch": 1.3588243895306331, "grad_norm": 1.992281323100596, "learning_rate": 5.117689966431927e-06, "loss": 0.7909928560256958, "step": 4647 }, { "epoch": 1.3591168299458984, "grad_norm": 1.7798386890797042, "learning_rate": 5.113471218546746e-06, "loss": 0.4751276969909668, "step": 4648 }, { "epoch": 1.359409270361164, "grad_norm": 1.3934486662021524, "learning_rate": 5.109253612857954e-06, "loss": 0.4542301893234253, "step": 4649 }, { "epoch": 1.3597017107764293, "grad_norm": 1.6724566490890436, "learning_rate": 5.105037150351393e-06, "loss": 0.5355349779129028, "step": 4650 }, { "epoch": 1.3599941511916946, "grad_norm": 1.7131391763754547, "learning_rate": 5.100821832012637e-06, "loss": 0.4994719326496124, "step": 4651 }, { "epoch": 1.3602865916069602, "grad_norm": 1.7061763475820229, "learning_rate": 5.096607658826989e-06, "loss": 0.6171674728393555, "step": 4652 }, { "epoch": 1.3605790320222255, "grad_norm": 1.6851325839422124, "learning_rate": 5.092394631779487e-06, "loss": 0.5386878252029419, "step": 4653 }, { "epoch": 1.3608714724374908, "grad_norm": 1.4863597978488459, "learning_rate": 5.088182751854903e-06, "loss": 0.4495810270309448, "step": 4654 }, { "epoch": 1.3611639128527562, "grad_norm": 1.560829764762291, "learning_rate": 5.083972020037735e-06, "loss": 0.5540642142295837, "step": 4655 }, { "epoch": 1.3614563532680215, "grad_norm": 1.7743988570673719, "learning_rate": 5.079762437312219e-06, "loss": 0.6020554900169373, "step": 4656 }, { "epoch": 1.361748793683287, "grad_norm": 1.5410143370370128, "learning_rate": 5.075554004662316e-06, "loss": 0.47981250286102295, "step": 4657 }, { "epoch": 1.3620412340985524, "grad_norm": 1.6809006565320033, "learning_rate": 5.071346723071724e-06, "loss": 0.6206443905830383, "step": 4658 }, { "epoch": 1.3623336745138177, "grad_norm": 1.2946163710464256, "learning_rate": 5.067140593523869e-06, "loss": 0.46899446845054626, "step": 4659 }, { "epoch": 1.3626261149290833, "grad_norm": 1.3692435027739418, "learning_rate": 5.062935617001912e-06, "loss": 0.5695985555648804, "step": 4660 }, { "epoch": 1.3629185553443486, "grad_norm": 1.5567765237338644, "learning_rate": 5.058731794488732e-06, "loss": 0.5524671077728271, "step": 4661 }, { "epoch": 1.363210995759614, "grad_norm": 1.5953543121744755, "learning_rate": 5.054529126966953e-06, "loss": 0.4655245244503021, "step": 4662 }, { "epoch": 1.3635034361748795, "grad_norm": 1.6197588686677031, "learning_rate": 5.050327615418921e-06, "loss": 0.5617693662643433, "step": 4663 }, { "epoch": 1.3637958765901448, "grad_norm": 1.515126796303483, "learning_rate": 5.046127260826714e-06, "loss": 0.52044677734375, "step": 4664 }, { "epoch": 1.3640883170054101, "grad_norm": 1.6797173356320934, "learning_rate": 5.041928064172139e-06, "loss": 0.4567520022392273, "step": 4665 }, { "epoch": 1.3643807574206757, "grad_norm": 1.5794296901996336, "learning_rate": 5.037730026436736e-06, "loss": 0.5942729711532593, "step": 4666 }, { "epoch": 1.364673197835941, "grad_norm": 1.6501244665537385, "learning_rate": 5.033533148601766e-06, "loss": 0.3824811279773712, "step": 4667 }, { "epoch": 1.3649656382512063, "grad_norm": 1.4770402468740385, "learning_rate": 5.029337431648227e-06, "loss": 0.4710771441459656, "step": 4668 }, { "epoch": 1.3652580786664716, "grad_norm": 1.5059979846835174, "learning_rate": 5.02514287655684e-06, "loss": 0.6617978811264038, "step": 4669 }, { "epoch": 1.365550519081737, "grad_norm": 1.5829629132621983, "learning_rate": 5.020949484308058e-06, "loss": 0.5237355828285217, "step": 4670 }, { "epoch": 1.3658429594970025, "grad_norm": 1.4158253094169178, "learning_rate": 5.016757255882065e-06, "loss": 0.4544803500175476, "step": 4671 }, { "epoch": 1.3661353999122678, "grad_norm": 1.8761810485620272, "learning_rate": 5.012566192258763e-06, "loss": 0.5854490399360657, "step": 4672 }, { "epoch": 1.3664278403275332, "grad_norm": 1.902502544434852, "learning_rate": 5.008376294417787e-06, "loss": 0.6275635361671448, "step": 4673 }, { "epoch": 1.3667202807427987, "grad_norm": 1.6133596882151136, "learning_rate": 5.004187563338504e-06, "loss": 0.5160082578659058, "step": 4674 }, { "epoch": 1.367012721158064, "grad_norm": 1.439845673979846, "learning_rate": 5.000000000000003e-06, "loss": 0.5203640460968018, "step": 4675 }, { "epoch": 1.3673051615733294, "grad_norm": 2.025079516078861, "learning_rate": 4.9958136053811e-06, "loss": 0.6836066246032715, "step": 4676 }, { "epoch": 1.367597601988595, "grad_norm": 1.5727820508513324, "learning_rate": 4.991628380460343e-06, "loss": 0.5566641092300415, "step": 4677 }, { "epoch": 1.3678900424038603, "grad_norm": 1.643119627925769, "learning_rate": 4.9874443262159984e-06, "loss": 0.5618000030517578, "step": 4678 }, { "epoch": 1.3681824828191256, "grad_norm": 1.4054605482949574, "learning_rate": 4.983261443626068e-06, "loss": 0.4605063796043396, "step": 4679 }, { "epoch": 1.3684749232343911, "grad_norm": 1.7557732951775291, "learning_rate": 4.97907973366827e-06, "loss": 0.48282021284103394, "step": 4680 }, { "epoch": 1.3687673636496565, "grad_norm": 1.467194830130128, "learning_rate": 4.974899197320059e-06, "loss": 0.42356133460998535, "step": 4681 }, { "epoch": 1.3690598040649218, "grad_norm": 1.3266470239270218, "learning_rate": 4.97071983555861e-06, "loss": 0.459377646446228, "step": 4682 }, { "epoch": 1.369352244480187, "grad_norm": 1.9278413810039654, "learning_rate": 4.966541649360819e-06, "loss": 0.5539775490760803, "step": 4683 }, { "epoch": 1.3696446848954524, "grad_norm": 1.7014699336581571, "learning_rate": 4.962364639703311e-06, "loss": 0.5593239068984985, "step": 4684 }, { "epoch": 1.369937125310718, "grad_norm": 1.8333805174527635, "learning_rate": 4.958188807562441e-06, "loss": 0.5425251722335815, "step": 4685 }, { "epoch": 1.3702295657259833, "grad_norm": 1.564182289934299, "learning_rate": 4.954014153914282e-06, "loss": 0.5183289051055908, "step": 4686 }, { "epoch": 1.3705220061412486, "grad_norm": 1.6834251116472225, "learning_rate": 4.9498406797346345e-06, "loss": 0.5278980731964111, "step": 4687 }, { "epoch": 1.3708144465565142, "grad_norm": 1.6861784833580373, "learning_rate": 4.9456683859990185e-06, "loss": 0.4857858419418335, "step": 4688 }, { "epoch": 1.3711068869717795, "grad_norm": 1.4955733852507764, "learning_rate": 4.94149727368269e-06, "loss": 0.4889591336250305, "step": 4689 }, { "epoch": 1.3713993273870448, "grad_norm": 2.1119376280699105, "learning_rate": 4.937327343760617e-06, "loss": 0.5475220680236816, "step": 4690 }, { "epoch": 1.3716917678023104, "grad_norm": 1.8065068083746048, "learning_rate": 4.933158597207501e-06, "loss": 0.5794380903244019, "step": 4691 }, { "epoch": 1.3719842082175757, "grad_norm": 1.5916906211687458, "learning_rate": 4.928991034997752e-06, "loss": 0.42212024331092834, "step": 4692 }, { "epoch": 1.372276648632841, "grad_norm": 1.8447627986814241, "learning_rate": 4.924824658105516e-06, "loss": 0.6091631054878235, "step": 4693 }, { "epoch": 1.3725690890481064, "grad_norm": 1.8839419484958528, "learning_rate": 4.9206594675046595e-06, "loss": 0.544279158115387, "step": 4694 }, { "epoch": 1.3728615294633717, "grad_norm": 1.4361678658463186, "learning_rate": 4.916495464168768e-06, "loss": 0.46237099170684814, "step": 4695 }, { "epoch": 1.3731539698786372, "grad_norm": 1.5990237040506552, "learning_rate": 4.912332649071154e-06, "loss": 0.5615352392196655, "step": 4696 }, { "epoch": 1.3734464102939026, "grad_norm": 1.7554295249178744, "learning_rate": 4.90817102318485e-06, "loss": 0.5552200078964233, "step": 4697 }, { "epoch": 1.373738850709168, "grad_norm": 1.798510214490848, "learning_rate": 4.904010587482612e-06, "loss": 0.5466557741165161, "step": 4698 }, { "epoch": 1.3740312911244335, "grad_norm": 1.8536275815794498, "learning_rate": 4.8998513429369135e-06, "loss": 0.6131544709205627, "step": 4699 }, { "epoch": 1.3743237315396988, "grad_norm": 1.7671899353023186, "learning_rate": 4.895693290519954e-06, "loss": 0.5264796018600464, "step": 4700 }, { "epoch": 1.374616171954964, "grad_norm": 1.6582809024037055, "learning_rate": 4.891536431203653e-06, "loss": 0.5179097652435303, "step": 4701 }, { "epoch": 1.3749086123702297, "grad_norm": 1.7203915102871608, "learning_rate": 4.887380765959655e-06, "loss": 0.46007782220840454, "step": 4702 }, { "epoch": 1.375201052785495, "grad_norm": 1.3949646851760964, "learning_rate": 4.8832262957593145e-06, "loss": 0.48182815313339233, "step": 4703 }, { "epoch": 1.3754934932007603, "grad_norm": 1.6488295590740498, "learning_rate": 4.879073021573717e-06, "loss": 0.5334529280662537, "step": 4704 }, { "epoch": 1.3757859336160259, "grad_norm": 1.824410831192183, "learning_rate": 4.874920944373665e-06, "loss": 0.5984899997711182, "step": 4705 }, { "epoch": 1.3760783740312912, "grad_norm": 1.633539262172952, "learning_rate": 4.870770065129681e-06, "loss": 0.46676474809646606, "step": 4706 }, { "epoch": 1.3763708144465565, "grad_norm": 1.6766360321424407, "learning_rate": 4.866620384812008e-06, "loss": 0.4608241617679596, "step": 4707 }, { "epoch": 1.3766632548618218, "grad_norm": 1.6783484732888503, "learning_rate": 4.862471904390609e-06, "loss": 0.5877207517623901, "step": 4708 }, { "epoch": 1.3769556952770872, "grad_norm": 1.9194747868225221, "learning_rate": 4.858324624835164e-06, "loss": 0.5243252515792847, "step": 4709 }, { "epoch": 1.3772481356923527, "grad_norm": 1.7326979192308607, "learning_rate": 4.854178547115078e-06, "loss": 0.528606653213501, "step": 4710 }, { "epoch": 1.377540576107618, "grad_norm": 1.761919042167513, "learning_rate": 4.850033672199469e-06, "loss": 0.46468549966812134, "step": 4711 }, { "epoch": 1.3778330165228834, "grad_norm": 1.5919653348557072, "learning_rate": 4.8458900010571765e-06, "loss": 0.5368300676345825, "step": 4712 }, { "epoch": 1.378125456938149, "grad_norm": 1.6462148743894651, "learning_rate": 4.8417475346567635e-06, "loss": 0.5156906843185425, "step": 4713 }, { "epoch": 1.3784178973534142, "grad_norm": 1.718628393460986, "learning_rate": 4.837606273966496e-06, "loss": 0.5899196863174438, "step": 4714 }, { "epoch": 1.3787103377686796, "grad_norm": 1.6725614455419595, "learning_rate": 4.833466219954376e-06, "loss": 0.5820844769477844, "step": 4715 }, { "epoch": 1.3790027781839451, "grad_norm": 1.5883271974734077, "learning_rate": 4.829327373588113e-06, "loss": 0.4926246404647827, "step": 4716 }, { "epoch": 1.3792952185992104, "grad_norm": 1.5404696535835014, "learning_rate": 4.825189735835138e-06, "loss": 0.5417006611824036, "step": 4717 }, { "epoch": 1.3795876590144758, "grad_norm": 1.5296186550545692, "learning_rate": 4.821053307662599e-06, "loss": 0.4130229949951172, "step": 4718 }, { "epoch": 1.3798800994297413, "grad_norm": 1.279729123751172, "learning_rate": 4.8169180900373615e-06, "loss": 0.4553627371788025, "step": 4719 }, { "epoch": 1.3801725398450067, "grad_norm": 1.3535233614920503, "learning_rate": 4.812784083926005e-06, "loss": 0.523567259311676, "step": 4720 }, { "epoch": 1.380464980260272, "grad_norm": 1.585136917164004, "learning_rate": 4.808651290294832e-06, "loss": 0.4643239378929138, "step": 4721 }, { "epoch": 1.3807574206755373, "grad_norm": 1.4443352165881056, "learning_rate": 4.804519710109856e-06, "loss": 0.4631537199020386, "step": 4722 }, { "epoch": 1.3810498610908026, "grad_norm": 1.9168786498716517, "learning_rate": 4.8003893443368075e-06, "loss": 0.5304736495018005, "step": 4723 }, { "epoch": 1.3813423015060682, "grad_norm": 1.7679231174871453, "learning_rate": 4.79626019394114e-06, "loss": 0.4357796907424927, "step": 4724 }, { "epoch": 1.3816347419213335, "grad_norm": 1.9313439900637919, "learning_rate": 4.7921322598880095e-06, "loss": 0.6693407297134399, "step": 4725 }, { "epoch": 1.3819271823365988, "grad_norm": 1.614277655310262, "learning_rate": 4.788005543142299e-06, "loss": 0.5333320498466492, "step": 4726 }, { "epoch": 1.3822196227518644, "grad_norm": 1.900002017358812, "learning_rate": 4.783880044668603e-06, "loss": 0.5782167911529541, "step": 4727 }, { "epoch": 1.3825120631671297, "grad_norm": 1.8216810622231216, "learning_rate": 4.779755765431231e-06, "loss": 0.581318199634552, "step": 4728 }, { "epoch": 1.382804503582395, "grad_norm": 1.6899321824779212, "learning_rate": 4.775632706394211e-06, "loss": 0.5812945365905762, "step": 4729 }, { "epoch": 1.3830969439976606, "grad_norm": 1.7981132988330288, "learning_rate": 4.771510868521279e-06, "loss": 0.460615873336792, "step": 4730 }, { "epoch": 1.383389384412926, "grad_norm": 1.8316112888726737, "learning_rate": 4.767390252775894e-06, "loss": 0.5934186577796936, "step": 4731 }, { "epoch": 1.3836818248281912, "grad_norm": 1.6355522234245776, "learning_rate": 4.763270860121222e-06, "loss": 0.4928584098815918, "step": 4732 }, { "epoch": 1.3839742652434566, "grad_norm": 1.6231538800234695, "learning_rate": 4.759152691520146e-06, "loss": 0.505489706993103, "step": 4733 }, { "epoch": 1.3842667056587221, "grad_norm": 1.5771553081820557, "learning_rate": 4.755035747935264e-06, "loss": 0.5679354667663574, "step": 4734 }, { "epoch": 1.3845591460739874, "grad_norm": 1.7096467723863036, "learning_rate": 4.750920030328889e-06, "loss": 0.5744746923446655, "step": 4735 }, { "epoch": 1.3848515864892528, "grad_norm": 1.6483531613381477, "learning_rate": 4.7468055396630395e-06, "loss": 0.4953685402870178, "step": 4736 }, { "epoch": 1.385144026904518, "grad_norm": 1.8803927120396235, "learning_rate": 4.742692276899454e-06, "loss": 0.6083461046218872, "step": 4737 }, { "epoch": 1.3854364673197836, "grad_norm": 1.5633925902592396, "learning_rate": 4.738580242999584e-06, "loss": 0.4980735778808594, "step": 4738 }, { "epoch": 1.385728907735049, "grad_norm": 1.4499409145464446, "learning_rate": 4.734469438924594e-06, "loss": 0.46363019943237305, "step": 4739 }, { "epoch": 1.3860213481503143, "grad_norm": 1.818813219831182, "learning_rate": 4.730359865635355e-06, "loss": 0.5946298837661743, "step": 4740 }, { "epoch": 1.3863137885655799, "grad_norm": 1.6327330611392554, "learning_rate": 4.726251524092459e-06, "loss": 0.5630123615264893, "step": 4741 }, { "epoch": 1.3866062289808452, "grad_norm": 1.5382056004014089, "learning_rate": 4.7221444152562045e-06, "loss": 0.5353481769561768, "step": 4742 }, { "epoch": 1.3868986693961105, "grad_norm": 1.7585652476725264, "learning_rate": 4.718038540086602e-06, "loss": 0.5170711874961853, "step": 4743 }, { "epoch": 1.387191109811376, "grad_norm": 1.8043747351160766, "learning_rate": 4.713933899543377e-06, "loss": 0.600492000579834, "step": 4744 }, { "epoch": 1.3874835502266414, "grad_norm": 1.5446435468278237, "learning_rate": 4.709830494585962e-06, "loss": 0.5291938781738281, "step": 4745 }, { "epoch": 1.3877759906419067, "grad_norm": 1.658022225410227, "learning_rate": 4.7057283261735055e-06, "loss": 0.5664317011833191, "step": 4746 }, { "epoch": 1.388068431057172, "grad_norm": 1.8477945736694077, "learning_rate": 4.701627395264866e-06, "loss": 0.606655478477478, "step": 4747 }, { "epoch": 1.3883608714724374, "grad_norm": 1.5930247770190467, "learning_rate": 4.697527702818604e-06, "loss": 0.6160893440246582, "step": 4748 }, { "epoch": 1.388653311887703, "grad_norm": 1.510283707012234, "learning_rate": 4.693429249793002e-06, "loss": 0.45944249629974365, "step": 4749 }, { "epoch": 1.3889457523029682, "grad_norm": 1.7369442621234958, "learning_rate": 4.689332037146049e-06, "loss": 0.5737302303314209, "step": 4750 }, { "epoch": 1.3892381927182336, "grad_norm": 1.7885159565933124, "learning_rate": 4.685236065835443e-06, "loss": 0.4075150787830353, "step": 4751 }, { "epoch": 1.3895306331334991, "grad_norm": 1.7699683741602097, "learning_rate": 4.681141336818592e-06, "loss": 0.5832744836807251, "step": 4752 }, { "epoch": 1.3898230735487644, "grad_norm": 1.6617741591328279, "learning_rate": 4.6770478510526155e-06, "loss": 0.5444560647010803, "step": 4753 }, { "epoch": 1.3901155139640298, "grad_norm": 1.5343212819990357, "learning_rate": 4.672955609494339e-06, "loss": 0.6087433695793152, "step": 4754 }, { "epoch": 1.3904079543792953, "grad_norm": 1.3783003966189016, "learning_rate": 4.6688646131002995e-06, "loss": 0.3781468868255615, "step": 4755 }, { "epoch": 1.3907003947945606, "grad_norm": 2.0008130334792953, "learning_rate": 4.664774862826742e-06, "loss": 0.43719804286956787, "step": 4756 }, { "epoch": 1.390992835209826, "grad_norm": 1.7926138812382992, "learning_rate": 4.660686359629623e-06, "loss": 0.550011932849884, "step": 4757 }, { "epoch": 1.3912852756250915, "grad_norm": 1.670816081047031, "learning_rate": 4.656599104464607e-06, "loss": 0.6060909032821655, "step": 4758 }, { "epoch": 1.3915777160403568, "grad_norm": 1.727898538684726, "learning_rate": 4.652513098287058e-06, "loss": 0.5169791579246521, "step": 4759 }, { "epoch": 1.3918701564556222, "grad_norm": 1.667801698839589, "learning_rate": 4.6484283420520594e-06, "loss": 0.43063026666641235, "step": 4760 }, { "epoch": 1.3921625968708875, "grad_norm": 1.6770983664766483, "learning_rate": 4.644344836714397e-06, "loss": 0.5426993370056152, "step": 4761 }, { "epoch": 1.3924550372861528, "grad_norm": 1.7220159777866155, "learning_rate": 4.6402625832285665e-06, "loss": 0.5260995030403137, "step": 4762 }, { "epoch": 1.3927474777014184, "grad_norm": 1.791130103339175, "learning_rate": 4.63618158254877e-06, "loss": 0.5206680297851562, "step": 4763 }, { "epoch": 1.3930399181166837, "grad_norm": 1.8800757395074672, "learning_rate": 4.632101835628912e-06, "loss": 0.5250430703163147, "step": 4764 }, { "epoch": 1.393332358531949, "grad_norm": 1.5663601185417966, "learning_rate": 4.628023343422616e-06, "loss": 0.5409445762634277, "step": 4765 }, { "epoch": 1.3936247989472146, "grad_norm": 1.6199099812994435, "learning_rate": 4.6239461068832056e-06, "loss": 0.4676284193992615, "step": 4766 }, { "epoch": 1.39391723936248, "grad_norm": 1.6644750420264167, "learning_rate": 4.6198701269637014e-06, "loss": 0.6019079089164734, "step": 4767 }, { "epoch": 1.3942096797777452, "grad_norm": 1.6721679687151758, "learning_rate": 4.615795404616844e-06, "loss": 0.5434615612030029, "step": 4768 }, { "epoch": 1.3945021201930108, "grad_norm": 1.8615818009836036, "learning_rate": 4.611721940795074e-06, "loss": 0.5817157030105591, "step": 4769 }, { "epoch": 1.3947945606082761, "grad_norm": 1.7318982025014367, "learning_rate": 4.607649736450539e-06, "loss": 0.5601100921630859, "step": 4770 }, { "epoch": 1.3950870010235414, "grad_norm": 1.8105361405271991, "learning_rate": 4.6035787925350915e-06, "loss": 0.5955039262771606, "step": 4771 }, { "epoch": 1.3953794414388068, "grad_norm": 1.735716832820506, "learning_rate": 4.5995091100002905e-06, "loss": 0.47491732239723206, "step": 4772 }, { "epoch": 1.3956718818540723, "grad_norm": 1.7916635810918338, "learning_rate": 4.595440689797402e-06, "loss": 0.5451281070709229, "step": 4773 }, { "epoch": 1.3959643222693376, "grad_norm": 1.5652511418689858, "learning_rate": 4.591373532877389e-06, "loss": 0.3973035514354706, "step": 4774 }, { "epoch": 1.396256762684603, "grad_norm": 1.6712606601404056, "learning_rate": 4.587307640190929e-06, "loss": 0.604694128036499, "step": 4775 }, { "epoch": 1.3965492030998683, "grad_norm": 1.3684363761943823, "learning_rate": 4.583243012688397e-06, "loss": 0.4120032489299774, "step": 4776 }, { "epoch": 1.3968416435151338, "grad_norm": 1.5200379644064634, "learning_rate": 4.579179651319878e-06, "loss": 0.4864089787006378, "step": 4777 }, { "epoch": 1.3971340839303992, "grad_norm": 1.7660999886821023, "learning_rate": 4.57511755703516e-06, "loss": 0.5774982571601868, "step": 4778 }, { "epoch": 1.3974265243456645, "grad_norm": 1.7243096372475708, "learning_rate": 4.571056730783725e-06, "loss": 0.48220688104629517, "step": 4779 }, { "epoch": 1.39771896476093, "grad_norm": 1.4235878512993427, "learning_rate": 4.566997173514771e-06, "loss": 0.4636304974555969, "step": 4780 }, { "epoch": 1.3980114051761954, "grad_norm": 1.3469561341500977, "learning_rate": 4.562938886177194e-06, "loss": 0.500522792339325, "step": 4781 }, { "epoch": 1.3983038455914607, "grad_norm": 1.8391525606302594, "learning_rate": 4.558881869719595e-06, "loss": 0.5322657823562622, "step": 4782 }, { "epoch": 1.3985962860067263, "grad_norm": 1.8673725266705359, "learning_rate": 4.554826125090276e-06, "loss": 0.5013759136199951, "step": 4783 }, { "epoch": 1.3988887264219916, "grad_norm": 1.5888002392216285, "learning_rate": 4.550771653237242e-06, "loss": 0.4261836111545563, "step": 4784 }, { "epoch": 1.399181166837257, "grad_norm": 1.6811392186782483, "learning_rate": 4.546718455108205e-06, "loss": 0.6181522607803345, "step": 4785 }, { "epoch": 1.3994736072525222, "grad_norm": 1.7420663714537028, "learning_rate": 4.54266653165057e-06, "loss": 0.6267478466033936, "step": 4786 }, { "epoch": 1.3997660476677876, "grad_norm": 1.841391700351839, "learning_rate": 4.5386158838114535e-06, "loss": 0.5382452607154846, "step": 4787 }, { "epoch": 1.400058488083053, "grad_norm": 1.5361116059310378, "learning_rate": 4.534566512537668e-06, "loss": 0.5973625183105469, "step": 4788 }, { "epoch": 1.4003509284983184, "grad_norm": 1.7115299901221885, "learning_rate": 4.530518418775734e-06, "loss": 0.57401442527771, "step": 4789 }, { "epoch": 1.4006433689135838, "grad_norm": 1.7539136213830773, "learning_rate": 4.52647160347186e-06, "loss": 0.5712965726852417, "step": 4790 }, { "epoch": 1.4009358093288493, "grad_norm": 1.7324506482257287, "learning_rate": 4.52242606757197e-06, "loss": 0.5678268671035767, "step": 4791 }, { "epoch": 1.4012282497441146, "grad_norm": 1.8696367540913243, "learning_rate": 4.518381812021682e-06, "loss": 0.4798399806022644, "step": 4792 }, { "epoch": 1.40152069015938, "grad_norm": 1.570253187142898, "learning_rate": 4.514338837766317e-06, "loss": 0.48918360471725464, "step": 4793 }, { "epoch": 1.4018131305746455, "grad_norm": 1.4711408699123494, "learning_rate": 4.510297145750894e-06, "loss": 0.47836846113204956, "step": 4794 }, { "epoch": 1.4021055709899108, "grad_norm": 1.6409652265079098, "learning_rate": 4.506256736920136e-06, "loss": 0.4956067204475403, "step": 4795 }, { "epoch": 1.4023980114051762, "grad_norm": 1.6571409914414528, "learning_rate": 4.502217612218463e-06, "loss": 0.39146924018859863, "step": 4796 }, { "epoch": 1.4026904518204417, "grad_norm": 1.6190957574837974, "learning_rate": 4.498179772589998e-06, "loss": 0.46657800674438477, "step": 4797 }, { "epoch": 1.402982892235707, "grad_norm": 1.5760103505209448, "learning_rate": 4.4941432189785574e-06, "loss": 0.4949738383293152, "step": 4798 }, { "epoch": 1.4032753326509724, "grad_norm": 1.882895838026707, "learning_rate": 4.490107952327663e-06, "loss": 0.5256912708282471, "step": 4799 }, { "epoch": 1.4035677730662377, "grad_norm": 1.7128737744359326, "learning_rate": 4.486073973580539e-06, "loss": 0.38139551877975464, "step": 4800 }, { "epoch": 1.403860213481503, "grad_norm": 1.8140605273544137, "learning_rate": 4.482041283680095e-06, "loss": 0.5014597177505493, "step": 4801 }, { "epoch": 1.4041526538967686, "grad_norm": 1.8595922924331247, "learning_rate": 4.478009883568951e-06, "loss": 0.5497276186943054, "step": 4802 }, { "epoch": 1.404445094312034, "grad_norm": 2.0532585085438524, "learning_rate": 4.473979774189422e-06, "loss": 0.6098340749740601, "step": 4803 }, { "epoch": 1.4047375347272992, "grad_norm": 1.7416135071315817, "learning_rate": 4.469950956483522e-06, "loss": 0.40206801891326904, "step": 4804 }, { "epoch": 1.4050299751425648, "grad_norm": 1.5567497019384768, "learning_rate": 4.465923431392962e-06, "loss": 0.5362050533294678, "step": 4805 }, { "epoch": 1.40532241555783, "grad_norm": 1.6896555289921489, "learning_rate": 4.461897199859153e-06, "loss": 0.5688962936401367, "step": 4806 }, { "epoch": 1.4056148559730954, "grad_norm": 2.0519988466480723, "learning_rate": 4.457872262823202e-06, "loss": 0.5270779132843018, "step": 4807 }, { "epoch": 1.405907296388361, "grad_norm": 1.9613398978608871, "learning_rate": 4.453848621225913e-06, "loss": 0.5656974911689758, "step": 4808 }, { "epoch": 1.4061997368036263, "grad_norm": 1.517853308784437, "learning_rate": 4.449826276007786e-06, "loss": 0.44072896242141724, "step": 4809 }, { "epoch": 1.4064921772188916, "grad_norm": 1.642033723460973, "learning_rate": 4.445805228109022e-06, "loss": 0.5851765871047974, "step": 4810 }, { "epoch": 1.406784617634157, "grad_norm": 1.71031586004946, "learning_rate": 4.441785478469519e-06, "loss": 0.6174030303955078, "step": 4811 }, { "epoch": 1.4070770580494225, "grad_norm": 1.5609662983326855, "learning_rate": 4.437767028028863e-06, "loss": 0.542346715927124, "step": 4812 }, { "epoch": 1.4073694984646878, "grad_norm": 1.855237193625426, "learning_rate": 4.433749877726345e-06, "loss": 0.4964073598384857, "step": 4813 }, { "epoch": 1.4076619388799532, "grad_norm": 1.798693836443108, "learning_rate": 4.429734028500951e-06, "loss": 0.5309566259384155, "step": 4814 }, { "epoch": 1.4079543792952185, "grad_norm": 1.7569401782763947, "learning_rate": 4.425719481291359e-06, "loss": 0.5799233913421631, "step": 4815 }, { "epoch": 1.408246819710484, "grad_norm": 1.6640340310451727, "learning_rate": 4.4217062370359456e-06, "loss": 0.37344229221343994, "step": 4816 }, { "epoch": 1.4085392601257494, "grad_norm": 1.9633336456325348, "learning_rate": 4.417694296672783e-06, "loss": 0.5752555727958679, "step": 4817 }, { "epoch": 1.4088317005410147, "grad_norm": 1.8625982582112681, "learning_rate": 4.413683661139638e-06, "loss": 0.61701500415802, "step": 4818 }, { "epoch": 1.4091241409562802, "grad_norm": 1.6641617857653193, "learning_rate": 4.409674331373972e-06, "loss": 0.4163259267807007, "step": 4819 }, { "epoch": 1.4094165813715456, "grad_norm": 1.4025408210631873, "learning_rate": 4.40566630831294e-06, "loss": 0.46583253145217896, "step": 4820 }, { "epoch": 1.409709021786811, "grad_norm": 1.739036857290848, "learning_rate": 4.401659592893396e-06, "loss": 0.5230617523193359, "step": 4821 }, { "epoch": 1.4100014622020764, "grad_norm": 1.7435910389535008, "learning_rate": 4.397654186051887e-06, "loss": 0.6351375579833984, "step": 4822 }, { "epoch": 1.4102939026173418, "grad_norm": 1.6526547277716674, "learning_rate": 4.3936500887246445e-06, "loss": 0.5895766615867615, "step": 4823 }, { "epoch": 1.410586343032607, "grad_norm": 1.7357556256264726, "learning_rate": 4.389647301847607e-06, "loss": 0.49772539734840393, "step": 4824 }, { "epoch": 1.4108787834478724, "grad_norm": 1.6867136550948763, "learning_rate": 4.385645826356402e-06, "loss": 0.593197226524353, "step": 4825 }, { "epoch": 1.4111712238631378, "grad_norm": 1.497358571958903, "learning_rate": 4.381645663186348e-06, "loss": 0.4971385598182678, "step": 4826 }, { "epoch": 1.4114636642784033, "grad_norm": 1.772016135609381, "learning_rate": 4.3776468132724605e-06, "loss": 0.5452263951301575, "step": 4827 }, { "epoch": 1.4117561046936686, "grad_norm": 1.9896815505139207, "learning_rate": 4.373649277549446e-06, "loss": 0.6085976362228394, "step": 4828 }, { "epoch": 1.412048545108934, "grad_norm": 1.4346670326917912, "learning_rate": 4.369653056951705e-06, "loss": 0.5594700574874878, "step": 4829 }, { "epoch": 1.4123409855241995, "grad_norm": 1.6570477364640872, "learning_rate": 4.365658152413328e-06, "loss": 0.5099719166755676, "step": 4830 }, { "epoch": 1.4126334259394648, "grad_norm": 1.557110878077197, "learning_rate": 4.3616645648681e-06, "loss": 0.5683532953262329, "step": 4831 }, { "epoch": 1.4129258663547302, "grad_norm": 1.9307182018155977, "learning_rate": 4.3576722952495e-06, "loss": 0.5311406850814819, "step": 4832 }, { "epoch": 1.4132183067699957, "grad_norm": 1.6214149336480879, "learning_rate": 4.353681344490693e-06, "loss": 0.5299100875854492, "step": 4833 }, { "epoch": 1.413510747185261, "grad_norm": 1.6883675181677418, "learning_rate": 4.349691713524546e-06, "loss": 0.5531362891197205, "step": 4834 }, { "epoch": 1.4138031876005264, "grad_norm": 1.7469666557337236, "learning_rate": 4.345703403283603e-06, "loss": 0.5315259099006653, "step": 4835 }, { "epoch": 1.414095628015792, "grad_norm": 2.0019997249517645, "learning_rate": 4.341716414700112e-06, "loss": 0.583083987236023, "step": 4836 }, { "epoch": 1.4143880684310572, "grad_norm": 1.680867008867613, "learning_rate": 4.337730748706005e-06, "loss": 0.5273857116699219, "step": 4837 }, { "epoch": 1.4146805088463226, "grad_norm": 1.6688598484210682, "learning_rate": 4.333746406232908e-06, "loss": 0.4903373718261719, "step": 4838 }, { "epoch": 1.414972949261588, "grad_norm": 1.4926269811940354, "learning_rate": 4.329763388212134e-06, "loss": 0.5807479619979858, "step": 4839 }, { "epoch": 1.4152653896768532, "grad_norm": 1.6552276273685866, "learning_rate": 4.325781695574695e-06, "loss": 0.5613743662834167, "step": 4840 }, { "epoch": 1.4155578300921188, "grad_norm": 1.6028157865716284, "learning_rate": 4.321801329251286e-06, "loss": 0.5801016092300415, "step": 4841 }, { "epoch": 1.415850270507384, "grad_norm": 1.6267997915866552, "learning_rate": 4.3178222901722956e-06, "loss": 0.6412584781646729, "step": 4842 }, { "epoch": 1.4161427109226494, "grad_norm": 1.7251596479619187, "learning_rate": 4.313844579267793e-06, "loss": 0.5687737464904785, "step": 4843 }, { "epoch": 1.416435151337915, "grad_norm": 1.6343964176323358, "learning_rate": 4.309868197467548e-06, "loss": 0.5668497085571289, "step": 4844 }, { "epoch": 1.4167275917531803, "grad_norm": 1.811368112437045, "learning_rate": 4.305893145701015e-06, "loss": 0.5814717411994934, "step": 4845 }, { "epoch": 1.4170200321684456, "grad_norm": 1.9246707148702022, "learning_rate": 4.301919424897339e-06, "loss": 0.5974467992782593, "step": 4846 }, { "epoch": 1.4173124725837112, "grad_norm": 1.5643373795961777, "learning_rate": 4.297947035985351e-06, "loss": 0.48333030939102173, "step": 4847 }, { "epoch": 1.4176049129989765, "grad_norm": 1.7102352976297683, "learning_rate": 4.293975979893576e-06, "loss": 0.5851039886474609, "step": 4848 }, { "epoch": 1.4178973534142418, "grad_norm": 1.4778659468844006, "learning_rate": 4.290006257550221e-06, "loss": 0.5510480403900146, "step": 4849 }, { "epoch": 1.4181897938295072, "grad_norm": 1.6670833236483533, "learning_rate": 4.286037869883187e-06, "loss": 0.6053529977798462, "step": 4850 }, { "epoch": 1.4184822342447727, "grad_norm": 1.5745047113214952, "learning_rate": 4.282070817820059e-06, "loss": 0.471671462059021, "step": 4851 }, { "epoch": 1.418774674660038, "grad_norm": 1.6834167266574704, "learning_rate": 4.278105102288113e-06, "loss": 0.4864043593406677, "step": 4852 }, { "epoch": 1.4190671150753034, "grad_norm": 1.7275065448049989, "learning_rate": 4.274140724214311e-06, "loss": 0.6283255815505981, "step": 4853 }, { "epoch": 1.4193595554905687, "grad_norm": 1.7634272907173199, "learning_rate": 4.270177684525299e-06, "loss": 0.4990651607513428, "step": 4854 }, { "epoch": 1.4196519959058342, "grad_norm": 1.6718595783894241, "learning_rate": 4.2662159841474145e-06, "loss": 0.6053239703178406, "step": 4855 }, { "epoch": 1.4199444363210996, "grad_norm": 1.541217587678611, "learning_rate": 4.262255624006683e-06, "loss": 0.45790988206863403, "step": 4856 }, { "epoch": 1.420236876736365, "grad_norm": 1.5408074963828202, "learning_rate": 4.2582966050288125e-06, "loss": 0.49944519996643066, "step": 4857 }, { "epoch": 1.4205293171516304, "grad_norm": 1.7145691587216874, "learning_rate": 4.2543389281392e-06, "loss": 0.5365482568740845, "step": 4858 }, { "epoch": 1.4208217575668958, "grad_norm": 1.709871732141181, "learning_rate": 4.2503825942629285e-06, "loss": 0.7763599157333374, "step": 4859 }, { "epoch": 1.421114197982161, "grad_norm": 1.6376653647841246, "learning_rate": 4.246427604324768e-06, "loss": 0.6125203371047974, "step": 4860 }, { "epoch": 1.4214066383974266, "grad_norm": 1.8190946758346407, "learning_rate": 4.242473959249172e-06, "loss": 0.6634939312934875, "step": 4861 }, { "epoch": 1.421699078812692, "grad_norm": 1.607723662080485, "learning_rate": 4.238521659960283e-06, "loss": 0.5117735862731934, "step": 4862 }, { "epoch": 1.4219915192279573, "grad_norm": 1.6860730867984624, "learning_rate": 4.234570707381925e-06, "loss": 0.5700962543487549, "step": 4863 }, { "epoch": 1.4222839596432226, "grad_norm": 1.5634193566609638, "learning_rate": 4.23062110243761e-06, "loss": 0.5443791151046753, "step": 4864 }, { "epoch": 1.422576400058488, "grad_norm": 1.4504951290152908, "learning_rate": 4.226672846050538e-06, "loss": 0.5474614500999451, "step": 4865 }, { "epoch": 1.4228688404737535, "grad_norm": 1.9578528314343135, "learning_rate": 4.222725939143582e-06, "loss": 0.5938940048217773, "step": 4866 }, { "epoch": 1.4231612808890188, "grad_norm": 1.720980371359197, "learning_rate": 4.21878038263931e-06, "loss": 0.5010229349136353, "step": 4867 }, { "epoch": 1.4234537213042842, "grad_norm": 1.8142108741121714, "learning_rate": 4.214836177459975e-06, "loss": 0.5186876058578491, "step": 4868 }, { "epoch": 1.4237461617195497, "grad_norm": 1.6608706852165134, "learning_rate": 4.210893324527507e-06, "loss": 0.5998060703277588, "step": 4869 }, { "epoch": 1.424038602134815, "grad_norm": 1.9807145100005583, "learning_rate": 4.206951824763528e-06, "loss": 0.5127147436141968, "step": 4870 }, { "epoch": 1.4243310425500804, "grad_norm": 1.4194980170815183, "learning_rate": 4.203011679089336e-06, "loss": 0.5134439468383789, "step": 4871 }, { "epoch": 1.424623482965346, "grad_norm": 1.728900083762804, "learning_rate": 4.199072888425919e-06, "loss": 0.6244111657142639, "step": 4872 }, { "epoch": 1.4249159233806112, "grad_norm": 1.6442803911967188, "learning_rate": 4.195135453693944e-06, "loss": 0.4431127905845642, "step": 4873 }, { "epoch": 1.4252083637958766, "grad_norm": 1.7030697753848931, "learning_rate": 4.191199375813761e-06, "loss": 0.6479794979095459, "step": 4874 }, { "epoch": 1.4255008042111421, "grad_norm": 2.04011086867295, "learning_rate": 4.187264655705407e-06, "loss": 0.6386070847511292, "step": 4875 }, { "epoch": 1.4257932446264074, "grad_norm": 1.6039579455905961, "learning_rate": 4.183331294288603e-06, "loss": 0.5201597213745117, "step": 4876 }, { "epoch": 1.4260856850416728, "grad_norm": 1.7232164566002766, "learning_rate": 4.179399292482737e-06, "loss": 0.46355581283569336, "step": 4877 }, { "epoch": 1.426378125456938, "grad_norm": 2.2615584884797975, "learning_rate": 4.175468651206898e-06, "loss": 0.5360985398292542, "step": 4878 }, { "epoch": 1.4266705658722034, "grad_norm": 1.552480099700309, "learning_rate": 4.171539371379847e-06, "loss": 0.5545670390129089, "step": 4879 }, { "epoch": 1.426963006287469, "grad_norm": 1.4276797255790008, "learning_rate": 4.167611453920031e-06, "loss": 0.445978581905365, "step": 4880 }, { "epoch": 1.4272554467027343, "grad_norm": 1.7199888948749738, "learning_rate": 4.163684899745576e-06, "loss": 0.5242947340011597, "step": 4881 }, { "epoch": 1.4275478871179996, "grad_norm": 1.7383193525416518, "learning_rate": 4.15975970977429e-06, "loss": 0.5544728636741638, "step": 4882 }, { "epoch": 1.4278403275332652, "grad_norm": 2.073499174067984, "learning_rate": 4.1558358849236626e-06, "loss": 0.5400837063789368, "step": 4883 }, { "epoch": 1.4281327679485305, "grad_norm": 1.6385411261569034, "learning_rate": 4.151913426110864e-06, "loss": 0.5201395153999329, "step": 4884 }, { "epoch": 1.4284252083637958, "grad_norm": 1.7888379069815619, "learning_rate": 4.147992334252745e-06, "loss": 0.4414210319519043, "step": 4885 }, { "epoch": 1.4287176487790614, "grad_norm": 1.7818076981346203, "learning_rate": 4.144072610265838e-06, "loss": 0.6590272188186646, "step": 4886 }, { "epoch": 1.4290100891943267, "grad_norm": 1.4800084296243576, "learning_rate": 4.140154255066356e-06, "loss": 0.4734429717063904, "step": 4887 }, { "epoch": 1.429302529609592, "grad_norm": 1.5398179955798732, "learning_rate": 4.136237269570186e-06, "loss": 0.45204073190689087, "step": 4888 }, { "epoch": 1.4295949700248574, "grad_norm": 1.6199970278575915, "learning_rate": 4.132321654692901e-06, "loss": 0.6570174694061279, "step": 4889 }, { "epoch": 1.429887410440123, "grad_norm": 1.7926483421459931, "learning_rate": 4.128407411349754e-06, "loss": 0.5159077644348145, "step": 4890 }, { "epoch": 1.4301798508553882, "grad_norm": 1.603963849008659, "learning_rate": 4.124494540455674e-06, "loss": 0.5778994560241699, "step": 4891 }, { "epoch": 1.4304722912706536, "grad_norm": 1.4954754441376699, "learning_rate": 4.120583042925273e-06, "loss": 0.4740722179412842, "step": 4892 }, { "epoch": 1.430764731685919, "grad_norm": 1.4416066465695618, "learning_rate": 4.116672919672837e-06, "loss": 0.5561014413833618, "step": 4893 }, { "epoch": 1.4310571721011844, "grad_norm": 1.5040800316270475, "learning_rate": 4.112764171612335e-06, "loss": 0.4834856688976288, "step": 4894 }, { "epoch": 1.4313496125164498, "grad_norm": 1.691313354112802, "learning_rate": 4.108856799657412e-06, "loss": 0.5565547943115234, "step": 4895 }, { "epoch": 1.431642052931715, "grad_norm": 1.8883359305911547, "learning_rate": 4.104950804721395e-06, "loss": 0.5401065349578857, "step": 4896 }, { "epoch": 1.4319344933469806, "grad_norm": 1.3793655379788223, "learning_rate": 4.101046187717284e-06, "loss": 0.4792686700820923, "step": 4897 }, { "epoch": 1.432226933762246, "grad_norm": 1.5922549032476903, "learning_rate": 4.097142949557764e-06, "loss": 0.5255981683731079, "step": 4898 }, { "epoch": 1.4325193741775113, "grad_norm": 1.614736024187036, "learning_rate": 4.093241091155187e-06, "loss": 0.5535293817520142, "step": 4899 }, { "epoch": 1.4328118145927768, "grad_norm": 1.8976199736566215, "learning_rate": 4.089340613421589e-06, "loss": 0.5235373973846436, "step": 4900 }, { "epoch": 1.4331042550080422, "grad_norm": 1.8120415147677507, "learning_rate": 4.085441517268687e-06, "loss": 0.5538134574890137, "step": 4901 }, { "epoch": 1.4333966954233075, "grad_norm": 1.5442149105119904, "learning_rate": 4.081543803607869e-06, "loss": 0.5394395589828491, "step": 4902 }, { "epoch": 1.4336891358385728, "grad_norm": 1.6068663887611208, "learning_rate": 4.077647473350201e-06, "loss": 0.522742509841919, "step": 4903 }, { "epoch": 1.4339815762538382, "grad_norm": 1.6377229499845016, "learning_rate": 4.073752527406429e-06, "loss": 0.559830367565155, "step": 4904 }, { "epoch": 1.4342740166691037, "grad_norm": 1.7578675965544384, "learning_rate": 4.069858966686971e-06, "loss": 0.42535799741744995, "step": 4905 }, { "epoch": 1.434566457084369, "grad_norm": 1.7745987719575682, "learning_rate": 4.065966792101924e-06, "loss": 0.6075177192687988, "step": 4906 }, { "epoch": 1.4348588974996344, "grad_norm": 1.7444570198074862, "learning_rate": 4.06207600456106e-06, "loss": 0.5010570883750916, "step": 4907 }, { "epoch": 1.4351513379149, "grad_norm": 1.621587467371749, "learning_rate": 4.058186604973826e-06, "loss": 0.571307897567749, "step": 4908 }, { "epoch": 1.4354437783301652, "grad_norm": 1.643170818508206, "learning_rate": 4.0542985942493505e-06, "loss": 0.4918866455554962, "step": 4909 }, { "epoch": 1.4357362187454306, "grad_norm": 1.8933520643034856, "learning_rate": 4.050411973296425e-06, "loss": 0.6588176488876343, "step": 4910 }, { "epoch": 1.436028659160696, "grad_norm": 1.9180926902562168, "learning_rate": 4.046526743023526e-06, "loss": 0.7341527938842773, "step": 4911 }, { "epoch": 1.4363210995759614, "grad_norm": 1.7782521784505012, "learning_rate": 4.042642904338801e-06, "loss": 0.5233849287033081, "step": 4912 }, { "epoch": 1.4366135399912268, "grad_norm": 1.6182742405882007, "learning_rate": 4.038760458150079e-06, "loss": 0.5144373178482056, "step": 4913 }, { "epoch": 1.4369059804064923, "grad_norm": 1.55901993468911, "learning_rate": 4.034879405364853e-06, "loss": 0.4520954489707947, "step": 4914 }, { "epoch": 1.4371984208217576, "grad_norm": 1.6208081934978835, "learning_rate": 4.030999746890295e-06, "loss": 0.5632743835449219, "step": 4915 }, { "epoch": 1.437490861237023, "grad_norm": 1.5950473237167822, "learning_rate": 4.027121483633257e-06, "loss": 0.49681180715560913, "step": 4916 }, { "epoch": 1.4377833016522883, "grad_norm": 1.684721295445507, "learning_rate": 4.023244616500257e-06, "loss": 0.5182398557662964, "step": 4917 }, { "epoch": 1.4380757420675536, "grad_norm": 1.6044294787301046, "learning_rate": 4.019369146397493e-06, "loss": 0.5686701536178589, "step": 4918 }, { "epoch": 1.4383681824828192, "grad_norm": 1.682926006912085, "learning_rate": 4.015495074230823e-06, "loss": 0.5668520927429199, "step": 4919 }, { "epoch": 1.4386606228980845, "grad_norm": 1.556828511748538, "learning_rate": 4.011622400905794e-06, "loss": 0.4511116147041321, "step": 4920 }, { "epoch": 1.4389530633133498, "grad_norm": 1.677757503686359, "learning_rate": 4.007751127327618e-06, "loss": 0.4736326336860657, "step": 4921 }, { "epoch": 1.4392455037286154, "grad_norm": 1.68287466179835, "learning_rate": 4.003881254401183e-06, "loss": 0.5705248117446899, "step": 4922 }, { "epoch": 1.4395379441438807, "grad_norm": 1.4732853876066263, "learning_rate": 4.000012783031047e-06, "loss": 0.45527490973472595, "step": 4923 }, { "epoch": 1.439830384559146, "grad_norm": 1.5504418192282816, "learning_rate": 3.996145714121444e-06, "loss": 0.4926735758781433, "step": 4924 }, { "epoch": 1.4401228249744116, "grad_norm": 1.523617382800049, "learning_rate": 3.992280048576276e-06, "loss": 0.42700374126434326, "step": 4925 }, { "epoch": 1.440415265389677, "grad_norm": 1.6783270187790582, "learning_rate": 3.988415787299118e-06, "loss": 0.5833145976066589, "step": 4926 }, { "epoch": 1.4407077058049422, "grad_norm": 1.70461399954195, "learning_rate": 3.98455293119322e-06, "loss": 0.5290282964706421, "step": 4927 }, { "epoch": 1.4410001462202076, "grad_norm": 1.9146871710495363, "learning_rate": 3.9806914811614984e-06, "loss": 0.4489266872406006, "step": 4928 }, { "epoch": 1.441292586635473, "grad_norm": 1.9109717939773812, "learning_rate": 3.97683143810655e-06, "loss": 0.5630865097045898, "step": 4929 }, { "epoch": 1.4415850270507384, "grad_norm": 1.6030492821452516, "learning_rate": 3.972972802930627e-06, "loss": 0.5962105989456177, "step": 4930 }, { "epoch": 1.4418774674660038, "grad_norm": 1.789368844700869, "learning_rate": 3.9691155765356674e-06, "loss": 0.6059410572052002, "step": 4931 }, { "epoch": 1.442169907881269, "grad_norm": 1.6894490985884645, "learning_rate": 3.965259759823272e-06, "loss": 0.5476605296134949, "step": 4932 }, { "epoch": 1.4424623482965346, "grad_norm": 1.7561171676767597, "learning_rate": 3.961405353694716e-06, "loss": 0.70278000831604, "step": 4933 }, { "epoch": 1.4427547887118, "grad_norm": 1.6884311650773163, "learning_rate": 3.9575523590509445e-06, "loss": 0.5838963389396667, "step": 4934 }, { "epoch": 1.4430472291270653, "grad_norm": 1.536536052995308, "learning_rate": 3.95370077679257e-06, "loss": 0.508273720741272, "step": 4935 }, { "epoch": 1.4433396695423308, "grad_norm": 1.4692622152510404, "learning_rate": 3.949850607819876e-06, "loss": 0.5053583383560181, "step": 4936 }, { "epoch": 1.4436321099575962, "grad_norm": 1.5754477318406401, "learning_rate": 3.946001853032818e-06, "loss": 0.5729954242706299, "step": 4937 }, { "epoch": 1.4439245503728615, "grad_norm": 1.833619886253515, "learning_rate": 3.942154513331018e-06, "loss": 0.5261870622634888, "step": 4938 }, { "epoch": 1.444216990788127, "grad_norm": 1.3956467871190747, "learning_rate": 3.9383085896137675e-06, "loss": 0.34802311658859253, "step": 4939 }, { "epoch": 1.4445094312033924, "grad_norm": 1.8896307306874633, "learning_rate": 3.934464082780032e-06, "loss": 0.48302024602890015, "step": 4940 }, { "epoch": 1.4448018716186577, "grad_norm": 1.8507631130251807, "learning_rate": 3.930620993728434e-06, "loss": 0.6649061441421509, "step": 4941 }, { "epoch": 1.445094312033923, "grad_norm": 1.705526500334542, "learning_rate": 3.926779323357278e-06, "loss": 0.5945848822593689, "step": 4942 }, { "epoch": 1.4453867524491884, "grad_norm": 1.5476382055190478, "learning_rate": 3.922939072564528e-06, "loss": 0.4783032536506653, "step": 4943 }, { "epoch": 1.445679192864454, "grad_norm": 1.6453487782833462, "learning_rate": 3.919100242247821e-06, "loss": 0.4619516134262085, "step": 4944 }, { "epoch": 1.4459716332797192, "grad_norm": 1.5327149597771257, "learning_rate": 3.915262833304461e-06, "loss": 0.5652358531951904, "step": 4945 }, { "epoch": 1.4462640736949846, "grad_norm": 1.4734419470243802, "learning_rate": 3.911426846631416e-06, "loss": 0.4523610472679138, "step": 4946 }, { "epoch": 1.44655651411025, "grad_norm": 1.5670101583017915, "learning_rate": 3.9075922831253276e-06, "loss": 0.4914482831954956, "step": 4947 }, { "epoch": 1.4468489545255154, "grad_norm": 1.7113071980283088, "learning_rate": 3.9037591436825005e-06, "loss": 0.4060005247592926, "step": 4948 }, { "epoch": 1.4471413949407808, "grad_norm": 1.9320743237560347, "learning_rate": 3.899927429198908e-06, "loss": 0.49987125396728516, "step": 4949 }, { "epoch": 1.4474338353560463, "grad_norm": 2.0596677045202036, "learning_rate": 3.896097140570189e-06, "loss": 0.6205358505249023, "step": 4950 }, { "epoch": 1.4477262757713116, "grad_norm": 1.7670476784744638, "learning_rate": 3.892268278691651e-06, "loss": 0.5302955508232117, "step": 4951 }, { "epoch": 1.448018716186577, "grad_norm": 1.7962585212488547, "learning_rate": 3.888440844458272e-06, "loss": 0.5225962400436401, "step": 4952 }, { "epoch": 1.4483111566018425, "grad_norm": 1.8247561425410785, "learning_rate": 3.884614838764682e-06, "loss": 0.5030089616775513, "step": 4953 }, { "epoch": 1.4486035970171078, "grad_norm": 1.8999355010605985, "learning_rate": 3.880790262505192e-06, "loss": 0.6060030460357666, "step": 4954 }, { "epoch": 1.4488960374323732, "grad_norm": 1.8229751812699673, "learning_rate": 3.8769671165737725e-06, "loss": 0.5244846343994141, "step": 4955 }, { "epoch": 1.4491884778476385, "grad_norm": 1.4616444667042836, "learning_rate": 3.873145401864061e-06, "loss": 0.46979671716690063, "step": 4956 }, { "epoch": 1.4494809182629038, "grad_norm": 1.8452052569073554, "learning_rate": 3.8693251192693596e-06, "loss": 0.5201131105422974, "step": 4957 }, { "epoch": 1.4497733586781694, "grad_norm": 1.679443447217904, "learning_rate": 3.865506269682638e-06, "loss": 0.5124838352203369, "step": 4958 }, { "epoch": 1.4500657990934347, "grad_norm": 1.830132365627518, "learning_rate": 3.861688853996525e-06, "loss": 0.5613473653793335, "step": 4959 }, { "epoch": 1.4503582395087, "grad_norm": 1.5976816836472583, "learning_rate": 3.857872873103322e-06, "loss": 0.46196621656417847, "step": 4960 }, { "epoch": 1.4506506799239656, "grad_norm": 1.9393165963504067, "learning_rate": 3.8540583278949905e-06, "loss": 0.6427509784698486, "step": 4961 }, { "epoch": 1.450943120339231, "grad_norm": 1.7485862700938968, "learning_rate": 3.850245219263157e-06, "loss": 0.6306381821632385, "step": 4962 }, { "epoch": 1.4512355607544962, "grad_norm": 1.5645194602237047, "learning_rate": 3.846433548099114e-06, "loss": 0.46638673543930054, "step": 4963 }, { "epoch": 1.4515280011697618, "grad_norm": 1.5360842567610604, "learning_rate": 3.842623315293814e-06, "loss": 0.4950143098831177, "step": 4964 }, { "epoch": 1.451820441585027, "grad_norm": 1.5810107141405056, "learning_rate": 3.838814521737875e-06, "loss": 0.45698249340057373, "step": 4965 }, { "epoch": 1.4521128820002924, "grad_norm": 1.6457012436395508, "learning_rate": 3.8350071683215814e-06, "loss": 0.6068260669708252, "step": 4966 }, { "epoch": 1.4524053224155578, "grad_norm": 1.8188775401166803, "learning_rate": 3.831201255934879e-06, "loss": 0.5264104008674622, "step": 4967 }, { "epoch": 1.4526977628308233, "grad_norm": 1.6372667669239498, "learning_rate": 3.827396785467375e-06, "loss": 0.5198315978050232, "step": 4968 }, { "epoch": 1.4529902032460886, "grad_norm": 1.6294906688066837, "learning_rate": 3.823593757808342e-06, "loss": 0.504194438457489, "step": 4969 }, { "epoch": 1.453282643661354, "grad_norm": 1.6016674444230832, "learning_rate": 3.819792173846717e-06, "loss": 0.5018986463546753, "step": 4970 }, { "epoch": 1.4535750840766193, "grad_norm": 1.6893120935929504, "learning_rate": 3.8159920344710936e-06, "loss": 0.4847358465194702, "step": 4971 }, { "epoch": 1.4538675244918848, "grad_norm": 1.6703483014148515, "learning_rate": 3.812193340569733e-06, "loss": 0.547623872756958, "step": 4972 }, { "epoch": 1.4541599649071502, "grad_norm": 2.1389235560975615, "learning_rate": 3.8083960930305562e-06, "loss": 0.534354031085968, "step": 4973 }, { "epoch": 1.4544524053224155, "grad_norm": 1.788418032061747, "learning_rate": 3.8046002927411506e-06, "loss": 0.6123033165931702, "step": 4974 }, { "epoch": 1.454744845737681, "grad_norm": 1.6087574153138633, "learning_rate": 3.8008059405887553e-06, "loss": 0.5222622752189636, "step": 4975 }, { "epoch": 1.4550372861529464, "grad_norm": 1.684901707974216, "learning_rate": 3.7970130374602785e-06, "loss": 0.5568759441375732, "step": 4976 }, { "epoch": 1.4553297265682117, "grad_norm": 1.7459991230210548, "learning_rate": 3.7932215842422903e-06, "loss": 0.5458661317825317, "step": 4977 }, { "epoch": 1.4556221669834772, "grad_norm": 1.6216302867008319, "learning_rate": 3.789431581821019e-06, "loss": 0.48293566703796387, "step": 4978 }, { "epoch": 1.4559146073987426, "grad_norm": 1.893470262052562, "learning_rate": 3.7856430310823546e-06, "loss": 0.647431492805481, "step": 4979 }, { "epoch": 1.456207047814008, "grad_norm": 1.6735249045743477, "learning_rate": 3.7818559329118475e-06, "loss": 0.48039543628692627, "step": 4980 }, { "epoch": 1.4564994882292732, "grad_norm": 1.6704036620696165, "learning_rate": 3.7780702881947084e-06, "loss": 0.6705803871154785, "step": 4981 }, { "epoch": 1.4567919286445385, "grad_norm": 1.7404901320645014, "learning_rate": 3.7742860978158103e-06, "loss": 0.564405083656311, "step": 4982 }, { "epoch": 1.457084369059804, "grad_norm": 1.7081222209997355, "learning_rate": 3.7705033626596844e-06, "loss": 0.5208612084388733, "step": 4983 }, { "epoch": 1.4573768094750694, "grad_norm": 1.909829427679328, "learning_rate": 3.766722083610521e-06, "loss": 0.6230732202529907, "step": 4984 }, { "epoch": 1.4576692498903348, "grad_norm": 1.6601663066885601, "learning_rate": 3.7629422615521747e-06, "loss": 0.5741504430770874, "step": 4985 }, { "epoch": 1.4579616903056003, "grad_norm": 1.584208244849031, "learning_rate": 3.75916389736815e-06, "loss": 0.5321571826934814, "step": 4986 }, { "epoch": 1.4582541307208656, "grad_norm": 1.95685306597155, "learning_rate": 3.7553869919416186e-06, "loss": 0.6367009878158569, "step": 4987 }, { "epoch": 1.458546571136131, "grad_norm": 1.5904913997392975, "learning_rate": 3.75161154615541e-06, "loss": 0.5736235976219177, "step": 4988 }, { "epoch": 1.4588390115513965, "grad_norm": 2.0157501917439866, "learning_rate": 3.7478375608920127e-06, "loss": 0.5799358487129211, "step": 4989 }, { "epoch": 1.4591314519666618, "grad_norm": 1.7515991790236536, "learning_rate": 3.7440650370335675e-06, "loss": 0.6065561771392822, "step": 4990 }, { "epoch": 1.4594238923819272, "grad_norm": 1.4583944256149548, "learning_rate": 3.740293975461886e-06, "loss": 0.5182442665100098, "step": 4991 }, { "epoch": 1.4597163327971927, "grad_norm": 1.6877116508095484, "learning_rate": 3.736524377058429e-06, "loss": 0.5065605640411377, "step": 4992 }, { "epoch": 1.460008773212458, "grad_norm": 1.5024812411134352, "learning_rate": 3.7327562427043163e-06, "loss": 0.44326460361480713, "step": 4993 }, { "epoch": 1.4603012136277234, "grad_norm": 1.9166701258714811, "learning_rate": 3.7289895732803306e-06, "loss": 0.6192547082901001, "step": 4994 }, { "epoch": 1.4605936540429887, "grad_norm": 1.794387571688338, "learning_rate": 3.725224369666899e-06, "loss": 0.5487738847732544, "step": 4995 }, { "epoch": 1.460886094458254, "grad_norm": 1.922772286834415, "learning_rate": 3.7214606327441203e-06, "loss": 0.558982253074646, "step": 4996 }, { "epoch": 1.4611785348735196, "grad_norm": 1.770836311904495, "learning_rate": 3.717698363391744e-06, "loss": 0.5277853012084961, "step": 4997 }, { "epoch": 1.461470975288785, "grad_norm": 1.7748123557502546, "learning_rate": 3.7139375624891795e-06, "loss": 0.6561184525489807, "step": 4998 }, { "epoch": 1.4617634157040502, "grad_norm": 1.5647900159041126, "learning_rate": 3.710178230915489e-06, "loss": 0.46555888652801514, "step": 4999 }, { "epoch": 1.4620558561193158, "grad_norm": 1.7414970962586886, "learning_rate": 3.706420369549394e-06, "loss": 0.5808060765266418, "step": 5000 }, { "epoch": 1.462348296534581, "grad_norm": 1.442227314234909, "learning_rate": 3.7026639792692722e-06, "loss": 0.5407893061637878, "step": 5001 }, { "epoch": 1.4626407369498464, "grad_norm": 2.580423891920115, "learning_rate": 3.6989090609531574e-06, "loss": 0.538393497467041, "step": 5002 }, { "epoch": 1.462933177365112, "grad_norm": 1.8751864874321293, "learning_rate": 3.6951556154787373e-06, "loss": 0.530704140663147, "step": 5003 }, { "epoch": 1.4632256177803773, "grad_norm": 1.4470439364888814, "learning_rate": 3.691403643723359e-06, "loss": 0.43352627754211426, "step": 5004 }, { "epoch": 1.4635180581956426, "grad_norm": 1.6573279039642985, "learning_rate": 3.687653146564025e-06, "loss": 0.6047205924987793, "step": 5005 }, { "epoch": 1.463810498610908, "grad_norm": 1.6556697002732312, "learning_rate": 3.6839041248773857e-06, "loss": 0.44708865880966187, "step": 5006 }, { "epoch": 1.4641029390261735, "grad_norm": 1.6445747944839355, "learning_rate": 3.680156579539753e-06, "loss": 0.5653451681137085, "step": 5007 }, { "epoch": 1.4643953794414388, "grad_norm": 1.750839565103172, "learning_rate": 3.6764105114270966e-06, "loss": 0.49293750524520874, "step": 5008 }, { "epoch": 1.4646878198567042, "grad_norm": 1.7691390827672615, "learning_rate": 3.672665921415034e-06, "loss": 0.5761851072311401, "step": 5009 }, { "epoch": 1.4649802602719695, "grad_norm": 1.7025752756263197, "learning_rate": 3.668922810378841e-06, "loss": 0.5188437700271606, "step": 5010 }, { "epoch": 1.465272700687235, "grad_norm": 1.7765263620108804, "learning_rate": 3.6651811791934476e-06, "loss": 0.5159400701522827, "step": 5011 }, { "epoch": 1.4655651411025004, "grad_norm": 1.4463295265937102, "learning_rate": 3.6614410287334377e-06, "loss": 0.478866845369339, "step": 5012 }, { "epoch": 1.4658575815177657, "grad_norm": 1.6006806590634375, "learning_rate": 3.6577023598730486e-06, "loss": 0.5509926080703735, "step": 5013 }, { "epoch": 1.4661500219330312, "grad_norm": 1.5613591503777215, "learning_rate": 3.6539651734861705e-06, "loss": 0.4872981309890747, "step": 5014 }, { "epoch": 1.4664424623482966, "grad_norm": 1.4569843282992687, "learning_rate": 3.6502294704463493e-06, "loss": 0.47478264570236206, "step": 5015 }, { "epoch": 1.4667349027635619, "grad_norm": 1.765955621655722, "learning_rate": 3.646495251626785e-06, "loss": 0.5140335559844971, "step": 5016 }, { "epoch": 1.4670273431788274, "grad_norm": 1.5785594027919339, "learning_rate": 3.6427625179003223e-06, "loss": 0.41033172607421875, "step": 5017 }, { "epoch": 1.4673197835940928, "grad_norm": 1.7731644033346952, "learning_rate": 3.639031270139468e-06, "loss": 0.4290558099746704, "step": 5018 }, { "epoch": 1.467612224009358, "grad_norm": 1.8964888989060893, "learning_rate": 3.635301509216379e-06, "loss": 0.5903435349464417, "step": 5019 }, { "epoch": 1.4679046644246234, "grad_norm": 1.7302589846174075, "learning_rate": 3.6315732360028655e-06, "loss": 0.6410748958587646, "step": 5020 }, { "epoch": 1.4681971048398887, "grad_norm": 1.584781169707585, "learning_rate": 3.6278464513703858e-06, "loss": 0.5499910712242126, "step": 5021 }, { "epoch": 1.4684895452551543, "grad_norm": 1.4876234400926511, "learning_rate": 3.624121156190056e-06, "loss": 0.4980154037475586, "step": 5022 }, { "epoch": 1.4687819856704196, "grad_norm": 1.7622618315552074, "learning_rate": 3.6203973513326395e-06, "loss": 0.5910995006561279, "step": 5023 }, { "epoch": 1.469074426085685, "grad_norm": 1.837302229581672, "learning_rate": 3.6166750376685534e-06, "loss": 0.6003058552742004, "step": 5024 }, { "epoch": 1.4693668665009505, "grad_norm": 2.0086634437416215, "learning_rate": 3.6129542160678655e-06, "loss": 0.5655561685562134, "step": 5025 }, { "epoch": 1.4696593069162158, "grad_norm": 1.6720399704395428, "learning_rate": 3.609234887400297e-06, "loss": 0.713152289390564, "step": 5026 }, { "epoch": 1.4699517473314812, "grad_norm": 1.3619130802184511, "learning_rate": 3.605517052535219e-06, "loss": 0.41018784046173096, "step": 5027 }, { "epoch": 1.4702441877467467, "grad_norm": 1.7429761856148576, "learning_rate": 3.6018007123416486e-06, "loss": 0.5852759480476379, "step": 5028 }, { "epoch": 1.470536628162012, "grad_norm": 1.6763203292398523, "learning_rate": 3.598085867688259e-06, "loss": 0.5942279696464539, "step": 5029 }, { "epoch": 1.4708290685772774, "grad_norm": 1.5957062749275768, "learning_rate": 3.594372519443374e-06, "loss": 0.6265639662742615, "step": 5030 }, { "epoch": 1.471121508992543, "grad_norm": 1.6944518172910965, "learning_rate": 3.5906606684749668e-06, "loss": 0.4539163112640381, "step": 5031 }, { "epoch": 1.4714139494078082, "grad_norm": 1.8810670575321342, "learning_rate": 3.586950315650658e-06, "loss": 0.5682815909385681, "step": 5032 }, { "epoch": 1.4717063898230736, "grad_norm": 1.5382985580447415, "learning_rate": 3.583241461837721e-06, "loss": 0.5188582539558411, "step": 5033 }, { "epoch": 1.4719988302383389, "grad_norm": 1.923705094705072, "learning_rate": 3.5795341079030777e-06, "loss": 0.501958966255188, "step": 5034 }, { "epoch": 1.4722912706536042, "grad_norm": 1.769758245215022, "learning_rate": 3.5758282547132995e-06, "loss": 0.5748735666275024, "step": 5035 }, { "epoch": 1.4725837110688698, "grad_norm": 1.720811530645175, "learning_rate": 3.5721239031346067e-06, "loss": 0.5796875357627869, "step": 5036 }, { "epoch": 1.472876151484135, "grad_norm": 1.7760443740240528, "learning_rate": 3.56842105403287e-06, "loss": 0.457103431224823, "step": 5037 }, { "epoch": 1.4731685918994004, "grad_norm": 1.607843165834991, "learning_rate": 3.564719708273607e-06, "loss": 0.5300487875938416, "step": 5038 }, { "epoch": 1.473461032314666, "grad_norm": 1.7877129065541937, "learning_rate": 3.5610198667219886e-06, "loss": 0.48143619298934937, "step": 5039 }, { "epoch": 1.4737534727299313, "grad_norm": 1.9171325817627416, "learning_rate": 3.557321530242824e-06, "loss": 0.5523685216903687, "step": 5040 }, { "epoch": 1.4740459131451966, "grad_norm": 1.7367077785146405, "learning_rate": 3.5536246997005785e-06, "loss": 0.5820931196212769, "step": 5041 }, { "epoch": 1.4743383535604622, "grad_norm": 1.6717570524697325, "learning_rate": 3.5499293759593656e-06, "loss": 0.6287394762039185, "step": 5042 }, { "epoch": 1.4746307939757275, "grad_norm": 1.737914835396703, "learning_rate": 3.5462355598829433e-06, "loss": 0.4621254801750183, "step": 5043 }, { "epoch": 1.4749232343909928, "grad_norm": 1.687652415457897, "learning_rate": 3.5425432523347205e-06, "loss": 0.5571160316467285, "step": 5044 }, { "epoch": 1.4752156748062581, "grad_norm": 1.716802557057107, "learning_rate": 3.5388524541777492e-06, "loss": 0.4135715365409851, "step": 5045 }, { "epoch": 1.4755081152215237, "grad_norm": 1.868527213017395, "learning_rate": 3.535163166274733e-06, "loss": 0.524153470993042, "step": 5046 }, { "epoch": 1.475800555636789, "grad_norm": 1.9441558365554423, "learning_rate": 3.5314753894880205e-06, "loss": 0.6330267786979675, "step": 5047 }, { "epoch": 1.4760929960520544, "grad_norm": 1.7270524835767156, "learning_rate": 3.527789124679605e-06, "loss": 0.46210330724716187, "step": 5048 }, { "epoch": 1.4763854364673197, "grad_norm": 1.8799684878196978, "learning_rate": 3.524104372711131e-06, "loss": 0.49293309450149536, "step": 5049 }, { "epoch": 1.4766778768825852, "grad_norm": 1.7601042593478657, "learning_rate": 3.520421134443889e-06, "loss": 0.6196815967559814, "step": 5050 }, { "epoch": 1.4769703172978506, "grad_norm": 1.568738566408146, "learning_rate": 3.5167394107388064e-06, "loss": 0.42622530460357666, "step": 5051 }, { "epoch": 1.4772627577131159, "grad_norm": 1.6087834768838942, "learning_rate": 3.513059202456468e-06, "loss": 0.4475107491016388, "step": 5052 }, { "epoch": 1.4775551981283814, "grad_norm": 1.549049360877832, "learning_rate": 3.5093805104571e-06, "loss": 0.4295683205127716, "step": 5053 }, { "epoch": 1.4778476385436468, "grad_norm": 1.512499491264911, "learning_rate": 3.505703335600573e-06, "loss": 0.5331642627716064, "step": 5054 }, { "epoch": 1.478140078958912, "grad_norm": 1.7125050045051866, "learning_rate": 3.5020276787464058e-06, "loss": 0.5615599155426025, "step": 5055 }, { "epoch": 1.4784325193741776, "grad_norm": 1.470462641632426, "learning_rate": 3.4983535407537618e-06, "loss": 0.5611366033554077, "step": 5056 }, { "epoch": 1.478724959789443, "grad_norm": 2.0861134690908325, "learning_rate": 3.494680922481445e-06, "loss": 0.5891577005386353, "step": 5057 }, { "epoch": 1.4790174002047083, "grad_norm": 1.981139638659905, "learning_rate": 3.491009824787911e-06, "loss": 0.5583761930465698, "step": 5058 }, { "epoch": 1.4793098406199736, "grad_norm": 1.5020288470897978, "learning_rate": 3.4873402485312548e-06, "loss": 0.5001339912414551, "step": 5059 }, { "epoch": 1.479602281035239, "grad_norm": 1.445341864944132, "learning_rate": 3.4836721945692175e-06, "loss": 0.5050641894340515, "step": 5060 }, { "epoch": 1.4798947214505045, "grad_norm": 1.5825314066620513, "learning_rate": 3.4800056637591885e-06, "loss": 0.5377815365791321, "step": 5061 }, { "epoch": 1.4801871618657698, "grad_norm": 1.6490614330323619, "learning_rate": 3.4763406569581892e-06, "loss": 0.5517662763595581, "step": 5062 }, { "epoch": 1.4804796022810351, "grad_norm": 1.7535356829599726, "learning_rate": 3.4726771750228984e-06, "loss": 0.5908320546150208, "step": 5063 }, { "epoch": 1.4807720426963007, "grad_norm": 1.640782634903257, "learning_rate": 3.4690152188096293e-06, "loss": 0.5169299840927124, "step": 5064 }, { "epoch": 1.481064483111566, "grad_norm": 1.5566091974805318, "learning_rate": 3.4653547891743457e-06, "loss": 0.6198064088821411, "step": 5065 }, { "epoch": 1.4813569235268313, "grad_norm": 1.7822104060368598, "learning_rate": 3.4616958869726436e-06, "loss": 0.4971558153629303, "step": 5066 }, { "epoch": 1.481649363942097, "grad_norm": 1.8117473020924466, "learning_rate": 3.4580385130597794e-06, "loss": 0.556640088558197, "step": 5067 }, { "epoch": 1.4819418043573622, "grad_norm": 1.7297037385384992, "learning_rate": 3.4543826682906358e-06, "loss": 0.5336956977844238, "step": 5068 }, { "epoch": 1.4822342447726276, "grad_norm": 1.8723627634024749, "learning_rate": 3.4507283535197454e-06, "loss": 0.5185145735740662, "step": 5069 }, { "epoch": 1.482526685187893, "grad_norm": 1.5962927751585108, "learning_rate": 3.447075569601287e-06, "loss": 0.5460748672485352, "step": 5070 }, { "epoch": 1.4828191256031584, "grad_norm": 1.7486536420516579, "learning_rate": 3.4434243173890667e-06, "loss": 0.5860699415206909, "step": 5071 }, { "epoch": 1.4831115660184238, "grad_norm": 1.5377337582646984, "learning_rate": 3.4397745977365482e-06, "loss": 0.5818450450897217, "step": 5072 }, { "epoch": 1.483404006433689, "grad_norm": 1.6591511763241749, "learning_rate": 3.4361264114968316e-06, "loss": 0.4205876588821411, "step": 5073 }, { "epoch": 1.4836964468489544, "grad_norm": 1.6097740909701606, "learning_rate": 3.4324797595226567e-06, "loss": 0.5503501892089844, "step": 5074 }, { "epoch": 1.48398888726422, "grad_norm": 1.7613851561474803, "learning_rate": 3.4288346426664063e-06, "loss": 0.5388503074645996, "step": 5075 }, { "epoch": 1.4842813276794853, "grad_norm": 1.5726280695427581, "learning_rate": 3.4251910617801054e-06, "loss": 0.5866841673851013, "step": 5076 }, { "epoch": 1.4845737680947506, "grad_norm": 1.7063663913828162, "learning_rate": 3.4215490177154176e-06, "loss": 0.5377970337867737, "step": 5077 }, { "epoch": 1.4848662085100162, "grad_norm": 2.013961516297246, "learning_rate": 3.41790851132365e-06, "loss": 0.6311028003692627, "step": 5078 }, { "epoch": 1.4851586489252815, "grad_norm": 1.7100175604987324, "learning_rate": 3.414269543455747e-06, "loss": 0.5226441621780396, "step": 5079 }, { "epoch": 1.4854510893405468, "grad_norm": 1.73285658375087, "learning_rate": 3.410632114962298e-06, "loss": 0.6306775212287903, "step": 5080 }, { "epoch": 1.4857435297558124, "grad_norm": 1.8061194998201888, "learning_rate": 3.406996226693531e-06, "loss": 0.5432136058807373, "step": 5081 }, { "epoch": 1.4860359701710777, "grad_norm": 1.564250952291821, "learning_rate": 3.403361879499305e-06, "loss": 0.4218754470348358, "step": 5082 }, { "epoch": 1.486328410586343, "grad_norm": 1.7436245532279955, "learning_rate": 3.3997290742291335e-06, "loss": 0.5121650099754333, "step": 5083 }, { "epoch": 1.4866208510016083, "grad_norm": 1.713174617853516, "learning_rate": 3.39609781173216e-06, "loss": 0.5489382743835449, "step": 5084 }, { "epoch": 1.486913291416874, "grad_norm": 1.7492646537049668, "learning_rate": 3.3924680928571694e-06, "loss": 0.4190993309020996, "step": 5085 }, { "epoch": 1.4872057318321392, "grad_norm": 2.012504952292692, "learning_rate": 3.388839918452589e-06, "loss": 0.5927796363830566, "step": 5086 }, { "epoch": 1.4874981722474045, "grad_norm": 1.5385674447124333, "learning_rate": 3.3852132893664803e-06, "loss": 0.43746429681777954, "step": 5087 }, { "epoch": 1.4877906126626699, "grad_norm": 1.592965785800762, "learning_rate": 3.381588206446548e-06, "loss": 0.41599413752555847, "step": 5088 }, { "epoch": 1.4880830530779354, "grad_norm": 1.640030018717508, "learning_rate": 3.3779646705401305e-06, "loss": 0.5803484320640564, "step": 5089 }, { "epoch": 1.4883754934932008, "grad_norm": 1.6162932555816476, "learning_rate": 3.3743426824942082e-06, "loss": 0.5277384519577026, "step": 5090 }, { "epoch": 1.488667933908466, "grad_norm": 1.5149011711130314, "learning_rate": 3.370722243155401e-06, "loss": 0.5842317342758179, "step": 5091 }, { "epoch": 1.4889603743237316, "grad_norm": 1.8602157485440332, "learning_rate": 3.367103353369965e-06, "loss": 0.5394416451454163, "step": 5092 }, { "epoch": 1.489252814738997, "grad_norm": 1.6652727466684587, "learning_rate": 3.3634860139837877e-06, "loss": 0.5457144975662231, "step": 5093 }, { "epoch": 1.4895452551542623, "grad_norm": 1.6270719194791377, "learning_rate": 3.3598702258424044e-06, "loss": 0.49552473425865173, "step": 5094 }, { "epoch": 1.4898376955695278, "grad_norm": 1.8756044563450258, "learning_rate": 3.3562559897909842e-06, "loss": 0.5922214984893799, "step": 5095 }, { "epoch": 1.4901301359847932, "grad_norm": 1.6902952443841357, "learning_rate": 3.35264330667433e-06, "loss": 0.5844507217407227, "step": 5096 }, { "epoch": 1.4904225764000585, "grad_norm": 1.6441848915551236, "learning_rate": 3.3490321773368872e-06, "loss": 0.5096029043197632, "step": 5097 }, { "epoch": 1.4907150168153238, "grad_norm": 1.8296617417124132, "learning_rate": 3.345422602622734e-06, "loss": 0.6343984603881836, "step": 5098 }, { "epoch": 1.4910074572305891, "grad_norm": 1.7032992920741425, "learning_rate": 3.3418145833755875e-06, "loss": 0.5319832563400269, "step": 5099 }, { "epoch": 1.4912998976458547, "grad_norm": 1.8127365107062148, "learning_rate": 3.3382081204388006e-06, "loss": 0.6453676819801331, "step": 5100 }, { "epoch": 1.49159233806112, "grad_norm": 1.7068058578414038, "learning_rate": 3.33460321465536e-06, "loss": 0.5129305720329285, "step": 5101 }, { "epoch": 1.4918847784763853, "grad_norm": 1.7103748262888143, "learning_rate": 3.3309998668678912e-06, "loss": 0.5680958032608032, "step": 5102 }, { "epoch": 1.492177218891651, "grad_norm": 1.654140366409291, "learning_rate": 3.32739807791866e-06, "loss": 0.5959445834159851, "step": 5103 }, { "epoch": 1.4924696593069162, "grad_norm": 1.5546485584978795, "learning_rate": 3.3237978486495536e-06, "loss": 0.5549102425575256, "step": 5104 }, { "epoch": 1.4927620997221815, "grad_norm": 1.5522771682213525, "learning_rate": 3.3201991799021084e-06, "loss": 0.4219816327095032, "step": 5105 }, { "epoch": 1.493054540137447, "grad_norm": 1.8150814493123832, "learning_rate": 3.3166020725174906e-06, "loss": 0.46013522148132324, "step": 5106 }, { "epoch": 1.4933469805527124, "grad_norm": 2.2057259724068885, "learning_rate": 3.3130065273365033e-06, "loss": 0.6013174057006836, "step": 5107 }, { "epoch": 1.4936394209679777, "grad_norm": 1.9081850485789635, "learning_rate": 3.3094125451995827e-06, "loss": 0.7097996473312378, "step": 5108 }, { "epoch": 1.4939318613832433, "grad_norm": 1.6725604100107134, "learning_rate": 3.305820126946799e-06, "loss": 0.6704884767532349, "step": 5109 }, { "epoch": 1.4942243017985086, "grad_norm": 1.735486744932862, "learning_rate": 3.3022292734178605e-06, "loss": 0.5211119651794434, "step": 5110 }, { "epoch": 1.494516742213774, "grad_norm": 1.7718418689676594, "learning_rate": 3.2986399854521065e-06, "loss": 0.5830427408218384, "step": 5111 }, { "epoch": 1.4948091826290393, "grad_norm": 1.574048881929475, "learning_rate": 3.2950522638885106e-06, "loss": 0.5647883415222168, "step": 5112 }, { "epoch": 1.4951016230443046, "grad_norm": 1.3783682279274316, "learning_rate": 3.2914661095656807e-06, "loss": 0.46678125858306885, "step": 5113 }, { "epoch": 1.4953940634595702, "grad_norm": 1.768460226758459, "learning_rate": 3.287881523321863e-06, "loss": 0.5391934514045715, "step": 5114 }, { "epoch": 1.4956865038748355, "grad_norm": 1.532723290545503, "learning_rate": 3.284298505994926e-06, "loss": 0.4039243459701538, "step": 5115 }, { "epoch": 1.4959789442901008, "grad_norm": 1.8718379114919181, "learning_rate": 3.2807170584223802e-06, "loss": 0.6187412738800049, "step": 5116 }, { "epoch": 1.4962713847053664, "grad_norm": 1.730072311160077, "learning_rate": 3.277137181441369e-06, "loss": 0.5165137648582458, "step": 5117 }, { "epoch": 1.4965638251206317, "grad_norm": 1.7402216150888872, "learning_rate": 3.273558875888665e-06, "loss": 0.6315420866012573, "step": 5118 }, { "epoch": 1.496856265535897, "grad_norm": 1.6811341442796868, "learning_rate": 3.269982142600677e-06, "loss": 0.5522993206977844, "step": 5119 }, { "epoch": 1.4971487059511626, "grad_norm": 1.8103742244487522, "learning_rate": 3.266406982413444e-06, "loss": 0.5751636028289795, "step": 5120 }, { "epoch": 1.4974411463664279, "grad_norm": 1.8346826868047423, "learning_rate": 3.262833396162637e-06, "loss": 0.5552358031272888, "step": 5121 }, { "epoch": 1.4977335867816932, "grad_norm": 1.4553347230926987, "learning_rate": 3.259261384683562e-06, "loss": 0.4971257150173187, "step": 5122 }, { "epoch": 1.4980260271969585, "grad_norm": 1.7328825599332134, "learning_rate": 3.2556909488111533e-06, "loss": 0.3803454637527466, "step": 5123 }, { "epoch": 1.498318467612224, "grad_norm": 1.7448185442015292, "learning_rate": 3.25212208937998e-06, "loss": 0.45348531007766724, "step": 5124 }, { "epoch": 1.4986109080274894, "grad_norm": 1.6593501166731528, "learning_rate": 3.2485548072242403e-06, "loss": 0.4839708209037781, "step": 5125 }, { "epoch": 1.4989033484427547, "grad_norm": 1.7004886969570365, "learning_rate": 3.244989103177768e-06, "loss": 0.4743500351905823, "step": 5126 }, { "epoch": 1.49919578885802, "grad_norm": 1.7042585723205583, "learning_rate": 3.241424978074018e-06, "loss": 0.558182954788208, "step": 5127 }, { "epoch": 1.4994882292732856, "grad_norm": 1.5886443982701122, "learning_rate": 3.2378624327460874e-06, "loss": 0.41309911012649536, "step": 5128 }, { "epoch": 1.499780669688551, "grad_norm": 1.7452725700601364, "learning_rate": 3.2343014680266984e-06, "loss": 0.5627751350402832, "step": 5129 }, { "epoch": 1.5000731101038163, "grad_norm": 1.8911076385977756, "learning_rate": 3.230742084748204e-06, "loss": 0.5374714732170105, "step": 5130 }, { "epoch": 1.5003655505190818, "grad_norm": 1.7659792305895352, "learning_rate": 3.2271842837425917e-06, "loss": 0.4264039993286133, "step": 5131 }, { "epoch": 1.5006579909343472, "grad_norm": 1.8312136055327797, "learning_rate": 3.223628065841472e-06, "loss": 0.44204217195510864, "step": 5132 }, { "epoch": 1.5009504313496125, "grad_norm": 1.6892686547824762, "learning_rate": 3.220073431876092e-06, "loss": 0.5322041511535645, "step": 5133 }, { "epoch": 1.501242871764878, "grad_norm": 1.6801975106342348, "learning_rate": 3.216520382677324e-06, "loss": 0.4741417169570923, "step": 5134 }, { "epoch": 1.5015353121801431, "grad_norm": 1.9712166683153383, "learning_rate": 3.212968919075672e-06, "loss": 0.7069851756095886, "step": 5135 }, { "epoch": 1.5018277525954087, "grad_norm": 1.6644566597906936, "learning_rate": 3.2094190419012694e-06, "loss": 0.6049044132232666, "step": 5136 }, { "epoch": 1.5021201930106742, "grad_norm": 1.6420500389509403, "learning_rate": 3.2058707519838817e-06, "loss": 0.556586503982544, "step": 5137 }, { "epoch": 1.5024126334259393, "grad_norm": 1.4612168804015682, "learning_rate": 3.202324050152894e-06, "loss": 0.46489936113357544, "step": 5138 }, { "epoch": 1.5027050738412049, "grad_norm": 1.6808104719845611, "learning_rate": 3.1987789372373292e-06, "loss": 0.5332333445549011, "step": 5139 }, { "epoch": 1.5029975142564702, "grad_norm": 1.5897163584111842, "learning_rate": 3.1952354140658346e-06, "loss": 0.5547586679458618, "step": 5140 }, { "epoch": 1.5032899546717355, "grad_norm": 1.7343008366786887, "learning_rate": 3.1916934814666858e-06, "loss": 0.5500372648239136, "step": 5141 }, { "epoch": 1.503582395087001, "grad_norm": 1.6657659858957796, "learning_rate": 3.1881531402677934e-06, "loss": 0.5065571069717407, "step": 5142 }, { "epoch": 1.5038748355022664, "grad_norm": 2.106659003681642, "learning_rate": 3.1846143912966887e-06, "loss": 0.5942833423614502, "step": 5143 }, { "epoch": 1.5041672759175317, "grad_norm": 1.5318136638727409, "learning_rate": 3.181077235380531e-06, "loss": 0.4089720547199249, "step": 5144 }, { "epoch": 1.5044597163327973, "grad_norm": 1.959628279475518, "learning_rate": 3.1775416733461107e-06, "loss": 0.5360317230224609, "step": 5145 }, { "epoch": 1.5047521567480626, "grad_norm": 1.8497642502339247, "learning_rate": 3.174007706019845e-06, "loss": 0.5403856635093689, "step": 5146 }, { "epoch": 1.505044597163328, "grad_norm": 1.583723666722825, "learning_rate": 3.1704753342277727e-06, "loss": 0.5377147197723389, "step": 5147 }, { "epoch": 1.5053370375785935, "grad_norm": 1.908833197627838, "learning_rate": 3.166944558795567e-06, "loss": 0.49888312816619873, "step": 5148 }, { "epoch": 1.5056294779938586, "grad_norm": 1.604723023798687, "learning_rate": 3.1634153805485245e-06, "loss": 0.5105957984924316, "step": 5149 }, { "epoch": 1.5059219184091241, "grad_norm": 1.530550544138999, "learning_rate": 3.1598878003115694e-06, "loss": 0.5653882026672363, "step": 5150 }, { "epoch": 1.5062143588243895, "grad_norm": 1.7528922447010231, "learning_rate": 3.1563618189092536e-06, "loss": 0.5293145179748535, "step": 5151 }, { "epoch": 1.5065067992396548, "grad_norm": 1.7049104339852403, "learning_rate": 3.1528374371657524e-06, "loss": 0.5852463841438293, "step": 5152 }, { "epoch": 1.5067992396549204, "grad_norm": 1.7074372465536334, "learning_rate": 3.1493146559048683e-06, "loss": 0.5986759662628174, "step": 5153 }, { "epoch": 1.5070916800701857, "grad_norm": 1.8410699226798701, "learning_rate": 3.1457934759500298e-06, "loss": 0.6363133788108826, "step": 5154 }, { "epoch": 1.507384120485451, "grad_norm": 1.7703119171725752, "learning_rate": 3.1422738981242927e-06, "loss": 0.4757901430130005, "step": 5155 }, { "epoch": 1.5076765609007166, "grad_norm": 1.8042941675603332, "learning_rate": 3.1387559232503374e-06, "loss": 0.7614980936050415, "step": 5156 }, { "epoch": 1.5079690013159819, "grad_norm": 1.8353916940267578, "learning_rate": 3.13523955215047e-06, "loss": 0.5739883184432983, "step": 5157 }, { "epoch": 1.5082614417312472, "grad_norm": 1.6405466984899346, "learning_rate": 3.131724785646616e-06, "loss": 0.5893388390541077, "step": 5158 }, { "epoch": 1.5085538821465128, "grad_norm": 1.4613031069188664, "learning_rate": 3.1282116245603333e-06, "loss": 0.5809957981109619, "step": 5159 }, { "epoch": 1.508846322561778, "grad_norm": 1.558509757762028, "learning_rate": 3.124700069712803e-06, "loss": 0.5651090741157532, "step": 5160 }, { "epoch": 1.5091387629770434, "grad_norm": 1.5870160926102073, "learning_rate": 3.1211901219248273e-06, "loss": 0.3736303448677063, "step": 5161 }, { "epoch": 1.509431203392309, "grad_norm": 1.744264206007829, "learning_rate": 3.117681782016838e-06, "loss": 0.5501068234443665, "step": 5162 }, { "epoch": 1.509723643807574, "grad_norm": 1.7377852819958348, "learning_rate": 3.1141750508088865e-06, "loss": 0.6210630536079407, "step": 5163 }, { "epoch": 1.5100160842228396, "grad_norm": 1.5741938339988393, "learning_rate": 3.110669929120651e-06, "loss": 0.5722042322158813, "step": 5164 }, { "epoch": 1.510308524638105, "grad_norm": 1.617906406413033, "learning_rate": 3.107166417771431e-06, "loss": 0.5813776254653931, "step": 5165 }, { "epoch": 1.5106009650533703, "grad_norm": 1.5816945478856634, "learning_rate": 3.1036645175801515e-06, "loss": 0.4911368787288666, "step": 5166 }, { "epoch": 1.5108934054686358, "grad_norm": 1.5812988749732655, "learning_rate": 3.100164229365361e-06, "loss": 0.5136172771453857, "step": 5167 }, { "epoch": 1.5111858458839011, "grad_norm": 1.7202185949801794, "learning_rate": 3.096665553945234e-06, "loss": 0.5746543407440186, "step": 5168 }, { "epoch": 1.5114782862991665, "grad_norm": 1.8577610332100818, "learning_rate": 3.0931684921375572e-06, "loss": 0.4949193000793457, "step": 5169 }, { "epoch": 1.511770726714432, "grad_norm": 1.6744220879324234, "learning_rate": 3.089673044759751e-06, "loss": 0.5732932090759277, "step": 5170 }, { "epoch": 1.5120631671296973, "grad_norm": 1.5865659073822531, "learning_rate": 3.086179212628855e-06, "loss": 0.5329696536064148, "step": 5171 }, { "epoch": 1.5123556075449627, "grad_norm": 1.7970382860153173, "learning_rate": 3.082686996561531e-06, "loss": 0.631770670413971, "step": 5172 }, { "epoch": 1.5126480479602282, "grad_norm": 1.5998021767601671, "learning_rate": 3.0791963973740646e-06, "loss": 0.5183405876159668, "step": 5173 }, { "epoch": 1.5129404883754933, "grad_norm": 1.7133603210505308, "learning_rate": 3.075707415882361e-06, "loss": 0.5616034269332886, "step": 5174 }, { "epoch": 1.5132329287907589, "grad_norm": 1.5912245556380846, "learning_rate": 3.0722200529019477e-06, "loss": 0.48513877391815186, "step": 5175 }, { "epoch": 1.5135253692060244, "grad_norm": 1.574805808870548, "learning_rate": 3.068734309247976e-06, "loss": 0.5226399898529053, "step": 5176 }, { "epoch": 1.5138178096212895, "grad_norm": 1.592402045128277, "learning_rate": 3.0652501857352167e-06, "loss": 0.48817533254623413, "step": 5177 }, { "epoch": 1.514110250036555, "grad_norm": 1.5523305292465257, "learning_rate": 3.061767683178063e-06, "loss": 0.4163327217102051, "step": 5178 }, { "epoch": 1.5144026904518204, "grad_norm": 1.6254224030737643, "learning_rate": 3.058286802390531e-06, "loss": 0.5984256267547607, "step": 5179 }, { "epoch": 1.5146951308670857, "grad_norm": 1.8006518354372911, "learning_rate": 3.054807544186249e-06, "loss": 0.47233515977859497, "step": 5180 }, { "epoch": 1.5149875712823513, "grad_norm": 1.6896342506826862, "learning_rate": 3.0513299093784766e-06, "loss": 0.5545482635498047, "step": 5181 }, { "epoch": 1.5152800116976166, "grad_norm": 1.5925171354605219, "learning_rate": 3.047853898780089e-06, "loss": 0.46200019121170044, "step": 5182 }, { "epoch": 1.515572452112882, "grad_norm": 1.7986358499610187, "learning_rate": 3.0443795132035824e-06, "loss": 0.6146235466003418, "step": 5183 }, { "epoch": 1.5158648925281475, "grad_norm": 1.6180210942837954, "learning_rate": 3.040906753461075e-06, "loss": 0.5653461217880249, "step": 5184 }, { "epoch": 1.5161573329434128, "grad_norm": 1.7782122645526974, "learning_rate": 3.0374356203643008e-06, "loss": 0.6514929533004761, "step": 5185 }, { "epoch": 1.5164497733586781, "grad_norm": 1.6488410817366923, "learning_rate": 3.033966114724618e-06, "loss": 0.48213401436805725, "step": 5186 }, { "epoch": 1.5167422137739437, "grad_norm": 1.8810893536328739, "learning_rate": 3.0304982373530013e-06, "loss": 0.4935530424118042, "step": 5187 }, { "epoch": 1.5170346541892088, "grad_norm": 1.9406636249591702, "learning_rate": 3.0270319890600465e-06, "loss": 0.6435343027114868, "step": 5188 }, { "epoch": 1.5173270946044743, "grad_norm": 1.4722259236044228, "learning_rate": 3.0235673706559675e-06, "loss": 0.49350717663764954, "step": 5189 }, { "epoch": 1.5176195350197397, "grad_norm": 1.636152242750681, "learning_rate": 3.0201043829506015e-06, "loss": 0.4745938777923584, "step": 5190 }, { "epoch": 1.517911975435005, "grad_norm": 1.747247707841839, "learning_rate": 3.0166430267533944e-06, "loss": 0.5867031812667847, "step": 5191 }, { "epoch": 1.5182044158502705, "grad_norm": 2.0836038611604275, "learning_rate": 3.01318330287342e-06, "loss": 0.5477231740951538, "step": 5192 }, { "epoch": 1.5184968562655359, "grad_norm": 1.5825293698408722, "learning_rate": 3.0097252121193687e-06, "loss": 0.5788818597793579, "step": 5193 }, { "epoch": 1.5187892966808012, "grad_norm": 1.5819522244244852, "learning_rate": 3.0062687552995475e-06, "loss": 0.4967714548110962, "step": 5194 }, { "epoch": 1.5190817370960668, "grad_norm": 1.810354148695448, "learning_rate": 3.002813933221882e-06, "loss": 0.6427319645881653, "step": 5195 }, { "epoch": 1.519374177511332, "grad_norm": 1.7324283900525337, "learning_rate": 2.999360746693916e-06, "loss": 0.5615307688713074, "step": 5196 }, { "epoch": 1.5196666179265974, "grad_norm": 1.8017068269121923, "learning_rate": 2.9959091965228102e-06, "loss": 0.6646313667297363, "step": 5197 }, { "epoch": 1.519959058341863, "grad_norm": 1.4648905848591907, "learning_rate": 2.9924592835153454e-06, "loss": 0.47536247968673706, "step": 5198 }, { "epoch": 1.5202514987571283, "grad_norm": 1.701001149097395, "learning_rate": 2.9890110084779157e-06, "loss": 0.5850256681442261, "step": 5199 }, { "epoch": 1.5205439391723936, "grad_norm": 1.6650942638342863, "learning_rate": 2.985564372216536e-06, "loss": 0.5724887251853943, "step": 5200 }, { "epoch": 1.5208363795876592, "grad_norm": 1.6379341688791944, "learning_rate": 2.9821193755368383e-06, "loss": 0.5052510499954224, "step": 5201 }, { "epoch": 1.5211288200029243, "grad_norm": 1.5270508750040293, "learning_rate": 2.9786760192440644e-06, "loss": 0.439144492149353, "step": 5202 }, { "epoch": 1.5214212604181898, "grad_norm": 1.624134940512823, "learning_rate": 2.97523430414308e-06, "loss": 0.4560511112213135, "step": 5203 }, { "epoch": 1.5217137008334551, "grad_norm": 1.9447169329839864, "learning_rate": 2.9717942310383664e-06, "loss": 0.6848068237304688, "step": 5204 }, { "epoch": 1.5220061412487205, "grad_norm": 1.5338251170475576, "learning_rate": 2.9683558007340184e-06, "loss": 0.5541313886642456, "step": 5205 }, { "epoch": 1.522298581663986, "grad_norm": 1.4921475223936211, "learning_rate": 2.964919014033749e-06, "loss": 0.5117338299751282, "step": 5206 }, { "epoch": 1.5225910220792513, "grad_norm": 1.8454970950489444, "learning_rate": 2.9614838717408866e-06, "loss": 0.5164151191711426, "step": 5207 }, { "epoch": 1.5228834624945167, "grad_norm": 1.6612213438595136, "learning_rate": 2.9580503746583744e-06, "loss": 0.5461020469665527, "step": 5208 }, { "epoch": 1.5231759029097822, "grad_norm": 1.580589085309813, "learning_rate": 2.9546185235887705e-06, "loss": 0.4265401065349579, "step": 5209 }, { "epoch": 1.5234683433250475, "grad_norm": 1.822483254200033, "learning_rate": 2.9511883193342505e-06, "loss": 0.47372496128082275, "step": 5210 }, { "epoch": 1.5237607837403129, "grad_norm": 1.5409548150660597, "learning_rate": 2.9477597626966036e-06, "loss": 0.43951019644737244, "step": 5211 }, { "epoch": 1.5240532241555784, "grad_norm": 2.1038432849237862, "learning_rate": 2.9443328544772343e-06, "loss": 0.6514073610305786, "step": 5212 }, { "epoch": 1.5243456645708435, "grad_norm": 1.6794879789857167, "learning_rate": 2.940907595477164e-06, "loss": 0.523013710975647, "step": 5213 }, { "epoch": 1.524638104986109, "grad_norm": 1.6399154124434079, "learning_rate": 2.9374839864970194e-06, "loss": 0.4945281744003296, "step": 5214 }, { "epoch": 1.5249305454013746, "grad_norm": 1.83414324289986, "learning_rate": 2.9340620283370525e-06, "loss": 0.5768609046936035, "step": 5215 }, { "epoch": 1.5252229858166397, "grad_norm": 1.7611799606025424, "learning_rate": 2.930641721797125e-06, "loss": 0.45644205808639526, "step": 5216 }, { "epoch": 1.5255154262319053, "grad_norm": 1.5932175762441756, "learning_rate": 2.92722306767671e-06, "loss": 0.590227484703064, "step": 5217 }, { "epoch": 1.5258078666471706, "grad_norm": 1.8078838529845034, "learning_rate": 2.9238060667749014e-06, "loss": 0.5618122816085815, "step": 5218 }, { "epoch": 1.526100307062436, "grad_norm": 1.9135498575527394, "learning_rate": 2.9203907198904027e-06, "loss": 0.6431877613067627, "step": 5219 }, { "epoch": 1.5263927474777015, "grad_norm": 1.5548470750003383, "learning_rate": 2.916977027821527e-06, "loss": 0.5019941329956055, "step": 5220 }, { "epoch": 1.5266851878929668, "grad_norm": 1.9013308084843434, "learning_rate": 2.913564991366209e-06, "loss": 0.5413016080856323, "step": 5221 }, { "epoch": 1.5269776283082321, "grad_norm": 1.6880920277336984, "learning_rate": 2.9101546113219846e-06, "loss": 0.6546905636787415, "step": 5222 }, { "epoch": 1.5272700687234977, "grad_norm": 1.7013707157233615, "learning_rate": 2.906745888486013e-06, "loss": 0.5689815878868103, "step": 5223 }, { "epoch": 1.527562509138763, "grad_norm": 1.8369848799419313, "learning_rate": 2.9033388236550632e-06, "loss": 0.5134810209274292, "step": 5224 }, { "epoch": 1.5278549495540283, "grad_norm": 1.4280052174004847, "learning_rate": 2.8999334176255143e-06, "loss": 0.4880787134170532, "step": 5225 }, { "epoch": 1.528147389969294, "grad_norm": 1.8292283637694566, "learning_rate": 2.89652967119336e-06, "loss": 0.4345950782299042, "step": 5226 }, { "epoch": 1.528439830384559, "grad_norm": 1.724451812949585, "learning_rate": 2.893127585154205e-06, "loss": 0.43327242136001587, "step": 5227 }, { "epoch": 1.5287322707998245, "grad_norm": 1.780345207484487, "learning_rate": 2.889727160303266e-06, "loss": 0.6423674821853638, "step": 5228 }, { "epoch": 1.5290247112150899, "grad_norm": 1.5540524492201802, "learning_rate": 2.886328397435374e-06, "loss": 0.5263554453849792, "step": 5229 }, { "epoch": 1.5293171516303552, "grad_norm": 1.6433428703006638, "learning_rate": 2.882931297344965e-06, "loss": 0.4111948013305664, "step": 5230 }, { "epoch": 1.5296095920456207, "grad_norm": 1.804627326985323, "learning_rate": 2.8795358608260936e-06, "loss": 0.43803131580352783, "step": 5231 }, { "epoch": 1.529902032460886, "grad_norm": 1.5504311785369362, "learning_rate": 2.8761420886724223e-06, "loss": 0.4708956778049469, "step": 5232 }, { "epoch": 1.5301944728761514, "grad_norm": 1.7185936460565197, "learning_rate": 2.8727499816772265e-06, "loss": 0.5268635749816895, "step": 5233 }, { "epoch": 1.530486913291417, "grad_norm": 1.6977720322438927, "learning_rate": 2.869359540633385e-06, "loss": 0.5092788934707642, "step": 5234 }, { "epoch": 1.5307793537066823, "grad_norm": 1.630735809850627, "learning_rate": 2.8659707663333958e-06, "loss": 0.4603293836116791, "step": 5235 }, { "epoch": 1.5310717941219476, "grad_norm": 1.7857705195277582, "learning_rate": 2.8625836595693646e-06, "loss": 0.545462965965271, "step": 5236 }, { "epoch": 1.5313642345372132, "grad_norm": 1.6146415057105645, "learning_rate": 2.8591982211330073e-06, "loss": 0.511603832244873, "step": 5237 }, { "epoch": 1.5316566749524785, "grad_norm": 1.7935851159627383, "learning_rate": 2.8558144518156485e-06, "loss": 0.5076707601547241, "step": 5238 }, { "epoch": 1.5319491153677438, "grad_norm": 1.7012818042378361, "learning_rate": 2.852432352408224e-06, "loss": 0.5923745632171631, "step": 5239 }, { "epoch": 1.5322415557830094, "grad_norm": 1.8251553548092714, "learning_rate": 2.849051923701279e-06, "loss": 0.5588465332984924, "step": 5240 }, { "epoch": 1.5325339961982745, "grad_norm": 1.6493521356208132, "learning_rate": 2.845673166484969e-06, "loss": 0.6681923270225525, "step": 5241 }, { "epoch": 1.53282643661354, "grad_norm": 1.8683876960783266, "learning_rate": 2.8422960815490564e-06, "loss": 0.5702543258666992, "step": 5242 }, { "epoch": 1.5331188770288053, "grad_norm": 1.8090012581479555, "learning_rate": 2.8389206696829165e-06, "loss": 0.5401744842529297, "step": 5243 }, { "epoch": 1.5334113174440707, "grad_norm": 1.6641276436242072, "learning_rate": 2.8355469316755324e-06, "loss": 0.43371906876564026, "step": 5244 }, { "epoch": 1.5337037578593362, "grad_norm": 1.6323739542625777, "learning_rate": 2.8321748683154893e-06, "loss": 0.5598163604736328, "step": 5245 }, { "epoch": 1.5339961982746015, "grad_norm": 1.8330291281030966, "learning_rate": 2.8288044803909896e-06, "loss": 0.5836831331253052, "step": 5246 }, { "epoch": 1.5342886386898669, "grad_norm": 1.6637462764959579, "learning_rate": 2.8254357686898404e-06, "loss": 0.5308898687362671, "step": 5247 }, { "epoch": 1.5345810791051324, "grad_norm": 1.7589253104867197, "learning_rate": 2.822068733999459e-06, "loss": 0.6104828119277954, "step": 5248 }, { "epoch": 1.5348735195203977, "grad_norm": 1.9266285032289332, "learning_rate": 2.8187033771068685e-06, "loss": 0.48373985290527344, "step": 5249 }, { "epoch": 1.535165959935663, "grad_norm": 1.745809860715047, "learning_rate": 2.8153396987987e-06, "loss": 0.5213532447814941, "step": 5250 }, { "epoch": 1.5354584003509286, "grad_norm": 1.7052291407432676, "learning_rate": 2.811977699861195e-06, "loss": 0.5241051912307739, "step": 5251 }, { "epoch": 1.5357508407661937, "grad_norm": 1.54399807563896, "learning_rate": 2.8086173810801974e-06, "loss": 0.48321712017059326, "step": 5252 }, { "epoch": 1.5360432811814593, "grad_norm": 1.831716416150244, "learning_rate": 2.8052587432411626e-06, "loss": 0.5352765917778015, "step": 5253 }, { "epoch": 1.5363357215967248, "grad_norm": 1.7051244593885417, "learning_rate": 2.8019017871291522e-06, "loss": 0.5402188301086426, "step": 5254 }, { "epoch": 1.53662816201199, "grad_norm": 1.5780940900489064, "learning_rate": 2.798546513528837e-06, "loss": 0.4398813545703888, "step": 5255 }, { "epoch": 1.5369206024272555, "grad_norm": 1.6682503262337565, "learning_rate": 2.7951929232244855e-06, "loss": 0.5661803483963013, "step": 5256 }, { "epoch": 1.5372130428425208, "grad_norm": 1.9389870116334766, "learning_rate": 2.791841016999982e-06, "loss": 0.5051732063293457, "step": 5257 }, { "epoch": 1.5375054832577861, "grad_norm": 1.7323475801875265, "learning_rate": 2.788490795638815e-06, "loss": 0.5712389945983887, "step": 5258 }, { "epoch": 1.5377979236730517, "grad_norm": 1.7189716580722423, "learning_rate": 2.7851422599240773e-06, "loss": 0.6257319450378418, "step": 5259 }, { "epoch": 1.538090364088317, "grad_norm": 1.7862483931054027, "learning_rate": 2.7817954106384704e-06, "loss": 0.5788396596908569, "step": 5260 }, { "epoch": 1.5383828045035823, "grad_norm": 1.508089974245087, "learning_rate": 2.7784502485642985e-06, "loss": 0.37253260612487793, "step": 5261 }, { "epoch": 1.5386752449188479, "grad_norm": 2.206166372523085, "learning_rate": 2.7751067744834726e-06, "loss": 0.6547001004219055, "step": 5262 }, { "epoch": 1.5389676853341132, "grad_norm": 1.551783656656575, "learning_rate": 2.77176498917751e-06, "loss": 0.510914146900177, "step": 5263 }, { "epoch": 1.5392601257493785, "grad_norm": 1.731638922465708, "learning_rate": 2.7684248934275327e-06, "loss": 0.4387754201889038, "step": 5264 }, { "epoch": 1.539552566164644, "grad_norm": 1.573259655998941, "learning_rate": 2.765086488014268e-06, "loss": 0.5640195608139038, "step": 5265 }, { "epoch": 1.5398450065799092, "grad_norm": 2.3327619392306684, "learning_rate": 2.7617497737180508e-06, "loss": 0.5780993103981018, "step": 5266 }, { "epoch": 1.5401374469951747, "grad_norm": 1.7296077762304434, "learning_rate": 2.758414751318813e-06, "loss": 0.5190057158470154, "step": 5267 }, { "epoch": 1.54042988741044, "grad_norm": 1.6180118608432006, "learning_rate": 2.7550814215960964e-06, "loss": 0.4204869270324707, "step": 5268 }, { "epoch": 1.5407223278257054, "grad_norm": 1.5345717637092124, "learning_rate": 2.7517497853290477e-06, "loss": 0.5649294853210449, "step": 5269 }, { "epoch": 1.541014768240971, "grad_norm": 1.8541084629609554, "learning_rate": 2.748419843296416e-06, "loss": 0.49142545461654663, "step": 5270 }, { "epoch": 1.5413072086562363, "grad_norm": 2.006144774477858, "learning_rate": 2.745091596276557e-06, "loss": 0.483539879322052, "step": 5271 }, { "epoch": 1.5415996490715016, "grad_norm": 1.8772157933692841, "learning_rate": 2.7417650450474253e-06, "loss": 0.5400283336639404, "step": 5272 }, { "epoch": 1.5418920894867671, "grad_norm": 1.6915167892784866, "learning_rate": 2.7384401903865844e-06, "loss": 0.5490765571594238, "step": 5273 }, { "epoch": 1.5421845299020325, "grad_norm": 2.267512124400057, "learning_rate": 2.7351170330711975e-06, "loss": 0.5434873700141907, "step": 5274 }, { "epoch": 1.5424769703172978, "grad_norm": 1.8064402200670897, "learning_rate": 2.7317955738780333e-06, "loss": 0.6195025444030762, "step": 5275 }, { "epoch": 1.5427694107325634, "grad_norm": 1.6751288499310806, "learning_rate": 2.728475813583462e-06, "loss": 0.5552260875701904, "step": 5276 }, { "epoch": 1.5430618511478287, "grad_norm": 1.8146552227089312, "learning_rate": 2.725157752963461e-06, "loss": 0.5430501699447632, "step": 5277 }, { "epoch": 1.543354291563094, "grad_norm": 2.1339271947469047, "learning_rate": 2.7218413927936006e-06, "loss": 0.633337676525116, "step": 5278 }, { "epoch": 1.5436467319783596, "grad_norm": 1.6483089945499043, "learning_rate": 2.718526733849062e-06, "loss": 0.4974183738231659, "step": 5279 }, { "epoch": 1.5439391723936247, "grad_norm": 2.06701718299293, "learning_rate": 2.715213776904628e-06, "loss": 0.5840449929237366, "step": 5280 }, { "epoch": 1.5442316128088902, "grad_norm": 1.480832016038464, "learning_rate": 2.7119025227346807e-06, "loss": 0.4684101343154907, "step": 5281 }, { "epoch": 1.5445240532241555, "grad_norm": 1.5849030043466241, "learning_rate": 2.7085929721132078e-06, "loss": 0.48402637243270874, "step": 5282 }, { "epoch": 1.5448164936394209, "grad_norm": 1.6449199299919448, "learning_rate": 2.7052851258137936e-06, "loss": 0.6122831106185913, "step": 5283 }, { "epoch": 1.5451089340546864, "grad_norm": 1.6951661547391625, "learning_rate": 2.701978984609629e-06, "loss": 0.5731217861175537, "step": 5284 }, { "epoch": 1.5454013744699517, "grad_norm": 1.869052563685483, "learning_rate": 2.6986745492735044e-06, "loss": 0.5610803961753845, "step": 5285 }, { "epoch": 1.545693814885217, "grad_norm": 1.4190791359210344, "learning_rate": 2.695371820577811e-06, "loss": 0.46112626791000366, "step": 5286 }, { "epoch": 1.5459862553004826, "grad_norm": 2.1150576387004247, "learning_rate": 2.692070799294542e-06, "loss": 0.5368741154670715, "step": 5287 }, { "epoch": 1.546278695715748, "grad_norm": 1.905327182706658, "learning_rate": 2.688771486195293e-06, "loss": 0.5991438627243042, "step": 5288 }, { "epoch": 1.5465711361310133, "grad_norm": 1.9084615434749013, "learning_rate": 2.685473882051254e-06, "loss": 0.5751149654388428, "step": 5289 }, { "epoch": 1.5468635765462788, "grad_norm": 2.0751264575493247, "learning_rate": 2.682177987633221e-06, "loss": 0.6055437326431274, "step": 5290 }, { "epoch": 1.547156016961544, "grad_norm": 1.8883429200709412, "learning_rate": 2.6788838037115916e-06, "loss": 0.6009221076965332, "step": 5291 }, { "epoch": 1.5474484573768095, "grad_norm": 1.8170478309101001, "learning_rate": 2.6755913310563585e-06, "loss": 0.6071531772613525, "step": 5292 }, { "epoch": 1.547740897792075, "grad_norm": 1.4851824864906211, "learning_rate": 2.6723005704371164e-06, "loss": 0.4102080464363098, "step": 5293 }, { "epoch": 1.5480333382073401, "grad_norm": 1.861843061560023, "learning_rate": 2.6690115226230663e-06, "loss": 0.48021870851516724, "step": 5294 }, { "epoch": 1.5483257786226057, "grad_norm": 1.916351154521063, "learning_rate": 2.665724188382999e-06, "loss": 0.4893236458301544, "step": 5295 }, { "epoch": 1.548618219037871, "grad_norm": 1.611822755629755, "learning_rate": 2.6624385684853095e-06, "loss": 0.6365019083023071, "step": 5296 }, { "epoch": 1.5489106594531363, "grad_norm": 1.8901541843584413, "learning_rate": 2.659154663697995e-06, "loss": 0.46510767936706543, "step": 5297 }, { "epoch": 1.5492030998684019, "grad_norm": 1.4887188273793392, "learning_rate": 2.655872474788641e-06, "loss": 0.4355175495147705, "step": 5298 }, { "epoch": 1.5494955402836672, "grad_norm": 1.3536753107928572, "learning_rate": 2.6525920025244432e-06, "loss": 0.5180836915969849, "step": 5299 }, { "epoch": 1.5497879806989325, "grad_norm": 1.9072335806805663, "learning_rate": 2.6493132476721927e-06, "loss": 0.5597968101501465, "step": 5300 }, { "epoch": 1.550080421114198, "grad_norm": 1.7134796878533993, "learning_rate": 2.646036210998276e-06, "loss": 0.6581016778945923, "step": 5301 }, { "epoch": 1.5503728615294634, "grad_norm": 1.8671635537156963, "learning_rate": 2.642760893268684e-06, "loss": 0.4875848889350891, "step": 5302 }, { "epoch": 1.5506653019447287, "grad_norm": 1.571897962721608, "learning_rate": 2.639487295248999e-06, "loss": 0.4410843253135681, "step": 5303 }, { "epoch": 1.5509577423599943, "grad_norm": 1.8113376757557438, "learning_rate": 2.6362154177044076e-06, "loss": 0.5829580426216125, "step": 5304 }, { "epoch": 1.5512501827752594, "grad_norm": 1.6979805053981243, "learning_rate": 2.6329452613996886e-06, "loss": 0.6281459927558899, "step": 5305 }, { "epoch": 1.551542623190525, "grad_norm": 1.6778942363253981, "learning_rate": 2.629676827099222e-06, "loss": 0.525640606880188, "step": 5306 }, { "epoch": 1.5518350636057903, "grad_norm": 1.710219412838542, "learning_rate": 2.626410115566985e-06, "loss": 0.5219406485557556, "step": 5307 }, { "epoch": 1.5521275040210556, "grad_norm": 1.7812622188686809, "learning_rate": 2.623145127566555e-06, "loss": 0.5120927691459656, "step": 5308 }, { "epoch": 1.5524199444363211, "grad_norm": 1.856533490372594, "learning_rate": 2.6198818638610967e-06, "loss": 0.586410641670227, "step": 5309 }, { "epoch": 1.5527123848515865, "grad_norm": 1.726189213717832, "learning_rate": 2.6166203252133803e-06, "loss": 0.5014485120773315, "step": 5310 }, { "epoch": 1.5530048252668518, "grad_norm": 1.7251785105103856, "learning_rate": 2.6133605123857707e-06, "loss": 0.5087070465087891, "step": 5311 }, { "epoch": 1.5532972656821173, "grad_norm": 1.9411711444593984, "learning_rate": 2.610102426140231e-06, "loss": 0.5829774737358093, "step": 5312 }, { "epoch": 1.5535897060973827, "grad_norm": 1.9403338817582965, "learning_rate": 2.6068460672383166e-06, "loss": 0.5273870229721069, "step": 5313 }, { "epoch": 1.553882146512648, "grad_norm": 1.6781304796241345, "learning_rate": 2.603591436441183e-06, "loss": 0.528778076171875, "step": 5314 }, { "epoch": 1.5541745869279135, "grad_norm": 1.6477790459502455, "learning_rate": 2.600338534509581e-06, "loss": 0.4914259612560272, "step": 5315 }, { "epoch": 1.5544670273431789, "grad_norm": 1.5838952242674544, "learning_rate": 2.597087362203855e-06, "loss": 0.48063480854034424, "step": 5316 }, { "epoch": 1.5547594677584442, "grad_norm": 1.6948007690415343, "learning_rate": 2.593837920283949e-06, "loss": 0.4406088888645172, "step": 5317 }, { "epoch": 1.5550519081737098, "grad_norm": 1.5839061375343884, "learning_rate": 2.590590209509398e-06, "loss": 0.5027159452438354, "step": 5318 }, { "epoch": 1.5553443485889749, "grad_norm": 1.447462212774582, "learning_rate": 2.5873442306393357e-06, "loss": 0.3894188404083252, "step": 5319 }, { "epoch": 1.5556367890042404, "grad_norm": 1.8834380096125083, "learning_rate": 2.584099984432492e-06, "loss": 0.5393104553222656, "step": 5320 }, { "epoch": 1.5559292294195057, "grad_norm": 1.640256381642302, "learning_rate": 2.580857471647186e-06, "loss": 0.5701737999916077, "step": 5321 }, { "epoch": 1.556221669834771, "grad_norm": 1.9050066043706444, "learning_rate": 2.577616693041336e-06, "loss": 0.6173145174980164, "step": 5322 }, { "epoch": 1.5565141102500366, "grad_norm": 1.718666562714064, "learning_rate": 2.5743776493724548e-06, "loss": 0.534600555896759, "step": 5323 }, { "epoch": 1.556806550665302, "grad_norm": 1.7258193752543447, "learning_rate": 2.571140341397651e-06, "loss": 0.5205268859863281, "step": 5324 }, { "epoch": 1.5570989910805673, "grad_norm": 1.9160383524514086, "learning_rate": 2.5679047698736224e-06, "loss": 0.5631835460662842, "step": 5325 }, { "epoch": 1.5573914314958328, "grad_norm": 1.786367865175988, "learning_rate": 2.564670935556667e-06, "loss": 0.5855015516281128, "step": 5326 }, { "epoch": 1.5576838719110981, "grad_norm": 1.538967985462843, "learning_rate": 2.5614388392026735e-06, "loss": 0.5219928026199341, "step": 5327 }, { "epoch": 1.5579763123263635, "grad_norm": 1.6118392863192783, "learning_rate": 2.5582084815671225e-06, "loss": 0.50178462266922, "step": 5328 }, { "epoch": 1.558268752741629, "grad_norm": 1.65351304969076, "learning_rate": 2.554979863405094e-06, "loss": 0.643866777420044, "step": 5329 }, { "epoch": 1.5585611931568941, "grad_norm": 1.6117676019433484, "learning_rate": 2.5517529854712543e-06, "loss": 0.4976714849472046, "step": 5330 }, { "epoch": 1.5588536335721597, "grad_norm": 1.6012275122207043, "learning_rate": 2.5485278485198716e-06, "loss": 0.47352612018585205, "step": 5331 }, { "epoch": 1.5591460739874252, "grad_norm": 1.5967917267320113, "learning_rate": 2.5453044533047955e-06, "loss": 0.6319230794906616, "step": 5332 }, { "epoch": 1.5594385144026903, "grad_norm": 1.9005541524381997, "learning_rate": 2.5420828005794786e-06, "loss": 0.724555253982544, "step": 5333 }, { "epoch": 1.5597309548179559, "grad_norm": 1.987695201205215, "learning_rate": 2.5388628910969625e-06, "loss": 0.6235928535461426, "step": 5334 }, { "epoch": 1.5600233952332212, "grad_norm": 1.9501926966829706, "learning_rate": 2.5356447256098805e-06, "loss": 0.47880417108535767, "step": 5335 }, { "epoch": 1.5603158356484865, "grad_norm": 1.451114547860928, "learning_rate": 2.53242830487046e-06, "loss": 0.3986828029155731, "step": 5336 }, { "epoch": 1.560608276063752, "grad_norm": 1.747029246487311, "learning_rate": 2.529213629630519e-06, "loss": 0.515389084815979, "step": 5337 }, { "epoch": 1.5609007164790174, "grad_norm": 1.4773319281213657, "learning_rate": 2.52600070064147e-06, "loss": 0.611845076084137, "step": 5338 }, { "epoch": 1.5611931568942827, "grad_norm": 1.4758258492307896, "learning_rate": 2.522789518654314e-06, "loss": 0.4417461156845093, "step": 5339 }, { "epoch": 1.5614855973095483, "grad_norm": 1.819505142519117, "learning_rate": 2.519580084419646e-06, "loss": 0.5082979798316956, "step": 5340 }, { "epoch": 1.5617780377248136, "grad_norm": 1.6547823991622836, "learning_rate": 2.516372398687652e-06, "loss": 0.4535973072052002, "step": 5341 }, { "epoch": 1.562070478140079, "grad_norm": 1.5836674832459754, "learning_rate": 2.513166462208111e-06, "loss": 0.5528950095176697, "step": 5342 }, { "epoch": 1.5623629185553445, "grad_norm": 1.9642626952112248, "learning_rate": 2.5099622757303865e-06, "loss": 0.6272662281990051, "step": 5343 }, { "epoch": 1.5626553589706096, "grad_norm": 1.6065246572629583, "learning_rate": 2.506759840003439e-06, "loss": 0.602135181427002, "step": 5344 }, { "epoch": 1.5629477993858751, "grad_norm": 1.6289588222907745, "learning_rate": 2.5035591557758197e-06, "loss": 0.6336733102798462, "step": 5345 }, { "epoch": 1.5632402398011405, "grad_norm": 1.6487862192612195, "learning_rate": 2.500360223795668e-06, "loss": 0.5819063186645508, "step": 5346 }, { "epoch": 1.5635326802164058, "grad_norm": 1.9625665043715836, "learning_rate": 2.4971630448107166e-06, "loss": 0.6384624242782593, "step": 5347 }, { "epoch": 1.5638251206316713, "grad_norm": 1.7408709214756897, "learning_rate": 2.493967619568285e-06, "loss": 0.5495754480361938, "step": 5348 }, { "epoch": 1.5641175610469367, "grad_norm": 1.7544921790911043, "learning_rate": 2.490773948815284e-06, "loss": 0.5661545395851135, "step": 5349 }, { "epoch": 1.564410001462202, "grad_norm": 1.6122536544450556, "learning_rate": 2.487582033298217e-06, "loss": 0.47731083631515503, "step": 5350 }, { "epoch": 1.5647024418774675, "grad_norm": 1.6660059461046859, "learning_rate": 2.4843918737631724e-06, "loss": 0.5081999897956848, "step": 5351 }, { "epoch": 1.5649948822927329, "grad_norm": 1.7409567692793637, "learning_rate": 2.481203470955832e-06, "loss": 0.4803314208984375, "step": 5352 }, { "epoch": 1.5652873227079982, "grad_norm": 1.5751543533365946, "learning_rate": 2.4780168256214687e-06, "loss": 0.5049692392349243, "step": 5353 }, { "epoch": 1.5655797631232637, "grad_norm": 1.5980094392584046, "learning_rate": 2.4748319385049346e-06, "loss": 0.46404945850372314, "step": 5354 }, { "epoch": 1.565872203538529, "grad_norm": 1.8809652221147528, "learning_rate": 2.471648810350681e-06, "loss": 0.426737904548645, "step": 5355 }, { "epoch": 1.5661646439537944, "grad_norm": 1.8658447876398343, "learning_rate": 2.4684674419027445e-06, "loss": 0.511459231376648, "step": 5356 }, { "epoch": 1.56645708436906, "grad_norm": 1.6030611377734088, "learning_rate": 2.4652878339047516e-06, "loss": 0.5199254155158997, "step": 5357 }, { "epoch": 1.566749524784325, "grad_norm": 1.8647690278368902, "learning_rate": 2.4621099870999156e-06, "loss": 0.6220999360084534, "step": 5358 }, { "epoch": 1.5670419651995906, "grad_norm": 1.6243824818203765, "learning_rate": 2.4589339022310386e-06, "loss": 0.598499059677124, "step": 5359 }, { "epoch": 1.567334405614856, "grad_norm": 1.6070369897776633, "learning_rate": 2.455759580040512e-06, "loss": 0.4726351499557495, "step": 5360 }, { "epoch": 1.5676268460301213, "grad_norm": 1.5276631939356082, "learning_rate": 2.452587021270314e-06, "loss": 0.4492379426956177, "step": 5361 }, { "epoch": 1.5679192864453868, "grad_norm": 1.5322598639207448, "learning_rate": 2.4494162266620105e-06, "loss": 0.46546655893325806, "step": 5362 }, { "epoch": 1.5682117268606521, "grad_norm": 1.5784589531224524, "learning_rate": 2.446247196956756e-06, "loss": 0.45048198103904724, "step": 5363 }, { "epoch": 1.5685041672759175, "grad_norm": 1.7001549698958467, "learning_rate": 2.4430799328952935e-06, "loss": 0.543383002281189, "step": 5364 }, { "epoch": 1.568796607691183, "grad_norm": 1.881054972907132, "learning_rate": 2.4399144352179484e-06, "loss": 0.560661256313324, "step": 5365 }, { "epoch": 1.5690890481064483, "grad_norm": 1.7380225532335671, "learning_rate": 2.4367507046646367e-06, "loss": 0.4915887117385864, "step": 5366 }, { "epoch": 1.5693814885217137, "grad_norm": 3.6756946542988396, "learning_rate": 2.433588741974863e-06, "loss": 0.576668918132782, "step": 5367 }, { "epoch": 1.5696739289369792, "grad_norm": 1.9696979271734443, "learning_rate": 2.4304285478877134e-06, "loss": 0.615422248840332, "step": 5368 }, { "epoch": 1.5699663693522443, "grad_norm": 1.7262412669866045, "learning_rate": 2.4272701231418706e-06, "loss": 0.505649209022522, "step": 5369 }, { "epoch": 1.5702588097675099, "grad_norm": 1.6721925296757776, "learning_rate": 2.424113468475593e-06, "loss": 0.4803265929222107, "step": 5370 }, { "epoch": 1.5705512501827754, "grad_norm": 1.5546849518292136, "learning_rate": 2.4209585846267293e-06, "loss": 0.43251073360443115, "step": 5371 }, { "epoch": 1.5708436905980405, "grad_norm": 1.517432850414526, "learning_rate": 2.417805472332716e-06, "loss": 0.6021081209182739, "step": 5372 }, { "epoch": 1.571136131013306, "grad_norm": 1.5438721648404399, "learning_rate": 2.414654132330575e-06, "loss": 0.5236715078353882, "step": 5373 }, { "epoch": 1.5714285714285714, "grad_norm": 1.7272971424194805, "learning_rate": 2.4115045653569092e-06, "loss": 0.45632290840148926, "step": 5374 }, { "epoch": 1.5717210118438367, "grad_norm": 1.51681371819029, "learning_rate": 2.408356772147912e-06, "loss": 0.5745086669921875, "step": 5375 }, { "epoch": 1.5720134522591023, "grad_norm": 1.7235832219181546, "learning_rate": 2.405210753439361e-06, "loss": 0.6032901406288147, "step": 5376 }, { "epoch": 1.5723058926743676, "grad_norm": 1.9887425059975659, "learning_rate": 2.40206650996662e-06, "loss": 0.579899787902832, "step": 5377 }, { "epoch": 1.572598333089633, "grad_norm": 1.84593228973349, "learning_rate": 2.3989240424646355e-06, "loss": 0.5920897722244263, "step": 5378 }, { "epoch": 1.5728907735048985, "grad_norm": 1.6814027292095717, "learning_rate": 2.395783351667941e-06, "loss": 0.5080469846725464, "step": 5379 }, { "epoch": 1.5731832139201638, "grad_norm": 1.6852885660534134, "learning_rate": 2.392644438310654e-06, "loss": 0.6438730955123901, "step": 5380 }, { "epoch": 1.5734756543354291, "grad_norm": 1.5835392817230642, "learning_rate": 2.389507303126475e-06, "loss": 0.6496621370315552, "step": 5381 }, { "epoch": 1.5737680947506947, "grad_norm": 2.056471050614057, "learning_rate": 2.3863719468486925e-06, "loss": 0.5780459642410278, "step": 5382 }, { "epoch": 1.5740605351659598, "grad_norm": 1.6854861118133662, "learning_rate": 2.3832383702101747e-06, "loss": 0.47817176580429077, "step": 5383 }, { "epoch": 1.5743529755812253, "grad_norm": 1.8294128359408837, "learning_rate": 2.3801065739433816e-06, "loss": 0.565629243850708, "step": 5384 }, { "epoch": 1.5746454159964907, "grad_norm": 1.6612699899563574, "learning_rate": 2.376976558780343e-06, "loss": 0.6291453838348389, "step": 5385 }, { "epoch": 1.574937856411756, "grad_norm": 1.538236610732314, "learning_rate": 2.3738483254526856e-06, "loss": 0.5309170484542847, "step": 5386 }, { "epoch": 1.5752302968270215, "grad_norm": 1.5901478294831086, "learning_rate": 2.370721874691614e-06, "loss": 0.36860692501068115, "step": 5387 }, { "epoch": 1.5755227372422869, "grad_norm": 1.4970687777761233, "learning_rate": 2.3675972072279172e-06, "loss": 0.4871997833251953, "step": 5388 }, { "epoch": 1.5758151776575522, "grad_norm": 1.7243858787556505, "learning_rate": 2.3644743237919674e-06, "loss": 0.5318939685821533, "step": 5389 }, { "epoch": 1.5761076180728177, "grad_norm": 1.6509311118620078, "learning_rate": 2.3613532251137205e-06, "loss": 0.5851289629936218, "step": 5390 }, { "epoch": 1.576400058488083, "grad_norm": 1.7554122423009038, "learning_rate": 2.358233911922713e-06, "loss": 0.5535321235656738, "step": 5391 }, { "epoch": 1.5766924989033484, "grad_norm": 1.6614076147074466, "learning_rate": 2.3551163849480664e-06, "loss": 0.5443980693817139, "step": 5392 }, { "epoch": 1.576984939318614, "grad_norm": 1.7236213464789372, "learning_rate": 2.352000644918483e-06, "loss": 0.6381241083145142, "step": 5393 }, { "epoch": 1.5772773797338793, "grad_norm": 1.7284545309348427, "learning_rate": 2.348886692562248e-06, "loss": 0.5710772275924683, "step": 5394 }, { "epoch": 1.5775698201491446, "grad_norm": 1.5430684665624785, "learning_rate": 2.3457745286072307e-06, "loss": 0.5507428050041199, "step": 5395 }, { "epoch": 1.5778622605644101, "grad_norm": 1.4206197407713899, "learning_rate": 2.342664153780878e-06, "loss": 0.4475744366645813, "step": 5396 }, { "epoch": 1.5781547009796753, "grad_norm": 1.636583588423456, "learning_rate": 2.339555568810221e-06, "loss": 0.5237560868263245, "step": 5397 }, { "epoch": 1.5784471413949408, "grad_norm": 1.8224385271688819, "learning_rate": 2.3364487744218735e-06, "loss": 0.513353705406189, "step": 5398 }, { "epoch": 1.5787395818102061, "grad_norm": 1.7286392562782233, "learning_rate": 2.3333437713420305e-06, "loss": 0.5986731052398682, "step": 5399 }, { "epoch": 1.5790320222254715, "grad_norm": 1.5907081834202914, "learning_rate": 2.330240560296466e-06, "loss": 0.5834506750106812, "step": 5400 }, { "epoch": 1.579324462640737, "grad_norm": 1.4316449017872799, "learning_rate": 2.3271391420105384e-06, "loss": 0.4756021499633789, "step": 5401 }, { "epoch": 1.5796169030560023, "grad_norm": 1.828748410964233, "learning_rate": 2.3240395172091847e-06, "loss": 0.5524263978004456, "step": 5402 }, { "epoch": 1.5799093434712677, "grad_norm": 1.7797701447484084, "learning_rate": 2.320941686616922e-06, "loss": 0.5689926743507385, "step": 5403 }, { "epoch": 1.5802017838865332, "grad_norm": 2.079791124123793, "learning_rate": 2.317845650957852e-06, "loss": 0.5737600326538086, "step": 5404 }, { "epoch": 1.5804942243017985, "grad_norm": 2.1591480990218406, "learning_rate": 2.314751410955652e-06, "loss": 0.585626482963562, "step": 5405 }, { "epoch": 1.5807866647170639, "grad_norm": 1.3475179143489473, "learning_rate": 2.3116589673335833e-06, "loss": 0.4410518407821655, "step": 5406 }, { "epoch": 1.5810791051323294, "grad_norm": 1.4002471500541231, "learning_rate": 2.308568320814487e-06, "loss": 0.49071764945983887, "step": 5407 }, { "epoch": 1.5813715455475945, "grad_norm": 1.7384943405251394, "learning_rate": 2.3054794721207796e-06, "loss": 0.5332186818122864, "step": 5408 }, { "epoch": 1.58166398596286, "grad_norm": 1.672632129609112, "learning_rate": 2.3023924219744607e-06, "loss": 0.4655637741088867, "step": 5409 }, { "epoch": 1.5819564263781256, "grad_norm": 1.8700821530052487, "learning_rate": 2.2993071710971115e-06, "loss": 0.4226027727127075, "step": 5410 }, { "epoch": 1.5822488667933907, "grad_norm": 1.662889108823369, "learning_rate": 2.2962237202098903e-06, "loss": 0.5582948923110962, "step": 5411 }, { "epoch": 1.5825413072086563, "grad_norm": 1.9177043486104604, "learning_rate": 2.293142070033535e-06, "loss": 0.6695314645767212, "step": 5412 }, { "epoch": 1.5828337476239216, "grad_norm": 1.3346239854361734, "learning_rate": 2.2900622212883617e-06, "loss": 0.39315858483314514, "step": 5413 }, { "epoch": 1.583126188039187, "grad_norm": 1.6781692583647863, "learning_rate": 2.2869841746942666e-06, "loss": 0.5034759044647217, "step": 5414 }, { "epoch": 1.5834186284544525, "grad_norm": 1.9091862181504, "learning_rate": 2.2839079309707256e-06, "loss": 0.6739548444747925, "step": 5415 }, { "epoch": 1.5837110688697178, "grad_norm": 1.700292089346711, "learning_rate": 2.2808334908367914e-06, "loss": 0.4091438949108124, "step": 5416 }, { "epoch": 1.5840035092849831, "grad_norm": 1.9132208987373394, "learning_rate": 2.277760855011094e-06, "loss": 0.5543409585952759, "step": 5417 }, { "epoch": 1.5842959497002487, "grad_norm": 1.5448108643055853, "learning_rate": 2.2746900242118487e-06, "loss": 0.44680702686309814, "step": 5418 }, { "epoch": 1.584588390115514, "grad_norm": 1.812422444695138, "learning_rate": 2.271620999156837e-06, "loss": 0.604156494140625, "step": 5419 }, { "epoch": 1.5848808305307793, "grad_norm": 1.7746704953171426, "learning_rate": 2.268553780563427e-06, "loss": 0.6055774688720703, "step": 5420 }, { "epoch": 1.5851732709460449, "grad_norm": 1.6413153541100303, "learning_rate": 2.265488369148563e-06, "loss": 0.5826502442359924, "step": 5421 }, { "epoch": 1.58546571136131, "grad_norm": 1.6438604610732335, "learning_rate": 2.2624247656287658e-06, "loss": 0.61782306432724, "step": 5422 }, { "epoch": 1.5857581517765755, "grad_norm": 1.6412325546038886, "learning_rate": 2.2593629707201348e-06, "loss": 0.5561526417732239, "step": 5423 }, { "epoch": 1.5860505921918409, "grad_norm": 1.934339107757701, "learning_rate": 2.2563029851383447e-06, "loss": 0.6122138500213623, "step": 5424 }, { "epoch": 1.5863430326071062, "grad_norm": 1.7721974769204, "learning_rate": 2.2532448095986504e-06, "loss": 0.5694067478179932, "step": 5425 }, { "epoch": 1.5866354730223717, "grad_norm": 2.0424311158796145, "learning_rate": 2.2501884448158804e-06, "loss": 0.5243874788284302, "step": 5426 }, { "epoch": 1.586927913437637, "grad_norm": 1.8166715080001115, "learning_rate": 2.2471338915044414e-06, "loss": 0.5144485831260681, "step": 5427 }, { "epoch": 1.5872203538529024, "grad_norm": 1.853424108367526, "learning_rate": 2.244081150378318e-06, "loss": 0.5013881325721741, "step": 5428 }, { "epoch": 1.587512794268168, "grad_norm": 1.7554305935150418, "learning_rate": 2.2410302221510704e-06, "loss": 0.45199382305145264, "step": 5429 }, { "epoch": 1.5878052346834333, "grad_norm": 1.7321007114143003, "learning_rate": 2.2379811075358315e-06, "loss": 0.4699060022830963, "step": 5430 }, { "epoch": 1.5880976750986986, "grad_norm": 1.6542253790144112, "learning_rate": 2.234933807245314e-06, "loss": 0.6530928611755371, "step": 5431 }, { "epoch": 1.5883901155139641, "grad_norm": 2.1734435533671337, "learning_rate": 2.2318883219918075e-06, "loss": 0.653563380241394, "step": 5432 }, { "epoch": 1.5886825559292295, "grad_norm": 1.6977334736027891, "learning_rate": 2.2288446524871743e-06, "loss": 0.5283595323562622, "step": 5433 }, { "epoch": 1.5889749963444948, "grad_norm": 1.8947978942641126, "learning_rate": 2.2258027994428543e-06, "loss": 0.4382442831993103, "step": 5434 }, { "epoch": 1.5892674367597603, "grad_norm": 1.5530659953902877, "learning_rate": 2.2227627635698624e-06, "loss": 0.427448570728302, "step": 5435 }, { "epoch": 1.5895598771750254, "grad_norm": 1.9154573086486193, "learning_rate": 2.2197245455787875e-06, "loss": 0.5794345140457153, "step": 5436 }, { "epoch": 1.589852317590291, "grad_norm": 1.7112908574959096, "learning_rate": 2.2166881461797953e-06, "loss": 0.4996277987957001, "step": 5437 }, { "epoch": 1.5901447580055563, "grad_norm": 1.6191576283665394, "learning_rate": 2.213653566082625e-06, "loss": 0.580248236656189, "step": 5438 }, { "epoch": 1.5904371984208217, "grad_norm": 1.7775881602556973, "learning_rate": 2.210620805996594e-06, "loss": 0.5173758864402771, "step": 5439 }, { "epoch": 1.5907296388360872, "grad_norm": 1.9899233221127093, "learning_rate": 2.2075898666305908e-06, "loss": 0.5336873531341553, "step": 5440 }, { "epoch": 1.5910220792513525, "grad_norm": 1.6076505919691177, "learning_rate": 2.204560748693074e-06, "loss": 0.44921910762786865, "step": 5441 }, { "epoch": 1.5913145196666179, "grad_norm": 1.6059480320951056, "learning_rate": 2.201533452892086e-06, "loss": 0.46475526690483093, "step": 5442 }, { "epoch": 1.5916069600818834, "grad_norm": 1.9029311381102771, "learning_rate": 2.1985079799352383e-06, "loss": 0.6213991045951843, "step": 5443 }, { "epoch": 1.5918994004971487, "grad_norm": 1.7215123021526133, "learning_rate": 2.1954843305297138e-06, "loss": 0.5271334648132324, "step": 5444 }, { "epoch": 1.592191840912414, "grad_norm": 2.0224569757299333, "learning_rate": 2.192462505382277e-06, "loss": 0.6957610249519348, "step": 5445 }, { "epoch": 1.5924842813276796, "grad_norm": 1.474394106198892, "learning_rate": 2.1894425051992587e-06, "loss": 0.4935681223869324, "step": 5446 }, { "epoch": 1.5927767217429447, "grad_norm": 1.8779887346615283, "learning_rate": 2.1864243306865663e-06, "loss": 0.7389976978302002, "step": 5447 }, { "epoch": 1.5930691621582103, "grad_norm": 1.6663547791548505, "learning_rate": 2.183407982549679e-06, "loss": 0.4711039662361145, "step": 5448 }, { "epoch": 1.5933616025734758, "grad_norm": 1.7966119865723598, "learning_rate": 2.180393461493654e-06, "loss": 0.5640024542808533, "step": 5449 }, { "epoch": 1.593654042988741, "grad_norm": 1.8836976245237465, "learning_rate": 2.1773807682231095e-06, "loss": 0.5471343994140625, "step": 5450 }, { "epoch": 1.5939464834040065, "grad_norm": 1.948314000978572, "learning_rate": 2.1743699034422483e-06, "loss": 0.4971361756324768, "step": 5451 }, { "epoch": 1.5942389238192718, "grad_norm": 1.816359724345797, "learning_rate": 2.1713608678548414e-06, "loss": 0.6338681578636169, "step": 5452 }, { "epoch": 1.5945313642345371, "grad_norm": 1.5944974938870278, "learning_rate": 2.168353662164233e-06, "loss": 0.5218038558959961, "step": 5453 }, { "epoch": 1.5948238046498027, "grad_norm": 1.4835669226996004, "learning_rate": 2.165348287073339e-06, "loss": 0.44414108991622925, "step": 5454 }, { "epoch": 1.595116245065068, "grad_norm": 1.741912524884312, "learning_rate": 2.162344743284647e-06, "loss": 0.5994665622711182, "step": 5455 }, { "epoch": 1.5954086854803333, "grad_norm": 1.8002543813503216, "learning_rate": 2.159343031500217e-06, "loss": 0.6745023727416992, "step": 5456 }, { "epoch": 1.5957011258955989, "grad_norm": 1.8583415901166598, "learning_rate": 2.1563431524216825e-06, "loss": 0.4678364396095276, "step": 5457 }, { "epoch": 1.5959935663108642, "grad_norm": 1.5733048792098263, "learning_rate": 2.1533451067502464e-06, "loss": 0.5792031288146973, "step": 5458 }, { "epoch": 1.5962860067261295, "grad_norm": 1.575360514250564, "learning_rate": 2.1503488951866822e-06, "loss": 0.48152512311935425, "step": 5459 }, { "epoch": 1.596578447141395, "grad_norm": 1.6753593421486697, "learning_rate": 2.147354518431339e-06, "loss": 0.4407780170440674, "step": 5460 }, { "epoch": 1.5968708875566602, "grad_norm": 1.5845476508430212, "learning_rate": 2.1443619771841308e-06, "loss": 0.41062241792678833, "step": 5461 }, { "epoch": 1.5971633279719257, "grad_norm": 1.6329985009235597, "learning_rate": 2.1413712721445478e-06, "loss": 0.4564778208732605, "step": 5462 }, { "epoch": 1.597455768387191, "grad_norm": 1.7819738842734478, "learning_rate": 2.1383824040116474e-06, "loss": 0.4347888231277466, "step": 5463 }, { "epoch": 1.5977482088024564, "grad_norm": 1.7547086253653914, "learning_rate": 2.1353953734840615e-06, "loss": 0.574216902256012, "step": 5464 }, { "epoch": 1.598040649217722, "grad_norm": 1.5449681232026575, "learning_rate": 2.1324101812599884e-06, "loss": 0.46540650725364685, "step": 5465 }, { "epoch": 1.5983330896329873, "grad_norm": 1.7330971380509632, "learning_rate": 2.129426828037201e-06, "loss": 0.5446870923042297, "step": 5466 }, { "epoch": 1.5986255300482526, "grad_norm": 1.5387720739202952, "learning_rate": 2.126445314513038e-06, "loss": 0.5442406535148621, "step": 5467 }, { "epoch": 1.5989179704635181, "grad_norm": 1.5552673745283687, "learning_rate": 2.1234656413844114e-06, "loss": 0.48960334062576294, "step": 5468 }, { "epoch": 1.5992104108787835, "grad_norm": 1.6554781479614895, "learning_rate": 2.1204878093477998e-06, "loss": 0.5053935647010803, "step": 5469 }, { "epoch": 1.5995028512940488, "grad_norm": 1.9853825289751812, "learning_rate": 2.117511819099256e-06, "loss": 0.5984711647033691, "step": 5470 }, { "epoch": 1.5997952917093143, "grad_norm": 1.7887732493049897, "learning_rate": 2.1145376713344e-06, "loss": 0.6060935258865356, "step": 5471 }, { "epoch": 1.6000877321245797, "grad_norm": 1.7731884284372257, "learning_rate": 2.111565366748416e-06, "loss": 0.5640311241149902, "step": 5472 }, { "epoch": 1.600380172539845, "grad_norm": 1.4780823569090165, "learning_rate": 2.1085949060360654e-06, "loss": 0.5127131342887878, "step": 5473 }, { "epoch": 1.6006726129551105, "grad_norm": 1.7137118890776333, "learning_rate": 2.1056262898916747e-06, "loss": 0.5630159378051758, "step": 5474 }, { "epoch": 1.6009650533703756, "grad_norm": 1.6419339983794916, "learning_rate": 2.1026595190091403e-06, "loss": 0.4511195421218872, "step": 5475 }, { "epoch": 1.6012574937856412, "grad_norm": 1.5933389134682139, "learning_rate": 2.099694594081927e-06, "loss": 0.47073638439178467, "step": 5476 }, { "epoch": 1.6015499342009065, "grad_norm": 1.7678159005173808, "learning_rate": 2.0967315158030675e-06, "loss": 0.47757452726364136, "step": 5477 }, { "epoch": 1.6018423746161718, "grad_norm": 1.612539233178663, "learning_rate": 2.093770284865164e-06, "loss": 0.4703200161457062, "step": 5478 }, { "epoch": 1.6021348150314374, "grad_norm": 1.7112390228319339, "learning_rate": 2.090810901960385e-06, "loss": 0.47457796335220337, "step": 5479 }, { "epoch": 1.6024272554467027, "grad_norm": 1.6069409002673796, "learning_rate": 2.087853367780469e-06, "loss": 0.4907105267047882, "step": 5480 }, { "epoch": 1.602719695861968, "grad_norm": 1.8859078577608002, "learning_rate": 2.0848976830167224e-06, "loss": 0.5329782962799072, "step": 5481 }, { "epoch": 1.6030121362772336, "grad_norm": 1.8407304692969428, "learning_rate": 2.0819438483600197e-06, "loss": 0.45858579874038696, "step": 5482 }, { "epoch": 1.603304576692499, "grad_norm": 1.7103287599993058, "learning_rate": 2.0789918645007977e-06, "loss": 0.47545814514160156, "step": 5483 }, { "epoch": 1.6035970171077643, "grad_norm": 1.7521375813446352, "learning_rate": 2.076041732129066e-06, "loss": 0.5482660531997681, "step": 5484 }, { "epoch": 1.6038894575230298, "grad_norm": 1.650951498750666, "learning_rate": 2.0730934519344025e-06, "loss": 0.5252633094787598, "step": 5485 }, { "epoch": 1.604181897938295, "grad_norm": 2.7727108215969882, "learning_rate": 2.0701470246059472e-06, "loss": 0.5400367379188538, "step": 5486 }, { "epoch": 1.6044743383535605, "grad_norm": 1.5423948281806983, "learning_rate": 2.0672024508324107e-06, "loss": 0.4788953363895416, "step": 5487 }, { "epoch": 1.604766778768826, "grad_norm": 1.6092306606930025, "learning_rate": 2.0642597313020685e-06, "loss": 0.5430850982666016, "step": 5488 }, { "epoch": 1.6050592191840911, "grad_norm": 1.8683302543522238, "learning_rate": 2.061318866702765e-06, "loss": 0.5833520293235779, "step": 5489 }, { "epoch": 1.6053516595993567, "grad_norm": 1.7369107165445012, "learning_rate": 2.058379857721908e-06, "loss": 0.5854958295822144, "step": 5490 }, { "epoch": 1.605644100014622, "grad_norm": 1.6603772170749127, "learning_rate": 2.0554427050464742e-06, "loss": 0.5577352643013, "step": 5491 }, { "epoch": 1.6059365404298873, "grad_norm": 1.6757677840410201, "learning_rate": 2.052507409363004e-06, "loss": 0.5328816175460815, "step": 5492 }, { "epoch": 1.6062289808451529, "grad_norm": 1.7643397031335737, "learning_rate": 2.0495739713576046e-06, "loss": 0.5606744289398193, "step": 5493 }, { "epoch": 1.6065214212604182, "grad_norm": 1.7836115172074085, "learning_rate": 2.0466423917159526e-06, "loss": 0.541358470916748, "step": 5494 }, { "epoch": 1.6068138616756835, "grad_norm": 2.1455011977132714, "learning_rate": 2.0437126711232826e-06, "loss": 0.6578946709632874, "step": 5495 }, { "epoch": 1.607106302090949, "grad_norm": 1.9512378226148355, "learning_rate": 2.0407848102644002e-06, "loss": 0.5967978239059448, "step": 5496 }, { "epoch": 1.6073987425062144, "grad_norm": 1.623105883994405, "learning_rate": 2.037858809823675e-06, "loss": 0.46947693824768066, "step": 5497 }, { "epoch": 1.6076911829214797, "grad_norm": 1.5763151196056784, "learning_rate": 2.0349346704850436e-06, "loss": 0.5014760494232178, "step": 5498 }, { "epoch": 1.6079836233367453, "grad_norm": 1.5417734514532708, "learning_rate": 2.0320123929320033e-06, "loss": 0.4399675726890564, "step": 5499 }, { "epoch": 1.6082760637520104, "grad_norm": 1.8719036359624468, "learning_rate": 2.0290919778476214e-06, "loss": 0.4729107618331909, "step": 5500 }, { "epoch": 1.608568504167276, "grad_norm": 1.5894079730285777, "learning_rate": 2.0261734259145248e-06, "loss": 0.5669134259223938, "step": 5501 }, { "epoch": 1.6088609445825413, "grad_norm": 1.554035864612711, "learning_rate": 2.0232567378149082e-06, "loss": 0.4200817942619324, "step": 5502 }, { "epoch": 1.6091533849978066, "grad_norm": 1.8154865090092227, "learning_rate": 2.0203419142305303e-06, "loss": 0.6057849526405334, "step": 5503 }, { "epoch": 1.6094458254130721, "grad_norm": 1.7156552575659618, "learning_rate": 2.017428955842713e-06, "loss": 0.5644170045852661, "step": 5504 }, { "epoch": 1.6097382658283375, "grad_norm": 1.9102243104698693, "learning_rate": 2.014517863332345e-06, "loss": 0.6368730068206787, "step": 5505 }, { "epoch": 1.6100307062436028, "grad_norm": 1.5712918255487374, "learning_rate": 2.0116086373798704e-06, "loss": 0.4829355478286743, "step": 5506 }, { "epoch": 1.6103231466588683, "grad_norm": 1.642541904242283, "learning_rate": 2.0087012786653072e-06, "loss": 0.5604796409606934, "step": 5507 }, { "epoch": 1.6106155870741337, "grad_norm": 1.8591393596163848, "learning_rate": 2.005795787868232e-06, "loss": 0.5594274997711182, "step": 5508 }, { "epoch": 1.610908027489399, "grad_norm": 1.607362999733334, "learning_rate": 2.0028921656677857e-06, "loss": 0.5553449988365173, "step": 5509 }, { "epoch": 1.6112004679046645, "grad_norm": 1.7968941470299316, "learning_rate": 1.999990412742673e-06, "loss": 0.5056631565093994, "step": 5510 }, { "epoch": 1.6114929083199299, "grad_norm": 1.5654499452702673, "learning_rate": 1.9970905297711606e-06, "loss": 0.432037353515625, "step": 5511 }, { "epoch": 1.6117853487351952, "grad_norm": 1.6991047972494284, "learning_rate": 1.9941925174310773e-06, "loss": 0.5152974128723145, "step": 5512 }, { "epoch": 1.6120777891504607, "grad_norm": 2.1530610582321015, "learning_rate": 1.9912963763998185e-06, "loss": 0.59015291929245, "step": 5513 }, { "epoch": 1.6123702295657258, "grad_norm": 2.024675130869183, "learning_rate": 1.9884021073543368e-06, "loss": 0.564031720161438, "step": 5514 }, { "epoch": 1.6126626699809914, "grad_norm": 1.560415326953441, "learning_rate": 1.985509710971152e-06, "loss": 0.5930228233337402, "step": 5515 }, { "epoch": 1.6129551103962567, "grad_norm": 1.6853261691368011, "learning_rate": 1.9826191879263446e-06, "loss": 0.540229082107544, "step": 5516 }, { "epoch": 1.613247550811522, "grad_norm": 1.3918983021829734, "learning_rate": 1.9797305388955547e-06, "loss": 0.5473166704177856, "step": 5517 }, { "epoch": 1.6135399912267876, "grad_norm": 1.888180196247059, "learning_rate": 1.976843764553986e-06, "loss": 0.5814535617828369, "step": 5518 }, { "epoch": 1.613832431642053, "grad_norm": 1.556089571926902, "learning_rate": 1.973958865576403e-06, "loss": 0.4892576038837433, "step": 5519 }, { "epoch": 1.6141248720573182, "grad_norm": 2.0461002845877454, "learning_rate": 1.97107584263714e-06, "loss": 0.5416869521141052, "step": 5520 }, { "epoch": 1.6144173124725838, "grad_norm": 1.9685372161480885, "learning_rate": 1.9681946964100807e-06, "loss": 0.5956105589866638, "step": 5521 }, { "epoch": 1.6147097528878491, "grad_norm": 1.7885384988170376, "learning_rate": 1.9653154275686782e-06, "loss": 0.5722565650939941, "step": 5522 }, { "epoch": 1.6150021933031145, "grad_norm": 1.7917880328936266, "learning_rate": 1.962438036785942e-06, "loss": 0.3984888195991516, "step": 5523 }, { "epoch": 1.61529463371838, "grad_norm": 1.6334267618118792, "learning_rate": 1.959562524734445e-06, "loss": 0.601211428642273, "step": 5524 }, { "epoch": 1.615587074133645, "grad_norm": 1.8080265301577823, "learning_rate": 1.9566888920863247e-06, "loss": 0.4803691506385803, "step": 5525 }, { "epoch": 1.6158795145489107, "grad_norm": 1.7017865626810558, "learning_rate": 1.9538171395132688e-06, "loss": 0.6914256811141968, "step": 5526 }, { "epoch": 1.6161719549641762, "grad_norm": 1.6511977253132817, "learning_rate": 1.950947267686536e-06, "loss": 0.49076569080352783, "step": 5527 }, { "epoch": 1.6164643953794413, "grad_norm": 1.747888743558531, "learning_rate": 1.9480792772769384e-06, "loss": 0.45781368017196655, "step": 5528 }, { "epoch": 1.6167568357947069, "grad_norm": 1.6564602036044371, "learning_rate": 1.9452131689548547e-06, "loss": 0.5257985591888428, "step": 5529 }, { "epoch": 1.6170492762099722, "grad_norm": 1.6440311298220422, "learning_rate": 1.9423489433902186e-06, "loss": 0.4170517921447754, "step": 5530 }, { "epoch": 1.6173417166252375, "grad_norm": 2.0566981290938386, "learning_rate": 1.939486601252525e-06, "loss": 0.5612319707870483, "step": 5531 }, { "epoch": 1.617634157040503, "grad_norm": 1.6971941543602107, "learning_rate": 1.93662614321083e-06, "loss": 0.4543185234069824, "step": 5532 }, { "epoch": 1.6179265974557684, "grad_norm": 1.8366998724664239, "learning_rate": 1.933767569933749e-06, "loss": 0.5506256222724915, "step": 5533 }, { "epoch": 1.6182190378710337, "grad_norm": 1.9975995427991473, "learning_rate": 1.930910882089454e-06, "loss": 0.5411139130592346, "step": 5534 }, { "epoch": 1.6185114782862993, "grad_norm": 1.5549343206880035, "learning_rate": 1.9280560803456794e-06, "loss": 0.5332196950912476, "step": 5535 }, { "epoch": 1.6188039187015646, "grad_norm": 1.6015028384804206, "learning_rate": 1.92520316536972e-06, "loss": 0.5159808993339539, "step": 5536 }, { "epoch": 1.61909635911683, "grad_norm": 1.6182793083642761, "learning_rate": 1.9223521378284227e-06, "loss": 0.5483378767967224, "step": 5537 }, { "epoch": 1.6193887995320955, "grad_norm": 1.748287896704832, "learning_rate": 1.9195029983882008e-06, "loss": 0.6451961994171143, "step": 5538 }, { "epoch": 1.6196812399473606, "grad_norm": 1.799346834276764, "learning_rate": 1.9166557477150227e-06, "loss": 0.5904289484024048, "step": 5539 }, { "epoch": 1.6199736803626261, "grad_norm": 1.7892510148499787, "learning_rate": 1.9138103864744164e-06, "loss": 0.6688845753669739, "step": 5540 }, { "epoch": 1.6202661207778914, "grad_norm": 2.050448659373089, "learning_rate": 1.910966915331467e-06, "loss": 0.6299211382865906, "step": 5541 }, { "epoch": 1.6205585611931568, "grad_norm": 1.7426964698819027, "learning_rate": 1.908125334950819e-06, "loss": 0.5502864122390747, "step": 5542 }, { "epoch": 1.6208510016084223, "grad_norm": 1.7235241145346871, "learning_rate": 1.905285645996674e-06, "loss": 0.5332865118980408, "step": 5543 }, { "epoch": 1.6211434420236877, "grad_norm": 1.6383658575716402, "learning_rate": 1.9024478491327936e-06, "loss": 0.43371304869651794, "step": 5544 }, { "epoch": 1.621435882438953, "grad_norm": 1.6217362880484214, "learning_rate": 1.8996119450224936e-06, "loss": 0.6992501616477966, "step": 5545 }, { "epoch": 1.6217283228542185, "grad_norm": 1.6128570910751827, "learning_rate": 1.8967779343286507e-06, "loss": 0.46558254957199097, "step": 5546 }, { "epoch": 1.6220207632694839, "grad_norm": 1.8944422030763228, "learning_rate": 1.8939458177136994e-06, "loss": 0.48943620920181274, "step": 5547 }, { "epoch": 1.6223132036847492, "grad_norm": 1.7707340805275773, "learning_rate": 1.8911155958396256e-06, "loss": 0.6041419506072998, "step": 5548 }, { "epoch": 1.6226056441000147, "grad_norm": 1.9134744412177116, "learning_rate": 1.8882872693679787e-06, "loss": 0.5695589780807495, "step": 5549 }, { "epoch": 1.62289808451528, "grad_norm": 1.5970321508292495, "learning_rate": 1.8854608389598616e-06, "loss": 0.45147764682769775, "step": 5550 }, { "epoch": 1.6231905249305454, "grad_norm": 1.6145559649024994, "learning_rate": 1.8826363052759367e-06, "loss": 0.45560893416404724, "step": 5551 }, { "epoch": 1.623482965345811, "grad_norm": 2.0039561405471855, "learning_rate": 1.8798136689764213e-06, "loss": 0.5714661478996277, "step": 5552 }, { "epoch": 1.623775405761076, "grad_norm": 1.963169578207157, "learning_rate": 1.8769929307210889e-06, "loss": 0.6074325442314148, "step": 5553 }, { "epoch": 1.6240678461763416, "grad_norm": 1.8912554250379197, "learning_rate": 1.8741740911692708e-06, "loss": 0.5406322479248047, "step": 5554 }, { "epoch": 1.624360286591607, "grad_norm": 1.6201303288999382, "learning_rate": 1.8713571509798524e-06, "loss": 0.6098664999008179, "step": 5555 }, { "epoch": 1.6246527270068722, "grad_norm": 1.5939995677707521, "learning_rate": 1.8685421108112778e-06, "loss": 0.4424859881401062, "step": 5556 }, { "epoch": 1.6249451674221378, "grad_norm": 1.939602482231334, "learning_rate": 1.8657289713215442e-06, "loss": 0.5893913507461548, "step": 5557 }, { "epoch": 1.6252376078374031, "grad_norm": 1.7158163961672592, "learning_rate": 1.862917733168208e-06, "loss": 0.5462610125541687, "step": 5558 }, { "epoch": 1.6255300482526684, "grad_norm": 1.6886650692922842, "learning_rate": 1.8601083970083766e-06, "loss": 0.6044303178787231, "step": 5559 }, { "epoch": 1.625822488667934, "grad_norm": 1.9164005939081288, "learning_rate": 1.857300963498715e-06, "loss": 0.4110264778137207, "step": 5560 }, { "epoch": 1.6261149290831993, "grad_norm": 1.863152431496286, "learning_rate": 1.8544954332954445e-06, "loss": 0.5640783309936523, "step": 5561 }, { "epoch": 1.6264073694984647, "grad_norm": 1.7604141548514003, "learning_rate": 1.851691807054342e-06, "loss": 0.43247851729393005, "step": 5562 }, { "epoch": 1.6266998099137302, "grad_norm": 1.5534888737518595, "learning_rate": 1.8488900854307367e-06, "loss": 0.4909735918045044, "step": 5563 }, { "epoch": 1.6269922503289953, "grad_norm": 2.0368143734269983, "learning_rate": 1.8460902690795135e-06, "loss": 0.5705426335334778, "step": 5564 }, { "epoch": 1.6272846907442609, "grad_norm": 1.7974229709801126, "learning_rate": 1.8432923586551144e-06, "loss": 0.6455575823783875, "step": 5565 }, { "epoch": 1.6275771311595264, "grad_norm": 1.5854644304225498, "learning_rate": 1.8404963548115318e-06, "loss": 0.4156351089477539, "step": 5566 }, { "epoch": 1.6278695715747915, "grad_norm": 1.5858505218040218, "learning_rate": 1.8377022582023163e-06, "loss": 0.5497896075248718, "step": 5567 }, { "epoch": 1.628162011990057, "grad_norm": 1.5247094519796704, "learning_rate": 1.8349100694805711e-06, "loss": 0.5237758159637451, "step": 5568 }, { "epoch": 1.6284544524053224, "grad_norm": 1.8340347816856337, "learning_rate": 1.832119789298954e-06, "loss": 0.5140771865844727, "step": 5569 }, { "epoch": 1.6287468928205877, "grad_norm": 2.04051717357991, "learning_rate": 1.8293314183096721e-06, "loss": 0.5942349433898926, "step": 5570 }, { "epoch": 1.6290393332358533, "grad_norm": 1.7868670881272706, "learning_rate": 1.8265449571644933e-06, "loss": 0.6316613554954529, "step": 5571 }, { "epoch": 1.6293317736511186, "grad_norm": 1.7168155291178147, "learning_rate": 1.823760406514735e-06, "loss": 0.4789954423904419, "step": 5572 }, { "epoch": 1.629624214066384, "grad_norm": 1.67674259516067, "learning_rate": 1.8209777670112706e-06, "loss": 0.596744179725647, "step": 5573 }, { "epoch": 1.6299166544816495, "grad_norm": 1.7162317239554103, "learning_rate": 1.8181970393045223e-06, "loss": 0.5785890817642212, "step": 5574 }, { "epoch": 1.6302090948969148, "grad_norm": 1.72408279785472, "learning_rate": 1.8154182240444706e-06, "loss": 0.5399461388587952, "step": 5575 }, { "epoch": 1.6305015353121801, "grad_norm": 1.789842505433769, "learning_rate": 1.812641321880645e-06, "loss": 0.5251961946487427, "step": 5576 }, { "epoch": 1.6307939757274457, "grad_norm": 1.5658430659550284, "learning_rate": 1.8098663334621314e-06, "loss": 0.6094855070114136, "step": 5577 }, { "epoch": 1.6310864161427108, "grad_norm": 1.7839781318616403, "learning_rate": 1.8070932594375656e-06, "loss": 0.5586157441139221, "step": 5578 }, { "epoch": 1.6313788565579763, "grad_norm": 1.6074136925381057, "learning_rate": 1.804322100455136e-06, "loss": 0.5572035312652588, "step": 5579 }, { "epoch": 1.6316712969732416, "grad_norm": 1.8419935059375991, "learning_rate": 1.801552857162585e-06, "loss": 0.5567929148674011, "step": 5580 }, { "epoch": 1.631963737388507, "grad_norm": 1.6633256712541593, "learning_rate": 1.79878553020721e-06, "loss": 0.4823629558086395, "step": 5581 }, { "epoch": 1.6322561778037725, "grad_norm": 1.6367496398860508, "learning_rate": 1.7960201202358495e-06, "loss": 0.52935791015625, "step": 5582 }, { "epoch": 1.6325486182190379, "grad_norm": 1.612437469487566, "learning_rate": 1.7932566278949049e-06, "loss": 0.5486055016517639, "step": 5583 }, { "epoch": 1.6328410586343032, "grad_norm": 1.638977663987494, "learning_rate": 1.7904950538303256e-06, "loss": 0.5606030225753784, "step": 5584 }, { "epoch": 1.6331334990495687, "grad_norm": 1.5693957426770746, "learning_rate": 1.7877353986876134e-06, "loss": 0.5394873023033142, "step": 5585 }, { "epoch": 1.633425939464834, "grad_norm": 1.7113121312436326, "learning_rate": 1.7849776631118198e-06, "loss": 0.6015416383743286, "step": 5586 }, { "epoch": 1.6337183798800994, "grad_norm": 1.308708455891742, "learning_rate": 1.7822218477475496e-06, "loss": 0.3476119041442871, "step": 5587 }, { "epoch": 1.634010820295365, "grad_norm": 1.6872606261874499, "learning_rate": 1.7794679532389569e-06, "loss": 0.43062901496887207, "step": 5588 }, { "epoch": 1.6343032607106303, "grad_norm": 1.5715829289628913, "learning_rate": 1.7767159802297497e-06, "loss": 0.5267136096954346, "step": 5589 }, { "epoch": 1.6345957011258956, "grad_norm": 1.5843839322860915, "learning_rate": 1.7739659293631828e-06, "loss": 0.40477365255355835, "step": 5590 }, { "epoch": 1.6348881415411611, "grad_norm": 1.5464703907052304, "learning_rate": 1.7712178012820657e-06, "loss": 0.5166594386100769, "step": 5591 }, { "epoch": 1.6351805819564262, "grad_norm": 1.8725681057880097, "learning_rate": 1.768471596628757e-06, "loss": 0.577332615852356, "step": 5592 }, { "epoch": 1.6354730223716918, "grad_norm": 1.9361068668488919, "learning_rate": 1.7657273160451626e-06, "loss": 0.6265558004379272, "step": 5593 }, { "epoch": 1.6357654627869571, "grad_norm": 1.359788014623014, "learning_rate": 1.7629849601727422e-06, "loss": 0.46483689546585083, "step": 5594 }, { "epoch": 1.6360579032022224, "grad_norm": 1.763922790196176, "learning_rate": 1.760244529652504e-06, "loss": 0.5217114090919495, "step": 5595 }, { "epoch": 1.636350343617488, "grad_norm": 1.6050490395737056, "learning_rate": 1.7575060251250098e-06, "loss": 0.40754032135009766, "step": 5596 }, { "epoch": 1.6366427840327533, "grad_norm": 1.8321306870013994, "learning_rate": 1.7547694472303677e-06, "loss": 0.5153856873512268, "step": 5597 }, { "epoch": 1.6369352244480186, "grad_norm": 1.7719174136737381, "learning_rate": 1.7520347966082352e-06, "loss": 0.47374534606933594, "step": 5598 }, { "epoch": 1.6372276648632842, "grad_norm": 1.85303064846871, "learning_rate": 1.7493020738978205e-06, "loss": 0.375232070684433, "step": 5599 }, { "epoch": 1.6375201052785495, "grad_norm": 1.6844665277996391, "learning_rate": 1.746571279737884e-06, "loss": 0.5731218457221985, "step": 5600 }, { "epoch": 1.6378125456938148, "grad_norm": 1.7604017420749336, "learning_rate": 1.7438424147667267e-06, "loss": 0.4908478260040283, "step": 5601 }, { "epoch": 1.6381049861090804, "grad_norm": 1.759771030770569, "learning_rate": 1.741115479622205e-06, "loss": 0.6114420890808105, "step": 5602 }, { "epoch": 1.6383974265243455, "grad_norm": 1.6860969538693165, "learning_rate": 1.738390474941727e-06, "loss": 0.6207842826843262, "step": 5603 }, { "epoch": 1.638689866939611, "grad_norm": 1.8627303036453442, "learning_rate": 1.7356674013622431e-06, "loss": 0.4745057225227356, "step": 5604 }, { "epoch": 1.6389823073548766, "grad_norm": 1.7448279838579288, "learning_rate": 1.7329462595202573e-06, "loss": 0.5501791834831238, "step": 5605 }, { "epoch": 1.6392747477701417, "grad_norm": 1.5723514930569527, "learning_rate": 1.7302270500518181e-06, "loss": 0.5497169494628906, "step": 5606 }, { "epoch": 1.6395671881854073, "grad_norm": 1.5582550082102102, "learning_rate": 1.7275097735925239e-06, "loss": 0.4439499080181122, "step": 5607 }, { "epoch": 1.6398596286006726, "grad_norm": 1.5421642594165323, "learning_rate": 1.7247944307775245e-06, "loss": 0.5869239568710327, "step": 5608 }, { "epoch": 1.640152069015938, "grad_norm": 1.8811101308859866, "learning_rate": 1.722081022241512e-06, "loss": 0.6979252099990845, "step": 5609 }, { "epoch": 1.6404445094312035, "grad_norm": 1.6052357503320651, "learning_rate": 1.719369548618729e-06, "loss": 0.43291550874710083, "step": 5610 }, { "epoch": 1.6407369498464688, "grad_norm": 1.9414841639869573, "learning_rate": 1.7166600105429676e-06, "loss": 0.5670255422592163, "step": 5611 }, { "epoch": 1.6410293902617341, "grad_norm": 1.8236286685742322, "learning_rate": 1.7139524086475679e-06, "loss": 0.5956759452819824, "step": 5612 }, { "epoch": 1.6413218306769997, "grad_norm": 1.8379778243010318, "learning_rate": 1.71124674356541e-06, "loss": 0.624202311038971, "step": 5613 }, { "epoch": 1.641614271092265, "grad_norm": 1.546976601945301, "learning_rate": 1.7085430159289295e-06, "loss": 0.5394845604896545, "step": 5614 }, { "epoch": 1.6419067115075303, "grad_norm": 1.689395226298913, "learning_rate": 1.7058412263701063e-06, "loss": 0.5320364236831665, "step": 5615 }, { "epoch": 1.6421991519227959, "grad_norm": 1.719103506089404, "learning_rate": 1.7031413755204673e-06, "loss": 0.5889087915420532, "step": 5616 }, { "epoch": 1.642491592338061, "grad_norm": 2.1274597082343103, "learning_rate": 1.7004434640110857e-06, "loss": 0.63529372215271, "step": 5617 }, { "epoch": 1.6427840327533265, "grad_norm": 1.4153766033649497, "learning_rate": 1.6977474924725823e-06, "loss": 0.48696887493133545, "step": 5618 }, { "epoch": 1.6430764731685918, "grad_norm": 1.592412158520241, "learning_rate": 1.6950534615351234e-06, "loss": 0.5998564958572388, "step": 5619 }, { "epoch": 1.6433689135838572, "grad_norm": 2.0156836237169142, "learning_rate": 1.6923613718284237e-06, "loss": 0.5256673693656921, "step": 5620 }, { "epoch": 1.6436613539991227, "grad_norm": 1.6125826678096948, "learning_rate": 1.6896712239817425e-06, "loss": 0.4609792232513428, "step": 5621 }, { "epoch": 1.643953794414388, "grad_norm": 1.6601641389435113, "learning_rate": 1.6869830186238846e-06, "loss": 0.6816249489784241, "step": 5622 }, { "epoch": 1.6442462348296534, "grad_norm": 1.7917988570441037, "learning_rate": 1.6842967563832036e-06, "loss": 0.4622993767261505, "step": 5623 }, { "epoch": 1.644538675244919, "grad_norm": 1.7039192593388794, "learning_rate": 1.6816124378875942e-06, "loss": 0.5089092254638672, "step": 5624 }, { "epoch": 1.6448311156601843, "grad_norm": 1.804753905417491, "learning_rate": 1.6789300637645e-06, "loss": 0.49178463220596313, "step": 5625 }, { "epoch": 1.6451235560754496, "grad_norm": 1.5823189990427826, "learning_rate": 1.676249634640912e-06, "loss": 0.4943847358226776, "step": 5626 }, { "epoch": 1.6454159964907151, "grad_norm": 1.368767643177251, "learning_rate": 1.6735711511433606e-06, "loss": 0.38509243726730347, "step": 5627 }, { "epoch": 1.6457084369059805, "grad_norm": 1.8368901340386043, "learning_rate": 1.6708946138979288e-06, "loss": 0.4765651822090149, "step": 5628 }, { "epoch": 1.6460008773212458, "grad_norm": 1.9577136986762462, "learning_rate": 1.6682200235302383e-06, "loss": 0.5667406916618347, "step": 5629 }, { "epoch": 1.6462933177365113, "grad_norm": 1.7936789387136831, "learning_rate": 1.66554738066546e-06, "loss": 0.702905535697937, "step": 5630 }, { "epoch": 1.6465857581517764, "grad_norm": 1.8218045279879265, "learning_rate": 1.6628766859283064e-06, "loss": 0.5056663155555725, "step": 5631 }, { "epoch": 1.646878198567042, "grad_norm": 1.5059989898819282, "learning_rate": 1.660207939943037e-06, "loss": 0.3949700593948364, "step": 5632 }, { "epoch": 1.6471706389823073, "grad_norm": 1.8365180821647582, "learning_rate": 1.6575411433334553e-06, "loss": 0.5562522411346436, "step": 5633 }, { "epoch": 1.6474630793975726, "grad_norm": 1.853282636299631, "learning_rate": 1.6548762967229104e-06, "loss": 0.5046012997627258, "step": 5634 }, { "epoch": 1.6477555198128382, "grad_norm": 1.873405273649113, "learning_rate": 1.6522134007342894e-06, "loss": 0.510586678981781, "step": 5635 }, { "epoch": 1.6480479602281035, "grad_norm": 1.7244709678320052, "learning_rate": 1.649552455990031e-06, "loss": 0.5587502717971802, "step": 5636 }, { "epoch": 1.6483404006433688, "grad_norm": 1.4244703013642708, "learning_rate": 1.6468934631121147e-06, "loss": 0.4774302840232849, "step": 5637 }, { "epoch": 1.6486328410586344, "grad_norm": 1.7840577383362874, "learning_rate": 1.644236422722063e-06, "loss": 0.5969966650009155, "step": 5638 }, { "epoch": 1.6489252814738997, "grad_norm": 1.9166026144170052, "learning_rate": 1.6415813354409438e-06, "loss": 0.6344267129898071, "step": 5639 }, { "epoch": 1.649217721889165, "grad_norm": 1.6455866581497667, "learning_rate": 1.638928201889367e-06, "loss": 0.5252394676208496, "step": 5640 }, { "epoch": 1.6495101623044306, "grad_norm": 1.7211145798255698, "learning_rate": 1.636277022687488e-06, "loss": 0.5092496871948242, "step": 5641 }, { "epoch": 1.6498026027196957, "grad_norm": 1.797167898340461, "learning_rate": 1.633627798455002e-06, "loss": 0.6530938148498535, "step": 5642 }, { "epoch": 1.6500950431349612, "grad_norm": 1.9009398203220143, "learning_rate": 1.6309805298111492e-06, "loss": 0.5152128338813782, "step": 5643 }, { "epoch": 1.6503874835502268, "grad_norm": 1.4233567646508596, "learning_rate": 1.6283352173747148e-06, "loss": 0.4670040011405945, "step": 5644 }, { "epoch": 1.650679923965492, "grad_norm": 1.718468492793745, "learning_rate": 1.625691861764024e-06, "loss": 0.47373896837234497, "step": 5645 }, { "epoch": 1.6509723643807575, "grad_norm": 1.7609405530750961, "learning_rate": 1.6230504635969413e-06, "loss": 0.44277217984199524, "step": 5646 }, { "epoch": 1.6512648047960228, "grad_norm": 1.9781946361433098, "learning_rate": 1.6204110234908798e-06, "loss": 0.6000313758850098, "step": 5647 }, { "epoch": 1.651557245211288, "grad_norm": 1.7535547863968097, "learning_rate": 1.6177735420627939e-06, "loss": 0.5950880646705627, "step": 5648 }, { "epoch": 1.6518496856265537, "grad_norm": 1.6566653852646755, "learning_rate": 1.6151380199291767e-06, "loss": 0.6308536529541016, "step": 5649 }, { "epoch": 1.652142126041819, "grad_norm": 1.6807098070808562, "learning_rate": 1.6125044577060667e-06, "loss": 0.619708240032196, "step": 5650 }, { "epoch": 1.6524345664570843, "grad_norm": 1.5330415165514617, "learning_rate": 1.6098728560090438e-06, "loss": 0.4912105202674866, "step": 5651 }, { "epoch": 1.6527270068723499, "grad_norm": 1.6147329529882195, "learning_rate": 1.607243215453227e-06, "loss": 0.5078046917915344, "step": 5652 }, { "epoch": 1.6530194472876152, "grad_norm": 1.562057881587638, "learning_rate": 1.60461553665328e-06, "loss": 0.4845188856124878, "step": 5653 }, { "epoch": 1.6533118877028805, "grad_norm": 1.7846716530712952, "learning_rate": 1.6019898202234075e-06, "loss": 0.6323055028915405, "step": 5654 }, { "epoch": 1.653604328118146, "grad_norm": 1.6998274797888833, "learning_rate": 1.5993660667773524e-06, "loss": 0.5700039863586426, "step": 5655 }, { "epoch": 1.6538967685334112, "grad_norm": 1.567032216624363, "learning_rate": 1.596744276928406e-06, "loss": 0.5415322780609131, "step": 5656 }, { "epoch": 1.6541892089486767, "grad_norm": 1.5705725451812174, "learning_rate": 1.5941244512893894e-06, "loss": 0.47339457273483276, "step": 5657 }, { "epoch": 1.654481649363942, "grad_norm": 1.7467509306260278, "learning_rate": 1.5915065904726735e-06, "loss": 0.5391967296600342, "step": 5658 }, { "epoch": 1.6547740897792074, "grad_norm": 1.4866752002795596, "learning_rate": 1.5888906950901683e-06, "loss": 0.4832335114479065, "step": 5659 }, { "epoch": 1.655066530194473, "grad_norm": 1.6779111992749078, "learning_rate": 1.5862767657533217e-06, "loss": 0.4539526104927063, "step": 5660 }, { "epoch": 1.6553589706097382, "grad_norm": 1.6528644114250834, "learning_rate": 1.583664803073125e-06, "loss": 0.5261383056640625, "step": 5661 }, { "epoch": 1.6556514110250036, "grad_norm": 1.497445031852123, "learning_rate": 1.5810548076601096e-06, "loss": 0.44060665369033813, "step": 5662 }, { "epoch": 1.6559438514402691, "grad_norm": 1.5492116356252563, "learning_rate": 1.578446780124344e-06, "loss": 0.4202715754508972, "step": 5663 }, { "epoch": 1.6562362918555344, "grad_norm": 1.6539664834530166, "learning_rate": 1.57584072107544e-06, "loss": 0.4736124873161316, "step": 5664 }, { "epoch": 1.6565287322707998, "grad_norm": 1.71312353367257, "learning_rate": 1.5732366311225466e-06, "loss": 0.46696585416793823, "step": 5665 }, { "epoch": 1.6568211726860653, "grad_norm": 1.9879711761174368, "learning_rate": 1.570634510874356e-06, "loss": 0.566236138343811, "step": 5666 }, { "epoch": 1.6571136131013307, "grad_norm": 1.8521270783851422, "learning_rate": 1.568034360939098e-06, "loss": 0.5486587882041931, "step": 5667 }, { "epoch": 1.657406053516596, "grad_norm": 1.7248593346342498, "learning_rate": 1.5654361819245423e-06, "loss": 0.49735748767852783, "step": 5668 }, { "epoch": 1.6576984939318615, "grad_norm": 1.7146086057727925, "learning_rate": 1.562839974437993e-06, "loss": 0.6306062340736389, "step": 5669 }, { "epoch": 1.6579909343471266, "grad_norm": 1.524921725608077, "learning_rate": 1.5602457390863e-06, "loss": 0.5062750577926636, "step": 5670 }, { "epoch": 1.6582833747623922, "grad_norm": 1.511094812843301, "learning_rate": 1.5576534764758522e-06, "loss": 0.5037271976470947, "step": 5671 }, { "epoch": 1.6585758151776575, "grad_norm": 1.5731242321067331, "learning_rate": 1.5550631872125743e-06, "loss": 0.5749099254608154, "step": 5672 }, { "epoch": 1.6588682555929228, "grad_norm": 1.7434864310127793, "learning_rate": 1.5524748719019312e-06, "loss": 0.5241814255714417, "step": 5673 }, { "epoch": 1.6591606960081884, "grad_norm": 1.6599956977784314, "learning_rate": 1.5498885311489243e-06, "loss": 0.5410301685333252, "step": 5674 }, { "epoch": 1.6594531364234537, "grad_norm": 1.6011154372945764, "learning_rate": 1.5473041655580956e-06, "loss": 0.6363968253135681, "step": 5675 }, { "epoch": 1.659745576838719, "grad_norm": 1.7262614943682328, "learning_rate": 1.5447217757335264e-06, "loss": 0.5476758480072021, "step": 5676 }, { "epoch": 1.6600380172539846, "grad_norm": 2.1890795829733465, "learning_rate": 1.5421413622788328e-06, "loss": 0.5375553369522095, "step": 5677 }, { "epoch": 1.66033045766925, "grad_norm": 1.6882335328441256, "learning_rate": 1.53956292579717e-06, "loss": 0.6227232217788696, "step": 5678 }, { "epoch": 1.6606228980845152, "grad_norm": 1.6718724900526085, "learning_rate": 1.5369864668912327e-06, "loss": 0.5981070399284363, "step": 5679 }, { "epoch": 1.6609153384997808, "grad_norm": 1.6935493861773532, "learning_rate": 1.5344119861632535e-06, "loss": 0.5535466074943542, "step": 5680 }, { "epoch": 1.661207778915046, "grad_norm": 1.5610811166933365, "learning_rate": 1.5318394842150009e-06, "loss": 0.5562780499458313, "step": 5681 }, { "epoch": 1.6615002193303114, "grad_norm": 2.8725866349660683, "learning_rate": 1.5292689616477808e-06, "loss": 0.5937552452087402, "step": 5682 }, { "epoch": 1.661792659745577, "grad_norm": 1.578580766715999, "learning_rate": 1.526700419062439e-06, "loss": 0.5154576301574707, "step": 5683 }, { "epoch": 1.662085100160842, "grad_norm": 1.691867883182686, "learning_rate": 1.5241338570593557e-06, "loss": 0.6887973546981812, "step": 5684 }, { "epoch": 1.6623775405761076, "grad_norm": 1.6575847736482525, "learning_rate": 1.5215692762384481e-06, "loss": 0.5365385413169861, "step": 5685 }, { "epoch": 1.662669980991373, "grad_norm": 1.8209681086307343, "learning_rate": 1.519006677199173e-06, "loss": 0.5906165838241577, "step": 5686 }, { "epoch": 1.6629624214066383, "grad_norm": 1.5861180854035228, "learning_rate": 1.5164460605405252e-06, "loss": 0.5752634406089783, "step": 5687 }, { "epoch": 1.6632548618219039, "grad_norm": 1.5319119666926662, "learning_rate": 1.5138874268610259e-06, "loss": 0.6265667676925659, "step": 5688 }, { "epoch": 1.6635473022371692, "grad_norm": 1.9588164368468703, "learning_rate": 1.5113307767587449e-06, "loss": 0.5032769441604614, "step": 5689 }, { "epoch": 1.6638397426524345, "grad_norm": 1.799708728422752, "learning_rate": 1.5087761108312837e-06, "loss": 0.5740037560462952, "step": 5690 }, { "epoch": 1.6641321830677, "grad_norm": 1.7236751091410876, "learning_rate": 1.5062234296757782e-06, "loss": 0.5745523571968079, "step": 5691 }, { "epoch": 1.6644246234829654, "grad_norm": 1.4898534105858117, "learning_rate": 1.5036727338889035e-06, "loss": 0.4448510408401489, "step": 5692 }, { "epoch": 1.6647170638982307, "grad_norm": 1.7482222251428332, "learning_rate": 1.5011240240668678e-06, "loss": 0.5142196416854858, "step": 5693 }, { "epoch": 1.6650095043134963, "grad_norm": 1.484675519827308, "learning_rate": 1.4985773008054184e-06, "loss": 0.3317479193210602, "step": 5694 }, { "epoch": 1.6653019447287614, "grad_norm": 1.9471413048062423, "learning_rate": 1.4960325646998353e-06, "loss": 0.5721619129180908, "step": 5695 }, { "epoch": 1.665594385144027, "grad_norm": 1.7546330993452042, "learning_rate": 1.4934898163449341e-06, "loss": 0.4937021732330322, "step": 5696 }, { "epoch": 1.6658868255592922, "grad_norm": 1.9665727915679332, "learning_rate": 1.4909490563350694e-06, "loss": 0.6414870023727417, "step": 5697 }, { "epoch": 1.6661792659745576, "grad_norm": 2.144217430639584, "learning_rate": 1.4884102852641258e-06, "loss": 0.6265281438827515, "step": 5698 }, { "epoch": 1.6664717063898231, "grad_norm": 1.9171094003176723, "learning_rate": 1.48587350372553e-06, "loss": 0.5908917784690857, "step": 5699 }, { "epoch": 1.6667641468050884, "grad_norm": 1.5794126433874063, "learning_rate": 1.4833387123122334e-06, "loss": 0.6098382472991943, "step": 5700 }, { "epoch": 1.6670565872203538, "grad_norm": 1.7930907914682574, "learning_rate": 1.4808059116167306e-06, "loss": 0.5106536746025085, "step": 5701 }, { "epoch": 1.6673490276356193, "grad_norm": 1.9054848074745216, "learning_rate": 1.4782751022310481e-06, "loss": 0.5548620820045471, "step": 5702 }, { "epoch": 1.6676414680508846, "grad_norm": 1.5332665904029121, "learning_rate": 1.4757462847467475e-06, "loss": 0.4596245288848877, "step": 5703 }, { "epoch": 1.66793390846615, "grad_norm": 1.98056012220508, "learning_rate": 1.4732194597549244e-06, "loss": 0.6000612378120422, "step": 5704 }, { "epoch": 1.6682263488814155, "grad_norm": 1.53003821533968, "learning_rate": 1.4706946278462097e-06, "loss": 0.5522277355194092, "step": 5705 }, { "epoch": 1.6685187892966808, "grad_norm": 1.741978737228361, "learning_rate": 1.468171789610766e-06, "loss": 0.4765724837779999, "step": 5706 }, { "epoch": 1.6688112297119462, "grad_norm": 1.737640693413614, "learning_rate": 1.4656509456382927e-06, "loss": 0.564188539981842, "step": 5707 }, { "epoch": 1.6691036701272117, "grad_norm": 1.8586603218062736, "learning_rate": 1.4631320965180208e-06, "loss": 0.4910390377044678, "step": 5708 }, { "epoch": 1.6693961105424768, "grad_norm": 1.8232002468100077, "learning_rate": 1.4606152428387166e-06, "loss": 0.5992041826248169, "step": 5709 }, { "epoch": 1.6696885509577424, "grad_norm": 1.7676126822410316, "learning_rate": 1.4581003851886811e-06, "loss": 0.4873291850090027, "step": 5710 }, { "epoch": 1.6699809913730077, "grad_norm": 1.7366762629360202, "learning_rate": 1.4555875241557426e-06, "loss": 0.6487013101577759, "step": 5711 }, { "epoch": 1.670273431788273, "grad_norm": 1.5436242943423213, "learning_rate": 1.4530766603272695e-06, "loss": 0.4624609351158142, "step": 5712 }, { "epoch": 1.6705658722035386, "grad_norm": 1.8279691880026145, "learning_rate": 1.4505677942901609e-06, "loss": 0.5765592455863953, "step": 5713 }, { "epoch": 1.670858312618804, "grad_norm": 1.6171369345434061, "learning_rate": 1.4480609266308488e-06, "loss": 0.6730339527130127, "step": 5714 }, { "epoch": 1.6711507530340692, "grad_norm": 1.7616383708984635, "learning_rate": 1.445556057935299e-06, "loss": 0.6381770372390747, "step": 5715 }, { "epoch": 1.6714431934493348, "grad_norm": 1.8270298461203718, "learning_rate": 1.4430531887890076e-06, "loss": 0.6236029863357544, "step": 5716 }, { "epoch": 1.6717356338646001, "grad_norm": 1.8837930420569144, "learning_rate": 1.4405523197770076e-06, "loss": 0.521639347076416, "step": 5717 }, { "epoch": 1.6720280742798654, "grad_norm": 1.651464369232987, "learning_rate": 1.4380534514838596e-06, "loss": 0.5912468433380127, "step": 5718 }, { "epoch": 1.672320514695131, "grad_norm": 1.7062190862435904, "learning_rate": 1.4355565844936602e-06, "loss": 0.5533329248428345, "step": 5719 }, { "epoch": 1.672612955110396, "grad_norm": 1.8368834191923704, "learning_rate": 1.4330617193900365e-06, "loss": 0.5901006460189819, "step": 5720 }, { "epoch": 1.6729053955256616, "grad_norm": 1.7501848609248272, "learning_rate": 1.4305688567561503e-06, "loss": 0.5083344578742981, "step": 5721 }, { "epoch": 1.6731978359409272, "grad_norm": 1.359271864269329, "learning_rate": 1.4280779971746894e-06, "loss": 0.4443317651748657, "step": 5722 }, { "epoch": 1.6734902763561923, "grad_norm": 1.931591797384203, "learning_rate": 1.4255891412278778e-06, "loss": 0.6355078220367432, "step": 5723 }, { "epoch": 1.6737827167714578, "grad_norm": 1.751894451134603, "learning_rate": 1.423102289497471e-06, "loss": 0.5403381586074829, "step": 5724 }, { "epoch": 1.6740751571867232, "grad_norm": 1.7703719298346523, "learning_rate": 1.4206174425647556e-06, "loss": 0.5272151231765747, "step": 5725 }, { "epoch": 1.6743675976019885, "grad_norm": 1.6453865684399975, "learning_rate": 1.41813460101055e-06, "loss": 0.6750346422195435, "step": 5726 }, { "epoch": 1.674660038017254, "grad_norm": 1.7471006401236766, "learning_rate": 1.4156537654152026e-06, "loss": 0.552655816078186, "step": 5727 }, { "epoch": 1.6749524784325194, "grad_norm": 1.5245393452927156, "learning_rate": 1.4131749363585933e-06, "loss": 0.3947732448577881, "step": 5728 }, { "epoch": 1.6752449188477847, "grad_norm": 1.4115886314708204, "learning_rate": 1.4106981144201337e-06, "loss": 0.4910270571708679, "step": 5729 }, { "epoch": 1.6755373592630503, "grad_norm": 1.9268335112778272, "learning_rate": 1.408223300178767e-06, "loss": 0.5717943906784058, "step": 5730 }, { "epoch": 1.6758297996783156, "grad_norm": 1.6782914146067396, "learning_rate": 1.4057504942129652e-06, "loss": 0.4993055462837219, "step": 5731 }, { "epoch": 1.676122240093581, "grad_norm": 1.851203153701759, "learning_rate": 1.4032796971007322e-06, "loss": 0.4772619605064392, "step": 5732 }, { "epoch": 1.6764146805088465, "grad_norm": 1.6250346930838577, "learning_rate": 1.400810909419601e-06, "loss": 0.3824518322944641, "step": 5733 }, { "epoch": 1.6767071209241116, "grad_norm": 1.815105841906862, "learning_rate": 1.398344131746634e-06, "loss": 0.5302368402481079, "step": 5734 }, { "epoch": 1.676999561339377, "grad_norm": 1.738439871277175, "learning_rate": 1.3958793646584279e-06, "loss": 0.5776697397232056, "step": 5735 }, { "epoch": 1.6772920017546424, "grad_norm": 1.7943869884408015, "learning_rate": 1.3934166087311063e-06, "loss": 0.53890061378479, "step": 5736 }, { "epoch": 1.6775844421699078, "grad_norm": 1.71821325954837, "learning_rate": 1.3909558645403243e-06, "loss": 0.47210827469825745, "step": 5737 }, { "epoch": 1.6778768825851733, "grad_norm": 1.8689211559459666, "learning_rate": 1.388497132661264e-06, "loss": 0.6020913124084473, "step": 5738 }, { "epoch": 1.6781693230004386, "grad_norm": 1.8612626444994878, "learning_rate": 1.3860404136686411e-06, "loss": 0.4244590997695923, "step": 5739 }, { "epoch": 1.678461763415704, "grad_norm": 1.8281285744352933, "learning_rate": 1.3835857081366965e-06, "loss": 0.5969624519348145, "step": 5740 }, { "epoch": 1.6787542038309695, "grad_norm": 1.774375495611947, "learning_rate": 1.3811330166392057e-06, "loss": 0.6573030352592468, "step": 5741 }, { "epoch": 1.6790466442462348, "grad_norm": 1.641770470616675, "learning_rate": 1.3786823397494675e-06, "loss": 0.4251132905483246, "step": 5742 }, { "epoch": 1.6793390846615002, "grad_norm": 1.504067842347657, "learning_rate": 1.3762336780403163e-06, "loss": 0.5555700659751892, "step": 5743 }, { "epoch": 1.6796315250767657, "grad_norm": 2.1042602241156128, "learning_rate": 1.3737870320841073e-06, "loss": 0.5651364326477051, "step": 5744 }, { "epoch": 1.679923965492031, "grad_norm": 1.7135873981198582, "learning_rate": 1.371342402452731e-06, "loss": 0.6283698678016663, "step": 5745 }, { "epoch": 1.6802164059072964, "grad_norm": 1.4720135811876174, "learning_rate": 1.3688997897176037e-06, "loss": 0.47864413261413574, "step": 5746 }, { "epoch": 1.680508846322562, "grad_norm": 1.9268157997034314, "learning_rate": 1.366459194449674e-06, "loss": 0.6254131197929382, "step": 5747 }, { "epoch": 1.680801286737827, "grad_norm": 1.4626288934383243, "learning_rate": 1.364020617219415e-06, "loss": 0.35147637128829956, "step": 5748 }, { "epoch": 1.6810937271530926, "grad_norm": 1.9627536321629568, "learning_rate": 1.3615840585968287e-06, "loss": 0.6126410961151123, "step": 5749 }, { "epoch": 1.681386167568358, "grad_norm": 1.7732443093164585, "learning_rate": 1.359149519151447e-06, "loss": 0.5807974338531494, "step": 5750 }, { "epoch": 1.6816786079836232, "grad_norm": 1.5024396656291241, "learning_rate": 1.3567169994523277e-06, "loss": 0.5033349990844727, "step": 5751 }, { "epoch": 1.6819710483988888, "grad_norm": 1.5694630419560385, "learning_rate": 1.3542865000680604e-06, "loss": 0.47656023502349854, "step": 5752 }, { "epoch": 1.682263488814154, "grad_norm": 1.6495545571688441, "learning_rate": 1.3518580215667542e-06, "loss": 0.5137293338775635, "step": 5753 }, { "epoch": 1.6825559292294194, "grad_norm": 1.5950800812601422, "learning_rate": 1.3494315645160539e-06, "loss": 0.4636800289154053, "step": 5754 }, { "epoch": 1.682848369644685, "grad_norm": 1.937366310273075, "learning_rate": 1.3470071294831289e-06, "loss": 0.5825523138046265, "step": 5755 }, { "epoch": 1.6831408100599503, "grad_norm": 1.798274160020649, "learning_rate": 1.344584717034677e-06, "loss": 0.49282288551330566, "step": 5756 }, { "epoch": 1.6834332504752156, "grad_norm": 1.6393172330125654, "learning_rate": 1.3421643277369211e-06, "loss": 0.5551935434341431, "step": 5757 }, { "epoch": 1.6837256908904812, "grad_norm": 1.7844394306187494, "learning_rate": 1.339745962155613e-06, "loss": 0.6423832178115845, "step": 5758 }, { "epoch": 1.6840181313057463, "grad_norm": 1.7061841923170233, "learning_rate": 1.3373296208560316e-06, "loss": 0.6178075671195984, "step": 5759 }, { "epoch": 1.6843105717210118, "grad_norm": 1.7099687312543272, "learning_rate": 1.3349153044029816e-06, "loss": 0.6781176328659058, "step": 5760 }, { "epoch": 1.6846030121362774, "grad_norm": 1.837996192806761, "learning_rate": 1.332503013360794e-06, "loss": 0.6511910557746887, "step": 5761 }, { "epoch": 1.6848954525515425, "grad_norm": 1.3861294384859772, "learning_rate": 1.3300927482933279e-06, "loss": 0.4980696141719818, "step": 5762 }, { "epoch": 1.685187892966808, "grad_norm": 1.6100202697936232, "learning_rate": 1.3276845097639702e-06, "loss": 0.49176928400993347, "step": 5763 }, { "epoch": 1.6854803333820734, "grad_norm": 1.893755486996651, "learning_rate": 1.3252782983356272e-06, "loss": 0.5198799967765808, "step": 5764 }, { "epoch": 1.6857727737973387, "grad_norm": 1.4629269004624288, "learning_rate": 1.322874114570739e-06, "loss": 0.5058869123458862, "step": 5765 }, { "epoch": 1.6860652142126042, "grad_norm": 1.7429625548536576, "learning_rate": 1.3204719590312698e-06, "loss": 0.46573105454444885, "step": 5766 }, { "epoch": 1.6863576546278696, "grad_norm": 1.702952537068074, "learning_rate": 1.3180718322787067e-06, "loss": 0.5033260583877563, "step": 5767 }, { "epoch": 1.686650095043135, "grad_norm": 1.4816614067920655, "learning_rate": 1.3156737348740655e-06, "loss": 0.5306515693664551, "step": 5768 }, { "epoch": 1.6869425354584004, "grad_norm": 1.5781671263541353, "learning_rate": 1.313277667377888e-06, "loss": 0.45660221576690674, "step": 5769 }, { "epoch": 1.6872349758736658, "grad_norm": 1.8450654821638361, "learning_rate": 1.3108836303502392e-06, "loss": 0.5353757739067078, "step": 5770 }, { "epoch": 1.687527416288931, "grad_norm": 1.719850536675987, "learning_rate": 1.3084916243507118e-06, "loss": 0.5415239930152893, "step": 5771 }, { "epoch": 1.6878198567041967, "grad_norm": 1.7512787251419633, "learning_rate": 1.3061016499384217e-06, "loss": 0.5860229730606079, "step": 5772 }, { "epoch": 1.6881122971194618, "grad_norm": 1.7755386406909037, "learning_rate": 1.3037137076720107e-06, "loss": 0.5524891018867493, "step": 5773 }, { "epoch": 1.6884047375347273, "grad_norm": 1.9368287931198411, "learning_rate": 1.3013277981096484e-06, "loss": 0.5557498931884766, "step": 5774 }, { "epoch": 1.6886971779499926, "grad_norm": 1.6092314723070207, "learning_rate": 1.2989439218090227e-06, "loss": 0.45877397060394287, "step": 5775 }, { "epoch": 1.688989618365258, "grad_norm": 1.587163397973365, "learning_rate": 1.2965620793273515e-06, "loss": 0.5310335159301758, "step": 5776 }, { "epoch": 1.6892820587805235, "grad_norm": 1.4561579735469703, "learning_rate": 1.294182271221377e-06, "loss": 0.4855915904045105, "step": 5777 }, { "epoch": 1.6895744991957888, "grad_norm": 1.5707049211364605, "learning_rate": 1.2918044980473643e-06, "loss": 0.6070747971534729, "step": 5778 }, { "epoch": 1.6898669396110542, "grad_norm": 1.6739979529118527, "learning_rate": 1.2894287603611033e-06, "loss": 0.5108609795570374, "step": 5779 }, { "epoch": 1.6901593800263197, "grad_norm": 1.7647615756485302, "learning_rate": 1.2870550587179087e-06, "loss": 0.49141189455986023, "step": 5780 }, { "epoch": 1.690451820441585, "grad_norm": 1.4176033732152467, "learning_rate": 1.2846833936726178e-06, "loss": 0.4239678382873535, "step": 5781 }, { "epoch": 1.6907442608568504, "grad_norm": 1.8431022697656632, "learning_rate": 1.2823137657795948e-06, "loss": 0.6348937153816223, "step": 5782 }, { "epoch": 1.691036701272116, "grad_norm": 1.4853353146024342, "learning_rate": 1.2799461755927233e-06, "loss": 0.4561845064163208, "step": 5783 }, { "epoch": 1.6913291416873812, "grad_norm": 2.1521785942560197, "learning_rate": 1.2775806236654153e-06, "loss": 0.5663880109786987, "step": 5784 }, { "epoch": 1.6916215821026466, "grad_norm": 1.5729874297711008, "learning_rate": 1.275217110550604e-06, "loss": 0.5200550556182861, "step": 5785 }, { "epoch": 1.6919140225179121, "grad_norm": 1.7072937541006934, "learning_rate": 1.2728556368007461e-06, "loss": 0.5401214361190796, "step": 5786 }, { "epoch": 1.6922064629331772, "grad_norm": 2.0089316276908917, "learning_rate": 1.2704962029678202e-06, "loss": 0.5409752130508423, "step": 5787 }, { "epoch": 1.6924989033484428, "grad_norm": 1.6551832796501305, "learning_rate": 1.2681388096033298e-06, "loss": 0.46215158700942993, "step": 5788 }, { "epoch": 1.692791343763708, "grad_norm": 1.508586050733543, "learning_rate": 1.2657834572583027e-06, "loss": 0.44687867164611816, "step": 5789 }, { "epoch": 1.6930837841789734, "grad_norm": 1.5112474922130816, "learning_rate": 1.2634301464832877e-06, "loss": 0.47882723808288574, "step": 5790 }, { "epoch": 1.693376224594239, "grad_norm": 2.0838428918534264, "learning_rate": 1.2610788778283567e-06, "loss": 0.6108201742172241, "step": 5791 }, { "epoch": 1.6936686650095043, "grad_norm": 1.4370335670353505, "learning_rate": 1.2587296518431036e-06, "loss": 0.45024657249450684, "step": 5792 }, { "epoch": 1.6939611054247696, "grad_norm": 2.263053324487421, "learning_rate": 1.256382469076648e-06, "loss": 0.6746254563331604, "step": 5793 }, { "epoch": 1.6942535458400352, "grad_norm": 1.7423805800598553, "learning_rate": 1.2540373300776264e-06, "loss": 0.6439248323440552, "step": 5794 }, { "epoch": 1.6945459862553005, "grad_norm": 1.5101648188878154, "learning_rate": 1.251694235394204e-06, "loss": 0.467510461807251, "step": 5795 }, { "epoch": 1.6948384266705658, "grad_norm": 2.0083904845815117, "learning_rate": 1.2493531855740626e-06, "loss": 0.5509516596794128, "step": 5796 }, { "epoch": 1.6951308670858314, "grad_norm": 1.6291523574406077, "learning_rate": 1.247014181164412e-06, "loss": 0.49178194999694824, "step": 5797 }, { "epoch": 1.6954233075010965, "grad_norm": 1.6626228068208797, "learning_rate": 1.2446772227119753e-06, "loss": 0.4825005531311035, "step": 5798 }, { "epoch": 1.695715747916362, "grad_norm": 1.7609982762736733, "learning_rate": 1.242342310763005e-06, "loss": 0.7441064715385437, "step": 5799 }, { "epoch": 1.6960081883316276, "grad_norm": 1.6166055740202077, "learning_rate": 1.2400094458632717e-06, "loss": 0.5020110011100769, "step": 5800 }, { "epoch": 1.6963006287468927, "grad_norm": 1.6328086702132818, "learning_rate": 1.237678628558069e-06, "loss": 0.5439830422401428, "step": 5801 }, { "epoch": 1.6965930691621582, "grad_norm": 2.0549338843530136, "learning_rate": 1.235349859392211e-06, "loss": 0.6235179901123047, "step": 5802 }, { "epoch": 1.6968855095774236, "grad_norm": 1.7141848290041162, "learning_rate": 1.2330231389100323e-06, "loss": 0.6176612377166748, "step": 5803 }, { "epoch": 1.697177949992689, "grad_norm": 1.7381769122607003, "learning_rate": 1.2306984676553924e-06, "loss": 0.5956840515136719, "step": 5804 }, { "epoch": 1.6974703904079544, "grad_norm": 1.6857909163061566, "learning_rate": 1.2283758461716667e-06, "loss": 0.5025947690010071, "step": 5805 }, { "epoch": 1.6977628308232198, "grad_norm": 1.507035347865144, "learning_rate": 1.2260552750017551e-06, "loss": 0.5772436857223511, "step": 5806 }, { "epoch": 1.698055271238485, "grad_norm": 1.6665432076063584, "learning_rate": 1.223736754688075e-06, "loss": 0.4336615204811096, "step": 5807 }, { "epoch": 1.6983477116537506, "grad_norm": 1.695081220374435, "learning_rate": 1.221420285772572e-06, "loss": 0.5697668790817261, "step": 5808 }, { "epoch": 1.698640152069016, "grad_norm": 1.8545475442236217, "learning_rate": 1.2191058687966995e-06, "loss": 0.4966861605644226, "step": 5809 }, { "epoch": 1.6989325924842813, "grad_norm": 1.777484506048346, "learning_rate": 1.2167935043014411e-06, "loss": 0.5805951952934265, "step": 5810 }, { "epoch": 1.6992250328995469, "grad_norm": 1.6055305498040644, "learning_rate": 1.2144831928272994e-06, "loss": 0.4669906497001648, "step": 5811 }, { "epoch": 1.699517473314812, "grad_norm": 1.7730179282571827, "learning_rate": 1.212174934914294e-06, "loss": 0.5630965828895569, "step": 5812 }, { "epoch": 1.6998099137300775, "grad_norm": 1.7272395334456936, "learning_rate": 1.2098687311019663e-06, "loss": 0.5345104932785034, "step": 5813 }, { "epoch": 1.7001023541453428, "grad_norm": 1.9547814584710963, "learning_rate": 1.207564581929378e-06, "loss": 0.5760249495506287, "step": 5814 }, { "epoch": 1.7003947945606082, "grad_norm": 1.563397994600299, "learning_rate": 1.2052624879351105e-06, "loss": 0.506635308265686, "step": 5815 }, { "epoch": 1.7006872349758737, "grad_norm": 1.792775034126629, "learning_rate": 1.2029624496572622e-06, "loss": 0.5107032656669617, "step": 5816 }, { "epoch": 1.700979675391139, "grad_norm": 1.5891211780153636, "learning_rate": 1.2006644676334557e-06, "loss": 0.5888187885284424, "step": 5817 }, { "epoch": 1.7012721158064044, "grad_norm": 1.8008314810247776, "learning_rate": 1.1983685424008285e-06, "loss": 0.5326075553894043, "step": 5818 }, { "epoch": 1.70156455622167, "grad_norm": 1.6515493940564925, "learning_rate": 1.1960746744960417e-06, "loss": 0.5097993612289429, "step": 5819 }, { "epoch": 1.7018569966369352, "grad_norm": 1.6532256911128915, "learning_rate": 1.1937828644552696e-06, "loss": 0.6001093983650208, "step": 5820 }, { "epoch": 1.7021494370522006, "grad_norm": 1.7728326525757572, "learning_rate": 1.1914931128142072e-06, "loss": 0.513684093952179, "step": 5821 }, { "epoch": 1.7024418774674661, "grad_norm": 1.6118848482453871, "learning_rate": 1.189205420108076e-06, "loss": 0.4688597321510315, "step": 5822 }, { "epoch": 1.7027343178827314, "grad_norm": 1.6755720349462948, "learning_rate": 1.1869197868716075e-06, "loss": 0.4537498354911804, "step": 5823 }, { "epoch": 1.7030267582979968, "grad_norm": 1.625108439053771, "learning_rate": 1.1846362136390531e-06, "loss": 0.43031078577041626, "step": 5824 }, { "epoch": 1.7033191987132623, "grad_norm": 1.9244406734438975, "learning_rate": 1.182354700944187e-06, "loss": 0.5139330625534058, "step": 5825 }, { "epoch": 1.7036116391285274, "grad_norm": 1.7897651312393703, "learning_rate": 1.180075249320296e-06, "loss": 0.6542010307312012, "step": 5826 }, { "epoch": 1.703904079543793, "grad_norm": 1.6365189888188503, "learning_rate": 1.1777978593001903e-06, "loss": 0.5371676087379456, "step": 5827 }, { "epoch": 1.7041965199590583, "grad_norm": 1.6793659914593386, "learning_rate": 1.1755225314161967e-06, "loss": 0.47583359479904175, "step": 5828 }, { "epoch": 1.7044889603743236, "grad_norm": 1.7363884838234833, "learning_rate": 1.173249266200156e-06, "loss": 0.5471247434616089, "step": 5829 }, { "epoch": 1.7047814007895892, "grad_norm": 1.850508925320166, "learning_rate": 1.1709780641834323e-06, "loss": 0.5095713138580322, "step": 5830 }, { "epoch": 1.7050738412048545, "grad_norm": 1.5373790027628114, "learning_rate": 1.1687089258969041e-06, "loss": 0.41944777965545654, "step": 5831 }, { "epoch": 1.7053662816201198, "grad_norm": 1.5434472143224902, "learning_rate": 1.1664418518709697e-06, "loss": 0.42380404472351074, "step": 5832 }, { "epoch": 1.7056587220353854, "grad_norm": 1.8798510100106, "learning_rate": 1.1641768426355427e-06, "loss": 0.5688038468360901, "step": 5833 }, { "epoch": 1.7059511624506507, "grad_norm": 1.6396391570153137, "learning_rate": 1.1619138987200562e-06, "loss": 0.5432788133621216, "step": 5834 }, { "epoch": 1.706243602865916, "grad_norm": 1.699260651340017, "learning_rate": 1.1596530206534606e-06, "loss": 0.5408512949943542, "step": 5835 }, { "epoch": 1.7065360432811816, "grad_norm": 1.5364052920051108, "learning_rate": 1.1573942089642198e-06, "loss": 0.5149247646331787, "step": 5836 }, { "epoch": 1.7068284836964467, "grad_norm": 1.6490213140214325, "learning_rate": 1.1551374641803193e-06, "loss": 0.36905592679977417, "step": 5837 }, { "epoch": 1.7071209241117122, "grad_norm": 1.7960598101415164, "learning_rate": 1.152882786829259e-06, "loss": 0.5370720624923706, "step": 5838 }, { "epoch": 1.7074133645269778, "grad_norm": 1.5874644037104577, "learning_rate": 1.1506301774380578e-06, "loss": 0.4535629153251648, "step": 5839 }, { "epoch": 1.7077058049422429, "grad_norm": 1.79916689116012, "learning_rate": 1.1483796365332455e-06, "loss": 0.5456075668334961, "step": 5840 }, { "epoch": 1.7079982453575084, "grad_norm": 1.4286640626946725, "learning_rate": 1.1461311646408756e-06, "loss": 0.5884554386138916, "step": 5841 }, { "epoch": 1.7082906857727738, "grad_norm": 1.6397329737807809, "learning_rate": 1.1438847622865125e-06, "loss": 0.605168879032135, "step": 5842 }, { "epoch": 1.708583126188039, "grad_norm": 1.5178839829112376, "learning_rate": 1.14164042999524e-06, "loss": 0.43739163875579834, "step": 5843 }, { "epoch": 1.7088755666033046, "grad_norm": 1.46949260133067, "learning_rate": 1.1393981682916578e-06, "loss": 0.4508574306964874, "step": 5844 }, { "epoch": 1.70916800701857, "grad_norm": 1.822138537734332, "learning_rate": 1.1371579776998798e-06, "loss": 0.5918034315109253, "step": 5845 }, { "epoch": 1.7094604474338353, "grad_norm": 2.0746386130567873, "learning_rate": 1.1349198587435373e-06, "loss": 0.5668582320213318, "step": 5846 }, { "epoch": 1.7097528878491008, "grad_norm": 1.9282537614980426, "learning_rate": 1.1326838119457784e-06, "loss": 0.6374846696853638, "step": 5847 }, { "epoch": 1.7100453282643662, "grad_norm": 1.8903891011788552, "learning_rate": 1.130449837829264e-06, "loss": 0.5074985027313232, "step": 5848 }, { "epoch": 1.7103377686796315, "grad_norm": 1.5190057242638555, "learning_rate": 1.1282179369161717e-06, "loss": 0.5012484788894653, "step": 5849 }, { "epoch": 1.710630209094897, "grad_norm": 1.632090745734556, "learning_rate": 1.1259881097281977e-06, "loss": 0.4417869746685028, "step": 5850 }, { "epoch": 1.7109226495101622, "grad_norm": 1.8294483106085377, "learning_rate": 1.1237603567865452e-06, "loss": 0.6032637357711792, "step": 5851 }, { "epoch": 1.7112150899254277, "grad_norm": 1.688609377749929, "learning_rate": 1.121534678611942e-06, "loss": 0.5790234804153442, "step": 5852 }, { "epoch": 1.711507530340693, "grad_norm": 1.591889646615377, "learning_rate": 1.1193110757246251e-06, "loss": 0.5436397194862366, "step": 5853 }, { "epoch": 1.7117999707559584, "grad_norm": 1.7117652881589365, "learning_rate": 1.11708954864435e-06, "loss": 0.5088083744049072, "step": 5854 }, { "epoch": 1.712092411171224, "grad_norm": 1.889980799223528, "learning_rate": 1.1148700978903826e-06, "loss": 0.5907719135284424, "step": 5855 }, { "epoch": 1.7123848515864892, "grad_norm": 1.567722853126729, "learning_rate": 1.1126527239815078e-06, "loss": 0.4744384288787842, "step": 5856 }, { "epoch": 1.7126772920017546, "grad_norm": 1.9312865174889629, "learning_rate": 1.110437427436023e-06, "loss": 0.6644346714019775, "step": 5857 }, { "epoch": 1.71296973241702, "grad_norm": 1.6765623554239069, "learning_rate": 1.10822420877174e-06, "loss": 0.4926042854785919, "step": 5858 }, { "epoch": 1.7132621728322854, "grad_norm": 1.6722485452227753, "learning_rate": 1.1060130685059845e-06, "loss": 0.47684335708618164, "step": 5859 }, { "epoch": 1.7135546132475508, "grad_norm": 1.8519560247307543, "learning_rate": 1.1038040071555988e-06, "loss": 0.5574014186859131, "step": 5860 }, { "epoch": 1.7138470536628163, "grad_norm": 1.737717748392033, "learning_rate": 1.101597025236939e-06, "loss": 0.6276485323905945, "step": 5861 }, { "epoch": 1.7141394940780816, "grad_norm": 1.7853097232505406, "learning_rate": 1.099392123265869e-06, "loss": 0.558611273765564, "step": 5862 }, { "epoch": 1.714431934493347, "grad_norm": 1.8318989515664625, "learning_rate": 1.097189301757773e-06, "loss": 0.5561566948890686, "step": 5863 }, { "epoch": 1.7147243749086125, "grad_norm": 1.7772127580066208, "learning_rate": 1.094988561227548e-06, "loss": 0.5360273122787476, "step": 5864 }, { "epoch": 1.7150168153238776, "grad_norm": 1.9869672499266697, "learning_rate": 1.0927899021896038e-06, "loss": 0.5572026968002319, "step": 5865 }, { "epoch": 1.7153092557391432, "grad_norm": 1.696631763346783, "learning_rate": 1.0905933251578626e-06, "loss": 0.4593105912208557, "step": 5866 }, { "epoch": 1.7156016961544085, "grad_norm": 1.7954251083275348, "learning_rate": 1.0883988306457627e-06, "loss": 0.5017558336257935, "step": 5867 }, { "epoch": 1.7158941365696738, "grad_norm": 1.6294086582619267, "learning_rate": 1.0862064191662524e-06, "loss": 0.4982030391693115, "step": 5868 }, { "epoch": 1.7161865769849394, "grad_norm": 1.5832146918310879, "learning_rate": 1.0840160912317943e-06, "loss": 0.5563114881515503, "step": 5869 }, { "epoch": 1.7164790174002047, "grad_norm": 1.6522408781609719, "learning_rate": 1.0818278473543652e-06, "loss": 0.4817348122596741, "step": 5870 }, { "epoch": 1.71677145781547, "grad_norm": 1.6923338540004815, "learning_rate": 1.079641688045453e-06, "loss": 0.47907108068466187, "step": 5871 }, { "epoch": 1.7170638982307356, "grad_norm": 1.985278987997586, "learning_rate": 1.0774576138160596e-06, "loss": 0.6158252954483032, "step": 5872 }, { "epoch": 1.717356338646001, "grad_norm": 1.712800633970283, "learning_rate": 1.0752756251767015e-06, "loss": 0.5336505174636841, "step": 5873 }, { "epoch": 1.7176487790612662, "grad_norm": 1.6889119428738892, "learning_rate": 1.0730957226374006e-06, "loss": 0.5806115865707397, "step": 5874 }, { "epoch": 1.7179412194765318, "grad_norm": 1.7163109676688793, "learning_rate": 1.070917906707698e-06, "loss": 0.3701411485671997, "step": 5875 }, { "epoch": 1.7182336598917969, "grad_norm": 1.5519162070562529, "learning_rate": 1.0687421778966445e-06, "loss": 0.5779517292976379, "step": 5876 }, { "epoch": 1.7185261003070624, "grad_norm": 1.5444011974555767, "learning_rate": 1.0665685367128041e-06, "loss": 0.43965232372283936, "step": 5877 }, { "epoch": 1.718818540722328, "grad_norm": 1.7154722678485648, "learning_rate": 1.064396983664253e-06, "loss": 0.4768058657646179, "step": 5878 }, { "epoch": 1.719110981137593, "grad_norm": 1.6286437020829267, "learning_rate": 1.0622275192585773e-06, "loss": 0.5331600904464722, "step": 5879 }, { "epoch": 1.7194034215528586, "grad_norm": 1.6603687606186237, "learning_rate": 1.0600601440028758e-06, "loss": 0.5495625734329224, "step": 5880 }, { "epoch": 1.719695861968124, "grad_norm": 1.6915455937474744, "learning_rate": 1.0578948584037608e-06, "loss": 0.4244312345981598, "step": 5881 }, { "epoch": 1.7199883023833893, "grad_norm": 1.7562786480710206, "learning_rate": 1.0557316629673531e-06, "loss": 0.4618447721004486, "step": 5882 }, { "epoch": 1.7202807427986548, "grad_norm": 1.3835850144546908, "learning_rate": 1.0535705581992873e-06, "loss": 0.4226785898208618, "step": 5883 }, { "epoch": 1.7205731832139202, "grad_norm": 1.8373576265806915, "learning_rate": 1.0514115446047101e-06, "loss": 0.5813404321670532, "step": 5884 }, { "epoch": 1.7208656236291855, "grad_norm": 1.774672318962678, "learning_rate": 1.0492546226882738e-06, "loss": 0.6700260639190674, "step": 5885 }, { "epoch": 1.721158064044451, "grad_norm": 1.8100136828076652, "learning_rate": 1.0470997929541494e-06, "loss": 0.6024131178855896, "step": 5886 }, { "epoch": 1.7214505044597164, "grad_norm": 1.8033126749427817, "learning_rate": 1.0449470559060125e-06, "loss": 0.6015123724937439, "step": 5887 }, { "epoch": 1.7217429448749817, "grad_norm": 1.714487906410119, "learning_rate": 1.0427964120470534e-06, "loss": 0.6631267070770264, "step": 5888 }, { "epoch": 1.7220353852902472, "grad_norm": 1.7445362923992234, "learning_rate": 1.0406478618799731e-06, "loss": 0.5267488956451416, "step": 5889 }, { "epoch": 1.7223278257055123, "grad_norm": 1.6683876570881706, "learning_rate": 1.038501405906982e-06, "loss": 0.5190263986587524, "step": 5890 }, { "epoch": 1.722620266120778, "grad_norm": 1.6678272928853268, "learning_rate": 1.0363570446297999e-06, "loss": 0.5253189206123352, "step": 5891 }, { "epoch": 1.7229127065360432, "grad_norm": 1.6306770585402846, "learning_rate": 1.0342147785496581e-06, "loss": 0.5271278619766235, "step": 5892 }, { "epoch": 1.7232051469513086, "grad_norm": 1.7373442044536598, "learning_rate": 1.0320746081672994e-06, "loss": 0.5284109711647034, "step": 5893 }, { "epoch": 1.723497587366574, "grad_norm": 1.764049872395232, "learning_rate": 1.0299365339829747e-06, "loss": 0.6119050979614258, "step": 5894 }, { "epoch": 1.7237900277818394, "grad_norm": 1.583925980020329, "learning_rate": 1.0278005564964488e-06, "loss": 0.42297711968421936, "step": 5895 }, { "epoch": 1.7240824681971048, "grad_norm": 1.7105013452989373, "learning_rate": 1.02566667620699e-06, "loss": 0.5923792123794556, "step": 5896 }, { "epoch": 1.7243749086123703, "grad_norm": 1.6831938137571334, "learning_rate": 1.023534893613377e-06, "loss": 0.4999189078807831, "step": 5897 }, { "epoch": 1.7246673490276356, "grad_norm": 1.6907699986400666, "learning_rate": 1.0214052092139082e-06, "loss": 0.49083560705184937, "step": 5898 }, { "epoch": 1.724959789442901, "grad_norm": 3.9391609144586437, "learning_rate": 1.0192776235063795e-06, "loss": 0.6001632213592529, "step": 5899 }, { "epoch": 1.7252522298581665, "grad_norm": 1.473933103211581, "learning_rate": 1.0171521369881044e-06, "loss": 0.4897228479385376, "step": 5900 }, { "epoch": 1.7255446702734318, "grad_norm": 1.6762179044603425, "learning_rate": 1.0150287501558997e-06, "loss": 0.44784292578697205, "step": 5901 }, { "epoch": 1.7258371106886972, "grad_norm": 1.542625612657722, "learning_rate": 1.0129074635060943e-06, "loss": 0.46105432510375977, "step": 5902 }, { "epoch": 1.7261295511039627, "grad_norm": 1.9028079699425045, "learning_rate": 1.0107882775345278e-06, "loss": 0.5805546045303345, "step": 5903 }, { "epoch": 1.7264219915192278, "grad_norm": 1.719859761694945, "learning_rate": 1.0086711927365488e-06, "loss": 0.560761570930481, "step": 5904 }, { "epoch": 1.7267144319344934, "grad_norm": 1.475103420661766, "learning_rate": 1.006556209607007e-06, "loss": 0.533979058265686, "step": 5905 }, { "epoch": 1.7270068723497587, "grad_norm": 1.7039894712110264, "learning_rate": 1.004443328640271e-06, "loss": 0.5742807984352112, "step": 5906 }, { "epoch": 1.727299312765024, "grad_norm": 1.9394101910903232, "learning_rate": 1.0023325503302129e-06, "loss": 0.5617523789405823, "step": 5907 }, { "epoch": 1.7275917531802896, "grad_norm": 1.620137966655423, "learning_rate": 1.0002238751702143e-06, "loss": 0.45596855878829956, "step": 5908 }, { "epoch": 1.727884193595555, "grad_norm": 1.523715810181856, "learning_rate": 9.981173036531655e-07, "loss": 0.4917908012866974, "step": 5909 }, { "epoch": 1.7281766340108202, "grad_norm": 1.8089323806924067, "learning_rate": 9.960128362714637e-07, "loss": 0.6204911470413208, "step": 5910 }, { "epoch": 1.7284690744260858, "grad_norm": 1.615074466715287, "learning_rate": 9.93910473517018e-07, "loss": 0.47288352251052856, "step": 5911 }, { "epoch": 1.728761514841351, "grad_norm": 1.9414111913595387, "learning_rate": 9.918102158812404e-07, "loss": 0.48668670654296875, "step": 5912 }, { "epoch": 1.7290539552566164, "grad_norm": 1.723740686191889, "learning_rate": 9.89712063855054e-07, "loss": 0.43311381340026855, "step": 5913 }, { "epoch": 1.729346395671882, "grad_norm": 1.748275288399291, "learning_rate": 9.876160179288886e-07, "loss": 0.5066087245941162, "step": 5914 }, { "epoch": 1.729638836087147, "grad_norm": 1.6099318260759374, "learning_rate": 9.855220785926856e-07, "loss": 0.6022528409957886, "step": 5915 }, { "epoch": 1.7299312765024126, "grad_norm": 1.6809686879748886, "learning_rate": 9.834302463358858e-07, "loss": 0.5288707613945007, "step": 5916 }, { "epoch": 1.7302237169176782, "grad_norm": 1.7087060764928856, "learning_rate": 9.813405216474436e-07, "loss": 0.6150302290916443, "step": 5917 }, { "epoch": 1.7305161573329433, "grad_norm": 1.7234099983807605, "learning_rate": 9.792529050158218e-07, "loss": 0.5431156158447266, "step": 5918 }, { "epoch": 1.7308085977482088, "grad_norm": 1.7871856102017598, "learning_rate": 9.771673969289851e-07, "loss": 0.6844080686569214, "step": 5919 }, { "epoch": 1.7311010381634742, "grad_norm": 1.9483136158091563, "learning_rate": 9.750839978744098e-07, "loss": 0.4778372049331665, "step": 5920 }, { "epoch": 1.7313934785787395, "grad_norm": 1.803034120095624, "learning_rate": 9.73002708339077e-07, "loss": 0.6913809776306152, "step": 5921 }, { "epoch": 1.731685918994005, "grad_norm": 1.5934425203745812, "learning_rate": 9.709235288094765e-07, "loss": 0.6289864778518677, "step": 5922 }, { "epoch": 1.7319783594092704, "grad_norm": 1.7803434049533318, "learning_rate": 9.68846459771604e-07, "loss": 0.4735794961452484, "step": 5923 }, { "epoch": 1.7322707998245357, "grad_norm": 1.7329775832839742, "learning_rate": 9.667715017109614e-07, "loss": 0.53554767370224, "step": 5924 }, { "epoch": 1.7325632402398012, "grad_norm": 1.9726609824515038, "learning_rate": 9.64698655112557e-07, "loss": 0.5118460655212402, "step": 5925 }, { "epoch": 1.7328556806550666, "grad_norm": 1.819236864509276, "learning_rate": 9.626279204609079e-07, "loss": 0.5739814043045044, "step": 5926 }, { "epoch": 1.733148121070332, "grad_norm": 1.6784156679062403, "learning_rate": 9.605592982400325e-07, "loss": 0.5716123580932617, "step": 5927 }, { "epoch": 1.7334405614855974, "grad_norm": 1.7916971306174196, "learning_rate": 9.584927889334605e-07, "loss": 0.5091898441314697, "step": 5928 }, { "epoch": 1.7337330019008625, "grad_norm": 1.6267503374739263, "learning_rate": 9.564283930242258e-07, "loss": 0.46946650743484497, "step": 5929 }, { "epoch": 1.734025442316128, "grad_norm": 1.6625890698419732, "learning_rate": 9.543661109948688e-07, "loss": 0.6238217353820801, "step": 5930 }, { "epoch": 1.7343178827313934, "grad_norm": 1.8870256552743607, "learning_rate": 9.52305943327434e-07, "loss": 0.7464175224304199, "step": 5931 }, { "epoch": 1.7346103231466588, "grad_norm": 2.417177332317345, "learning_rate": 9.502478905034751e-07, "loss": 0.6064578294754028, "step": 5932 }, { "epoch": 1.7349027635619243, "grad_norm": 1.5711166860907437, "learning_rate": 9.481919530040484e-07, "loss": 0.5703303813934326, "step": 5933 }, { "epoch": 1.7351952039771896, "grad_norm": 1.773413757718004, "learning_rate": 9.461381313097162e-07, "loss": 0.5570278167724609, "step": 5934 }, { "epoch": 1.735487644392455, "grad_norm": 1.795987369299435, "learning_rate": 9.440864259005477e-07, "loss": 0.54972243309021, "step": 5935 }, { "epoch": 1.7357800848077205, "grad_norm": 1.6140356285907533, "learning_rate": 9.420368372561161e-07, "loss": 0.5670010447502136, "step": 5936 }, { "epoch": 1.7360725252229858, "grad_norm": 1.642113144044588, "learning_rate": 9.399893658555026e-07, "loss": 0.5306927561759949, "step": 5937 }, { "epoch": 1.7363649656382512, "grad_norm": 1.5565759572639428, "learning_rate": 9.379440121772876e-07, "loss": 0.5080308318138123, "step": 5938 }, { "epoch": 1.7366574060535167, "grad_norm": 1.5576076668453387, "learning_rate": 9.359007766995609e-07, "loss": 0.5444519519805908, "step": 5939 }, { "epoch": 1.736949846468782, "grad_norm": 1.737287044912212, "learning_rate": 9.338596598999172e-07, "loss": 0.5353262424468994, "step": 5940 }, { "epoch": 1.7372422868840474, "grad_norm": 1.5405646785157867, "learning_rate": 9.318206622554549e-07, "loss": 0.4766794443130493, "step": 5941 }, { "epoch": 1.737534727299313, "grad_norm": 1.5818937282065444, "learning_rate": 9.29783784242777e-07, "loss": 0.4913482666015625, "step": 5942 }, { "epoch": 1.737827167714578, "grad_norm": 1.5030657740252151, "learning_rate": 9.277490263379918e-07, "loss": 0.47637009620666504, "step": 5943 }, { "epoch": 1.7381196081298436, "grad_norm": 1.8131560819786492, "learning_rate": 9.25716389016712e-07, "loss": 0.5122126936912537, "step": 5944 }, { "epoch": 1.738412048545109, "grad_norm": 1.51801570238093, "learning_rate": 9.236858727540543e-07, "loss": 0.5263532400131226, "step": 5945 }, { "epoch": 1.7387044889603742, "grad_norm": 1.607505719698849, "learning_rate": 9.216574780246379e-07, "loss": 0.5214182734489441, "step": 5946 }, { "epoch": 1.7389969293756398, "grad_norm": 1.877073258708154, "learning_rate": 9.196312053025891e-07, "loss": 0.5955429077148438, "step": 5947 }, { "epoch": 1.739289369790905, "grad_norm": 1.6543213511410424, "learning_rate": 9.176070550615379e-07, "loss": 0.4809807538986206, "step": 5948 }, { "epoch": 1.7395818102061704, "grad_norm": 1.882804975326707, "learning_rate": 9.155850277746148e-07, "loss": 0.4769969582557678, "step": 5949 }, { "epoch": 1.739874250621436, "grad_norm": 2.4545379886365954, "learning_rate": 9.135651239144561e-07, "loss": 0.48527538776397705, "step": 5950 }, { "epoch": 1.7401666910367013, "grad_norm": 1.696389032166004, "learning_rate": 9.115473439532041e-07, "loss": 0.6703393459320068, "step": 5951 }, { "epoch": 1.7404591314519666, "grad_norm": 1.807366721076005, "learning_rate": 9.095316883625016e-07, "loss": 0.5742951035499573, "step": 5952 }, { "epoch": 1.7407515718672322, "grad_norm": 1.9552666950039521, "learning_rate": 9.075181576134961e-07, "loss": 0.6285614967346191, "step": 5953 }, { "epoch": 1.7410440122824973, "grad_norm": 1.6961990538831806, "learning_rate": 9.055067521768379e-07, "loss": 0.5872488021850586, "step": 5954 }, { "epoch": 1.7413364526977628, "grad_norm": 1.6900638369260592, "learning_rate": 9.034974725226808e-07, "loss": 0.5483776330947876, "step": 5955 }, { "epoch": 1.7416288931130284, "grad_norm": 1.838843211951185, "learning_rate": 9.014903191206825e-07, "loss": 0.4913061261177063, "step": 5956 }, { "epoch": 1.7419213335282935, "grad_norm": 1.6413412279440867, "learning_rate": 8.994852924400022e-07, "loss": 0.5431212186813354, "step": 5957 }, { "epoch": 1.742213773943559, "grad_norm": 1.735940615294129, "learning_rate": 8.974823929493015e-07, "loss": 0.5391141176223755, "step": 5958 }, { "epoch": 1.7425062143588244, "grad_norm": 1.455007956070738, "learning_rate": 8.954816211167483e-07, "loss": 0.48980265855789185, "step": 5959 }, { "epoch": 1.7427986547740897, "grad_norm": 1.6465722416646151, "learning_rate": 8.934829774100118e-07, "loss": 0.6747336387634277, "step": 5960 }, { "epoch": 1.7430910951893552, "grad_norm": 2.048914745001018, "learning_rate": 8.914864622962582e-07, "loss": 0.4911282956600189, "step": 5961 }, { "epoch": 1.7433835356046206, "grad_norm": 1.6999465895023511, "learning_rate": 8.894920762421644e-07, "loss": 0.5863965153694153, "step": 5962 }, { "epoch": 1.7436759760198859, "grad_norm": 1.6964011957158196, "learning_rate": 8.87499819713904e-07, "loss": 0.5413792729377747, "step": 5963 }, { "epoch": 1.7439684164351514, "grad_norm": 1.8650590121272839, "learning_rate": 8.855096931771568e-07, "loss": 0.5288723707199097, "step": 5964 }, { "epoch": 1.7442608568504168, "grad_norm": 1.72339918808855, "learning_rate": 8.835216970971006e-07, "loss": 0.5129783749580383, "step": 5965 }, { "epoch": 1.744553297265682, "grad_norm": 1.7489856693904517, "learning_rate": 8.815358319384193e-07, "loss": 0.5606918334960938, "step": 5966 }, { "epoch": 1.7448457376809476, "grad_norm": 1.9036006380739827, "learning_rate": 8.79552098165296e-07, "loss": 0.6277288198471069, "step": 5967 }, { "epoch": 1.7451381780962127, "grad_norm": 1.7432749923566282, "learning_rate": 8.775704962414167e-07, "loss": 0.5390176773071289, "step": 5968 }, { "epoch": 1.7454306185114783, "grad_norm": 1.8645943677337757, "learning_rate": 8.755910266299684e-07, "loss": 0.680462121963501, "step": 5969 }, { "epoch": 1.7457230589267436, "grad_norm": 1.678166381653785, "learning_rate": 8.736136897936398e-07, "loss": 0.5134397149085999, "step": 5970 }, { "epoch": 1.746015499342009, "grad_norm": 1.796274905651791, "learning_rate": 8.716384861946248e-07, "loss": 0.6280460357666016, "step": 5971 }, { "epoch": 1.7463079397572745, "grad_norm": 1.8396010080181593, "learning_rate": 8.696654162946094e-07, "loss": 0.5425370931625366, "step": 5972 }, { "epoch": 1.7466003801725398, "grad_norm": 1.7657169836698012, "learning_rate": 8.676944805547882e-07, "loss": 0.5831055045127869, "step": 5973 }, { "epoch": 1.7468928205878052, "grad_norm": 1.3865571188404813, "learning_rate": 8.657256794358592e-07, "loss": 0.4615570306777954, "step": 5974 }, { "epoch": 1.7471852610030707, "grad_norm": 1.6631260131171646, "learning_rate": 8.637590133980145e-07, "loss": 0.5727440118789673, "step": 5975 }, { "epoch": 1.747477701418336, "grad_norm": 1.6981377401436724, "learning_rate": 8.617944829009517e-07, "loss": 0.5652801990509033, "step": 5976 }, { "epoch": 1.7477701418336014, "grad_norm": 1.6704888560345945, "learning_rate": 8.59832088403868e-07, "loss": 0.42455562949180603, "step": 5977 }, { "epoch": 1.748062582248867, "grad_norm": 1.8565352683598422, "learning_rate": 8.578718303654588e-07, "loss": 0.526951789855957, "step": 5978 }, { "epoch": 1.7483550226641322, "grad_norm": 1.5113931171346078, "learning_rate": 8.559137092439252e-07, "loss": 0.45547354221343994, "step": 5979 }, { "epoch": 1.7486474630793976, "grad_norm": 1.843493314178274, "learning_rate": 8.539577254969667e-07, "loss": 0.5470790863037109, "step": 5980 }, { "epoch": 1.748939903494663, "grad_norm": 1.6766357010415907, "learning_rate": 8.520038795817798e-07, "loss": 0.5608032941818237, "step": 5981 }, { "epoch": 1.7492323439099282, "grad_norm": 1.7251948475523264, "learning_rate": 8.500521719550648e-07, "loss": 0.5243809223175049, "step": 5982 }, { "epoch": 1.7495247843251938, "grad_norm": 1.7843504248813373, "learning_rate": 8.481026030730222e-07, "loss": 0.5040958523750305, "step": 5983 }, { "epoch": 1.749817224740459, "grad_norm": 1.9016982717852353, "learning_rate": 8.461551733913509e-07, "loss": 0.5026291012763977, "step": 5984 }, { "epoch": 1.7501096651557244, "grad_norm": 1.578287817505696, "learning_rate": 8.442098833652523e-07, "loss": 0.5273059010505676, "step": 5985 }, { "epoch": 1.75040210557099, "grad_norm": 1.7872787423587175, "learning_rate": 8.42266733449425e-07, "loss": 0.5811910629272461, "step": 5986 }, { "epoch": 1.7506945459862553, "grad_norm": 1.9383664928260165, "learning_rate": 8.4032572409807e-07, "loss": 0.6078274250030518, "step": 5987 }, { "epoch": 1.7509869864015206, "grad_norm": 1.8956639494069205, "learning_rate": 8.383868557648833e-07, "loss": 0.5214031338691711, "step": 5988 }, { "epoch": 1.7512794268167862, "grad_norm": 1.8686899180431094, "learning_rate": 8.364501289030669e-07, "loss": 0.5464918613433838, "step": 5989 }, { "epoch": 1.7515718672320515, "grad_norm": 1.9577387999849984, "learning_rate": 8.345155439653175e-07, "loss": 0.48296278715133667, "step": 5990 }, { "epoch": 1.7518643076473168, "grad_norm": 1.6097156536359971, "learning_rate": 8.325831014038344e-07, "loss": 0.5441919565200806, "step": 5991 }, { "epoch": 1.7521567480625824, "grad_norm": 1.524249865256617, "learning_rate": 8.306528016703097e-07, "loss": 0.4623905420303345, "step": 5992 }, { "epoch": 1.7524491884778475, "grad_norm": 1.7850630013083288, "learning_rate": 8.287246452159437e-07, "loss": 0.5671495795249939, "step": 5993 }, { "epoch": 1.752741628893113, "grad_norm": 1.7371249179959158, "learning_rate": 8.267986324914278e-07, "loss": 0.5400685667991638, "step": 5994 }, { "epoch": 1.7530340693083786, "grad_norm": 1.7239850907759944, "learning_rate": 8.24874763946959e-07, "loss": 0.4362148642539978, "step": 5995 }, { "epoch": 1.7533265097236437, "grad_norm": 1.7548276097653166, "learning_rate": 8.229530400322283e-07, "loss": 0.554877519607544, "step": 5996 }, { "epoch": 1.7536189501389092, "grad_norm": 1.6421753593232726, "learning_rate": 8.210334611964266e-07, "loss": 0.5239896774291992, "step": 5997 }, { "epoch": 1.7539113905541746, "grad_norm": 1.9442998633220852, "learning_rate": 8.191160278882438e-07, "loss": 0.4729669988155365, "step": 5998 }, { "epoch": 1.7542038309694399, "grad_norm": 1.5789777380903094, "learning_rate": 8.172007405558702e-07, "loss": 0.5449322462081909, "step": 5999 }, { "epoch": 1.7544962713847054, "grad_norm": 1.6329056939447448, "learning_rate": 8.1528759964699e-07, "loss": 0.5422194600105286, "step": 6000 }, { "epoch": 1.7547887117999708, "grad_norm": 1.4174806038648198, "learning_rate": 8.1337660560879e-07, "loss": 0.3854302763938904, "step": 6001 }, { "epoch": 1.755081152215236, "grad_norm": 1.7209418471597917, "learning_rate": 8.114677588879549e-07, "loss": 0.4678449034690857, "step": 6002 }, { "epoch": 1.7553735926305016, "grad_norm": 1.5464176931987315, "learning_rate": 8.095610599306614e-07, "loss": 0.5462471842765808, "step": 6003 }, { "epoch": 1.755666033045767, "grad_norm": 2.204727836372247, "learning_rate": 8.076565091825916e-07, "loss": 0.6314511299133301, "step": 6004 }, { "epoch": 1.7559584734610323, "grad_norm": 1.5748030564701405, "learning_rate": 8.057541070889229e-07, "loss": 0.6373077630996704, "step": 6005 }, { "epoch": 1.7562509138762978, "grad_norm": 1.3531361411828478, "learning_rate": 8.038538540943297e-07, "loss": 0.5279273986816406, "step": 6006 }, { "epoch": 1.756543354291563, "grad_norm": 1.8888532901747122, "learning_rate": 8.019557506429843e-07, "loss": 0.5645443201065063, "step": 6007 }, { "epoch": 1.7568357947068285, "grad_norm": 1.776791412383221, "learning_rate": 8.000597971785573e-07, "loss": 0.552385151386261, "step": 6008 }, { "epoch": 1.757128235122094, "grad_norm": 1.8476397874412334, "learning_rate": 7.981659941442154e-07, "loss": 0.5790541172027588, "step": 6009 }, { "epoch": 1.7574206755373591, "grad_norm": 1.8909444917759248, "learning_rate": 7.962743419826247e-07, "loss": 0.550809383392334, "step": 6010 }, { "epoch": 1.7577131159526247, "grad_norm": 1.845124979293289, "learning_rate": 7.943848411359479e-07, "loss": 0.4659814238548279, "step": 6011 }, { "epoch": 1.75800555636789, "grad_norm": 1.8856668900422473, "learning_rate": 7.924974920458428e-07, "loss": 0.5099040269851685, "step": 6012 }, { "epoch": 1.7582979967831553, "grad_norm": 1.8389637809818669, "learning_rate": 7.906122951534678e-07, "loss": 0.4819038510322571, "step": 6013 }, { "epoch": 1.758590437198421, "grad_norm": 1.6198962208116707, "learning_rate": 7.887292508994737e-07, "loss": 0.4033840298652649, "step": 6014 }, { "epoch": 1.7588828776136862, "grad_norm": 2.0694906070649397, "learning_rate": 7.868483597240117e-07, "loss": 0.6316418647766113, "step": 6015 }, { "epoch": 1.7591753180289516, "grad_norm": 1.574018695954754, "learning_rate": 7.84969622066728e-07, "loss": 0.5141040682792664, "step": 6016 }, { "epoch": 1.759467758444217, "grad_norm": 1.625714616568423, "learning_rate": 7.830930383667668e-07, "loss": 0.44808077812194824, "step": 6017 }, { "epoch": 1.7597601988594824, "grad_norm": 1.6060850378753004, "learning_rate": 7.812186090627694e-07, "loss": 0.5661089420318604, "step": 6018 }, { "epoch": 1.7600526392747478, "grad_norm": 1.553528332849771, "learning_rate": 7.793463345928697e-07, "loss": 0.487590491771698, "step": 6019 }, { "epoch": 1.7603450796900133, "grad_norm": 1.9699234516767667, "learning_rate": 7.774762153947024e-07, "loss": 0.5775022506713867, "step": 6020 }, { "epoch": 1.7606375201052784, "grad_norm": 1.7091892859281639, "learning_rate": 7.756082519053965e-07, "loss": 0.5714563131332397, "step": 6021 }, { "epoch": 1.760929960520544, "grad_norm": 1.8764497127243964, "learning_rate": 7.73742444561576e-07, "loss": 0.6063593626022339, "step": 6022 }, { "epoch": 1.7612224009358093, "grad_norm": 1.7254357706950765, "learning_rate": 7.718787937993622e-07, "loss": 0.48034632205963135, "step": 6023 }, { "epoch": 1.7615148413510746, "grad_norm": 1.4591503666123415, "learning_rate": 7.700173000543742e-07, "loss": 0.6003588438034058, "step": 6024 }, { "epoch": 1.7618072817663402, "grad_norm": 1.6378303717233282, "learning_rate": 7.681579637617209e-07, "loss": 0.42842140793800354, "step": 6025 }, { "epoch": 1.7620997221816055, "grad_norm": 1.502196803812996, "learning_rate": 7.663007853560145e-07, "loss": 0.5235859155654907, "step": 6026 }, { "epoch": 1.7623921625968708, "grad_norm": 1.6904284507093605, "learning_rate": 7.644457652713566e-07, "loss": 0.47140365839004517, "step": 6027 }, { "epoch": 1.7626846030121364, "grad_norm": 1.6014343948293654, "learning_rate": 7.625929039413483e-07, "loss": 0.53680419921875, "step": 6028 }, { "epoch": 1.7629770434274017, "grad_norm": 1.6173156649426792, "learning_rate": 7.60742201799084e-07, "loss": 0.5280998349189758, "step": 6029 }, { "epoch": 1.763269483842667, "grad_norm": 1.58299525140219, "learning_rate": 7.588936592771545e-07, "loss": 0.49653276801109314, "step": 6030 }, { "epoch": 1.7635619242579326, "grad_norm": 1.6031325431493386, "learning_rate": 7.570472768076464e-07, "loss": 0.511070966720581, "step": 6031 }, { "epoch": 1.7638543646731977, "grad_norm": 1.9173967106238505, "learning_rate": 7.552030548221379e-07, "loss": 0.6601030826568604, "step": 6032 }, { "epoch": 1.7641468050884632, "grad_norm": 1.7630822043129881, "learning_rate": 7.533609937517072e-07, "loss": 0.6216480731964111, "step": 6033 }, { "epoch": 1.7644392455037288, "grad_norm": 2.1444282721386765, "learning_rate": 7.515210940269224e-07, "loss": 0.7237618565559387, "step": 6034 }, { "epoch": 1.7647316859189939, "grad_norm": 1.9895778147794236, "learning_rate": 7.496833560778527e-07, "loss": 0.4979498088359833, "step": 6035 }, { "epoch": 1.7650241263342594, "grad_norm": 1.5816967377469986, "learning_rate": 7.478477803340533e-07, "loss": 0.49408191442489624, "step": 6036 }, { "epoch": 1.7653165667495248, "grad_norm": 1.8439349693338256, "learning_rate": 7.460143672245823e-07, "loss": 0.524259626865387, "step": 6037 }, { "epoch": 1.76560900716479, "grad_norm": 1.8574151410796558, "learning_rate": 7.441831171779878e-07, "loss": 0.625320315361023, "step": 6038 }, { "epoch": 1.7659014475800556, "grad_norm": 1.7217980866482836, "learning_rate": 7.42354030622312e-07, "loss": 0.5971028804779053, "step": 6039 }, { "epoch": 1.766193887995321, "grad_norm": 1.5069481360511938, "learning_rate": 7.405271079850951e-07, "loss": 0.48935002088546753, "step": 6040 }, { "epoch": 1.7664863284105863, "grad_norm": 1.7616973297205794, "learning_rate": 7.387023496933687e-07, "loss": 0.46346336603164673, "step": 6041 }, { "epoch": 1.7667787688258518, "grad_norm": 1.5425066644175864, "learning_rate": 7.368797561736574e-07, "loss": 0.5135314464569092, "step": 6042 }, { "epoch": 1.7670712092411172, "grad_norm": 1.7938719309176694, "learning_rate": 7.350593278519824e-07, "loss": 0.45815128087997437, "step": 6043 }, { "epoch": 1.7673636496563825, "grad_norm": 1.8253657375894647, "learning_rate": 7.332410651538591e-07, "loss": 0.5663015246391296, "step": 6044 }, { "epoch": 1.767656090071648, "grad_norm": 1.6737365706300193, "learning_rate": 7.314249685042929e-07, "loss": 0.5323490500450134, "step": 6045 }, { "epoch": 1.7679485304869131, "grad_norm": 1.8380863614801877, "learning_rate": 7.296110383277866e-07, "loss": 0.5489768981933594, "step": 6046 }, { "epoch": 1.7682409709021787, "grad_norm": 1.867533811207324, "learning_rate": 7.277992750483364e-07, "loss": 0.5951086282730103, "step": 6047 }, { "epoch": 1.7685334113174442, "grad_norm": 1.6688539257267474, "learning_rate": 7.259896790894271e-07, "loss": 0.48228102922439575, "step": 6048 }, { "epoch": 1.7688258517327093, "grad_norm": 1.7579049817410466, "learning_rate": 7.241822508740448e-07, "loss": 0.6318891644477844, "step": 6049 }, { "epoch": 1.769118292147975, "grad_norm": 1.967894881109258, "learning_rate": 7.223769908246636e-07, "loss": 0.4966656267642975, "step": 6050 }, { "epoch": 1.7694107325632402, "grad_norm": 1.7465352091582635, "learning_rate": 7.205738993632516e-07, "loss": 0.5645290613174438, "step": 6051 }, { "epoch": 1.7697031729785055, "grad_norm": 1.8324400656837103, "learning_rate": 7.187729769112717e-07, "loss": 0.560075044631958, "step": 6052 }, { "epoch": 1.769995613393771, "grad_norm": 1.658346896913261, "learning_rate": 7.169742238896771e-07, "loss": 0.6375163793563843, "step": 6053 }, { "epoch": 1.7702880538090364, "grad_norm": 1.9991114191844357, "learning_rate": 7.15177640718916e-07, "loss": 0.5620392560958862, "step": 6054 }, { "epoch": 1.7705804942243017, "grad_norm": 1.7885795694198106, "learning_rate": 7.133832278189301e-07, "loss": 0.5382653474807739, "step": 6055 }, { "epoch": 1.7708729346395673, "grad_norm": 1.954649524899457, "learning_rate": 7.115909856091497e-07, "loss": 0.502597451210022, "step": 6056 }, { "epoch": 1.7711653750548326, "grad_norm": 1.782753780230982, "learning_rate": 7.098009145085016e-07, "loss": 0.5876599550247192, "step": 6057 }, { "epoch": 1.771457815470098, "grad_norm": 1.7624219528533958, "learning_rate": 7.080130149354048e-07, "loss": 0.5164280533790588, "step": 6058 }, { "epoch": 1.7717502558853635, "grad_norm": 1.7004652166347358, "learning_rate": 7.062272873077691e-07, "loss": 0.5192137360572815, "step": 6059 }, { "epoch": 1.7720426963006286, "grad_norm": 1.6924472823946135, "learning_rate": 7.044437320429987e-07, "loss": 0.5298370122909546, "step": 6060 }, { "epoch": 1.7723351367158942, "grad_norm": 1.671988873461514, "learning_rate": 7.026623495579876e-07, "loss": 0.5099462270736694, "step": 6061 }, { "epoch": 1.7726275771311595, "grad_norm": 1.8314661737989666, "learning_rate": 7.00883140269123e-07, "loss": 0.6061269640922546, "step": 6062 }, { "epoch": 1.7729200175464248, "grad_norm": 1.9189229950794147, "learning_rate": 6.991061045922854e-07, "loss": 0.683641254901886, "step": 6063 }, { "epoch": 1.7732124579616904, "grad_norm": 2.089118565246571, "learning_rate": 6.973312429428458e-07, "loss": 0.6294830441474915, "step": 6064 }, { "epoch": 1.7735048983769557, "grad_norm": 1.6252098698149335, "learning_rate": 6.95558555735667e-07, "loss": 0.40493613481521606, "step": 6065 }, { "epoch": 1.773797338792221, "grad_norm": 1.7745752298261492, "learning_rate": 6.93788043385103e-07, "loss": 0.501255452632904, "step": 6066 }, { "epoch": 1.7740897792074866, "grad_norm": 1.7883463098117711, "learning_rate": 6.920197063050038e-07, "loss": 0.6004104614257812, "step": 6067 }, { "epoch": 1.7743822196227519, "grad_norm": 1.5939834110995985, "learning_rate": 6.902535449087023e-07, "loss": 0.48683321475982666, "step": 6068 }, { "epoch": 1.7746746600380172, "grad_norm": 1.7279814402431617, "learning_rate": 6.884895596090302e-07, "loss": 0.6048111319541931, "step": 6069 }, { "epoch": 1.7749671004532828, "grad_norm": 1.8759604993064984, "learning_rate": 6.867277508183101e-07, "loss": 0.5532732009887695, "step": 6070 }, { "epoch": 1.7752595408685479, "grad_norm": 2.066556008321799, "learning_rate": 6.849681189483515e-07, "loss": 0.544552206993103, "step": 6071 }, { "epoch": 1.7755519812838134, "grad_norm": 1.9161876673278242, "learning_rate": 6.832106644104586e-07, "loss": 0.5114158391952515, "step": 6072 }, { "epoch": 1.775844421699079, "grad_norm": 1.6996182780694216, "learning_rate": 6.814553876154273e-07, "loss": 0.45777493715286255, "step": 6073 }, { "epoch": 1.776136862114344, "grad_norm": 1.6209289540377791, "learning_rate": 6.797022889735405e-07, "loss": 0.5449005365371704, "step": 6074 }, { "epoch": 1.7764293025296096, "grad_norm": 1.8749070330960134, "learning_rate": 6.779513688945749e-07, "loss": 0.6308485865592957, "step": 6075 }, { "epoch": 1.776721742944875, "grad_norm": 1.951122544814841, "learning_rate": 6.762026277877986e-07, "loss": 0.5904842019081116, "step": 6076 }, { "epoch": 1.7770141833601403, "grad_norm": 1.8358819377761475, "learning_rate": 6.744560660619681e-07, "loss": 0.6681115627288818, "step": 6077 }, { "epoch": 1.7773066237754058, "grad_norm": 1.7337774705028348, "learning_rate": 6.727116841253334e-07, "loss": 0.5084429979324341, "step": 6078 }, { "epoch": 1.7775990641906712, "grad_norm": 1.706737040250044, "learning_rate": 6.709694823856305e-07, "loss": 0.5705291032791138, "step": 6079 }, { "epoch": 1.7778915046059365, "grad_norm": 1.541912819246542, "learning_rate": 6.692294612500894e-07, "loss": 0.6481744050979614, "step": 6080 }, { "epoch": 1.778183945021202, "grad_norm": 1.5164317234096627, "learning_rate": 6.67491621125429e-07, "loss": 0.5236573815345764, "step": 6081 }, { "epoch": 1.7784763854364674, "grad_norm": 1.761941770239031, "learning_rate": 6.657559624178611e-07, "loss": 0.5169326663017273, "step": 6082 }, { "epoch": 1.7787688258517327, "grad_norm": 1.7653960525219785, "learning_rate": 6.640224855330824e-07, "loss": 0.5304254293441772, "step": 6083 }, { "epoch": 1.7790612662669982, "grad_norm": 1.7073706399680681, "learning_rate": 6.622911908762852e-07, "loss": 0.457882285118103, "step": 6084 }, { "epoch": 1.7793537066822633, "grad_norm": 1.4459810475641077, "learning_rate": 6.605620788521472e-07, "loss": 0.48427796363830566, "step": 6085 }, { "epoch": 1.7796461470975289, "grad_norm": 1.7511368613506917, "learning_rate": 6.588351498648382e-07, "loss": 0.598512589931488, "step": 6086 }, { "epoch": 1.7799385875127944, "grad_norm": 1.6445184894388314, "learning_rate": 6.571104043180188e-07, "loss": 0.5065094232559204, "step": 6087 }, { "epoch": 1.7802310279280595, "grad_norm": 1.7505635404599922, "learning_rate": 6.553878426148364e-07, "loss": 0.5493142008781433, "step": 6088 }, { "epoch": 1.780523468343325, "grad_norm": 1.5236545905427594, "learning_rate": 6.5366746515793e-07, "loss": 0.40520578622817993, "step": 6089 }, { "epoch": 1.7808159087585904, "grad_norm": 1.6562045226817075, "learning_rate": 6.51949272349427e-07, "loss": 0.5416547656059265, "step": 6090 }, { "epoch": 1.7811083491738557, "grad_norm": 1.5389792406208165, "learning_rate": 6.502332645909438e-07, "loss": 0.4531989097595215, "step": 6091 }, { "epoch": 1.7814007895891213, "grad_norm": 1.9811412419033423, "learning_rate": 6.485194422835872e-07, "loss": 0.6385304927825928, "step": 6092 }, { "epoch": 1.7816932300043866, "grad_norm": 1.631678357707061, "learning_rate": 6.468078058279537e-07, "loss": 0.5503095388412476, "step": 6093 }, { "epoch": 1.781985670419652, "grad_norm": 1.810992666384156, "learning_rate": 6.450983556241264e-07, "loss": 0.5184366703033447, "step": 6094 }, { "epoch": 1.7822781108349175, "grad_norm": 1.8021498649724184, "learning_rate": 6.433910920716813e-07, "loss": 0.5211689472198486, "step": 6095 }, { "epoch": 1.7825705512501828, "grad_norm": 1.5495698877916986, "learning_rate": 6.416860155696781e-07, "loss": 0.7357909679412842, "step": 6096 }, { "epoch": 1.7828629916654481, "grad_norm": 1.6814949660424658, "learning_rate": 6.399831265166689e-07, "loss": 0.6283953189849854, "step": 6097 }, { "epoch": 1.7831554320807137, "grad_norm": 1.7274003515879492, "learning_rate": 6.382824253106945e-07, "loss": 0.45040953159332275, "step": 6098 }, { "epoch": 1.7834478724959788, "grad_norm": 1.9179221464776945, "learning_rate": 6.365839123492834e-07, "loss": 0.5056609511375427, "step": 6099 }, { "epoch": 1.7837403129112444, "grad_norm": 1.4295507016254647, "learning_rate": 6.348875880294536e-07, "loss": 0.4940416216850281, "step": 6100 }, { "epoch": 1.7840327533265097, "grad_norm": 1.487738102541406, "learning_rate": 6.33193452747708e-07, "loss": 0.45796072483062744, "step": 6101 }, { "epoch": 1.784325193741775, "grad_norm": 1.5314389713015535, "learning_rate": 6.315015069000408e-07, "loss": 0.4828432500362396, "step": 6102 }, { "epoch": 1.7846176341570406, "grad_norm": 1.7652995666195541, "learning_rate": 6.298117508819357e-07, "loss": 0.5564515590667725, "step": 6103 }, { "epoch": 1.7849100745723059, "grad_norm": 1.7672116497467336, "learning_rate": 6.281241850883624e-07, "loss": 0.5160977840423584, "step": 6104 }, { "epoch": 1.7852025149875712, "grad_norm": 1.6835388368372863, "learning_rate": 6.264388099137775e-07, "loss": 0.585543155670166, "step": 6105 }, { "epoch": 1.7854949554028368, "grad_norm": 1.9025389414417693, "learning_rate": 6.247556257521303e-07, "loss": 0.5377194881439209, "step": 6106 }, { "epoch": 1.785787395818102, "grad_norm": 1.6124331818311004, "learning_rate": 6.230746329968518e-07, "loss": 0.46788060665130615, "step": 6107 }, { "epoch": 1.7860798362333674, "grad_norm": 1.481941465563148, "learning_rate": 6.213958320408664e-07, "loss": 0.511722207069397, "step": 6108 }, { "epoch": 1.786372276648633, "grad_norm": 1.7380505303184415, "learning_rate": 6.197192232765814e-07, "loss": 0.5609079599380493, "step": 6109 }, { "epoch": 1.786664717063898, "grad_norm": 1.5715739237199864, "learning_rate": 6.180448070958955e-07, "loss": 0.47641855478286743, "step": 6110 }, { "epoch": 1.7869571574791636, "grad_norm": 1.4072609352957208, "learning_rate": 6.163725838901946e-07, "loss": 0.4209919273853302, "step": 6111 }, { "epoch": 1.7872495978944292, "grad_norm": 1.7120783337900378, "learning_rate": 6.147025540503459e-07, "loss": 0.6012829542160034, "step": 6112 }, { "epoch": 1.7875420383096943, "grad_norm": 1.8789998564305304, "learning_rate": 6.130347179667129e-07, "loss": 0.6112918853759766, "step": 6113 }, { "epoch": 1.7878344787249598, "grad_norm": 1.8641199827985835, "learning_rate": 6.113690760291402e-07, "loss": 0.6370030641555786, "step": 6114 }, { "epoch": 1.7881269191402251, "grad_norm": 1.837749741108103, "learning_rate": 6.097056286269631e-07, "loss": 0.5385129451751709, "step": 6115 }, { "epoch": 1.7884193595554905, "grad_norm": 1.7733960362556163, "learning_rate": 6.080443761490007e-07, "loss": 0.4707196354866028, "step": 6116 }, { "epoch": 1.788711799970756, "grad_norm": 1.8302621423982353, "learning_rate": 6.063853189835611e-07, "loss": 0.5361602306365967, "step": 6117 }, { "epoch": 1.7890042403860213, "grad_norm": 1.592603561791519, "learning_rate": 6.047284575184398e-07, "loss": 0.48841261863708496, "step": 6118 }, { "epoch": 1.7892966808012867, "grad_norm": 1.6413123655048356, "learning_rate": 6.030737921409169e-07, "loss": 0.47491973638534546, "step": 6119 }, { "epoch": 1.7895891212165522, "grad_norm": 1.608045516338794, "learning_rate": 6.014213232377608e-07, "loss": 0.4579542875289917, "step": 6120 }, { "epoch": 1.7898815616318176, "grad_norm": 1.7739986275669979, "learning_rate": 5.997710511952259e-07, "loss": 0.4517485499382019, "step": 6121 }, { "epoch": 1.7901740020470829, "grad_norm": 1.61243285020885, "learning_rate": 5.981229763990559e-07, "loss": 0.5656695365905762, "step": 6122 }, { "epoch": 1.7904664424623484, "grad_norm": 1.8328920976142473, "learning_rate": 5.964770992344737e-07, "loss": 0.5000064373016357, "step": 6123 }, { "epoch": 1.7907588828776135, "grad_norm": 1.691423776793607, "learning_rate": 5.948334200861927e-07, "loss": 0.4823925495147705, "step": 6124 }, { "epoch": 1.791051323292879, "grad_norm": 1.6081373509153076, "learning_rate": 5.931919393384189e-07, "loss": 0.45079779624938965, "step": 6125 }, { "epoch": 1.7913437637081446, "grad_norm": 1.7368976771393152, "learning_rate": 5.915526573748331e-07, "loss": 0.5887237787246704, "step": 6126 }, { "epoch": 1.7916362041234097, "grad_norm": 1.5326002891728705, "learning_rate": 5.8991557457861e-07, "loss": 0.5625102519989014, "step": 6127 }, { "epoch": 1.7919286445386753, "grad_norm": 1.773152580661058, "learning_rate": 5.882806913324079e-07, "loss": 0.5290789604187012, "step": 6128 }, { "epoch": 1.7922210849539406, "grad_norm": 1.8240731968563617, "learning_rate": 5.86648008018369e-07, "loss": 0.47694748640060425, "step": 6129 }, { "epoch": 1.792513525369206, "grad_norm": 1.7480468996944738, "learning_rate": 5.850175250181244e-07, "loss": 0.6297628879547119, "step": 6130 }, { "epoch": 1.7928059657844715, "grad_norm": 1.767468792446569, "learning_rate": 5.833892427127908e-07, "loss": 0.5748087167739868, "step": 6131 }, { "epoch": 1.7930984061997368, "grad_norm": 2.0367130445902313, "learning_rate": 5.817631614829666e-07, "loss": 0.552059531211853, "step": 6132 }, { "epoch": 1.7933908466150021, "grad_norm": 1.881082319886368, "learning_rate": 5.801392817087392e-07, "loss": 0.5980287790298462, "step": 6133 }, { "epoch": 1.7936832870302677, "grad_norm": 1.7948740811393897, "learning_rate": 5.785176037696815e-07, "loss": 0.5682743191719055, "step": 6134 }, { "epoch": 1.793975727445533, "grad_norm": 1.6227048981437364, "learning_rate": 5.768981280448494e-07, "loss": 0.6907520294189453, "step": 6135 }, { "epoch": 1.7942681678607983, "grad_norm": 1.82613812962419, "learning_rate": 5.752808549127875e-07, "loss": 0.5939712524414062, "step": 6136 }, { "epoch": 1.794560608276064, "grad_norm": 1.961952469296216, "learning_rate": 5.736657847515215e-07, "loss": 0.5169910192489624, "step": 6137 }, { "epoch": 1.794853048691329, "grad_norm": 1.7101466149490088, "learning_rate": 5.720529179385659e-07, "loss": 0.5795155167579651, "step": 6138 }, { "epoch": 1.7951454891065945, "grad_norm": 1.6643593680063449, "learning_rate": 5.704422548509181e-07, "loss": 0.4296284317970276, "step": 6139 }, { "epoch": 1.7954379295218599, "grad_norm": 1.780840768711558, "learning_rate": 5.688337958650603e-07, "loss": 0.5175303220748901, "step": 6140 }, { "epoch": 1.7957303699371252, "grad_norm": 1.5534990300027502, "learning_rate": 5.672275413569605e-07, "loss": 0.49900466203689575, "step": 6141 }, { "epoch": 1.7960228103523908, "grad_norm": 1.741229060320259, "learning_rate": 5.65623491702072e-07, "loss": 0.5047665238380432, "step": 6142 }, { "epoch": 1.796315250767656, "grad_norm": 1.6004175896698871, "learning_rate": 5.64021647275329e-07, "loss": 0.5309686660766602, "step": 6143 }, { "epoch": 1.7966076911829214, "grad_norm": 1.84753723892279, "learning_rate": 5.624220084511544e-07, "loss": 0.7270892858505249, "step": 6144 }, { "epoch": 1.796900131598187, "grad_norm": 1.8607152469266723, "learning_rate": 5.608245756034536e-07, "loss": 0.515272319316864, "step": 6145 }, { "epoch": 1.7971925720134523, "grad_norm": 1.5111910050436628, "learning_rate": 5.592293491056167e-07, "loss": 0.4919237196445465, "step": 6146 }, { "epoch": 1.7974850124287176, "grad_norm": 1.8345189418412804, "learning_rate": 5.576363293305187e-07, "loss": 0.5812259316444397, "step": 6147 }, { "epoch": 1.7977774528439832, "grad_norm": 1.7464814721572284, "learning_rate": 5.560455166505185e-07, "loss": 0.434345006942749, "step": 6148 }, { "epoch": 1.7980698932592483, "grad_norm": 1.6287087584719833, "learning_rate": 5.544569114374588e-07, "loss": 0.4670771360397339, "step": 6149 }, { "epoch": 1.7983623336745138, "grad_norm": 1.5038620849892772, "learning_rate": 5.528705140626667e-07, "loss": 0.5867526531219482, "step": 6150 }, { "epoch": 1.7986547740897794, "grad_norm": 1.8981858755166237, "learning_rate": 5.512863248969513e-07, "loss": 0.5453605651855469, "step": 6151 }, { "epoch": 1.7989472145050445, "grad_norm": 1.9030067654858334, "learning_rate": 5.497043443106087e-07, "loss": 0.5535463690757751, "step": 6152 }, { "epoch": 1.79923965492031, "grad_norm": 1.72031713178446, "learning_rate": 5.481245726734174e-07, "loss": 0.6250847578048706, "step": 6153 }, { "epoch": 1.7995320953355753, "grad_norm": 1.625961067284692, "learning_rate": 5.465470103546399e-07, "loss": 0.45504581928253174, "step": 6154 }, { "epoch": 1.7998245357508407, "grad_norm": 2.039802523536217, "learning_rate": 5.449716577230202e-07, "loss": 0.6192604303359985, "step": 6155 }, { "epoch": 1.8001169761661062, "grad_norm": 1.8695276161806251, "learning_rate": 5.433985151467869e-07, "loss": 0.5624358654022217, "step": 6156 }, { "epoch": 1.8004094165813715, "grad_norm": 1.7494457460727728, "learning_rate": 5.418275829936537e-07, "loss": 0.5759576559066772, "step": 6157 }, { "epoch": 1.8007018569966369, "grad_norm": 1.752894288026352, "learning_rate": 5.402588616308169e-07, "loss": 0.5710508227348328, "step": 6158 }, { "epoch": 1.8009942974119024, "grad_norm": 1.6781697189669698, "learning_rate": 5.386923514249542e-07, "loss": 0.6146141290664673, "step": 6159 }, { "epoch": 1.8012867378271677, "grad_norm": 1.618055518270054, "learning_rate": 5.371280527422296e-07, "loss": 0.425834983587265, "step": 6160 }, { "epoch": 1.801579178242433, "grad_norm": 1.8062077594882358, "learning_rate": 5.35565965948287e-07, "loss": 0.4353194236755371, "step": 6161 }, { "epoch": 1.8018716186576986, "grad_norm": 2.0598668441022037, "learning_rate": 5.340060914082546e-07, "loss": 0.7202355861663818, "step": 6162 }, { "epoch": 1.8021640590729637, "grad_norm": 1.552014134498689, "learning_rate": 5.324484294867449e-07, "loss": 0.5371845960617065, "step": 6163 }, { "epoch": 1.8024564994882293, "grad_norm": 1.7812688374701713, "learning_rate": 5.308929805478513e-07, "loss": 0.4995431900024414, "step": 6164 }, { "epoch": 1.8027489399034948, "grad_norm": 1.9376433940202618, "learning_rate": 5.293397449551519e-07, "loss": 0.6503393650054932, "step": 6165 }, { "epoch": 1.80304138031876, "grad_norm": 1.608511841040304, "learning_rate": 5.277887230717027e-07, "loss": 0.5083032250404358, "step": 6166 }, { "epoch": 1.8033338207340255, "grad_norm": 1.7910725457082355, "learning_rate": 5.262399152600473e-07, "loss": 0.6067851781845093, "step": 6167 }, { "epoch": 1.8036262611492908, "grad_norm": 1.6601362559713981, "learning_rate": 5.246933218822104e-07, "loss": 0.6446479558944702, "step": 6168 }, { "epoch": 1.8039187015645561, "grad_norm": 1.9668874595165033, "learning_rate": 5.231489432996984e-07, "loss": 0.6940749883651733, "step": 6169 }, { "epoch": 1.8042111419798217, "grad_norm": 1.6254914024201104, "learning_rate": 5.216067798735014e-07, "loss": 0.558691143989563, "step": 6170 }, { "epoch": 1.804503582395087, "grad_norm": 1.706821795047188, "learning_rate": 5.2006683196409e-07, "loss": 0.4561213254928589, "step": 6171 }, { "epoch": 1.8047960228103523, "grad_norm": 1.5741713506995776, "learning_rate": 5.185290999314174e-07, "loss": 0.514278769493103, "step": 6172 }, { "epoch": 1.805088463225618, "grad_norm": 1.7438493762338294, "learning_rate": 5.169935841349194e-07, "loss": 0.41933614015579224, "step": 6173 }, { "epoch": 1.8053809036408832, "grad_norm": 1.5639626592195386, "learning_rate": 5.154602849335133e-07, "loss": 0.5590407848358154, "step": 6174 }, { "epoch": 1.8056733440561485, "grad_norm": 1.7923343761763981, "learning_rate": 5.139292026855991e-07, "loss": 0.49428898096084595, "step": 6175 }, { "epoch": 1.805965784471414, "grad_norm": 1.6980318077322492, "learning_rate": 5.124003377490582e-07, "loss": 0.4737596809864044, "step": 6176 }, { "epoch": 1.8062582248866792, "grad_norm": 1.6716862203734568, "learning_rate": 5.108736904812517e-07, "loss": 0.5017397403717041, "step": 6177 }, { "epoch": 1.8065506653019447, "grad_norm": 1.733919571237643, "learning_rate": 5.09349261239026e-07, "loss": 0.4509057402610779, "step": 6178 }, { "epoch": 1.80684310571721, "grad_norm": 1.9095997808768526, "learning_rate": 5.078270503787053e-07, "loss": 0.4440206289291382, "step": 6179 }, { "epoch": 1.8071355461324754, "grad_norm": 1.6672235625660048, "learning_rate": 5.063070582560991e-07, "loss": 0.4981609582901001, "step": 6180 }, { "epoch": 1.807427986547741, "grad_norm": 1.4041701397189061, "learning_rate": 5.047892852264946e-07, "loss": 0.4057808518409729, "step": 6181 }, { "epoch": 1.8077204269630063, "grad_norm": 1.8238388895662465, "learning_rate": 5.032737316446634e-07, "loss": 0.5770435333251953, "step": 6182 }, { "epoch": 1.8080128673782716, "grad_norm": 1.5817149529336438, "learning_rate": 5.017603978648567e-07, "loss": 0.5431563258171082, "step": 6183 }, { "epoch": 1.8083053077935372, "grad_norm": 1.7959973431061746, "learning_rate": 5.002492842408058e-07, "loss": 0.469868928194046, "step": 6184 }, { "epoch": 1.8085977482088025, "grad_norm": 1.6470575782998251, "learning_rate": 4.98740391125726e-07, "loss": 0.4581238925457001, "step": 6185 }, { "epoch": 1.8088901886240678, "grad_norm": 1.5613704220145663, "learning_rate": 4.972337188723108e-07, "loss": 0.43255913257598877, "step": 6186 }, { "epoch": 1.8091826290393334, "grad_norm": 1.6405804521880538, "learning_rate": 4.957292678327374e-07, "loss": 0.5817975997924805, "step": 6187 }, { "epoch": 1.8094750694545985, "grad_norm": 1.701175567145501, "learning_rate": 4.9422703835866e-07, "loss": 0.506614089012146, "step": 6188 }, { "epoch": 1.809767509869864, "grad_norm": 1.8093255501568073, "learning_rate": 4.927270308012155e-07, "loss": 0.5245084762573242, "step": 6189 }, { "epoch": 1.8100599502851296, "grad_norm": 1.9638481802757681, "learning_rate": 4.912292455110235e-07, "loss": 0.48700785636901855, "step": 6190 }, { "epoch": 1.8103523907003947, "grad_norm": 1.7084108143801102, "learning_rate": 4.897336828381794e-07, "loss": 0.5512829422950745, "step": 6191 }, { "epoch": 1.8106448311156602, "grad_norm": 1.9425355962156208, "learning_rate": 4.882403431322647e-07, "loss": 0.444965660572052, "step": 6192 }, { "epoch": 1.8109372715309255, "grad_norm": 1.6773870360526466, "learning_rate": 4.86749226742338e-07, "loss": 0.49120527505874634, "step": 6193 }, { "epoch": 1.8112297119461909, "grad_norm": 1.5444026883137385, "learning_rate": 4.852603340169371e-07, "loss": 0.47114405035972595, "step": 6194 }, { "epoch": 1.8115221523614564, "grad_norm": 1.3641759741105037, "learning_rate": 4.837736653040825e-07, "loss": 0.41404014825820923, "step": 6195 }, { "epoch": 1.8118145927767217, "grad_norm": 1.5779692763243462, "learning_rate": 4.822892209512742e-07, "loss": 0.5773917436599731, "step": 6196 }, { "epoch": 1.812107033191987, "grad_norm": 1.5867022738126413, "learning_rate": 4.808070013054911e-07, "loss": 0.5048927068710327, "step": 6197 }, { "epoch": 1.8123994736072526, "grad_norm": 1.4880382186782968, "learning_rate": 4.793270067131961e-07, "loss": 0.48112595081329346, "step": 6198 }, { "epoch": 1.812691914022518, "grad_norm": 1.5982708355484612, "learning_rate": 4.778492375203236e-07, "loss": 0.465067982673645, "step": 6199 }, { "epoch": 1.8129843544377833, "grad_norm": 2.10382956966043, "learning_rate": 4.763736940722985e-07, "loss": 0.5456488132476807, "step": 6200 }, { "epoch": 1.8132767948530488, "grad_norm": 1.7197696401081977, "learning_rate": 4.74900376714017e-07, "loss": 0.5078476071357727, "step": 6201 }, { "epoch": 1.813569235268314, "grad_norm": 1.8035895737751002, "learning_rate": 4.7342928578985814e-07, "loss": 0.5087896585464478, "step": 6202 }, { "epoch": 1.8138616756835795, "grad_norm": 1.8289842367399733, "learning_rate": 4.719604216436824e-07, "loss": 0.5734537243843079, "step": 6203 }, { "epoch": 1.814154116098845, "grad_norm": 1.8255387764821909, "learning_rate": 4.704937846188262e-07, "loss": 0.5163359045982361, "step": 6204 }, { "epoch": 1.8144465565141101, "grad_norm": 1.7367361746759034, "learning_rate": 4.6902937505810765e-07, "loss": 0.5884007811546326, "step": 6205 }, { "epoch": 1.8147389969293757, "grad_norm": 1.459881439563451, "learning_rate": 4.675671933038228e-07, "loss": 0.454215407371521, "step": 6206 }, { "epoch": 1.815031437344641, "grad_norm": 1.4834270754413148, "learning_rate": 4.661072396977506e-07, "loss": 0.4380212426185608, "step": 6207 }, { "epoch": 1.8153238777599063, "grad_norm": 1.5724796080178702, "learning_rate": 4.646495145811425e-07, "loss": 0.6138126850128174, "step": 6208 }, { "epoch": 1.8156163181751719, "grad_norm": 1.7578891144089137, "learning_rate": 4.6319401829473366e-07, "loss": 0.560515284538269, "step": 6209 }, { "epoch": 1.8159087585904372, "grad_norm": 1.6717823771103892, "learning_rate": 4.6174075117873976e-07, "loss": 0.4744090735912323, "step": 6210 }, { "epoch": 1.8162011990057025, "grad_norm": 1.566667953265204, "learning_rate": 4.6028971357285126e-07, "loss": 0.4508114457130432, "step": 6211 }, { "epoch": 1.816493639420968, "grad_norm": 1.6686159118306128, "learning_rate": 4.5884090581623906e-07, "loss": 0.5437598824501038, "step": 6212 }, { "epoch": 1.8167860798362334, "grad_norm": 1.871048661690424, "learning_rate": 4.5739432824755456e-07, "loss": 0.608635425567627, "step": 6213 }, { "epoch": 1.8170785202514987, "grad_norm": 1.683927429440131, "learning_rate": 4.5594998120492505e-07, "loss": 0.45614784955978394, "step": 6214 }, { "epoch": 1.8173709606667643, "grad_norm": 1.8175326303925177, "learning_rate": 4.5450786502595933e-07, "loss": 0.46722525358200073, "step": 6215 }, { "epoch": 1.8176634010820294, "grad_norm": 1.6729337536988582, "learning_rate": 4.5306798004774333e-07, "loss": 0.5424127578735352, "step": 6216 }, { "epoch": 1.817955841497295, "grad_norm": 1.8512870023540355, "learning_rate": 4.5163032660684e-07, "loss": 0.4360300302505493, "step": 6217 }, { "epoch": 1.8182482819125603, "grad_norm": 1.4671759860658016, "learning_rate": 4.5019490503929395e-07, "loss": 0.43406206369400024, "step": 6218 }, { "epoch": 1.8185407223278256, "grad_norm": 1.5669201854687904, "learning_rate": 4.4876171568062346e-07, "loss": 0.5435998439788818, "step": 6219 }, { "epoch": 1.8188331627430911, "grad_norm": 1.7571994730111475, "learning_rate": 4.4733075886583043e-07, "loss": 0.4555914103984833, "step": 6220 }, { "epoch": 1.8191256031583565, "grad_norm": 1.9267993644134682, "learning_rate": 4.4590203492939076e-07, "loss": 0.5246081352233887, "step": 6221 }, { "epoch": 1.8194180435736218, "grad_norm": 1.4234567063452161, "learning_rate": 4.4447554420525954e-07, "loss": 0.5093664526939392, "step": 6222 }, { "epoch": 1.8197104839888874, "grad_norm": 1.9251138549109805, "learning_rate": 4.430512870268733e-07, "loss": 0.5759550333023071, "step": 6223 }, { "epoch": 1.8200029244041527, "grad_norm": 2.2446814471076184, "learning_rate": 4.416292637271402e-07, "loss": 0.5477207899093628, "step": 6224 }, { "epoch": 1.820295364819418, "grad_norm": 1.7579783947323675, "learning_rate": 4.402094746384511e-07, "loss": 0.5786882638931274, "step": 6225 }, { "epoch": 1.8205878052346836, "grad_norm": 1.6652775403735034, "learning_rate": 4.3879192009267266e-07, "loss": 0.36909428238868713, "step": 6226 }, { "epoch": 1.8208802456499487, "grad_norm": 1.6359565015929571, "learning_rate": 4.3737660042114993e-07, "loss": 0.5471982955932617, "step": 6227 }, { "epoch": 1.8211726860652142, "grad_norm": 1.633893653092529, "learning_rate": 4.3596351595470596e-07, "loss": 0.49737733602523804, "step": 6228 }, { "epoch": 1.8214651264804798, "grad_norm": 1.8445639233475513, "learning_rate": 4.3455266702363997e-07, "loss": 0.70830237865448, "step": 6229 }, { "epoch": 1.8217575668957449, "grad_norm": 1.5312305470870462, "learning_rate": 4.331440539577281e-07, "loss": 0.5844424962997437, "step": 6230 }, { "epoch": 1.8220500073110104, "grad_norm": 1.5427896071730656, "learning_rate": 4.317376770862269e-07, "loss": 0.42457354068756104, "step": 6231 }, { "epoch": 1.8223424477262757, "grad_norm": 2.058390634719774, "learning_rate": 4.3033353673786695e-07, "loss": 0.5154321193695068, "step": 6232 }, { "epoch": 1.822634888141541, "grad_norm": 1.7898699548834731, "learning_rate": 4.2893163324085886e-07, "loss": 0.5896856784820557, "step": 6233 }, { "epoch": 1.8229273285568066, "grad_norm": 1.8303948048078211, "learning_rate": 4.2753196692288835e-07, "loss": 0.5032835006713867, "step": 6234 }, { "epoch": 1.823219768972072, "grad_norm": 1.8584560183845538, "learning_rate": 4.2613453811111814e-07, "loss": 0.4691713750362396, "step": 6235 }, { "epoch": 1.8235122093873373, "grad_norm": 1.5627513261590378, "learning_rate": 4.2473934713219033e-07, "loss": 0.595095694065094, "step": 6236 }, { "epoch": 1.8238046498026028, "grad_norm": 1.6531612719483142, "learning_rate": 4.233463943122218e-07, "loss": 0.5004895329475403, "step": 6237 }, { "epoch": 1.8240970902178681, "grad_norm": 1.7047690953050751, "learning_rate": 4.2195567997680654e-07, "loss": 0.4924081563949585, "step": 6238 }, { "epoch": 1.8243895306331335, "grad_norm": 1.7572886707576447, "learning_rate": 4.2056720445101565e-07, "loss": 0.5350006818771362, "step": 6239 }, { "epoch": 1.824681971048399, "grad_norm": 1.9485734179206806, "learning_rate": 4.191809680593961e-07, "loss": 0.5404629707336426, "step": 6240 }, { "epoch": 1.8249744114636641, "grad_norm": 1.6023324600099473, "learning_rate": 4.177969711259744e-07, "loss": 0.727859377861023, "step": 6241 }, { "epoch": 1.8252668518789297, "grad_norm": 1.553973004264676, "learning_rate": 4.164152139742494e-07, "loss": 0.4805057644844055, "step": 6242 }, { "epoch": 1.8255592922941952, "grad_norm": 1.7536116301732134, "learning_rate": 4.1503569692719847e-07, "loss": 0.5520761013031006, "step": 6243 }, { "epoch": 1.8258517327094603, "grad_norm": 1.8327055737656117, "learning_rate": 4.1365842030727576e-07, "loss": 0.6130107641220093, "step": 6244 }, { "epoch": 1.8261441731247259, "grad_norm": 1.7887203227793926, "learning_rate": 4.122833844364116e-07, "loss": 0.6048229932785034, "step": 6245 }, { "epoch": 1.8264366135399912, "grad_norm": 1.717414490213998, "learning_rate": 4.1091058963601214e-07, "loss": 0.667324960231781, "step": 6246 }, { "epoch": 1.8267290539552565, "grad_norm": 2.083699506724501, "learning_rate": 4.095400362269597e-07, "loss": 0.45595815777778625, "step": 6247 }, { "epoch": 1.827021494370522, "grad_norm": 1.7162831332631867, "learning_rate": 4.081717245296124e-07, "loss": 0.49015533924102783, "step": 6248 }, { "epoch": 1.8273139347857874, "grad_norm": 2.1906207360630763, "learning_rate": 4.068056548638055e-07, "loss": 0.5230038166046143, "step": 6249 }, { "epoch": 1.8276063752010527, "grad_norm": 1.6860531929221865, "learning_rate": 4.054418275488492e-07, "loss": 0.5025942325592041, "step": 6250 }, { "epoch": 1.8278988156163183, "grad_norm": 1.736980191753769, "learning_rate": 4.0408024290352955e-07, "loss": 0.5136677026748657, "step": 6251 }, { "epoch": 1.8281912560315836, "grad_norm": 1.7988212644666006, "learning_rate": 4.0272090124611086e-07, "loss": 0.6209211945533752, "step": 6252 }, { "epoch": 1.828483696446849, "grad_norm": 1.9742781188768104, "learning_rate": 4.0136380289432784e-07, "loss": 0.5913738012313843, "step": 6253 }, { "epoch": 1.8287761368621145, "grad_norm": 1.9710058674803597, "learning_rate": 4.000089481653946e-07, "loss": 0.5745095610618591, "step": 6254 }, { "epoch": 1.8290685772773796, "grad_norm": 1.4867167586867893, "learning_rate": 3.9865633737600105e-07, "loss": 0.4566704034805298, "step": 6255 }, { "epoch": 1.8293610176926451, "grad_norm": 1.672257025513455, "learning_rate": 3.9730597084231105e-07, "loss": 0.49784860014915466, "step": 6256 }, { "epoch": 1.8296534581079105, "grad_norm": 1.7381596787517106, "learning_rate": 3.9595784887996647e-07, "loss": 0.4489399790763855, "step": 6257 }, { "epoch": 1.8299458985231758, "grad_norm": 1.9703484082158151, "learning_rate": 3.946119718040797e-07, "loss": 0.6335956454277039, "step": 6258 }, { "epoch": 1.8302383389384413, "grad_norm": 1.4097270774574866, "learning_rate": 3.932683399292436e-07, "loss": 0.44865918159484863, "step": 6259 }, { "epoch": 1.8305307793537067, "grad_norm": 1.6485718017332285, "learning_rate": 3.919269535695225e-07, "loss": 0.4328421354293823, "step": 6260 }, { "epoch": 1.830823219768972, "grad_norm": 1.6528043958881276, "learning_rate": 3.9058781303845886e-07, "loss": 0.463814377784729, "step": 6261 }, { "epoch": 1.8311156601842375, "grad_norm": 1.9336577936651187, "learning_rate": 3.892509186490667e-07, "loss": 0.5857536196708679, "step": 6262 }, { "epoch": 1.8314081005995029, "grad_norm": 1.4512027972560333, "learning_rate": 3.879162707138395e-07, "loss": 0.4873831272125244, "step": 6263 }, { "epoch": 1.8317005410147682, "grad_norm": 1.89367526659171, "learning_rate": 3.8658386954474104e-07, "loss": 0.5428040027618408, "step": 6264 }, { "epoch": 1.8319929814300338, "grad_norm": 1.759804366679343, "learning_rate": 3.852537154532121e-07, "loss": 0.49092623591423035, "step": 6265 }, { "epoch": 1.8322854218452989, "grad_norm": 1.7919708064212196, "learning_rate": 3.839258087501685e-07, "loss": 0.5515817999839783, "step": 6266 }, { "epoch": 1.8325778622605644, "grad_norm": 1.5550731443697672, "learning_rate": 3.8260014974600077e-07, "loss": 0.48080340027809143, "step": 6267 }, { "epoch": 1.83287030267583, "grad_norm": 2.292962123842254, "learning_rate": 3.812767387505734e-07, "loss": 0.6129888296127319, "step": 6268 }, { "epoch": 1.833162743091095, "grad_norm": 1.8203026764024284, "learning_rate": 3.7995557607322543e-07, "loss": 0.5843402147293091, "step": 6269 }, { "epoch": 1.8334551835063606, "grad_norm": 1.9423893526281284, "learning_rate": 3.7863666202276996e-07, "loss": 0.5573143362998962, "step": 6270 }, { "epoch": 1.833747623921626, "grad_norm": 1.9386384718546945, "learning_rate": 3.773199969074959e-07, "loss": 0.552756667137146, "step": 6271 }, { "epoch": 1.8340400643368913, "grad_norm": 1.7629811878645265, "learning_rate": 3.7600558103516706e-07, "loss": 0.5559083223342896, "step": 6272 }, { "epoch": 1.8343325047521568, "grad_norm": 1.9388416947858518, "learning_rate": 3.746934147130177e-07, "loss": 0.5388067364692688, "step": 6273 }, { "epoch": 1.8346249451674221, "grad_norm": 1.694909278172827, "learning_rate": 3.7338349824776133e-07, "loss": 0.5816110968589783, "step": 6274 }, { "epoch": 1.8349173855826875, "grad_norm": 1.9312358476553817, "learning_rate": 3.720758319455786e-07, "loss": 0.5720102787017822, "step": 6275 }, { "epoch": 1.835209825997953, "grad_norm": 1.5440220572809102, "learning_rate": 3.707704161121328e-07, "loss": 0.46005699038505554, "step": 6276 }, { "epoch": 1.8355022664132183, "grad_norm": 2.0613584980065776, "learning_rate": 3.6946725105255656e-07, "loss": 0.5602168440818787, "step": 6277 }, { "epoch": 1.8357947068284837, "grad_norm": 1.6156922208810771, "learning_rate": 3.68166337071455e-07, "loss": 0.5390583276748657, "step": 6278 }, { "epoch": 1.8360871472437492, "grad_norm": 1.558407958302267, "learning_rate": 3.668676744729094e-07, "loss": 0.48980700969696045, "step": 6279 }, { "epoch": 1.8363795876590143, "grad_norm": 1.5853357453165142, "learning_rate": 3.655712635604747e-07, "loss": 0.6565061807632446, "step": 6280 }, { "epoch": 1.8366720280742799, "grad_norm": 1.5692146512642422, "learning_rate": 3.642771046371785e-07, "loss": 0.465609610080719, "step": 6281 }, { "epoch": 1.8369644684895454, "grad_norm": 1.7219983092976099, "learning_rate": 3.6298519800552434e-07, "loss": 0.5698891282081604, "step": 6282 }, { "epoch": 1.8372569089048105, "grad_norm": 2.103680074754177, "learning_rate": 3.616955439674863e-07, "loss": 0.5885399580001831, "step": 6283 }, { "epoch": 1.837549349320076, "grad_norm": 1.7028861151189467, "learning_rate": 3.60408142824511e-07, "loss": 0.5158063173294067, "step": 6284 }, { "epoch": 1.8378417897353414, "grad_norm": 1.6728867893623607, "learning_rate": 3.5912299487752434e-07, "loss": 0.49203822016716003, "step": 6285 }, { "epoch": 1.8381342301506067, "grad_norm": 1.991753525300203, "learning_rate": 3.578401004269183e-07, "loss": 0.5756489038467407, "step": 6286 }, { "epoch": 1.8384266705658723, "grad_norm": 1.9424738806131756, "learning_rate": 3.565594597725652e-07, "loss": 0.5970584154129028, "step": 6287 }, { "epoch": 1.8387191109811376, "grad_norm": 1.4438564684738853, "learning_rate": 3.552810732138046e-07, "loss": 0.48702481389045715, "step": 6288 }, { "epoch": 1.839011551396403, "grad_norm": 1.6632334435868308, "learning_rate": 3.540049410494517e-07, "loss": 0.4818963408470154, "step": 6289 }, { "epoch": 1.8393039918116685, "grad_norm": 1.6617150886827665, "learning_rate": 3.5273106357779585e-07, "loss": 0.389699786901474, "step": 6290 }, { "epoch": 1.8395964322269338, "grad_norm": 1.7654595369504777, "learning_rate": 3.514594410965977e-07, "loss": 0.6438174247741699, "step": 6291 }, { "epoch": 1.8398888726421991, "grad_norm": 1.9409260673022277, "learning_rate": 3.501900739030906e-07, "loss": 0.654021143913269, "step": 6292 }, { "epoch": 1.8401813130574647, "grad_norm": 1.921461492738401, "learning_rate": 3.489229622939827e-07, "loss": 0.748673677444458, "step": 6293 }, { "epoch": 1.8404737534727298, "grad_norm": 1.850157344469969, "learning_rate": 3.476581065654527e-07, "loss": 0.47883105278015137, "step": 6294 }, { "epoch": 1.8407661938879953, "grad_norm": 1.555147241743972, "learning_rate": 3.4639550701315303e-07, "loss": 0.5221554040908813, "step": 6295 }, { "epoch": 1.8410586343032607, "grad_norm": 1.7256564846330384, "learning_rate": 3.451351639322087e-07, "loss": 0.482231080532074, "step": 6296 }, { "epoch": 1.841351074718526, "grad_norm": 1.797442509245834, "learning_rate": 3.4387707761721625e-07, "loss": 0.5407366752624512, "step": 6297 }, { "epoch": 1.8416435151337915, "grad_norm": 1.9177358417772523, "learning_rate": 3.426212483622482e-07, "loss": 0.626631498336792, "step": 6298 }, { "epoch": 1.8419359555490569, "grad_norm": 1.4729327167263073, "learning_rate": 3.4136767646084424e-07, "loss": 0.4401513338088989, "step": 6299 }, { "epoch": 1.8422283959643222, "grad_norm": 1.756926078765411, "learning_rate": 3.4011636220602106e-07, "loss": 0.48130229115486145, "step": 6300 }, { "epoch": 1.8425208363795877, "grad_norm": 1.9010914484665373, "learning_rate": 3.3886730589026475e-07, "loss": 0.7132935523986816, "step": 6301 }, { "epoch": 1.842813276794853, "grad_norm": 1.692313625720156, "learning_rate": 3.37620507805535e-07, "loss": 0.6665343642234802, "step": 6302 }, { "epoch": 1.8431057172101184, "grad_norm": 1.7909091838212496, "learning_rate": 3.3637596824326435e-07, "loss": 0.4313231408596039, "step": 6303 }, { "epoch": 1.843398157625384, "grad_norm": 1.6745971926171657, "learning_rate": 3.3513368749435447e-07, "loss": 0.6263744235038757, "step": 6304 }, { "epoch": 1.843690598040649, "grad_norm": 1.6133043168174617, "learning_rate": 3.3389366584918313e-07, "loss": 0.6215947866439819, "step": 6305 }, { "epoch": 1.8439830384559146, "grad_norm": 1.6349014502820445, "learning_rate": 3.3265590359759517e-07, "loss": 0.45956021547317505, "step": 6306 }, { "epoch": 1.8442754788711802, "grad_norm": 1.6194578088821072, "learning_rate": 3.3142040102891126e-07, "loss": 0.5363642573356628, "step": 6307 }, { "epoch": 1.8445679192864453, "grad_norm": 1.7115305858843777, "learning_rate": 3.3018715843192273e-07, "loss": 0.4574592709541321, "step": 6308 }, { "epoch": 1.8448603597017108, "grad_norm": 1.6684239678735615, "learning_rate": 3.2895617609489337e-07, "loss": 0.43236005306243896, "step": 6309 }, { "epoch": 1.8451528001169761, "grad_norm": 1.574172974777944, "learning_rate": 3.277274543055564e-07, "loss": 0.46349820494651794, "step": 6310 }, { "epoch": 1.8454452405322415, "grad_norm": 1.9135327602518888, "learning_rate": 3.265009933511176e-07, "loss": 0.5233386754989624, "step": 6311 }, { "epoch": 1.845737680947507, "grad_norm": 1.5165768096310508, "learning_rate": 3.252767935182566e-07, "loss": 0.44902727007865906, "step": 6312 }, { "epoch": 1.8460301213627723, "grad_norm": 1.9281348385682333, "learning_rate": 3.240548550931222e-07, "loss": 0.709855854511261, "step": 6313 }, { "epoch": 1.8463225617780377, "grad_norm": 1.8532989008830933, "learning_rate": 3.228351783613348e-07, "loss": 0.5194632411003113, "step": 6314 }, { "epoch": 1.8466150021933032, "grad_norm": 1.750242735396334, "learning_rate": 3.2161776360798535e-07, "loss": 0.6027804017066956, "step": 6315 }, { "epoch": 1.8469074426085685, "grad_norm": 1.591118544218686, "learning_rate": 3.2040261111763946e-07, "loss": 0.5047632455825806, "step": 6316 }, { "epoch": 1.8471998830238339, "grad_norm": 2.082041129535105, "learning_rate": 3.1918972117433e-07, "loss": 0.5763708353042603, "step": 6317 }, { "epoch": 1.8474923234390994, "grad_norm": 1.7701935148884373, "learning_rate": 3.1797909406156234e-07, "loss": 0.4725028872489929, "step": 6318 }, { "epoch": 1.8477847638543645, "grad_norm": 1.5419878667068574, "learning_rate": 3.167707300623135e-07, "loss": 0.523047924041748, "step": 6319 }, { "epoch": 1.84807720426963, "grad_norm": 1.6321175932285703, "learning_rate": 3.15564629459032e-07, "loss": 0.5100070238113403, "step": 6320 }, { "epoch": 1.8483696446848956, "grad_norm": 1.7375024362733555, "learning_rate": 3.143607925336356e-07, "loss": 0.6019359827041626, "step": 6321 }, { "epoch": 1.8486620851001607, "grad_norm": 1.8195133886893664, "learning_rate": 3.1315921956751483e-07, "loss": 0.5514570474624634, "step": 6322 }, { "epoch": 1.8489545255154263, "grad_norm": 1.6002643586013279, "learning_rate": 3.1195991084152944e-07, "loss": 0.49585646390914917, "step": 6323 }, { "epoch": 1.8492469659306916, "grad_norm": 1.724322382501938, "learning_rate": 3.1076286663601076e-07, "loss": 0.5738509297370911, "step": 6324 }, { "epoch": 1.849539406345957, "grad_norm": 1.8621720995112787, "learning_rate": 3.095680872307605e-07, "loss": 0.5149112939834595, "step": 6325 }, { "epoch": 1.8498318467612225, "grad_norm": 1.6738148879498993, "learning_rate": 3.0837557290505083e-07, "loss": 0.45808184146881104, "step": 6326 }, { "epoch": 1.8501242871764878, "grad_norm": 1.6155317269058609, "learning_rate": 3.0718532393762435e-07, "loss": 0.5173396468162537, "step": 6327 }, { "epoch": 1.8504167275917531, "grad_norm": 1.6905273546590853, "learning_rate": 3.059973406066963e-07, "loss": 0.6229383945465088, "step": 6328 }, { "epoch": 1.8507091680070187, "grad_norm": 1.6794531990129002, "learning_rate": 3.0481162318994894e-07, "loss": 0.45520371198654175, "step": 6329 }, { "epoch": 1.851001608422284, "grad_norm": 1.5024073523898138, "learning_rate": 3.036281719645373e-07, "loss": 0.43216121196746826, "step": 6330 }, { "epoch": 1.8512940488375493, "grad_norm": 1.9238309164883824, "learning_rate": 3.0244698720708456e-07, "loss": 0.5440583825111389, "step": 6331 }, { "epoch": 1.8515864892528149, "grad_norm": 1.8189444343843324, "learning_rate": 3.0126806919368756e-07, "loss": 0.5474626421928406, "step": 6332 }, { "epoch": 1.85187892966808, "grad_norm": 1.7800420936387606, "learning_rate": 3.000914181999093e-07, "loss": 0.5122883915901184, "step": 6333 }, { "epoch": 1.8521713700833455, "grad_norm": 1.776220435476035, "learning_rate": 2.989170345007852e-07, "loss": 0.48304370045661926, "step": 6334 }, { "epoch": 1.8524638104986109, "grad_norm": 1.6949801188317577, "learning_rate": 2.977449183708214e-07, "loss": 0.566180408000946, "step": 6335 }, { "epoch": 1.8527562509138762, "grad_norm": 1.7482351137010406, "learning_rate": 2.96575070083992e-07, "loss": 0.5218988656997681, "step": 6336 }, { "epoch": 1.8530486913291417, "grad_norm": 1.8289145949576808, "learning_rate": 2.954074899137427e-07, "loss": 0.49669283628463745, "step": 6337 }, { "epoch": 1.853341131744407, "grad_norm": 1.6012219042297557, "learning_rate": 2.942421781329874e-07, "loss": 0.5505487322807312, "step": 6338 }, { "epoch": 1.8536335721596724, "grad_norm": 1.6156483149639533, "learning_rate": 2.930791350141116e-07, "loss": 0.5386735200881958, "step": 6339 }, { "epoch": 1.853926012574938, "grad_norm": 2.0764057670166776, "learning_rate": 2.919183608289689e-07, "loss": 0.5266523957252502, "step": 6340 }, { "epoch": 1.8542184529902033, "grad_norm": 1.573480922837112, "learning_rate": 2.907598558488822e-07, "loss": 0.5335103273391724, "step": 6341 }, { "epoch": 1.8545108934054686, "grad_norm": 1.8447961626822076, "learning_rate": 2.896036203446473e-07, "loss": 0.6155405044555664, "step": 6342 }, { "epoch": 1.8548033338207341, "grad_norm": 1.5602039082453873, "learning_rate": 2.884496545865245e-07, "loss": 0.5258159041404724, "step": 6343 }, { "epoch": 1.8550957742359993, "grad_norm": 1.7894466773590292, "learning_rate": 2.8729795884424927e-07, "loss": 0.5428795218467712, "step": 6344 }, { "epoch": 1.8553882146512648, "grad_norm": 1.4344098630811726, "learning_rate": 2.8614853338702066e-07, "loss": 0.4876418709754944, "step": 6345 }, { "epoch": 1.8556806550665303, "grad_norm": 1.606511441088432, "learning_rate": 2.850013784835115e-07, "loss": 0.49640393257141113, "step": 6346 }, { "epoch": 1.8559730954817955, "grad_norm": 1.8316843043903746, "learning_rate": 2.838564944018618e-07, "loss": 0.5726122260093689, "step": 6347 }, { "epoch": 1.856265535897061, "grad_norm": 1.653087716973347, "learning_rate": 2.827138814096819e-07, "loss": 0.5106557011604309, "step": 6348 }, { "epoch": 1.8565579763123263, "grad_norm": 1.5025453294784719, "learning_rate": 2.8157353977405044e-07, "loss": 0.45941129326820374, "step": 6349 }, { "epoch": 1.8568504167275917, "grad_norm": 1.781767756464568, "learning_rate": 2.8043546976151414e-07, "loss": 0.488609254360199, "step": 6350 }, { "epoch": 1.8571428571428572, "grad_norm": 1.764244860072195, "learning_rate": 2.7929967163809135e-07, "loss": 0.639745831489563, "step": 6351 }, { "epoch": 1.8574352975581225, "grad_norm": 1.498822179909691, "learning_rate": 2.7816614566926747e-07, "loss": 0.45327228307724, "step": 6352 }, { "epoch": 1.8577277379733879, "grad_norm": 1.625074504661963, "learning_rate": 2.7703489211999725e-07, "loss": 0.5606091022491455, "step": 6353 }, { "epoch": 1.8580201783886534, "grad_norm": 1.7312129624633084, "learning_rate": 2.759059112547047e-07, "loss": 0.5078528523445129, "step": 6354 }, { "epoch": 1.8583126188039187, "grad_norm": 1.6687258508972733, "learning_rate": 2.74779203337282e-07, "loss": 0.5558253526687622, "step": 6355 }, { "epoch": 1.858605059219184, "grad_norm": 1.7014892476807573, "learning_rate": 2.7365476863108974e-07, "loss": 0.3962102234363556, "step": 6356 }, { "epoch": 1.8588974996344496, "grad_norm": 1.6957699860554467, "learning_rate": 2.725326073989587e-07, "loss": 0.4737718105316162, "step": 6357 }, { "epoch": 1.8591899400497147, "grad_norm": 3.313281560384309, "learning_rate": 2.7141271990318576e-07, "loss": 0.5389090180397034, "step": 6358 }, { "epoch": 1.8594823804649803, "grad_norm": 1.7840378938084138, "learning_rate": 2.7029510640554033e-07, "loss": 0.5311479568481445, "step": 6359 }, { "epoch": 1.8597748208802458, "grad_norm": 1.631290291956445, "learning_rate": 2.691797671672558e-07, "loss": 0.4753482937812805, "step": 6360 }, { "epoch": 1.860067261295511, "grad_norm": 1.581254208029566, "learning_rate": 2.6806670244903577e-07, "loss": 0.5192427635192871, "step": 6361 }, { "epoch": 1.8603597017107765, "grad_norm": 1.9540580966263197, "learning_rate": 2.6695591251105214e-07, "loss": 0.5910875797271729, "step": 6362 }, { "epoch": 1.8606521421260418, "grad_norm": 1.7486575397054567, "learning_rate": 2.658473976129472e-07, "loss": 0.5465212464332581, "step": 6363 }, { "epoch": 1.8609445825413071, "grad_norm": 1.7446293681201037, "learning_rate": 2.647411580138282e-07, "loss": 0.43188267946243286, "step": 6364 }, { "epoch": 1.8612370229565727, "grad_norm": 2.144472636918694, "learning_rate": 2.636371939722715e-07, "loss": 0.5723724365234375, "step": 6365 }, { "epoch": 1.861529463371838, "grad_norm": 1.6310859619397844, "learning_rate": 2.62535505746323e-07, "loss": 0.47383856773376465, "step": 6366 }, { "epoch": 1.8618219037871033, "grad_norm": 1.764378835172625, "learning_rate": 2.6143609359349566e-07, "loss": 0.502855658531189, "step": 6367 }, { "epoch": 1.8621143442023689, "grad_norm": 2.265501418087609, "learning_rate": 2.6033895777077043e-07, "loss": 0.5934205055236816, "step": 6368 }, { "epoch": 1.8624067846176342, "grad_norm": 1.469455820490925, "learning_rate": 2.5924409853459455e-07, "loss": 0.4157971143722534, "step": 6369 }, { "epoch": 1.8626992250328995, "grad_norm": 1.8051847044948597, "learning_rate": 2.5815151614088764e-07, "loss": 0.5944307446479797, "step": 6370 }, { "epoch": 1.862991665448165, "grad_norm": 2.0081645135491812, "learning_rate": 2.57061210845031e-07, "loss": 0.5603153705596924, "step": 6371 }, { "epoch": 1.8632841058634302, "grad_norm": 1.752999497142634, "learning_rate": 2.559731829018786e-07, "loss": 0.49231380224227905, "step": 6372 }, { "epoch": 1.8635765462786957, "grad_norm": 1.666251917997058, "learning_rate": 2.548874325657502e-07, "loss": 0.46984565258026123, "step": 6373 }, { "epoch": 1.863868986693961, "grad_norm": 1.7373025752546019, "learning_rate": 2.5380396009043297e-07, "loss": 0.5088338255882263, "step": 6374 }, { "epoch": 1.8641614271092264, "grad_norm": 1.7554684094014161, "learning_rate": 2.52722765729182e-07, "loss": 0.4760589599609375, "step": 6375 }, { "epoch": 1.864453867524492, "grad_norm": 1.6521606786384044, "learning_rate": 2.5164384973471954e-07, "loss": 0.44232040643692017, "step": 6376 }, { "epoch": 1.8647463079397573, "grad_norm": 1.736903879415624, "learning_rate": 2.505672123592373e-07, "loss": 0.46714338660240173, "step": 6377 }, { "epoch": 1.8650387483550226, "grad_norm": 1.9333860177281759, "learning_rate": 2.494928538543917e-07, "loss": 0.5527149438858032, "step": 6378 }, { "epoch": 1.8653311887702881, "grad_norm": 1.690422887605866, "learning_rate": 2.484207744713074e-07, "loss": 0.5006313323974609, "step": 6379 }, { "epoch": 1.8656236291855535, "grad_norm": 1.5247883016042734, "learning_rate": 2.473509744605751e-07, "loss": 0.5007860660552979, "step": 6380 }, { "epoch": 1.8659160696008188, "grad_norm": 1.683063597354387, "learning_rate": 2.4628345407225804e-07, "loss": 0.4354132413864136, "step": 6381 }, { "epoch": 1.8662085100160843, "grad_norm": 1.718309113338333, "learning_rate": 2.452182135558789e-07, "loss": 0.5199555158615112, "step": 6382 }, { "epoch": 1.8665009504313494, "grad_norm": 1.6260046663066803, "learning_rate": 2.441552531604319e-07, "loss": 0.5117326974868774, "step": 6383 }, { "epoch": 1.866793390846615, "grad_norm": 1.803024051218915, "learning_rate": 2.43094573134377e-07, "loss": 0.5169814825057983, "step": 6384 }, { "epoch": 1.8670858312618805, "grad_norm": 1.7012998015666523, "learning_rate": 2.420361737256438e-07, "loss": 0.563339352607727, "step": 6385 }, { "epoch": 1.8673782716771457, "grad_norm": 2.1248949598274325, "learning_rate": 2.409800551816255e-07, "loss": 0.710465133190155, "step": 6386 }, { "epoch": 1.8676707120924112, "grad_norm": 1.6580658731053397, "learning_rate": 2.3992621774918343e-07, "loss": 0.6894562244415283, "step": 6387 }, { "epoch": 1.8679631525076765, "grad_norm": 1.7380197058585787, "learning_rate": 2.388746616746462e-07, "loss": 0.5105183124542236, "step": 6388 }, { "epoch": 1.8682555929229419, "grad_norm": 2.0034985048956684, "learning_rate": 2.3782538720380722e-07, "loss": 0.4602908492088318, "step": 6389 }, { "epoch": 1.8685480333382074, "grad_norm": 1.7787197864367217, "learning_rate": 2.3677839458192908e-07, "loss": 0.5395161509513855, "step": 6390 }, { "epoch": 1.8688404737534727, "grad_norm": 1.6121023481071262, "learning_rate": 2.3573368405374054e-07, "loss": 0.5842725038528442, "step": 6391 }, { "epoch": 1.869132914168738, "grad_norm": 1.6354709739233064, "learning_rate": 2.346912558634362e-07, "loss": 0.5837947130203247, "step": 6392 }, { "epoch": 1.8694253545840036, "grad_norm": 1.8136211176417363, "learning_rate": 2.3365111025467568e-07, "loss": 0.5255596041679382, "step": 6393 }, { "epoch": 1.869717794999269, "grad_norm": 1.5586602271443384, "learning_rate": 2.326132474705889e-07, "loss": 0.5614485144615173, "step": 6394 }, { "epoch": 1.8700102354145343, "grad_norm": 1.5895893761997042, "learning_rate": 2.3157766775376733e-07, "loss": 0.5510128736495972, "step": 6395 }, { "epoch": 1.8703026758297998, "grad_norm": 2.295988070565878, "learning_rate": 2.3054437134627406e-07, "loss": 0.690884530544281, "step": 6396 }, { "epoch": 1.870595116245065, "grad_norm": 1.94960784120805, "learning_rate": 2.2951335848963364e-07, "loss": 0.637476921081543, "step": 6397 }, { "epoch": 1.8708875566603305, "grad_norm": 1.6526446878259382, "learning_rate": 2.2848462942484108e-07, "loss": 0.5254319906234741, "step": 6398 }, { "epoch": 1.871179997075596, "grad_norm": 1.7552717813182315, "learning_rate": 2.27458184392354e-07, "loss": 0.5038233995437622, "step": 6399 }, { "epoch": 1.8714724374908611, "grad_norm": 1.4123258498894362, "learning_rate": 2.2643402363209832e-07, "loss": 0.43701431155204773, "step": 6400 }, { "epoch": 1.8717648779061267, "grad_norm": 1.8138198755485717, "learning_rate": 2.2541214738346583e-07, "loss": 0.5490877628326416, "step": 6401 }, { "epoch": 1.872057318321392, "grad_norm": 1.5452561215431913, "learning_rate": 2.2439255588531327e-07, "loss": 0.48393410444259644, "step": 6402 }, { "epoch": 1.8723497587366573, "grad_norm": 1.6213926610567049, "learning_rate": 2.2337524937596444e-07, "loss": 0.5439243912696838, "step": 6403 }, { "epoch": 1.8726421991519229, "grad_norm": 1.6026974016529494, "learning_rate": 2.22360228093208e-07, "loss": 0.5272157192230225, "step": 6404 }, { "epoch": 1.8729346395671882, "grad_norm": 1.6750451870732375, "learning_rate": 2.2134749227429864e-07, "loss": 0.6323473453521729, "step": 6405 }, { "epoch": 1.8732270799824535, "grad_norm": 1.6749139186520705, "learning_rate": 2.2033704215595808e-07, "loss": 0.4568995237350464, "step": 6406 }, { "epoch": 1.873519520397719, "grad_norm": 1.8331627672377568, "learning_rate": 2.1932887797437296e-07, "loss": 0.5817153453826904, "step": 6407 }, { "epoch": 1.8738119608129844, "grad_norm": 1.4674902238035163, "learning_rate": 2.183229999651948e-07, "loss": 0.5104260444641113, "step": 6408 }, { "epoch": 1.8741044012282497, "grad_norm": 1.7946613600749395, "learning_rate": 2.1731940836354105e-07, "loss": 0.44944921135902405, "step": 6409 }, { "epoch": 1.8743968416435153, "grad_norm": 1.794977484250215, "learning_rate": 2.163181034039974e-07, "loss": 0.6935169696807861, "step": 6410 }, { "epoch": 1.8746892820587804, "grad_norm": 1.7330999339843873, "learning_rate": 2.1531908532060998e-07, "loss": 0.55609130859375, "step": 6411 }, { "epoch": 1.874981722474046, "grad_norm": 1.6428359107019144, "learning_rate": 2.143223543468953e-07, "loss": 0.5402215719223022, "step": 6412 }, { "epoch": 1.8752741628893113, "grad_norm": 1.8163043216263146, "learning_rate": 2.1332791071583258e-07, "loss": 0.5669365525245667, "step": 6413 }, { "epoch": 1.8755666033045766, "grad_norm": 2.2122008806914044, "learning_rate": 2.123357546598659e-07, "loss": 0.46257615089416504, "step": 6414 }, { "epoch": 1.8758590437198421, "grad_norm": 1.6308794717153283, "learning_rate": 2.1134588641090858e-07, "loss": 0.4596136212348938, "step": 6415 }, { "epoch": 1.8761514841351075, "grad_norm": 1.6758615624094995, "learning_rate": 2.1035830620033227e-07, "loss": 0.5086819529533386, "step": 6416 }, { "epoch": 1.8764439245503728, "grad_norm": 1.8974547658257448, "learning_rate": 2.0937301425898115e-07, "loss": 0.6008501052856445, "step": 6417 }, { "epoch": 1.8767363649656383, "grad_norm": 1.8448672190670345, "learning_rate": 2.0839001081715882e-07, "loss": 0.5943784713745117, "step": 6418 }, { "epoch": 1.8770288053809037, "grad_norm": 1.3203141385144623, "learning_rate": 2.0740929610463813e-07, "loss": 0.5006660223007202, "step": 6419 }, { "epoch": 1.877321245796169, "grad_norm": 1.7508035137785818, "learning_rate": 2.0643087035065458e-07, "loss": 0.5434073805809021, "step": 6420 }, { "epoch": 1.8776136862114345, "grad_norm": 1.8446497118213794, "learning_rate": 2.0545473378390858e-07, "loss": 0.6426963210105896, "step": 6421 }, { "epoch": 1.8779061266266996, "grad_norm": 1.7388169538440008, "learning_rate": 2.044808866325676e-07, "loss": 0.5190218687057495, "step": 6422 }, { "epoch": 1.8781985670419652, "grad_norm": 1.5291942184143035, "learning_rate": 2.035093291242607e-07, "loss": 0.40918534994125366, "step": 6423 }, { "epoch": 1.8784910074572307, "grad_norm": 1.719713887519883, "learning_rate": 2.0254006148608507e-07, "loss": 0.5403652191162109, "step": 6424 }, { "epoch": 1.8787834478724958, "grad_norm": 1.3839892041506006, "learning_rate": 2.0157308394460062e-07, "loss": 0.49781516194343567, "step": 6425 }, { "epoch": 1.8790758882877614, "grad_norm": 1.8332751958303748, "learning_rate": 2.006083967258321e-07, "loss": 0.5841303467750549, "step": 6426 }, { "epoch": 1.8793683287030267, "grad_norm": 1.679945923485487, "learning_rate": 1.9964600005527024e-07, "loss": 0.5054808855056763, "step": 6427 }, { "epoch": 1.879660769118292, "grad_norm": 1.7695393284467882, "learning_rate": 1.9868589415786843e-07, "loss": 0.4801362454891205, "step": 6428 }, { "epoch": 1.8799532095335576, "grad_norm": 1.8547174560912147, "learning_rate": 1.9772807925804494e-07, "loss": 0.4709380269050598, "step": 6429 }, { "epoch": 1.880245649948823, "grad_norm": 1.8447220446699908, "learning_rate": 1.9677255557968511e-07, "loss": 0.665968120098114, "step": 6430 }, { "epoch": 1.8805380903640883, "grad_norm": 1.7494009698963573, "learning_rate": 1.9581932334613585e-07, "loss": 0.515839159488678, "step": 6431 }, { "epoch": 1.8808305307793538, "grad_norm": 1.6699738562759978, "learning_rate": 1.948683827802089e-07, "loss": 0.5399242043495178, "step": 6432 }, { "epoch": 1.8811229711946191, "grad_norm": 1.7478095955612059, "learning_rate": 1.9391973410418097e-07, "loss": 0.6167087554931641, "step": 6433 }, { "epoch": 1.8814154116098845, "grad_norm": 1.826500337038364, "learning_rate": 1.9297337753979462e-07, "loss": 0.6139745116233826, "step": 6434 }, { "epoch": 1.88170785202515, "grad_norm": 2.0873679343118257, "learning_rate": 1.9202931330825292e-07, "loss": 0.7103149890899658, "step": 6435 }, { "epoch": 1.8820002924404151, "grad_norm": 1.6777685812633742, "learning_rate": 1.9108754163022602e-07, "loss": 0.5958741903305054, "step": 6436 }, { "epoch": 1.8822927328556807, "grad_norm": 1.2489160599157765, "learning_rate": 1.9014806272584673e-07, "loss": 0.32660478353500366, "step": 6437 }, { "epoch": 1.8825851732709462, "grad_norm": 1.822465954469875, "learning_rate": 1.8921087681471272e-07, "loss": 0.49485981464385986, "step": 6438 }, { "epoch": 1.8828776136862113, "grad_norm": 1.5404253681507418, "learning_rate": 1.8827598411588544e-07, "loss": 0.5106277465820312, "step": 6439 }, { "epoch": 1.8831700541014769, "grad_norm": 1.5696470040532076, "learning_rate": 1.8734338484789115e-07, "loss": 0.50006502866745, "step": 6440 }, { "epoch": 1.8834624945167422, "grad_norm": 1.5827360977472946, "learning_rate": 1.8641307922871887e-07, "loss": 0.47097745537757874, "step": 6441 }, { "epoch": 1.8837549349320075, "grad_norm": 1.718260594389779, "learning_rate": 1.854850674758213e-07, "loss": 0.5874402523040771, "step": 6442 }, { "epoch": 1.884047375347273, "grad_norm": 1.7055917291229012, "learning_rate": 1.8455934980611602e-07, "loss": 0.45705318450927734, "step": 6443 }, { "epoch": 1.8843398157625384, "grad_norm": 1.8262667617041222, "learning_rate": 1.8363592643598328e-07, "loss": 0.4949952960014343, "step": 6444 }, { "epoch": 1.8846322561778037, "grad_norm": 2.0005095204142056, "learning_rate": 1.827147975812693e-07, "loss": 0.5311721563339233, "step": 6445 }, { "epoch": 1.8849246965930693, "grad_norm": 1.8075375628836245, "learning_rate": 1.817959634572819e-07, "loss": 0.5652828216552734, "step": 6446 }, { "epoch": 1.8852171370083346, "grad_norm": 1.7007026167846622, "learning_rate": 1.8087942427879146e-07, "loss": 0.4856044054031372, "step": 6447 }, { "epoch": 1.8855095774236, "grad_norm": 1.6920105837383546, "learning_rate": 1.799651802600344e-07, "loss": 0.55420982837677, "step": 6448 }, { "epoch": 1.8858020178388655, "grad_norm": 1.8804834035548856, "learning_rate": 1.7905323161470867e-07, "loss": 0.5869326591491699, "step": 6449 }, { "epoch": 1.8860944582541306, "grad_norm": 1.761061751635786, "learning_rate": 1.781435785559793e-07, "loss": 0.4505504369735718, "step": 6450 }, { "epoch": 1.8863868986693961, "grad_norm": 1.7194415376329713, "learning_rate": 1.7723622129646955e-07, "loss": 0.5460773706436157, "step": 6451 }, { "epoch": 1.8866793390846615, "grad_norm": 1.7253684204963688, "learning_rate": 1.7633116004826978e-07, "loss": 0.6214778423309326, "step": 6452 }, { "epoch": 1.8869717794999268, "grad_norm": 1.786722853658628, "learning_rate": 1.7542839502293297e-07, "loss": 0.4900703430175781, "step": 6453 }, { "epoch": 1.8872642199151923, "grad_norm": 1.8351888114829378, "learning_rate": 1.7452792643147364e-07, "loss": 0.5177547931671143, "step": 6454 }, { "epoch": 1.8875566603304577, "grad_norm": 1.6033594290974305, "learning_rate": 1.7362975448437236e-07, "loss": 0.3914458453655243, "step": 6455 }, { "epoch": 1.887849100745723, "grad_norm": 1.7306995937297311, "learning_rate": 1.7273387939157116e-07, "loss": 0.5222523212432861, "step": 6456 }, { "epoch": 1.8881415411609885, "grad_norm": 1.8351026582741266, "learning_rate": 1.7184030136247477e-07, "loss": 0.5097587704658508, "step": 6457 }, { "epoch": 1.8884339815762539, "grad_norm": 1.711376264331189, "learning_rate": 1.7094902060595053e-07, "loss": 0.517410397529602, "step": 6458 }, { "epoch": 1.8887264219915192, "grad_norm": 1.5054067124169248, "learning_rate": 1.7006003733033182e-07, "loss": 0.4951689839363098, "step": 6459 }, { "epoch": 1.8890188624067847, "grad_norm": 1.8698243351971042, "learning_rate": 1.6917335174341242e-07, "loss": 0.5530004501342773, "step": 6460 }, { "epoch": 1.8893113028220498, "grad_norm": 1.3793759581483827, "learning_rate": 1.6828896405244988e-07, "loss": 0.5231990814208984, "step": 6461 }, { "epoch": 1.8896037432373154, "grad_norm": 1.7109665283076239, "learning_rate": 1.6740687446416326e-07, "loss": 0.5142268538475037, "step": 6462 }, { "epoch": 1.889896183652581, "grad_norm": 1.5939124952252972, "learning_rate": 1.6652708318473765e-07, "loss": 0.4803999364376068, "step": 6463 }, { "epoch": 1.890188624067846, "grad_norm": 1.8261203070041963, "learning_rate": 1.6564959041981743e-07, "loss": 0.38822099566459656, "step": 6464 }, { "epoch": 1.8904810644831116, "grad_norm": 1.7158195687276572, "learning_rate": 1.6477439637451186e-07, "loss": 0.4778556823730469, "step": 6465 }, { "epoch": 1.890773504898377, "grad_norm": 1.548976438279917, "learning_rate": 1.6390150125339178e-07, "loss": 0.5083664059638977, "step": 6466 }, { "epoch": 1.8910659453136422, "grad_norm": 2.298817115631298, "learning_rate": 1.6303090526049058e-07, "loss": 0.6592142581939697, "step": 6467 }, { "epoch": 1.8913583857289078, "grad_norm": 1.7188849828284447, "learning_rate": 1.6216260859930776e-07, "loss": 0.6350588798522949, "step": 6468 }, { "epoch": 1.8916508261441731, "grad_norm": 1.900981319900476, "learning_rate": 1.6129661147279763e-07, "loss": 0.5542852282524109, "step": 6469 }, { "epoch": 1.8919432665594385, "grad_norm": 1.7094379727839777, "learning_rate": 1.6043291408338602e-07, "loss": 0.572988748550415, "step": 6470 }, { "epoch": 1.892235706974704, "grad_norm": 1.578693569659532, "learning_rate": 1.5957151663295367e-07, "loss": 0.4801466763019562, "step": 6471 }, { "epoch": 1.8925281473899693, "grad_norm": 2.0149025268161207, "learning_rate": 1.5871241932284953e-07, "loss": 0.6286160349845886, "step": 6472 }, { "epoch": 1.8928205878052347, "grad_norm": 1.8739502258074872, "learning_rate": 1.5785562235388074e-07, "loss": 0.5731645822525024, "step": 6473 }, { "epoch": 1.8931130282205002, "grad_norm": 2.02559646967304, "learning_rate": 1.5700112592631933e-07, "loss": 0.47890836000442505, "step": 6474 }, { "epoch": 1.8934054686357653, "grad_norm": 1.8833158182705436, "learning_rate": 1.5614893023989886e-07, "loss": 0.4379703998565674, "step": 6475 }, { "epoch": 1.8936979090510309, "grad_norm": 1.886508266764503, "learning_rate": 1.5529903549381331e-07, "loss": 0.5629044771194458, "step": 6476 }, { "epoch": 1.8939903494662964, "grad_norm": 1.6388873220258502, "learning_rate": 1.5445144188672268e-07, "loss": 0.4995439052581787, "step": 6477 }, { "epoch": 1.8942827898815615, "grad_norm": 1.54762620576383, "learning_rate": 1.5360614961674403e-07, "loss": 0.5350549221038818, "step": 6478 }, { "epoch": 1.894575230296827, "grad_norm": 1.636976407400752, "learning_rate": 1.5276315888146266e-07, "loss": 0.5245925188064575, "step": 6479 }, { "epoch": 1.8948676707120924, "grad_norm": 1.870112790684546, "learning_rate": 1.519224698779198e-07, "loss": 0.5159675478935242, "step": 6480 }, { "epoch": 1.8951601111273577, "grad_norm": 1.6327790205426773, "learning_rate": 1.5108408280262276e-07, "loss": 0.5046014189720154, "step": 6481 }, { "epoch": 1.8954525515426233, "grad_norm": 1.5658787677393426, "learning_rate": 1.502479978515381e-07, "loss": 0.35977911949157715, "step": 6482 }, { "epoch": 1.8957449919578886, "grad_norm": 1.6374646749200208, "learning_rate": 1.4941421522009725e-07, "loss": 0.4689600467681885, "step": 6483 }, { "epoch": 1.896037432373154, "grad_norm": 1.713919299692529, "learning_rate": 1.485827351031899e-07, "loss": 0.5729683637619019, "step": 6484 }, { "epoch": 1.8963298727884195, "grad_norm": 2.081397285004385, "learning_rate": 1.4775355769517163e-07, "loss": 0.5929673314094543, "step": 6485 }, { "epoch": 1.8966223132036848, "grad_norm": 1.6003411415494537, "learning_rate": 1.4692668318985636e-07, "loss": 0.43075594305992126, "step": 6486 }, { "epoch": 1.8969147536189501, "grad_norm": 1.7646064022155787, "learning_rate": 1.461021117805217e-07, "loss": 0.5247992277145386, "step": 6487 }, { "epoch": 1.8972071940342157, "grad_norm": 1.7947400732319756, "learning_rate": 1.4527984365990455e-07, "loss": 0.4930630326271057, "step": 6488 }, { "epoch": 1.8974996344494808, "grad_norm": 2.490399223660391, "learning_rate": 1.4445987902020676e-07, "loss": 0.7183758616447449, "step": 6489 }, { "epoch": 1.8977920748647463, "grad_norm": 1.6603594705802933, "learning_rate": 1.4364221805309052e-07, "loss": 0.4766094982624054, "step": 6490 }, { "epoch": 1.8980845152800117, "grad_norm": 1.7401365125544646, "learning_rate": 1.4282686094967747e-07, "loss": 0.43594151735305786, "step": 6491 }, { "epoch": 1.898376955695277, "grad_norm": 1.4953976915814553, "learning_rate": 1.4201380790055397e-07, "loss": 0.49320366978645325, "step": 6492 }, { "epoch": 1.8986693961105425, "grad_norm": 1.7835092237734465, "learning_rate": 1.4120305909576359e-07, "loss": 0.600296139717102, "step": 6493 }, { "epoch": 1.8989618365258079, "grad_norm": 1.8325915671317163, "learning_rate": 1.4039461472481696e-07, "loss": 0.6692827939987183, "step": 6494 }, { "epoch": 1.8992542769410732, "grad_norm": 1.5707947665490356, "learning_rate": 1.395884749766807e-07, "loss": 0.49206262826919556, "step": 6495 }, { "epoch": 1.8995467173563387, "grad_norm": 1.5801197568349268, "learning_rate": 1.3878464003978741e-07, "loss": 0.4987361431121826, "step": 6496 }, { "epoch": 1.899839157771604, "grad_norm": 1.5345056226134064, "learning_rate": 1.3798311010202681e-07, "loss": 0.5020350217819214, "step": 6497 }, { "epoch": 1.9001315981868694, "grad_norm": 1.804856300616187, "learning_rate": 1.3718388535075123e-07, "loss": 0.5906451344490051, "step": 6498 }, { "epoch": 1.900424038602135, "grad_norm": 1.7402170644717794, "learning_rate": 1.3638696597277678e-07, "loss": 0.5089905858039856, "step": 6499 }, { "epoch": 1.9007164790174, "grad_norm": 1.7322257732687294, "learning_rate": 1.3559235215437672e-07, "loss": 0.4633820056915283, "step": 6500 }, { "epoch": 1.9010089194326656, "grad_norm": 1.760704522549711, "learning_rate": 1.34800044081288e-07, "loss": 0.4214053750038147, "step": 6501 }, { "epoch": 1.9013013598479311, "grad_norm": 1.8502899980656935, "learning_rate": 1.3401004193870694e-07, "loss": 0.6652689576148987, "step": 6502 }, { "epoch": 1.9015938002631962, "grad_norm": 1.9902189127655232, "learning_rate": 1.3322234591129247e-07, "loss": 0.610877275466919, "step": 6503 }, { "epoch": 1.9018862406784618, "grad_norm": 1.8174576160077263, "learning_rate": 1.324369561831651e-07, "loss": 0.5051777958869934, "step": 6504 }, { "epoch": 1.9021786810937271, "grad_norm": 1.679424427708786, "learning_rate": 1.3165387293790133e-07, "loss": 0.5004675984382629, "step": 6505 }, { "epoch": 1.9024711215089924, "grad_norm": 1.7771913721647656, "learning_rate": 1.3087309635854583e-07, "loss": 0.5778615474700928, "step": 6506 }, { "epoch": 1.902763561924258, "grad_norm": 2.0292378822767807, "learning_rate": 1.300946266275982e-07, "loss": 0.5282145738601685, "step": 6507 }, { "epoch": 1.9030560023395233, "grad_norm": 1.7978860546574749, "learning_rate": 1.2931846392702174e-07, "loss": 0.5965359807014465, "step": 6508 }, { "epoch": 1.9033484427547886, "grad_norm": 1.6652110616876246, "learning_rate": 1.2854460843823912e-07, "loss": 0.5891281366348267, "step": 6509 }, { "epoch": 1.9036408831700542, "grad_norm": 1.6248785946895787, "learning_rate": 1.2777306034213677e-07, "loss": 0.516204297542572, "step": 6510 }, { "epoch": 1.9039333235853195, "grad_norm": 1.6814946652270708, "learning_rate": 1.2700381981905486e-07, "loss": 0.5148355960845947, "step": 6511 }, { "epoch": 1.9042257640005849, "grad_norm": 1.8013354973349966, "learning_rate": 1.2623688704880287e-07, "loss": 0.5599791407585144, "step": 6512 }, { "epoch": 1.9045182044158504, "grad_norm": 1.4831223871376102, "learning_rate": 1.2547226221064412e-07, "loss": 0.44349417090415955, "step": 6513 }, { "epoch": 1.9048106448311155, "grad_norm": 1.8442502212828862, "learning_rate": 1.2470994548330672e-07, "loss": 0.5919830799102783, "step": 6514 }, { "epoch": 1.905103085246381, "grad_norm": 1.5907952124169482, "learning_rate": 1.2394993704497592e-07, "loss": 0.4615499675273895, "step": 6515 }, { "epoch": 1.9053955256616466, "grad_norm": 2.080938429630683, "learning_rate": 1.2319223707330074e-07, "loss": 0.5217719674110413, "step": 6516 }, { "epoch": 1.9056879660769117, "grad_norm": 1.7218384779241716, "learning_rate": 1.2243684574538838e-07, "loss": 0.510722279548645, "step": 6517 }, { "epoch": 1.9059804064921773, "grad_norm": 1.7577870608967676, "learning_rate": 1.2168376323780652e-07, "loss": 0.6744403839111328, "step": 6518 }, { "epoch": 1.9062728469074426, "grad_norm": 1.852387461765699, "learning_rate": 1.209329897265832e-07, "loss": 0.4991394281387329, "step": 6519 }, { "epoch": 1.906565287322708, "grad_norm": 1.5123359691224252, "learning_rate": 1.2018452538720805e-07, "loss": 0.43237754702568054, "step": 6520 }, { "epoch": 1.9068577277379735, "grad_norm": 1.929873331270624, "learning_rate": 1.1943837039463112e-07, "loss": 0.6042662262916565, "step": 6521 }, { "epoch": 1.9071501681532388, "grad_norm": 1.5924831654811167, "learning_rate": 1.186945249232585e-07, "loss": 0.4275910556316376, "step": 6522 }, { "epoch": 1.9074426085685041, "grad_norm": 1.6404715584331906, "learning_rate": 1.1795298914696219e-07, "loss": 0.4368266463279724, "step": 6523 }, { "epoch": 1.9077350489837697, "grad_norm": 1.7092717646033881, "learning_rate": 1.172137632390713e-07, "loss": 0.49492496252059937, "step": 6524 }, { "epoch": 1.908027489399035, "grad_norm": 1.587478317321713, "learning_rate": 1.164768473723743e-07, "loss": 0.4296407103538513, "step": 6525 }, { "epoch": 1.9083199298143003, "grad_norm": 1.746911434628144, "learning_rate": 1.1574224171912118e-07, "loss": 0.4609370231628418, "step": 6526 }, { "epoch": 1.9086123702295659, "grad_norm": 1.687321204236502, "learning_rate": 1.1500994645102237e-07, "loss": 0.5201660394668579, "step": 6527 }, { "epoch": 1.908904810644831, "grad_norm": 1.686028014701993, "learning_rate": 1.1427996173924649e-07, "loss": 0.49946731328964233, "step": 6528 }, { "epoch": 1.9091972510600965, "grad_norm": 1.700984250030961, "learning_rate": 1.1355228775442262e-07, "loss": 0.5479187369346619, "step": 6529 }, { "epoch": 1.9094896914753618, "grad_norm": 1.6485232123504545, "learning_rate": 1.1282692466664247e-07, "loss": 0.5227243900299072, "step": 6530 }, { "epoch": 1.9097821318906272, "grad_norm": 1.896983089459967, "learning_rate": 1.1210387264545264e-07, "loss": 0.42863208055496216, "step": 6531 }, { "epoch": 1.9100745723058927, "grad_norm": 1.552171259240321, "learning_rate": 1.113831318598635e-07, "loss": 0.37858498096466064, "step": 6532 }, { "epoch": 1.910367012721158, "grad_norm": 1.852509398879224, "learning_rate": 1.1066470247834471e-07, "loss": 0.6447315216064453, "step": 6533 }, { "epoch": 1.9106594531364234, "grad_norm": 1.4833597844037574, "learning_rate": 1.0994858466882197e-07, "loss": 0.4159877300262451, "step": 6534 }, { "epoch": 1.910951893551689, "grad_norm": 1.7056274655886765, "learning_rate": 1.0923477859868581e-07, "loss": 0.5042530298233032, "step": 6535 }, { "epoch": 1.9112443339669543, "grad_norm": 1.6939120860687955, "learning_rate": 1.0852328443478278e-07, "loss": 0.35955798625946045, "step": 6536 }, { "epoch": 1.9115367743822196, "grad_norm": 1.6272843503399623, "learning_rate": 1.0781410234342093e-07, "loss": 0.561823308467865, "step": 6537 }, { "epoch": 1.9118292147974851, "grad_norm": 1.6724333597123697, "learning_rate": 1.0710723249036659e-07, "loss": 0.44518136978149414, "step": 6538 }, { "epoch": 1.9121216552127502, "grad_norm": 2.0012454930429397, "learning_rate": 1.0640267504084756e-07, "loss": 0.5657057166099548, "step": 6539 }, { "epoch": 1.9124140956280158, "grad_norm": 1.5762808769057957, "learning_rate": 1.0570043015954989e-07, "loss": 0.5659947395324707, "step": 6540 }, { "epoch": 1.9127065360432813, "grad_norm": 1.5811137035723222, "learning_rate": 1.0500049801061784e-07, "loss": 0.45648419857025146, "step": 6541 }, { "epoch": 1.9129989764585464, "grad_norm": 1.8646406465839787, "learning_rate": 1.0430287875765611e-07, "loss": 0.4978141784667969, "step": 6542 }, { "epoch": 1.913291416873812, "grad_norm": 1.54273033799953, "learning_rate": 1.0360757256372977e-07, "loss": 0.5397627949714661, "step": 6543 }, { "epoch": 1.9135838572890773, "grad_norm": 1.8918413526412523, "learning_rate": 1.029145795913633e-07, "loss": 0.6359304189682007, "step": 6544 }, { "epoch": 1.9138762977043426, "grad_norm": 1.70706044627556, "learning_rate": 1.0222390000253824e-07, "loss": 0.5023899078369141, "step": 6545 }, { "epoch": 1.9141687381196082, "grad_norm": 1.8668808073409142, "learning_rate": 1.0153553395869654e-07, "loss": 0.5231877565383911, "step": 6546 }, { "epoch": 1.9144611785348735, "grad_norm": 1.7146199886416342, "learning_rate": 1.008494816207406e-07, "loss": 0.5925711393356323, "step": 6547 }, { "epoch": 1.9147536189501388, "grad_norm": 1.5881527564838034, "learning_rate": 1.0016574314902993e-07, "loss": 0.42732810974121094, "step": 6548 }, { "epoch": 1.9150460593654044, "grad_norm": 1.8539790257850415, "learning_rate": 9.948431870338559e-08, "loss": 0.5011821985244751, "step": 6549 }, { "epoch": 1.9153384997806697, "grad_norm": 1.7063021653673758, "learning_rate": 9.88052084430846e-08, "loss": 0.5112487077713013, "step": 6550 }, { "epoch": 1.915630940195935, "grad_norm": 1.8003514575818433, "learning_rate": 9.812841252686667e-08, "loss": 0.4751431345939636, "step": 6551 }, { "epoch": 1.9159233806112006, "grad_norm": 1.9933791417538373, "learning_rate": 9.745393111292745e-08, "loss": 0.5343109369277954, "step": 6552 }, { "epoch": 1.9162158210264657, "grad_norm": 1.4980785147509508, "learning_rate": 9.678176435892417e-08, "loss": 0.4602724015712738, "step": 6553 }, { "epoch": 1.9165082614417313, "grad_norm": 1.5436966250785777, "learning_rate": 9.611191242197005e-08, "loss": 0.4756245017051697, "step": 6554 }, { "epoch": 1.9168007018569968, "grad_norm": 1.6531719135209273, "learning_rate": 9.544437545864093e-08, "loss": 0.5291459560394287, "step": 6555 }, { "epoch": 1.917093142272262, "grad_norm": 2.0976196168420946, "learning_rate": 9.47791536249676e-08, "loss": 0.5357412099838257, "step": 6556 }, { "epoch": 1.9173855826875275, "grad_norm": 2.238353466121697, "learning_rate": 9.411624707644229e-08, "loss": 0.6298913955688477, "step": 6557 }, { "epoch": 1.9176780231027928, "grad_norm": 1.4485326554294644, "learning_rate": 9.345565596801553e-08, "loss": 0.5150517225265503, "step": 6558 }, { "epoch": 1.917970463518058, "grad_norm": 1.8563821954536717, "learning_rate": 9.279738045409603e-08, "loss": 0.6264858245849609, "step": 6559 }, { "epoch": 1.9182629039333237, "grad_norm": 1.46383829182073, "learning_rate": 9.214142068855292e-08, "loss": 0.33123475313186646, "step": 6560 }, { "epoch": 1.918555344348589, "grad_norm": 1.6384165039446617, "learning_rate": 9.148777682471133e-08, "loss": 0.5540212392807007, "step": 6561 }, { "epoch": 1.9188477847638543, "grad_norm": 1.8427168178125763, "learning_rate": 9.083644901535793e-08, "loss": 0.5633922219276428, "step": 6562 }, { "epoch": 1.9191402251791199, "grad_norm": 1.7743383669625796, "learning_rate": 9.018743741273428e-08, "loss": 0.58629310131073, "step": 6563 }, { "epoch": 1.9194326655943852, "grad_norm": 1.8674136448530827, "learning_rate": 8.95407421685457e-08, "loss": 0.5985243320465088, "step": 6564 }, { "epoch": 1.9197251060096505, "grad_norm": 1.6803719834498339, "learning_rate": 8.889636343395235e-08, "loss": 0.5344138741493225, "step": 6565 }, { "epoch": 1.920017546424916, "grad_norm": 1.783895238536977, "learning_rate": 8.825430135957381e-08, "loss": 0.6139744520187378, "step": 6566 }, { "epoch": 1.9203099868401812, "grad_norm": 1.4220884637268112, "learning_rate": 8.761455609548663e-08, "loss": 0.46376854181289673, "step": 6567 }, { "epoch": 1.9206024272554467, "grad_norm": 1.7412635159811354, "learning_rate": 8.697712779122902e-08, "loss": 0.5053622722625732, "step": 6568 }, { "epoch": 1.920894867670712, "grad_norm": 1.52795636278423, "learning_rate": 8.634201659579622e-08, "loss": 0.4363771080970764, "step": 6569 }, { "epoch": 1.9211873080859774, "grad_norm": 1.6799265353987254, "learning_rate": 8.570922265764059e-08, "loss": 0.4167904853820801, "step": 6570 }, { "epoch": 1.921479748501243, "grad_norm": 1.7506509667217935, "learning_rate": 8.507874612467382e-08, "loss": 0.525320291519165, "step": 6571 }, { "epoch": 1.9217721889165083, "grad_norm": 1.5127507314447914, "learning_rate": 8.445058714426691e-08, "loss": 0.4087376594543457, "step": 6572 }, { "epoch": 1.9220646293317736, "grad_norm": 1.975359435328043, "learning_rate": 8.382474586324796e-08, "loss": 0.471457839012146, "step": 6573 }, { "epoch": 1.9223570697470391, "grad_norm": 1.5584377744842253, "learning_rate": 8.32012224279033e-08, "loss": 0.6125116348266602, "step": 6574 }, { "epoch": 1.9226495101623045, "grad_norm": 1.8527915049964467, "learning_rate": 8.258001698397744e-08, "loss": 0.3800301253795624, "step": 6575 }, { "epoch": 1.9229419505775698, "grad_norm": 1.7927235022665284, "learning_rate": 8.196112967667313e-08, "loss": 0.561034083366394, "step": 6576 }, { "epoch": 1.9232343909928353, "grad_norm": 1.8012018638552385, "learning_rate": 8.134456065065354e-08, "loss": 0.5768460631370544, "step": 6577 }, { "epoch": 1.9235268314081004, "grad_norm": 1.809882879975094, "learning_rate": 8.073031005003562e-08, "loss": 0.47440657019615173, "step": 6578 }, { "epoch": 1.923819271823366, "grad_norm": 1.4902012429082565, "learning_rate": 8.011837801839672e-08, "loss": 0.5315208435058594, "step": 6579 }, { "epoch": 1.9241117122386315, "grad_norm": 1.7054296975282524, "learning_rate": 7.950876469877467e-08, "loss": 0.4587036371231079, "step": 6580 }, { "epoch": 1.9244041526538966, "grad_norm": 1.6717861291166198, "learning_rate": 7.890147023366101e-08, "loss": 0.5356466770172119, "step": 6581 }, { "epoch": 1.9246965930691622, "grad_norm": 1.8066170712430372, "learning_rate": 7.829649476500667e-08, "loss": 0.48034095764160156, "step": 6582 }, { "epoch": 1.9249890334844275, "grad_norm": 1.9403707417182101, "learning_rate": 7.769383843422185e-08, "loss": 0.502929151058197, "step": 6583 }, { "epoch": 1.9252814738996928, "grad_norm": 1.5994546211401888, "learning_rate": 7.709350138217386e-08, "loss": 0.44771361351013184, "step": 6584 }, { "epoch": 1.9255739143149584, "grad_norm": 1.7058923530240673, "learning_rate": 7.649548374918824e-08, "loss": 0.462479829788208, "step": 6585 }, { "epoch": 1.9258663547302237, "grad_norm": 1.7481939511400157, "learning_rate": 7.589978567504763e-08, "loss": 0.4758496880531311, "step": 6586 }, { "epoch": 1.926158795145489, "grad_norm": 1.8447645858435646, "learning_rate": 7.530640729899174e-08, "loss": 0.521172285079956, "step": 6587 }, { "epoch": 1.9264512355607546, "grad_norm": 1.685029384432281, "learning_rate": 7.471534875971964e-08, "loss": 0.5274392366409302, "step": 6588 }, { "epoch": 1.92674367597602, "grad_norm": 1.5547682278755586, "learning_rate": 7.412661019538858e-08, "loss": 0.4350961446762085, "step": 6589 }, { "epoch": 1.9270361163912852, "grad_norm": 1.5773569785123847, "learning_rate": 7.354019174361183e-08, "loss": 0.6298524737358093, "step": 6590 }, { "epoch": 1.9273285568065508, "grad_norm": 1.7494178023153484, "learning_rate": 7.295609354146194e-08, "loss": 0.5451292395591736, "step": 6591 }, { "epoch": 1.927620997221816, "grad_norm": 1.8824055292173802, "learning_rate": 7.23743157254675e-08, "loss": 0.5371264219284058, "step": 6592 }, { "epoch": 1.9279134376370815, "grad_norm": 1.714393478017535, "learning_rate": 7.179485843161526e-08, "loss": 0.5805129408836365, "step": 6593 }, { "epoch": 1.928205878052347, "grad_norm": 1.9692321834579947, "learning_rate": 7.121772179535135e-08, "loss": 0.5542718172073364, "step": 6594 }, { "epoch": 1.928498318467612, "grad_norm": 1.7503350699121312, "learning_rate": 7.064290595157675e-08, "loss": 0.5668192505836487, "step": 6595 }, { "epoch": 1.9287907588828777, "grad_norm": 1.6293975396756264, "learning_rate": 7.007041103465062e-08, "loss": 0.5107895731925964, "step": 6596 }, { "epoch": 1.929083199298143, "grad_norm": 1.847055531354174, "learning_rate": 6.950023717839261e-08, "loss": 0.47974276542663574, "step": 6597 }, { "epoch": 1.9293756397134083, "grad_norm": 1.5624753949857668, "learning_rate": 6.893238451607387e-08, "loss": 0.5641148090362549, "step": 6598 }, { "epoch": 1.9296680801286739, "grad_norm": 1.7181332365296518, "learning_rate": 6.836685318042935e-08, "loss": 0.5940253734588623, "step": 6599 }, { "epoch": 1.9299605205439392, "grad_norm": 1.6880020580834156, "learning_rate": 6.780364330364775e-08, "loss": 0.46844422817230225, "step": 6600 }, { "epoch": 1.9302529609592045, "grad_norm": 1.6235992853167036, "learning_rate": 6.724275501737487e-08, "loss": 0.3933336138725281, "step": 6601 }, { "epoch": 1.93054540137447, "grad_norm": 1.4538666395679365, "learning_rate": 6.668418845271695e-08, "loss": 0.4786602258682251, "step": 6602 }, { "epoch": 1.9308378417897354, "grad_norm": 1.798637107768398, "learning_rate": 6.612794374023402e-08, "loss": 0.49695518612861633, "step": 6603 }, { "epoch": 1.9311302822050007, "grad_norm": 1.5049309556488495, "learning_rate": 6.557402100994426e-08, "loss": 0.4798729121685028, "step": 6604 }, { "epoch": 1.9314227226202663, "grad_norm": 1.7300127457609986, "learning_rate": 6.502242039132634e-08, "loss": 0.4187319278717041, "step": 6605 }, { "epoch": 1.9317151630355314, "grad_norm": 2.050722935709042, "learning_rate": 6.447314201331156e-08, "loss": 0.4945526719093323, "step": 6606 }, { "epoch": 1.932007603450797, "grad_norm": 1.8976456851513979, "learning_rate": 6.392618600429057e-08, "loss": 0.5721586346626282, "step": 6607 }, { "epoch": 1.9323000438660622, "grad_norm": 1.6286185694607815, "learning_rate": 6.338155249211109e-08, "loss": 0.45542022585868835, "step": 6608 }, { "epoch": 1.9325924842813276, "grad_norm": 1.7597762099762242, "learning_rate": 6.283924160407796e-08, "loss": 0.5627170205116272, "step": 6609 }, { "epoch": 1.9328849246965931, "grad_norm": 1.6951677907486626, "learning_rate": 6.22992534669542e-08, "loss": 0.5369620323181152, "step": 6610 }, { "epoch": 1.9331773651118584, "grad_norm": 1.619968087818578, "learning_rate": 6.176158820695665e-08, "loss": 0.5268368124961853, "step": 6611 }, { "epoch": 1.9334698055271238, "grad_norm": 1.6828649754520415, "learning_rate": 6.122624594976257e-08, "loss": 0.5734575986862183, "step": 6612 }, { "epoch": 1.9337622459423893, "grad_norm": 1.86766787540182, "learning_rate": 6.069322682050516e-08, "loss": 0.5066978931427002, "step": 6613 }, { "epoch": 1.9340546863576547, "grad_norm": 1.68962846891993, "learning_rate": 6.016253094377366e-08, "loss": 0.5462731719017029, "step": 6614 }, { "epoch": 1.93434712677292, "grad_norm": 1.8689912619353801, "learning_rate": 5.963415844361553e-08, "loss": 0.5407041311264038, "step": 6615 }, { "epoch": 1.9346395671881855, "grad_norm": 1.792133188360025, "learning_rate": 5.910810944353418e-08, "loss": 0.48977869749069214, "step": 6616 }, { "epoch": 1.9349320076034506, "grad_norm": 1.8900630995604775, "learning_rate": 5.858438406649125e-08, "loss": 0.5320937037467957, "step": 6617 }, { "epoch": 1.9352244480187162, "grad_norm": 1.6602834270947344, "learning_rate": 5.806298243490327e-08, "loss": 0.5860059261322021, "step": 6618 }, { "epoch": 1.9355168884339817, "grad_norm": 1.7299178033338176, "learning_rate": 5.7543904670644965e-08, "loss": 0.49517208337783813, "step": 6619 }, { "epoch": 1.9358093288492468, "grad_norm": 1.4975030277698207, "learning_rate": 5.7027150895049286e-08, "loss": 0.5060882568359375, "step": 6620 }, { "epoch": 1.9361017692645124, "grad_norm": 1.7387399518104565, "learning_rate": 5.651272122890184e-08, "loss": 0.5887798070907593, "step": 6621 }, { "epoch": 1.9363942096797777, "grad_norm": 2.006477050241073, "learning_rate": 5.600061579244753e-08, "loss": 0.6567577123641968, "step": 6622 }, { "epoch": 1.936686650095043, "grad_norm": 1.7419376875296542, "learning_rate": 5.549083470538952e-08, "loss": 0.5672584176063538, "step": 6623 }, { "epoch": 1.9369790905103086, "grad_norm": 1.6312975104255192, "learning_rate": 5.4983378086885806e-08, "loss": 0.5166369676589966, "step": 6624 }, { "epoch": 1.937271530925574, "grad_norm": 1.7351407182284893, "learning_rate": 5.447824605555041e-08, "loss": 0.5157661437988281, "step": 6625 }, { "epoch": 1.9375639713408392, "grad_norm": 1.5452343867654343, "learning_rate": 5.397543872945443e-08, "loss": 0.5001711845397949, "step": 6626 }, { "epoch": 1.9378564117561048, "grad_norm": 1.5666441918912, "learning_rate": 5.34749562261272e-08, "loss": 0.48944878578186035, "step": 6627 }, { "epoch": 1.9381488521713701, "grad_norm": 1.8943450842549039, "learning_rate": 5.297679866255401e-08, "loss": 0.5400780439376831, "step": 6628 }, { "epoch": 1.9384412925866354, "grad_norm": 1.6944930575034618, "learning_rate": 5.248096615517395e-08, "loss": 0.544346809387207, "step": 6629 }, { "epoch": 1.938733733001901, "grad_norm": 1.8360261063384646, "learning_rate": 5.1987458819886535e-08, "loss": 0.5283153653144836, "step": 6630 }, { "epoch": 1.939026173417166, "grad_norm": 1.8162414803988312, "learning_rate": 5.149627677204616e-08, "loss": 0.555808424949646, "step": 6631 }, { "epoch": 1.9393186138324316, "grad_norm": 1.7068645601820531, "learning_rate": 5.10074201264632e-08, "loss": 0.5230466723442078, "step": 6632 }, { "epoch": 1.9396110542476972, "grad_norm": 1.592321180041504, "learning_rate": 5.052088899740515e-08, "loss": 0.4810416102409363, "step": 6633 }, { "epoch": 1.9399034946629623, "grad_norm": 1.2489690563293379, "learning_rate": 5.0036683498594365e-08, "loss": 0.35233962535858154, "step": 6634 }, { "epoch": 1.9401959350782279, "grad_norm": 1.5949248677680616, "learning_rate": 4.955480374321253e-08, "loss": 0.5250035524368286, "step": 6635 }, { "epoch": 1.9404883754934932, "grad_norm": 1.5547636594172098, "learning_rate": 4.907524984389622e-08, "loss": 0.5896221399307251, "step": 6636 }, { "epoch": 1.9407808159087585, "grad_norm": 1.5725705573586048, "learning_rate": 4.859802191273688e-08, "loss": 0.5410518050193787, "step": 6637 }, { "epoch": 1.941073256324024, "grad_norm": 1.5273512663488045, "learning_rate": 4.812312006128528e-08, "loss": 0.5044152736663818, "step": 6638 }, { "epoch": 1.9413656967392894, "grad_norm": 1.6537481992077037, "learning_rate": 4.765054440054484e-08, "loss": 0.5388177633285522, "step": 6639 }, { "epoch": 1.9416581371545547, "grad_norm": 2.0702365693466485, "learning_rate": 4.718029504097943e-08, "loss": 0.5074491500854492, "step": 6640 }, { "epoch": 1.9419505775698203, "grad_norm": 1.6224415285858116, "learning_rate": 4.671237209250557e-08, "loss": 0.47772669792175293, "step": 6641 }, { "epoch": 1.9422430179850856, "grad_norm": 1.6570845374645817, "learning_rate": 4.624677566449798e-08, "loss": 0.4682825207710266, "step": 6642 }, { "epoch": 1.942535458400351, "grad_norm": 1.5100328644654928, "learning_rate": 4.578350586578628e-08, "loss": 0.48880642652511597, "step": 6643 }, { "epoch": 1.9428278988156165, "grad_norm": 1.6890744037677652, "learning_rate": 4.532256280465719e-08, "loss": 0.4590389132499695, "step": 6644 }, { "epoch": 1.9431203392308816, "grad_norm": 1.903981857624826, "learning_rate": 4.48639465888534e-08, "loss": 0.5893105268478394, "step": 6645 }, { "epoch": 1.9434127796461471, "grad_norm": 1.7274912065627603, "learning_rate": 4.4407657325574725e-08, "loss": 0.561900794506073, "step": 6646 }, { "epoch": 1.9437052200614124, "grad_norm": 1.662019693277273, "learning_rate": 4.395369512147474e-08, "loss": 0.4140210747718811, "step": 6647 }, { "epoch": 1.9439976604766778, "grad_norm": 1.7955978434650512, "learning_rate": 4.350206008266522e-08, "loss": 0.6220303773880005, "step": 6648 }, { "epoch": 1.9442901008919433, "grad_norm": 1.771531678180808, "learning_rate": 4.3052752314712844e-08, "loss": 0.4903472065925598, "step": 6649 }, { "epoch": 1.9445825413072086, "grad_norm": 1.889992657698585, "learning_rate": 4.260577192263915e-08, "loss": 0.4519340991973877, "step": 6650 }, { "epoch": 1.944874981722474, "grad_norm": 1.7435292517018475, "learning_rate": 4.216111901092501e-08, "loss": 0.49067920446395874, "step": 6651 }, { "epoch": 1.9451674221377395, "grad_norm": 1.8654652047797853, "learning_rate": 4.1718793683505066e-08, "loss": 0.5935854911804199, "step": 6652 }, { "epoch": 1.9454598625530048, "grad_norm": 1.7744411864937968, "learning_rate": 4.127879604376883e-08, "loss": 0.5209576487541199, "step": 6653 }, { "epoch": 1.9457523029682702, "grad_norm": 1.50564473891113, "learning_rate": 4.084112619456515e-08, "loss": 0.4454221725463867, "step": 6654 }, { "epoch": 1.9460447433835357, "grad_norm": 1.8157940398905494, "learning_rate": 4.0405784238194415e-08, "loss": 0.5129591226577759, "step": 6655 }, { "epoch": 1.9463371837988008, "grad_norm": 1.63185696744402, "learning_rate": 3.997277027641744e-08, "loss": 0.48704665899276733, "step": 6656 }, { "epoch": 1.9466296242140664, "grad_norm": 1.8037751571098388, "learning_rate": 3.95420844104466e-08, "loss": 0.4510651230812073, "step": 6657 }, { "epoch": 1.946922064629332, "grad_norm": 1.7817975919339482, "learning_rate": 3.911372674095249e-08, "loss": 0.5116807222366333, "step": 6658 }, { "epoch": 1.947214505044597, "grad_norm": 1.7985765763419883, "learning_rate": 3.868769736806277e-08, "loss": 0.592056393623352, "step": 6659 }, { "epoch": 1.9475069454598626, "grad_norm": 1.7881377609654638, "learning_rate": 3.8263996391357805e-08, "loss": 0.579146146774292, "step": 6660 }, { "epoch": 1.947799385875128, "grad_norm": 1.6202416659647267, "learning_rate": 3.784262390987503e-08, "loss": 0.5253209471702576, "step": 6661 }, { "epoch": 1.9480918262903932, "grad_norm": 2.008309380522338, "learning_rate": 3.742358002210789e-08, "loss": 0.5614888072013855, "step": 6662 }, { "epoch": 1.9483842667056588, "grad_norm": 1.6491223001780133, "learning_rate": 3.7006864826005796e-08, "loss": 0.5630952715873718, "step": 6663 }, { "epoch": 1.9486767071209241, "grad_norm": 2.1390311477096944, "learning_rate": 3.659247841897306e-08, "loss": 0.5990846157073975, "step": 6664 }, { "epoch": 1.9489691475361894, "grad_norm": 1.6162006621933969, "learning_rate": 3.6180420897868886e-08, "loss": 0.5290813446044922, "step": 6665 }, { "epoch": 1.949261587951455, "grad_norm": 2.6144126732722803, "learning_rate": 3.577069235901176e-08, "loss": 0.6710211038589478, "step": 6666 }, { "epoch": 1.9495540283667203, "grad_norm": 1.71689411729531, "learning_rate": 3.536329289817064e-08, "loss": 0.4802299737930298, "step": 6667 }, { "epoch": 1.9498464687819856, "grad_norm": 1.6268319596207468, "learning_rate": 3.495822261057491e-08, "loss": 0.5432649850845337, "step": 6668 }, { "epoch": 1.9501389091972512, "grad_norm": 1.9426982793491434, "learning_rate": 3.4555481590905495e-08, "loss": 0.5824951529502869, "step": 6669 }, { "epoch": 1.9504313496125163, "grad_norm": 1.5773733844612365, "learning_rate": 3.4155069933301535e-08, "loss": 0.48428961634635925, "step": 6670 }, { "epoch": 1.9507237900277818, "grad_norm": 1.7258198741312958, "learning_rate": 3.375698773135705e-08, "loss": 0.5684780478477478, "step": 6671 }, { "epoch": 1.9510162304430474, "grad_norm": 1.7742355369350526, "learning_rate": 3.336123507811983e-08, "loss": 0.5658689737319946, "step": 6672 }, { "epoch": 1.9513086708583125, "grad_norm": 1.7743474017748566, "learning_rate": 3.2967812066097006e-08, "loss": 0.6265745162963867, "step": 6673 }, { "epoch": 1.951601111273578, "grad_norm": 1.768397532537575, "learning_rate": 3.257671878724722e-08, "loss": 0.5732975006103516, "step": 6674 }, { "epoch": 1.9518935516888434, "grad_norm": 2.3801499199920273, "learning_rate": 3.218795533298624e-08, "loss": 0.46968942880630493, "step": 6675 }, { "epoch": 1.9521859921041087, "grad_norm": 1.9250466851177817, "learning_rate": 3.180152179418472e-08, "loss": 0.5651586055755615, "step": 6676 }, { "epoch": 1.9524784325193743, "grad_norm": 1.4699414350235678, "learning_rate": 3.141741826117151e-08, "loss": 0.46789437532424927, "step": 6677 }, { "epoch": 1.9527708729346396, "grad_norm": 1.6701838665271502, "learning_rate": 3.1035644823725896e-08, "loss": 0.5332610011100769, "step": 6678 }, { "epoch": 1.953063313349905, "grad_norm": 1.825129394239336, "learning_rate": 3.06562015710854e-08, "loss": 0.49613600969314575, "step": 6679 }, { "epoch": 1.9533557537651705, "grad_norm": 2.1340240197713265, "learning_rate": 3.027908859194351e-08, "loss": 0.5498408079147339, "step": 6680 }, { "epoch": 1.9536481941804358, "grad_norm": 1.8887907896186948, "learning_rate": 2.99043059744486e-08, "loss": 0.6802657842636108, "step": 6681 }, { "epoch": 1.953940634595701, "grad_norm": 1.8609256911752867, "learning_rate": 2.9531853806201716e-08, "loss": 0.5149989724159241, "step": 6682 }, { "epoch": 1.9542330750109667, "grad_norm": 1.7262483706342455, "learning_rate": 2.9161732174263212e-08, "loss": 0.5249730944633484, "step": 6683 }, { "epoch": 1.9545255154262318, "grad_norm": 1.7003943133697261, "learning_rate": 2.8793941165147222e-08, "loss": 0.5711483359336853, "step": 6684 }, { "epoch": 1.9548179558414973, "grad_norm": 1.7303037823896377, "learning_rate": 2.842848086482053e-08, "loss": 0.4591020345687866, "step": 6685 }, { "epoch": 1.9551103962567626, "grad_norm": 1.887004603599524, "learning_rate": 2.8065351358708136e-08, "loss": 0.575869083404541, "step": 6686 }, { "epoch": 1.955402836672028, "grad_norm": 1.7563501117497715, "learning_rate": 2.7704552731688816e-08, "loss": 0.5664101839065552, "step": 6687 }, { "epoch": 1.9556952770872935, "grad_norm": 1.5280681451949298, "learning_rate": 2.7346085068098437e-08, "loss": 0.5739811062812805, "step": 6688 }, { "epoch": 1.9559877175025588, "grad_norm": 1.64304520297204, "learning_rate": 2.6989948451726643e-08, "loss": 0.4707348942756653, "step": 6689 }, { "epoch": 1.9562801579178242, "grad_norm": 1.4347028954089904, "learning_rate": 2.6636142965816848e-08, "loss": 0.38842523097991943, "step": 6690 }, { "epoch": 1.9565725983330897, "grad_norm": 1.9429266961932796, "learning_rate": 2.628466869306956e-08, "loss": 0.4295673668384552, "step": 6691 }, { "epoch": 1.956865038748355, "grad_norm": 1.9886421076178336, "learning_rate": 2.5935525715640176e-08, "loss": 0.5358999967575073, "step": 6692 }, { "epoch": 1.9571574791636204, "grad_norm": 1.8207487442928234, "learning_rate": 2.5588714115137857e-08, "loss": 0.49730730056762695, "step": 6693 }, { "epoch": 1.957449919578886, "grad_norm": 1.8975782350563493, "learning_rate": 2.5244233972627762e-08, "loss": 0.5368232131004333, "step": 6694 }, { "epoch": 1.957742359994151, "grad_norm": 1.6616905607648789, "learning_rate": 2.4902085368632144e-08, "loss": 0.48084500432014465, "step": 6695 }, { "epoch": 1.9580348004094166, "grad_norm": 1.6503756551181779, "learning_rate": 2.45622683831237e-08, "loss": 0.5197296142578125, "step": 6696 }, { "epoch": 1.9583272408246821, "grad_norm": 1.7005704554604877, "learning_rate": 2.4224783095532224e-08, "loss": 0.4807678163051605, "step": 6697 }, { "epoch": 1.9586196812399472, "grad_norm": 1.5200854711140026, "learning_rate": 2.388962958474461e-08, "loss": 0.5117641687393188, "step": 6698 }, { "epoch": 1.9589121216552128, "grad_norm": 1.5153035364420055, "learning_rate": 2.355680792910153e-08, "loss": 0.5318149328231812, "step": 6699 }, { "epoch": 1.959204562070478, "grad_norm": 1.642749755305391, "learning_rate": 2.3226318206395206e-08, "loss": 0.5590193271636963, "step": 6700 }, { "epoch": 1.9594970024857434, "grad_norm": 1.9273854799208605, "learning_rate": 2.2898160493878275e-08, "loss": 0.7686688899993896, "step": 6701 }, { "epoch": 1.959789442901009, "grad_norm": 1.7479554033366604, "learning_rate": 2.257233486825383e-08, "loss": 0.5085177421569824, "step": 6702 }, { "epoch": 1.9600818833162743, "grad_norm": 1.4224817781801729, "learning_rate": 2.2248841405683176e-08, "loss": 0.44002413749694824, "step": 6703 }, { "epoch": 1.9603743237315396, "grad_norm": 1.6541616903883845, "learning_rate": 2.1927680181779154e-08, "loss": 0.5369126796722412, "step": 6704 }, { "epoch": 1.9606667641468052, "grad_norm": 1.5811100430561291, "learning_rate": 2.1608851271612828e-08, "loss": 0.516021728515625, "step": 6705 }, { "epoch": 1.9609592045620705, "grad_norm": 1.577385822778267, "learning_rate": 2.1292354749707922e-08, "loss": 0.5215185284614563, "step": 6706 }, { "epoch": 1.9612516449773358, "grad_norm": 1.7926842955012665, "learning_rate": 2.0978190690043032e-08, "loss": 0.6051908731460571, "step": 6707 }, { "epoch": 1.9615440853926014, "grad_norm": 1.8529134419730404, "learning_rate": 2.066635916605386e-08, "loss": 0.5426267385482788, "step": 6708 }, { "epoch": 1.9618365258078665, "grad_norm": 2.064852719580073, "learning_rate": 2.0356860250626554e-08, "loss": 0.5888626575469971, "step": 6709 }, { "epoch": 1.962128966223132, "grad_norm": 1.4287637894797525, "learning_rate": 2.004969401610657e-08, "loss": 0.5225001573562622, "step": 6710 }, { "epoch": 1.9624214066383976, "grad_norm": 1.616132198436982, "learning_rate": 1.974486053429092e-08, "loss": 0.5735136270523071, "step": 6711 }, { "epoch": 1.9627138470536627, "grad_norm": 1.6327567238976746, "learning_rate": 1.9442359876433724e-08, "loss": 0.5302764177322388, "step": 6712 }, { "epoch": 1.9630062874689282, "grad_norm": 1.625182085046959, "learning_rate": 1.9142192113241752e-08, "loss": 0.5078837871551514, "step": 6713 }, { "epoch": 1.9632987278841936, "grad_norm": 1.7110845788062152, "learning_rate": 1.884435731487888e-08, "loss": 0.5772985219955444, "step": 6714 }, { "epoch": 1.963591168299459, "grad_norm": 2.2561904758082925, "learning_rate": 1.8548855550959423e-08, "loss": 0.5974931716918945, "step": 6715 }, { "epoch": 1.9638836087147244, "grad_norm": 1.8105323667501525, "learning_rate": 1.8255686890558123e-08, "loss": 0.5065072774887085, "step": 6716 }, { "epoch": 1.9641760491299898, "grad_norm": 1.4374806170365766, "learning_rate": 1.7964851402199058e-08, "loss": 0.4729428291320801, "step": 6717 }, { "epoch": 1.964468489545255, "grad_norm": 1.7924892088352824, "learning_rate": 1.7676349153864515e-08, "loss": 0.46363723278045654, "step": 6718 }, { "epoch": 1.9647609299605207, "grad_norm": 1.6096201158909726, "learning_rate": 1.7390180212990547e-08, "loss": 0.5436959266662598, "step": 6719 }, { "epoch": 1.965053370375786, "grad_norm": 1.8570609869736334, "learning_rate": 1.7106344646465877e-08, "loss": 0.7571452856063843, "step": 6720 }, { "epoch": 1.9653458107910513, "grad_norm": 1.7203125443062617, "learning_rate": 1.682484252063632e-08, "loss": 0.5724680423736572, "step": 6721 }, { "epoch": 1.9656382512063169, "grad_norm": 1.5552868811193872, "learning_rate": 1.654567390130146e-08, "loss": 0.46937745809555054, "step": 6722 }, { "epoch": 1.965930691621582, "grad_norm": 1.4639592826813614, "learning_rate": 1.6268838853713552e-08, "loss": 0.5764822363853455, "step": 6723 }, { "epoch": 1.9662231320368475, "grad_norm": 1.8890557259087926, "learning_rate": 1.5994337442584164e-08, "loss": 0.6074192523956299, "step": 6724 }, { "epoch": 1.9665155724521128, "grad_norm": 1.8156005720173343, "learning_rate": 1.572216973207419e-08, "loss": 0.6001715064048767, "step": 6725 }, { "epoch": 1.9668080128673782, "grad_norm": 1.8635551001096793, "learning_rate": 1.545233578580163e-08, "loss": 0.5819540619850159, "step": 6726 }, { "epoch": 1.9671004532826437, "grad_norm": 1.509757451229315, "learning_rate": 1.518483566683826e-08, "loss": 0.4745405912399292, "step": 6727 }, { "epoch": 1.967392893697909, "grad_norm": 1.5301158686504193, "learning_rate": 1.4919669437710725e-08, "loss": 0.4438042640686035, "step": 6728 }, { "epoch": 1.9676853341131744, "grad_norm": 1.6058873643565785, "learning_rate": 1.465683716040056e-08, "loss": 0.45798003673553467, "step": 6729 }, { "epoch": 1.96797777452844, "grad_norm": 1.5582798501168125, "learning_rate": 1.4396338896341955e-08, "loss": 0.3918766379356384, "step": 6730 }, { "epoch": 1.9682702149437052, "grad_norm": 1.6253936447718431, "learning_rate": 1.4138174706426199e-08, "loss": 0.5266170501708984, "step": 6731 }, { "epoch": 1.9685626553589706, "grad_norm": 1.733772185361853, "learning_rate": 1.3882344650998359e-08, "loss": 0.5166668891906738, "step": 6732 }, { "epoch": 1.9688550957742361, "grad_norm": 1.7595735268115036, "learning_rate": 1.3628848789853932e-08, "loss": 0.39324697852134705, "step": 6733 }, { "epoch": 1.9691475361895012, "grad_norm": 1.8212233848125128, "learning_rate": 1.3377687182248855e-08, "loss": 0.4915732443332672, "step": 6734 }, { "epoch": 1.9694399766047668, "grad_norm": 1.7689973508355645, "learning_rate": 1.31288598868895e-08, "loss": 0.5416492819786072, "step": 6735 }, { "epoch": 1.9697324170200323, "grad_norm": 1.6021351256215517, "learning_rate": 1.288236696193823e-08, "loss": 0.4713748097419739, "step": 6736 }, { "epoch": 1.9700248574352974, "grad_norm": 1.7411270752119496, "learning_rate": 1.263820846501118e-08, "loss": 0.44074663519859314, "step": 6737 }, { "epoch": 1.970317297850563, "grad_norm": 1.7164561827524085, "learning_rate": 1.2396384453179366e-08, "loss": 0.4694680869579315, "step": 6738 }, { "epoch": 1.9706097382658283, "grad_norm": 1.8691907501418656, "learning_rate": 1.215689498296535e-08, "loss": 0.553142786026001, "step": 6739 }, { "epoch": 1.9709021786810936, "grad_norm": 1.7953149807008746, "learning_rate": 1.1919740110351019e-08, "loss": 0.533849835395813, "step": 6740 }, { "epoch": 1.9711946190963592, "grad_norm": 1.9503927011602655, "learning_rate": 1.1684919890768698e-08, "loss": 0.5448808670043945, "step": 6741 }, { "epoch": 1.9714870595116245, "grad_norm": 1.6447356703420446, "learning_rate": 1.1452434379106703e-08, "loss": 0.46860289573669434, "step": 6742 }, { "epoch": 1.9717794999268898, "grad_norm": 2.0052944353876696, "learning_rate": 1.122228362970712e-08, "loss": 0.5552232265472412, "step": 6743 }, { "epoch": 1.9720719403421554, "grad_norm": 1.7151457677082285, "learning_rate": 1.0994467696364698e-08, "loss": 0.4639692008495331, "step": 6744 }, { "epoch": 1.9723643807574207, "grad_norm": 2.0905035821875746, "learning_rate": 1.076898663233239e-08, "loss": 0.7129387259483337, "step": 6745 }, { "epoch": 1.972656821172686, "grad_norm": 1.6674482501618961, "learning_rate": 1.0545840490313597e-08, "loss": 0.6637833118438721, "step": 6746 }, { "epoch": 1.9729492615879516, "grad_norm": 2.029336881837252, "learning_rate": 1.0325029322467705e-08, "loss": 0.6215991973876953, "step": 6747 }, { "epoch": 1.9732417020032167, "grad_norm": 1.878624196936373, "learning_rate": 1.0106553180407874e-08, "loss": 0.48594456911087036, "step": 6748 }, { "epoch": 1.9735341424184822, "grad_norm": 1.9063825585940108, "learning_rate": 9.890412115202142e-09, "loss": 0.5443629622459412, "step": 6749 }, { "epoch": 1.9738265828337478, "grad_norm": 1.7053157420855176, "learning_rate": 9.676606177371207e-09, "loss": 0.643796443939209, "step": 6750 }, { "epoch": 1.974119023249013, "grad_norm": 1.6282972872252912, "learning_rate": 9.465135416891757e-09, "loss": 0.6305385828018188, "step": 6751 }, { "epoch": 1.9744114636642784, "grad_norm": 1.5632532849336644, "learning_rate": 9.255999883193146e-09, "loss": 0.5120108723640442, "step": 6752 }, { "epoch": 1.9747039040795438, "grad_norm": 1.6718955354026932, "learning_rate": 9.0491996251596e-09, "loss": 0.5552967190742493, "step": 6753 }, { "epoch": 1.974996344494809, "grad_norm": 1.935016742711985, "learning_rate": 8.84473469113023e-09, "loss": 0.6341986656188965, "step": 6754 }, { "epoch": 1.9752887849100746, "grad_norm": 1.9011990155600869, "learning_rate": 8.642605128896808e-09, "loss": 0.5204262137413025, "step": 6755 }, { "epoch": 1.97558122532534, "grad_norm": 1.75594319264598, "learning_rate": 8.442810985705984e-09, "loss": 0.4980974793434143, "step": 6756 }, { "epoch": 1.9758736657406053, "grad_norm": 1.9165104575442982, "learning_rate": 8.245352308258181e-09, "loss": 0.5432465076446533, "step": 6757 }, { "epoch": 1.9761661061558708, "grad_norm": 1.7852742537308695, "learning_rate": 8.0502291427087e-09, "loss": 0.813039243221283, "step": 6758 }, { "epoch": 1.9764585465711362, "grad_norm": 1.8018799007975157, "learning_rate": 7.85744153466661e-09, "loss": 0.5723720788955688, "step": 6759 }, { "epoch": 1.9767509869864015, "grad_norm": 1.8628448153664545, "learning_rate": 7.666989529193647e-09, "loss": 0.5562596321105957, "step": 6760 }, { "epoch": 1.977043427401667, "grad_norm": 1.796195928066652, "learning_rate": 7.478873170807532e-09, "loss": 0.5455175638198853, "step": 6761 }, { "epoch": 1.9773358678169322, "grad_norm": 1.791853318736957, "learning_rate": 7.2930925034797595e-09, "loss": 0.5753832459449768, "step": 6762 }, { "epoch": 1.9776283082321977, "grad_norm": 1.786340662775674, "learning_rate": 7.109647570634482e-09, "loss": 0.49962282180786133, "step": 6763 }, { "epoch": 1.977920748647463, "grad_norm": 1.4222417158044076, "learning_rate": 6.9285384151507316e-09, "loss": 0.44443345069885254, "step": 6764 }, { "epoch": 1.9782131890627284, "grad_norm": 1.5729694345436978, "learning_rate": 6.749765079363535e-09, "loss": 0.3236424922943115, "step": 6765 }, { "epoch": 1.978505629477994, "grad_norm": 1.766865850057596, "learning_rate": 6.573327605057245e-09, "loss": 0.5246942639350891, "step": 6766 }, { "epoch": 1.9787980698932592, "grad_norm": 1.6890664092399734, "learning_rate": 6.399226033475536e-09, "loss": 0.6525053381919861, "step": 6767 }, { "epoch": 1.9790905103085246, "grad_norm": 1.5450928873923104, "learning_rate": 6.227460405312524e-09, "loss": 0.502121090888977, "step": 6768 }, { "epoch": 1.9793829507237901, "grad_norm": 2.00727430176714, "learning_rate": 6.058030760718314e-09, "loss": 0.6137609481811523, "step": 6769 }, { "epoch": 1.9796753911390554, "grad_norm": 2.274345342275455, "learning_rate": 5.890937139294561e-09, "loss": 0.6673166751861572, "step": 6770 }, { "epoch": 1.9799678315543208, "grad_norm": 1.8444636633461322, "learning_rate": 5.726179580098912e-09, "loss": 0.5888657569885254, "step": 6771 }, { "epoch": 1.9802602719695863, "grad_norm": 1.6693157475267608, "learning_rate": 5.563758121642781e-09, "loss": 0.5239546298980713, "step": 6772 }, { "epoch": 1.9805527123848514, "grad_norm": 1.8912704609026834, "learning_rate": 5.403672801890247e-09, "loss": 0.5446778535842896, "step": 6773 }, { "epoch": 1.980845152800117, "grad_norm": 1.9927071253973727, "learning_rate": 5.245923658262486e-09, "loss": 0.6198326349258423, "step": 6774 }, { "epoch": 1.9811375932153825, "grad_norm": 2.15235475034657, "learning_rate": 5.090510727630005e-09, "loss": 0.586353063583374, "step": 6775 }, { "epoch": 1.9814300336306476, "grad_norm": 1.7195990521736408, "learning_rate": 4.93743404632041e-09, "loss": 0.6344239711761475, "step": 6776 }, { "epoch": 1.9817224740459132, "grad_norm": 1.7280364585810115, "learning_rate": 4.7866936501150816e-09, "loss": 0.529091477394104, "step": 6777 }, { "epoch": 1.9820149144611785, "grad_norm": 1.7357230298596742, "learning_rate": 4.6382895742491665e-09, "loss": 0.50063157081604, "step": 6778 }, { "epoch": 1.9823073548764438, "grad_norm": 1.526019679238999, "learning_rate": 4.492221853409362e-09, "loss": 0.48398512601852417, "step": 6779 }, { "epoch": 1.9825997952917094, "grad_norm": 1.5319705226915326, "learning_rate": 4.348490521738358e-09, "loss": 0.5330454707145691, "step": 6780 }, { "epoch": 1.9828922357069747, "grad_norm": 1.829830860451363, "learning_rate": 4.207095612833723e-09, "loss": 0.4562032222747803, "step": 6781 }, { "epoch": 1.98318467612224, "grad_norm": 1.7011927258883048, "learning_rate": 4.0680371597456855e-09, "loss": 0.47456252574920654, "step": 6782 }, { "epoch": 1.9834771165375056, "grad_norm": 1.8486724201847988, "learning_rate": 3.931315194977137e-09, "loss": 0.6283844709396362, "step": 6783 }, { "epoch": 1.983769556952771, "grad_norm": 1.7243813126388492, "learning_rate": 3.7969297504858445e-09, "loss": 0.5886485576629639, "step": 6784 }, { "epoch": 1.9840619973680362, "grad_norm": 1.679651544361786, "learning_rate": 3.664880857685571e-09, "loss": 0.4711921811103821, "step": 6785 }, { "epoch": 1.9843544377833018, "grad_norm": 1.8051937774075772, "learning_rate": 3.5351685474394048e-09, "loss": 0.5372034311294556, "step": 6786 }, { "epoch": 1.9846468781985669, "grad_norm": 1.7143010926050217, "learning_rate": 3.4077928500686473e-09, "loss": 0.5314334034919739, "step": 6787 }, { "epoch": 1.9849393186138324, "grad_norm": 1.7988305575744603, "learning_rate": 3.2827537953461496e-09, "loss": 0.6022863984107971, "step": 6788 }, { "epoch": 1.985231759029098, "grad_norm": 1.844296066004364, "learning_rate": 3.160051412499643e-09, "loss": 0.6739746928215027, "step": 6789 }, { "epoch": 1.985524199444363, "grad_norm": 1.795022844462659, "learning_rate": 3.0396857302084082e-09, "loss": 0.6454254388809204, "step": 6790 }, { "epoch": 1.9858166398596286, "grad_norm": 1.7777744811692944, "learning_rate": 2.9216567766088276e-09, "loss": 0.567995011806488, "step": 6791 }, { "epoch": 1.986109080274894, "grad_norm": 1.7916482396337698, "learning_rate": 2.8059645792877233e-09, "loss": 0.568576455116272, "step": 6792 }, { "epoch": 1.9864015206901593, "grad_norm": 1.5789903561856604, "learning_rate": 2.6926091652890175e-09, "loss": 0.5053816437721252, "step": 6793 }, { "epoch": 1.9866939611054248, "grad_norm": 1.4966825154239165, "learning_rate": 2.5815905611081825e-09, "loss": 0.47705504298210144, "step": 6794 }, { "epoch": 1.9869864015206902, "grad_norm": 1.7555838648022946, "learning_rate": 2.472908792695572e-09, "loss": 0.48271438479423523, "step": 6795 }, { "epoch": 1.9872788419359555, "grad_norm": 1.675207035758499, "learning_rate": 2.3665638854541982e-09, "loss": 0.5694486498832703, "step": 6796 }, { "epoch": 1.987571282351221, "grad_norm": 1.6539598401922624, "learning_rate": 2.2625558642419553e-09, "loss": 0.4940011501312256, "step": 6797 }, { "epoch": 1.9878637227664864, "grad_norm": 1.8961348890729253, "learning_rate": 2.160884753370507e-09, "loss": 0.5536549091339111, "step": 6798 }, { "epoch": 1.9881561631817517, "grad_norm": 1.923836316704977, "learning_rate": 2.0615505766041765e-09, "loss": 0.5354948043823242, "step": 6799 }, { "epoch": 1.9884486035970173, "grad_norm": 1.9901895658271425, "learning_rate": 1.9645533571610585e-09, "loss": 0.6246936321258545, "step": 6800 } ], "logging_steps": 1, "max_steps": 6840, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2077336313905152.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }